sogo/SoObjects/Mailer/NSData+Mail.m

408 lines
12 KiB
Objective-C

/* NSData+Mail.m - this file is part of SOGo
*
* Copyright (C) 2007-2017 Inverse inc.
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#import <Foundation/NSArray.h>
#import <Foundation/NSString.h>
#import <NGExtensions/NGBase64Coding.h>
#import <NGExtensions/NGQuotedPrintableCoding.h>
#import <NGExtensions/NSString+Encoding.h>
#import <NGExtensions/NSObject+Logs.h>
#import "NSData+Mail.h"
@implementation NSData (SOGoMailUtilities)
/*
 * Undoes the MIME Content-Transfer-Encoding named by `encoding`.
 *
 * The name is matched case-insensitively. A nil or empty name, as well as
 * the identity encodings "7bit", "8bit" and "binary", return the receiver
 * itself. "base64" and "quoted-printable" return the decoded payload. Any
 * other name returns nil.
 */
- (NSData *) bodyDataFromEncoding: (NSString *) encoding
{
  NSString *normalized;

  /* No declared transfer encoding: the payload is used as-is. */
  if ([encoding length] == 0)
    return self;

  normalized = [encoding lowercaseString];

  if ([normalized isEqualToString: @"base64"])
    return [self dataByDecodingBase64];

  if ([normalized isEqualToString: @"quoted-printable"])
    return [self dataByDecodingQuotedPrintableTransferEncoding];

  /* Identity encodings: nothing to decode. */
  if ([normalized isEqualToString: @"7bit"]
      || [normalized isEqualToString: @"8bit"]
      || [normalized isEqualToString: @"binary"])
    return self;

  /* Unknown transfer encoding. */
  return nil;
}
/*
 * Converts the receiver to a string using the charset named by `charset`
 * (lowercased first; "us-ascii" when nil or empty).
 *
 * Fallback chain:
 *   1. the named charset;
 *   2. UTF-8, when step 1 produced nil or an empty string;
 *   3. ISO Latin-1, when step 2 produced nil. An error is logged if even
 *      that fails, and nil is returned.
 */
- (NSString *) bodyStringFromCharset: (NSString *) charset
{
  NSString *charsetName, *text;

  charsetName = ([charset length] ? [charset lowercaseString] : @"us-ascii");
  text = [NSString stringWithData: self usingEncodingNamed: charsetName];

  /* UTF-8 is used as an 8bit fallback charset... */
  if ([text length] == 0)
    text = [[[NSString alloc] initWithData: self
                                  encoding: NSUTF8StringEncoding] autorelease];

  if (text == nil)
    {
      /*
        iCalendar invitations sent by Outlook 2002 declare a UTF-8 content
        encoding while the actual iCalendar content is encoded in Latin-1
        (or Windows Western?), which makes the decoding above fail
        (TODO: always?). Rather than sniffing the Outlook x-mailer, Latin-1
        is tried as a last resort in every case (be tolerant).
      */
      text = [[[NSString alloc] initWithData: self
                                    encoding: NSISOLatin1StringEncoding] autorelease];
      if (text == nil)
        [self errorWithFormat: @"an attempt to use NSISOLatin1StringEncoding as callback failed"];
    }

  return text;
}
/*
 * Decodes a raw header value that may contain RFC 2047 "encoded words".
 *
 * Expected form is: "=?charset?encoding?encoded text?=".
 *
 * Any text preceding the first encoded word is read as ASCII and kept as a
 * prefix. The word itself is decoded as quoted-printable when the encoding
 * letter is 'q'/'Q', as base64 otherwise, and then converted from the named
 * charset. Whatever follows the closing "?=" is decoded recursively and
 * appended.
 *
 * When no well-formed encoded word is found, or when the word cannot be
 * converted from its charset, the whole receiver is returned as raw text,
 * interpreted as UTF-8 and, failing that, as ISO Latin-1.
 *
 * Returns @"" for empty data.
 */
- (NSString *) decodedHeader
{
  const char *bytes;
  unsigned int len, wordStart, charsetStart, charsetEnd, textStart, textEnd;
  NSString *decodedString, *charsetName, *word, *remainder;
  NSData *payload;
  BOOL isQuotedPrintable;

  len = [self length];
  if (!len)
    return @"";

  bytes = [self bytes];
  decodedString = nil;

  /* Every scan below tests the index against `len` BEFORE dereferencing:
     NSData buffers are not NUL-terminated, so reading bytes[len] is an
     out-of-bounds access. */
  if (len > 6)
    {
      // Find the beginning ("=?") of the encoded word
      wordStart = 0;
      while (wordStart + 1 < len
             && (bytes[wordStart] != '=' || bytes[wordStart + 1] != '?'))
        wordStart++;

      if (wordStart + 1 < len)
        {
          // Keep the plain text preceding the encoded word, if any
          if (wordStart > 0)
            decodedString
              = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange (0, wordStart)]
                                       encoding: NSASCIIStringEncoding] autorelease];

          // The charset name runs from after "=?" up to the next '?'
          charsetStart = wordStart + 2;
          charsetEnd = charsetStart;
          while (charsetEnd < len && bytes[charsetEnd] != '?')
            charsetEnd++;
          charsetName
            = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange (charsetStart, charsetEnd - charsetStart)]
                                     encoding: NSASCIIStringEncoding] autorelease];

          // Skip "?q?" (or "?b?") to reach the encoded text
          textStart = charsetEnd + 3;
          if (textStart < (len - 2))
            {
              // 'q'/'Q' means quoted-printable, anything else is
              // handled as base64
              isQuotedPrintable = (bytes[charsetEnd + 1] == 'q'
                                   || bytes[charsetEnd + 1] == 'Q');

              // Find the end ("?=") of the encoded text
              textEnd = textStart;
              while (textEnd + 1 < len
                     && (bytes[textEnd] != '?' || bytes[textEnd + 1] != '='))
                textEnd++;

              payload = [self subdataWithRange: NSMakeRange (textStart, textEnd - textStart)];
              if (isQuotedPrintable)
                payload = [payload dataByDecodingQuotedPrintable];
              else
                payload = [payload dataByDecodingBase64];

              word = [NSString stringWithData: payload
                           usingEncodingNamed: charsetName];
              if (word)
                {
                  if (decodedString)
                    decodedString = [decodedString stringByAppendingString: word];
                  else
                    decodedString = word;

                  textEnd += 2; // skip "?="
                  if (textEnd < len)
                    {
                      // Recursively decode the remaining part
                      remainder = [[self subdataWithRange: NSMakeRange (textEnd, len - textEnd)]
                                    decodedHeader];
                      if (remainder)
                        decodedString = [decodedString stringByAppendingString: remainder];
                    }
                }
              else
                {
                  /* The charset conversion failed: fall back to the raw
                     text below rather than splicing "(null)" into the
                     header. */
                  decodedString = nil;
                }
            }
          else
            decodedString = nil;
        }
    }

  if (!decodedString)
    {
      /* Raw fallback: UTF-8 first, then Latin-1 */
      decodedString = [[NSString alloc] initWithData: self
                                            encoding: NSUTF8StringEncoding];
      if (!decodedString)
        decodedString = [[NSString alloc] initWithData: self
                                              encoding: NSISOLatin1StringEncoding];
      [decodedString autorelease];
    }

  return decodedString;
}
//
// In order to avoid a libxml bug/limitation, we strip the charset= parameter
// to avoid libxml to consider the charset= parameter while it works in UTF-8
// internally, all the time.
//
// A fix was committed by Daniel Veillard following discussions Inverse had
// with him on the issue:
//
// commit a1bc2f2ba4b5317885205d4f71c7c4b1c99ec870
// Author: Daniel Veillard <veillard redhat com>
// Date: Mon May 16 16:03:50 2011 +0800
//
// Add options to ignore the internal encoding
//
// For both XML and HTML, the document can provide an encoding
// either in XMLDecl in XML, or as a meta element in HTML head.
// This adds options to ignore those encodings if the encoding
// is known in advace for example if the content had been converted
// before being passed to the parser.
//
// * parser.c include/libxml/parser.h: add XML_PARSE_IGNORE_ENC option
// for XML parsing
// * include/libxml/HTMLparser.h HTMLparser.c: adds the
// HTML_PARSE_IGNORE_ENC for HTML parsing
// * HTMLtree.c: fix the handling of saving when an unknown encoding is
// defined in meta document header
// * xmllint.c: add a --noenc option to activate the new parser options
//
//
/*
 * Returns a cleaned-up copy of the receiver, which is expected to hold HTML
 * markup. Two passes are applied to the copy:
 *
 *  1. Once a "<meta " tag has been seen, the next "charset=<value>"
 *     sequence is removed, up to (but not including) the first space,
 *     quote or '>' that follows it.
 *  2. Closing forms of void tags ("</br>") get their slash removed
 *     ("<br>").
 *
 * theVoidTags: tag names (NSString) compared case-insensitively in pass 2.
 *              When nil, the HTML 4 list of empty elements is used.
 *
 * The receiver itself is never modified.
 */
- (NSData *) sanitizedContentUsingVoidTags: (NSArray *) theVoidTags
{
  NSMutableData *d;
  NSString *found_tag, *tag;
  NSEnumerator *tags;
  const char *bytes;
  int i, j, len;
  BOOL found_delimiter, in_meta;

  d = [NSMutableData dataWithData: self];
  bytes = [d bytes];
  len = [d length];
  i = 0;
  in_meta = NO;

  while (i < len)
    {
      // We check if we see <meta ...>, in which case we strip the charset= stuff.
      if (i < len-5
          && bytes[i] == '<'
          && (bytes[i+1] == 'm' || bytes[i+1] == 'M')
          && (bytes[i+2] == 'e' || bytes[i+2] == 'E')
          && (bytes[i+3] == 't' || bytes[i+3] == 'T')
          && (bytes[i+4] == 'a' || bytes[i+4] == 'A')
          && bytes[i+5] == ' ')
        in_meta = YES;

      // We search for something like :
      //
      // <meta http-equiv="Content-Type" content="text/html; charset=Windows-1252">
      //
      // NOTE: in_meta stays set until a charset= has been handled; it is
      // not reset when the meta tag is closed.
      if (in_meta && i < len-9
          && (bytes[i]   == 'c' || bytes[i]   == 'C')
          && (bytes[i+1] == 'h' || bytes[i+1] == 'H')
          && (bytes[i+2] == 'a' || bytes[i+2] == 'A')
          && (bytes[i+3] == 'r' || bytes[i+3] == 'R')
          && (bytes[i+4] == 's' || bytes[i+4] == 'S')
          && (bytes[i+5] == 'e' || bytes[i+5] == 'E')
          && (bytes[i+6] == 't' || bytes[i+6] == 'T')
          && bytes[i+7] == '=')
        {
          // We search until we find a space, a quote or the end of the tag.
          // '>' must stop the scan too: with an unquoted value such as
          // <meta charset=utf-8> we would otherwise eat the markup that
          // follows the tag.
          j = 8;
          found_delimiter = YES;
          while (bytes[i+j] != ' ' && bytes[i+j] != '"'
                 && bytes[i+j] != '\'' && bytes[i+j] != '>')
            {
              j++;
              // We haven't found anything, let's leave the data untouched
              if ((i+j) >= len)
                {
                  in_meta = found_delimiter = NO;
                  break;
                }
            }

          if (found_delimiter)
            {
              [d replaceBytesInRange: NSMakeRange(i, j)
                           withBytes: NULL
                              length: 0];
              // The buffer has been modified: refresh the pointer and the
              // length, so we neither scan stale bytes past the new end
              // nor use a pointer to a reallocated buffer.
              bytes = [d bytes];
              len = [d length];
              in_meta = NO;
            }
        }

      i++;
    }

  /*
   * Replace badly formatted void tags
   *
   * A void tag that begins with a slash is considered invalid.
   * We remove the slash from those tags.
   *
   * Ex: </br> is replaced by <br>
   */
  if (!theVoidTags)
    {
      /* see http://www.w3.org/TR/html4/index/elements.html */
      theVoidTags = [[[NSArray alloc] initWithObjects: @"area", @"base",
                                      @"basefont", @"br", @"col", @"frame", @"hr",
                                      @"img", @"input", @"isindex", @"link",
                                      @"meta", @"param", @"", nil] autorelease];
    }

  bytes = [d bytes];
  len = [d length];
  i = 0;

  while (i < len)
    {
      // Search for ending tags
      if (i < len-3 && bytes[i] == '<' && bytes[i+1] == '/')
        {
          i += 2; // i is now the index of the first byte of the tag name

          // Measure the tag name: j bytes, up to the closing '>'
          j = 0;
          found_delimiter = YES;
          while (bytes[i+j] != '>')
            {
              j++;
              if ((i+j) >= len)
                {
                  found_delimiter = NO;
                  break;
                }
            }

          if (found_delimiter && j > 0)
            {
              // Copy the tag name to a NSString (nil if it is not valid
              // UTF-8, in which case nothing is compared)
              found_tag = [[[NSString alloc] initWithBytes: bytes + i
                                                    length: j
                                                  encoding: NSUTF8StringEncoding] autorelease];
              tags = [theVoidTags objectEnumerator];
              while (found_tag && (tag = [tags nextObject]))
                {
                  if ([tag caseInsensitiveCompare: found_tag] == NSOrderedSame)
                    {
                      // Remove the leading slash
                      i--;
                      [d replaceBytesInRange: NSMakeRange(i, 1)
                                   withBytes: NULL
                                      length: 0];
                      bytes = [d bytes];
                      len = [d length];
                      break;
                    }
                }

              // Continue the parsing after end tag
              i += j;
            }
        }

      i++;
    }

  return d;
}
@end