b6d657d001
Conflicts: SoObjects/Mailer/NSData+Mail.m
408 lines
12 KiB
Objective-C
408 lines
12 KiB
Objective-C
/* NSData+Mail.m - this file is part of SOGo
|
|
*
|
|
* Copyright (C) 2007-2017 Inverse inc.
|
|
*
|
|
* This file is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This file is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#import <Foundation/NSArray.h>
|
|
#import <Foundation/NSString.h>
|
|
|
|
#import <NGExtensions/NGBase64Coding.h>
|
|
#import <NGExtensions/NGQuotedPrintableCoding.h>
|
|
#import <NGExtensions/NSString+Encoding.h>
|
|
#import <NGExtensions/NSObject+Logs.h>
|
|
|
|
#import "NSData+Mail.h"
|
|
|
|
@implementation NSData (SOGoMailUtilities)
|
|
|
|
/*
 * Returns the receiver decoded according to the given transfer encoding
 * name, which is compared case-insensitively:
 *   - nil or empty, "7bit", "8bit", "binary": the receiver itself;
 *   - "base64": the result of -dataByDecodingBase64;
 *   - "quoted-printable": the result of
 *     -dataByDecodingQuotedPrintableTransferEncoding;
 *   - anything else: nil.
 */
- (NSData *) bodyDataFromEncoding: (NSString *) encoding
{
  NSString *transferEncoding;

  /* Without an encoding name the content is handed back as is. */
  if ([encoding length] == 0)
    return self;

  transferEncoding = [encoding lowercaseString];

  if ([transferEncoding isEqualToString: @"base64"])
    return [self dataByDecodingBase64];

  if ([transferEncoding isEqualToString: @"quoted-printable"])
    return [self dataByDecodingQuotedPrintableTransferEncoding];

  /* Identity encodings: nothing to decode. */
  if ([transferEncoding isEqualToString: @"7bit"]
      || [transferEncoding isEqualToString: @"8bit"]
      || [transferEncoding isEqualToString: @"binary"])
    return self;

  /* Unknown encoding: the caller gets nil. */
  return nil;
}
|
|
|
|
/*
 * Converts the receiver to a string using the named charset
 * ("us-ascii" when charset is nil or empty; the name is lowercased first).
 *
 * Fallbacks, in order:
 *   1. when the conversion yields nil or an empty string, UTF-8 is tried;
 *   2. when the result is still nil, ISO Latin-1 is tried. The original
 *      authors added this for iCalendar invitations sent by Outlook 2002,
 *      which announce UTF-8 but carry Latin-1 content.
 *
 * Returns the decoded string, or nil (after logging an error) when even the
 * Latin-1 attempt fails.
 */
- (NSString *) bodyStringFromCharset: (NSString *) charset
{
  NSString *charsetName, *decoded;

  charsetName = ([charset length] ? [charset lowercaseString] : @"us-ascii");
  decoded = [NSString stringWithData: self usingEncodingNamed: charsetName];

  /* UTF-8 is used as an 8bit fallback charset. */
  if ([decoded length] == 0)
    decoded = [[[NSString alloc] initWithData: self
                                     encoding: NSUTF8StringEncoding] autorelease];

  /* Last resort: be tolerant and try Latin-1 whatever the sender was. */
  if (decoded == nil)
    {
      decoded = [[[NSString alloc] initWithData: self
                                       encoding: NSISOLatin1StringEncoding] autorelease];
      if (decoded == nil)
        [self errorWithFormat: @"an attempt to use"
              @" NSISOLatin1StringEncoding as callback failed"];
    }

  return decoded;
}
|
|
|
|
/*
 * Decodes a raw header value that may contain encoded words.
 * Expected form is: "=?charset?encoding?encoded text?=".
 *
 * Behaviour:
 *   - an empty receiver yields @"";
 *   - text found before the first "=?" is kept (read as ASCII);
 *   - the encoding letter 'q'/'Q' selects quoted-printable, anything else is
 *     treated as base64;
 *   - whatever follows the closing "?=" is decoded recursively and appended;
 *   - when the encoded word cannot be converted to a string, its raw text is
 *     kept instead, so that a literal "(null)" never ends up in the result;
 *   - when no usable encoded word is found (or the receiver is 6 bytes long
 *     or less), the whole receiver is read as UTF-8, then as ISO Latin-1.
 *
 * Every scan tests its index against the length *before* reading a byte, so
 * the receiver's buffer is never read past its end.
 *
 * Note: when the closing "?=" is missing, the scan stops on the last byte,
 * which is therefore left out of the decoded text.
 */
- (NSString *) decodedHeader
{
  const char *bytes;
  unsigned int len, wordStart, charsetStart, charsetEnd;
  unsigned int textStart, textEnd, next, rawEnd;
  NSString *decodedString, *charset, *word, *rest;
  NSData *payload;
  BOOL isQuotedPrintable;

  bytes = [self bytes];
  len = [self length];

  if (!len)
    return @"";

  decodedString = nil;

  if (len > 6)
    {
      // Find beginning of encoded text ("=?")
      wordStart = 0;
      while ((wordStart + 1) < len
             && (bytes[wordStart] != '=' || bytes[wordStart + 1] != '?'))
        wordStart++;

      if ((wordStart + 1) < len)
        {
          // Keep whatever precedes the encoded word
          if (wordStart > 0)
            decodedString = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange (0, wordStart)]
                                                   encoding: NSASCIIStringEncoding] autorelease];

          // The charset name runs from after "=?" up to the next "?"
          charsetStart = wordStart + 2;
          charsetEnd = charsetStart;
          while (charsetEnd < len && bytes[charsetEnd] != '?')
            charsetEnd++;

          charset = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange (charsetStart, charsetEnd - charsetStart)]
                                           encoding: NSASCIIStringEncoding] autorelease];

          textStart = charsetEnd + 3; // skip "?q?"
          if (textStart < (len - 2))
            {
              // We check if we have a QP or Base64 encoding
              isQuotedPrintable = (bytes[charsetEnd + 1] == 'q'
                                   || bytes[charsetEnd + 1] == 'Q');

              // Find end of encoded text ("?=")
              textEnd = textStart;
              while ((textEnd + 1) < len
                     && (bytes[textEnd] != '?' || bytes[textEnd + 1] != '='))
                textEnd++;

              payload = [self subdataWithRange: NSMakeRange (textStart, textEnd - textStart)];
              if (isQuotedPrintable)
                payload = [payload dataByDecodingQuotedPrintable];
              else
                payload = [payload dataByDecodingBase64];

              word = [NSString stringWithData: payload
                           usingEncodingNamed: charset];

              next = textEnd + 2; // skip "?="

              if (!word)
                {
                  /* The word could not be decoded: keep its raw text rather
                     than letting a nil be formatted as "(null)". */
                  rawEnd = (next < len) ? next : len;
                  word = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange (wordStart, rawEnd - wordStart)]
                                                encoding: NSISOLatin1StringEncoding] autorelease];
                  if (!word)
                    word = @"";
                }

              if (decodedString)
                decodedString = [decodedString stringByAppendingString: word];
              else
                decodedString = word;

              if (next < len)
                {
                  // Recursively decode the remaining part
                  rest = [[self subdataWithRange: NSMakeRange (next, len - next)] decodedHeader];
                  if (rest)
                    decodedString = [decodedString stringByAppendingString: rest];
                }
            }
          else
            decodedString = nil; // too short to hold an encoded word
        }
    }

  if (!decodedString)
    {
      // No encoded word: read the raw bytes, UTF-8 first, then Latin-1
      decodedString
        = [[NSString alloc] initWithData: self
                                encoding: NSUTF8StringEncoding];
      if (!decodedString)
        decodedString
          = [[NSString alloc] initWithData: self
                                  encoding: NSISOLatin1StringEncoding];
      [decodedString autorelease];
    }

  return decodedString;
}
|
|
|
|
//
|
|
// In order to avoid a libxml bug/limitation, we strip the charset= parameter
|
|
// to avoid libxml to consider the charset= parameter while it works in UTF-8
|
|
// internally, all the time.
|
|
//
|
|
// A fix was committed by Daniel Veillard following discussions Inverse had
|
|
// with him on the issue:
|
|
//
|
|
// commit a1bc2f2ba4b5317885205d4f71c7c4b1c99ec870
|
|
// Author: Daniel Veillard <veillard redhat com>
|
|
// Date: Mon May 16 16:03:50 2011 +0800
|
|
//
|
|
// Add options to ignore the internal encoding
|
|
//
|
|
// For both XML and HTML, the document can provide an encoding
|
|
// either in XMLDecl in XML, or as a meta element in HTML head.
|
|
// This adds options to ignore those encodings if the encoding
|
|
// is known in advace for example if the content had been converted
|
|
// before being passed to the parser.
|
|
//
|
|
// * parser.c include/libxml/parser.h: add XML_PARSE_IGNORE_ENC option
|
|
// for XML parsing
|
|
// * include/libxml/HTMLparser.h HTMLparser.c: adds the
|
|
// HTML_PARSE_IGNORE_ENC for HTML parsing
|
|
// * HTMLtree.c: fix the handling of saving when an unknown encoding is
|
|
// defined in meta document header
|
|
// * xmllint.c: add a --noenc option to activate the new parser options
|
|
//
|
|
//
|
|
/*
 * Returns a copy of the receiver (HTML content) with two clean-ups applied:
 *
 *  1. inside a <meta ...> tag, the "charset=<value>" token is removed, e.g.
 *       <meta http-equiv="Content-Type" content="text/html; charset=Windows-1252">
 *     becomes
 *       <meta http-equiv="Content-Type" content="text/html; ">
 *     The value ends at the first space, double quote, single quote or '>'.
 *     Only the first "charset=" of a given meta tag is removed, and text
 *     outside of meta tags is never touched;
 *
 *  2. end tags of void elements lose their slash, e.g. </br> becomes <br>.
 *     Tag names are compared case-insensitively with theVoidTags; when
 *     theVoidTags is nil, a default list of HTML 4 void elements is used.
 *
 * The receiver itself is not modified.
 */
- (NSData *) sanitizedContentUsingVoidTags: (NSArray *) theVoidTags
{
  NSMutableData *d;
  NSString *found_tag, *tag;
  NSEnumerator *tags;
  const char *bytes;
  int i, j, len;
  BOOL found_delimiter, in_meta;

  d = [NSMutableData dataWithData: self];
  bytes = [d bytes];
  len = [d length];
  in_meta = NO;
  i = 0;

  while (i < len)
    {
      // We check if we see <meta ...> in which case, we strip the charset= stuff.
      if (i < len-5
          && bytes[i] == '<'
          && (bytes[i+1] == 'm' || bytes[i+1] == 'M')
          && (bytes[i+2] == 'e' || bytes[i+2] == 'E')
          && (bytes[i+3] == 't' || bytes[i+3] == 'T')
          && (bytes[i+4] == 'a' || bytes[i+4] == 'A')
          && bytes[i+5] == ' ')
        in_meta = YES;
      else if (in_meta && bytes[i] == '>')
        // End of the meta tag: a later "charset=" belongs to regular content
        in_meta = NO;

      // We search for something like :
      //
      // <meta http-equiv="Content-Type" content="text/html; charset=Windows-1252">
      //
      if (in_meta && i < len-9
          && (bytes[i] == 'c' || bytes[i] == 'C')
          && (bytes[i+1] == 'h' || bytes[i+1] == 'H')
          && (bytes[i+2] == 'a' || bytes[i+2] == 'A')
          && (bytes[i+3] == 'r' || bytes[i+3] == 'R')
          && (bytes[i+4] == 's' || bytes[i+4] == 'S')
          && (bytes[i+5] == 'e' || bytes[i+5] == 'E')
          && (bytes[i+6] == 't' || bytes[i+6] == 'T')
          && bytes[i+7] == '=')
        {
          // We search until we find a space, a quote or the end of the tag
          j = 8;
          found_delimiter = YES;

          while (bytes[i+j] != ' ' && bytes[i+j] != '"'
                 && bytes[i+j] != '\'' && bytes[i+j] != '>')
            {
              j++;

              // We haven't found anything, let's leave the data untouched
              if ((i+j) >= len)
                {
                  in_meta = found_delimiter = NO;
                  break;
                }
            }

          if (found_delimiter)
            {
              [d replaceBytesInRange: NSMakeRange(i, j)
                           withBytes: NULL
                              length: 0];
              /* The data just shrank: refresh the pointer and the length so
                 that we never scan bytes that are no longer part of it. */
              bytes = [d bytes];
              len = [d length];
              in_meta = NO;
            }
        }

      i++;
    }

  /*
   * Replace badly formatted void tags
   *
   * A void tag that begins with a slash is considered invalid.
   * We remove the slash from those tags.
   *
   * Ex: </br> is replaced by <br>
   */

  if (!theVoidTags)
    {
      /* see http://www.w3.org/TR/html4/index/elements.html */
      theVoidTags = [[[NSArray alloc] initWithObjects: @"area", @"base",
                                      @"basefont", @"br", @"col", @"frame", @"hr",
                                      @"img", @"input", @"isindex", @"link",
                                      @"meta", @"param", @"", nil] autorelease];
    }

  bytes = [d bytes];
  len = [d length];
  i = 0;

  while (i < len)
    {
      // Search for ending tags
      if (i < len-3 && bytes[i] == '<' && bytes[i+1] == '/')
        {
          i += 2; // i now points to the first byte of the tag name
          j = 0;
          found_delimiter = YES;

          while (bytes[i+j] != '>')
            {
              j++;
              if ((i+j) >= len)
                {
                  found_delimiter = NO;
                  break;
                }
            }

          if (found_delimiter && j > 0)
            {
              // Copy the ending tag name to a NSString (nil if not valid UTF-8)
              found_tag = [[[NSString alloc] initWithBytes: bytes + i
                                                    length: j
                                                  encoding: NSUTF8StringEncoding] autorelease];

              tags = [theVoidTags objectEnumerator];
              while (found_tag && (tag = [tags nextObject]))
                {
                  if ([tag caseInsensitiveCompare: found_tag] == NSOrderedSame)
                    {
                      // Remove the leading slash
                      i--;
                      [d replaceBytesInRange: NSMakeRange(i, 1)
                                   withBytes: NULL
                                      length: 0];
                      bytes = [d bytes];
                      len = [d length];
                      break;
                    }
                }

              // Continue the parsing after end tag
              i += j;
            }
        }

      i++;
    }

  return d;
}
|
|
|
|
@end
|