/* NSData+Mail.m - this file is part of SOGo * * Copyright (C) 2007-2017 Inverse inc. * * This file is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ #import #import #import #import #import #import #import "NSData+Mail.h" @implementation NSData (SOGoMailUtilities) - (NSData *) bodyDataFromEncoding: (NSString *) encoding { NSString *realEncoding; NSData *decodedData; if ([encoding length] > 0) { realEncoding = [encoding lowercaseString]; if ([realEncoding isEqualToString: @"7bit"] || [realEncoding isEqualToString: @"8bit"] || [realEncoding isEqualToString: @"binary"]) decodedData = self; else if ([realEncoding isEqualToString: @"base64"]) decodedData = [self dataByDecodingBase64]; else if ([realEncoding isEqualToString: @"quoted-printable"]) decodedData = [self dataByDecodingQuotedPrintableTransferEncoding]; else { decodedData = nil; //NSLog (@"encoding '%@' unknown, returning nil data", realEncoding); } } else decodedData = self; return decodedData; } - (NSString *) bodyStringFromCharset: (NSString *) charset { NSString *lcCharset, *bodyString; if ([charset length]) lcCharset = [charset lowercaseString]; else lcCharset = @"us-ascii"; bodyString = [NSString stringWithData: self usingEncodingNamed: lcCharset]; if (![bodyString length]) { /* UTF-8 is used as a 8bit fallback charset... */ bodyString = [[NSString alloc] initWithData: self encoding: NSUTF8StringEncoding]; [bodyString autorelease]; } if (!bodyString) { /* iCalendar invitations sent by Outlook 2002 have the annoying bug that the mail states an UTF-8 content encoding but the actual iCalendar content is encoding in Latin-1 (or Windows Western?). As a result the content decoding will fail (TODO: always?). In this case we try to decode with Latin-1. Note: we could check for the Outlook x-mailer, but it was considered better to try Latin-1 as a fallback in any case (be tolerant). */ bodyString = [[NSString alloc] initWithData: self encoding: NSISOLatin1StringEncoding]; if (!bodyString) [self errorWithFormat: @"an attempt to use" @" NSISOLatin1StringEncoding as callback failed"]; [bodyString autorelease]; } return bodyString; } /* * Excpected form is: "=?charset?encoding?encoded text?=". */ - (NSString *) decodedHeader { const char *cData; unsigned int len, i, j; NSString *decodedString; cData = [self bytes]; len = [self length]; decodedString = nil; if (len) { if (len > 6) { // Find beginning of encoded text i = 1; while ((*cData != '=' || *(cData+1) != '?') && i < len) { cData++; i++; } if (*cData == '=' && *(cData+1) == '?') { NSString *enc; if (i > 1) decodedString = [[[NSString alloc] initWithData: [self subdataWithRange: NSMakeRange(0, (i-1))] encoding: NSASCIIStringEncoding] autorelease]; cData += 2; // skip "=?" i++; j = i; // Find next "?" while (*cData != '?' && j < len) { cData++; j++; } enc = [[[NSString alloc] initWithData:[self subdataWithRange: NSMakeRange(i, j-i)] encoding: NSASCIIStringEncoding] autorelease]; i = j + 3; // skip "?q?" if (i < (len-2)) { NSData *d; BOOL isQuotedPrintable = NO; cData++; // We check if we have a QP or Base64 encoding if (*cData == 'q' || *cData == 'Q') isQuotedPrintable = YES; // Find end of encoded text j = i; cData += 2; // skip "q?" while ((*cData != '?' || *(cData+1) != '=') && (j+1) < len) { cData++; j++; } d = [self subdataWithRange: NSMakeRange(i, j-i)]; if (isQuotedPrintable) d = [d dataByDecodingQuotedPrintable]; else d = [d dataByDecodingBase64]; if (decodedString) { decodedString = [NSString stringWithFormat: @"%@%@", decodedString, [NSString stringWithData: d usingEncodingNamed: enc]]; } else decodedString = [NSString stringWithData: d usingEncodingNamed: enc]; j += 2; // skip "?=" if (j < len) { // Recursively decode the remaining part decodedString = [NSString stringWithFormat: @"%@%@", decodedString, [[self subdataWithRange: NSMakeRange(j, len-j)] decodedHeader]]; } } else decodedString = nil; } } if (!decodedString) { decodedString = [[NSString alloc] initWithData: self encoding: NSUTF8StringEncoding]; if (!decodedString) decodedString = [[NSString alloc] initWithData: self encoding: NSISOLatin1StringEncoding]; [decodedString autorelease]; } } else decodedString = @""; return decodedString; } // // In order to avoid a libxml bug/limitation, we strip the charset= parameter // to avoid libxml to consider the charset= parameter while it works in UTF-8 // internally, all the time. // // A fix was commited by Daniel Veillard following discussions Inverse had // with him on the issue: // // commit a1bc2f2ba4b5317885205d4f71c7c4b1c99ec870 // Author: Daniel Veillard // Date: Mon May 16 16:03:50 2011 +0800 // // Add options to ignore the internal encoding // // For both XML and HTML, the document can provide an encoding // either in XMLDecl in XML, or as a meta element in HTML head. // This adds options to ignore those encodings if the encoding // is known in advace for example if the content had been converted // before being passed to the parser. // // * parser.c include/libxml/parser.h: add XML_PARSE_IGNORE_ENC option // for XML parsing // * include/libxml/HTMLparser.h HTMLparser.c: adds the // HTML_PARSE_IGNORE_ENC for HTML parsing // * HTMLtree.c: fix the handling of saving when an unknown encoding is // defined in meta document header // * xmllint.c: add a --noenc option to activate the new parser options // // - (NSData *) sanitizedContentUsingVoidTags: (NSArray *) theVoidTags { NSMutableData *d; NSString *found_tag, *tag; NSEnumerator *tags; const char *bytes; char *buf; int i, j, len; BOOL found_delimiter, in_meta; d = [NSMutableData dataWithData: self]; bytes = [d bytes]; len = [d length]; i = 0; in_meta = NO; while (i < len) { // We check if we see in which case, we substitute de charset= stuff. if (i < len-5) { if ((*bytes == '<') && (*(bytes+1) == 'm' || *(bytes+1) == 'M') && (*(bytes+2) == 'e' || *(bytes+2) == 'E') && (*(bytes+3) == 't' || *(bytes+3) == 'T') && (*(bytes+4) == 'a' || *(bytes+4) == 'A') && (*(bytes+5) == ' ')) in_meta = YES; } // We search for something like : // // // if (in_meta && i < len-9) { if ((*bytes == 'c' || *bytes == 'C') && (*(bytes+1) == 'h' || *(bytes+1) == 'H') && (*(bytes+2) == 'a' || *(bytes+2) == 'A') && (*(bytes+3) == 'r' || *(bytes+3) == 'R') && (*(bytes+4) == 's' || *(bytes+4) == 'S') && (*(bytes+5) == 'e' || *(bytes+5) == 'E') && (*(bytes+6) == 't' || *(bytes+6) == 'T') && (*(bytes+7) == '=')) { // We search until we find a '"' or a space j = 8; found_delimiter = YES; while (*(bytes+j) != ' ' && *(bytes+j) != '"' && *(bytes+j) != '\'') { j++; // We haven't found anything, let's return the data untouched if ((i+j) >= len) { in_meta = found_delimiter = NO; break; } } if (found_delimiter) { [d replaceBytesInRange: NSMakeRange(i, j) withBytes: NULL length: 0]; in_meta = found_delimiter = NO; } } } bytes++; i++; } /* * Replace badly formatted void tags * * A void tag that begins with a slash is considered invalid. * We remove the slash from those tags. * * Ex:
is replaced by
*/ if (!theVoidTags) { /* see http://www.w3.org/TR/html4/index/elements.html */ theVoidTags = [[[NSArray alloc] initWithObjects: @"area", @"base", @"basefont", @"br", @"col", @"frame", @"hr", @"img", @"input", @"isindex", @"link", @"meta", @"param", @"", nil] autorelease]; } bytes = [d bytes]; len = [d length]; i = 0; while (i < len) { if (i < len-3) { // Search for ending tags if ((*bytes == '<') && (*(bytes+1) == '/')) { i += 2; bytes += 2; j = 0; found_delimiter = YES; while (*(bytes+j) != '>') { j++; if ((i+j) >= len) { found_delimiter = NO; break; } } if (found_delimiter && j > 0) { // Copy the ending tag to a NSString buf = malloc((j+1) * sizeof(char)); memset (buf, 0, j+1); memcpy (buf, bytes, j); found_tag = [NSString stringWithCString: buf encoding: NSUTF8StringEncoding]; tags = [theVoidTags objectEnumerator]; tag = [tags nextObject]; while (tag && found_tag) { if ([tag caseInsensitiveCompare: found_tag] == NSOrderedSame) { // Remove the leading slash //NSLog(@"Found void tag with invalid leading slash: ", found_tag); i--; [d replaceBytesInRange: NSMakeRange(i, 1) withBytes: NULL length: 0]; bytes = [d bytes]; bytes += i; len = [d length]; break; } tag = [tags nextObject]; } free(buf); // Continue the parsing after end tag i += j; bytes += j; } } } bytes++; i++; } return d; } @end