Append tags when using a sanitizerContentHandler

pull/17/head
Francis Lachapelle 2014-01-06 14:20:47 -05:00
parent 586d66b113
commit b716331d8e
1 changed files with 128 additions and 75 deletions

View File

@ -57,6 +57,7 @@
NSArray *ignoreContentTags;
NSArray *specialTreatmentTags;
NSArray *voidTags;
BOOL ignoreContent;
BOOL orderedList;
@ -73,6 +74,7 @@
- (void) setIgnoreContentTags: (NSArray *) theTags;
- (void) setSpecialTreatmentTags: (NSArray *) theTags;
- (void) setVoidTags: (NSArray *) theTags;
- (void) setImages: (NSMutableArray *) theImages;
@end
@ -83,16 +85,14 @@
{
if ((self = [super init]))
{
ignoreContentTags = [NSArray arrayWithObjects: @"head", @"script",
@"style", nil];
specialTreatmentTags = [NSArray arrayWithObjects: @"body", @"p", @"ul",
@"li", @"table", @"tr", @"td", @"th",
@"br", @"hr", @"dt", @"dd", nil];
images = nil;
ignoreContentTags = nil;
specialTreatmentTags = nil;
[ignoreContentTags retain];
[specialTreatmentTags retain];
ignoreContent = NO;
images = nil;
result = nil;
orderedList = NO;
@ -110,6 +110,12 @@
if (!htmlToTextContentHandler)
htmlToTextContentHandler = [self new];
[htmlToTextContentHandler setIgnoreContentTags: [NSArray arrayWithObjects: @"head", @"script",
@"style", nil]];
[htmlToTextContentHandler setSpecialTreatmentTags: [NSArray arrayWithObjects: @"p", @"ul",
@"li", @"table", @"tr", @"td", @"th",
@"br", @"hr", @"dt", @"dd", nil]];
return htmlToTextContentHandler;
}
@ -120,6 +126,11 @@
if (!sanitizerContentHandler)
sanitizerContentHandler = [self new];
[sanitizerContentHandler setVoidTags: [NSArray arrayWithObjects: @"area", @"base",
@"basefont", @"br", @"col", @"frame", @"hr",
@"img", @"input", @"isindex", @"link",
@"meta", @"param", @"", nil]];
return sanitizerContentHandler;
}
@ -157,6 +168,11 @@
ASSIGN(specialTreatmentTags, theTags);
}
- (void) setVoidTags: (NSArray *) theTags
{
ASSIGN(voidTags, theTags);
}
//
// We MUST NOT retain the array here
//
@ -264,78 +280,106 @@
else if ([specialTreatmentTags containsObject: tagName])
[self _startSpecialTreatment: tagName];
}
else if ([tagName isEqualToString: @"img"])
else
{
NSString *value;
value = [attributes valueForRawName: @"src"];
//
// Check for Data URI Scheme
//
// data:[<MIME-type>][;charset=<encoding>][;base64],<data>
//
if ([value length] > 5 && [[value substringToIndex: 5] caseInsensitiveCompare: @"data:"] == NSOrderedSame)
if ([tagName isEqualToString: @"img"])
{
NSString *uniqueId, *mimeType, *encoding, *charset;
NGMimeBodyPart *bodyPart;
NGMutableHashMap *map;
NSData *data;
id body;
NSString *value;
int i, j, k;
i = [value indexOf: ';'];
j = [value indexOf: ';' fromIndex: i+1];
k = [value indexOf: ','];
// We try to get the MIME type
mimeType = nil;
value = [attributes valueForRawName: @"src"];
if (i > 5 && i < k)
//
// Check for Data URI Scheme
//
// data:[<MIME-type>][;charset=<encoding>][;base64],<data>
//
if ([value length] > 5 && [[value substringToIndex: 5] caseInsensitiveCompare: @"data:"] == NSOrderedSame)
{
mimeType = [value substringWithRange: NSMakeRange(5, i-5)];
NSString *uniqueId, *mimeType, *encoding;
NGMimeBodyPart *bodyPart;
NGMutableHashMap *map;
NSData *data;
id body;
int i, j, k;
i = [value indexOf: ';'];
j = [value indexOf: ';' fromIndex: i+1];
k = [value indexOf: ','];
// We try to get the MIME type
mimeType = nil;
if (i > 5 && i < k)
{
mimeType = [value substringWithRange: NSMakeRange(5, i-5)];
}
else
i = 5;
// We might get a stupid value. We discard anything that doesn't have a / in it
if ([mimeType indexOf: '/'] < 0)
mimeType = @"image/jpeg";
// We check and skip the charset
if (j < i)
j = i;
// We check the encoding and we completely ignore it
encoding = [value substringWithRange: NSMakeRange(j+1, k-j-1)];
if (![encoding length])
encoding = @"base64";
data = [[value substringFromIndex: k+1] dataUsingEncoding: NSASCIIStringEncoding];
uniqueId = [SOGoObject globallyUniqueObjectId];
map = [[[NGMutableHashMap alloc] initWithCapacity:5] autorelease];
[map setObject: encoding forKey: @"content-transfer-encoding"];
[map setObject:[NSNumber numberWithInt:[data length]] forKey: @"content-length"];
[map setObject: [NSString stringWithFormat: @"inline; filename=\"%@\"", uniqueId] forKey: @"content-disposition"];
[map setObject: [NSString stringWithFormat: @"%@; name=\"%@\"", mimeType, uniqueId] forKey: @"content-type"];
[map setObject: [NSString stringWithFormat: @"<%@>", uniqueId] forKey: @"content-id"];
body = [[NGMimeFileData alloc] initWithBytes: [data bytes] length: [data length]];
bodyPart = [[[NGMimeBodyPart alloc] initWithHeader:map] autorelease];
[bodyPart setBody: body];
[body release];
[images addObject: bodyPart];
[result appendFormat: @"<img src=\"cid:%@\" type=\"%@\"/>", uniqueId, mimeType];
}
else
i = 5;
}
else if (voidTags)
{
NSString *type;
int i;
// We might get a stupid value. We discard anything that doesn't have a / in it
if ([mimeType indexOf: '/'] < 0)
mimeType = @"image/jpeg";
// We check and skip the charset
if (j > i)
charset = [value substringWithRange: NSMakeRange(i+1, j-i-1)];
else
j = i;
[result appendString: @"<"];
[result appendString: rawName];
for (i = 0; i < [attributes count]; i++)
{
[result appendString: @" "];
[result appendString: [attributes nameAtIndex: i]];
[result appendString: @"='"];
[result appendString: [attributes valueAtIndex: i]];
[result appendString: @"'"];
// We check the encoding and we completely ignore it
encoding = [value substringWithRange: NSMakeRange(j+1, k-j-1)];
if (![encoding length])
encoding = @"base64";
data = [[value substringFromIndex: k+1] dataUsingEncoding: NSASCIIStringEncoding];
uniqueId = [SOGoObject globallyUniqueObjectId];
map = [[[NGMutableHashMap alloc] initWithCapacity:5] autorelease];
[map setObject: encoding forKey: @"content-transfer-encoding"];
[map setObject:[NSNumber numberWithInt:[data length]] forKey: @"content-length"];
[map setObject: [NSString stringWithFormat: @"inline; filename=\"%@\"", uniqueId] forKey: @"content-disposition"];
[map setObject: [NSString stringWithFormat: @"%@; name=\"%@\"", mimeType, uniqueId] forKey: @"content-type"];
[map setObject: [NSString stringWithFormat: @"<%@>", uniqueId] forKey: @"content-id"];
body = [[NGMimeFileData alloc] initWithBytes: [data bytes] length: [data length]];
bodyPart = [[[NGMimeBodyPart alloc] initWithHeader:map] autorelease];
[bodyPart setBody: body];
[body release];
[images addObject: bodyPart];
[result appendFormat: @"<img src=\"cid:%@\" type=\"%@\">", uniqueId, mimeType];
type = [attributes typeAtIndex: i];
if (![type isEqualToString: @"CDATA"])
{
[result appendString: @"["];
[result appendString: type];
[result appendString: @"]"];
}
}
if ([voidTags containsObject: tagName])
[result appendString: @"/"];
[result appendString: @">"];
}
}
}
@ -348,13 +392,22 @@
showWhoWeAre();
if (ignoreContent && ignoreContentTags && specialTreatmentTags)
if (ignoreContentTags && specialTreatmentTags)
{
if (ignoreContent)
{
tagName = [rawName lowercaseString];
if ([ignoreContentTags containsObject: tagName])
ignoreContent = NO;
else if ([specialTreatmentTags containsObject: tagName])
[self _endSpecialTreatment: tagName];
}
}
else if (voidTags)
{
tagName = [rawName lowercaseString];
if ([ignoreContentTags containsObject: tagName])
ignoreContent = NO;
else if ([specialTreatmentTags containsObject: tagName])
[self _endSpecialTreatment: tagName];
if (![voidTags containsObject: tagName])
[result appendFormat: @"</%@>", rawName];
}
}