diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index 5ba1c0721..e170cccf6 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -31,7 +31,7 @@ #define DEFAULT_CHARSET 1 #define FONTNAME_LEN_MAX 100 - +#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F // // Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details. // @@ -911,8 +911,13 @@ const unsigned short ansicpg874[256] = { RTFStack *stack; const unsigned short *default_charset; - char c; - + + // convenience variables for parsing + unsigned char c; + NSData *d; + NSString *s; + unichar uch; + stack = [[RTFStack alloc] init]; fontTable = nil; colorTable = nil; @@ -921,7 +926,6 @@ const unsigned short ansicpg874[256] = { _html = [[NSMutableData alloc] init]; [_html appendBytes: "" length: 34]; - // Check if we got RTF data @@ -938,14 +942,11 @@ const unsigned short ansicpg874[256] = { { unsigned int len; const char *cw; - NSString *s; char nextByte = *(_bytes+1); if (nextByte == '\'') { // A hexadecimal value, based on the specified character set (may be used to identify 8-bit values). - NSData *d; - const char *b1, *b2; unsigned short index; @@ -1073,26 +1074,26 @@ const unsigned short ansicpg874[256] = { [_html appendBytes: v length: strlen(v)]; free(v); } - else if ([s hasPrefix: @"fcs"]) - { - // ignore - } - else if ([s hasPrefix: @"fs"]) - { - // ignore - } - else if ([s hasPrefix: @"fbidis"]) - { - // ignore - } - else if ([s hasPrefix: @"fromhtml"]) - { - // ignore - } - else if ([s hasPrefix: @"fromtext"]) - { - // ignore - } + // else if ([s hasPrefix: @"fcs"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fs"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fbidis"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fromhtml"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fromtext"]) + // { + // // ignore + // } else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) { RTFFontInfo *fontInfo; @@ -1183,17 +1184,14 @@ const unsigned short ansicpg874[256] = { else if ([s hasPrefix: @"u"] && [s length] > 1 && (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) { - NSData *d; - unichar ch; int arg; - arg = [[s substringFromIndex: 1] intValue]; if (arg < 0) // a negative value means a value greater than 32767 arg = 32767 - arg; - ch = (unichar) arg; - s = [NSString stringWithCharacters: &ch length: 1]; + uch = (unichar) arg; + s = [NSString stringWithCharacters: &uch length: 1]; d = [s dataUsingEncoding: NSUTF8StringEncoding]; [_html appendData: d]; } @@ -1274,11 +1272,22 @@ const unsigned short ansicpg874[256] = { } else { + c = *_bytes; // We avoid appending NULL bytes or endlines - if (*_bytes && (*_bytes != '\n')) + if (c && (c != '\n')) { - /* end lines are not part of rtf */ - [_html appendBytes: _bytes length: 1]; + if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT) + { + // in this case utf8 and ascii encoding are the same + [_html appendBytes: &c length: 1]; + } + else + { + uch = c; + s = [NSString stringWithCharacters: &uch length: 1]; + d = [s dataUsingEncoding: NSUTF8StringEncoding]; + [_html appendData: d]; + } } ADVANCE; }