From fa4d3a0e20a64a005b1114e20c1e81083374c1dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Mon, 9 Nov 2015 07:59:13 +0100 Subject: [PATCH 1/7] Reimplementation of [RTFHandler parseFontTable] --- OpenChange/RTFHandler.m | 280 ++++++++++++++++++++++++++++++---------- 1 file changed, 214 insertions(+), 66 deletions(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index 0df3f1fd3..f02411884 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -29,6 +29,8 @@ #define ADVANCE_N(N) _bytes += (N); _current_pos += (N); #define REWIND _bytes--; _current_pos--; +#define DEFAULT_CHARSET 1 +#define FONTNAME_LEN_MAX 100 // // Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details. @@ -282,6 +284,7 @@ const unsigned short ansicpg874[256] = { { } + return self; } @@ -332,6 +335,23 @@ const unsigned short ansicpg874[256] = { return NSMapGet(fontInfos, key); } +- (NSString *) description +{ + NSMutableString *description; + NSEnumerator *enumerator; + RTFFontInfo *fontInfo; + + description = [NSMutableString stringWithFormat: @"Number of fonts: %u\n", [fontInfos count]]; + enumerator = [fontInfos objectEnumerator]; + while ((fontInfo = [enumerator nextObject])) + { + [description appendString: [fontInfo description]]; + [description appendString: @"\n"]; + } + + return description; +} + @end // @@ -455,6 +475,87 @@ const unsigned short ansicpg874[256] = { return start+1; } + +- (const char *) parseControlWordAndSetLenIn: (unsigned int *) len + setHasIntArgumentIn: (BOOL *) hasArg + setIntArgumentIn: (int *) arg +{ + const char *start; + const char *end = NULL; + const char *startArg = NULL; + const char *endArg = NULL; + + ADVANCE; + start = _bytes; + + /* + A control word is defined by: + + \ + */ + while (isalpha(*_bytes)) + { + end = _bytes; + ADVANCE; + } + + if (end == NULL) + { + return NULL; + } + + /* + The can be one of the following: + + - A space. 
This serves only to delimit a control word and is + ignored in subsequent processing. + + - A numeric digit or an ASCII minus sign (-), which indicates + that a numeric parameter is associated with the control word. + Only this case requires to include it in the control word. + + - Any character other than a letter or a digit + */ + + if (*_bytes == '-' || isdigit(*_bytes)) + { + startArg = _bytes; + endArg = _bytes; + ADVANCE; + while (isdigit(*_bytes)) + { + endArg = _bytes; + ADVANCE; + } + } + + *hasArg = NO; + *arg = 0; + if (startArg) + { + NSString *s; + unsigned int argLength = endArg - startArg + 1; + // the next guard is to protect against a single '-' + if (argLength > 1 || (*startArg != '-')) + { + s = [[NSString alloc] initWithBytesNoCopy: (void *) startArg + length: argLength + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + [s autorelease]; + *hasArg = YES; + *arg = [s intValue]; // Warning: it does not detect conversion errors + } + } + + + /* In other cases, the delimiting character terminates the control + word and is not part of the control word. 
*/ + + + *len = end - start + 1; + return start; +} // // {\colortbl\red0\green0\blue0;\red128\green0\blue0;\red255\green0\blue0;} @@ -531,107 +632,154 @@ const unsigned short ansicpg874[256] = { // - (RTFFontTable *) parseFontTable { - NSMutableString *fontName; RTFFontTable *fontTable; RTFFontInfo *fontInfo; - unsigned int count; + unsigned int level; fontTable = [[[RTFFontTable alloc] init] autorelease]; - fontName = nil; fontInfo = nil; - count = 0; + level = 0; do { if (*_bytes == '{') { - if (fontTable) + if (fontTable && level == 1) { fontInfo = [[[RTFFontInfo alloc] init] autorelease]; - fontName = [[[NSMutableString alloc] init] autorelease]; } ADVANCE; - count++; + level++; } else if (*_bytes == '}') { - if (fontTable) //&& ![NSAllMapTableValues(fontTable->fontInfos) containsObject: fontInfo]) + if (fontTable && level == 2) //&& ![NSAllMapTableValues(fontTable->fontInfos) containsObject: fontInfo]) { - ASSIGN(fontInfo->name, fontName); [fontTable addFontInfo: fontInfo atIndex: fontInfo->index]; } ADVANCE; - count--; + level--; } else if (*_bytes == '\\') { const char *cw; unsigned int len; - NSString *s; + BOOL hasArg; + int arg; - cw = [self parseControlWord: &len]; - - // Skip our control word - if (strncmp((const char*)cw, "fonttbl", len) == 0) + cw = [self parseControlWordAndSetLenIn: &len + setHasIntArgumentIn: &hasArg + setIntArgumentIn: &arg]; + if (level != 2) + continue; + else if (cw == NULL) continue; - // We must at least parse - s = [[NSString alloc] initWithBytesNoCopy: (void *)cw+1 - length: len-1 - encoding: NSASCIIStringEncoding - freeWhenDone: NO]; - [s autorelease]; - - // If we got a fontnum, let's parse all three fields at once) - if (isdigit(*(cw+1))) + if (len == 1) { - fontInfo->index = [s intValue]; - - // We now parse - cw = [self parseControlWord: &len]; - if (len == 0) // Possibly parsing a space - cw = [self parseControlWord: &len]; - - fontInfo->family = [[NSString alloc] initWithBytesNoCopy: (void *)cw+1 - length: len-1 - 
encoding: NSASCIIStringEncoding - freeWhenDone: NO]; - - cw = [self parseControlWord: &len]; - if (len == 0) // Possibly parsing a space - cw = [self parseControlWord: &len]; - - fontInfo->charset = [[NSString alloc] initWithBytesNoCopy: (void *)cw+1 - length: len-1 - encoding: NSASCIIStringEncoding - freeWhenDone: NO]; - - // We now skip everything until we find our final group closer ('}') - int cc = 1; - - do + if (strncmp((const char*) cw, "f", len) == 0) { - if (*_bytes == '{') - cc++; - if (*_bytes == '}') - cc--; - - ADVANCE; + if (hasArg) + fontInfo->index = arg; } - while (cc != 0); - - // move back our buffer; - REWIND; + } - } - else + else if (len == 4) + { + if (strncmp((const char*) cw, "fnil", len) == 0) + { + fontInfo->family = @"nil"; + } + else if (strncmp((const char*) cw, "fprq", len) == 0) + { + if (hasArg) + fontInfo->pitch = arg; + } + } + else if (len == 5) + { + if (strncmp((const char*) cw, "fbidi", len) == 0) + { + fontInfo->family = @"bidi"; + } + else if (strncmp((const char*) cw, "ftech", len) == 0) + { + fontInfo->family = @"tech"; + } + } + else if (len == 6) + { + if (strncmp((const char*) cw, "froman", len) == 0) + { + fontInfo->family = @"roman"; + } + else if (strncmp((const char*) cw, "fswiss", len) == 0) + { + fontInfo->family = @"swiss"; + } + else if (strncmp((const char*) cw, "fdecor", len) == 0) + { + fontInfo->family = @"decor"; + } + } + else if (len == 7) + { + if (strncmp((const char*) cw, "fmodern", len) == 0) + { + fontInfo->family = @"modern"; + } + } + else if (len == 8) + { + if (strncmp((const char* ) cw, "fcharset", len) == 0) + { + if (hasArg) + fontInfo->charset = [[NSString alloc] initWithFormat: @"%i", arg]; + } + else if (strncmp((const char*) cw, "fscript", len) == 0) + { + fontInfo->family = @"fscript"; + } + } + } + else // no char { - if (isalnum(*_bytes)) - [fontName appendFormat: @"%c", *_bytes]; + if (level == 2 && isalnum(*_bytes)) + { + // we assume this is the fontname + unsigned int fontnameLen; + 
const char *delim = strpbrk(_bytes, ";{}\\"); + if (delim == NULL) + { + // no delimiter found, we skip to next characters + ADVANCE; + continue; + } + fontnameLen = delim - _bytes; + // only valid if the delimiter is a correct ';' + if (*delim == ';') + { + // there is no explicit limit length but we took 100 + // as protection + if (delim && fontnameLen <= FONTNAME_LEN_MAX) + { + fontInfo->name = [[NSString alloc] initWithBytesNoCopy: (char *) _bytes + length: fontnameLen + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + ADVANCE_N(fontnameLen); + } + } + else { + // advance just before the special character + ADVANCE_N(fontnameLen - 1); + } + } ADVANCE; - } - } while (count != 0); + } + + } while (level > 0); return fontTable; } @@ -791,7 +939,7 @@ const unsigned short ansicpg874[256] = { REWIND; } else if (strncmp(cw, "stylesheet", 10) == 0) - { + { _bytes = cw-2; _current_pos -= 12; // Length: {\stylesheet [self parseStyleSheet]; From 01dcf3d6c8835f1860b429bab8bea4c5ee77ad22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Tue, 10 Nov 2015 12:50:22 +0100 Subject: [PATCH 2/7] Fixed activation of characters sets in [RTFHandler parse] --- OpenChange/RTFHandler.h | 3 +- OpenChange/RTFHandler.m | 124 +++++++++++++++++++++++++++++++++------- 2 files changed, 104 insertions(+), 23 deletions(-) diff --git a/OpenChange/RTFHandler.h b/OpenChange/RTFHandler.h index 52567cc29..b0ea0b684 100644 --- a/OpenChange/RTFHandler.h +++ b/OpenChange/RTFHandler.h @@ -67,6 +67,7 @@ int font_index; int color_index; int start_pos; + const unsigned short *charset; } @end @@ -77,7 +78,7 @@ { @public NSString *family; - NSString *charset; + unsigned char charset; NSString *name; unsigned int pitch; unsigned int index; diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index f02411884..bd2051298 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -231,7 +231,7 @@ const unsigned short ansicpg874[256] = { - (void) dealloc { - 
RELEASE(a); + [a release]; [super dealloc]; } @@ -246,7 +246,7 @@ const unsigned short ansicpg874[256] = { if ([a count]) { - o = AUTORELEASE([[a lastObject] retain]); + o = [[[a lastObject] retain] autorelease]; [a removeLastObject]; } @@ -259,7 +259,7 @@ const unsigned short ansicpg874[256] = { if ([a count]) { - o = AUTORELEASE([[a lastObject] retain]); + o = [[[a lastObject] retain] autorelease]; } return o; @@ -285,17 +285,27 @@ const unsigned short ansicpg874[256] = { } + charset = DEFAULT_CHARSET; return self; } - (void) dealloc { - RELEASE(family); - RELEASE(charset); - RELEASE(name); + [family release]; + [name release]; [super dealloc]; } +- (NSString *) description +{ + NSString *description; + description = [NSString stringWithFormat: + @"%u name=%@ family=%@ charset=%u pitch=%u", + index, name, family, charset, pitch + ]; + return description; +} + @end // @@ -377,7 +387,7 @@ const unsigned short ansicpg874[256] = { - (void) dealloc { - RELEASE(colorDefs); + [colorDefs release]; [super dealloc]; } @@ -408,16 +418,55 @@ const unsigned short ansicpg874[256] = { _current_pos = 0; _charsets = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 10); + // 238 — Eastern European - cpg1250 NSMapInsert(_charsets, @"ansicpg1250", ansicpg1250); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 238], ansicpg1250); + // 204 — Russian - cpg1251 NSMapInsert(_charsets, @"ansicpg1251", ansicpg1251); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 204], ansicpg1251); + // 0 - Latin 1 - cpg1252 - also know as ANSI + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 0], ansicpg1252); NSMapInsert(_charsets, @"ansicpg1252", ansicpg1252); + // 161 - Greek cpg1253 NSMapInsert(_charsets, @"ansicpg1253", ansicpg1253); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 161], ansicpg1253); + // 162 — Turkish - cpg1254 NSMapInsert(_charsets, @"ansicpg1254", ansicpg1254); + NSMapInsert(_charsets, [NSNumber 
numberWithUnsignedChar: 162], ansicpg1254); + // 177 — Hebrew Traditional - cpg1255 + // also 181 - Hebrew user NSMapInsert(_charsets, @"ansicpg1255", ansicpg1255); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 177], ansicpg1255); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 181], ansicpg1255); + // 178 — Arabic - cpg1256 + // also 179 - Arabic traditional + // also 180 - Arabic User NSMapInsert(_charsets, @"ansicpg1256", ansicpg1256); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 178], ansicpg1256); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 179], ansicpg1256); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 180], ansicpg1256); + // 186 — Baltic - pg 1257 NSMapInsert(_charsets, @"ansicpg1257", ansicpg1257); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 186], ansicpg1257); + // 163 — Vietnamese - pg1259 NSMapInsert(_charsets, @"ansicpg1258", ansicpg1258); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 163], ansicpg1258); + // 222 — Thai - cpg874 NSMapInsert(_charsets, @"ansicpg874", ansicpg874); + NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 222], ansicpg874); + + // TODO: check differences between traditional/user/no-qualified for Arabic and Hebrew + // TODO: missing codepage for the following codes: + // 2 — Symbol + // 3 — Invalid + // 77 — Mac + // 128 — Shift Jis + // 129 — Hangul + // 130 — Johab + // 134 — GB2312 + // 136 — Big5 + // 254 — PC 437 + // 255 — OEM } return self; @@ -426,7 +475,7 @@ const unsigned short ansicpg874[256] = { - (void) dealloc { NSFreeMapTable(_charsets); - RELEASE(_data); + [_data release]; [super dealloc]; } @@ -734,8 +783,11 @@ const unsigned short ansicpg874[256] = { { if (strncmp((const char* ) cw, "fcharset", len) == 0) { - if (hasArg) - fontInfo->charset = [[NSString alloc] initWithFormat: @"%i", arg]; + if (hasArg) + { + fontInfo->charset = arg; + } + } else if (strncmp((const char*) cw, "fscript", len) == 0) { @@ -858,13 
+910,13 @@ const unsigned short ansicpg874[256] = { RTFFontTable *fontTable; RTFStack *stack; - unsigned short *charset; + const unsigned short *default_charset; char c; stack = [[RTFStack alloc] init]; fontTable = nil; colorTable = nil; - charset = NULL; + default_charset = ansicpg1252; formattingOptions = nil; _html = [[NSMutableData alloc] init]; @@ -873,8 +925,9 @@ const unsigned short ansicpg874[256] = { // Check if we got RTF data + // this does not allow \s\n before '}' neither newline before control command if (_len > 4 && strncmp((const char*)_bytes, "{\\rtf", 4) != 0) - return NO; + return nil; while (_current_pos < _len) { @@ -887,7 +940,7 @@ const unsigned short ansicpg874[256] = { const char *cw; NSString *s; - if (*(_bytes+1) == '\'' && charset) + if (*(_bytes+1) == '\'') { // A hexadecimal value, based on the specified character set (may be used to identify 8-bit values). NSString *s; @@ -895,6 +948,14 @@ const unsigned short ansicpg874[256] = { const char *b1, *b2; unsigned short index; + + const unsigned short * active_charset; + + if (formattingOptions && formattingOptions->charset) + active_charset = formattingOptions->charset; + else + active_charset = default_charset; + ADVANCE; ADVANCE; @@ -905,7 +966,7 @@ const unsigned short ansicpg874[256] = { index = (isdigit(*b1) ? *b1 - 48 : toupper(*b1) - 55) * 16; index += (isdigit(*b2) ? *b2 - 48 : toupper(*b2) - 55); - s = [NSString stringWithCharacters: &(charset[index]) length: 1]; + s = [NSString stringWithCharacters: &(active_charset[index]) length: 1]; d = [s dataUsingEncoding: NSUTF8StringEncoding]; [_html appendData: d]; continue; @@ -924,9 +985,10 @@ const unsigned short ansicpg874[256] = { freeWhenDone: NO]; [s autorelease]; + // todo: This keyword should be emitted in the RTF header section right after the \ansi, \mac, \pc or \pca keyword. 
if (strncmp(cw, "ansicpg", 7) == 0) { - charset = NSMapGet(_charsets, s); + default_charset = NSMapGet(_charsets, s); } else if (strncmp(cw, "fonttbl", 7) == 0) { @@ -1055,6 +1117,18 @@ const unsigned short ansicpg874[256] = { v = calloc(7, sizeof(char)); sprintf(v, ""); } + + if (fontInfo && fontInfo->charset) + { + if (fontInfo->charset == 1) + /* charset 1 is default charset */ + formattingOptions->charset = NULL; + else { + NSNumber *key = [NSNumber numberWithUnsignedChar: fontInfo->charset]; + formattingOptions->charset = NSMapGet(_charsets, key); + } + } + [_html appendBytes: v length: strlen(v)]; free(v); } @@ -1088,6 +1162,7 @@ const unsigned short ansicpg874[256] = { } else if ([s hasPrefix: @"u"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) { + // XXX TPFOX u argumrnt can be negative NSData *d; unichar ch; @@ -1109,7 +1184,7 @@ const unsigned short ansicpg874[256] = { } // If a space delimits the control word, the space does not appear in the document. - // Any characters following the delimiter, including spaces, will appear in the document. + // Any characters following the delimiter, including spaces, will appear in the document. (except newline!) if (*_bytes == ' ') { ADVANCE; @@ -1126,6 +1201,7 @@ const unsigned short ansicpg874[256] = { formattingOptions->font_index = -1; formattingOptions->color_index = -1; formattingOptions->start_pos = [_html length]; + formattingOptions->charset = default_charset; [stack push: formattingOptions]; ADVANCE; } @@ -1172,17 +1248,21 @@ const unsigned short ansicpg874[256] = { } else { - // We avoid appending NULL bytes - if (*_bytes) - [_html appendBytes: _bytes length: 1]; + /* XXXX TODO add special stick together chars? 
*/ + // We avoid appending NULL bytes or endlines + if (*_bytes && (*_bytes != '\n')) + { + /* end lines are not part of rtf */ + [_html appendBytes: _bytes length: 1]; + } ADVANCE; } } [_html appendBytes: "" length: 14]; - RELEASE(stack); - return AUTORELEASE(_html); + [stack release]; + return [_html autorelease]; } @end From 8e3e4a5445ed8f9e3a61914a7536d32d5f3256b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Thu, 12 Nov 2015 15:39:04 +0100 Subject: [PATCH 3/7] Added RTF escapes parsing --- OpenChange/RTFHandler.m | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index bd2051298..1add58bcc 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -939,11 +939,11 @@ const unsigned short ansicpg874[256] = { unsigned int len; const char *cw; NSString *s; + char nextByte = *(_bytes+1); - if (*(_bytes+1) == '\'') + if (nextByte == '\'') { // A hexadecimal value, based on the specified character set (may be used to identify 8-bit values). - NSString *s; NSData *d; const char *b1, *b2; @@ -971,11 +971,31 @@ const unsigned short ansicpg874[256] = { [_html appendData: d]; continue; } - else if (*(_bytes+1) == '*') + else if (nextByte == '*') { [self parseIgnoringEverything]; continue; } + else if (!isalpha(nextByte)) + { + // escape + character + ADVANCE_N(2); + // check for special escapes for the no-implemented features + // for control of word breaking + if (nextByte == '~') + // no breaking space + nextByte = ' '; + else if (nextByte == '-') + // optional hyphen; we skip it + continue; + else if (nextByte == '_') + // no breaking hyphen, treat it as a normal hyphen + nextByte = '-'; + + [_html appendBytes: &nextByte length: 1]; + continue; + } + cw = [self parseControlWord: &len]; @@ -1248,7 +1268,6 @@ const unsigned short ansicpg874[256] = { } else { - /* XXXX TODO add special stick together chars? 
*/ // We avoid appending NULL bytes or endlines if (*_bytes && (*_bytes != '\n')) { From 2518b37e322ba93879703d9f8aaa6b2fe960211a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Thu, 12 Nov 2015 16:17:31 +0100 Subject: [PATCH 4/7] Fixed RTF support for unicode characters greater than 32767 --- OpenChange/RTFHandler.m | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index 1add58bcc..7cd7ab404 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -1180,13 +1180,19 @@ const unsigned short ansicpg874[256] = { [_html appendBytes: "" length: 9]; formattingOptions->strikethrough = NO; } - else if ([s hasPrefix: @"u"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) + else if ([s hasPrefix: @"u"] && [s length] > 1 && + (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) { - // XXX TPFOX u argumrnt can be negative NSData *d; unichar ch; + int arg; + + arg = [[s substringFromIndex: 1] intValue]; + if (arg < 0) + // a negative value means a value greater than 32767 + arg = 32767 - arg; - ch = (unichar)[[s substringFromIndex: 1] intValue]; + ch = (unichar) arg; s = [NSString stringWithCharacters: &ch length: 1]; d = [s dataUsingEncoding: NSUTF8StringEncoding]; [_html appendData: d]; From 3155bd01726c81110ab82b7de72b55ac8c59c38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Thu, 12 Nov 2015 19:04:31 +0100 Subject: [PATCH 5/7] Fixed bug which created unnecesary font tags --- OpenChange/RTFHandler.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index 7cd7ab404..5ba1c0721 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -1093,7 +1093,7 @@ const unsigned short ansicpg874[256] = { { // ignore } - else if ([s hasPrefix: @"f"] && [s length] > 1) + else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s 
characterAtIndex: 1])) { RTFFontInfo *fontInfo; int font_index; From 2f63542e07b1b71efbaba1ad5d924b3a52a3fc5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Fri, 13 Nov 2015 12:53:02 +0100 Subject: [PATCH 6/7] Fixed problem with ascii chars that had different unicode value Also cleaned a bit the parse loop --- OpenChange/RTFHandler.m | 79 +++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index 5ba1c0721..e170cccf6 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -31,7 +31,7 @@ #define DEFAULT_CHARSET 1 #define FONTNAME_LEN_MAX 100 - +#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F // // Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details. // @@ -911,8 +911,13 @@ const unsigned short ansicpg874[256] = { RTFStack *stack; const unsigned short *default_charset; - char c; - + + // convenience variables for parsing + unsigned char c; + NSData *d; + NSString *s; + unichar uch; + stack = [[RTFStack alloc] init]; fontTable = nil; colorTable = nil; @@ -921,7 +926,6 @@ const unsigned short ansicpg874[256] = { _html = [[NSMutableData alloc] init]; [_html appendBytes: "" length: 34]; - // Check if we got RTF data @@ -938,14 +942,11 @@ const unsigned short ansicpg874[256] = { { unsigned int len; const char *cw; - NSString *s; char nextByte = *(_bytes+1); if (nextByte == '\'') { // A hexadecimal value, based on the specified character set (may be used to identify 8-bit values). 
- NSData *d; - const char *b1, *b2; unsigned short index; @@ -1073,26 +1074,26 @@ const unsigned short ansicpg874[256] = { [_html appendBytes: v length: strlen(v)]; free(v); } - else if ([s hasPrefix: @"fcs"]) - { - // ignore - } - else if ([s hasPrefix: @"fs"]) - { - // ignore - } - else if ([s hasPrefix: @"fbidis"]) - { - // ignore - } - else if ([s hasPrefix: @"fromhtml"]) - { - // ignore - } - else if ([s hasPrefix: @"fromtext"]) - { - // ignore - } + // else if ([s hasPrefix: @"fcs"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fs"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fbidis"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fromhtml"]) + // { + // // ignore + // } + // else if ([s hasPrefix: @"fromtext"]) + // { + // // ignore + // } else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) { RTFFontInfo *fontInfo; @@ -1183,17 +1184,14 @@ const unsigned short ansicpg874[256] = { else if ([s hasPrefix: @"u"] && [s length] > 1 && (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) { - NSData *d; - unichar ch; int arg; - arg = [[s substringFromIndex: 1] intValue]; if (arg < 0) // a negative value means a value greater than 32767 arg = 32767 - arg; - ch = (unichar) arg; - s = [NSString stringWithCharacters: &ch length: 1]; + uch = (unichar) arg; + s = [NSString stringWithCharacters: &uch length: 1]; d = [s dataUsingEncoding: NSUTF8StringEncoding]; [_html appendData: d]; } @@ -1274,11 +1272,22 @@ const unsigned short ansicpg874[256] = { } else { + c = *_bytes; // We avoid appending NULL bytes or endlines - if (*_bytes && (*_bytes != '\n')) + if (c && (c != '\n')) { - /* end lines are not part of rtf */ - [_html appendBytes: _bytes length: 1]; + if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT) + { + // in this case utf8 and ascii encoding are the same + [_html appendBytes: &c length: 1]; + } + else + { + uch = c; + s = [NSString stringWithCharacters: &uch length: 1]; + d = [s 
dataUsingEncoding: NSUTF8StringEncoding]; + [_html appendData: d]; + } } ADVANCE; } From 94d4da6e1ef1846da93d36fd7cc4c2c09284f0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Amor=20Garc=C3=ADa?= Date: Fri, 13 Nov 2015 15:53:36 +0100 Subject: [PATCH 7/7] Reimplemented [RTFHandler parse] to have an unambiguous decoding of control words [RTFHandler parseFontTable] reimplemented on the same line. --- OpenChange/RTFHandler.m | 746 ++++++++++++++++++++++++---------------- 1 file changed, 452 insertions(+), 294 deletions(-) diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index e170cccf6..b18754e51 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -25,9 +25,9 @@ // // Useful macros // -#define ADVANCE _bytes++; _current_pos++; -#define ADVANCE_N(N) _bytes += (N); _current_pos += (N); -#define REWIND _bytes--; _current_pos--; +#define ADVANCE self->_bytes++; self->_current_pos++; +#define ADVANCE_N(N) self->_bytes += (N); self->_current_pos += (N); +#define REWIND self->_bytes--; self->_current_pos--; #define DEFAULT_CHARSET 1 #define FONTNAME_LEN_MAX 100 @@ -408,16 +408,47 @@ const unsigned short ansicpg874[256] = { // @implementation RTFHandler -- (id) initWithData: (NSData *) theData +static NSMapTable *_charsets = nil; +static NSMapTable *_cws = nil; +typedef enum { + CW_UNKNOWN = 0, + CW_ANSICPG, + CW_B, + CW_CF, + CW_COLORTBL, + CW_F, + CW_FONTTBL, + CW_I, + CW_PAR, + CW_PICT, + CW_SOFTLINE, + CW_STRIKE, + CW_STYLESHEET, + CW_TAB, + CW_U, + CW_UL, + CW_ULNONE +} commandWordId; + +static NSMapTable *_fontCws = nil; +typedef enum { + FONTCW_UNKNOWN = 0, + FONTCW_F, + FONTCW_FBIDI, + FONTCW_FCHARSET, + FONTCW_FDECOR, + FONTCW_FMODERN, + FONTCW_FNIL, + FONTCW_FPRQ, + FONTCW_FROMAN, + FONTCW_FSCRIPT, + FONTCW_FSWISS, + FONTCW_FTECH +} fontCommandWordId; + +static void _init_charsets_table() { - if ((self = [super init])) - { - ASSIGN(_data, theData); - _bytes = (char *)[_data bytes]; - _len = [_data length]; - _current_pos = 0; - - 
_charsets = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 10); + _charsets = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 23); // 238 — Eastern European - cpg1250 NSMapInsert(_charsets, @"ansicpg1250", ansicpg1250); NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 238], ansicpg1250); @@ -467,6 +498,59 @@ const unsigned short ansicpg874[256] = { // 136 — Big5 // 254 — PC 437 // 255 — OEM +} + +static void _init_cws_table() +{ + _cws = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 16); + NSMapInsert(_cws, @"ansicpg", (void *) CW_ANSICPG); + NSMapInsert(_cws, @"b", (void *) CW_B); + NSMapInsert(_cws, @"cf", (void *) CW_CF); + NSMapInsert(_cws, @"colortbl", (void *) CW_COLORTBL); + NSMapInsert(_cws, @"f", (void *) CW_F); + NSMapInsert(_cws, @"fonttbl", (void *) CW_FONTTBL); + NSMapInsert(_cws, @"i", (void *) CW_I); + NSMapInsert(_cws, @"par", (void *) CW_PAR); + NSMapInsert(_cws, @"pict", (void *) CW_PICT); + NSMapInsert(_cws, @"softline", (void *) CW_SOFTLINE); + NSMapInsert(_cws, @"strike", (void *) CW_STRIKE); + NSMapInsert(_cws, @"stylesheet", (void *) CW_STYLESHEET); + NSMapInsert(_cws, @"tab", (void *) CW_TAB); + NSMapInsert(_cws, @"u", (void *) CW_U); + NSMapInsert(_cws, @"ul", (void *) CW_UL); + NSMapInsert(_cws, @"ulnone", (void *) CW_ULNONE); +} + +static void _init_fontCws_table() +{ + _fontCws = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 23); + NSMapInsert(_fontCws, @"f", (void *) FONTCW_F); + NSMapInsert(_fontCws, @"fbidi", (void *) FONTCW_FBIDI); + NSMapInsert(_fontCws, @"fcharset", (void *) FONTCW_FCHARSET); + NSMapInsert(_fontCws, @"fdecor", (void *) FONTCW_FDECOR); + NSMapInsert(_fontCws, @"fmodern", (void *) FONTCW_FMODERN); + NSMapInsert(_fontCws, @"fnil", (void *) FONTCW_FNIL); + NSMapInsert(_fontCws, @"fprq", (void *) FONTCW_FPRQ); + NSMapInsert(_fontCws, @"froman", (void *) FONTCW_FROMAN); + 
NSMapInsert(_fontCws, @"fscript", (void *) FONTCW_FSCRIPT); + NSMapInsert(_fontCws, @"fswiss", (void *) FONTCW_FSWISS); + NSMapInsert(_fontCws, @"ftech", (void *) FONTCW_FTECH); +} + +- (id) initWithData: (NSData *) theData +{ + if ((self = [super init])) + { + ASSIGN(_data, theData); + _bytes = (char *)[_data bytes]; + _len = [_data length]; + _current_pos = 0; + if (_charsets == nil) + _init_charsets_table(); + if (_cws == nil) + _init_cws_table(); + if (_fontCws == nil) + _init_fontCws_table(); } return self; @@ -600,8 +684,6 @@ const unsigned short ansicpg874[256] = { /* In other cases, the delimiting character terminates the control word and is not part of the control word. */ - - *len = end - start + 1; return start; } @@ -716,6 +798,8 @@ const unsigned short ansicpg874[256] = { unsigned int len; BOOL hasArg; int arg; + NSString *cwKey; + fontCommandWordId cwId; cw = [self parseControlWordAndSetLenIn: &len setHasIntArgumentIn: &hasArg @@ -725,76 +809,57 @@ const unsigned short ansicpg874[256] = { else if (cw == NULL) continue; - if (len == 1) - { - if (strncmp((const char*) cw, "f", len) == 0) - { - if (hasArg) - fontInfo->index = arg; - } - - } - else if (len == 4) - { - if (strncmp((const char*) cw, "fnil", len) == 0) - { - fontInfo->family = @"nil"; - } - else if (strncmp((const char*) cw, "fprq", len) == 0) - { - if (hasArg) - fontInfo->pitch = arg; - } - } - else if (len == 5) - { - if (strncmp((const char*) cw, "fbidi", len) == 0) - { - fontInfo->family = @"bidi"; - } - else if (strncmp((const char*) cw, "ftech", len) == 0) - { - fontInfo->family = @"tech"; - } - } - else if (len == 6) - { - if (strncmp((const char*) cw, "froman", len) == 0) - { - fontInfo->family = @"roman"; - } - else if (strncmp((const char*) cw, "fswiss", len) == 0) - { - fontInfo->family = @"swiss"; - } - else if (strncmp((const char*) cw, "fdecor", len) == 0) - { - fontInfo->family = @"decor"; - } - } - else if (len == 7) - { - if (strncmp((const char*) cw, "fmodern", len) == 0) - 
{ - fontInfo->family = @"modern"; - } - } - else if (len == 8) - { - if (strncmp((const char* ) cw, "fcharset", len) == 0) - { - if (hasArg) - { - fontInfo->charset = arg; - } + cwKey= [[NSString alloc] initWithBytesNoCopy: (void *)cw + length: len + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + [cwKey autorelease]; - } - else if (strncmp((const char*) cw, "fscript", len) == 0) - { - fontInfo->family = @"fscript"; - } + cwId = (fontCommandWordId) NSMapGet(_fontCws, cwKey); + switch (cwId) + { + case FONTCW_F: + if (hasArg) + fontInfo->index = arg; + break; + case FONTCW_FBIDI: + fontInfo->family = @"bidi"; + break; + case FONTCW_FCHARSET: + if (hasArg) + fontInfo->charset = arg; + break; + case FONTCW_FDECOR: + fontInfo->family = @"decor"; + break; + case FONTCW_FMODERN: + fontInfo->family = @"modern"; + break; + case FONTCW_FNIL: + fontInfo->family = @"nil"; + break; + case FONTCW_FPRQ: + if (hasArg) + fontInfo->pitch = arg; + break; + case FONTCW_FROMAN: + fontInfo->family = @"roman"; + break; + case FONTCW_FSCRIPT: + fontInfo->family = @"script"; + break; + case FONTCW_FSWISS: + fontInfo->family = @"swiss"; + break; + case FONTCW_FTECH: + fontInfo->family = @"tech"; + break; + case FONTCW_UNKNOWN: + default: + // do nothing + break; } - } + } else // no char { if (level == 2 && isalnum(*_bytes)) @@ -900,9 +965,238 @@ const unsigned short ansicpg874[256] = { [self parseIgnoringEverything]; } -// -// -// + +// todo: This keyword is only valid in the RTF header section right after the \ansi, \mac, \pc or \pca keyword. 
+inline static void parseAnsicpg (BOOL hasArg, int arg, const unsigned short **out_default_char) /* \ansicpgN: select the document-wide default code page table; *out_default_char is left untouched when N is missing or has no entry in _charsets */ +{ + NSString *key; + const unsigned short *res; + + if (!hasArg) + return; + key = [NSString stringWithFormat: @"ansicpg%i", arg]; /* must match the @"ansicpgNNNN" keys registered in _init_charsets_table */ + res = NSMapGet(_charsets, key); + if (res) + *out_default_char = res; +} + +inline static void parseB(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->bold = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->bold = YES; + } +} + +inline static void parseCf(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions, RTFColorTable *colorTable) +{ + RTFColorDef *colorDef; + char *v; + + if (!hasArg) + return; + if (!formattingOptions) + return; + + colorDef = [colorTable colorDefAtIndex: arg]; + if (!colorDef) + return; + + if (formattingOptions->color_index >= 0) + { + [self->_html appendBytes: "" length: 7]; + } + + formattingOptions->color_index = arg; + + v = calloc(23, sizeof(char)); + sprintf(v, "", colorDef->red, colorDef->green, colorDef->blue); + [self->_html appendBytes: v length: strlen(v)]; + free(v); +} + + +inline static void parseColorTableWrapper(RTFHandler *self, RTFColorTable **colorTable) +{ + *colorTable = [self parseColorTable]; +} + +inline static void parseF(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions, RTFFontTable *fontTable) +{ + RTFFontInfo *fontInfo; + + if (!hasArg) + return; + if (!formattingOptions) + return; + + if (formattingOptions->font_index >= 0 && arg != formattingOptions->font_index) + { + [self->_html appendBytes: "" length: 7]; + } + + formattingOptions->font_index = arg; + + fontInfo = [fontTable fontInfoAtIndex: arg]; + char *v = NULL; + if (fontInfo && fontInfo->name) + { + if ([fontInfo->name length] < 128) + { + int tag_size = 15 + 
[fontInfo->name length]; + v = calloc(tag_size, sizeof(char)); + snprintf(v, tag_size, "", [fontInfo->name UTF8String]); + } + else + { + NSLog(@"RTFHandler: Font %u has %d chars length, parse error? " + "Ignored", arg, [fontInfo->name length]); + v = calloc(7, sizeof(char)); + sprintf(v, ""); + } + } + else + { + // RTF badformed? We don't know about that font (arg index not found). + // Anyhow, we still open the html tag because in the future + // we will close it (e.g. when new font is used). + v = calloc(7, sizeof(char)); + sprintf(v, ""); + } + + if (fontInfo && fontInfo->charset) + { + if (fontInfo->charset == DEFAULT_CHARSET) + /* charset 1 is default charset */ + formattingOptions->charset = NULL; + else { + NSNumber *key = [NSNumber numberWithUnsignedChar: fontInfo->charset]; + formattingOptions->charset = NSMapGet(_charsets, key); + } + } + + [self->_html appendBytes: v length: strlen(v)]; + free(v); +} + +inline static void parseFontTableWrapper(RTFHandler *self, const char * cw, RTFFontTable **fontTable) +{ + // We rewind our buffer so we start at the beginning of {\fonttbl... + self->_bytes = cw-2; + self->_current_pos -= 9; // Length: {\fonttbl + *fontTable = [self parseFontTable]; + + // We go back 1 byte in order to end our section properly ('}' character) + REWIND; +} + +inline static void parseI(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->italic = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->italic = YES; + } +} + +inline static void parsePar(RTFHandler *self) +{ + [self->_html appendBytes: "
" length: 4]; +} + +inline static void parsePictureWrapper(RTFHandler *self, const char * cw) +{ + self->_bytes = cw-2; + self->_current_pos -= 6; // Length: {\pict + [self parsePicture]; + REWIND; +} + +// same implementation that /par +inline static void parseSoftline(RTFHandler *self) +{ + [self->_html appendBytes: "
" length: 4]; +} + +inline static void parseStrike(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 9]; + formattingOptions->strikethrough = NO; + } + else + { + [self->_html appendBytes: "" length: 8]; + formattingOptions->strikethrough = YES; + } +} + +inline static void parseStyleSheetWrapper(RTFHandler *self, const char * cw) +{ + self->_bytes = cw-2; + self->_current_pos -= 12; // Length: {\stylesheet + [self parseStyleSheet]; + REWIND; +} + +inline static void parseTab(RTFHandler *self) +{ + [self->_html appendBytes: "  " length: 12]; +} + +inline static void parseU(RTFHandler *self, BOOL hasArg, int arg) +{ + unichar uch; + NSString *s; + NSData *d; + + if (!hasArg) + return; + if (arg < 0) + // a negative value means a value greater than 32767 + arg = 32767 - arg; + + uch = (unichar) arg; + s = [NSString stringWithCharacters: &uch length: 1]; + d = [s dataUsingEncoding: NSUTF8StringEncoding]; + [self->_html appendData: d]; +} + +inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg ==0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->underline = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->underline = YES; + } +} + + - (NSMutableData *) parse { RTFFormattingOptions *formattingOptions; @@ -910,7 +1204,7 @@ const unsigned short ansicpg874[256] = { RTFFontTable *fontTable; RTFStack *stack; - const unsigned short *default_charset; + const unsigned short *defaultCharset; // convenience variables for parsing unsigned char c; @@ -921,7 +1215,7 @@ const unsigned short ansicpg874[256] = { stack = [[RTFStack alloc] init]; fontTable = nil; colorTable = nil; - default_charset = ansicpg1252; + defaultCharset = ansicpg1252; formattingOptions = nil; _html = 
[[NSMutableData alloc] init]; @@ -942,6 +1236,10 @@ const unsigned short ansicpg874[256] = { { unsigned int len; const char *cw; + BOOL hasArg; + int arg; + NSString *cwKey; + commandWordId cwId; char nextByte = *(_bytes+1); if (nextByte == '\'') @@ -955,7 +1253,7 @@ const unsigned short ansicpg874[256] = { if (formattingOptions && formattingOptions->charset) active_charset = formattingOptions->charset; else - active_charset = default_charset; + active_charset = defaultCharset; ADVANCE; @@ -998,213 +1296,73 @@ const unsigned short ansicpg874[256] = { } - cw = [self parseControlWord: &len]; + cw = [self parseControlWordAndSetLenIn: &len + setHasIntArgumentIn: &hasArg + setIntArgumentIn: &arg]; + if (cw == NULL) + continue; + + cwKey= [[NSString alloc] initWithBytesNoCopy: (void *)cw + length: len + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + [cwKey autorelease]; - s = [[NSString alloc] initWithBytesNoCopy: (void *)cw - length: len - encoding: NSASCIIStringEncoding - freeWhenDone: NO]; - [s autorelease]; - - // todo: This keyword should be emitted in the RTF header section right after the \ansi, \mac, \pc or \pca keyword. - if (strncmp(cw, "ansicpg", 7) == 0) + cwId = (commandWordId) NSMapGet(_cws, cwKey); + switch (cwId) { - default_charset = NSMapGet(_charsets, s); - } - else if (strncmp(cw, "fonttbl", 7) == 0) - { - // We rewind our buffer so we start at the beginning of {\fonttbl... 
- _bytes = cw-2; - _current_pos -= 9; // Length: {\fonttbl - fontTable = [self parseFontTable]; - - // We go back 1 byte in order to end our section properly ('}' character) - REWIND; - } - else if (strncmp(cw, "stylesheet", 10) == 0) - { - _bytes = cw-2; - _current_pos -= 12; // Length: {\stylesheet - [self parseStyleSheet]; - REWIND; - } - else if (strncmp(cw, "colortbl", 8) == 0) - { - colorTable = [self parseColorTable]; - } - else if (strncmp(cw, "pict", 4) == 0) - { - _bytes = cw-2; - _current_pos -= 6; // Length: {\pict - [self parsePicture]; - REWIND; - } - else if ([s isEqualToString: @"b"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->bold = YES; - } - else if ([s isEqualToString: @"b0"] && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->bold = NO; - } - else if ([s hasPrefix: @"cf"] && [s length] > 2) - { - RTFColorDef *colorDef; - int color_index; - char *v; - - if (!formattingOptions) continue; - - color_index = [[s substringFromIndex: 2] intValue]; - colorDef = [colorTable colorDefAtIndex: color_index]; - if (!colorDef) continue; - - if (formattingOptions->color_index >= 0) - { - [_html appendBytes: "
" length: 7]; - } - - formattingOptions->color_index = color_index; - - v = malloc(23*sizeof(char)); - memset(v, 0, 23); - sprintf(v, "", colorDef->red, colorDef->green, colorDef->blue); - [_html appendBytes: v length: strlen(v)]; - free(v); - } - // else if ([s hasPrefix: @"fcs"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fs"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fbidis"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fromhtml"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fromtext"]) - // { - // // ignore - // } - else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) - { - RTFFontInfo *fontInfo; - int font_index; - - font_index = [[s substringFromIndex: 1] intValue]; - - if (!formattingOptions) - continue; - - if (formattingOptions->font_index >= 0 && - font_index != formattingOptions->font_index) - { - [_html appendBytes: "" length: 7]; - } - - formattingOptions->font_index = font_index; - - fontInfo = [fontTable fontInfoAtIndex: font_index]; - char *v = NULL; - if (fontInfo && fontInfo->name) - { - if ([fontInfo->name length] < 128) - { - int tag_size = 15 + [fontInfo->name length]; - v = calloc(tag_size, sizeof(char)); - snprintf(v, tag_size, "", [fontInfo->name UTF8String]); - } - else - { - NSLog(@"RTFHandler: Font %u has %d chars length, parse error? " - "Ignored", font_index, [fontInfo->name length]); - v = calloc(7, sizeof(char)); - sprintf(v, ""); - } - } - else - { - // RTF badformed? We don't know about that font (font_index). - // Anyhow, we still open the html tag because in the future - // we will close it (e.g. when new font is used). 
- v = calloc(7, sizeof(char)); - sprintf(v, ""); - } - - if (fontInfo && fontInfo->charset) - { - if (fontInfo->charset == 1) - /* charset 1 is default charset */ - formattingOptions->charset = NULL; - else { - NSNumber *key = [NSNumber numberWithUnsignedChar: fontInfo->charset]; - formattingOptions->charset = NSMapGet(_charsets, key); - } - } - - [_html appendBytes: v length: strlen(v)]; - free(v); - } - else if ([s isEqualToString: @"i"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->italic = YES; - } - else if ([s isEqualToString: @"i0"] && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->italic = NO; - } - else if ([s isEqualToString: @"tab"]) - { - [_html appendBytes: "  " length: 12]; - } - else if ([s isEqualToString: @"softline"] || [s isEqualToString: @"par"]) - { - [_html appendBytes: "
" length: 4]; - } - else if ([s isEqualToString: @"strike"] && formattingOptions) - { - [_html appendBytes: "" length: 8]; - formattingOptions->strikethrough = YES; - } - else if ([s isEqualToString: @"strike0"] && formattingOptions) - { - [_html appendBytes: "" length: 9]; - formattingOptions->strikethrough = NO; - } - else if ([s hasPrefix: @"u"] && [s length] > 1 && - (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) - { - int arg; - arg = [[s substringFromIndex: 1] intValue]; - if (arg < 0) - // a negative value means a value greater than 32767 - arg = 32767 - arg; - - uch = (unichar) arg; - s = [NSString stringWithCharacters: &uch length: 1]; - d = [s dataUsingEncoding: NSUTF8StringEncoding]; - [_html appendData: d]; - } - else if ([s isEqualToString: @"ul"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->underline = YES; - } - else if (([s isEqualToString: @"ul0"] || [s isEqualToString: @"ulnone"]) - && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->underline = NO; + case CW_ANSICPG: + parseAnsicpg(hasArg, arg, &defaultCharset); + break; + case CW_B: + parseB(self, hasArg, arg, formattingOptions); + break; + case CW_CF: + parseCf(self, hasArg, arg, formattingOptions, colorTable); + break; + case CW_COLORTBL: + parseColorTableWrapper(self, &colorTable); + break; + case CW_F: + parseF(self, hasArg, arg, formattingOptions, fontTable); + break; + case CW_FONTTBL: + parseFontTableWrapper(self, cw, &fontTable); + break; + case CW_I: + parseI(self, hasArg, arg, formattingOptions); + break; + case CW_PAR: + parsePar(self); + break; + case CW_PICT: + parsePictureWrapper(self, cw); + break; + case CW_SOFTLINE: + parseSoftline(self); + break; + case CW_STRIKE: + parseStrike(self, hasArg, arg, formattingOptions); + break; + case CW_STYLESHEET: + parseStyleSheetWrapper(self, cw); + break; + case CW_TAB: + parseTab(self); + break; + case CW_U: + parseU(self, hasArg, arg); + break; + 
case CW_UL: + parseUl(self, hasArg, arg, formattingOptions); + break; + case CW_ULNONE: + parseUl(self, YES, 0, formattingOptions); + break; + case CW_UNKNOWN: + default: + // do nothing + break; } // If a space delimits the control word, the space does not appear in the document. @@ -1225,7 +1383,7 @@ const unsigned short ansicpg874[256] = { formattingOptions->font_index = -1; formattingOptions->color_index = -1; formattingOptions->start_pos = [_html length]; - formattingOptions->charset = default_charset; + formattingOptions->charset = defaultCharset; [stack push: formattingOptions]; ADVANCE; }