Fixed problem with ascii chars that had different unicode value

Also cleaned a bit the parse loop
This commit is contained in:
Javier Amor García 2015-11-13 12:53:02 +01:00
parent 3155bd0172
commit 2f63542e07

View file

@ -31,7 +31,7 @@
#define DEFAULT_CHARSET 1 #define DEFAULT_CHARSET 1
#define FONTNAME_LEN_MAX 100 #define FONTNAME_LEN_MAX 100
#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F
// //
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details. // Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
// //
@ -911,8 +911,13 @@ const unsigned short ansicpg874[256] = {
RTFStack *stack; RTFStack *stack;
const unsigned short *default_charset; const unsigned short *default_charset;
char c;
// convenience variables for parsing
unsigned char c;
NSData *d;
NSString *s;
unichar uch;
stack = [[RTFStack alloc] init]; stack = [[RTFStack alloc] init];
fontTable = nil; fontTable = nil;
colorTable = nil; colorTable = nil;
@ -921,7 +926,6 @@ const unsigned short ansicpg874[256] = {
_html = [[NSMutableData alloc] init]; _html = [[NSMutableData alloc] init];
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34]; [_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
// Check if we got RTF data // Check if we got RTF data
@ -938,14 +942,11 @@ const unsigned short ansicpg874[256] = {
{ {
unsigned int len; unsigned int len;
const char *cw; const char *cw;
NSString *s;
char nextByte = *(_bytes+1); char nextByte = *(_bytes+1);
if (nextByte == '\'') if (nextByte == '\'')
{ {
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values). // A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
NSData *d;
const char *b1, *b2; const char *b1, *b2;
unsigned short index; unsigned short index;
@ -1073,26 +1074,26 @@ const unsigned short ansicpg874[256] = {
[_html appendBytes: v length: strlen(v)]; [_html appendBytes: v length: strlen(v)];
free(v); free(v);
} }
else if ([s hasPrefix: @"fcs"]) // else if ([s hasPrefix: @"fcs"])
{ // {
// ignore // // ignore
} // }
else if ([s hasPrefix: @"fs"]) // else if ([s hasPrefix: @"fs"])
{ // {
// ignore // // ignore
} // }
else if ([s hasPrefix: @"fbidis"]) // else if ([s hasPrefix: @"fbidis"])
{ // {
// ignore // // ignore
} // }
else if ([s hasPrefix: @"fromhtml"]) // else if ([s hasPrefix: @"fromhtml"])
{ // {
// ignore // // ignore
} // }
else if ([s hasPrefix: @"fromtext"]) // else if ([s hasPrefix: @"fromtext"])
{ // {
// ignore // // ignore
} // }
else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1]))
{ {
RTFFontInfo *fontInfo; RTFFontInfo *fontInfo;
@ -1183,17 +1184,14 @@ const unsigned short ansicpg874[256] = {
else if ([s hasPrefix: @"u"] && [s length] > 1 && else if ([s hasPrefix: @"u"] && [s length] > 1 &&
(isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1]))
{ {
NSData *d;
unichar ch;
int arg; int arg;
arg = [[s substringFromIndex: 1] intValue]; arg = [[s substringFromIndex: 1] intValue];
if (arg < 0) if (arg < 0)
// a negative value means a value greater than 32767 // a negative value means a value greater than 32767
arg = 32767 - arg; arg = 32767 - arg;
ch = (unichar) arg; uch = (unichar) arg;
s = [NSString stringWithCharacters: &ch length: 1]; s = [NSString stringWithCharacters: &uch length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding]; d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d]; [_html appendData: d];
} }
@ -1274,11 +1272,22 @@ const unsigned short ansicpg874[256] = {
} }
else else
{ {
c = *_bytes;
// We avoid appending NULL bytes or endlines // We avoid appending NULL bytes or endlines
if (*_bytes && (*_bytes != '\n')) if (c && (c != '\n'))
{ {
/* end lines are not part of rtf */ if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT)
[_html appendBytes: _bytes length: 1]; {
// in this case utf8 and ascii encoding are the same
[_html appendBytes: &c length: 1];
}
else
{
uch = c;
s = [NSString stringWithCharacters: &uch length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d];
}
} }
ADVANCE; ADVANCE;
} }