Fixed problem with ascii chars that had different unicode value

Also cleaned a bit the parse loop
This commit is contained in:
Javier Amor García 2015-11-13 12:53:02 +01:00
parent 3155bd0172
commit 2f63542e07

View file

@ -31,7 +31,7 @@
#define DEFAULT_CHARSET 1
#define FONTNAME_LEN_MAX 100
#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F
//
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
//
@ -911,8 +911,13 @@ const unsigned short ansicpg874[256] = {
RTFStack *stack;
const unsigned short *default_charset;
char c;
// convenience variables for parsing
unsigned char c;
NSData *d;
NSString *s;
unichar uch;
stack = [[RTFStack alloc] init];
fontTable = nil;
colorTable = nil;
@ -921,7 +926,6 @@ const unsigned short ansicpg874[256] = {
_html = [[NSMutableData alloc] init];
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
// Check if we got RTF data
@ -938,14 +942,11 @@ const unsigned short ansicpg874[256] = {
{
unsigned int len;
const char *cw;
NSString *s;
char nextByte = *(_bytes+1);
if (nextByte == '\'')
{
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
NSData *d;
const char *b1, *b2;
unsigned short index;
@ -1073,26 +1074,26 @@ const unsigned short ansicpg874[256] = {
[_html appendBytes: v length: strlen(v)];
free(v);
}
else if ([s hasPrefix: @"fcs"])
{
// ignore
}
else if ([s hasPrefix: @"fs"])
{
// ignore
}
else if ([s hasPrefix: @"fbidis"])
{
// ignore
}
else if ([s hasPrefix: @"fromhtml"])
{
// ignore
}
else if ([s hasPrefix: @"fromtext"])
{
// ignore
}
// else if ([s hasPrefix: @"fcs"])
// {
// // ignore
// }
// else if ([s hasPrefix: @"fs"])
// {
// // ignore
// }
// else if ([s hasPrefix: @"fbidis"])
// {
// // ignore
// }
// else if ([s hasPrefix: @"fromhtml"])
// {
// // ignore
// }
// else if ([s hasPrefix: @"fromtext"])
// {
// // ignore
// }
else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1]))
{
RTFFontInfo *fontInfo;
@ -1183,17 +1184,14 @@ const unsigned short ansicpg874[256] = {
else if ([s hasPrefix: @"u"] && [s length] > 1 &&
(isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1]))
{
NSData *d;
unichar ch;
int arg;
arg = [[s substringFromIndex: 1] intValue];
if (arg < 0)
// a negative value means a value greater than 32767
arg = 32767 - arg;
ch = (unichar) arg;
s = [NSString stringWithCharacters: &ch length: 1];
uch = (unichar) arg;
s = [NSString stringWithCharacters: &uch length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d];
}
@ -1274,11 +1272,22 @@ const unsigned short ansicpg874[256] = {
}
else
{
c = *_bytes;
// We avoid appending NULL bytes or endlines
if (*_bytes && (*_bytes != '\n'))
if (c && (c != '\n'))
{
/* end lines are not part of rtf */
[_html appendBytes: _bytes length: 1];
if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT)
{
// in this case utf8 and ascii encoding are the same
[_html appendBytes: &c length: 1];
}
else
{
uch = c;
s = [NSString stringWithCharacters: &uch length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d];
}
}
ADVANCE;
}