Fixed a problem with ASCII chars that had a different Unicode value
Also cleaned up the parse loop a bit
This commit is contained in:
parent
3155bd0172
commit
2f63542e07
|
@ -31,7 +31,7 @@
|
||||||
|
|
||||||
#define DEFAULT_CHARSET 1
|
#define DEFAULT_CHARSET 1
|
||||||
#define FONTNAME_LEN_MAX 100
|
#define FONTNAME_LEN_MAX 100
|
||||||
|
#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F
|
||||||
//
|
//
|
||||||
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
|
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
|
||||||
//
|
//
|
||||||
|
@ -911,7 +911,12 @@ const unsigned short ansicpg874[256] = {
|
||||||
RTFStack *stack;
|
RTFStack *stack;
|
||||||
|
|
||||||
const unsigned short *default_charset;
|
const unsigned short *default_charset;
|
||||||
char c;
|
|
||||||
|
// convenience variables for parsing
|
||||||
|
unsigned char c;
|
||||||
|
NSData *d;
|
||||||
|
NSString *s;
|
||||||
|
unichar uch;
|
||||||
|
|
||||||
stack = [[RTFStack alloc] init];
|
stack = [[RTFStack alloc] init];
|
||||||
fontTable = nil;
|
fontTable = nil;
|
||||||
|
@ -923,7 +928,6 @@ const unsigned short ansicpg874[256] = {
|
||||||
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
|
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Check if we got RTF data
|
// Check if we got RTF data
|
||||||
// this does not allow \s\n before '}' neither newline before control command
|
// this does not allow \s\n before '}' neither newline before control command
|
||||||
if (_len > 4 && strncmp((const char*)_bytes, "{\\rtf", 4) != 0)
|
if (_len > 4 && strncmp((const char*)_bytes, "{\\rtf", 4) != 0)
|
||||||
|
@ -938,14 +942,11 @@ const unsigned short ansicpg874[256] = {
|
||||||
{
|
{
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
const char *cw;
|
const char *cw;
|
||||||
NSString *s;
|
|
||||||
char nextByte = *(_bytes+1);
|
char nextByte = *(_bytes+1);
|
||||||
|
|
||||||
if (nextByte == '\'')
|
if (nextByte == '\'')
|
||||||
{
|
{
|
||||||
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
|
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
|
||||||
NSData *d;
|
|
||||||
|
|
||||||
const char *b1, *b2;
|
const char *b1, *b2;
|
||||||
unsigned short index;
|
unsigned short index;
|
||||||
|
|
||||||
|
@ -1073,26 +1074,26 @@ const unsigned short ansicpg874[256] = {
|
||||||
[_html appendBytes: v length: strlen(v)];
|
[_html appendBytes: v length: strlen(v)];
|
||||||
free(v);
|
free(v);
|
||||||
}
|
}
|
||||||
else if ([s hasPrefix: @"fcs"])
|
// else if ([s hasPrefix: @"fcs"])
|
||||||
{
|
// {
|
||||||
// ignore
|
// // ignore
|
||||||
}
|
// }
|
||||||
else if ([s hasPrefix: @"fs"])
|
// else if ([s hasPrefix: @"fs"])
|
||||||
{
|
// {
|
||||||
// ignore
|
// // ignore
|
||||||
}
|
// }
|
||||||
else if ([s hasPrefix: @"fbidis"])
|
// else if ([s hasPrefix: @"fbidis"])
|
||||||
{
|
// {
|
||||||
// ignore
|
// // ignore
|
||||||
}
|
// }
|
||||||
else if ([s hasPrefix: @"fromhtml"])
|
// else if ([s hasPrefix: @"fromhtml"])
|
||||||
{
|
// {
|
||||||
// ignore
|
// // ignore
|
||||||
}
|
// }
|
||||||
else if ([s hasPrefix: @"fromtext"])
|
// else if ([s hasPrefix: @"fromtext"])
|
||||||
{
|
// {
|
||||||
// ignore
|
// // ignore
|
||||||
}
|
// }
|
||||||
else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1]))
|
else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1]))
|
||||||
{
|
{
|
||||||
RTFFontInfo *fontInfo;
|
RTFFontInfo *fontInfo;
|
||||||
|
@ -1183,17 +1184,14 @@ const unsigned short ansicpg874[256] = {
|
||||||
else if ([s hasPrefix: @"u"] && [s length] > 1 &&
|
else if ([s hasPrefix: @"u"] && [s length] > 1 &&
|
||||||
(isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1]))
|
(isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1]))
|
||||||
{
|
{
|
||||||
NSData *d;
|
|
||||||
unichar ch;
|
|
||||||
int arg;
|
int arg;
|
||||||
|
|
||||||
arg = [[s substringFromIndex: 1] intValue];
|
arg = [[s substringFromIndex: 1] intValue];
|
||||||
if (arg < 0)
|
if (arg < 0)
|
||||||
// a negative value means a value greater than 32767
|
// a negative value means a value greater than 32767
|
||||||
arg = 32767 - arg;
|
arg = 32767 - arg;
|
||||||
|
|
||||||
ch = (unichar) arg;
|
uch = (unichar) arg;
|
||||||
s = [NSString stringWithCharacters: &ch length: 1];
|
s = [NSString stringWithCharacters: &uch length: 1];
|
||||||
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
||||||
[_html appendData: d];
|
[_html appendData: d];
|
||||||
}
|
}
|
||||||
|
@ -1274,11 +1272,22 @@ const unsigned short ansicpg874[256] = {
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
c = *_bytes;
|
||||||
// We avoid appending NULL bytes or endlines
|
// We avoid appending NULL bytes or endlines
|
||||||
if (*_bytes && (*_bytes != '\n'))
|
if (c && (c != '\n'))
|
||||||
{
|
{
|
||||||
/* end lines are not part of rtf */
|
if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT)
|
||||||
[_html appendBytes: _bytes length: 1];
|
{
|
||||||
|
// in this case utf8 and ascii encoding are the same
|
||||||
|
[_html appendBytes: &c length: 1];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uch = c;
|
||||||
|
s = [NSString stringWithCharacters: &uch length: 1];
|
||||||
|
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
||||||
|
[_html appendData: d];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ADVANCE;
|
ADVANCE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue