Merge pull request #242 from zentyal/jag/rtf-parser-charset-in-plain-rtf-text
RTFHandler: fixed problem with plain chars from a non-ASCII charset
This commit is contained in:
commit
27fb035f34
|
@ -32,7 +32,7 @@
|
|||
|
||||
#define DEFAULT_CHARSET 1
|
||||
#define FONTNAME_LEN_MAX 100
|
||||
#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F
|
||||
|
||||
//
|
||||
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
|
||||
//
|
||||
|
@ -608,7 +608,6 @@ static void _init_fontCws_table()
|
|||
word and is not part of the control word. */
|
||||
|
||||
end = _bytes;
|
||||
|
||||
*len = end-start-1;
|
||||
|
||||
return start+1;
|
||||
|
@ -1215,7 +1214,6 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
unsigned char c;
|
||||
NSData *d;
|
||||
NSString *s;
|
||||
unichar uch;
|
||||
|
||||
stack = [[RTFStack alloc] init];
|
||||
fontTable = nil;
|
||||
|
@ -1223,7 +1221,7 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
defaultCharset = ansicpg1252;
|
||||
formattingOptions = nil;
|
||||
|
||||
_html = [[NSMutableData alloc] init];
|
||||
_html = [[[NSMutableData alloc] init] autorelease];
|
||||
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
|
||||
|
||||
|
||||
|
@ -1251,10 +1249,10 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
{
|
||||
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
|
||||
const char *b1, *b2;
|
||||
unsigned short index;
|
||||
short index;
|
||||
short tmp;
|
||||
|
||||
const unsigned short * active_charset;
|
||||
|
||||
if (formattingOptions && formattingOptions->charset)
|
||||
active_charset = formattingOptions->charset;
|
||||
else
|
||||
|
@ -1267,8 +1265,21 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
b1 = ADVANCE;
|
||||
b2 = ADVANCE;
|
||||
|
||||
index = (isdigit(*b1) ? *b1 - 48 : toupper(*b1) - 55) * 16;
|
||||
index += (isdigit(*b2) ? *b2 - 48 : toupper(*b2) - 55);
|
||||
tmp = (isdigit(*b1) ? *b1 - 48 : toupper(*b1) - 55);
|
||||
if (tmp < 0 || tmp > 16)
|
||||
{
|
||||
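// The first character did not decode to a hexadecimal digit; skip it.
// NOTE(review): the bound `tmp > 16` still admits 16 (e.g. 'G'), and this
// decoding maps ':'..'@' to 3..9; `tmp > 15` or isxdigit() looks intended
// here and in the matching check for the second character — confirm.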
|
||||
continue;
|
||||
}
|
||||
index = tmp*16;
|
||||
|
||||
tmp = (isdigit(*b2) ? *b2 - 48 : toupper(*b2) - 55);
|
||||
if (tmp < 0 || tmp > 16)
|
||||
{
|
||||
// Incorrect second hexadecimal character. Skipping.
|
||||
continue;
|
||||
}
|
||||
index += tmp;
|
||||
|
||||
s = [NSString stringWithCharacters: &(active_charset[index]) length: 1];
|
||||
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
||||
|
@ -1437,20 +1448,17 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
{
|
||||
c = *_bytes;
|
||||
// We avoid appending NUL bytes or line-break characters
|
||||
if (c && (c != '\n'))
|
||||
if (c && (c != '\n') && (c != '\r'))
|
||||
{
|
||||
if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT)
|
||||
{
|
||||
// in this case utf8 and ascii encoding are the same
|
||||
[_html appendBytes: &c length: 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
uch = c;
|
||||
s = [NSString stringWithCharacters: &uch length: 1];
|
||||
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
||||
[_html appendData: d];
|
||||
}
|
||||
const unsigned short * active_charset;
|
||||
if (formattingOptions && formattingOptions->charset)
|
||||
active_charset = formattingOptions->charset;
|
||||
else
|
||||
active_charset = defaultCharset;
|
||||
|
||||
s = [NSString stringWithCharacters: &(active_charset[c]) length: 1];
|
||||
d = [s dataUsingEncoding: NSUTF8StringEncoding];
|
||||
[_html appendData: d];
|
||||
}
|
||||
ADVANCE;
|
||||
}
|
||||
|
@ -1459,7 +1467,7 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
|
|||
[_html appendBytes: "</body></html>" length: 14];
|
||||
|
||||
[stack release];
|
||||
return [_html autorelease];
|
||||
return _html;
|
||||
}
|
||||
|
||||
/* This method is for ease of testing and should not be used in normal operations */
|
||||
|
|
1
Tests/Unit/Fixtures/bad_hex_and_cr.rtf
Normal file
1
Tests/Unit/Fixtures/bad_hex_and_cr.rtf
Normal file
File diff suppressed because one or more lines are too long
1
Tests/Unit/Fixtures/cyr_event_ru_editor.rtf
Normal file
1
Tests/Unit/Fixtures/cyr_event_ru_editor.rtf
Normal file
File diff suppressed because one or more lines are too long
|
@ -44,26 +44,18 @@
|
|||
return nil;
|
||||
|
||||
RTFHandler *handler = [[RTFHandler alloc] initWithData: rtf];
|
||||
NSMutableData *data2 = [handler parse];
|
||||
if (data2 == nil)
|
||||
NSMutableData *data = [handler parse];
|
||||
if (data == nil)
|
||||
{
|
||||
NSString *error = [NSString stringWithFormat: @"Couldn't parse RTF data:\n %s",
|
||||
(char *)[rtf bytes]];
|
||||
testWithMessage(NO, error);
|
||||
}
|
||||
|
||||
html = [[NSString alloc] initWithData: data2 encoding: NSUTF8StringEncoding];
|
||||
html = [[NSString alloc] initWithData: data encoding: NSUTF8StringEncoding];
|
||||
if (html == nil)
|
||||
{
|
||||
html = [[NSString alloc] initWithData: data2 encoding: NSASCIIStringEncoding];
|
||||
}
|
||||
if (html == nil)
|
||||
{
|
||||
html = [[NSString alloc] initWithData: data2 encoding: NSISOLatin1StringEncoding];
|
||||
}
|
||||
if (html == nil)
|
||||
{
|
||||
NSString *error = [NSString stringWithFormat: @"Couldn't convert parsed data"];
|
||||
NSString *error = [NSString stringWithFormat: @"Couldn't convert parsed data to UTF8 string"];
|
||||
testWithMessage(NO, error);
|
||||
}
|
||||
return html;
|
||||
|
@ -305,4 +297,22 @@
|
|||
againstExpectedHTML: expected];
|
||||
}
|
||||
|
||||
/* Runs the cyr_event_ru_editor.rtf fixture through
   -checkHTMLConversionOfRTFFile:againstExpectedHTML: with the HTML that is
   expected for it. The expected markup carries Cyrillic text inside a
   "Calibri Cyr" font element. */
- (void) test_cyr_event_ru_editor
{
  NSString *expectedHTML = @"<html><meta charset='utf-8'><body><font face=\"Calibri\"><font face=\"Calibri Cyr\"><font color=\"#000000\">йчсмй</font></font><font color=\"#000000\"><br></font></font></body></html>";
  NSString *fixtureName = @"cyr_event_ru_editor.rtf";

  [self checkHTMLConversionOfRTFFile: fixtureName
                 againstExpectedHTML: expectedHTML];
}
|
||||
|
||||
/* Runs the bad_hex_and_cr.rtf fixture through
   -checkHTMLConversionOfRTFFile:againstExpectedHTML: with the HTML that is
   expected for it. Judging by the expected text, the fixture mixes one valid
   hex escape, two malformed ones and a carriage return that must not reach
   the output (fixture content itself is not visible here). */
- (void) test_bad_hex_and_cr
{
  NSString *expectedHTML = @"<html><meta charset='utf-8'><body><font face=\"Calibri\"><font face=\"Calibri Cyr\"><font color=\"#000000\">Good hex:H Bad1Hex: Bad2Hex: Ignored Carriadge Return</font></font></font></body></html>";
  NSString *fixtureName = @"bad_hex_and_cr.rtf";

  [self checkHTMLConversionOfRTFFile: fixtureName
                 againstExpectedHTML: expectedHTML];
}
|
||||
|
||||
@end
|
||||
|
|
Loading…
Reference in a new issue