Merge pull request #242 from zentyal/jag/rtf-parser-charset-in-plain-rtf-text

RTFHandler: fixed a problem with plain characters from non-ASCII charsets
This commit is contained in:
Enrique J. Hernández 2016-02-03 09:53:42 +01:00
commit 27fb035f34
4 changed files with 54 additions and 34 deletions

View file

@ -32,7 +32,7 @@
#define DEFAULT_CHARSET 1
#define FONTNAME_LEN_MAX 100
#define UTF8_FIRST_BYTE_LAST_CODEPOINT 0x7F
//
// Charset definitions. See http://msdn.microsoft.com/en-us/goglobal/bb964654 for all details.
//
@ -608,7 +608,6 @@ static void _init_fontCws_table()
word and is not part of the control word. */
end = _bytes;
*len = end-start-1;
return start+1;
@ -1215,7 +1214,6 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
unsigned char c;
NSData *d;
NSString *s;
unichar uch;
stack = [[RTFStack alloc] init];
fontTable = nil;
@ -1223,7 +1221,7 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
defaultCharset = ansicpg1252;
formattingOptions = nil;
_html = [[NSMutableData alloc] init];
_html = [[[NSMutableData alloc] init] autorelease];
[_html appendBytes: "<html><meta charset='utf-8'><body>" length: 34];
@ -1251,10 +1249,10 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
{
// A hexadecimal value, based on the specified character set (may be used to identify 8-bit values).
const char *b1, *b2;
unsigned short index;
short index;
short tmp;
const unsigned short * active_charset;
if (formattingOptions && formattingOptions->charset)
active_charset = formattingOptions->charset;
else
@ -1267,8 +1265,21 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
b1 = ADVANCE;
b2 = ADVANCE;
index = (isdigit(*b1) ? *b1 - 48 : toupper(*b1) - 55) * 16;
index += (isdigit(*b2) ? *b2 - 48 : toupper(*b2) - 55);
tmp = (isdigit(*b1) ? *b1 - 48 : toupper(*b1) - 55);
if (tmp < 0 || tmp > 16)
{
// Incorrect first hexadecimal character. Skipping.
continue;
}
index = tmp*16;
tmp = (isdigit(*b2) ? *b2 - 48 : toupper(*b2) - 55);
if (tmp < 0 || tmp > 16)
{
// Incorrect second hexadecimal character. Skipping.
continue;
}
index += tmp;
s = [NSString stringWithCharacters: &(active_charset[index]) length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
@ -1437,20 +1448,17 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
{
c = *_bytes;
// We avoid appending NULL bytes or endlines
if (c && (c != '\n'))
if (c && (c != '\n') && (c != '\r'))
{
if (c <= UTF8_FIRST_BYTE_LAST_CODEPOINT)
{
// in this case utf8 and ascii encoding are the same
[_html appendBytes: &c length: 1];
}
else
{
uch = c;
s = [NSString stringWithCharacters: &uch length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d];
}
const unsigned short * active_charset;
if (formattingOptions && formattingOptions->charset)
active_charset = formattingOptions->charset;
else
active_charset = defaultCharset;
s = [NSString stringWithCharacters: &(active_charset[c]) length: 1];
d = [s dataUsingEncoding: NSUTF8StringEncoding];
[_html appendData: d];
}
ADVANCE;
}
@ -1459,7 +1467,7 @@ inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormatting
[_html appendBytes: "</body></html>" length: 14];
[stack release];
return [_html autorelease];
return _html;
}
/* This method is for ease of testing and should not be used in normal operations */

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -44,26 +44,18 @@
return nil;
RTFHandler *handler = [[RTFHandler alloc] initWithData: rtf];
NSMutableData *data2 = [handler parse];
if (data2 == nil)
NSMutableData *data = [handler parse];
if (data == nil)
{
NSString *error = [NSString stringWithFormat: @"Couldn't parse RTF data:\n %s",
(char *)[rtf bytes]];
testWithMessage(NO, error);
}
html = [[NSString alloc] initWithData: data2 encoding: NSUTF8StringEncoding];
html = [[NSString alloc] initWithData: data encoding: NSUTF8StringEncoding];
if (html == nil)
{
html = [[NSString alloc] initWithData: data2 encoding: NSASCIIStringEncoding];
}
if (html == nil)
{
html = [[NSString alloc] initWithData: data2 encoding: NSISOLatin1StringEncoding];
}
if (html == nil)
{
NSString *error = [NSString stringWithFormat: @"Couldn't convert parsed data"];
NSString *error = [NSString stringWithFormat: @"Couldn't convert parsed data to UTF8 string"];
testWithMessage(NO, error);
}
return html;
@ -305,4 +297,22 @@
againstExpectedHTML: expected];
}
/* Runs the cyr_event_ru_editor.rtf fixture through the HTML conversion check
   and compares the result with the expected markup, whose body text is
   Cyrillic. */
- (void) test_cyr_event_ru_editor
{
  NSString *expectedHTML = @"<html><meta charset='utf-8'><body><font face=\"Calibri\"><font face=\"Calibri Cyr\"><font color=\"#000000\">йчсмй</font></font><font color=\"#000000\"><br></font></font></body></html>";

  [self checkHTMLConversionOfRTFFile: @"cyr_event_ru_editor.rtf"
                 againstExpectedHTML: expectedHTML];
}
/* Runs the bad_hex_and_cr.rtf fixture through the HTML conversion check.
   The expected markup has a character after "Good hex:" and nothing after
   the "Bad1Hex:" / "Bad2Hex:" labels.
   NOTE(review): presumably the fixture holds malformed hex escapes and a
   carriage return that must be dropped -- confirm against the fixture. */
- (void) test_bad_hex_and_cr
{
  NSString *expectedHTML = @"<html><meta charset='utf-8'><body><font face=\"Calibri\"><font face=\"Calibri Cyr\"><font color=\"#000000\">Good hex:H Bad1Hex: Bad2Hex: Ignored Carriadge Return</font></font></font></body></html>";

  [self checkHTMLConversionOfRTFFile: @"bad_hex_and_cr.rtf"
                 againstExpectedHTML: expectedHTML];
}
@end