diff --git a/tests/check-qjson.c b/tests/check-qjson.c index f1405ad47a..69f5a187c9 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -157,13 +157,7 @@ static void utf8_string(void) * They're all marked "bug:" below, and are to be replaced by * correct ones as the bugs get fixed. * - * The JSON parser rejects some invalid sequences, but accepts - * others without correcting the problem. - * - * We should either reject all invalid sequences, or minimize - * overlong sequences and replace all other invalid sequences by a - * suitable replacement character. A common choice for - * replacement is U+FFFD. + * The JSON parser rejects some, but not all invalid sequences. * * Problem: we can't easily deal with embedded U+0000. Parsing * the JSON string "this \\u0000" is fun" yields "this \0 is fun", @@ -185,11 +179,8 @@ static void utf8_string(void) } test_cases[] = { /* * Bug markers used here: - * - bug: not corrected - * JSON parser fails to correct invalid sequence(s) - * - bug: rejected - * JSON parser rejects invalid sequence(s) - * We may choose to define this as feature + * - bug: not rejected + * JSON parser fails to reject invalid sequence(s) */ /* 0 Control characters */ @@ -257,13 +248,13 @@ static void utf8_string(void) /* 2.1.5 5 bytes U+200000 */ { "\xF8\x88\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 2.1.6 6 bytes U+4000000 */ { "\xFC\x84\x80\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 2.2 Last possible sequence of a certain length */ @@ -296,19 +287,19 @@ static void utf8_string(void) /* 2.2.4 4 bytes U+1FFFFF */ { "\xF7\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 2.2.5 5 bytes U+3FFFFFF */ { "\xFB\xBF\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 2.2.6 6 bytes U+7FFFFFFF */ { "\xFD\xBF\xBF\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 2.3 Other boundary conditions */ @@ -347,49 +338,49 @@ static void utf8_string(void) /* 3.1.1 First continuation byte */ { "\x80", - "\x80", /* bug: not corrected */ + "\x80", /* bug: not rejected */ "\\uFFFD", }, /* 3.1.2 Last continuation byte */ { "\xBF", - "\xBF", /* bug: not corrected */ + "\xBF", /* bug: not rejected */ "\\uFFFD", }, /* 3.1.3 2 continuation bytes */ { "\x80\xBF", - "\x80\xBF", /* bug: not corrected */ + "\x80\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, /* 3.1.4 3 continuation bytes */ { "\x80\xBF\x80", - "\x80\xBF\x80", /* bug: not corrected */ + "\x80\xBF\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.5 4 continuation bytes */ { "\x80\xBF\x80\xBF", - "\x80\xBF\x80\xBF", /* bug: not corrected */ + "\x80\xBF\x80\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.6 5 continuation bytes */ { "\x80\xBF\x80\xBF\x80", - "\x80\xBF\x80\xBF\x80", /* bug: not corrected */ + "\x80\xBF\x80\xBF\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.7 6 continuation bytes */ { "\x80\xBF\x80\xBF\x80\xBF", - "\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */ + "\x80\xBF\x80\xBF\x80\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.8 7 continuation bytes */ { "\x80\xBF\x80\xBF\x80\xBF\x80", - "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */ + "\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.1.9 Sequence of all 64 possible continuation bytes */ @@ -402,7 +393,7 @@ static void utf8_string(void) "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF", - /* bug: not corrected */ + /* bug: not rejected */ "\x80\x81\x82\x83\x84\x85\x86\x87" "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" "\x90\x91\x92\x93\x94\x95\x96\x97" @@ -427,7 +418,7 @@ static void utf8_string(void) "\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF " "\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 " "\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF ", - NULL, /* bug: rejected (partly, see FIXME below) */ + NULL, /* bug: accepted partly, see FIXME below */ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " @@ -437,7 +428,7 @@ static void utf8_string(void) { "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ", - /* bug: not corrected */ + /* bug: not rejected */ "\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 " "\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ", "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD " @@ -446,131 +437,131 @@ static void utf8_string(void) /* 3.2.3 All 8 first bytes of 4-byte sequences, followed by space */ { "\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ", - NULL, /* bug: rejected (partly, see FIXME below) */ + NULL, /* bug: accepted partly, see FIXME below */ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.4 All 4 first bytes of 5-byte sequences, followed by space */ { "\xF8 \xF9 \xFA \xFB ", - NULL, /* bug: rejected */ + NULL, "\\uFFFD \\uFFFD \\uFFFD \\uFFFD ", }, /* 3.2.5 All 2 first bytes of 6-byte sequences, followed by space */ { "\xFC \xFD ", - NULL, /* bug: rejected */ + NULL, "\\uFFFD \\uFFFD ", }, /* 3.3 Sequences with last continuation byte missing */ /* 3.3.1 2-byte sequence with last byte missing (U+0000) */ { "\xC0", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.3.2 3-byte sequence with last byte missing (U+0000) */ { "\xE0\x80", - "\xE0\x80", /* bug: not corrected */ + "\xE0\x80", /* bug: not rejected */ "\\uFFFD", }, /* 3.3.3 4-byte sequence with last byte missing (U+0000) */ { "\xF0\x80\x80", - "\xF0\x80\x80", /* bug: not corrected */ + "\xF0\x80\x80", /* bug: not rejected */ "\\uFFFD", }, /* 3.3.4 5-byte sequence with last byte missing (U+0000) */ { "\xF8\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.3.5 6-byte sequence with last byte missing (U+0000) */ { "\xFC\x80\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.3.6 2-byte sequence with last byte missing (U+07FF) */ { "\xDF", - "\xDF", /* bug: not corrected */ + "\xDF", /* bug: not rejected */ "\\uFFFD", }, /* 3.3.7 3-byte sequence with last byte missing (U+FFFF) */ { "\xEF\xBF", - "\xEF\xBF", /* bug: not corrected */ + "\xEF\xBF", /* bug: not rejected */ "\\uFFFD", }, /* 3.3.8 4-byte sequence with last byte missing (U+1FFFFF) */ { "\xF7\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.3.9 5-byte sequence with last byte missing (U+3FFFFFF) */ { "\xFB\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.3.10 6-byte sequence with last byte missing (U+7FFFFFFF) */ { "\xFD\xBF\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 3.4 Concatenation of incomplete sequences */ { "\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80" "\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF", - NULL, /* bug: rejected (partly, see FIXME below) */ + NULL, /* bug: accepted partly, see FIXME below */ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD" "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 3.5 Impossible bytes */ { "\xFE", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { "\xFF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { "\xFE\xFE\xFF\xFF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD\\uFFFD\\uFFFD\\uFFFD", }, /* 4 Overlong sequences */ /* 4.1 Overlong '/' */ { "\xC0\xAF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { "\xE0\x80\xAF", - "\xE0\x80\xAF", /* bug: not corrected */ + "\xE0\x80\xAF", /* bug: not rejected */ "\\uFFFD", }, { "\xF0\x80\x80\xAF", - "\xF0\x80\x80\xAF", /* bug: not corrected */ + "\xF0\x80\x80\xAF", /* bug: not rejected */ "\\uFFFD", }, { "\xF8\x80\x80\x80\xAF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { "\xFC\x80\x80\x80\x80\xAF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* @@ -582,13 +573,13 @@ static void utf8_string(void) { /* \U+007F */ "\xC1\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { /* \U+07FF */ "\xE0\x9F\xBF", - "\xE0\x9F\xBF", /* bug: not corrected */ + "\xE0\x9F\xBF", /* bug: not rejected */ "\\uFFFD", }, { @@ -599,50 +590,50 @@ static void utf8_string(void) * also 2.2.3 */ "\xF0\x8F\xBF\xBC", - "\xF0\x8F\xBF\xBC", /* bug: not corrected */ + "\xF0\x8F\xBF\xBC", /* bug: not rejected */ "\\uFFFD", }, { /* \U+1FFFFF */ "\xF8\x87\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { /* \U+3FFFFFF */ "\xFC\x83\xBF\xBF\xBF\xBF", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 4.3 Overlong representation of the NUL character */ { /* \U+0000 */ "\xC0\x80", - NULL, /* bug: rejected */ + NULL, "\\u0000", }, { /* \U+0000 */ "\xE0\x80\x80", - "\xE0\x80\x80", /* bug: not corrected */ + "\xE0\x80\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+0000 */ "\xF0\x80\x80\x80", - "\xF0\x80\x80\x80", /* bug: not corrected */ + "\xF0\x80\x80\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+0000 */ "\xF8\x80\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, { /* \U+0000 */ "\xFC\x80\x80\x80\x80\x80", - NULL, /* bug: rejected */ + NULL, "\\uFFFD", }, /* 5 Illegal code positions */ @@ -650,92 +641,92 @@ static void utf8_string(void) { /* \U+D800 */ "\xED\xA0\x80", - "\xED\xA0\x80", /* bug: not corrected */ + "\xED\xA0\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DB7F */ "\xED\xAD\xBF", - "\xED\xAD\xBF", /* bug: not corrected */ + "\xED\xAD\xBF", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DB80 */ "\xED\xAE\x80", - "\xED\xAE\x80", /* bug: not corrected */ + "\xED\xAE\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DBFF */ "\xED\xAF\xBF", - "\xED\xAF\xBF", /* bug: not corrected */ + "\xED\xAF\xBF", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DC00 */ "\xED\xB0\x80", - "\xED\xB0\x80", /* bug: not corrected */ + "\xED\xB0\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DF80 */ "\xED\xBE\x80", - "\xED\xBE\x80", /* bug: not corrected */ + "\xED\xBE\x80", /* bug: not rejected */ "\\uFFFD", }, { /* \U+DFFF */ "\xED\xBF\xBF", - "\xED\xBF\xBF", /* bug: not corrected */ + "\xED\xBF\xBF", /* bug: not rejected */ "\\uFFFD", }, /* 5.2 Paired UTF-16 surrogates */ { /* \U+D800\U+DC00 */ "\xED\xA0\x80\xED\xB0\x80", - "\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */ + "\xED\xA0\x80\xED\xB0\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+D800\U+DFFF */ "\xED\xA0\x80\xED\xBF\xBF", - "\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */ + "\xED\xA0\x80\xED\xBF\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DC00 */ "\xED\xAD\xBF\xED\xB0\x80", - "\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */ + "\xED\xAD\xBF\xED\xB0\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DB7F\U+DFFF */ "\xED\xAD\xBF\xED\xBF\xBF", - "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */ + "\xED\xAD\xBF\xED\xBF\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DC00 */ "\xED\xAE\x80\xED\xB0\x80", - "\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */ + "\xED\xAE\x80\xED\xB0\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DB80\U+DFFF */ "\xED\xAE\x80\xED\xBF\xBF", - "\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */ + "\xED\xAE\x80\xED\xBF\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DC00 */ "\xED\xAF\xBF\xED\xB0\x80", - "\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */ + "\xED\xAF\xBF\xED\xB0\x80", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, { /* \U+DBFF\U+DFFF */ "\xED\xAF\xBF\xED\xBF\xBF", - "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */ + "\xED\xAF\xBF\xED\xBF\xBF", /* bug: not rejected */ "\\uFFFD\\uFFFD", }, /* 5.3 Other illegal code positions */ @@ -743,25 +734,25 @@ static void utf8_string(void) { /* \U+FFFE */ "\xEF\xBF\xBE", - "\xEF\xBF\xBE", /* bug: not corrected */ + "\xEF\xBF\xBE", /* bug: not rejected */ "\\uFFFD", }, { /* \U+FFFF */ "\xEF\xBF\xBF", - "\xEF\xBF\xBF", /* bug: not corrected */ + "\xEF\xBF\xBF", /* bug: not rejected */ "\\uFFFD", }, { /* U+FDD0 */ "\xEF\xB7\x90", - "\xEF\xB7\x90", /* bug: not corrected */ + "\xEF\xB7\x90", /* bug: not rejected */ "\\uFFFD", }, { /* U+FDEF */ "\xEF\xB7\xAF", - "\xEF\xB7\xAF", /* bug: not corrected */ + "\xEF\xB7\xAF", /* bug: not rejected */ "\\uFFFD", }, /* Plane 1 .. 16 noncharacters */ @@ -783,7 +774,7 @@ static void utf8_string(void) "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF" "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF" "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF", - /* bug: not corrected */ + /* bug: not rejected */ "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF" "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF" "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"