Add files via upload

pull/1854/head
bobqianic 2024-02-14 01:29:33 +00:00 committed by GitHub
parent b7bc969d65
commit 9d3d1d23f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 570 additions and 290 deletions

View File

@ -2,227 +2,489 @@
#include <cassert>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <algorithm>
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
{0xCE6, 0xCEF}, {0xD66, 0xD6F}, {0xDE6, 0xDEF}, {0xE50, 0xE59}, {0xED0, 0xED9}, {0xF20, 0xF29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x1369, 0x1371}, {0x17E0, 0x17E9}, {0x1810, 0x1819}, {0x1946, 0x194F},
{0x19D0, 0x19DA}, {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1B50, 0x1B59}, {0x1BB0, 0x1BB9}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2460, 0x2468},
{0x2474, 0x247C}, {0x2488, 0x2490}, {0x24EA, 0x24EA}, {0x24F5, 0x24FD}, {0x24FF, 0x24FF}, {0x2776, 0x277E}, {0x2780, 0x2788}, {0x278A, 0x2792}, {0xA620, 0xA629}, {0xA8D0, 0xA8D9}, {0xA900, 0xA909},
{0xA9D0, 0xA9D9}, {0xA9F0, 0xA9F9}, {0xAA50, 0xAA59}, {0xABF0, 0xABF9}, {0xFF10, 0xFF19}, {0x104A0, 0x104A9}, {0x10A40, 0x10A43}, {0x10D30, 0x10D39}, {0x10E60, 0x10E68}, {0x11052, 0x1105A},
{0x11066, 0x1106F}, {0x110F0, 0x110F9}, {0x11136, 0x1113F}, {0x111D0, 0x111D9}, {0x112F0, 0x112F9}, {0x11450, 0x11459}, {0x114D0, 0x114D9}, {0x11650, 0x11659}, {0x116C0, 0x116C9}, {0x11730, 0x11739},
{0x118E0, 0x118E9}, {0x11950, 0x11959}, {0x11C50, 0x11C59}, {0x11D50, 0x11D59}, {0x11DA0, 0x11DA9}, {0x16A60, 0x16A69}, {0x16B50, 0x16B59}, {0x1D7CE, 0x1D7FF}, {0x1E140, 0x1E149}, {0x1E2F0, 0x1E2F9},
{0x1E950, 0x1E959}, {0x1F100, 0x1F10A}, {0x1FBF0, 0x1FBF9},
// Inclusive [first, last] code point ranges for characters treated as numbers:
// the decimal digits (starting with ASCII 0x30..0x39) plus other numeric
// characters such as the vulgar fractions U+00BC..U+00BE.
// Entries are listed in ascending order and do not overlap.
static const std::vector<std::pair<uint32_t, uint32_t>> number_ranges = {
{0x30,0x39},{0xb2,0xb3},{0xb9,0xb9},{0xbc,0xbe},{0x660,0x669},
{0x6f0,0x6f9},{0x7c0,0x7c9},{0x966,0x96f},{0x9e6,0x9ef},{0x9f4,0x9f9},
{0xa66,0xa6f},{0xae6,0xaef},{0xb66,0xb6f},{0xb72,0xb77},{0xbe6,0xbf2},
{0xc66,0xc6f},{0xc78,0xc7e},{0xce6,0xcef},{0xd58,0xd5e},{0xd66,0xd78},
{0xde6,0xdef},{0xe50,0xe59},{0xed0,0xed9},{0xf20,0xf33},{0x1040,0x1049},
{0x1090,0x1099},{0x1369,0x137c},{0x16ee,0x16f0},{0x17e0,0x17e9},{0x17f0,0x17f9},
{0x1810,0x1819},{0x1946,0x194f},{0x19d0,0x19da},{0x1a80,0x1a89},{0x1a90,0x1a99},
{0x1b50,0x1b59},{0x1bb0,0x1bb9},{0x1c40,0x1c49},{0x1c50,0x1c59},{0x2070,0x2070},
{0x2074,0x2079},{0x2080,0x2089},{0x2150,0x2182},{0x2185,0x2189},{0x2460,0x249b},
{0x24ea,0x24ff},{0x2776,0x2793},{0x2cfd,0x2cfd},{0x3007,0x3007},{0x3021,0x3029},
{0x3038,0x303a},{0x3192,0x3195},{0x3220,0x3229},{0x3248,0x324f},{0x3251,0x325f},
{0x3280,0x3289},{0x32b1,0x32bf},{0xa620,0xa629},{0xa6e6,0xa6ef},{0xa830,0xa835},
{0xa8d0,0xa8d9},{0xa900,0xa909},{0xa9d0,0xa9d9},{0xa9f0,0xa9f9},{0xaa50,0xaa59},
{0xabf0,0xabf9},{0xff10,0xff19},{0x10107,0x10133},{0x10140,0x10178},{0x1018a,0x1018b},
{0x102e1,0x102fb},{0x10320,0x10323},{0x10341,0x10341},{0x1034a,0x1034a},{0x103d1,0x103d5},
{0x104a0,0x104a9},{0x10858,0x1085f},{0x10879,0x1087f},{0x108a7,0x108af},{0x108fb,0x108ff},
{0x10916,0x1091b},{0x109bc,0x109bd},{0x109c0,0x109cf},{0x109d2,0x109ff},{0x10a40,0x10a48},
{0x10a7d,0x10a7e},{0x10a9d,0x10a9f},{0x10aeb,0x10aef},{0x10b58,0x10b5f},{0x10b78,0x10b7f},
{0x10ba9,0x10baf},{0x10cfa,0x10cff},{0x10d30,0x10d39},{0x10e60,0x10e7e},{0x10f1d,0x10f26},
{0x10f51,0x10f54},{0x10fc5,0x10fcb},{0x11052,0x1106f},{0x110f0,0x110f9},{0x11136,0x1113f},
{0x111d0,0x111d9},{0x111e1,0x111f4},{0x112f0,0x112f9},{0x11450,0x11459},{0x114d0,0x114d9},
{0x11650,0x11659},{0x116c0,0x116c9},{0x11730,0x1173b},{0x118e0,0x118f2},{0x11950,0x11959},
{0x11c50,0x11c6c},{0x11d50,0x11d59},{0x11da0,0x11da9},{0x11fc0,0x11fd4},{0x12400,0x1246e},
{0x16a60,0x16a69},{0x16ac0,0x16ac9},{0x16b50,0x16b59},{0x16b5b,0x16b61},{0x16e80,0x16e96},
{0x1d2e0,0x1d2f3},{0x1d360,0x1d378},{0x1d7ce,0x1d7ff},{0x1e140,0x1e149},{0x1e2f0,0x1e2f9},
{0x1e8c7,0x1e8cf},{0x1e950,0x1e959},{0x1ec71,0x1ecab},{0x1ecad,0x1ecaf},{0x1ecb1,0x1ecb4},
// The last entry used to repeat {0x1f100,0x1f10c}; the segmented digits
// U+1FBF0..U+1FBF9 were missing from the table and belong here instead.
{0x1ed01,0x1ed2d},{0x1ed2f,0x1ed3d},{0x1f100,0x1f10c},{0x1fbf0,0x1fbf9}
};
// Code point ranges for characters treated as letters, stored as
// {first, last} pairs. Single-code-point entries such as {0xAA, 0xAA} show
// that both bounds are inclusive. The table starts with ASCII A-Z (0x41..0x5A)
// and a-z (0x61..0x7A) and appears to be listed in ascending order.
static const std::vector<std::pair<uint32_t, uint32_t>> letter_ranges = {
{0x41, 0x5A}, {0x61, 0x7A}, {0xAA, 0xAA}, {0xB5, 0xB5}, {0xBA, 0xBA}, {0xC0, 0xD6}, {0xD8, 0xF6}, {0xF8, 0x2C1}, {0x2C6, 0x2D1}, {0x2E0, 0x2E4}, {0x2EC, 0x2EC}, {0x2EE, 0x2EE}, {0x370, 0x374},
{0x376, 0x377}, {0x37A, 0x37D}, {0x37F, 0x37F}, {0x386, 0x386}, {0x388, 0x38A}, {0x38C, 0x38C}, {0x38E, 0x3A1}, {0x3A3, 0x3F5}, {0x3F7, 0x481}, {0x48A, 0x52F}, {0x531, 0x556}, {0x559, 0x559},
{0x560, 0x588}, {0x5D0, 0x5EA}, {0x5EF, 0x5F2}, {0x620, 0x64A}, {0x66E, 0x66F}, {0x671, 0x6D3}, {0x6D5, 0x6D5}, {0x6E5, 0x6E6}, {0x6EE, 0x6EF}, {0x6FA, 0x6FC}, {0x6FF, 0x6FF}, {0x710, 0x710},
{0x712, 0x72F}, {0x74D, 0x7A5}, {0x7B1, 0x7B1}, {0x7CA, 0x7EA}, {0x7F4, 0x7F5}, {0x7FA, 0x7FA}, {0x800, 0x815}, {0x81A, 0x81A}, {0x824, 0x824}, {0x828, 0x828}, {0x840, 0x858}, {0x860, 0x86A},
{0x8A0, 0x8B4}, {0x8B6, 0x8C7}, {0x904, 0x939}, {0x93D, 0x93D}, {0x950, 0x950}, {0x958, 0x961}, {0x971, 0x980}, {0x985, 0x98C}, {0x98F, 0x990}, {0x993, 0x9A8}, {0x9AA, 0x9B0}, {0x9B2, 0x9B2},
{0x9B6, 0x9B9}, {0x9BD, 0x9BD}, {0x9CE, 0x9CE}, {0x9DC, 0x9DD}, {0x9DF, 0x9E1}, {0x9F0, 0x9F1}, {0x9FC, 0x9FC}, {0xA05, 0xA0A}, {0xA0F, 0xA10}, {0xA13, 0xA28}, {0xA2A, 0xA30}, {0xA32, 0xA33},
{0xA35, 0xA36}, {0xA38, 0xA39}, {0xA59, 0xA5C}, {0xA5E, 0xA5E}, {0xA72, 0xA74}, {0xA85, 0xA8D}, {0xA8F, 0xA91}, {0xA93, 0xAA8}, {0xAAA, 0xAB0}, {0xAB2, 0xAB3}, {0xAB5, 0xAB9}, {0xABD, 0xABD},
{0xAD0, 0xAD0}, {0xAE0, 0xAE1}, {0xAF9, 0xAF9}, {0xB05, 0xB0C}, {0xB0F, 0xB10}, {0xB13, 0xB28}, {0xB2A, 0xB30}, {0xB32, 0xB33}, {0xB35, 0xB39}, {0xB3D, 0xB3D}, {0xB5C, 0xB5D}, {0xB5F, 0xB61},
{0xB71, 0xB71}, {0xB83, 0xB83}, {0xB85, 0xB8A}, {0xB8E, 0xB90}, {0xB92, 0xB95}, {0xB99, 0xB9A}, {0xB9C, 0xB9C}, {0xB9E, 0xB9F}, {0xBA3, 0xBA4}, {0xBA8, 0xBAA}, {0xBAE, 0xBB9}, {0xBD0, 0xBD0},
{0xC05, 0xC0C}, {0xC0E, 0xC10}, {0xC12, 0xC28}, {0xC2A, 0xC39}, {0xC3D, 0xC3D}, {0xC58, 0xC5A}, {0xC60, 0xC61}, {0xC80, 0xC80}, {0xC85, 0xC8C}, {0xC8E, 0xC90}, {0xC92, 0xCA8}, {0xCAA, 0xCB3},
{0xCB5, 0xCB9}, {0xCBD, 0xCBD}, {0xCDE, 0xCDE}, {0xCE0, 0xCE1}, {0xCF1, 0xCF2}, {0xD04, 0xD0C}, {0xD0E, 0xD10}, {0xD12, 0xD3A}, {0xD3D, 0xD3D}, {0xD4E, 0xD4E}, {0xD54, 0xD56}, {0xD5F, 0xD61},
{0xD7A, 0xD7F}, {0xD85, 0xD96}, {0xD9A, 0xDB1}, {0xDB3, 0xDBB}, {0xDBD, 0xDBD}, {0xDC0, 0xDC6}, {0xE01, 0xE30}, {0xE32, 0xE33}, {0xE40, 0xE46}, {0xE81, 0xE82}, {0xE84, 0xE84}, {0xE86, 0xE8A},
{0xE8C, 0xEA3}, {0xEA5, 0xEA5}, {0xEA7, 0xEB0}, {0xEB2, 0xEB3}, {0xEBD, 0xEBD}, {0xEC0, 0xEC4}, {0xEC6, 0xEC6}, {0xEDC, 0xEDF}, {0xF00, 0xF00}, {0xF40, 0xF47}, {0xF49, 0xF6C}, {0xF88, 0xF8C},
{0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7},
{0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C},
{0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3},
{0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, {0x1880, 0x1884}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
{0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F},
{0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D},
{0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4},
{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107},
{0x210A, 0x2113}, {0x2115, 0x2115}, {0x2119, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212F, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E},
{0x2183, 0x2184}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F},
{0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2E2F, 0x2E2F}, {0x3005, 0x3006},
{0x3031, 0x3035}, {0x303B, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF},
{0x4E00, 0x9FFC}, {0xA000, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA717, 0xA71F}, {0xA722, 0xA788},
{0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE},
{0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B},
{0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4},
{0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
{0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
{0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFFBE}, {0xFFC2, 0xFFC7},
{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x10400, 0x1049D},
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
{0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7},
{0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35},
{0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C},
{0x10F27, 0x10F27}, {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147},
{0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},
{0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
{0x1133D, 0x1133D}, {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE},
{0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909},
{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00},
{0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},
{0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0},
{0x12000, 0x12399}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77},
{0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
{0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
{0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D},
{0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
{0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734},
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
};
// Inclusive [first, last] code point ranges treated as whitespace, one range
// per line in ascending order.
static const std::vector<std::pair<uint32_t, uint32_t>> whitespace_ranges = {
    {0x0009, 0x000d},  // TAB, LF, VT, FF, CR
    {0x001c, 0x0020},  // FS, GS, RS, US and SPACE
    {0x0085, 0x0085},  // NEXT LINE
    {0x00a0, 0x00a0},  // NO-BREAK SPACE
    {0x1680, 0x1680},  // OGHAM SPACE MARK
    {0x2000, 0x200a},  // EN QUAD .. HAIR SPACE
    {0x2028, 0x2029},  // LINE SEPARATOR, PARAGRAPH SEPARATOR
    {0x202f, 0x202f},  // NARROW NO-BREAK SPACE
    {0x205f, 0x205f},  // MEDIUM MATHEMATICAL SPACE
    {0x3000, 0x3000},  // IDEOGRAPHIC SPACE
};
// Inclusive [first, last] code point ranges. The first part of the table
// (through {0xE0100, 0xE01EF}) lists combining/accent marks, beginning with
// the combining diacritical marks U+0300..U+036F.
// NOTE(review): the entries from {0x41,0x5a} onward are letter code points in
// lowercase hex. They look like a replacement body for the letter table that
// a diff rendering spliced into this initializer - confirm against the
// upstream file before relying on this table for accent detection.
static const std::vector<std::pair<uint32_t, uint32_t>> accent_mark_ranges = {
{0x300, 0x36F}, {0x483, 0x489}, {0x591, 0x5BD}, {0x5BF, 0x5BF}, {0x5C1, 0x5C2}, {0x5C4, 0x5C5}, {0x5C7, 0x5C7}, {0x610, 0x61A}, {0x64B, 0x65F}, {0x670, 0x670}, {0x6D6, 0x6DC}, {0x6DF, 0x6E4},
{0x6E7, 0x6E8}, {0x6EA, 0x6ED}, {0x711, 0x711}, {0x730, 0x74A}, {0x7A6, 0x7B0}, {0x7EB, 0x7F3}, {0x7FD, 0x7FD}, {0x816, 0x819}, {0x81B, 0x823}, {0x825, 0x827}, {0x829, 0x82D}, {0x859, 0x85B},
{0x8D3, 0x8E1}, {0x8E3, 0x903}, {0x93A, 0x93C}, {0x93E, 0x94F}, {0x951, 0x957}, {0x962, 0x963}, {0x981, 0x983}, {0x9BC, 0x9BC}, {0x9BE, 0x9C4}, {0x9C7, 0x9C8}, {0x9CB, 0x9CD}, {0x9D7, 0x9D7},
{0x9E2, 0x9E3}, {0x9FE, 0x9FE}, {0xA01, 0xA03}, {0xA3C, 0xA3C}, {0xA3E, 0xA42}, {0xA47, 0xA48}, {0xA4B, 0xA4D}, {0xA51, 0xA51}, {0xA70, 0xA71}, {0xA75, 0xA75}, {0xA81, 0xA83}, {0xABC, 0xABC},
{0xABE, 0xAC5}, {0xAC7, 0xAC9}, {0xACB, 0xACD}, {0xAE2, 0xAE3}, {0xAFA, 0xAFF}, {0xB01, 0xB03}, {0xB3C, 0xB3C}, {0xB3E, 0xB44}, {0xB47, 0xB48}, {0xB4B, 0xB4D}, {0xB55, 0xB57}, {0xB62, 0xB63},
{0xB82, 0xB82}, {0xBBE, 0xBC2}, {0xBC6, 0xBC8}, {0xBCA, 0xBCD}, {0xBD7, 0xBD7}, {0xC00, 0xC04}, {0xC3E, 0xC44}, {0xC46, 0xC48}, {0xC4A, 0xC4D}, {0xC55, 0xC56}, {0xC62, 0xC63}, {0xC81, 0xC83},
{0xCBC, 0xCBC}, {0xCBE, 0xCC4}, {0xCC6, 0xCC8}, {0xCCA, 0xCCD}, {0xCD5, 0xCD6}, {0xCE2, 0xCE3}, {0xD00, 0xD03}, {0xD3B, 0xD3C}, {0xD3E, 0xD44}, {0xD46, 0xD48}, {0xD4A, 0xD4D}, {0xD57, 0xD57},
{0xD62, 0xD63}, {0xD81, 0xD83}, {0xDCA, 0xDCA}, {0xDCF, 0xDD4}, {0xDD6, 0xDD6}, {0xDD8, 0xDDF}, {0xDF2, 0xDF3}, {0xE31, 0xE31}, {0xE34, 0xE3A}, {0xE47, 0xE4E}, {0xEB1, 0xEB1}, {0xEB4, 0xEBC},
{0xEC8, 0xECD}, {0xF18, 0xF19}, {0xF35, 0xF35}, {0xF37, 0xF37}, {0xF39, 0xF39}, {0xF3E, 0xF3F}, {0xF71, 0xF84}, {0xF86, 0xF87}, {0xF8D, 0xF97}, {0xF99, 0xFBC}, {0xFC6, 0xFC6}, {0x102B, 0x103E},
{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, {0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1734},
{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E},
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2},
{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D},
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
{0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x11100, 0x11102}, {0x11127, 0x11134},
{0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, {0x111CE, 0x111CF}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x11300, 0x11303},
{0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, {0x1145E, 0x1145E},
{0x114B0, 0x114C3}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x1182C, 0x1183A}, {0x11930, 0x11935}, {0x11937, 0x11938},
{0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
{0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
{0x11D47, 0x11D47}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11EF3, 0x11EF6}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, {0x16F8F, 0x16F92},
{0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36},
{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
{0x1E130, 0x1E136}, {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0xE0100, 0xE01EF},
// NOTE(review): letter code points start here (see the header note).
{0x41,0x5a},{0x61,0x7a},{0xaa,0xaa},{0xb5,0xb5},{0xba,0xba},
{0xc0,0xd6},{0xd8,0xf6},{0xf8,0x2c1},{0x2c6,0x2d1},{0x2e0,0x2e4},
{0x2ec,0x2ec},{0x2ee,0x2ee},{0x370,0x374},{0x376,0x377},{0x37a,0x37d},
{0x37f,0x37f},{0x386,0x386},{0x388,0x38a},{0x38c,0x38c},{0x38e,0x3a1},
{0x3a3,0x3f5},{0x3f7,0x481},{0x48a,0x52f},{0x531,0x556},{0x559,0x559},
{0x560,0x588},{0x5d0,0x5ea},{0x5ef,0x5f2},{0x620,0x64a},{0x66e,0x66f},
{0x671,0x6d3},{0x6d5,0x6d5},{0x6e5,0x6e6},{0x6ee,0x6ef},{0x6fa,0x6fc},
{0x6ff,0x6ff},{0x710,0x710},{0x712,0x72f},{0x74d,0x7a5},{0x7b1,0x7b1},
{0x7ca,0x7ea},{0x7f4,0x7f5},{0x7fa,0x7fa},{0x800,0x815},{0x81a,0x81a},
{0x824,0x824},{0x828,0x828},{0x840,0x858},{0x860,0x86a},{0x870,0x887},
{0x889,0x88e},{0x8a0,0x8c9},{0x904,0x939},{0x93d,0x93d},{0x950,0x950},
{0x958,0x961},{0x971,0x980},{0x985,0x98c},{0x98f,0x990},{0x993,0x9a8},
{0x9aa,0x9b0},{0x9b2,0x9b2},{0x9b6,0x9b9},{0x9bd,0x9bd},{0x9ce,0x9ce},
{0x9dc,0x9dd},{0x9df,0x9e1},{0x9f0,0x9f1},{0x9fc,0x9fc},{0xa05,0xa0a},
{0xa0f,0xa10},{0xa13,0xa28},{0xa2a,0xa30},{0xa32,0xa33},{0xa35,0xa36},
{0xa38,0xa39},{0xa59,0xa5c},{0xa5e,0xa5e},{0xa72,0xa74},{0xa85,0xa8d},
{0xa8f,0xa91},{0xa93,0xaa8},{0xaaa,0xab0},{0xab2,0xab3},{0xab5,0xab9},
{0xabd,0xabd},{0xad0,0xad0},{0xae0,0xae1},{0xaf9,0xaf9},{0xb05,0xb0c},
{0xb0f,0xb10},{0xb13,0xb28},{0xb2a,0xb30},{0xb32,0xb33},{0xb35,0xb39},
{0xb3d,0xb3d},{0xb5c,0xb5d},{0xb5f,0xb61},{0xb71,0xb71},{0xb83,0xb83},
{0xb85,0xb8a},{0xb8e,0xb90},{0xb92,0xb95},{0xb99,0xb9a},{0xb9c,0xb9c},
{0xb9e,0xb9f},{0xba3,0xba4},{0xba8,0xbaa},{0xbae,0xbb9},{0xbd0,0xbd0},
{0xc05,0xc0c},{0xc0e,0xc10},{0xc12,0xc28},{0xc2a,0xc39},{0xc3d,0xc3d},
{0xc58,0xc5a},{0xc5d,0xc5d},{0xc60,0xc61},{0xc80,0xc80},{0xc85,0xc8c},
{0xc8e,0xc90},{0xc92,0xca8},{0xcaa,0xcb3},{0xcb5,0xcb9},{0xcbd,0xcbd},
{0xcdd,0xcde},{0xce0,0xce1},{0xcf1,0xcf2},{0xd04,0xd0c},{0xd0e,0xd10},
{0xd12,0xd3a},{0xd3d,0xd3d},{0xd4e,0xd4e},{0xd54,0xd56},{0xd5f,0xd61},
{0xd7a,0xd7f},{0xd85,0xd96},{0xd9a,0xdb1},{0xdb3,0xdbb},{0xdbd,0xdbd},
{0xdc0,0xdc6},{0xe01,0xe30},{0xe32,0xe33},{0xe40,0xe46},{0xe81,0xe82},
{0xe84,0xe84},{0xe86,0xe8a},{0xe8c,0xea3},{0xea5,0xea5},{0xea7,0xeb0},
{0xeb2,0xeb3},{0xebd,0xebd},{0xec0,0xec4},{0xec6,0xec6},{0xedc,0xedf},
{0xf00,0xf00},{0xf40,0xf47},{0xf49,0xf6c},{0xf88,0xf8c},{0x1000,0x102a},
{0x103f,0x103f},{0x1050,0x1055},{0x105a,0x105d},{0x1061,0x1061},{0x1065,0x1066},
{0x106e,0x1070},{0x1075,0x1081},{0x108e,0x108e},{0x10a0,0x10c5},{0x10c7,0x10c7},
{0x10cd,0x10cd},{0x10d0,0x10fa},{0x10fc,0x1248},{0x124a,0x124d},{0x1250,0x1256},
{0x1258,0x1258},{0x125a,0x125d},{0x1260,0x1288},{0x128a,0x128d},{0x1290,0x12b0},
{0x12b2,0x12b5},{0x12b8,0x12be},{0x12c0,0x12c0},{0x12c2,0x12c5},{0x12c8,0x12d6},
{0x12d8,0x1310},{0x1312,0x1315},{0x1318,0x135a},{0x1380,0x138f},{0x13a0,0x13f5},
{0x13f8,0x13fd},{0x1401,0x166c},{0x166f,0x167f},{0x1681,0x169a},{0x16a0,0x16ea},
{0x16f1,0x16f8},{0x1700,0x1711},{0x171f,0x1731},{0x1740,0x1751},{0x1760,0x176c},
{0x176e,0x1770},{0x1780,0x17b3},{0x17d7,0x17d7},{0x17dc,0x17dc},{0x1820,0x1878},
{0x1880,0x1884},{0x1887,0x18a8},{0x18aa,0x18aa},{0x18b0,0x18f5},{0x1900,0x191e},
{0x1950,0x196d},{0x1970,0x1974},{0x1980,0x19ab},{0x19b0,0x19c9},{0x1a00,0x1a16},
{0x1a20,0x1a54},{0x1aa7,0x1aa7},{0x1b05,0x1b33},{0x1b45,0x1b4c},{0x1b83,0x1ba0},
{0x1bae,0x1baf},{0x1bba,0x1be5},{0x1c00,0x1c23},{0x1c4d,0x1c4f},{0x1c5a,0x1c7d},
{0x1c80,0x1c88},{0x1c90,0x1cba},{0x1cbd,0x1cbf},{0x1ce9,0x1cec},{0x1cee,0x1cf3},
{0x1cf5,0x1cf6},{0x1cfa,0x1cfa},{0x1d00,0x1dbf},{0x1e00,0x1f15},{0x1f18,0x1f1d},
{0x1f20,0x1f45},{0x1f48,0x1f4d},{0x1f50,0x1f57},{0x1f59,0x1f59},{0x1f5b,0x1f5b},
{0x1f5d,0x1f5d},{0x1f5f,0x1f7d},{0x1f80,0x1fb4},{0x1fb6,0x1fbc},{0x1fbe,0x1fbe},
{0x1fc2,0x1fc4},{0x1fc6,0x1fcc},{0x1fd0,0x1fd3},{0x1fd6,0x1fdb},{0x1fe0,0x1fec},
{0x1ff2,0x1ff4},{0x1ff6,0x1ffc},{0x2071,0x2071},{0x207f,0x207f},{0x2090,0x209c},
{0x2102,0x2102},{0x2107,0x2107},{0x210a,0x2113},{0x2115,0x2115},{0x2119,0x211d},
{0x2124,0x2124},{0x2126,0x2126},{0x2128,0x2128},{0x212a,0x212d},{0x212f,0x2139},
{0x213c,0x213f},{0x2145,0x2149},{0x214e,0x214e},{0x2183,0x2184},{0x2c00,0x2ce4},
{0x2ceb,0x2cee},{0x2cf2,0x2cf3},{0x2d00,0x2d25},{0x2d27,0x2d27},{0x2d2d,0x2d2d},
{0x2d30,0x2d67},{0x2d6f,0x2d6f},{0x2d80,0x2d96},{0x2da0,0x2da6},{0x2da8,0x2dae},
{0x2db0,0x2db6},{0x2db8,0x2dbe},{0x2dc0,0x2dc6},{0x2dc8,0x2dce},{0x2dd0,0x2dd6},
{0x2dd8,0x2dde},{0x2e2f,0x2e2f},{0x3005,0x3006},{0x3031,0x3035},{0x303b,0x303c},
{0x3041,0x3096},{0x309d,0x309f},{0x30a1,0x30fa},{0x30fc,0x30ff},{0x3105,0x312f},
{0x3131,0x318e},{0x31a0,0x31bf},{0x31f0,0x31ff},{0x3400,0x4dbf},{0x4e00,0xa48c},
{0xa4d0,0xa4fd},{0xa500,0xa60c},{0xa610,0xa61f},{0xa62a,0xa62b},{0xa640,0xa66e},
{0xa67f,0xa69d},{0xa6a0,0xa6e5},{0xa717,0xa71f},{0xa722,0xa788},{0xa78b,0xa7ca},
{0xa7d0,0xa7d1},{0xa7d3,0xa7d3},{0xa7d5,0xa7d9},{0xa7f2,0xa801},{0xa803,0xa805},
{0xa807,0xa80a},{0xa80c,0xa822},{0xa840,0xa873},{0xa882,0xa8b3},{0xa8f2,0xa8f7},
{0xa8fb,0xa8fb},{0xa8fd,0xa8fe},{0xa90a,0xa925},{0xa930,0xa946},{0xa960,0xa97c},
{0xa984,0xa9b2},{0xa9cf,0xa9cf},{0xa9e0,0xa9e4},{0xa9e6,0xa9ef},{0xa9fa,0xa9fe},
{0xaa00,0xaa28},{0xaa40,0xaa42},{0xaa44,0xaa4b},{0xaa60,0xaa76},{0xaa7a,0xaa7a},
{0xaa7e,0xaaaf},{0xaab1,0xaab1},{0xaab5,0xaab6},{0xaab9,0xaabd},{0xaac0,0xaac0},
{0xaac2,0xaac2},{0xaadb,0xaadd},{0xaae0,0xaaea},{0xaaf2,0xaaf4},{0xab01,0xab06},
{0xab09,0xab0e},{0xab11,0xab16},{0xab20,0xab26},{0xab28,0xab2e},{0xab30,0xab5a},
{0xab5c,0xab69},{0xab70,0xabe2},{0xac00,0xd7a3},{0xd7b0,0xd7c6},{0xd7cb,0xd7fb},
{0xf900,0xfa6d},{0xfa70,0xfad9},{0xfb00,0xfb06},{0xfb13,0xfb17},{0xfb1d,0xfb1d},
{0xfb1f,0xfb28},{0xfb2a,0xfb36},{0xfb38,0xfb3c},{0xfb3e,0xfb3e},{0xfb40,0xfb41},
{0xfb43,0xfb44},{0xfb46,0xfbb1},{0xfbd3,0xfd3d},{0xfd50,0xfd8f},{0xfd92,0xfdc7},
{0xfdf0,0xfdfb},{0xfe70,0xfe74},{0xfe76,0xfefc},{0xff21,0xff3a},{0xff41,0xff5a},
{0xff66,0xffbe},{0xffc2,0xffc7},{0xffca,0xffcf},{0xffd2,0xffd7},{0xffda,0xffdc},
{0x10000,0x1000b},{0x1000d,0x10026},{0x10028,0x1003a},{0x1003c,0x1003d},{0x1003f,0x1004d},
{0x10050,0x1005d},{0x10080,0x100fa},{0x10280,0x1029c},{0x102a0,0x102d0},{0x10300,0x1031f},
{0x1032d,0x10340},{0x10342,0x10349},{0x10350,0x10375},{0x10380,0x1039d},{0x103a0,0x103c3},
{0x103c8,0x103cf},{0x10400,0x1049d},{0x104b0,0x104d3},{0x104d8,0x104fb},{0x10500,0x10527},
{0x10530,0x10563},{0x10570,0x1057a},{0x1057c,0x1058a},{0x1058c,0x10592},{0x10594,0x10595},
{0x10597,0x105a1},{0x105a3,0x105b1},{0x105b3,0x105b9},{0x105bb,0x105bc},{0x10600,0x10736},
{0x10740,0x10755},{0x10760,0x10767},{0x10780,0x10785},{0x10787,0x107b0},{0x107b2,0x107ba},
{0x10800,0x10805},{0x10808,0x10808},{0x1080a,0x10835},{0x10837,0x10838},{0x1083c,0x1083c},
{0x1083f,0x10855},{0x10860,0x10876},{0x10880,0x1089e},{0x108e0,0x108f2},{0x108f4,0x108f5},
{0x10900,0x10915},{0x10920,0x10939},{0x10980,0x109b7},{0x109be,0x109bf},{0x10a00,0x10a00},
{0x10a10,0x10a13},{0x10a15,0x10a17},{0x10a19,0x10a35},{0x10a60,0x10a7c},{0x10a80,0x10a9c},
{0x10ac0,0x10ac7},{0x10ac9,0x10ae4},{0x10b00,0x10b35},{0x10b40,0x10b55},{0x10b60,0x10b72},
{0x10b80,0x10b91},{0x10c00,0x10c48},{0x10c80,0x10cb2},{0x10cc0,0x10cf2},{0x10d00,0x10d23},
{0x10e80,0x10ea9},{0x10eb0,0x10eb1},{0x10f00,0x10f1c},{0x10f27,0x10f27},{0x10f30,0x10f45},
{0x10f70,0x10f81},{0x10fb0,0x10fc4},{0x10fe0,0x10ff6},{0x11003,0x11037},{0x11071,0x11072},
{0x11075,0x11075},{0x11083,0x110af},{0x110d0,0x110e8},{0x11103,0x11126},{0x11144,0x11144},
{0x11147,0x11147},{0x11150,0x11172},{0x11176,0x11176},{0x11183,0x111b2},{0x111c1,0x111c4},
{0x111da,0x111da},{0x111dc,0x111dc},{0x11200,0x11211},{0x11213,0x1122b},{0x11280,0x11286},
{0x11288,0x11288},{0x1128a,0x1128d},{0x1128f,0x1129d},{0x1129f,0x112a8},{0x112b0,0x112de},
{0x11305,0x1130c},{0x1130f,0x11310},{0x11313,0x11328},{0x1132a,0x11330},{0x11332,0x11333},
{0x11335,0x11339},{0x1133d,0x1133d},{0x11350,0x11350},{0x1135d,0x11361},{0x11400,0x11434},
{0x11447,0x1144a},{0x1145f,0x11461},{0x11480,0x114af},{0x114c4,0x114c5},{0x114c7,0x114c7},
{0x11580,0x115ae},{0x115d8,0x115db},{0x11600,0x1162f},{0x11644,0x11644},{0x11680,0x116aa},
{0x116b8,0x116b8},{0x11700,0x1171a},{0x11740,0x11746},{0x11800,0x1182b},{0x118a0,0x118df},
{0x118ff,0x11906},{0x11909,0x11909},{0x1190c,0x11913},{0x11915,0x11916},{0x11918,0x1192f},
{0x1193f,0x1193f},{0x11941,0x11941},{0x119a0,0x119a7},{0x119aa,0x119d0},{0x119e1,0x119e1},
{0x119e3,0x119e3},{0x11a00,0x11a00},{0x11a0b,0x11a32},{0x11a3a,0x11a3a},{0x11a50,0x11a50},
{0x11a5c,0x11a89},{0x11a9d,0x11a9d},{0x11ab0,0x11af8},{0x11c00,0x11c08},{0x11c0a,0x11c2e},
{0x11c40,0x11c40},{0x11c72,0x11c8f},{0x11d00,0x11d06},{0x11d08,0x11d09},{0x11d0b,0x11d30},
{0x11d46,0x11d46},{0x11d60,0x11d65},{0x11d67,0x11d68},{0x11d6a,0x11d89},{0x11d98,0x11d98},
{0x11ee0,0x11ef2},{0x11fb0,0x11fb0},{0x12000,0x12399},{0x12480,0x12543},{0x12f90,0x12ff0},
{0x13000,0x1342e},{0x14400,0x14646},{0x16800,0x16a38},{0x16a40,0x16a5e},{0x16a70,0x16abe},
{0x16ad0,0x16aed},{0x16b00,0x16b2f},{0x16b40,0x16b43},{0x16b63,0x16b77},{0x16b7d,0x16b8f},
{0x16e40,0x16e7f},{0x16f00,0x16f4a},{0x16f50,0x16f50},{0x16f93,0x16f9f},{0x16fe0,0x16fe1},
{0x16fe3,0x16fe3},{0x17000,0x187f7},{0x18800,0x18cd5},{0x18d00,0x18d08},{0x1aff0,0x1aff3},
{0x1aff5,0x1affb},{0x1affd,0x1affe},{0x1b000,0x1b122},{0x1b150,0x1b152},{0x1b164,0x1b167},
{0x1b170,0x1b2fb},{0x1bc00,0x1bc6a},{0x1bc70,0x1bc7c},{0x1bc80,0x1bc88},{0x1bc90,0x1bc99},
{0x1d400,0x1d454},{0x1d456,0x1d49c},{0x1d49e,0x1d49f},{0x1d4a2,0x1d4a2},{0x1d4a5,0x1d4a6},
{0x1d4a9,0x1d4ac},{0x1d4ae,0x1d4b9},{0x1d4bb,0x1d4bb},{0x1d4bd,0x1d4c3},{0x1d4c5,0x1d505},
{0x1d507,0x1d50a},{0x1d50d,0x1d514},{0x1d516,0x1d51c},{0x1d51e,0x1d539},{0x1d53b,0x1d53e},
{0x1d540,0x1d544},{0x1d546,0x1d546},{0x1d54a,0x1d550},{0x1d552,0x1d6a5},{0x1d6a8,0x1d6c0},
{0x1d6c2,0x1d6da},{0x1d6dc,0x1d6fa},{0x1d6fc,0x1d714},{0x1d716,0x1d734},{0x1d736,0x1d74e},
{0x1d750,0x1d76e},{0x1d770,0x1d788},{0x1d78a,0x1d7a8},{0x1d7aa,0x1d7c2},{0x1d7c4,0x1d7cb},
{0x1df00,0x1df1e},{0x1e100,0x1e12c},{0x1e137,0x1e13d},{0x1e14e,0x1e14e},{0x1e290,0x1e2ad},
{0x1e2c0,0x1e2eb},{0x1e7e0,0x1e7e6},{0x1e7e8,0x1e7eb},{0x1e7ed,0x1e7ee},{0x1e7f0,0x1e7fe},
{0x1e800,0x1e8c4},{0x1e900,0x1e943},{0x1e94b,0x1e94b},{0x1ee00,0x1ee03},{0x1ee05,0x1ee1f},
{0x1ee21,0x1ee22},{0x1ee24,0x1ee24},{0x1ee27,0x1ee27},{0x1ee29,0x1ee32},{0x1ee34,0x1ee37},
{0x1ee39,0x1ee39},{0x1ee3b,0x1ee3b},{0x1ee42,0x1ee42},{0x1ee47,0x1ee47},{0x1ee49,0x1ee49},
{0x1ee4b,0x1ee4b},{0x1ee4d,0x1ee4f},{0x1ee51,0x1ee52},{0x1ee54,0x1ee54},{0x1ee57,0x1ee57},
{0x1ee59,0x1ee59},{0x1ee5b,0x1ee5b},{0x1ee5d,0x1ee5d},{0x1ee5f,0x1ee5f},{0x1ee61,0x1ee62},
{0x1ee64,0x1ee64},{0x1ee67,0x1ee6a},{0x1ee6c,0x1ee72},{0x1ee74,0x1ee77},{0x1ee79,0x1ee7c},
{0x1ee7e,0x1ee7e},{0x1ee80,0x1ee89},{0x1ee8b,0x1ee9b},{0x1eea1,0x1eea3},{0x1eea5,0x1eea9},
{0x1eeab,0x1eebb},{0x20000,0x2a6df},{0x2a700,0x2b738},{0x2b740,0x2b81d},{0x2b820,0x2cea1},
// The last entry used to repeat {0x2f800,0x2fa1d}; the letter block
// U+30000..U+3134A that follows it was missing and belongs here instead.
{0x2ceb0,0x2ebe0},{0x2f800,0x2fa1d},{0x30000,0x3134a}
};
// Inclusive [first, last] code point ranges classified as punctuation
// (presumably the Unicode P* general categories -- confirm against the generator).
// Rows are sorted ascending and do not overlap; each range is listed exactly once.
static const std::vector<std::pair<uint32_t, uint32_t>> punctuation_ranges = {
{0x21,0x23},{0x25,0x2a},{0x2c,0x2f},{0x3a,0x3b},{0x3f,0x40},
{0x5b,0x5d},{0x5f,0x5f},{0x7b,0x7b},{0x7d,0x7d},{0xa1,0xa1},
{0xa7,0xa7},{0xab,0xab},{0xb6,0xb7},{0xbb,0xbb},{0xbf,0xbf},
{0x37e,0x37e},{0x387,0x387},{0x55a,0x55f},{0x589,0x58a},{0x5be,0x5be},
{0x5c0,0x5c0},{0x5c3,0x5c3},{0x5c6,0x5c6},{0x5f3,0x5f4},{0x609,0x60a},
{0x60c,0x60d},{0x61b,0x61b},{0x61d,0x61f},{0x66a,0x66d},{0x6d4,0x6d4},
{0x700,0x70d},{0x7f7,0x7f9},{0x830,0x83e},{0x85e,0x85e},{0x964,0x965},
{0x970,0x970},{0x9fd,0x9fd},{0xa76,0xa76},{0xaf0,0xaf0},{0xc77,0xc77},
{0xc84,0xc84},{0xdf4,0xdf4},{0xe4f,0xe4f},{0xe5a,0xe5b},{0xf04,0xf12},
{0xf14,0xf14},{0xf3a,0xf3d},{0xf85,0xf85},{0xfd0,0xfd4},{0xfd9,0xfda},
{0x104a,0x104f},{0x10fb,0x10fb},{0x1360,0x1368},{0x1400,0x1400},{0x166e,0x166e},
{0x169b,0x169c},{0x16eb,0x16ed},{0x1735,0x1736},{0x17d4,0x17d6},{0x17d8,0x17da},
{0x1800,0x180a},{0x1944,0x1945},{0x1a1e,0x1a1f},{0x1aa0,0x1aa6},{0x1aa8,0x1aad},
{0x1b5a,0x1b60},{0x1b7d,0x1b7e},{0x1bfc,0x1bff},{0x1c3b,0x1c3f},{0x1c7e,0x1c7f},
{0x1cc0,0x1cc7},{0x1cd3,0x1cd3},{0x2010,0x2027},{0x2030,0x2043},{0x2045,0x2051},
{0x2053,0x205e},{0x207d,0x207e},{0x208d,0x208e},{0x2308,0x230b},{0x2329,0x232a},
{0x2768,0x2775},{0x27c5,0x27c6},{0x27e6,0x27ef},{0x2983,0x2998},{0x29d8,0x29db},
{0x29fc,0x29fd},{0x2cf9,0x2cfc},{0x2cfe,0x2cff},{0x2d70,0x2d70},{0x2e00,0x2e2e},
{0x2e30,0x2e4f},{0x2e52,0x2e5d},{0x3001,0x3003},{0x3008,0x3011},{0x3014,0x301f},
{0x3030,0x3030},{0x303d,0x303d},{0x30a0,0x30a0},{0x30fb,0x30fb},{0xa4fe,0xa4ff},
{0xa60d,0xa60f},{0xa673,0xa673},{0xa67e,0xa67e},{0xa6f2,0xa6f7},{0xa874,0xa877},
{0xa8ce,0xa8cf},{0xa8f8,0xa8fa},{0xa8fc,0xa8fc},{0xa92e,0xa92f},{0xa95f,0xa95f},
{0xa9c1,0xa9cd},{0xa9de,0xa9df},{0xaa5c,0xaa5f},{0xaade,0xaadf},{0xaaf0,0xaaf1},
{0xabeb,0xabeb},{0xfd3e,0xfd3f},{0xfe10,0xfe19},{0xfe30,0xfe52},{0xfe54,0xfe61},
{0xfe63,0xfe63},{0xfe68,0xfe68},{0xfe6a,0xfe6b},{0xff01,0xff03},{0xff05,0xff0a},
{0xff0c,0xff0f},{0xff1a,0xff1b},{0xff1f,0xff20},{0xff3b,0xff3d},{0xff3f,0xff3f},
{0xff5b,0xff5b},{0xff5d,0xff5d},{0xff5f,0xff65},{0x10100,0x10102},{0x1039f,0x1039f},
{0x103d0,0x103d0},{0x1056f,0x1056f},{0x10857,0x10857},{0x1091f,0x1091f},{0x1093f,0x1093f},
{0x10a50,0x10a58},{0x10a7f,0x10a7f},{0x10af0,0x10af6},{0x10b39,0x10b3f},{0x10b99,0x10b9c},
{0x10ead,0x10ead},{0x10f55,0x10f59},{0x10f86,0x10f89},{0x11047,0x1104d},{0x110bb,0x110bc},
{0x110be,0x110c1},{0x11140,0x11143},{0x11174,0x11175},{0x111c5,0x111c8},{0x111cd,0x111cd},
{0x111db,0x111db},{0x111dd,0x111df},{0x11238,0x1123d},{0x112a9,0x112a9},{0x1144b,0x1144f},
{0x1145a,0x1145b},{0x1145d,0x1145d},{0x114c6,0x114c6},{0x115c1,0x115d7},{0x11641,0x11643},
{0x11660,0x1166c},{0x116b9,0x116b9},{0x1173c,0x1173e},{0x1183b,0x1183b},{0x11944,0x11946},
{0x119e2,0x119e2},{0x11a3f,0x11a46},{0x11a9a,0x11a9c},{0x11a9e,0x11aa2},{0x11c41,0x11c45},
{0x11c70,0x11c71},{0x11ef7,0x11ef8},{0x11fff,0x11fff},{0x12470,0x12474},{0x12ff1,0x12ff2},
{0x16a6e,0x16a6f},{0x16af5,0x16af5},{0x16b37,0x16b3b},{0x16b44,0x16b44},{0x16e97,0x16e9a},
{0x16fe2,0x16fe2},{0x1bc9f,0x1bc9f},{0x1da87,0x1da8b},{0x1e95e,0x1e95f}
};
// Inclusive [first, last] code point ranges classified as separators
// (space, line and paragraph separators). Rows are sorted ascending and
// do not overlap. The last row is U+3000 IDEOGRAPHIC SPACE.
static const std::vector<std::pair<uint32_t, uint32_t>> separator_ranges = {
{0x20,0x20},{0xa0,0xa0},{0x1680,0x1680},{0x2000,0x200a},{0x2028,0x2029},
{0x202f,0x202f},{0x205f,0x205f},{0x3000,0x3000}
};
// Inclusive [first, last] code point ranges classified as marks
// (combining characters; presumably the Unicode M* general categories --
// confirm against the generator). Rows are sorted ascending and do not overlap.
// The last row covers the variation selectors supplement U+E0100..U+E01EF.
static const std::vector<std::pair<uint32_t, uint32_t>> mark_ranges = {
{0x300,0x36f},{0x483,0x489},{0x591,0x5bd},{0x5bf,0x5bf},{0x5c1,0x5c2},
{0x5c4,0x5c5},{0x5c7,0x5c7},{0x610,0x61a},{0x64b,0x65f},{0x670,0x670},
{0x6d6,0x6dc},{0x6df,0x6e4},{0x6e7,0x6e8},{0x6ea,0x6ed},{0x711,0x711},
{0x730,0x74a},{0x7a6,0x7b0},{0x7eb,0x7f3},{0x7fd,0x7fd},{0x816,0x819},
{0x81b,0x823},{0x825,0x827},{0x829,0x82d},{0x859,0x85b},{0x898,0x89f},
{0x8ca,0x8e1},{0x8e3,0x903},{0x93a,0x93c},{0x93e,0x94f},{0x951,0x957},
{0x962,0x963},{0x981,0x983},{0x9bc,0x9bc},{0x9be,0x9c4},{0x9c7,0x9c8},
{0x9cb,0x9cd},{0x9d7,0x9d7},{0x9e2,0x9e3},{0x9fe,0x9fe},{0xa01,0xa03},
{0xa3c,0xa3c},{0xa3e,0xa42},{0xa47,0xa48},{0xa4b,0xa4d},{0xa51,0xa51},
{0xa70,0xa71},{0xa75,0xa75},{0xa81,0xa83},{0xabc,0xabc},{0xabe,0xac5},
{0xac7,0xac9},{0xacb,0xacd},{0xae2,0xae3},{0xafa,0xaff},{0xb01,0xb03},
{0xb3c,0xb3c},{0xb3e,0xb44},{0xb47,0xb48},{0xb4b,0xb4d},{0xb55,0xb57},
{0xb62,0xb63},{0xb82,0xb82},{0xbbe,0xbc2},{0xbc6,0xbc8},{0xbca,0xbcd},
{0xbd7,0xbd7},{0xc00,0xc04},{0xc3c,0xc3c},{0xc3e,0xc44},{0xc46,0xc48},
{0xc4a,0xc4d},{0xc55,0xc56},{0xc62,0xc63},{0xc81,0xc83},{0xcbc,0xcbc},
{0xcbe,0xcc4},{0xcc6,0xcc8},{0xcca,0xccd},{0xcd5,0xcd6},{0xce2,0xce3},
{0xd00,0xd03},{0xd3b,0xd3c},{0xd3e,0xd44},{0xd46,0xd48},{0xd4a,0xd4d},
{0xd57,0xd57},{0xd62,0xd63},{0xd81,0xd83},{0xdca,0xdca},{0xdcf,0xdd4},
{0xdd6,0xdd6},{0xdd8,0xddf},{0xdf2,0xdf3},{0xe31,0xe31},{0xe34,0xe3a},
{0xe47,0xe4e},{0xeb1,0xeb1},{0xeb4,0xebc},{0xec8,0xecd},{0xf18,0xf19},
{0xf35,0xf35},{0xf37,0xf37},{0xf39,0xf39},{0xf3e,0xf3f},{0xf71,0xf84},
{0xf86,0xf87},{0xf8d,0xf97},{0xf99,0xfbc},{0xfc6,0xfc6},{0x102b,0x103e},
{0x1056,0x1059},{0x105e,0x1060},{0x1062,0x1064},{0x1067,0x106d},{0x1071,0x1074},
{0x1082,0x108d},{0x108f,0x108f},{0x109a,0x109d},{0x135d,0x135f},{0x1712,0x1715},
{0x1732,0x1734},{0x1752,0x1753},{0x1772,0x1773},{0x17b4,0x17d3},{0x17dd,0x17dd},
{0x180b,0x180d},{0x180f,0x180f},{0x1885,0x1886},{0x18a9,0x18a9},{0x1920,0x192b},
{0x1930,0x193b},{0x1a17,0x1a1b},{0x1a55,0x1a5e},{0x1a60,0x1a7c},{0x1a7f,0x1a7f},
{0x1ab0,0x1ace},{0x1b00,0x1b04},{0x1b34,0x1b44},{0x1b6b,0x1b73},{0x1b80,0x1b82},
{0x1ba1,0x1bad},{0x1be6,0x1bf3},{0x1c24,0x1c37},{0x1cd0,0x1cd2},{0x1cd4,0x1ce8},
{0x1ced,0x1ced},{0x1cf4,0x1cf4},{0x1cf7,0x1cf9},{0x1dc0,0x1dff},{0x20d0,0x20f0},
{0x2cef,0x2cf1},{0x2d7f,0x2d7f},{0x2de0,0x2dff},{0x302a,0x302f},{0x3099,0x309a},
{0xa66f,0xa672},{0xa674,0xa67d},{0xa69e,0xa69f},{0xa6f0,0xa6f1},{0xa802,0xa802},
{0xa806,0xa806},{0xa80b,0xa80b},{0xa823,0xa827},{0xa82c,0xa82c},{0xa880,0xa881},
{0xa8b4,0xa8c5},{0xa8e0,0xa8f1},{0xa8ff,0xa8ff},{0xa926,0xa92d},{0xa947,0xa953},
{0xa980,0xa983},{0xa9b3,0xa9c0},{0xa9e5,0xa9e5},{0xaa29,0xaa36},{0xaa43,0xaa43},
{0xaa4c,0xaa4d},{0xaa7b,0xaa7d},{0xaab0,0xaab0},{0xaab2,0xaab4},{0xaab7,0xaab8},
{0xaabe,0xaabf},{0xaac1,0xaac1},{0xaaeb,0xaaef},{0xaaf5,0xaaf6},{0xabe3,0xabea},
{0xabec,0xabed},{0xfb1e,0xfb1e},{0xfe00,0xfe0f},{0xfe20,0xfe2f},{0x101fd,0x101fd},
{0x102e0,0x102e0},{0x10376,0x1037a},{0x10a01,0x10a03},{0x10a05,0x10a06},{0x10a0c,0x10a0f},
{0x10a38,0x10a3a},{0x10a3f,0x10a3f},{0x10ae5,0x10ae6},{0x10d24,0x10d27},{0x10eab,0x10eac},
{0x10f46,0x10f50},{0x10f82,0x10f85},{0x11000,0x11002},{0x11038,0x11046},{0x11070,0x11070},
{0x11073,0x11074},{0x1107f,0x11082},{0x110b0,0x110ba},{0x110c2,0x110c2},{0x11100,0x11102},
{0x11127,0x11134},{0x11145,0x11146},{0x11173,0x11173},{0x11180,0x11182},{0x111b3,0x111c0},
{0x111c9,0x111cc},{0x111ce,0x111cf},{0x1122c,0x11237},{0x1123e,0x1123e},{0x112df,0x112ea},
{0x11300,0x11303},{0x1133b,0x1133c},{0x1133e,0x11344},{0x11347,0x11348},{0x1134b,0x1134d},
{0x11357,0x11357},{0x11362,0x11363},{0x11366,0x1136c},{0x11370,0x11374},{0x11435,0x11446},
{0x1145e,0x1145e},{0x114b0,0x114c3},{0x115af,0x115b5},{0x115b8,0x115c0},{0x115dc,0x115dd},
{0x11630,0x11640},{0x116ab,0x116b7},{0x1171d,0x1172b},{0x1182c,0x1183a},{0x11930,0x11935},
{0x11937,0x11938},{0x1193b,0x1193e},{0x11940,0x11940},{0x11942,0x11943},{0x119d1,0x119d7},
{0x119da,0x119e0},{0x119e4,0x119e4},{0x11a01,0x11a0a},{0x11a33,0x11a39},{0x11a3b,0x11a3e},
{0x11a47,0x11a47},{0x11a51,0x11a5b},{0x11a8a,0x11a99},{0x11c2f,0x11c36},{0x11c38,0x11c3f},
{0x11c92,0x11ca7},{0x11ca9,0x11cb6},{0x11d31,0x11d36},{0x11d3a,0x11d3a},{0x11d3c,0x11d3d},
{0x11d3f,0x11d45},{0x11d47,0x11d47},{0x11d8a,0x11d8e},{0x11d90,0x11d91},{0x11d93,0x11d97},
{0x11ef3,0x11ef6},{0x16af0,0x16af4},{0x16b30,0x16b36},{0x16f4f,0x16f4f},{0x16f51,0x16f87},
{0x16f8f,0x16f92},{0x16fe4,0x16fe4},{0x16ff0,0x16ff1},{0x1bc9d,0x1bc9e},{0x1cf00,0x1cf2d},
{0x1cf30,0x1cf46},{0x1d165,0x1d169},{0x1d16d,0x1d172},{0x1d17b,0x1d182},{0x1d185,0x1d18b},
{0x1d1aa,0x1d1ad},{0x1d242,0x1d244},{0x1da00,0x1da36},{0x1da3b,0x1da6c},{0x1da75,0x1da75},
{0x1da84,0x1da84},{0x1da9b,0x1da9f},{0x1daa1,0x1daaf},{0x1e000,0x1e006},{0x1e008,0x1e018},
{0x1e01b,0x1e021},{0x1e023,0x1e024},{0x1e026,0x1e02a},{0x1e130,0x1e136},{0x1e2ae,0x1e2ae},
{0x1e2ec,0x1e2ef},{0x1e8d0,0x1e8d6},{0x1e944,0x1e94a},{0xe0100,0xe01ef}
};
// Inclusive [first, last] code point ranges classified as symbols
// (presumably the Unicode S* general categories -- confirm against the generator).
// Rows are sorted ascending and do not overlap; each range is listed exactly once.
static const std::vector<std::pair<uint32_t, uint32_t>> symbol_ranges = {
{0x24,0x24},{0x2b,0x2b},{0x3c,0x3e},{0x5e,0x5e},{0x60,0x60},
{0x7c,0x7c},{0x7e,0x7e},{0xa2,0xa6},{0xa8,0xa9},{0xac,0xac},
{0xae,0xb1},{0xb4,0xb4},{0xb8,0xb8},{0xd7,0xd7},{0xf7,0xf7},
{0x2c2,0x2c5},{0x2d2,0x2df},{0x2e5,0x2eb},{0x2ed,0x2ed},{0x2ef,0x2ff},
{0x375,0x375},{0x384,0x385},{0x3f6,0x3f6},{0x482,0x482},{0x58d,0x58f},
{0x606,0x608},{0x60b,0x60b},{0x60e,0x60f},{0x6de,0x6de},{0x6e9,0x6e9},
{0x6fd,0x6fe},{0x7f6,0x7f6},{0x7fe,0x7ff},{0x888,0x888},{0x9f2,0x9f3},
{0x9fa,0x9fb},{0xaf1,0xaf1},{0xb70,0xb70},{0xbf3,0xbfa},{0xc7f,0xc7f},
{0xd4f,0xd4f},{0xd79,0xd79},{0xe3f,0xe3f},{0xf01,0xf03},{0xf13,0xf13},
{0xf15,0xf17},{0xf1a,0xf1f},{0xf34,0xf34},{0xf36,0xf36},{0xf38,0xf38},
{0xfbe,0xfc5},{0xfc7,0xfcc},{0xfce,0xfcf},{0xfd5,0xfd8},{0x109e,0x109f},
{0x1390,0x1399},{0x166d,0x166d},{0x17db,0x17db},{0x1940,0x1940},{0x19de,0x19ff},
{0x1b61,0x1b6a},{0x1b74,0x1b7c},{0x1fbd,0x1fbd},{0x1fbf,0x1fc1},{0x1fcd,0x1fcf},
{0x1fdd,0x1fdf},{0x1fed,0x1fef},{0x1ffd,0x1ffe},{0x2044,0x2044},{0x2052,0x2052},
{0x207a,0x207c},{0x208a,0x208c},{0x20a0,0x20c0},{0x2100,0x2101},{0x2103,0x2106},
{0x2108,0x2109},{0x2114,0x2114},{0x2116,0x2118},{0x211e,0x2123},{0x2125,0x2125},
{0x2127,0x2127},{0x2129,0x2129},{0x212e,0x212e},{0x213a,0x213b},{0x2140,0x2144},
{0x214a,0x214d},{0x214f,0x214f},{0x218a,0x218b},{0x2190,0x2307},{0x230c,0x2328},
{0x232b,0x2426},{0x2440,0x244a},{0x249c,0x24e9},{0x2500,0x2767},{0x2794,0x27c4},
{0x27c7,0x27e5},{0x27f0,0x2982},{0x2999,0x29d7},{0x29dc,0x29fb},{0x29fe,0x2b73},
{0x2b76,0x2b95},{0x2b97,0x2bff},{0x2ce5,0x2cea},{0x2e50,0x2e51},{0x2e80,0x2e99},
{0x2e9b,0x2ef3},{0x2f00,0x2fd5},{0x2ff0,0x2ffb},{0x3004,0x3004},{0x3012,0x3013},
{0x3020,0x3020},{0x3036,0x3037},{0x303e,0x303f},{0x309b,0x309c},{0x3190,0x3191},
{0x3196,0x319f},{0x31c0,0x31e3},{0x3200,0x321e},{0x322a,0x3247},{0x3250,0x3250},
{0x3260,0x327f},{0x328a,0x32b0},{0x32c0,0x33ff},{0x4dc0,0x4dff},{0xa490,0xa4c6},
{0xa700,0xa716},{0xa720,0xa721},{0xa789,0xa78a},{0xa828,0xa82b},{0xa836,0xa839},
{0xaa77,0xaa79},{0xab5b,0xab5b},{0xab6a,0xab6b},{0xfb29,0xfb29},{0xfbb2,0xfbc2},
{0xfd40,0xfd4f},{0xfdcf,0xfdcf},{0xfdfc,0xfdff},{0xfe62,0xfe62},{0xfe64,0xfe66},
{0xfe69,0xfe69},{0xff04,0xff04},{0xff0b,0xff0b},{0xff1c,0xff1e},{0xff3e,0xff3e},
{0xff40,0xff40},{0xff5c,0xff5c},{0xff5e,0xff5e},{0xffe0,0xffe6},{0xffe8,0xffee},
{0xfffc,0xfffd},{0x10137,0x1013f},{0x10179,0x10189},{0x1018c,0x1018e},{0x10190,0x1019c},
{0x101a0,0x101a0},{0x101d0,0x101fc},{0x10877,0x10878},{0x10ac8,0x10ac8},{0x1173f,0x1173f},
{0x11fd5,0x11ff1},{0x16b3c,0x16b3f},{0x16b45,0x16b45},{0x1bc9c,0x1bc9c},{0x1cf50,0x1cfc3},
{0x1d000,0x1d0f5},{0x1d100,0x1d126},{0x1d129,0x1d164},{0x1d16a,0x1d16c},{0x1d183,0x1d184},
{0x1d18c,0x1d1a9},{0x1d1ae,0x1d1ea},{0x1d200,0x1d241},{0x1d245,0x1d245},{0x1d300,0x1d356},
{0x1d6c1,0x1d6c1},{0x1d6db,0x1d6db},{0x1d6fb,0x1d6fb},{0x1d715,0x1d715},{0x1d735,0x1d735},
{0x1d74f,0x1d74f},{0x1d76f,0x1d76f},{0x1d789,0x1d789},{0x1d7a9,0x1d7a9},{0x1d7c3,0x1d7c3},
{0x1d800,0x1d9ff},{0x1da37,0x1da3a},{0x1da6d,0x1da74},{0x1da76,0x1da83},{0x1da85,0x1da86},
{0x1e14f,0x1e14f},{0x1e2ff,0x1e2ff},{0x1ecac,0x1ecac},{0x1ecb0,0x1ecb0},{0x1ed2e,0x1ed2e},
{0x1eef0,0x1eef1},{0x1f000,0x1f02b},{0x1f030,0x1f093},{0x1f0a0,0x1f0ae},{0x1f0b1,0x1f0bf},
{0x1f0c1,0x1f0cf},{0x1f0d1,0x1f0f5},{0x1f10d,0x1f1ad},{0x1f1e6,0x1f202},{0x1f210,0x1f23b},
{0x1f240,0x1f248},{0x1f250,0x1f251},{0x1f260,0x1f265},{0x1f300,0x1f6d7},{0x1f6dd,0x1f6ec},
{0x1f6f0,0x1f6fc},{0x1f700,0x1f773},{0x1f780,0x1f7d8},{0x1f7e0,0x1f7eb},{0x1f7f0,0x1f7f0},
{0x1f800,0x1f80b},{0x1f810,0x1f847},{0x1f850,0x1f859},{0x1f860,0x1f887},{0x1f890,0x1f8ad},
{0x1f8b0,0x1f8b1},{0x1f900,0x1fa53},{0x1fa60,0x1fa6d},{0x1fa70,0x1fa74},{0x1fa78,0x1fa7c},
{0x1fa80,0x1fa86},{0x1fa90,0x1faac},{0x1fab0,0x1faba},{0x1fac0,0x1fac5},{0x1fad0,0x1fad9},
{0x1fae0,0x1fae7},{0x1faf0,0x1faf6},{0x1fb00,0x1fb92},{0x1fb94,0x1fbca}
};
static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
{0x0, 0x8}, {0xE, 0x1B}, {0x7F, 0x84}, {0x86, 0x9F}, {0xAD, 0xAD}, {0x378, 0x379}, {0x380, 0x383}, {0x38B, 0x38B}, {0x38D, 0x38D}, {0x3A2, 0x3A2}, {0x530, 0x530}, {0x557, 0x558}, {0x58B, 0x58C},
{0x590, 0x590}, {0x5C8, 0x5CF}, {0x5EB, 0x5EE}, {0x5F5, 0x605}, {0x61C, 0x61D}, {0x6DD, 0x6DD}, {0x70E, 0x70F}, {0x74B, 0x74C}, {0x7B2, 0x7BF}, {0x7FB, 0x7FC}, {0x82E, 0x82F}, {0x83F, 0x83F},
{0x85C, 0x85D}, {0x85F, 0x85F}, {0x86B, 0x89F}, {0x8B5, 0x8B5}, {0x8C8, 0x8D2}, {0x8E2, 0x8E2}, {0x984, 0x984}, {0x98D, 0x98E}, {0x991, 0x992}, {0x9A9, 0x9A9}, {0x9B1, 0x9B1}, {0x9B3, 0x9B5},
{0x9BA, 0x9BB}, {0x9C5, 0x9C6}, {0x9C9, 0x9CA}, {0x9CF, 0x9D6}, {0x9D8, 0x9DB}, {0x9DE, 0x9DE}, {0x9E4, 0x9E5}, {0x9FF, 0xA00}, {0xA04, 0xA04}, {0xA0B, 0xA0E}, {0xA11, 0xA12}, {0xA29, 0xA29},
{0xA31, 0xA31}, {0xA34, 0xA34}, {0xA37, 0xA37}, {0xA3A, 0xA3B}, {0xA3D, 0xA3D}, {0xA43, 0xA46}, {0xA49, 0xA4A}, {0xA4E, 0xA50}, {0xA52, 0xA58}, {0xA5D, 0xA5D}, {0xA5F, 0xA65}, {0xA77, 0xA80},
{0xA84, 0xA84}, {0xA8E, 0xA8E}, {0xA92, 0xA92}, {0xAA9, 0xAA9}, {0xAB1, 0xAB1}, {0xAB4, 0xAB4}, {0xABA, 0xABB}, {0xAC6, 0xAC6}, {0xACA, 0xACA}, {0xACE, 0xACF}, {0xAD1, 0xADF}, {0xAE4, 0xAE5},
{0xAF2, 0xAF8}, {0xB00, 0xB00}, {0xB04, 0xB04}, {0xB0D, 0xB0E}, {0xB11, 0xB12}, {0xB29, 0xB29}, {0xB31, 0xB31}, {0xB34, 0xB34}, {0xB3A, 0xB3B}, {0xB45, 0xB46}, {0xB49, 0xB4A}, {0xB4E, 0xB54},
{0xB58, 0xB5B}, {0xB5E, 0xB5E}, {0xB64, 0xB65}, {0xB78, 0xB81}, {0xB84, 0xB84}, {0xB8B, 0xB8D}, {0xB91, 0xB91}, {0xB96, 0xB98}, {0xB9B, 0xB9B}, {0xB9D, 0xB9D}, {0xBA0, 0xBA2}, {0xBA5, 0xBA7},
{0xBAB, 0xBAD}, {0xBBA, 0xBBD}, {0xBC3, 0xBC5}, {0xBC9, 0xBC9}, {0xBCE, 0xBCF}, {0xBD1, 0xBD6}, {0xBD8, 0xBE5}, {0xBFB, 0xBFF}, {0xC0D, 0xC0D}, {0xC11, 0xC11}, {0xC29, 0xC29}, {0xC3A, 0xC3C},
{0xC45, 0xC45}, {0xC49, 0xC49}, {0xC4E, 0xC54}, {0xC57, 0xC57}, {0xC5B, 0xC5F}, {0xC64, 0xC65}, {0xC70, 0xC76}, {0xC8D, 0xC8D}, {0xC91, 0xC91}, {0xCA9, 0xCA9}, {0xCB4, 0xCB4}, {0xCBA, 0xCBB},
{0xCC5, 0xCC5}, {0xCC9, 0xCC9}, {0xCCE, 0xCD4}, {0xCD7, 0xCDD}, {0xCDF, 0xCDF}, {0xCE4, 0xCE5}, {0xCF0, 0xCF0}, {0xCF3, 0xCFF}, {0xD0D, 0xD0D}, {0xD11, 0xD11}, {0xD45, 0xD45}, {0xD49, 0xD49},
{0xD50, 0xD53}, {0xD64, 0xD65}, {0xD80, 0xD80}, {0xD84, 0xD84}, {0xD97, 0xD99}, {0xDB2, 0xDB2}, {0xDBC, 0xDBC}, {0xDBE, 0xDBF}, {0xDC7, 0xDC9}, {0xDCB, 0xDCE}, {0xDD5, 0xDD5}, {0xDD7, 0xDD7},
{0xDE0, 0xDE5}, {0xDF0, 0xDF1}, {0xDF5, 0xE00}, {0xE3B, 0xE3E}, {0xE5C, 0xE80}, {0xE83, 0xE83}, {0xE85, 0xE85}, {0xE8B, 0xE8B}, {0xEA4, 0xEA4}, {0xEA6, 0xEA6}, {0xEBE, 0xEBF}, {0xEC5, 0xEC5},
{0xEC7, 0xEC7}, {0xECE, 0xECF}, {0xEDA, 0xEDB}, {0xEE0, 0xEFF}, {0xF48, 0xF48}, {0xF6D, 0xF70}, {0xF98, 0xF98}, {0xFBD, 0xFBD}, {0xFCD, 0xFCD}, {0xFDB, 0xFFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC},
{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F}, {0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F}, {0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1}, {0x12B6, 0x12B7}, {0x12BF, 0x12BF},
{0x12C1, 0x12C1}, {0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311}, {0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F}, {0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF}, {0x169D, 0x169F},
{0x16F9, 0x16FF}, {0x170D, 0x170D}, {0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F}, {0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F}, {0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF},
{0x180E, 0x180F}, {0x181A, 0x181F}, {0x1879, 0x187F}, {0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F}, {0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943}, {0x196E, 0x196F}, {0x1975, 0x197F},
{0x19AC, 0x19AF}, {0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D}, {0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F}, {0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1AC1, 0x1AFF}, {0x1B4C, 0x1B4F},
{0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB}, {0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1C8F}, {0x1CBB, 0x1CBC}, {0x1CC8, 0x1CCF}, {0x1CFB, 0x1CFF}, {0x1DFA, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F},
{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58}, {0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E}, {0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5}, {0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC},
{0x1FF0, 0x1FF1}, {0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x206F}, {0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F}, {0x20C0, 0x20CF}, {0x20F1, 0x20FF},
{0x218C, 0x218F}, {0x2427, 0x243F}, {0x244B, 0x245F}, {0x2B74, 0x2B75}, {0x2B96, 0x2B96}, {0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8}, {0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F},
{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F}, {0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7}, {0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF}, {0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF},
{0x2E53, 0x2E7F}, {0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF}, {0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098}, {0x3100, 0x3104}, {0x3130, 0x3130}, {0x318F, 0x318F}, {0x31E4, 0x31EF},
{0x321F, 0x321F}, {0x9FFD, 0x9FFF}, {0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F}, {0xA6F8, 0xA6FF}, {0xA7C0, 0xA7C1}, {0xA7CB, 0xA7F4}, {0xA82D, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F},
{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE}, {0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F}, {0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA},
{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10}, {0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F}, {0xAB6C, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF}, {0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA},
{0xD7FC, 0xF8FF}, {0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12}, {0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D}, {0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45}, {0xFBC2, 0xFBD2},
{0xFD40, 0xFD4F}, {0xFD90, 0xFD91}, {0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F}, {0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F}, {0xFE75, 0xFE75}, {0xFEFD, 0xFF00}, {0xFFBF, 0xFFC1},
{0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1}, {0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7}, {0xFFEF, 0xFFFB}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C}, {0x10027, 0x10027}, {0x1003B, 0x1003B},
{0x1003E, 0x1003E}, {0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF}, {0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F}, {0x1019D, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F},
{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF}, {0x10324, 0x1032C}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F}, {0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF}, {0x1049E, 0x1049F},
{0x104AA, 0x104AF}, {0x104D4, 0x104D7}, {0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E}, {0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F}, {0x10768, 0x107FF}, {0x10806, 0x10807},
{0x10809, 0x10809}, {0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E}, {0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF}, {0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E},
{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB}, {0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B}, {0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A36, 0x10A37}, {0x10A3B, 0x10A3E},
{0x10A49, 0x10A4F}, {0x10A59, 0x10A5F}, {0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF}, {0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77}, {0x10B92, 0x10B98}, {0x10B9D, 0x10BA8},
{0x10BB0, 0x10BFF}, {0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9}, {0x10D28, 0x10D2F}, {0x10D3A, 0x10E5F}, {0x10E7F, 0x10E7F}, {0x10EAA, 0x10EAA}, {0x10EAE, 0x10EAF}, {0x10EB2, 0x10EFF},
{0x10F28, 0x10F2F}, {0x10F5A, 0x10FAF}, {0x10FCC, 0x10FDF}, {0x10FF7, 0x10FFF}, {0x1104E, 0x11051}, {0x11070, 0x1107E}, {0x110BD, 0x110BD}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF}, {0x110FA, 0x110FF},
{0x11135, 0x11135}, {0x11148, 0x1114F}, {0x11177, 0x1117F}, {0x111E0, 0x111E0}, {0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F}, {0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E},
{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF}, {0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E}, {0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331}, {0x11334, 0x11334},
{0x1133A, 0x1133A}, {0x11345, 0x11346}, {0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356}, {0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F}, {0x11375, 0x113FF}, {0x1145C, 0x1145C},
{0x11462, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F}, {0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F}, {0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B9, 0x116BF}, {0x116CA, 0x116FF},
{0x1171B, 0x1171C}, {0x1172C, 0x1172F}, {0x11740, 0x117FF}, {0x1183C, 0x1189F}, {0x118F3, 0x118FE}, {0x11907, 0x11908}, {0x1190A, 0x1190B}, {0x11914, 0x11914}, {0x11917, 0x11917}, {0x11936, 0x11936},
{0x11939, 0x1193A}, {0x11947, 0x1194F}, {0x1195A, 0x1199F}, {0x119A8, 0x119A9}, {0x119D8, 0x119D9}, {0x119E5, 0x119FF}, {0x11A48, 0x11A4F}, {0x11AA3, 0x11ABF}, {0x11AF9, 0x11BFF}, {0x11C09, 0x11C09},
{0x11C37, 0x11C37}, {0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91}, {0x11CA8, 0x11CA8}, {0x11CB7, 0x11CFF}, {0x11D07, 0x11D07}, {0x11D0A, 0x11D0A}, {0x11D37, 0x11D39}, {0x11D3B, 0x11D3B},
{0x11D3E, 0x11D3E}, {0x11D48, 0x11D4F}, {0x11D5A, 0x11D5F}, {0x11D66, 0x11D66}, {0x11D69, 0x11D69}, {0x11D8F, 0x11D8F}, {0x11D92, 0x11D92}, {0x11D99, 0x11D9F}, {0x11DAA, 0x11EDF}, {0x11EF9, 0x11FAF},
{0x11FB1, 0x11FBF}, {0x11FF2, 0x11FFE}, {0x1239A, 0x123FF}, {0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF}, {0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F}, {0x16A5F, 0x16A5F},
{0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF}, {0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F}, {0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C}, {0x16B90, 0x16E3F}, {0x16E9B, 0x16EFF},
{0x16F4B, 0x16F4E}, {0x16F88, 0x16F8E}, {0x16FA0, 0x16FDF}, {0x16FE5, 0x16FEF}, {0x16FF2, 0x16FFF}, {0x187F8, 0x187FF}, {0x18CD6, 0x18CFF}, {0x18D09, 0x1AFFF}, {0x1B11F, 0x1B14F}, {0x1B153, 0x1B163},
{0x1B168, 0x1B16F}, {0x1B2FC, 0x1BBFF}, {0x1BC6B, 0x1BC6F}, {0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B}, {0x1BCA0, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128}, {0x1D173, 0x1D17A},
{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2DF}, {0x1D2F4, 0x1D2FF}, {0x1D357, 0x1D35F}, {0x1D379, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D}, {0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8},
{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC}, {0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C}, {0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A}, {0x1D53F, 0x1D53F},
{0x1D545, 0x1D545}, {0x1D547, 0x1D549}, {0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD}, {0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF}, {0x1E007, 0x1E007}, {0x1E019, 0x1E01A},
{0x1E022, 0x1E022}, {0x1E025, 0x1E025}, {0x1E02B, 0x1E0FF}, {0x1E12D, 0x1E12F}, {0x1E13E, 0x1E13F}, {0x1E14A, 0x1E14D}, {0x1E150, 0x1E2BF}, {0x1E2FA, 0x1E2FE}, {0x1E300, 0x1E7FF}, {0x1E8C5, 0x1E8C6},
{0x1E8D7, 0x1E8FF}, {0x1E94C, 0x1E94F}, {0x1E95A, 0x1E95D}, {0x1E960, 0x1EC70}, {0x1ECB5, 0x1ED00}, {0x1ED3E, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20}, {0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26},
{0x1EE28, 0x1EE28}, {0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A}, {0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48}, {0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50},
{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58}, {0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E}, {0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66}, {0x1EE6B, 0x1EE6B},
{0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78}, {0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A}, {0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA}, {0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF},
{0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F1AE, 0x1F1E5}, {0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F},
{0x1F252, 0x1F25F}, {0x1F266, 0x1F2FF}, {0x1F6D8, 0x1F6DF}, {0x1F6ED, 0x1F6EF}, {0x1F6FD, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D9, 0x1F7DF}, {0x1F7EC, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8B2, 0x1F8FF}, {0x1F979, 0x1F979}, {0x1F9CC, 0x1F9CC}, {0x1FA54, 0x1FA5F}, {0x1FA6E, 0x1FA6F}, {0x1FA75, 0x1FA77}, {0x1FA7B, 0x1FA7F},
{0x1FA87, 0x1FA8F}, {0x1FAA9, 0x1FAAF}, {0x1FAB7, 0x1FABF}, {0x1FAC3, 0x1FACF}, {0x1FAD7, 0x1FAFF}, {0x1FB93, 0x1FB93}, {0x1FBCB, 0x1FBEF}, {0x1FBFA, 0x1FFFF}, {0x2A6DE, 0x2A6FF}, {0x2B735, 0x2B73F},
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
// Inclusive [first, last] codepoint ranges classified as CODEPOINT_TYPE_OTHER.
// Entries are listed in ascending order and do not overlap. The table ends with
// 0xE01F0..0x10FFFF so that every codepoint up to the Unicode maximum is
// covered; that final range previously appeared as a duplicate of the
// preceding entry, leaving the top of the codespace unclassified.
static const std::vector<std::pair<uint32_t, uint32_t>> other_ranges = {
    {0x0,0x1f},{0x7f,0x9f},{0xad,0xad},{0x378,0x379},{0x380,0x383},
    {0x38b,0x38b},{0x38d,0x38d},{0x3a2,0x3a2},{0x530,0x530},{0x557,0x558},
    {0x58b,0x58c},{0x590,0x590},{0x5c8,0x5cf},{0x5eb,0x5ee},{0x5f5,0x605},
    {0x61c,0x61c},{0x6dd,0x6dd},{0x70e,0x70f},{0x74b,0x74c},{0x7b2,0x7bf},
    {0x7fb,0x7fc},{0x82e,0x82f},{0x83f,0x83f},{0x85c,0x85d},{0x85f,0x85f},
    {0x86b,0x86f},{0x88f,0x897},{0x8e2,0x8e2},{0x984,0x984},{0x98d,0x98e},
    {0x991,0x992},{0x9a9,0x9a9},{0x9b1,0x9b1},{0x9b3,0x9b5},{0x9ba,0x9bb},
    {0x9c5,0x9c6},{0x9c9,0x9ca},{0x9cf,0x9d6},{0x9d8,0x9db},{0x9de,0x9de},
    {0x9e4,0x9e5},{0x9ff,0xa00},{0xa04,0xa04},{0xa0b,0xa0e},{0xa11,0xa12},
    {0xa29,0xa29},{0xa31,0xa31},{0xa34,0xa34},{0xa37,0xa37},{0xa3a,0xa3b},
    {0xa3d,0xa3d},{0xa43,0xa46},{0xa49,0xa4a},{0xa4e,0xa50},{0xa52,0xa58},
    {0xa5d,0xa5d},{0xa5f,0xa65},{0xa77,0xa80},{0xa84,0xa84},{0xa8e,0xa8e},
    {0xa92,0xa92},{0xaa9,0xaa9},{0xab1,0xab1},{0xab4,0xab4},{0xaba,0xabb},
    {0xac6,0xac6},{0xaca,0xaca},{0xace,0xacf},{0xad1,0xadf},{0xae4,0xae5},
    {0xaf2,0xaf8},{0xb00,0xb00},{0xb04,0xb04},{0xb0d,0xb0e},{0xb11,0xb12},
    {0xb29,0xb29},{0xb31,0xb31},{0xb34,0xb34},{0xb3a,0xb3b},{0xb45,0xb46},
    {0xb49,0xb4a},{0xb4e,0xb54},{0xb58,0xb5b},{0xb5e,0xb5e},{0xb64,0xb65},
    {0xb78,0xb81},{0xb84,0xb84},{0xb8b,0xb8d},{0xb91,0xb91},{0xb96,0xb98},
    {0xb9b,0xb9b},{0xb9d,0xb9d},{0xba0,0xba2},{0xba5,0xba7},{0xbab,0xbad},
    {0xbba,0xbbd},{0xbc3,0xbc5},{0xbc9,0xbc9},{0xbce,0xbcf},{0xbd1,0xbd6},
    {0xbd8,0xbe5},{0xbfb,0xbff},{0xc0d,0xc0d},{0xc11,0xc11},{0xc29,0xc29},
    {0xc3a,0xc3b},{0xc45,0xc45},{0xc49,0xc49},{0xc4e,0xc54},{0xc57,0xc57},
    {0xc5b,0xc5c},{0xc5e,0xc5f},{0xc64,0xc65},{0xc70,0xc76},{0xc8d,0xc8d},
    {0xc91,0xc91},{0xca9,0xca9},{0xcb4,0xcb4},{0xcba,0xcbb},{0xcc5,0xcc5},
    {0xcc9,0xcc9},{0xcce,0xcd4},{0xcd7,0xcdc},{0xcdf,0xcdf},{0xce4,0xce5},
    {0xcf0,0xcf0},{0xcf3,0xcff},{0xd0d,0xd0d},{0xd11,0xd11},{0xd45,0xd45},
    {0xd49,0xd49},{0xd50,0xd53},{0xd64,0xd65},{0xd80,0xd80},{0xd84,0xd84},
    {0xd97,0xd99},{0xdb2,0xdb2},{0xdbc,0xdbc},{0xdbe,0xdbf},{0xdc7,0xdc9},
    {0xdcb,0xdce},{0xdd5,0xdd5},{0xdd7,0xdd7},{0xde0,0xde5},{0xdf0,0xdf1},
    {0xdf5,0xe00},{0xe3b,0xe3e},{0xe5c,0xe80},{0xe83,0xe83},{0xe85,0xe85},
    {0xe8b,0xe8b},{0xea4,0xea4},{0xea6,0xea6},{0xebe,0xebf},{0xec5,0xec5},
    {0xec7,0xec7},{0xece,0xecf},{0xeda,0xedb},{0xee0,0xeff},{0xf48,0xf48},
    {0xf6d,0xf70},{0xf98,0xf98},{0xfbd,0xfbd},{0xfcd,0xfcd},{0xfdb,0xfff},
    {0x10c6,0x10c6},{0x10c8,0x10cc},{0x10ce,0x10cf},{0x1249,0x1249},{0x124e,0x124f},
    {0x1257,0x1257},{0x1259,0x1259},{0x125e,0x125f},{0x1289,0x1289},{0x128e,0x128f},
    {0x12b1,0x12b1},{0x12b6,0x12b7},{0x12bf,0x12bf},{0x12c1,0x12c1},{0x12c6,0x12c7},
    {0x12d7,0x12d7},{0x1311,0x1311},{0x1316,0x1317},{0x135b,0x135c},{0x137d,0x137f},
    {0x139a,0x139f},{0x13f6,0x13f7},{0x13fe,0x13ff},{0x169d,0x169f},{0x16f9,0x16ff},
    {0x1716,0x171e},{0x1737,0x173f},{0x1754,0x175f},{0x176d,0x176d},{0x1771,0x1771},
    {0x1774,0x177f},{0x17de,0x17df},{0x17ea,0x17ef},{0x17fa,0x17ff},{0x180e,0x180e},
    {0x181a,0x181f},{0x1879,0x187f},{0x18ab,0x18af},{0x18f6,0x18ff},{0x191f,0x191f},
    {0x192c,0x192f},{0x193c,0x193f},{0x1941,0x1943},{0x196e,0x196f},{0x1975,0x197f},
    {0x19ac,0x19af},{0x19ca,0x19cf},{0x19db,0x19dd},{0x1a1c,0x1a1d},{0x1a5f,0x1a5f},
    {0x1a7d,0x1a7e},{0x1a8a,0x1a8f},{0x1a9a,0x1a9f},{0x1aae,0x1aaf},{0x1acf,0x1aff},
    {0x1b4d,0x1b4f},{0x1b7f,0x1b7f},{0x1bf4,0x1bfb},{0x1c38,0x1c3a},{0x1c4a,0x1c4c},
    {0x1c89,0x1c8f},{0x1cbb,0x1cbc},{0x1cc8,0x1ccf},{0x1cfb,0x1cff},{0x1f16,0x1f17},
    {0x1f1e,0x1f1f},{0x1f46,0x1f47},{0x1f4e,0x1f4f},{0x1f58,0x1f58},{0x1f5a,0x1f5a},
    {0x1f5c,0x1f5c},{0x1f5e,0x1f5e},{0x1f7e,0x1f7f},{0x1fb5,0x1fb5},{0x1fc5,0x1fc5},
    {0x1fd4,0x1fd5},{0x1fdc,0x1fdc},{0x1ff0,0x1ff1},{0x1ff5,0x1ff5},{0x1fff,0x1fff},
    {0x200b,0x200f},{0x202a,0x202e},{0x2060,0x206f},{0x2072,0x2073},{0x208f,0x208f},
    {0x209d,0x209f},{0x20c1,0x20cf},{0x20f1,0x20ff},{0x218c,0x218f},{0x2427,0x243f},
    {0x244b,0x245f},{0x2b74,0x2b75},{0x2b96,0x2b96},{0x2cf4,0x2cf8},{0x2d26,0x2d26},
    {0x2d28,0x2d2c},{0x2d2e,0x2d2f},{0x2d68,0x2d6e},{0x2d71,0x2d7e},{0x2d97,0x2d9f},
    {0x2da7,0x2da7},{0x2daf,0x2daf},{0x2db7,0x2db7},{0x2dbf,0x2dbf},{0x2dc7,0x2dc7},
    {0x2dcf,0x2dcf},{0x2dd7,0x2dd7},{0x2ddf,0x2ddf},{0x2e5e,0x2e7f},{0x2e9a,0x2e9a},
    {0x2ef4,0x2eff},{0x2fd6,0x2fef},{0x2ffc,0x2fff},{0x3040,0x3040},{0x3097,0x3098},
    {0x3100,0x3104},{0x3130,0x3130},{0x318f,0x318f},{0x31e4,0x31ef},{0x321f,0x321f},
    {0xa48d,0xa48f},{0xa4c7,0xa4cf},{0xa62c,0xa63f},{0xa6f8,0xa6ff},{0xa7cb,0xa7cf},
    {0xa7d2,0xa7d2},{0xa7d4,0xa7d4},{0xa7da,0xa7f1},{0xa82d,0xa82f},{0xa83a,0xa83f},
    {0xa878,0xa87f},{0xa8c6,0xa8cd},{0xa8da,0xa8df},{0xa954,0xa95e},{0xa97d,0xa97f},
    {0xa9ce,0xa9ce},{0xa9da,0xa9dd},{0xa9ff,0xa9ff},{0xaa37,0xaa3f},{0xaa4e,0xaa4f},
    {0xaa5a,0xaa5b},{0xaac3,0xaada},{0xaaf7,0xab00},{0xab07,0xab08},{0xab0f,0xab10},
    {0xab17,0xab1f},{0xab27,0xab27},{0xab2f,0xab2f},{0xab6c,0xab6f},{0xabee,0xabef},
    {0xabfa,0xabff},{0xd7a4,0xd7af},{0xd7c7,0xd7ca},{0xd7fc,0xf8ff},{0xfa6e,0xfa6f},
    {0xfada,0xfaff},{0xfb07,0xfb12},{0xfb18,0xfb1c},{0xfb37,0xfb37},{0xfb3d,0xfb3d},
    {0xfb3f,0xfb3f},{0xfb42,0xfb42},{0xfb45,0xfb45},{0xfbc3,0xfbd2},{0xfd90,0xfd91},
    {0xfdc8,0xfdce},{0xfdd0,0xfdef},{0xfe1a,0xfe1f},{0xfe53,0xfe53},{0xfe67,0xfe67},
    {0xfe6c,0xfe6f},{0xfe75,0xfe75},{0xfefd,0xff00},{0xffbf,0xffc1},{0xffc8,0xffc9},
    {0xffd0,0xffd1},{0xffd8,0xffd9},{0xffdd,0xffdf},{0xffe7,0xffe7},{0xffef,0xfffb},
    {0xfffe,0xffff},{0x1000c,0x1000c},{0x10027,0x10027},{0x1003b,0x1003b},{0x1003e,0x1003e},
    {0x1004e,0x1004f},{0x1005e,0x1007f},{0x100fb,0x100ff},{0x10103,0x10106},{0x10134,0x10136},
    {0x1018f,0x1018f},{0x1019d,0x1019f},{0x101a1,0x101cf},{0x101fe,0x1027f},{0x1029d,0x1029f},
    {0x102d1,0x102df},{0x102fc,0x102ff},{0x10324,0x1032c},{0x1034b,0x1034f},{0x1037b,0x1037f},
    {0x1039e,0x1039e},{0x103c4,0x103c7},{0x103d6,0x103ff},{0x1049e,0x1049f},{0x104aa,0x104af},
    {0x104d4,0x104d7},{0x104fc,0x104ff},{0x10528,0x1052f},{0x10564,0x1056e},{0x1057b,0x1057b},
    {0x1058b,0x1058b},{0x10593,0x10593},{0x10596,0x10596},{0x105a2,0x105a2},{0x105b2,0x105b2},
    {0x105ba,0x105ba},{0x105bd,0x105ff},{0x10737,0x1073f},{0x10756,0x1075f},{0x10768,0x1077f},
    {0x10786,0x10786},{0x107b1,0x107b1},{0x107bb,0x107ff},{0x10806,0x10807},{0x10809,0x10809},
    {0x10836,0x10836},{0x10839,0x1083b},{0x1083d,0x1083e},{0x10856,0x10856},{0x1089f,0x108a6},
    {0x108b0,0x108df},{0x108f3,0x108f3},{0x108f6,0x108fa},{0x1091c,0x1091e},{0x1093a,0x1093e},
    {0x10940,0x1097f},{0x109b8,0x109bb},{0x109d0,0x109d1},{0x10a04,0x10a04},{0x10a07,0x10a0b},
    {0x10a14,0x10a14},{0x10a18,0x10a18},{0x10a36,0x10a37},{0x10a3b,0x10a3e},{0x10a49,0x10a4f},
    {0x10a59,0x10a5f},{0x10aa0,0x10abf},{0x10ae7,0x10aea},{0x10af7,0x10aff},{0x10b36,0x10b38},
    {0x10b56,0x10b57},{0x10b73,0x10b77},{0x10b92,0x10b98},{0x10b9d,0x10ba8},{0x10bb0,0x10bff},
    {0x10c49,0x10c7f},{0x10cb3,0x10cbf},{0x10cf3,0x10cf9},{0x10d28,0x10d2f},{0x10d3a,0x10e5f},
    {0x10e7f,0x10e7f},{0x10eaa,0x10eaa},{0x10eae,0x10eaf},{0x10eb2,0x10eff},{0x10f28,0x10f2f},
    {0x10f5a,0x10f6f},{0x10f8a,0x10faf},{0x10fcc,0x10fdf},{0x10ff7,0x10fff},{0x1104e,0x11051},
    {0x11076,0x1107e},{0x110bd,0x110bd},{0x110c3,0x110cf},{0x110e9,0x110ef},{0x110fa,0x110ff},
    {0x11135,0x11135},{0x11148,0x1114f},{0x11177,0x1117f},{0x111e0,0x111e0},{0x111f5,0x111ff},
    {0x11212,0x11212},{0x1123f,0x1127f},{0x11287,0x11287},{0x11289,0x11289},{0x1128e,0x1128e},
    {0x1129e,0x1129e},{0x112aa,0x112af},{0x112eb,0x112ef},{0x112fa,0x112ff},{0x11304,0x11304},
    {0x1130d,0x1130e},{0x11311,0x11312},{0x11329,0x11329},{0x11331,0x11331},{0x11334,0x11334},
    {0x1133a,0x1133a},{0x11345,0x11346},{0x11349,0x1134a},{0x1134e,0x1134f},{0x11351,0x11356},
    {0x11358,0x1135c},{0x11364,0x11365},{0x1136d,0x1136f},{0x11375,0x113ff},{0x1145c,0x1145c},
    {0x11462,0x1147f},{0x114c8,0x114cf},{0x114da,0x1157f},{0x115b6,0x115b7},{0x115de,0x115ff},
    {0x11645,0x1164f},{0x1165a,0x1165f},{0x1166d,0x1167f},{0x116ba,0x116bf},{0x116ca,0x116ff},
    {0x1171b,0x1171c},{0x1172c,0x1172f},{0x11747,0x117ff},{0x1183c,0x1189f},{0x118f3,0x118fe},
    {0x11907,0x11908},{0x1190a,0x1190b},{0x11914,0x11914},{0x11917,0x11917},{0x11936,0x11936},
    {0x11939,0x1193a},{0x11947,0x1194f},{0x1195a,0x1199f},{0x119a8,0x119a9},{0x119d8,0x119d9},
    {0x119e5,0x119ff},{0x11a48,0x11a4f},{0x11aa3,0x11aaf},{0x11af9,0x11bff},{0x11c09,0x11c09},
    {0x11c37,0x11c37},{0x11c46,0x11c4f},{0x11c6d,0x11c6f},{0x11c90,0x11c91},{0x11ca8,0x11ca8},
    {0x11cb7,0x11cff},{0x11d07,0x11d07},{0x11d0a,0x11d0a},{0x11d37,0x11d39},{0x11d3b,0x11d3b},
    {0x11d3e,0x11d3e},{0x11d48,0x11d4f},{0x11d5a,0x11d5f},{0x11d66,0x11d66},{0x11d69,0x11d69},
    {0x11d8f,0x11d8f},{0x11d92,0x11d92},{0x11d99,0x11d9f},{0x11daa,0x11edf},{0x11ef9,0x11faf},
    {0x11fb1,0x11fbf},{0x11ff2,0x11ffe},{0x1239a,0x123ff},{0x1246f,0x1246f},{0x12475,0x1247f},
    {0x12544,0x12f8f},{0x12ff3,0x12fff},{0x1342f,0x143ff},{0x14647,0x167ff},{0x16a39,0x16a3f},
    {0x16a5f,0x16a5f},{0x16a6a,0x16a6d},{0x16abf,0x16abf},{0x16aca,0x16acf},{0x16aee,0x16aef},
    {0x16af6,0x16aff},{0x16b46,0x16b4f},{0x16b5a,0x16b5a},{0x16b62,0x16b62},{0x16b78,0x16b7c},
    {0x16b90,0x16e3f},{0x16e9b,0x16eff},{0x16f4b,0x16f4e},{0x16f88,0x16f8e},{0x16fa0,0x16fdf},
    {0x16fe5,0x16fef},{0x16ff2,0x16fff},{0x187f8,0x187ff},{0x18cd6,0x18cff},{0x18d09,0x1afef},
    {0x1aff4,0x1aff4},{0x1affc,0x1affc},{0x1afff,0x1afff},{0x1b123,0x1b14f},{0x1b153,0x1b163},
    {0x1b168,0x1b16f},{0x1b2fc,0x1bbff},{0x1bc6b,0x1bc6f},{0x1bc7d,0x1bc7f},{0x1bc89,0x1bc8f},
    {0x1bc9a,0x1bc9b},{0x1bca0,0x1ceff},{0x1cf2e,0x1cf2f},{0x1cf47,0x1cf4f},{0x1cfc4,0x1cfff},
    {0x1d0f6,0x1d0ff},{0x1d127,0x1d128},{0x1d173,0x1d17a},{0x1d1eb,0x1d1ff},{0x1d246,0x1d2df},
    {0x1d2f4,0x1d2ff},{0x1d357,0x1d35f},{0x1d379,0x1d3ff},{0x1d455,0x1d455},{0x1d49d,0x1d49d},
    {0x1d4a0,0x1d4a1},{0x1d4a3,0x1d4a4},{0x1d4a7,0x1d4a8},{0x1d4ad,0x1d4ad},{0x1d4ba,0x1d4ba},
    {0x1d4bc,0x1d4bc},{0x1d4c4,0x1d4c4},{0x1d506,0x1d506},{0x1d50b,0x1d50c},{0x1d515,0x1d515},
    {0x1d51d,0x1d51d},{0x1d53a,0x1d53a},{0x1d53f,0x1d53f},{0x1d545,0x1d545},{0x1d547,0x1d549},
    {0x1d551,0x1d551},{0x1d6a6,0x1d6a7},{0x1d7cc,0x1d7cd},{0x1da8c,0x1da9a},{0x1daa0,0x1daa0},
    {0x1dab0,0x1deff},{0x1df1f,0x1dfff},{0x1e007,0x1e007},{0x1e019,0x1e01a},{0x1e022,0x1e022},
    {0x1e025,0x1e025},{0x1e02b,0x1e0ff},{0x1e12d,0x1e12f},{0x1e13e,0x1e13f},{0x1e14a,0x1e14d},
    {0x1e150,0x1e28f},{0x1e2af,0x1e2bf},{0x1e2fa,0x1e2fe},{0x1e300,0x1e7df},{0x1e7e7,0x1e7e7},
    {0x1e7ec,0x1e7ec},{0x1e7ef,0x1e7ef},{0x1e7ff,0x1e7ff},{0x1e8c5,0x1e8c6},{0x1e8d7,0x1e8ff},
    {0x1e94c,0x1e94f},{0x1e95a,0x1e95d},{0x1e960,0x1ec70},{0x1ecb5,0x1ed00},{0x1ed3e,0x1edff},
    {0x1ee04,0x1ee04},{0x1ee20,0x1ee20},{0x1ee23,0x1ee23},{0x1ee25,0x1ee26},{0x1ee28,0x1ee28},
    {0x1ee33,0x1ee33},{0x1ee38,0x1ee38},{0x1ee3a,0x1ee3a},{0x1ee3c,0x1ee41},{0x1ee43,0x1ee46},
    {0x1ee48,0x1ee48},{0x1ee4a,0x1ee4a},{0x1ee4c,0x1ee4c},{0x1ee50,0x1ee50},{0x1ee53,0x1ee53},
    {0x1ee55,0x1ee56},{0x1ee58,0x1ee58},{0x1ee5a,0x1ee5a},{0x1ee5c,0x1ee5c},{0x1ee5e,0x1ee5e},
    {0x1ee60,0x1ee60},{0x1ee63,0x1ee63},{0x1ee65,0x1ee66},{0x1ee6b,0x1ee6b},{0x1ee73,0x1ee73},
    {0x1ee78,0x1ee78},{0x1ee7d,0x1ee7d},{0x1ee7f,0x1ee7f},{0x1ee8a,0x1ee8a},{0x1ee9c,0x1eea0},
    {0x1eea4,0x1eea4},{0x1eeaa,0x1eeaa},{0x1eebc,0x1eeef},{0x1eef2,0x1efff},{0x1f02c,0x1f02f},
    {0x1f094,0x1f09f},{0x1f0af,0x1f0b0},{0x1f0c0,0x1f0c0},{0x1f0d0,0x1f0d0},{0x1f0f6,0x1f0ff},
    {0x1f1ae,0x1f1e5},{0x1f203,0x1f20f},{0x1f23c,0x1f23f},{0x1f249,0x1f24f},{0x1f252,0x1f25f},
    {0x1f266,0x1f2ff},{0x1f6d8,0x1f6dc},{0x1f6ed,0x1f6ef},{0x1f6fd,0x1f6ff},{0x1f774,0x1f77f},
    {0x1f7d9,0x1f7df},{0x1f7ec,0x1f7ef},{0x1f7f1,0x1f7ff},{0x1f80c,0x1f80f},{0x1f848,0x1f84f},
    {0x1f85a,0x1f85f},{0x1f888,0x1f88f},{0x1f8ae,0x1f8af},{0x1f8b2,0x1f8ff},{0x1fa54,0x1fa5f},
    {0x1fa6e,0x1fa6f},{0x1fa75,0x1fa77},{0x1fa7d,0x1fa7f},{0x1fa87,0x1fa8f},{0x1faad,0x1faaf},
    {0x1fabb,0x1fabf},{0x1fac6,0x1facf},{0x1fada,0x1fadf},{0x1fae8,0x1faef},{0x1faf7,0x1faff},
    {0x1fb93,0x1fb93},{0x1fbcb,0x1fbef},{0x1fbfa,0x1ffff},{0x2a6e0,0x2a6ff},{0x2b739,0x2b73f},
    {0x2b81e,0x2b81f},{0x2cea2,0x2ceaf},{0x2ebe1,0x2f7ff},{0x2fa1e,0x2ffff},{0x3134b,0xe00ff},
    {0xe01f0,0x10ffff}
};
// Whitespace is not an official category in this file's classification;
// the table below exists solely so regex-style matching can test for it.
// Each entry is an inclusive [first, last] codepoint range, in ascending order.
static const std::vector<std::pair<uint32_t, uint32_t>> whitespace_ranges = {
    {0x0009, 0x0009},
    {0x000a, 0x000a},
    {0x000b, 0x000b},
    {0x000c, 0x000c},
    {0x000d, 0x000d},
    {0x0020, 0x0020},
    {0x0085, 0x0085},
    {0x00a0, 0x00a0},
    {0x1680, 0x1680},
    {0x2000, 0x200a},
    {0x2028, 0x2029},
    {0x202f, 0x202f},
    {0x205f, 0x205f},
    {0x3000, 0x3000},
};
// Concatenation of the per-category range tables, sorted so it can be
// binary-searched; filled in by codepoint_type_init_search_vector().
static std::vector<std::pair<uint32_t, uint32_t>> all_ranges;
// Maps an inclusive (first, last) range pair to its CODEPOINT_TYPE_* value;
// filled in by codepoint_type_init_map().
static std::map<std::pair<uint32_t, uint32_t>, int> codepoint_type_map;
static std::string codepoint_to_utf8(uint32_t cp) {
std::string result;
if (/* 0x00 <= cp && */ cp <= 0x7f) {
@ -268,22 +530,25 @@ static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
throw std::invalid_argument("invalid character");
}
else if (!(utf8[offset + 0] & 0x20)) {
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80))
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
offset += 2;
return result;
}
else if (!(utf8[offset + 0] & 0x10)) {
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80))
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
offset += 3;
return result;
}
else if (!(utf8[offset + 0] & 0x08)) {
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80))
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
offset += 4;
return result;
@ -300,108 +565,122 @@ static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
return result;
}
// Encodes a single codepoint as UTF-16 code units.
//
// Returns one unit for cp <= 0xffff, or a high/low surrogate pair for
// 0x10000 <= cp <= 0x10ffff.
// Throws std::invalid_argument("invalid codepoint") for anything larger.
static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
    if (cp > 0x10ffff) {
        throw std::invalid_argument("invalid codepoint");
    }

    std::vector<uint16_t> units;
    if (cp < 0x10000) {
        // Fits in a single 16-bit unit.
        units.emplace_back(cp);
    } else {
        // Split the 20-bit offset above 0x10000 into two 10-bit halves.
        const uint32_t shifted = cp - 0x10000;
        units.emplace_back(0xd800 | (shifted >> 10));
        units.emplace_back(0xdc00 | (shifted & 0x03ff));
    }
    return units;
}
// Encodes a sequence of codepoints as UTF-16 by appending the encoding of
// each codepoint in order. Any exception thrown by codepoint_to_utf16()
// propagates to the caller.
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
    std::vector<uint16_t> units;
    for (const uint32_t cp : cps) {
        const std::vector<uint16_t> encoded = codepoint_to_utf16(cp);
        units.insert(units.end(), encoded.begin(), encoded.end());
    }
    return units;
}
// Decodes one codepoint from `utf16`, starting at index `offset`, and
// advances `offset` past the code unit(s) consumed.
//
// A unit that is not a high surrogate (0xd800..0xdbff) is returned as-is and
// consumes one unit. A high surrogate must be followed by a low surrogate
// (0xdc00..0xdfff); the pair is combined into a codepoint >= 0x10000 and
// consumes two units.
//
// Precondition: offset < utf16.size() (checked with assert).
// Throws std::invalid_argument("invalid character") when a high surrogate is
// the last unit or is not followed by a low surrogate.
static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
    assert(offset < utf16.size());

    // Always read relative to `offset`; reading from index 0 would decode
    // the start of the buffer again on every call.
    const uint32_t lead = utf16[offset];
    if ((lead & 0xfc00) != 0xd800) {
        offset += 1;
        return lead;
    }

    // The top six bits identify a low surrogate, hence the 0xfc00 mask.
    if (offset + 1 >= utf16.size() || (utf16[offset + 1] & 0xfc00) != 0xdc00) {
        throw std::invalid_argument("invalid character");
    }
    const uint32_t trail = utf16[offset + 1];
    offset += 2;
    return 0x10000 + (((lead & 0x03ff) << 10) | (trail & 0x03ff));
}
// Decodes an entire UTF-16 sequence into codepoints by repeatedly calling
// codepoint_from_utf16(), which advances the position itself. Exceptions
// thrown by the decoder propagate to the caller.
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
    std::vector<uint32_t> decoded;
    for (size_t pos = 0; pos < utf16.size(); ) {
        decoded.push_back(codepoint_from_utf16(utf16, pos));
    }
    return decoded;
}
// Integer tags for codepoint categories. CODEPOINT_TYPE_UNIDENTIFIED (0) is
// what the lookup helpers return when a codepoint matches no known range or
// the input string is empty.
// NOTE(review): LETTER, PUNCTUATION and SYMBOL are each defined twice below
// with different values, which looks like an older and a newer set of
// definitions interleaved -- confirm which set is intended.
#define CODEPOINT_TYPE_UNIDENTIFIED 0
#define CODEPOINT_TYPE_DIGIT 1
#define CODEPOINT_TYPE_LETTER 2
#define CODEPOINT_TYPE_WHITESPACE 3
#define CODEPOINT_TYPE_ACCENT_MARK 4
#define CODEPOINT_TYPE_PUNCTUATION 5
#define CODEPOINT_TYPE_SYMBOL 6
#define CODEPOINT_TYPE_CONTROL 7
#define CODEPOINT_TYPE_OTHER 1
#define CODEPOINT_TYPE_NUMBER 2
#define CODEPOINT_TYPE_LETTER 3
#define CODEPOINT_TYPE_PUNCTUATION 4
#define CODEPOINT_TYPE_MARK 5
#define CODEPOINT_TYPE_SEPARATOR 6
#define CODEPOINT_TYPE_SYMBOL 7
// NOTE(review): the lines below interleave two different implementations.
// One (codepoint_type_map()) expands every range into a per-codepoint
// unordered_map via `codepoint_types[i] = ...`; the other
// (codepoint_type_init_map()) stores each whole range pair as a key in the
// file-level codepoint_type_map and returns true. As written the span is not
// a single well-formed function -- confirm which version is intended before
// changing any code here.
static std::unordered_map<uint32_t, int> codepoint_type_map() {
std::unordered_map<uint32_t, int> codepoint_types;
for (auto p : digit_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
static bool codepoint_type_init_map() {
for (auto i : other_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_OTHER;
}
for(auto p : letter_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
for (auto i : number_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_NUMBER;
}
for(auto p : whitespace_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
for (auto i : letter_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_LETTER;
}
for(auto p : accent_mark_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
for (auto i : punctuation_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_PUNCTUATION;
}
for(auto p : punctuation_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
for (auto i : mark_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_MARK;
}
for (auto p : symbol_ranges) {
for (auto i = p.first; i <= p.second; ++i)
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
for (auto i : separator_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_SEPARATOR;
}
for(auto p : control_ranges) {
for(auto i = p.first; i <= p.second; ++ i)
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
for (auto i : symbol_ranges) {
codepoint_type_map[i] = CODEPOINT_TYPE_SYMBOL;
}
return codepoint_types;
return true;
}
// Builds the searchable range list: appends every per-category range table
// to all_ranges (other, number, letter, punctuation, mark, separator, symbol,
// in that order) and then sorts the combined vector. Always returns true.
static bool codepoint_type_init_search_vector() {
    const std::vector<std::pair<uint32_t, uint32_t>> * const tables[] = {
        &other_ranges,
        &number_ranges,
        &letter_ranges,
        &punctuation_ranges,
        &mark_ranges,
        &separator_ranges,
        &symbol_ranges,
    };
    for (const auto * table : tables) {
        all_ranges.insert(all_ranges.end(), table->begin(), table->end());
    }
    std::sort(all_ranges.begin(), all_ranges.end());
    return true;
}
// Finds the range that contains `cp` in a list of inclusive [first, last]
// ranges sorted in ascending order.
//
// Returns the index of a range with range.first <= cp <= range.second.
// Throws std::runtime_error("Target out of range!") when no range contains
// `cp`, including when `ranges` is empty.
//
// The search window is the half-open interval [left, right). With unsigned
// indices this avoids computing `size() - 1` on an empty vector and
// `mid - 1` when mid == 0, both of which would wrap around and lead to an
// out-of-bounds read.
static size_t binary_search_implement(uint32_t cp, const std::vector<std::pair<uint32_t, uint32_t>> & ranges) {
    size_t left  = 0;
    size_t right = ranges.size();
    while (left < right) {
        const size_t mid = left + (right - left) / 2;
        const auto & range = ranges[mid];
        if (cp < range.first) {
            // Target lies before this range: keep searching [left, mid).
            right = mid;
        } else if (cp > range.second) {
            // Target lies after this range: keep searching [mid + 1, right).
            left = mid + 1;
        } else {
            // range.first <= cp <= range.second
            return mid;
        }
    }
    throw std::runtime_error("Target out of range!");
}
// Looks up the category of `cp`: locates the containing range in all_ranges
// and returns the value stored for that range in codepoint_type_map.
// Returns CODEPOINT_TYPE_UNIDENTIFIED when the search reports (via
// std::runtime_error) that no range contains `cp`.
static int codepoint_type_binary_search(uint32_t cp) {
    try {
        const size_t index = binary_search_implement(cp, all_ranges);
        const auto & matched = all_ranges[index];
        return codepoint_type_map[matched];
    } catch (const std::runtime_error &) {
        return CODEPOINT_TYPE_UNIDENTIFIED;
    }
}
// Runs both initialisation steps (the range-to-type map, then the sorted
// search vector) and reports whether both succeeded. Both steps are always
// executed, regardless of the first one's result.
static bool codepoint_type_init() {
    const bool map_ok    = codepoint_type_init_map();
    const bool vector_ok = codepoint_type_init_search_vector();
    return map_ok && vector_ok;
}
// Returns the CODEPOINT_TYPE_* category of a single codepoint.
// NOTE(review): this body contains two lookup strategies back to back: a
// per-codepoint unordered_map lookup followed by a one-time
// codepoint_type_init() plus codepoint_type_binary_search(cp). As written the
// first `return` makes the second pair of statements unreachable; this looks
// like an older and a newer implementation interleaved -- confirm which one
// is intended.
static int codepoint_type(uint32_t cp) {
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
return codepoint_types[cp];
static bool codepoint_type_initialized = codepoint_type_init();
return codepoint_type_binary_search(cp);
}
// Returns the CODEPOINT_TYPE_* category of the last codepoint in a UTF-8
// string, or CODEPOINT_TYPE_UNIDENTIFIED when the string is empty.
// Exceptions thrown while decoding the UTF-8 input propagate to the caller.
static int codepoint_type(const std::string & utf8) {
    if (utf8.empty()) {
        return CODEPOINT_TYPE_UNIDENTIFIED;
    }
    const auto decoded = codepoints_from_utf8(utf8);
    return codepoint_type(decoded.back());
}
// Reports whether the last codepoint of a UTF-8 string falls inside one of
// the whitespace_ranges entries. An empty string is never whitespace.
// Only std::runtime_error (the search's "not found" signal) is translated
// into `false`; other exceptions propagate to the caller.
static bool codepoint_is_whitespace(const std::string & utf8) {
    if (utf8.empty()) {
        return false;
    }
    try {
        const uint32_t last_cp = codepoints_from_utf8(utf8).back();
        binary_search_implement(last_cp, whitespace_ranges);
    } catch (const std::runtime_error &) {
        return false;
    }
    return true;
}
static std::unordered_map<uint8_t, std::string> bytes_to_unicode_map_bpe() {
std::unordered_map<uint8_t, std::string> map;
for (int ch = u'!'; ch <= u'~'; ++ch) {
@ -460,3 +739,4 @@ static uint8_t unicode_to_bytes_bpe(const std::string & utf8) {
return map.at(utf8);
}