gen-charset-table.py: script to import ms charmaps
For example: http://msdn.microsoft.com/en-us/goglobal/cc305147
pull/10/head
parent
a009c71034
commit
74c01dc448
|
@ -0,0 +1,84 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import getopt
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
# C declaration emitted for the generated table; filled in with the array
# name, its length and the formatted, comma-separated initializer values.
h_template = """unsigned short %(charsetName)s[%(len)d] = { %(values)s }"""

# Table entry used for every byte value the input file does not map.
CHAR_UNDEF = "0x0000"

# Number of entries in the generated table (one per possible byte value).
MAP_LEN = 256

# Number of table entries written on each line of the initializer.
itemsPerLine = 16
|
||||
|
||||
def usage():
    """Write the command-line usage summary for this script to stderr.

    The script name shown is the base name of ``sys.argv[0]``.
    """
    scriptName = os.path.basename(sys.argv[0])
    sys.stderr.write("\nUsage: %s -f inputFile\n" % scriptName)
|
||||
|
||||
def parseCharsetFile(file = None):
    """Build a byte-value -> Unicode code point table from a charmap listing.

    ``file`` is an iterable of text lines (normally an open file).  Each
    line of interest looks like::

        FD = U+200E : LEFT-TO-RIGHT MARK

    where the first field is the byte value as two hex digits and the second
    is the Unicode code point as four hex digits.

    Returns a list of ``MAP_LEN`` strings of the form ``"0x200e"`` (hex
    digits lower-cased), indexed by byte value; entries never mentioned in
    the input stay ``CHAR_UNDEF``.  Returns ``None`` when ``file`` is None.
    Lines that do not match the expected format are reported on stderr and
    skipped.
    """
    if file is None:
        return None

    # Match hex digits only: with ``\w`` a line such as "ZZ = U+QQQQ :" would
    # either crash int(..., 16) or put a bogus value into the table.
    linePattern = re.compile(r"([0-9A-Fa-f]{2}) = U\+([0-9A-Fa-f]{4}) :")

    charmap = [CHAR_UNDEF] * MAP_LEN

    # Iterate the file object directly; xreadlines() exists only on
    # Python 2 file objects.
    for line in file:
        m = linePattern.search(line)
        if not m:
            sys.stderr.write("Skipping weird line: %s" % line)
            continue

        ind = int(m.group(1), 16)
        charmap[ind] = "0x%s" % m.group(2).lower()

    return charmap
|
||||
|
||||
|
||||
def formatCharacterMap(charmap = None):
    """Format the first ``MAP_LEN`` entries of ``charmap`` as a C initializer.

    Entries are separated by ", " and a line break ("\\n ") is inserted
    before every ``itemsPerLine``-th entry, including the very first one.
    The final entry is written without a trailing comma.

    Returns the formatted string, or ``None`` when ``charmap`` is None or
    empty.  Raises IndexError if ``charmap`` has fewer than ``MAP_LEN``
    entries.
    """
    if not charmap:
        return None

    lastIndex = MAP_LEN - 1
    pieces = []
    # range() instead of xrange() so the loop also runs on Python 3.
    for i in range(lastIndex):
        if i % itemsPerLine == 0:
            pieces.append("\n ")
        pieces.append("%s, " % (charmap[i]))

    # The last entry gets no trailing comma, but still needs its line break
    # if it happens to start a new row (never the case for 256 / 16).
    if lastIndex % itemsPerLine == 0:
        pieces.append("\n ")
    pieces.append(charmap[lastIndex])

    return "".join(pieces)
|
||||
|
||||
def main(argv=None):
    """Command-line entry point: read a charmap file, print a C table.

    ``argv`` is the list of command-line arguments without the program
    name; it defaults to ``sys.argv[1:]``.  The only option is
    ``-f inputFile``.  The generated declaration is written to stdout and
    the array is named after the base name of the input file.

    Returns the process exit status: 0 on success, 1 when no input file
    was given, 2 when the options could not be parsed.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, _args = getopt.getopt(argv, "f:")
    except getopt.GetoptError as err:
        # "except X as err" is accepted by Python 2.6+ and Python 3; the
        # old "except X, err" form is a syntax error on Python 3.
        sys.stderr.write(str(err))
        usage()
        return 2

    inputFile = None
    for opt, arg in opts:
        # getopt only ever yields options from the "f:" spec, so -f is the
        # sole case to handle here.
        if opt == "-f":
            inputFile = arg

    if not inputFile:
        usage()
        return 1

    # Line-buffered read; the with-block closes the file once it is parsed.
    with open(inputFile, "r", 1) as f:
        charsetMap = parseCharsetFile(f)

    charsetValues = formatCharacterMap(charsetMap)

    # Explicit write plus newline produces the same output as the Python 2
    # print statement while staying valid on Python 3.
    sys.stdout.write(h_template % {"len": len(charsetMap),
                                   "charsetName": os.path.basename(inputFile),
                                   "values": charsetValues} + "\n")
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
Loading…
Reference in New Issue