diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-11-24 23:05:09 +0000 |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-11-24 23:05:09 +0000 |
commit | aa4febf8f6d0db4cd94d3785b122f078547e94a7 (patch) | |
tree | 071d95027cb9ef8a88536d732aa23eec546fee5b | |
parent | 60204388b819615d1399e85bce073e679fd3e4c2 (diff) | |
download | cpython-aa4febf8f6d0db4cd94d3785b122f078547e94a7.tar.gz |
Sort names independent of the Python version. Fix hex constant warning.
Include all First/Last blocks.
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 18 |
1 file changed, 11 insertions, 7 deletions
```diff
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 42cbcf149e..f7dcad5033 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -16,6 +16,7 @@
 # 2002-09-11 wd use string methods
 # 2002-10-18 mvl update to Unicode 3.2
 # 2002-10-22 mvl generate NFC tables
+# 2002-11-24 mvl expand all ranges, sort names version-independently
 #
 # written by Fredrik Lundh (fredrik@pythonware.com)
 #
@@ -403,10 +404,13 @@ def makeunicodename(unicode, trace):
 
     wordlist = words.items()
 
-    # sort on falling frequency
-    # XXX: different Python versions produce a different order
-    # for words with equal frequency
-    wordlist.sort(lambda a, b: len(b[1])-len(a[1]))
+    # sort on falling frequency, then by name
+    def cmpwords((aword, alist),(bword, blist)):
+        r = -cmp(len(alist),len(blist))
+        if r:
+            return r
+        return cmp(aword, bword)
+    wordlist.sort(cmpwords)
 
     # figure out how many phrasebook escapes we need
     escapes = 0
@@ -541,10 +545,10 @@ class UnicodeData:
             char = int(s[0], 16)
             table[char] = s
 
-        # expand first-last ranges (ignore surrogates and private use)
+        # expand first-last ranges
         if expand:
             field = None
-            for i in range(0, 0xD800):
+            for i in range(0, 0x110000):
                 s = table[i]
                 if s:
                     if s[1][-6:] == "First>":
@@ -587,7 +591,7 @@ def myhash(s, magic):
     h = 0
     for c in map(ord, s.upper()):
         h = (h * magic) + c
-        ix = h & 0xff000000
+        ix = h & 0xff000000L
         if ix:
             h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff
     return h
```