summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2002-11-24 23:05:09 +0000
committer Martin v. Löwis <martin@v.loewis.de> 2002-11-24 23:05:09 +0000
commit aa4febf8f6d0db4cd94d3785b122f078547e94a7 (patch)
tree 071d95027cb9ef8a88536d732aa23eec546fee5b
parent 60204388b819615d1399e85bce073e679fd3e4c2 (diff)
download cpython-aa4febf8f6d0db4cd94d3785b122f078547e94a7.tar.gz
Sort names independent of the Python version. Fix hex constant warning.
Include all First/Last blocks.
-rw-r--r-- Tools/unicode/makeunicodedata.py 18
1 files changed, 11 insertions, 7 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 42cbcf149e..f7dcad5033 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -16,6 +16,7 @@
# 2002-09-11 wd use string methods
# 2002-10-18 mvl update to Unicode 3.2
# 2002-10-22 mvl generate NFC tables
+# 2002-11-24 mvl expand all ranges, sort names version-independently
#
# written by Fredrik Lundh (fredrik@pythonware.com)
#
@@ -403,10 +404,13 @@ def makeunicodename(unicode, trace):
wordlist = words.items()
- # sort on falling frequency
- # XXX: different Python versions produce a different order
- # for words with equal frequency
- wordlist.sort(lambda a, b: len(b[1])-len(a[1]))
+ # sort on falling frequency, then by name
+ def cmpwords((aword, alist),(bword, blist)):
+ r = -cmp(len(alist),len(blist))
+ if r:
+ return r
+ return cmp(aword, bword)
+ wordlist.sort(cmpwords)
# figure out how many phrasebook escapes we need
escapes = 0
@@ -541,10 +545,10 @@ class UnicodeData:
char = int(s[0], 16)
table[char] = s
- # expand first-last ranges (ignore surrogates and private use)
+ # expand first-last ranges
if expand:
field = None
- for i in range(0, 0xD800):
+ for i in range(0, 0x110000):
s = table[i]
if s:
if s[1][-6:] == "First>":
@@ -587,7 +591,7 @@ def myhash(s, magic):
h = 0
for c in map(ord, s.upper()):
h = (h * magic) + c
- ix = h & 0xff000000
+ ix = h & 0xff000000L
if ix:
h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff
return h