diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2018-04-10 22:30:35 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2018-04-10 22:30:35 -0700 |
commit | d495a090f07942f0eacbd1830abb150138f404a2 (patch) | |
tree | d28cfae1dbd78def0c840c01499e586509326f1f | |
parent | 7a700ae26299878a3e59ac68dd517a2aef799b5f (diff) | |
download | natsort-d495a090f07942f0eacbd1830abb150138f404a2.tar.gz |
The parsing regular expressions support unicode decimals.
The regular expressions to separate numbers from non-numbers
have been updated to accept more than just ASCII numbers.
-rw-r--r-- | natsort/unicode_numbers.py | 9 | ||||
-rw-r--r-- | natsort/utils.py | 19 |
2 files changed, 18 insertions, 10 deletions
diff --git a/natsort/unicode_numbers.py b/natsort/unicode_numbers.py index 921c5c1..e87654f 100644 --- a/natsort/unicode_numbers.py +++ b/natsort/unicode_numbers.py @@ -280,10 +280,17 @@ for a in numeric_hex: digit_chars = [a for a in numeric_chars if unicodedata.digit(a, None) is not None] +# The decimal characters are a subset of the numberals +# (probably of the digits, but let's be safe). +decimal_chars = [a for a in numeric_chars + if unicodedata.decimal(a, None) is not None] + # Create a single string with the above data. +decimals = ''.join(decimal_chars) digits = ''.join(digit_chars) numeric = ''.join(numeric_chars) - +digits_no_decimals = ''.join([x for x in digits if x not in decimals]) +numeric_no_decimals = ''.join([x for x in numeric if x not in decimals]) # Some code that can be used to create the above list of hex numbers. if __name__ == '__main__': diff --git a/natsort/utils.py b/natsort/utils.py index b6484b0..306762a 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -58,7 +58,7 @@ from unicodedata import normalize # Local imports. from natsort.ns_enum import ns -from natsort.unicode_numbers import digits, numeric +from natsort.unicode_numbers import numeric_no_decimals, digits_no_decimals from natsort.compat.pathlib import PurePath, has_pathlib from natsort.compat.locale import ( get_strxfrm, @@ -80,25 +80,26 @@ if PY_VERSION >= 3: long = int # The regex that locates floats - include Unicode numerals. -_exp = r'(?:[eE][-+]?[0-9]+)?' -_num = r'(?:[0-9]+\.?[0-9]*|\.[0-9]+)' +_nnd = numeric_no_decimals +_exp = r'(?:[eE][-+]?\d+)?' 
+_num = r'(?:\d+\.?\d*|\.\d+)' _float_sign_exp_re = r'([-+]?{0}{1}|[{2}])' -_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, numeric) +_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, _nnd) _float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) _float_nosign_exp_re = r'({0}{1}|[{2}])' -_float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, numeric) +_float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, _nnd) _float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U) _float_sign_noexp_re = r'([-+]?{0}|[{1}])' -_float_sign_noexp_re = _float_sign_noexp_re.format(_num, numeric) +_float_sign_noexp_re = _float_sign_noexp_re.format(_num, _nnd) _float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) _float_nosign_noexp_re = r'({0}|[{1}])' -_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, numeric) +_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, _nnd) _float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) # Integer regexes - include Unicode digits. -_int_nosign_re = r'([0-9]+|[{0}])'.format(digits) +_int_nosign_re = r'(\d+|[{0}])'.format(digits_no_decimals) _int_nosign_re = re.compile(_int_nosign_re, flags=re.U) -_int_sign_re = r'([-+]?[0-9]+|[{0}])'.format(digits) +_int_sign_re = r'([-+]?\d+|[{0}])'.format(digits_no_decimals) _int_sign_re = re.compile(_int_sign_re, flags=re.U) # This dict will help select the correct regex and number conversion function. |