add numeric comma cleanup (1.1.4)

author: Val Neekman <val@neekware.com> 2015-09-21 09:20:04 -0400
committer: Val Neekman <val@neekware.com> 2015-09-21 09:20:04 -0400
commit: 069a3684cb5b6e6446a68b0b592e5aeb4cb8ce01 (patch)
tree: d2f2534d8617d3d336c0e2a9d8b1bea71511a64d
parent: d654f33d410d77d9c1f2f27013d8676fcf59c9f5 (diff)
download: python-slugify-1.1.4.tar.gz
1 file changed, 14 insertions, 18 deletions
diff --git a/slugify/slugify.py b/slugify/slugify.py
index 4c134f3..5fbfd67 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -19,18 +19,13 @@ import unidecode
 __all__ = ['slugify']
 
 
-# character entity reference
-CHAR_ENTITY_REXP = re.compile('&(%s);' % '|'.join(name2codepoint))
-
-# decimal character reference
-DECIMAL_REXP = re.compile('&#(\d+);')
-
-# hexadecimal character reference
-HEX_REXP = re.compile('&#x([\da-fA-F]+);')
-
-REPLACE1_REXP = re.compile(r'[\']+')
-REPLACE2_REXP = re.compile(r'[^-a-z0-9]+')
-REMOVE_REXP = re.compile('-{2,}')
+CHAR_ENTITY_PATTERN = re.compile('&(%s);' % '|'.join(name2codepoint))
+DECIMAL_PATTERN = re.compile('&#(\d+);')
+HEX_PATTERN = re.compile('&#x([\da-fA-F]+);')
+QUOTE_PATTERN = re.compile(r'[\']+')
+ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
+DUPLICATE_DASH_PATTERN = re.compile('-{2,}')
+NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)')
 
 
 def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False):
@@ -102,19 +97,19 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
 
     # character entity reference
     if entities:
-        text = CHAR_ENTITY_REXP.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)
+        text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)
 
     # decimal character reference
     if decimal:
         try:
-            text = DECIMAL_REXP.sub(lambda m: unichr(int(m.group(1))), text)
+            text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text)
         except:
             pass
 
     # hexadecimal character reference
     if hexadecimal:
         try:
-            text = HEX_REXP.sub(lambda m: unichr(int(m.group(1), 16)), text)
+            text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text)
         except:
             pass
 
@@ -124,11 +119,12 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
         text = text.encode('ascii', 'ignore')
 
     # replace unwanted characters
-    text = REPLACE1_REXP.sub('', text.lower())  # replace ' with nothing instead with -
-    text = REPLACE2_REXP.sub('-', text.lower())
+    text = QUOTE_PATTERN.sub('', text.lower())  # replace ' with nothing instead with -
+    text = NUMBERS_PATTERN.sub('', text.lower())
+    text = ALLOWED_CHARS_PATTERN.sub('-', text.lower())
 
     # remove redundant -
-    text = REMOVE_REXP.sub('-', text).strip('-')
+    text = DUPLICATE_DASH_PATTERN.sub('-', text).strip('-')
 
     # remove stopwords
     if stopwords:
author	Val Neekman <val@neekware.com>	2015-09-21 09:20:04 -0400
committer	Val Neekman <val@neekware.com>	2015-09-21 09:20:04 -0400
commit	069a3684cb5b6e6446a68b0b592e5aeb4cb8ce01 (patch)
tree	d2f2534d8617d3d336c0e2a9d8b1bea71511a64d
parent	d654f33d410d77d9c1f2f27013d8676fcf59c9f5 (diff)
download	python-slugify-1.1.4.tar.gz