diff options
| author | Val Neekman <val@neekware.com> | 2017-04-01 12:47:34 -0400 |
|---|---|---|
| committer | Val Neekman <val@neekware.com> | 2017-04-01 12:47:34 -0400 |
| commit | b3b38b1fec893326f86363b9821aec6604a11925 (patch) | |
| tree | 48411b2193ecbe11b4b31eff09d1a9e3886b919a /slugify | |
| parent | cf90eeb80c0692480db8a31a159e1dfd24e1c2b8 (diff) | |
| parent | 750c737b37a7261ef551f9ab013dfb0b1b3eb29a (diff) | |
| download | python-slugify-b3b38b1fec893326f86363b9821aec6604a11925.tar.gz | |
add regex_pattern and support for py 3.6 (tag: 1.2.2)
Diffstat (limited to 'slugify')
| -rw-r--r-- | slugify/__init__.py | 2 | ||||
| -rw-r--r-- | slugify/slugify.py | 27 |
2 files changed, 17 insertions, 12 deletions
diff --git a/slugify/__init__.py b/slugify/__init__.py index daaafd8..2df1ccd 100644 --- a/slugify/__init__.py +++ b/slugify/__init__.py @@ -3,4 +3,4 @@ from .slugify import * __author__ = 'Val Neekman @ Neekware Inc. [@vneekman]' __description__ = 'A Python slugify application that also handles Unicode' -__version__ = '1.2.1' +__version__ = '1.2.2' diff --git a/slugify/slugify.py b/slugify/slugify.py index 327f2c1..af0c609 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -26,6 +26,7 @@ QUOTE_PATTERN = re.compile(r'[\']+') ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+') DUPLICATE_DASH_PATTERN = re.compile('-{2,}') NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)') +DEFAULT_SEPARATOR = '-' def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False): @@ -71,7 +72,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, - separator='-', save_order=False, stopwords=()): + separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None): """ Make a slug from the given text. 
:param text (str): initial text @@ -83,6 +84,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order :param separator (str): separator between words :param stopwords (iterable): words to discount + :param regex_pattern (str): regex pattern for allowed characters :return (str): """ @@ -91,7 +93,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w text = _unicode(text, 'utf-8', 'ignore') # replace quotes with dashes - pre-process - text = QUOTE_PATTERN.sub('-', text) + text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) # decode unicode text = unidecode.unidecode(text) @@ -129,25 +131,28 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w # remove generated quotes -- post-process text = QUOTE_PATTERN.sub('', text) - # replace unwanted characters + # cleanup numbers text = NUMBERS_PATTERN.sub('', text) - text = ALLOWED_CHARS_PATTERN.sub('-', text) - # remove redundant - - text = DUPLICATE_DASH_PATTERN.sub('-', text).strip('-') + # replace all other unwanted characters + pattern = regex_pattern or ALLOWED_CHARS_PATTERN + text = re.sub(pattern, DEFAULT_SEPARATOR, text) + + # remove redundant + text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR) # remove stopwords if stopwords: stopwords_lower = [s.lower() for s in stopwords] - words = [w for w in text.split('-') if w not in stopwords_lower] - text = '-'.join(words) + words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower] + text = DEFAULT_SEPARATOR.join(words) # smart truncate if requested if max_length > 0: - text = smart_truncate(text, max_length, word_boundary, '-', save_order) + text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order) - if separator != '-': - text = text.replace('-', separator) + if separator != DEFAULT_SEPARATOR: + text 
= text.replace(DEFAULT_SEPARATOR, separator) return text |
