diff options
author | Val Neekman <val@neekware.com> | 2017-04-01 12:47:34 -0400 |
---|---|---|
committer | Val Neekman <val@neekware.com> | 2017-04-01 12:47:34 -0400 |
commit | b3b38b1fec893326f86363b9821aec6604a11925 (patch) | |
tree | 48411b2193ecbe11b4b31eff09d1a9e3886b919a | |
parent | cf90eeb80c0692480db8a31a159e1dfd24e1c2b8 (diff) | |
parent | 750c737b37a7261ef551f9ab013dfb0b1b3eb29a (diff) | |
download | python-slugify-b3b38b1fec893326f86363b9821aec6604a11925.tar.gz |
add regex_pattern and support for py 3.6 (1.2.2)
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | .travis.yml | 1 | ||||
-rw-r--r-- | CHANGELOG.md | 3 | ||||
-rw-r--r-- | MANIFEST.in | 2 | ||||
-rw-r--r-- | README.rst (renamed from README.md) | 96 | ||||
-rwxr-xr-x | setup.py | 1 | ||||
-rw-r--r-- | slugify/__init__.py | 2 | ||||
-rw-r--r-- | slugify/slugify.py | 27 | ||||
-rw-r--r-- | test.py | 30 |
9 files changed, 120 insertions, 45 deletions
@@ -1,3 +1,6 @@ +# JebBrains IDE +.idea/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.travis.yml b/.travis.yml index eb6f274..d852b0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ python: - "3.3" - "3.4" - "3.5" + - "3.6" - pypy install: diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d2b7c6..0d38493 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 1.2.2 + - Add `allowed_cahar + ## 1.2.1 - Including certain files (e.g. license.md) in sdists via MANIFEST.in (@proinsias) - Relax licensing by moving from BSD to MIT diff --git a/MANIFEST.in b/MANIFEST.in index 7b74127..a308077 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include LICENSE -include README.md +include README.rst include requirements.txt @@ -1,36 +1,52 @@ Python Slugify -=============== - -**A Python slugify application that handles unicode** +============== -[![status-image]][status-link] -[![version-image]][version-link] -[![coverage-image]][coverage-link] +|status-image| |version-image| |coverage-image| Overview -======== +-------- A Python **slugify** application that handles **unicode**. How to install -============== +-------------- + +Via ``pip``: + +.. code:: bash + + $ pip install python-slugify + +Via ``easy_install``: + +.. code:: bash + + $ easy_install python-slugify + +From sources via ``git``: + +.. code:: bash + + $ git clone http://github.com/un33k/python-slugify + $ cd python-slugify + $ python setup.py - 1. easy_install python-slugify - 2. pip install python-slugify - 3. git clone http://github.com/un33k/python-slugify - a. cd python-slugify - b. run python setup.py - 4. wget https://github.com/un33k/python-slugify/zipball/master - a. unzip the downloaded file - b. cd into python-slugify-* directory - c. run python setup.py +From sources: + +.. 
code:: bash + + $ wget https://github.com/un33k/python-slugify/zipball/master + # unzip the downloaded file + # cd into python-slugify-* directory + $ python setup.py How to use -=========== +---------- + +.. code:: python - ```python from slugify import slugify txt = "This is a test ---" @@ -144,38 +160,54 @@ How to use txt = 'foo & bar' r = slugify(txt) self.assertEqual(r, 'foo-bar') - ``` + + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, regex_pattern=regex_pattern) + self.assertEqual(r, "___this-is-a-test___") + + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, separator='_', regex_pattern=regex_pattern) + self.assertNotEqual(r, "_this_is_a_test_") Running the tests -================= +----------------- To run the tests against the current environment: +.. code:: bash + python test.py License -==================== +------- -Released under a ([BSD](LICENSE.md)) license. +Released under a (`MIT`_) license. Version -==================== +------- + X.Y.Z Version +:: + `MAJOR` version -- when you make incompatible API changes, `MINOR` version -- when you add functionality in a backwards-compatible manner, and `PATCH` version -- when you make backwards-compatible bug fixes. -[status-image]: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master -[status-link]: http://travis-ci.org/un33k/python-slugify?branch=master +.. |status-image| image:: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master + :target: http://travis-ci.org/un33k/python-slugify?branch=master + +.. |version-image| image:: https://img.shields.io/pypi/v/python-slugify.svg + :target: https://pypi.python.org/pypi/python-slugify -[version-image]: https://img.shields.io/pypi/v/python-slugify.svg -[version-link]: https://pypi.python.org/pypi/python-slugify +.. 
|coverage-image| image:: https://coveralls.io/repos/un33k/python-slugify/badge.svg + :target: https://coveralls.io/r/un33k/python-slugify -[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg -[coverage-link]: https://coveralls.io/r/un33k/python-slugify +.. |download-image| image:: https://img.shields.io/pypi/dm/python-slugify.svg + :target: https://pypi.python.org/pypi/python-slugify -[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg -[download-link]: https://pypi.python.org/pypi/python-slugify +.. _MIT: https://github.com/un33k/python-slugify/blob/master/LICENSE @@ -29,6 +29,7 @@ classifiers = [ 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ] diff --git a/slugify/__init__.py b/slugify/__init__.py index daaafd8..2df1ccd 100644 --- a/slugify/__init__.py +++ b/slugify/__init__.py @@ -3,4 +3,4 @@ from .slugify import * __author__ = 'Val Neekman @ Neekware Inc. [@vneekman]' __description__ = 'A Python slugify application that also handles Unicode' -__version__ = '1.2.1' +__version__ = '1.2.2' diff --git a/slugify/slugify.py b/slugify/slugify.py index 327f2c1..af0c609 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -26,6 +26,7 @@ QUOTE_PATTERN = re.compile(r'[\']+') ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+') DUPLICATE_DASH_PATTERN = re.compile('-{2,}') NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)') +DEFAULT_SEPARATOR = '-' def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False): @@ -71,7 +72,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, - separator='-', save_order=False, stopwords=()): + separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None): """ Make a slug from the given text. 
:param text (str): initial text @@ -83,6 +84,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order :param separator (str): separator between words :param stopwords (iterable): words to discount + :param regex_pattern (str): regex pattern for allowed characters :return (str): """ @@ -91,7 +93,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w text = _unicode(text, 'utf-8', 'ignore') # replace quotes with dashes - pre-process - text = QUOTE_PATTERN.sub('-', text) + text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) # decode unicode text = unidecode.unidecode(text) @@ -129,25 +131,28 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w # remove generated quotes -- post-process text = QUOTE_PATTERN.sub('', text) - # replace unwanted characters + # cleanup numbers text = NUMBERS_PATTERN.sub('', text) - text = ALLOWED_CHARS_PATTERN.sub('-', text) - # remove redundant - - text = DUPLICATE_DASH_PATTERN.sub('-', text).strip('-') + # replace all other unwanted characters + pattern = regex_pattern or ALLOWED_CHARS_PATTERN + text = re.sub(pattern, DEFAULT_SEPARATOR, text) + + # remove redundant + text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR) # remove stopwords if stopwords: stopwords_lower = [s.lower() for s in stopwords] - words = [w for w in text.split('-') if w not in stopwords_lower] - text = '-'.join(words) + words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower] + text = DEFAULT_SEPARATOR.join(words) # smart truncate if requested if max_length > 0: - text = smart_truncate(text, max_length, word_boundary, '-', save_order) + text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order) - if separator != '-': - text = text.replace('-', separator) + if separator != DEFAULT_SEPARATOR: + text 
= text.replace(DEFAULT_SEPARATOR, separator) return text @@ -167,6 +167,36 @@ class TestSlugification(unittest.TestCase): r = slugify(txt) self.assertEqual(r, '1000-reasons-you-are-1') + def test_regex_pattern_keep_underscore(self): + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, regex_pattern=regex_pattern) + self.assertEqual(r, "___this-is-a-test___") + + def test_regex_pattern_keep_underscore_with_underscore_as_separator(self): + """ + The regex_pattern turns the power to the caller. + Hence the caller must ensure that a custom separator doesn't clash + with the regex_pattern. + """ + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, separator='_', regex_pattern=regex_pattern) + self.assertNotEqual(r, "_this_is_a_test_") + + +class TestUtils(unittest.TestCase): + + def test_smart_truncate_no_max_length(self): + txt = '1,000 reasons you are #1' + r = smart_truncate(txt) + self.assertEqual(r, txt) + + def test_smart_truncate_no_seperator(self): + txt = '1,000 reasons you are #1' + r = smart_truncate(txt, max_length=100, separator='_') + self.assertEqual(r, txt) + class TestUtils(unittest.TestCase): |