add regex_pattern and support for py 3.6 (1.2.2)

author: Val Neekman <val@neekware.com> 2017-04-01 12:47:34 -0400
committer: Val Neekman <val@neekware.com> 2017-04-01 12:47:34 -0400
commit: b3b38b1fec893326f86363b9821aec6604a11925 (patch)
tree: 48411b2193ecbe11b4b31eff09d1a9e3886b919a
parent: cf90eeb80c0692480db8a31a159e1dfd24e1c2b8 (diff)
parent: 750c737b37a7261ef551f9ab013dfb0b1b3eb29a (diff)
download: python-slugify-b3b38b1fec893326f86363b9821aec6604a11925.tar.gz
9 files changed, 120 insertions, 45 deletions
diff --git a/.gitignore b/.gitignore
index ba74660..8025fb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# JetBrains IDE
+.idea/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/.travis.yml b/.travis.yml
index eb6f274..d852b0a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ python:
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
   - pypy
 
 install:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d2b7c6..0d38493 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 1.2.2
+  - Add `regex_pattern` option and support for Python 3.6
+
 ## 1.2.1
   - Including certain files (e.g. license.md) in sdists via MANIFEST.in (@proinsias)
   - Relax licensing by moving from BSD to MIT
diff --git a/MANIFEST.in b/MANIFEST.in
index 7b74127..a308077 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,3 @@
 include LICENSE
-include README.md
+include README.rst
 include requirements.txt
diff --git a/README.md b/README.rst
index 316bfa6..a5a62ac 100644
--- a/README.md
+++ b/README.rst
@@ -1,36 +1,52 @@
 Python Slugify
-===============
-
-**A Python slugify application that handles unicode**
+==============
 
-[![status-image]][status-link]
-[![version-image]][version-link]
-[![coverage-image]][coverage-link]
+|status-image| |version-image| |coverage-image|
 
 Overview
-========
+--------
 
 A Python **slugify** application that handles **unicode**.
 
 
 How to install
-==============
+--------------
+
+Via ``pip``:
+
+.. code:: bash
+
+    $ pip install python-slugify
+
+Via ``easy_install``:
+
+.. code:: bash
+
+    $ easy_install python-slugify
+
+From sources via ``git``:
+
+.. code:: bash
+
+    $ git clone http://github.com/un33k/python-slugify
+    $ cd python-slugify
+    $ python setup.py install
 
-    1. easy_install python-slugify
-    2. pip install python-slugify
-    3. git clone http://github.com/un33k/python-slugify
-        a. cd python-slugify
-        b. run python setup.py
-    4. wget https://github.com/un33k/python-slugify/zipball/master
-        a. unzip the downloaded file
-        b. cd into python-slugify-* directory
-        c. run python setup.py
+From sources:
+
+.. code:: bash
+
+    $ wget https://github.com/un33k/python-slugify/zipball/master
+    # unzip the downloaded file
+    # cd into python-slugify-* directory
+    $ python setup.py install
 
 
 How to use
-===========
+----------
+
+.. code:: python
 
-   ```python
     from slugify import slugify
 
     txt = "This is a test ---"
@@ -144,38 +160,54 @@ How to use
     txt = 'foo &amp; bar'
     r = slugify(txt)
     self.assertEqual(r, 'foo-bar')
-   ```
+
+    txt = "___This is a test___"
+    regex_pattern = r'[^-a-z0-9_]+'
+    r = slugify(txt, regex_pattern=regex_pattern)
+    self.assertEqual(r, "___this-is-a-test___")
+
+    txt = "___This is a test___"
+    regex_pattern = r'[^-a-z0-9_]+'
+    r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+    self.assertNotEqual(r, "_this_is_a_test_")
 
 Running the tests
-=================
+-----------------
 
 To run the tests against the current environment:
 
+.. code:: bash
+
     python test.py
 
 
 License
-====================
+-------
 
-Released under a ([BSD](LICENSE.md)) license.
+Released under a (`MIT`_) license.
 
 
 Version
-====================
+-------
+
 X.Y.Z Version
 
+::
+
     `MAJOR` version -- when you make incompatible API changes,
     `MINOR` version -- when you add functionality in a backwards-compatible manner, and
     `PATCH` version -- when you make backwards-compatible bug fixes.
 
-[status-image]: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master
-[status-link]: http://travis-ci.org/un33k/python-slugify?branch=master
+.. |status-image| image:: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master
+    :target: http://travis-ci.org/un33k/python-slugify?branch=master
+
+.. |version-image| image:: https://img.shields.io/pypi/v/python-slugify.svg
+    :target: https://pypi.python.org/pypi/python-slugify
 
-[version-image]: https://img.shields.io/pypi/v/python-slugify.svg
-[version-link]: https://pypi.python.org/pypi/python-slugify
+.. |coverage-image| image:: https://coveralls.io/repos/un33k/python-slugify/badge.svg
+    :target: https://coveralls.io/r/un33k/python-slugify
 
-[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg
-[coverage-link]: https://coveralls.io/r/un33k/python-slugify
+.. |download-image| image:: https://img.shields.io/pypi/dm/python-slugify.svg
+    :target: https://pypi.python.org/pypi/python-slugify
 
-[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg
-[download-link]: https://pypi.python.org/pypi/python-slugify
+.. _MIT: https://github.com/un33k/python-slugify/blob/master/LICENSE
diff --git a/setup.py b/setup.py
index 068f8af..f944f42 100755
--- a/setup.py
+++ b/setup.py
@@ -29,6 +29,7 @@ classifiers = [
     'Programming Language :: Python :: 3.3',
     'Programming Language :: Python :: 3.4',
     'Programming Language :: Python :: 3.5',
+    'Programming Language :: Python :: 3.6',
 ]
 
 
diff --git a/slugify/__init__.py b/slugify/__init__.py
index daaafd8..2df1ccd 100644
--- a/slugify/__init__.py
+++ b/slugify/__init__.py
@@ -3,4 +3,4 @@ from .slugify import *
 
 __author__ = 'Val Neekman @ Neekware Inc. [@vneekman]'
 __description__ = 'A Python slugify application that also handles Unicode'
-__version__ = '1.2.1'
+__version__ = '1.2.2'
diff --git a/slugify/slugify.py b/slugify/slugify.py
index 327f2c1..af0c609 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -26,6 +26,7 @@ QUOTE_PATTERN = re.compile(r'[\']+')
 ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
 DUPLICATE_DASH_PATTERN = re.compile('-{2,}')
 NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)')
+DEFAULT_SEPARATOR = '-'
 
 
 def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False):
@@ -71,7 +72,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s
 
 
 def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
-            separator='-', save_order=False, stopwords=()):
+            separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None):
     """
     Make a slug from the given text.
     :param text (str): initial text
@@ -83,6 +84,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
     :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
     :param separator (str): separator between words
     :param stopwords (iterable): words to discount
+    :param regex_pattern (str): regex pattern matching the characters to be replaced (overrides the default `[^-a-z0-9]+`)
     :return (str):
     """
 
@@ -91,7 +93,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
         text = _unicode(text, 'utf-8', 'ignore')
 
     # replace quotes with dashes - pre-process
-    text = QUOTE_PATTERN.sub('-', text)
+    text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
 
     # decode unicode
     text = unidecode.unidecode(text)
@@ -129,25 +131,28 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
     # remove generated quotes -- post-process
     text = QUOTE_PATTERN.sub('', text)
 
-    # replace unwanted characters
+    # cleanup numbers
     text = NUMBERS_PATTERN.sub('', text)
-    text = ALLOWED_CHARS_PATTERN.sub('-', text)
 
-    # remove redundant -
-    text = DUPLICATE_DASH_PATTERN.sub('-', text).strip('-')
+    # replace all other unwanted characters
+    pattern = regex_pattern or ALLOWED_CHARS_PATTERN
+    text = re.sub(pattern, DEFAULT_SEPARATOR, text)
+
+    # remove redundant separators
+    text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
 
     # remove stopwords
     if stopwords:
         stopwords_lower = [s.lower() for s in stopwords]
-        words = [w for w in text.split('-') if w not in stopwords_lower]
-        text = '-'.join(words)
+        words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+        text = DEFAULT_SEPARATOR.join(words)
 
     # smart truncate if requested
     if max_length > 0:
-        text = smart_truncate(text, max_length, word_boundary, '-', save_order)
+        text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)
 
-    if separator != '-':
-        text = text.replace('-', separator)
+    if separator != DEFAULT_SEPARATOR:
+        text = text.replace(DEFAULT_SEPARATOR, separator)
 
     return text
 
diff --git a/test.py b/test.py
index 768fcb9..6730756 100644
--- a/test.py
+++ b/test.py
@@ -167,6 +167,36 @@ class TestSlugification(unittest.TestCase):
         r = slugify(txt)
         self.assertEqual(r, '1000-reasons-you-are-1')
 
+    def test_regex_pattern_keep_underscore(self):
+        txt = "___This is a test___"
+        regex_pattern = r'[^-a-z0-9_]+'
+        r = slugify(txt, regex_pattern=regex_pattern)
+        self.assertEqual(r, "___this-is-a-test___")
+
+    def test_regex_pattern_keep_underscore_with_underscore_as_separator(self):
+        """
+        The regex_pattern turns the power to the caller.
+        Hence the caller must ensure that a custom separator doesn't clash
+        with the regex_pattern.
+        """
+        txt = "___This is a test___"
+        regex_pattern = r'[^-a-z0-9_]+'
+        r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+        self.assertNotEqual(r, "_this_is_a_test_")
+
+
+class TestUtils(unittest.TestCase):
+
+    def test_smart_truncate_no_max_length(self):
+        txt = '1,000 reasons you are #1'
+        r = smart_truncate(txt)
+        self.assertEqual(r, txt)
+
+    def test_smart_truncate_no_seperator(self):
+        txt = '1,000 reasons you are #1'
+        r = smart_truncate(txt, max_length=100, separator='_')
+        self.assertEqual(r, txt)
+
 
 class TestUtils(unittest.TestCase):
author	Val Neekman <val@neekware.com>	2017-04-01 12:47:34 -0400
committer	Val Neekman <val@neekware.com>	2017-04-01 12:47:34 -0400
commit	b3b38b1fec893326f86363b9821aec6604a11925 (patch)
tree	48411b2193ecbe11b4b31eff09d1a9e3886b919a
parent	cf90eeb80c0692480db8a31a159e1dfd24e1c2b8 (diff)
parent	750c737b37a7261ef551f9ab013dfb0b1b3eb29a (diff)
download	python-slugify-b3b38b1fec893326f86363b9821aec6604a11925.tar.gz