summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVal Neekman <val@neekware.com>2017-04-01 12:47:34 -0400
committerVal Neekman <val@neekware.com>2017-04-01 12:47:34 -0400
commitb3b38b1fec893326f86363b9821aec6604a11925 (patch)
tree48411b2193ecbe11b4b31eff09d1a9e3886b919a
parentcf90eeb80c0692480db8a31a159e1dfd24e1c2b8 (diff)
parent750c737b37a7261ef551f9ab013dfb0b1b3eb29a (diff)
downloadpython-slugify-b3b38b1fec893326f86363b9821aec6604a11925.tar.gz
add regex_pattern and support for py 3.6 (tag: 1.2.2)
-rw-r--r--.gitignore3
-rw-r--r--.travis.yml1
-rw-r--r--CHANGELOG.md3
-rw-r--r--MANIFEST.in2
-rw-r--r--README.rst (renamed from README.md)96
-rwxr-xr-xsetup.py1
-rw-r--r--slugify/__init__.py2
-rw-r--r--slugify/slugify.py27
-rw-r--r--test.py30
9 files changed, 120 insertions, 45 deletions
diff --git a/.gitignore b/.gitignore
index ba74660..8025fb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# JetBrains IDE
+.idea/
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/.travis.yml b/.travis.yml
index eb6f274..d852b0a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ python:
- "3.3"
- "3.4"
- "3.5"
+ - "3.6"
- pypy
install:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d2b7c6..0d38493 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 1.2.2
+ - Add `regex_pattern` option to customize the allowed characters, and add support for Python 3.6
+
## 1.2.1
- Including certain files (e.g. license.md) in sdists via MANIFEST.in (@proinsias)
- Relax licensing by moving from BSD to MIT
diff --git a/MANIFEST.in b/MANIFEST.in
index 7b74127..a308077 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,3 @@
include LICENSE
-include README.md
+include README.rst
include requirements.txt
diff --git a/README.md b/README.rst
index 316bfa6..a5a62ac 100644
--- a/README.md
+++ b/README.rst
@@ -1,36 +1,52 @@
Python Slugify
-===============
-
-**A Python slugify application that handles unicode**
+==============
-[![status-image]][status-link]
-[![version-image]][version-link]
-[![coverage-image]][coverage-link]
+|status-image| |version-image| |coverage-image|
Overview
-========
+--------
A Python **slugify** application that handles **unicode**.
How to install
-==============
+--------------
+
+Via ``pip``:
+
+.. code:: bash
+
+ $ pip install python-slugify
+
+Via ``easy_install``:
+
+.. code:: bash
+
+ $ easy_install python-slugify
+
+From sources via ``git``:
+
+.. code:: bash
+
+ $ git clone http://github.com/un33k/python-slugify
+ $ cd python-slugify
+ $ python setup.py install
- 1. easy_install python-slugify
- 2. pip install python-slugify
- 3. git clone http://github.com/un33k/python-slugify
- a. cd python-slugify
- b. run python setup.py
- 4. wget https://github.com/un33k/python-slugify/zipball/master
- a. unzip the downloaded file
- b. cd into python-slugify-* directory
- c. run python setup.py
+From sources:
+
+.. code:: bash
+
+ $ wget https://github.com/un33k/python-slugify/zipball/master
+ # unzip the downloaded file
+ # cd into python-slugify-* directory
+ $ python setup.py install
How to use
-===========
+----------
+
+.. code:: python
- ```python
from slugify import slugify
txt = "This is a test ---"
@@ -144,38 +160,54 @@ How to use
txt = 'foo &amp; bar'
r = slugify(txt)
self.assertEqual(r, 'foo-bar')
- ```
+
+ txt = "___This is a test___"
+ regex_pattern = r'[^-a-z0-9_]+'
+ r = slugify(txt, regex_pattern=regex_pattern)
+ self.assertEqual(r, "___this-is-a-test___")
+
+ txt = "___This is a test___"
+ regex_pattern = r'[^-a-z0-9_]+'
+ r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+ self.assertNotEqual(r, "_this_is_a_test_")
Running the tests
-=================
+-----------------
To run the tests against the current environment:
+.. code:: bash
+
python test.py
License
-====================
+-------
-Released under a ([BSD](LICENSE.md)) license.
+Released under a (`MIT`_) license.
Version
-====================
+-------
+
X.Y.Z Version
+::
+
`MAJOR` version -- when you make incompatible API changes,
`MINOR` version -- when you add functionality in a backwards-compatible manner, and
`PATCH` version -- when you make backwards-compatible bug fixes.
-[status-image]: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master
-[status-link]: http://travis-ci.org/un33k/python-slugify?branch=master
+.. |status-image| image:: https://secure.travis-ci.org/un33k/python-slugify.png?branch=master
+ :target: http://travis-ci.org/un33k/python-slugify?branch=master
+
+.. |version-image| image:: https://img.shields.io/pypi/v/python-slugify.svg
+ :target: https://pypi.python.org/pypi/python-slugify
-[version-image]: https://img.shields.io/pypi/v/python-slugify.svg
-[version-link]: https://pypi.python.org/pypi/python-slugify
+.. |coverage-image| image:: https://coveralls.io/repos/un33k/python-slugify/badge.svg
+ :target: https://coveralls.io/r/un33k/python-slugify
-[coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg
-[coverage-link]: https://coveralls.io/r/un33k/python-slugify
+.. |download-image| image:: https://img.shields.io/pypi/dm/python-slugify.svg
+ :target: https://pypi.python.org/pypi/python-slugify
-[download-image]: https://img.shields.io/pypi/dm/python-slugify.svg
-[download-link]: https://pypi.python.org/pypi/python-slugify
+.. _MIT: https://github.com/un33k/python-slugify/blob/master/LICENSE
diff --git a/setup.py b/setup.py
index 068f8af..f944f42 100755
--- a/setup.py
+++ b/setup.py
@@ -29,6 +29,7 @@ classifiers = [
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
]
diff --git a/slugify/__init__.py b/slugify/__init__.py
index daaafd8..2df1ccd 100644
--- a/slugify/__init__.py
+++ b/slugify/__init__.py
@@ -3,4 +3,4 @@ from .slugify import *
__author__ = 'Val Neekman @ Neekware Inc. [@vneekman]'
__description__ = 'A Python slugify application that also handles Unicode'
-__version__ = '1.2.1'
+__version__ = '1.2.2'
diff --git a/slugify/slugify.py b/slugify/slugify.py
index 327f2c1..af0c609 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -26,6 +26,7 @@ QUOTE_PATTERN = re.compile(r'[\']+')
ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
DUPLICATE_DASH_PATTERN = re.compile('-{2,}')
NUMBERS_PATTERN = re.compile('(?<=\d),(?=\d)')
+DEFAULT_SEPARATOR = '-'
def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', save_order=False):
@@ -71,7 +72,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
- separator='-', save_order=False, stopwords=()):
+ separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None):
"""
Make a slug from the given text.
:param text (str): initial text
@@ -83,6 +84,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
:param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
:param separator (str): separator between words
:param stopwords (iterable): words to discount
+ :param regex_pattern (str): regex pattern matching the characters to replace (overrides the default ALLOWED_CHARS_PATTERN)
:return (str):
"""
@@ -91,7 +93,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = _unicode(text, 'utf-8', 'ignore')
# replace quotes with dashes - pre-process
- text = QUOTE_PATTERN.sub('-', text)
+ text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
# decode unicode
text = unidecode.unidecode(text)
@@ -129,25 +131,28 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
# remove generated quotes -- post-process
text = QUOTE_PATTERN.sub('', text)
- # replace unwanted characters
+ # cleanup numbers
text = NUMBERS_PATTERN.sub('', text)
- text = ALLOWED_CHARS_PATTERN.sub('-', text)
- # remove redundant -
- text = DUPLICATE_DASH_PATTERN.sub('-', text).strip('-')
+ # replace all other unwanted characters
+ pattern = regex_pattern or ALLOWED_CHARS_PATTERN
+ text = re.sub(pattern, DEFAULT_SEPARATOR, text)
+
+ # collapse duplicate separators and strip them from both ends
+ text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
# remove stopwords
if stopwords:
stopwords_lower = [s.lower() for s in stopwords]
- words = [w for w in text.split('-') if w not in stopwords_lower]
- text = '-'.join(words)
+ words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords_lower]
+ text = DEFAULT_SEPARATOR.join(words)
# smart truncate if requested
if max_length > 0:
- text = smart_truncate(text, max_length, word_boundary, '-', save_order)
+ text = smart_truncate(text, max_length, word_boundary, DEFAULT_SEPARATOR, save_order)
- if separator != '-':
- text = text.replace('-', separator)
+ if separator != DEFAULT_SEPARATOR:
+ text = text.replace(DEFAULT_SEPARATOR, separator)
return text
diff --git a/test.py b/test.py
index 768fcb9..6730756 100644
--- a/test.py
+++ b/test.py
@@ -167,6 +167,36 @@ class TestSlugification(unittest.TestCase):
r = slugify(txt)
self.assertEqual(r, '1000-reasons-you-are-1')
+ def test_regex_pattern_keep_underscore(self):
+ txt = "___This is a test___"
+ regex_pattern = r'[^-a-z0-9_]+'
+ r = slugify(txt, regex_pattern=regex_pattern)
+ self.assertEqual(r, "___this-is-a-test___")
+
+ def test_regex_pattern_keep_underscore_with_underscore_as_separator(self):
+ """
+ The regex_pattern hands control over the allowed characters to the caller.
+ Hence the caller must ensure that a custom separator doesn't clash
+ with the regex_pattern.
+ """
+ txt = "___This is a test___"
+ regex_pattern = r'[^-a-z0-9_]+'
+ r = slugify(txt, separator='_', regex_pattern=regex_pattern)
+ self.assertNotEqual(r, "_this_is_a_test_")
+
+
+class TestUtils(unittest.TestCase):
+
+ def test_smart_truncate_no_max_length(self):
+ txt = '1,000 reasons you are #1'
+ r = smart_truncate(txt)
+ self.assertEqual(r, txt)
+
+ def test_smart_truncate_no_seperator(self):
+ txt = '1,000 reasons you are #1'
+ r = smart_truncate(txt, max_length=100, separator='_')
+ self.assertEqual(r, txt)
+
class TestUtils(unittest.TestCase):