cmdline options

author: Val Neekman (AvidCoder) <un33kvu@gmail.com> 2022-04-27 12:07:55 -0400
committer: Val Neekman (AvidCoder) <un33kvu@gmail.com> 2022-04-27 12:07:55 -0400
commit: c094c8a50371d6da08b782424ace5eca20943c8b (patch)
tree: e3d529d4956ed162cd3545f6170db08fcfba8199
parent: 937779c77420f4acb8acd775bc2c35ed94f1393d (diff)
download: python-slugify-c094c8a50371d6da08b782424ace5eca20943c8b.tar.gz
18 files changed, 574 insertions, 150 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..4148354
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,44 @@
+name: CI
+
+# Run on push only for ci/staging
+# Otherwise it may trigger concurrently `push & pull_request` on PRs.
+on:
+  push:
+    branches:
+      - ci
+      - staging
+
+jobs:
+  build:
+    name: Python ${{ matrix.python }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install coveralls --upgrade
+      - name: Run flake8
+        run: |
+          pip install flake8 --upgrade
+          flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 .
+      - name: Run pycodestyle
+        run: |
+          pip install pycodestyle --upgrade
+          pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py
+      - name: Run test
+        run: |
+          coverage run --source=slugify test.py
+      - name: Coveralls
+        run: coveralls --service=github
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
new file mode 100644
index 0000000..d0cb401
--- /dev/null
+++ b/.github/workflows/dev.yml
@@ -0,0 +1,44 @@
+name: DEV
+
+# Run on push only for dev/sandbox
+# Otherwise it may trigger concurrently `push & pull_request` on PRs.
+on:
+  push:
+    branches:
+      - sandbox
+      - dev
+
+jobs:
+  build:
+    name: Python ${{ matrix.python }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install coveralls --upgrade
+      - name: Run flake8
+        run: |
+          pip install flake8 --upgrade
+          flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 .
+      - name: Run pycodestyle
+        run: |
+          pip install pycodestyle --upgrade
+          pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py
+      - name: Run test
+        run: |
+          coverage run --source=slugify test.py
+      - name: Coveralls
+        run: coveralls --service=github
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..f1e75b7
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,43 @@
+name: Main
+
+# Run on push only for master
+# Otherwise it may trigger concurrently `push & pull_request` on PRs.
+on:
+  push:
+    branches:
+      - master
+
+jobs:
+  build:
+    name: Python ${{ matrix.python }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+          pip install coveralls --upgrade
+      - name: Run flake8
+        run: |
+          pip install flake8 --upgrade
+          flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 .
+      - name: Run pycodestyle
+        run: |
+          pip install pycodestyle --upgrade
+          pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py
+      - name: Run test
+        run: |
+          coverage run --source=slugify test.py
+      - name: Coveralls
+        run: coveralls --service=github
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.python-version b/.python-version
deleted file mode 100644
index a8733ab..0000000
--- a/.python-version
+++ /dev/null
@@ -1,5 +0,0 @@
-3.9.2
-3.8.8
-3.7.10
-3.6.13
-pypy3.7-7.3.3
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 2f8b3ce..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-language: python
-dist: xenial
-
-python:
-  - "3.6"
-  - "3.7"
-  - "3.8"
-  - "3.9"
-  - "pypy3"
-
-install:
-  - pip install pip -U
-  - pip install -e .
-  - pip install pycodestyle
-  - pip install coveralls
-
-before_script:
-  - "bash format.sh"
-
-script: coverage run --source=slugify test.py
-
-after_success: coveralls
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 2ab09c1..ecfbb80 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,5 @@
 {
   "python.linting.pylintEnabled": false,
   "python.pythonPath": "/usr/bin/python3",
-}
-\ No newline at end of file
+  "cSpell.words": ["Neekman", "shch", "xlate"]
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 777f6dc..2ba0bb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,25 @@
+## 6.1.2
+
+- Reintroduce the cli options
+
+## 6.1.1
+
+- Remove type hinting (temporarily)
+
+## 6.1.0
+
+- Add `allow_unicode` flag to allow unicode characters in the slug
+
+## 6.0.1
+
+- Rework regex_pattern to mean the opposite (disallowed chars instead of allowed)
+- Thanks to @yyyyyyyan for the initial PR followed by the final PR by @mrezzamoradi
+
+## 6.0.0
+
+- Enable GitHub Actions
+- Remove tox; the tests now run on GitHub Actions, and end users can refer to those tests
+
 ## 5.0.2
 
 - Enable twine publish
diff --git a/MANIFEST.in b/MANIFEST.in
index 0c78f18..373701c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
-include CHANGELOG.md
 include LICENSE
 include README.md
-include test.py
+include CHANGELOG.md
diff --git a/README.md b/README.md
index 2305794..f93afee 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,8 @@ def slugify(
     stopwords=(),
     regex_pattern=None,
     lowercase=True,
-    replacements=()
+    replacements=(),
+    allow_unicode=False
   ):
   """
   Make a slug from the given text.
@@ -55,9 +56,10 @@ def slugify(
   :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
   :param separator (str): separator between words
   :param stopwords (iterable): words to discount
-  :param regex_pattern (str): regex pattern for allowed characters
+  :param regex_pattern (str): regex pattern for disallowed characters
   :param lowercase (bool): activate case sensitivity by setting it to False
   :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+  :param allow_unicode (bool): allow unicode characters
   :return (str): slugify text
   """
 ```
@@ -75,6 +77,10 @@ txt = '影師嗎'
 r = slugify(txt)
 self.assertEqual(r, "ying-shi-ma")
 
+txt = '影師嗎'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "影師嗎")
+
 txt = 'C\'est déjà l\'été.'
 r = slugify(txt)
 self.assertEqual(r, "c-est-deja-l-ete")
@@ -133,6 +139,14 @@ txt = 'ÜBER Über German Umlaut'
 r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
 self.assertEqual(r, "ueber-ueber-german-umlaut")
 
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True)
+self.assertEqual(r, "i-love")
+
+txt = 'i love 🦄'
+r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+')
+self.assertEqual(r, "🦄")
+
 ```
 
 For more examples, have a look at the [test.py](test.py) file.
@@ -164,10 +178,6 @@ quick-brown-fox-jumps-over-lazy-dog
 
 # Running the tests
 
-To run the tests against all environments:
-
-    tox
-
 To run the tests against the current environment:
 
     python test.py
@@ -188,8 +198,8 @@ X.Y.Z Version
     `MINOR` version -- when you add functionality in a backwards-compatible manner, and
     `PATCH` version -- when you make backwards-compatible bug fixes.
 
-[status-image]: https://travis-ci.org/un33k/python-slugify.svg?branch=master
-[status-link]: https://travis-ci.org/un33k/python-slugify
+[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg
+[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml
 [version-image]: https://img.shields.io/pypi/v/python-slugify.svg
 [version-link]: https://pypi.python.org/pypi/python-slugify
 [coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg
diff --git a/dev.requirements.txt b/dev.requirements.txt
index 337aa36..5f94d7b 100644
--- a/dev.requirements.txt
+++ b/dev.requirements.txt
@@ -1,2 +1,3 @@
-pycodestyle==2.7.0
-twine==3.4.1
-\ No newline at end of file
+pycodestyle==2.8.0
+twine==3.4.1
+flake8==4.0.1
+\ No newline at end of file
diff --git a/setup.py b/setup.py
index 51b267f..c3c4b3b 100755
--- a/setup.py
+++ b/setup.py
@@ -1,70 +1,85 @@
 #!/usr/bin/env python
-
-# -*- coding: utf-8 -*-
-from setuptools import setup, find_packages
-import re
+# Learn more: https://github.com/un33k/setup.py
 import os
 import sys
-import codecs
 
-name = 'python-slugify'
+from codecs import open
+from shutil import rmtree
+from setuptools import setup
+
+
 package = 'slugify'
-description = 'A Python Slugify application that handles Unicode'
-url = 'https://github.com/un33k/python-slugify'
-author = 'Val Neekman'
-author_email = 'info@neekware.com'
-license = 'MIT'
+python_requires = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+here = os.path.abspath(os.path.dirname(__file__))
+
 install_requires = ['text-unidecode>=1.3']
-extras_require = {'unidecode': ['Unidecode>=1.1.1']}
+extras_requires = {'unidecode': ['Unidecode>=1.1.1']}
+test_requires = []
 
-classifiers = [
-    'Development Status :: 5 - Production/Stable',
-    'Intended Audience :: Developers',
-    'Topic :: Software Development :: Build Tools',
-    'License :: OSI Approved :: MIT License',
-    'Operating System :: OS Independent',
-    'Programming Language :: Python',
-    'Programming Language :: Python :: 3',
-    'Programming Language :: Python :: 3.6',
-    'Programming Language :: Python :: 3.7',
-    'Programming Language :: Python :: 3.8',
-    'Programming Language :: Python :: 3.9',
-]
+about = {}
+with open(os.path.join(here, package, '__version__.py'), 'r', 'utf-8') as f:
+    exec(f.read(), about)
 
+with open('README.md', 'r', 'utf-8') as f:
+    readme = f.read()
 
-def get_version(package):
-    """
-    Return package version as listed in `__version__` in `init.py`.
-    """
-    init_py = codecs.open(os.path.join(package, '__init__.py'), encoding='utf-8').read()
-    return re.search("^__version__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE).group(1)
 
+def status(s):
+    print('\033[1m{0}\033[0m'.format(s))
 
-if sys.argv[-1] == 'build':
-    os.system("python setup.py sdist bdist_wheel")
 
+# 'setup.py publish' shortcut.
 if sys.argv[-1] == 'publish':
-    os.system("python setup.py build && twine upload dist/*")
-    args = {'version': get_version(package)}
-    print("You probably want to also tag the version now:")
-    print("  git tag -a %(version)s -m 'version %(version)s' && git push --tags" % args)
-    sys.exit()
+    try:
+        status('Removing previous builds…')
+        rmtree(os.path.join(here, 'dist'))
+    except OSError:
+        pass
+
+    status('Building Source and Wheel (universal) distribution…')
+    os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
 
-EXCLUDE_FROM_PACKAGES = []
+    status('Uploading the package to PyPI via Twine…')
+    os.system('twine upload dist/*')
+
+    status('Pushing git tags…')
+    os.system('git tag v{0}'.format(about['__version__']))
+    os.system('git push --tags')
+    sys.exit()
 
 setup(
-    name=name,
-    version=get_version(package),
-    url=url,
-    license=license,
-    description=description,
-    long_description=description,
-    author=author,
-    author_email=author_email,
-    packages=find_packages(exclude=EXCLUDE_FROM_PACKAGES),
+    name=about['__title__'],
+    version=about['__version__'],
+    description=about['__description__'],
+    long_description=readme,
+    long_description_content_type='text/markdown',
+    author=about['__author__'],
+    author_email=about['__author_email__'],
+    url=about['__url__'],
+    license=about['__license__'],
+    packages=[package],
+    package_data={'': ['LICENSE']},
+    package_dir={'slugify': 'slugify'},
+    include_package_data=True,
+    python_requires=python_requires,
     install_requires=install_requires,
-    extras_require=extras_require,
-    python_requires='>=3.6',
-    classifiers=classifiers,
+    tests_require=test_requires,
+    extras_require=extras_requires,
+    zip_safe=False,
+    cmdclass={},
+    project_urls={},
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'Natural Language :: English',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+    ],
     entry_points={'console_scripts': ['slugify=slugify.__main__:main']},
 )
diff --git a/slugify/__init__.py b/slugify/__init__.py
index 6c59f4e..ac21492 100644
--- a/slugify/__init__.py
+++ b/slugify/__init__.py
@@ -1,7 +1,2 @@
 from .special import *
 from .slugify import *
-
-
-__author__ = 'Val Neekman @ Neekware Inc. [@vneekman]'
-__description__ = 'A Python slugify application that also handles Unicode'
-__version__ = '5.0.2'
diff --git a/slugify/__main__.py b/slugify/__main__.py
index f815206..7dd6b01 100644
--- a/slugify/__main__.py
+++ b/slugify/__main__.py
@@ -31,11 +31,13 @@ def parse_args(argv):
     parser.add_argument("--stopwords", nargs='+',
                         help="Words to discount")
     parser.add_argument("--regex-pattern",
-                        help="Python regex pattern for allowed characters")
+                        help="Python regex pattern for disallowed characters")
     parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True,
                         help="Activate case sensitivity")
     parser.add_argument("--replacements", nargs='+',
                         help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
+    parser.add_argument("--allow-unicode", action='store_true', default=False,
+                        help="Allow unicode characters")
 
     args = parser.parse_args(argv[1:])
 
@@ -73,11 +75,12 @@ def slugify_params(args):
         separator=args.separator,
         stopwords=args.stopwords,
         lowercase=args.lowercase,
-        replacements=args.replacements
+        replacements=args.replacements,
+        allow_unicode=args.allow_unicode
     )
 
 
-def main(argv=None): # pragma: no cover
+def main(argv=None):  # pragma: no cover
     """ Run this program """
     if argv is None:
         argv = sys.argv
@@ -89,5 +92,5 @@ def main(argv=None): # pragma: no cover
         sys.exit(-1)
 
 
-if __name__ == '__main__': # pragma: no cover
+if __name__ == '__main__':  # pragma: no cover
     main()
diff --git a/slugify/__version__.py b/slugify/__version__.py
new file mode 100644
index 0000000..55abc97
--- /dev/null
+++ b/slugify/__version__.py
@@ -0,0 +1,8 @@
+__title__ = 'python-slugify'
+__author__ = 'Val Neekman'
+__author_email__ = 'info@neekware.com'
+__description__ = 'A Python slugify application that also handles Unicode'
+__url__ = 'https://github.com/un33k/python-slugify'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.'
+__version__ = '6.1.2'
diff --git a/slugify/slugify.py b/slugify/slugify.py
index bb3aa95..b8c02ad 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -1,17 +1,7 @@
 import re
-import unicodedata
-import types
 import sys
-
-try:
-    from htmlentitydefs import name2codepoint
-    _unicode = unicode
-    _unicode_type = types.UnicodeType
-except ImportError:
-    from html.entities import name2codepoint
-    _unicode = str
-    _unicode_type = str
-    unichr = chr
+import unicodedata
+from html.entities import name2codepoint
 
 try:
     import text_unidecode as unidecode
@@ -25,8 +15,8 @@ CHAR_ENTITY_PATTERN = re.compile(r'&(%s);' % '|'.join(name2codepoint))
 DECIMAL_PATTERN = re.compile(r'&#(\d+);')
 HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
 QUOTE_PATTERN = re.compile(r'[\']+')
-ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+')
-ALLOWED_CHARS_PATTERN_WITH_UPPERCASE = re.compile(r'[^-a-zA-Z0-9]+')
+DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
+DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
 DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
 NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
 DEFAULT_SEPARATOR = '-'
@@ -69,14 +59,14 @@ def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', sav
             else:
                 if save_order:
                     break
-    if not truncated: # pragma: no cover
+    if not truncated:  # pragma: no cover
         truncated = string[:max_length]
     return truncated.strip(separator)
 
 
 def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
             separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
-            replacements=()):
+            replacements=(), allow_unicode=False):
     """
     Make a slug from the given text.
     :param text (str): initial text
@@ -88,9 +78,10 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
     :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order
     :param separator (str): separator between words
     :param stopwords (iterable): words to discount
-    :param regex_pattern (str): regex pattern for allowed characters
+    :param regex_pattern (str): regex pattern for disallowed characters
     :param lowercase (bool): activate case sensitivity by setting it to False
     :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+    :param allow_unicode (bool): allow unicode characters
     :return (str):
     """
 
@@ -100,39 +91,44 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
             text = text.replace(old, new)
 
     # ensure text is unicode
-    if not isinstance(text, _unicode_type):
-        text = _unicode(text, 'utf-8', 'ignore')
+    if not isinstance(text, str):
+        text = str(text, 'utf-8', 'ignore')
 
     # replace quotes with dashes - pre-process
     text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
 
     # decode unicode
-    text = unidecode.unidecode(text)
+    if not allow_unicode:
+        text = unidecode.unidecode(text)
 
     # ensure text is still in unicode
-    if not isinstance(text, _unicode_type):
-        text = _unicode(text, 'utf-8', 'ignore')
+    if not isinstance(text, str):
+        text = str(text, 'utf-8', 'ignore')
 
     # character entity reference
     if entities:
-        text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text)
+        text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text)
 
     # decimal character reference
     if decimal:
         try:
-            text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text)
+            text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
         except Exception:
             pass
 
     # hexadecimal character reference
     if hexadecimal:
         try:
-            text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text)
+            text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
         except Exception:
             pass
 
     # translate
-    text = unicodedata.normalize('NFKD', text)
+    if allow_unicode:
+        text = unicodedata.normalize('NFKC', text)
+    else:
+        text = unicodedata.normalize('NFKD', text)
+
     if sys.version_info < (3,):
         text = text.encode('ascii', 'ignore')
 
@@ -147,10 +143,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
     text = NUMBERS_PATTERN.sub('', text)
 
     # replace all other unwanted characters
-    if lowercase:
-        pattern = regex_pattern or ALLOWED_CHARS_PATTERN
+    if allow_unicode:
+        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
     else:
-        pattern = regex_pattern or ALLOWED_CHARS_PATTERN_WITH_UPPERCASE
+        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
+
     text = re.sub(pattern, DEFAULT_SEPARATOR, text)
 
     # remove redundant
diff --git a/slugify/special.py b/slugify/special.py
index d3478d5..54eb85c 100644
--- a/slugify/special.py
+++ b/slugify/special.py
@@ -20,7 +20,7 @@ _CYRILLIC = [      # package defaults:
     (u'я', u'ya'),   # ia
     (u'х', u'h'),    # kh
     (u'у', u'y'),    # u
-    (u'щ', u'sch'),  # shch
+    (u'щ', u'sch'),  # sch
     (u'ю', u'u'),    # iu / yu
 ]
 CYRILLIC = add_uppercase_char(_CYRILLIC)
diff --git a/test.py b/test.py
index ddf1bf4..931f38f 100644
--- a/test.py
+++ b/test.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import io
-import os
 import sys
 import unittest
 from contextlib import contextmanager
@@ -10,7 +9,7 @@ from slugify import smart_truncate
 from slugify.__main__ import slugify_params, parse_args
 
 
-class TestSlugification(unittest.TestCase):
+class TestSlugify(unittest.TestCase):
 
     def test_extraneous_seperators(self):
 
@@ -234,6 +233,294 @@ class TestSlugification(unittest.TestCase):
         self.assertEqual(r, "ueber-ueber-german-umlaut")
 
 
+class TestSlugifyUnicode(unittest.TestCase):
+
+    def test_extraneous_seperators(self):
+
+        txt = "This is a test ---"
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "this-is-a-test")
+
+        txt = "___This is a test ---"
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "this-is-a-test")
+
+        txt = "___This is a test___"
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "this-is-a-test")
+
+    def test_non_word_characters(self):
+        txt = "This -- is a ## test ---"
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "this-is-a-test")
+
+    def test_phonetic_conversion_of_eastern_scripts(self):
+        txt = '影師嗎'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, txt)
+
+    def test_accented_text(self):
+        txt = 'C\'est déjà l\'été.'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "c-est-déjà-l-été")
+
+        txt = 'Nín hǎo. Wǒ shì zhōng guó rén'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "nín-hǎo-wǒ-shì-zhōng-guó-rén")
+
+    def test_accented_text_with_non_word_characters(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "jaja-lol-méméméoo-a")
+
+    def test_cyrillic_text(self):
+        txt = 'Компьютер'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "компьютер")
+
+    def test_max_length(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=9)
+        self.assertEqual(r, "jaja-lol")
+
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=15)
+        self.assertEqual(r, "jaja-lol-mémémé")
+
+    def test_max_length_cutoff_not_required(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=50)
+        self.assertEqual(r, "jaja-lol-méméméoo-a")
+
+    def test_word_boundary(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=15, word_boundary=True)
+        self.assertEqual(r, "jaja-lol-a")
+
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=17, word_boundary=True)
+        self.assertEqual(r, "jaja-lol-méméméoo")
+
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=18, word_boundary=True)
+        self.assertEqual(r, "jaja-lol-méméméoo")
+
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=19, word_boundary=True)
+        self.assertEqual(r, "jaja-lol-méméméoo-a")
+
+    def test_custom_separator(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=20, word_boundary=True, separator=".")
+        self.assertEqual(r, "jaja.lol.méméméoo.a")
+
+    def test_multi_character_separator(self):
+        txt = 'jaja---lol-méméméoo--a'
+        r = slugify(txt, allow_unicode=True, max_length=20, word_boundary=True, separator="ZZZZZZ")
+        self.assertEqual(r, "jajaZZZZZZlolZZZZZZméméméooZZZZZZa")
+
+    def test_save_order(self):
+        txt = 'one two three four five'
+        r = slugify(txt, allow_unicode=True, max_length=13, word_boundary=True, save_order=True)
+        self.assertEqual(r, "one-two-three")
+
+        txt = 'one two three four five'
+        r = slugify(txt, allow_unicode=True, max_length=13, word_boundary=True, save_order=False)
+        self.assertEqual(r, "one-two-three")
+
+        txt = 'one two three four five'
+        r = slugify(txt, allow_unicode=True, max_length=12, word_boundary=True, save_order=False)
+        self.assertEqual(r, "one-two-four")
+
+        txt = 'one two three four five'
+        r = slugify(txt, allow_unicode=True, max_length=12, word_boundary=True, save_order=True)
+        self.assertEqual(r, "one-two")
+
+    def test_save_order_rtl(self):
+        """For right-to-left unicode languages"""
+        txt = 'دو سه چهار پنج'
+        r = slugify(txt, allow_unicode=True, max_length=10, word_boundary=True, save_order=True)
+        self.assertEqual(r, "دو-سه-چهار")
+
+        txt = 'دو سه چهار پنج'
+        r = slugify(txt, allow_unicode=True, max_length=10, word_boundary=True, save_order=False)
+        self.assertEqual(r, "دو-سه-چهار")
+
+        txt = 'دو سه چهار پنج'
+        r = slugify(txt, allow_unicode=True, max_length=9, word_boundary=True, save_order=False)
+        self.assertEqual(r, "دو-سه-پنج")
+
+        txt = 'دو سه چهار پنج'
+        r = slugify(txt, allow_unicode=True, max_length=9, word_boundary=True, save_order=True)
+        self.assertEqual(r, "دو-سه")
+
+    def test_stopword_removal(self):
+        txt = 'this has a stopword'
+        r = slugify(txt, allow_unicode=True, stopwords=['stopword'])
+        self.assertEqual(r, 'this-has-a')
+
+        txt = 'this has a Öländ'
+        r = slugify(txt, allow_unicode=True, stopwords=['Öländ'])
+        self.assertEqual(r, 'this-has-a')
+
+    def test_stopword_removal_casesensitive(self):
+        txt = 'thIs Has a stopword Stopword'
+        r = slugify(txt, allow_unicode=True, stopwords=['Stopword'], lowercase=False)
+        self.assertEqual(r, 'thIs-Has-a-stopword')
+
+        txt = 'thIs Has a öländ Öländ'
+        r = slugify(txt, allow_unicode=True, stopwords=['Öländ'], lowercase=False)
+        self.assertEqual(r, 'thIs-Has-a-öländ')
+
+    def test_multiple_stopword_occurances(self):
+        txt = 'the quick brown fox jumps over the lazy dog'
+        r = slugify(txt, allow_unicode=True, stopwords=['the'])
+        self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+    def test_differently_cased_stopword_match(self):
+        txt = 'Foo A FOO B foo C'
+        r = slugify(txt, allow_unicode=True, stopwords=['foo'])
+        self.assertEqual(r, 'a-b-c')
+
+        txt = 'Foo A FOO B foo C'
+        r = slugify(txt, allow_unicode=True, stopwords=['FOO'])
+        self.assertEqual(r, 'a-b-c')
+
+    def test_multiple_stopwords(self):
+        txt = 'the quick brown fox jumps over the lazy dog in a hurry'
+        r = slugify(txt, allow_unicode=True, stopwords=['the', 'in', 'a', 'hurry'])
+        self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog')
+
+    def test_stopwords_with_different_separator(self):
+        txt = 'the quick brown fox jumps over the lazy dog'
+        r = slugify(txt, allow_unicode=True, stopwords=['the'], separator=' ')
+        self.assertEqual(r, 'quick brown fox jumps over lazy dog')
+
+    def test_html_entities_on(self):
+        txt = 'foo &amp; bar'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, 'foo-bar')
+
+    def test_html_entities_off(self):
+        txt = 'foo &amp; bår'
+        r = slugify(txt, allow_unicode=True, entities=False)
+        self.assertEqual(r, 'foo-amp-bår')
+
+    def test_html_decimal_on(self):
+        txt = '&#381;'
+        r = slugify(txt, allow_unicode=True, decimal=True)
+        self.assertEqual(r, 'ž')
+
+    def test_html_decimal_off(self):
+        txt = '&#381;'
+        r = slugify(txt, allow_unicode=True, entities=False, decimal=False)
+        self.assertEqual(r, '381')
+
+    def test_html_hexadecimal_on(self):
+        txt = '&#x17D;'
+        r = slugify(txt, allow_unicode=True, hexadecimal=True)
+        self.assertEqual(r, 'ž')
+
+    def test_html_hexadecimal_off(self):
+        txt = '&#x17D;'
+        r = slugify(txt, allow_unicode=True, hexadecimal=False)
+        self.assertEqual(r, 'x17d')
+
+    def test_starts_with_number(self):
+        txt = '10 amazing secrets'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, '10-amazing-secrets')
+
+    def test_contains_numbers(self):
+        txt = 'buildings with 1000 windows'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, 'buildings-with-1000-windows')
+
+    def test_ends_with_number(self):
+        txt = 'recipe number 3'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, 'recipe-number-3')
+
+    def test_numbers_only(self):
+        txt = '404'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, '404')
+
+    def test_numbers_and_symbols(self):
+        txt = '1,000 reasons you are #1'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, '1000-reasons-you-are-1')
+
+        txt = '۱,۰۰۰ reasons you are #۱'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, '۱۰۰۰-reasons-you-are-۱')
+
+    def test_regex_pattern_keep_underscore(self):
+        """allowing unicode should not overrule the passed regex_pattern"""
+        txt = "___This is a test___"
+        regex_pattern = r'[^-a-z0-9_]+'
+        r = slugify(txt, allow_unicode=True, regex_pattern=regex_pattern)
+        self.assertEqual(r, "___this-is-a-test___")
+
+    def test_regex_pattern_keep_underscore_with_underscore_as_separator(self):
+        """
+        The regex_pattern turns the power to the caller.
+        Hence, the caller must ensure that a custom separator doesn't clash
+        with the regex_pattern.
+        """
+        txt = "___This is a test___"
+        regex_pattern = r'[^-a-z0-9_]+'
+        r = slugify(txt, allow_unicode=True, separator='_', regex_pattern=regex_pattern)
+        self.assertNotEqual(r, "_this_is_a_test_")
+
+    def test_replacements(self):
+        txt = '10 | 20 %'
+        r = slugify(txt, allow_unicode=True, replacements=[['|', 'or'], ['%', 'percent']])
+        self.assertEqual(r, "10-or-20-percent")
+
+        txt = 'I ♥ 🦄'
+        r = slugify(txt, allow_unicode=True, replacements=[['♥', 'amour'], ['🦄', 'licorne']])
+        self.assertEqual(r, "i-amour-licorne")
+
+        txt = 'I ♥ 🦄'
+        r = slugify(txt, allow_unicode=True, replacements=[['♥', 'სიყვარული'], ['🦄', 'licorne']])
+        self.assertEqual(r, "i-სიყვარული-licorne")
+
+    def test_replacements_german_umlaut_custom(self):
+        txt = 'ÜBER Über German Umlaut'
+        r = slugify(txt, allow_unicode=True, replacements=[['Ü', 'UE'], ['ü', 'ue']])
+        self.assertEqual(r, "ueber-ueber-german-umlaut")
+
+    def test_emojis(self):
+        """
+        allowing unicode shouldn't allow emojis, even in replacements.
+        the only exception is when it is allowed by the regex_pattern. regex_pattern overrules all
+        """
+        txt = 'i love 🦄'
+        r = slugify(txt, allow_unicode=True)
+        self.assertEqual(r, "i-love")
+
+        txt = 'i love 🦄'
+        r = slugify(txt, allow_unicode=True, decimal=True)
+        self.assertEqual(r, "i-love")
+
+        txt = 'i love 🦄'
+        r = slugify(txt, allow_unicode=True, hexadecimal=True)
+        self.assertEqual(r, "i-love")
+
+        txt = 'i love 🦄'
+        r = slugify(txt, allow_unicode=True, entities=True)
+        self.assertEqual(r, "i-love")
+
+        txt = 'i love you'
+        r = slugify(txt, allow_unicode=True, replacements=[['you', '🦄']])
+        self.assertEqual(r, "i-love")
+
+        txt = 'i love 🦄'
+        r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+')
+        self.assertEqual(r, "🦄")
+
+
 class TestUtils(unittest.TestCase):
 
     def test_smart_truncate_no_max_length(self):
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index a4bee82..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[tox]
-envlist = py{39,38,37,36},pypy3
-
-[testenv]
-deps=
-    -e .
-commands =
-    python -m unittest test
-
-[testenv:format]
-deps = pycodestyle
-allowlist_externals = sh
-commands = sh format.sh
-
-[testenv:coverage]
-deps = coverage
-commands =
-    coverage run --source=slugify test.py
author	Val Neekman (AvidCoder) <un33kvu@gmail.com>	2022-04-27 12:07:55 -0400
committer	Val Neekman (AvidCoder) <un33kvu@gmail.com>	2022-04-27 12:07:55 -0400
commit	c094c8a50371d6da08b782424ace5eca20943c8b (patch)
tree	e3d529d4956ed162cd3545f6170db08fcfba8199
parent	937779c77420f4acb8acd775bc2c35ed94f1393d (diff)
download	python-slugify-c094c8a50371d6da08b782424ace5eca20943c8b.tar.gz