summary | refs | log | tree | commit | diff
path: root/slugify
diff options
context:
space:
mode:
Diffstat (limited to 'slugify')
-rw-r--r-- slugify/__main__.py | 5
-rw-r--r-- slugify/__version__.py | 2
-rw-r--r-- slugify/slugify.py | 20
3 files changed, 21 insertions, 6 deletions
diff --git a/slugify/__main__.py b/slugify/__main__.py
index 5a888fe..7dd6b01 100644
--- a/slugify/__main__.py
+++ b/slugify/__main__.py
@@ -36,6 +36,8 @@ def parse_args(argv):
help="Activate case sensitivity")
parser.add_argument("--replacements", nargs='+',
help="""Additional replacement rules e.g. "|->or", "%%->percent".""")
+ parser.add_argument("--allow-unicode", action='store_true', default=False,
+ help="Allow unicode characters")
args = parser.parse_args(argv[1:])
@@ -73,7 +75,8 @@ def slugify_params(args):
separator=args.separator,
stopwords=args.stopwords,
lowercase=args.lowercase,
- replacements=args.replacements
+ replacements=args.replacements,
+ allow_unicode=args.allow_unicode
)
diff --git a/slugify/__version__.py b/slugify/__version__.py
index 1eedf44..e14e887 100644
--- a/slugify/__version__.py
+++ b/slugify/__version__.py
@@ -5,4 +5,4 @@ __description__ = 'A Python slugify application that also handles Unicode'
__url__ = 'https://github.com/un33k/python-slugify'
__license__ = 'MIT'
__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.'
-__version__ = '6.0.1'
+__version__ = '6.1.0'
diff --git a/slugify/slugify.py b/slugify/slugify.py
index 190ea92..ae6c9b6 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -17,6 +17,7 @@ DECIMAL_PATTERN = re.compile(r'&#(\d+);')
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
QUOTE_PATTERN = re.compile(r'[\']+')
DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
+DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
DEFAULT_SEPARATOR = '-'
@@ -66,7 +67,8 @@ def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', sav
def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False,
separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True,
- replacements: typing.Iterable[typing.Iterable[str]] = ()):
+ replacements: typing.Iterable[typing.Iterable[str]] = (),
+ allow_unicode=False):
"""
Make a slug from the given text.
:param text (str): initial text
@@ -81,6 +83,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
:param regex_pattern (str): regex pattern for disallowed characters
:param lowercase (bool): activate case sensitivity by setting it to False
:param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
+ :param allow_unicode (bool): allow unicode characters
:return (str):
"""
@@ -97,7 +100,8 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text)
# decode unicode
- text = unidecode.unidecode(text)
+ if not allow_unicode:
+ text = unidecode.unidecode(text)
# ensure text is still in unicode
if not isinstance(text, str):
@@ -122,7 +126,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
pass
# translate
- text = unicodedata.normalize('NFKD', text)
+ if allow_unicode:
+ text = unicodedata.normalize('NFKC', text)
+ else:
+ text = unicodedata.normalize('NFKD', text)
+
if sys.version_info < (3,):
text = text.encode('ascii', 'ignore')
@@ -137,7 +145,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
text = NUMBERS_PATTERN.sub('', text)
# replace all other unwanted characters
- pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
+ if allow_unicode:
+ pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
+ else:
+ pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
+
text = re.sub(pattern, DEFAULT_SEPARATOR, text)
# remove redundant