Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,13 @@ required more than simple transliteration.

>>> slugify.slugify(u'Bän...g (bang)')
u'bäng-bang'

>>> slugify.slugify(u'Bäuma means a tree', only_ascii=True)
u'bauma-means-a-tree'

>>> slugify.slugify(u'Bakıcı geldi', only_ascii=True)
u'bakici-geldi'

## Thanks

Thanks to Tomaz Solc for unidecode (https://pypi.python.org/pypi/Unidecode), which provides the ASCII transliteration used by the `only_ascii` option.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='unicode-slugify',
version='0.1.2',
version='0.1.3',
description='A slug generator that turns strings into unicode slugs.',
long_description=open('README.md').read(),
author='Jeff Balogh, Dave Dash',
Expand All @@ -13,7 +13,7 @@
include_package_data=True,
package_data={'': ['README.md']},
zip_safe=False,
install_requires=['six'],
install_requires=['six', 'unidecode'],
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Web Environment',
Expand Down
33 changes: 29 additions & 4 deletions slugify/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import six
import unicodedata
from unidecode import unidecode


def smart_text(s, encoding='utf-8', errors='strict'):
Expand All @@ -26,9 +27,27 @@ def smart_text(s, encoding='utf-8', errors='strict'):
SLUG_OK = '-_~'


def slugify(s, ok=SLUG_OK, lower=True, spaces=False):
# L and N signify letter/number.
# http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False):
"""
Creates a unicode slug for given string with several options.

L and N signify letter/number.
http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table

:param s: Your unicode string.
:param ok: Extra characters outside of alphanumerics to be allowed.
:param lower: Lower the output string.
:param spaces: True allows spaces, False replaces a space with a dash (-).
:param only_ascii: True to replace non-ASCII unicode characters with their ASCII representations.
:type s: String
:type ok: String
:type lower: Bool
:type spaces: Bool
:type only_ascii: Bool
:return: Slugified unicode string

"""

rv = []
for c in unicodedata.normalize('NFKC', smart_text(s)):
cat = unicodedata.category(c)[0]
Expand All @@ -39,4 +58,10 @@ def slugify(s, ok=SLUG_OK, lower=True, spaces=False):
new = ''.join(rv).strip()
if not spaces:
new = re.sub('[-\s]+', '-', new)
return new.lower() if lower else new

new = new.lower() if lower else new

if only_ascii == True:
new = unidecode(new)

return new
21 changes: 20 additions & 1 deletion slugify/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ def test_slugify():
def check(x, y):
eq_(slugify(x), y)

def check_only_ascii(x, y):
eq_(slugify(x, only_ascii=True), y)

def check_only_ascii_capital(x, y):
eq_(slugify(x, lower=False, only_ascii=True), y)

s = [('xx x - "#$@ x', 'xx-x-x'),
(u'Bän...g (bang)', u'bäng-bang'),
(u, u.lower()),
Expand All @@ -33,11 +39,24 @@ def check(x, y):
# I don't really care what slugify returns. Just don't crash.
(u'x𘍿', u'x'),
(u'ϧ΃𘒬𘓣', u'\u03e7'),
(u'¿x', u'x')]
(u'¿x', u'x'),
(u'Bakıcı geldi', u'bak\u0131c\u0131-geldi'),
(u'Bäuma means tree', u'b\xe4uma-means-tree')]

only_ascii = [(u'Bakıcı geldi', u'bakici-geldi'), (u'Bäuma means tree', u'bauma-means-tree')]

only_ascii_capital = [(u'BÄUMA MEANS TREE', u'BAUMA-MEANS-TREE'),
(u'EMİN WAS HERE', u'EMIN-WAS-HERE')]

for val, expected in s:
yield check, val, expected

for val, expected in only_ascii:
yield check_only_ascii, val, expected

for val, expected in only_ascii_capital:
yield check_only_ascii_capital, val, expected


class SmartTextTestCase(unittest.TestCase):

Expand Down