# Source code for dk.asciify

# -*- coding: utf-8 -*-

"""Convert unicode strings to visually similar ascii representations.
"""
import re


# Each key is a group of lower-case non-ascii characters; the value is the
# ascii text that stands in for every character of that group.
REPLACEMENTS = {
    u'æ': 'ae',
    u'øôöòóõ': 'o',
    u'àáâãäå': 'a',
    u'èéêë': 'e',
    u'ìíîï': 'i',
    u'üúùû': 'u',
    u'ÿý': 'y',
}

# All characters that have a replacement: the lower-case characters gathered
# from the keys above, followed by their upper-case counterparts.
REPL_CHARS = u''.join(REPLACEMENTS)
REPL_CHARS = REPL_CHARS + REPL_CHARS.upper()


def _replacement(ch):
    """Return the ascii stand-in for the character `ch`.

    `ch` is lower-cased and searched for in the key groups of
    `REPLACEMENTS`.  The value of the first group containing it is
    returned, upper-cased when `ch` itself is upper case.  A character
    found in no group is returned unchanged.
    """
    lowered = ch.lower()
    for group, ascii_text in REPLACEMENTS.items():
        if lowered not in group:
            continue
        # Mirror the case of the input character in the replacement text.
        if ch.isupper():
            return ascii_text.upper()
        return ascii_text
    return ch


def asciify(s, spaces=None, legal=None, replacement=''):
    """Convert unicode string `s` to a similarly looking ascii string.

    Each character of `s` is handled in this order:

    1. A character listed in `REPL_CHARS` is swapped for its ascii
       stand-in (see `_replacement`).  The stand-in is emitted as-is; it
       is *not* checked against `legal`.
    2. Otherwise, if `legal` is None, the character is kept when its code
       point is in the printable ascii range 32..126 (127 is excluded).
    3. Otherwise the character is kept when it occurs in `legal`; the
       space character is always accepted in addition to `legal`.

    Any character that is not kept is replaced by `replacement`
    (default: the empty string, i.e. the character is dropped).

    If `spaces` is given (and non-empty), every run of whitespace left in
    the result after the filtering above is replaced with exactly one
    `spaces`.

    Args:
        s: the text to convert.
        spaces: substitute for runs of whitespace, or None to leave
            whitespace untouched.
        legal: string of permitted characters, or None to permit all
            printable ascii characters.
        replacement: text inserted in place of every elided character.

    Returns:
        The converted text, passed through `str()`.
    """
    # A space is always permitted alongside `legal`; build the combined
    # string once rather than once per character.
    allowed = None if legal is None else legal + ' '

    # Collect the pieces and join once instead of growing a string with
    # `+=`, which may be quadratic in the length of `s`.
    parts = []
    for ch in s:
        if ch in REPL_CHARS:
            parts.append(_replacement(ch))
        elif allowed is None:
            parts.append(ch if 32 <= ord(ch) < 127 else replacement)
        else:
            parts.append(ch if ch in allowed else replacement)
    res = "".join(parts)

    if spaces:
        res = re.sub(r'\s+', spaces, res)
    return str(res)
def ascii_name(name):
    """Return an ascii rendering of `name` intended for use as a filename.

    `name` is lower-cased and passed through `asciify`, allowing only the
    letters a-z and the hyphen, with runs of whitespace turned into a
    single hyphen.  The result is deliberately lossy (a grave
    bastardization of the original name) so that it can be used as a
    filename without causing problems.
    """
    filename_chars = u'abcdefghijklmnopqrstuvwxyz-'
    lowered = name.lower()
    return asciify(lowered, spaces='-', legal=filename_chars)
def slug(txt):
    """Return an ascii rendering of `txt` intended for use in URIs.

    `txt` is lower-cased and passed through `asciify`, allowing only the
    letters a-z, the digits 0-9 and the hyphen, with runs of whitespace
    turned into a single hyphen.
    """
    slug_chars = u'abcdefghijklmnopqrstuvwxyz-0123456789'
    lowered = txt.lower()
    return asciify(lowered, spaces='-', legal=slug_chars)