# Source code for dk.html.uhtml

# -*- coding: utf-8 -*-

"""
   New version of html.py module that works on/with Unicode.

"""

from dk.text import u8, unicode_repr
import types as _types
import htmlentitydefs as _h
import string as _s
from .css import css
# Keep a handle on the builtin ``map``: the tag-generation loop further down
# binds a module-level name for every entry in ``tags``, and 'map' is one of
# them, so the plain name ``map`` ends up referring to the <map> tag class.
_map = map

# Encodings tried, in this order, by rawstr2unicode() when decoding byte
# strings of unknown encoding.
raw_string_encodings = ('utf-8', 'iso-8859-1')


# Element names classified as inline.
INLINE_ELEMENTS = '''
   a abbr acronym b basefont bdo big br cite code dfn em font i img input
   kbd label q s samp select small span strike strong sub sup textarea tt
   u var applet button del iframe ins map object script'''.split()

# Element names classified as block-level.  Note that applet, button, del,
# iframe, ins, map, object and script appear in both lists.
BLOCKLEVEL_ELEMENTS = '''
   address blockquote center dir div dl fieldset form h1 h2 h3 h4 h5 h6
   hr isindex menu noframes noscript ol p pre table ul dd dt frameset
   li tbody td tfoot th thead tr applet button del iframe ins map object
   script
   '''.split()


class EscapedString(unicode):
    """Plain ``unicode`` subclass that adds no behaviour of its own.

    NOTE(review): presumably a marker type for text that has already been
    escaped; nothing visible in this module consumes it -- confirm.
    """


def escape_char(unichar):
    """Return an ASCII-only HTML representation of a single character.

    :param unichar: a one-character unicode string, or an already formed
        entity reference such as ``u'&oslash;'``.
    :returns: ``unichar`` unchanged if it is already an entity reference;
        ``&name;`` when the code point has a named entity (this includes
        ``&``, ``<``, ``>`` and ``"``); the character itself for other
        ASCII characters; a numeric reference ``&#NNN;`` for any other
        code point; and the empty string for NUL and for surrogate code
        points, neither of which can be referenced in HTML.
    :raises TypeError: if ``unichar`` is empty or a multi-character string
        that is not an entity reference (raised by ``ord``).
    """
    if len(unichar) > 1 and unichar[0] == '&' and unichar[-1] == ';':
        # Already an entity reference: pass it through untouched.
        return str(unichar)

    codepoint = ord(unichar)
    entity_name = _h.codepoint2name.get(codepoint)
    if entity_name is not None:
        return '&' + entity_name + ';'
    if 0 < codepoint < 128:
        return str(unichar)
    if codepoint == 0 or 0xD800 <= codepoint <= 0xDFFF:
        # NUL and (halves of) surrogate pairs have no valid character
        # reference, so they are dropped.
        return ''
    # No named entity: fall back to a numeric character reference instead
    # of silently losing the character.
    return '&#%d;' % codepoint


def escaped_array(s):
    """Return a list with one escaped item per character of ``s``.

    Each item is whatever :func:`escape_char` produces for that character
    (a plain ASCII character or an entity reference like ``&oslash;``).
    """
    pieces = []
    for character in s:
        pieces.append(escape_char(character))
    return pieces


def escape(s, enc=None):
    """Return ``s`` as an ASCII string with entity references.

    :param s: text to escape; ``None`` yields the empty string.
    :param enc: if given and ``s`` is not a unicode object, ``s`` is first
        decoded with this encoding.  Without it, ``s`` is processed
        item by item exactly as passed in.
    :returns: the concatenation of :func:`escape_char` over every
        character, e.g. with ``&oslash;`` / ``&aelig;`` in place of the
        corresponding letters.
    """
    if s is None:
        return ''
    must_decode = enc is not None and not isinstance(s, unicode)
    characters = s.decode(enc) if must_decode else s
    return ''.join([escape_char(c) for c in characters])


def u8escape(s):
    """Escape ``s`` via :func:`escape`, decoding byte strings with the
    ``'u8'`` codec first.
    """
    return escape(s, enc='u8')


def rawstr2unicode(s):
    """Decode the raw string ``s`` with the first encoding that works.

    The candidates are taken, in order, from ``raw_string_encodings``.

    :raises UnicodeError: if none of the candidate encodings can decode
        ``s``.
    """
    for candidate in raw_string_encodings:
        try:
            decoded = unicode(s, candidate)
        except UnicodeDecodeError:
            # This encoding does not fit; try the next one.
            continue
        else:
            return decoded
    raise UnicodeError("Could not decode raw string.")


def normalize(v):
    """Return ``v`` as a unicode string.

    * unicode objects are returned unchanged;
    * byte strings are decoded with :func:`rawstr2unicode`;
    * anything else is converted through its ``__str__`` method.
    """
    if isinstance(v, unicode):
        return v
    if isinstance(v, basestring):
        # A str object: guess its encoding.
        return rawstr2unicode(v)
    # Any other kind of object.
    return unicode(str(v))


def quote(v):
    """Wrap ``v`` in double quotes, replacing any double quote inside it
    with ``&quot;``.

       >>> quote(u"Bjorn's")
       u'"Bjorn\\'s"'
       >>> quote(u'the "best"')
       u'"the &quot;best&quot;"'
    """
    has_quote = u'"' in v
    inner = v.replace(u'"', u'&quot;') if has_quote else v
    return u'"%s"' % inner


def norm_attr_name(a):
    """Translate a Python identifier into an attribute name.

    Names with a leading underscore are returned untouched
    (``_foo_bar => _foo_bar``); otherwise a single trailing underscore is
    removed (``class_ => class``) and the remaining underscores become
    hyphens (``max_height => max-height``).

           >>> norm_attr_name(u'class_')
           u'class'
           >>> norm_attr_name(u'z_index')
           u'z-index'
    """
    first, last = a[0], a[-1]
    if first == u'_':
        return a
    stem = a[:-1] if last == u'_' else a
    return stem.replace(u'_', u'-')
    

class xtag(object):
    """x(ml-style)tag: a tag without content or a closing tag.
       E.g. <br/> would be xtag('br')

       HTML attributes are passed as keyword arguments or assigned as
       object attributes; their names go through :func:`norm_attr_name`
       (``class_`` -> ``class``, ``max_height`` -> ``max-height``).

       .. note:: [2009-03-11] w3 validator complains that 4.01 loose should not
                              use <foo />  but <foo>.
    """
    def __init__(self, tag_name, **kw):
        # Underscore-prefixed names are real instance attributes (see
        # __setattr__); everything else lives in the _attr dict.
        self._attr = {}
        self._name = tag_name
        self._nlafter = ''

        for k, v in kw.items():
            self._attr[norm_attr_name(k)] = v

    def __getattr__(self, name):
        """Look up ``name`` among the HTML attributes.

        Only called when normal attribute lookup fails.

        :raises AttributeError: if there is no such HTML attribute.
        """
        try:
            # Go through __dict__ rather than ``self._attr``: on an instance
            # whose __init__ has not run (``__new__``, copy, unpickling)
            # ``self._attr`` would re-enter __getattr__ and recurse forever.
            return self.__dict__['_attr'][norm_attr_name(name)]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        """Store ``value`` as an instance attribute or an HTML attribute.

        Underscore-prefixed names and names that already resolve on the
        object (e.g. properties) are set as regular attributes; every
        other name is stored as an HTML attribute.
        """
        name = norm_attr_name(name)
        if name.startswith('_'):
            object.__setattr__(self, name, value)
        elif name in self._attr:
            self._attr[name] = value
        elif hasattr(self, name):
            object.__setattr__(self, name, value)
        else:
            self._attr[name] = value

    def attributes(self):
        """Return the HTML attributes as a string like `` key="val"``.

        Every attribute is preceded by a single space.  ``css`` values are
        converted with ``str()``; all values are normalized to unicode,
        escaped and quoted.  Attributes whose normalized value is empty
        are omitted.
        """
        res = []
        for k, v in self._attr.items():
            if isinstance(v, css):
                v = str(v)

            v = normalize(v)
            if v:
                res.append(u' %s=%s' % (k, quote(escape(v))))
        return ''.join(res)

    def _flatten(self):
        """Yield this tag itself (it has no content to descend into)."""
        yield self

    def flatten(self):
        """Yield this tag itself (it has no content to descend into)."""
        yield self

    def __unicode__(self):
        return u'<' + self._name + self.attributes() + u'>'

    def __xxstr__(self):
        # Not picked up as __str__ because of its name; returns the unicode
        # rendering encoded with the 'u8' codec.
        return unicode(self).encode('u8')

    def __repr__(self):
        return repr(unicode(self))


class stag(xtag):
    """s(ingle)tag: rendered as one opening tag, ``<name attributes>``.
    """
    def __unicode__(self):
        pieces = (u'<', self._name, self.attributes(), u'>')
        return u''.join(pieces)


class tag(xtag):
    """Regular tag: outputs an open tag with attributes, followed by its
       contents, followed by a closing tag.

       Attributes can be set either as keyword arguments in the constructor
       or by assigning to attributes of the object.

       Content can be any combination of items, iterables, and generators:

         >> table(tr(td(i) for i in range(5)), tr(td(i**i) for i in range(5)))

       NB: Attributes that conflict with Python keywords have an underline
       appended, e.g.:  ``mytag.class_ = ...``
    """
    def __init__(self, tag_name, *content, **kw):
        xtag.__init__(self, tag_name, **kw)
        if len(content) == 1 and isinstance(content[0], _types.GeneratorType):
            # A lone generator is consumed right away so the content can be
            # rendered more than once.
            self._content = list(content[0])
        else:
            self._content = content

    def xcontent():
        # Builds the fget/fset pair for the ``xcontent`` property, which
        # exposes the raw content sequence.
        def fget(self):
            return self._content

        def fset(self, v):
            self._content = v
        return locals()
    xcontent = property(**xcontent())

    def _flatten(self, lst):
        """Yield the items of ``lst`` with all nesting removed.

        Strings and numbers are yielded as they are, tags contribute
        whatever their own ``flatten()`` yields, other iterables are
        flattened recursively and non-iterable objects are yielded
        unchanged.
        """
        for item in lst:
            if isinstance(item, (str, unicode, int, long, float)):
                yield item
            elif isinstance(item, xtag):
                for subitem in item.flatten():
                    yield subitem
            else:
                # Only the iter() call is guarded: a TypeError raised while
                # an iterable is being consumed is a real error and must
                # not be mistaken for "item is not iterable".
                try:
                    nested = iter(item)
                except TypeError:
                    yield item
                else:
                    for subitem in self._flatten(nested):
                        yield subitem

    def flatten(self, lst=None):
        """Yield the open tag, the flattened content and the close tag.

        :param lst: content to render instead of the tag's own content.
        """
        if lst is None:
            lst = self._content
        yield self.open_tag()
        for item in self._flatten(lst):
            yield item
        yield self.close_tag()

    def open_tag(self):
        """Return the opening tag, including attributes."""
        return u'<' + self._name + self.attributes() + u'>'

    def close_tag(self):
        """Return the closing tag, followed by ``_nlafter`` (either a
        newline or the empty string)."""
        return u'</' + self._name + u'>' + self._nlafter

    def __unicode__(self):
        # Any TypeError raised by unicode_repr propagates to the caller.
        return ''.join([unicode_repr(item) for item in self.flatten()])


class opentag(tag):
    """Tag that renders only its opening tag; content is never emitted."""

    def flatten(self, lst=None):
        opening = self.open_tag()
        yield opening


class closetag(tag):
    """Tag that renders only its closing tag; content is never emitted."""

    def flatten(self, lst=None):
        closing = self.close_tag()
        yield closing


class text(tag):
    """Content-only pseudo tag: renders its (flattened) contents with no
       surrounding open or close tag.  Handy for grouping several items
       at the top level.
    """
    def __init__(self, *content):
        super(text, self).__init__('text', *content)

    def flatten(self):
        # No open/close tag here: hand back the flattened content only.
        flattened = self._flatten(self._content)
        return flattened


class lines(text):
    """like text, except each item in content is separated with a <br> tag.
    """
    def flatten(self):
        """Return the flattened content with ``'<br>'`` between items.

        Empty content yields nothing.
        """
        separated = []
        for index, item in enumerate(self._content):
            if index:
                # Separator goes before every item except the first, so
                # empty and single-item content need no special casing.
                separated.append('<br>')
            separated.append(item)
        return self._flatten(separated)


class dtag(tag):
    """d(isappearing)tag: if the content is empty, i.e. there is no content
       or the content is ``('',)``, this tag doesn't output anything at
       all. Useful for legends, table captions, etc.
    """
    def _is_empty(self):
        """Return True when the tag has nothing to show."""
        content = self._content
        if not content:
            return True
        return len(content) == 1 and content[0] == u''

    def flatten(self, lst=None):
        """Like ``tag.flatten``, but yield nothing when the tag's own
        content is empty.

        Parent tags render their children through ``flatten()``, so the
        rule has to be applied here for nested tags to disappear too.
        """
        if lst is None and self._is_empty():
            return
        for item in super(dtag, self).flatten(lst):
            yield item

    def __unicode__(self):
        if self._is_empty():
            return u''
        # Delegate to tag.__unicode__.  Calling __str__ here would end up
        # in object.__str__ -> __repr__ -> unicode(self) -> __unicode__,
        # i.e. unbounded recursion.
        return super(dtag, self).__unicode__()


def _add(a, b):
    """Return a new dict holding the items of ``a`` overlaid with those of
    ``b``; neither argument is modified.
    """
    merged = dict(a)
    merged.update(b)
    return merged


def mktag(name, _parent=tag, _nlafter=False, **attrs):
    """Create and return a tag class for the element ``name``.

    :param name: element name; also used as the class's ``__name__``.
    :param _parent: base class of the generated class.
    :param _nlafter: if true, instances get ``_nlafter`` set to a newline,
        otherwise to the empty string.
    :param attrs: default HTML attributes; keyword arguments given when
        instantiating the class take precedence over them.
    """
    newline = '\n' if _nlafter else ''

    class _generated(_parent):
        def __init__(self, *content, **kw):
            merged = _add(attrs, kw)
            _parent.__init__(self, name, *content, **merged)
            self._nlafter = newline

    _generated.__name__ = name
    return _generated


def mkxtag(name, **attrs):
    """Create and return an ``xtag`` subclass for the element ``name``.

    :param name: element name; also used as the class's ``__name__``.
    :param attrs: default HTML attributes; keyword arguments given when
        instantiating the class take precedence over them.
    """
    class _generated(xtag):
        def __init__(self, **kw):
            merged = _add(attrs, kw)
            xtag.__init__(self, name, **merged)

    _generated.__name__ = name
    return _generated


def mkdtag(name, **attrs):
    """Create a tag class for ``name`` that derives from ``dtag``;
    ``attrs`` are its default HTML attributes.
    """
    return mktag(name, dtag, **attrs)


def mkstag(name):
    """Create a tag class for ``name`` that derives from ``stag``."""
    return mktag(name, stag)

# DOCTYPE declarations.  Each one is an stag-based class whose "element
# name" is the complete declaration text, so it renders as <!DOCTYPE ...>.
doctype401strict = mkstag(
    '!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n'
    '    "http://www.w3.org/TR/html4/strict.dtd"')
doctype401transitional = mkstag(
    '!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"\n'
    '    "http://www.w3.org/TR/html4/loose.dtd"')
doctype401frameset = mkstag(
    '!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"\n'
    '    "http://www.w3.org/TR/html4/frameset.dtd"')

# ``doctype`` is an alias for the HTML 4.01 Strict declaration.
doctype = doctype401strict


# Elements without content or closing tag.  Every name listed here becomes a
# module-level xtag subclass of the same name (``br``, ``img``, ...).
xtags = "br hr img input link col meta".split()

for t in xtags:
    globals()[t] = mkxtag(t)

# Regular elements (open tag, content, close tag).  Every name listed here
# becomes a module-level tag subclass of the same name.
tags = '''
  a abbr acronym address applet area b base basefont bdo big blockquote
  body button center cite code colgroup dd dfn div dl dt em
  fieldset font form frame frameset h1 h2 h3 h4 h5 h6 head html i
  iframe ins kbd label li map menu nobr noframes noscript ol
  optgroup option p param pre q s samp small span strike strong sub
  sup table tbody td textarea tfoot th thead title tr tt u ul var
  '''.split()

# Elements whose closing tag is followed by a newline.  Only names that are
# also in ``tags`` are affected by the loop below.
_nlafter = '''
  blockquote body center div dl dt fieldset form frame h1 h2 h3 h4 h5 h6
  head html iframe legend li ol option p pre table tbody title tr ul
  col colgroup
  '''.split()

for t in tags:
    globals()[t] = mktag(t, tag, t in _nlafter)

# Backward compatibility: this element used to be exported under the
# misspelled name ``bsefont``; keep that name as an alias.
globals()['bsefont'] = globals()['basefont']

# Elements generated as dtag subclasses, one module-level class per name.
dtags = "caption legend".split()

for t in dtags:
    globals()[t] = mkdtag(t)

# special case (del is a keyword)
# The generated classes are bound to names with a trailing underscore; the
# element names themselves are 'del', 'dir' and 'object'.
del_ = mktag('del')
dir_ = mktag('dir')
object_ = mktag('object')

# <link> variants with preset attributes.  NB: ``next`` rebinds the builtin
# of the same name inside this module.
start = mkxtag('link', rel='start')
prev = mkxtag('link', rel='prev')
next = mkxtag('link', rel='next')
stylesheet = mkxtag('link', rel='stylesheet', type='text/css', media='screen')
nynorsk = mkxtag('link', rel='alternate', hreflang='nn', lang='nn')
bokmaal = mkxtag('link', rel='alternate', hreflang='nb', lang='nb')
norsk = mkxtag('link', rel='alternate', hreflang='no', lang='no')
english = mkxtag('link', rel='alternate', hreflang='en', lang='en')
pdf = mkxtag('link', rel='alternate', type='application/pdf', media='print')

# <script> and <style> with a default ``type`` attribute.
script = mktag('script', type='text/javascript')
style = mktag('style', type='text/css')

# <input> variants, one per ``type`` value.
text_input = mkxtag('input', type='text')
hidden_input = mkxtag('input', type='hidden')
password_input = mkxtag('input', type='password')
checkbox_input = mkxtag('input', type='checkbox')
radio_input = mkxtag('input', type='radio')
submit_button = mkxtag('input', type='submit')


class select(tag):
    """<select> element whose <option> children are generated from
    ``options``.

    :param options: sequence of ``(value, label)`` pairs, or of single
        items that serve as both value and label.
    :param selected: value of the option to mark as selected, or None.
    :param kw: HTML attributes.  If ``name`` is given and ``id`` is not,
        ``id`` defaults to ``'id_' + name``.
    """
    def __init__(self, options, selected=None, **kw):
        if 'id' not in kw and 'name' in kw:
            kw['id'] = 'id_' + kw['name']
        super(select, self).__init__('select', **kw)
        # xtag.__setattr__ only routes an assignment to a property if
        # hasattr() succeeds, i.e. if the property getter works.  The
        # backing fields must therefore exist before ``options`` or
        # ``selected`` are assigned; otherwise the assigned value would be
        # stored as an HTML attribute instead.
        self._options = None
        self._selected = selected
        self.options = options
        self._content = self._option_tags(selected)

    def _option_tags(self, selected):
        """Return a tuple of <option> tags, marking the one whose value
        matches ``selected``."""
        # Both sides of the comparison go through dk.text's u8() helper
        # (presumably a UTF-8 encoder -- see dk.text).
        if selected is not None:
            selected = u8(selected)
        content = []
        for k, v in self.options:
            if u8(k) == selected:
                opt = option(v, value=k, selected='selected')
            else:
                opt = option(v, value=k)
            content.append(opt)
        return tuple(content)

    def options():
        def fset(self, options):
            if len(options) == 0:
                self._options = []
                return

            first = options[0]
            is_pair = False
            if not isinstance(first, basestring):
                try:
                    is_pair = len(first) == 2
                except TypeError:
                    # Scalar option without a length (e.g. an int).
                    is_pair = False

            if is_pair:
                self._options = [(unicode_repr(k), unicode_repr(v))
                                 for (k, v) in options]
            else:
                self._options = [(unicode_repr(o), unicode_repr(o))
                                 for o in options]

        def fget(self):
            return self._options
        return locals()
    # List of (value, label) pairs, both passed through unicode_repr.
    options = property(**options())

    def selected():
        def fset(self, v):
            if v not in self.values:
                raise ValueError("Only valid options can be selected.")
            self._selected = v
            # Regenerate the <option> tags so the output reflects the
            # new selection.
            self._content = self._option_tags(v)

        def fget(self):
            return self._selected
        return locals()
    # The selected value; assignment raises ValueError for unknown values.
    selected = property(**selected())

    def values():
        def fget(self):
            return [k for (k, v) in self.options]
        return locals()
    # Read-only list of the option values.
    values = property(**values())


class tabledesc(object):
    """Holds the positional arguments it was created with in ``cols``."""

    def __init__(self, *cols):
        # ``cols`` is the tuple of all positional arguments, in order.
        self.cols = cols
        

def test_doctest():
    """
       ::

           >>> br()
           u'<br>'
           >>> div('hello', b('world'))
           u'<div>hello<b>world</b></div>\\n'
           >>> print select(options=[u'a', u'b'], name='foo')
           u'<select name="foo" id="id_foo"><option value="a">a</option>\\n<option value="b">b</option>\\n</select>'

    """
    # Runs every doctest found by doctest.testmod() with its default
    # arguments.
    from doctest import testmod
    testmod()

if __name__ == "__main__":
    # Run the doctests when the module is executed as a script.
    test_doctest()