# -*- coding: utf-8 -*-
"""
New version of html.py module that works on/with Unicode.
"""
from dk.text import u8, unicode_repr
import types as _types
import htmlentitydefs as _h
import string as _s
from .css import css
# Keep a handle on the builtin ``map``: the tag list further down contains
# 'map', so the module-level name ``map`` is rebound to a tag class.
_map = map
# Encodings tried, in this order, by rawstr2unicode() for byte strings.
raw_string_encodings = ('utf-8', 'iso-8859-1')
# Element names classified as inline.  Note that the last eight names
# (applet ... script) also appear in BLOCKLEVEL_ELEMENTS below.
INLINE_ELEMENTS = '''
a abbr acronym b basefont bdo big br cite code dfn em font i img input
kbd label q s samp select small span strike strong sub sup textarea tt
u var applet button del iframe ins map object script'''.split()
# Element names classified as block-level.
BLOCKLEVEL_ELEMENTS = '''
address blockquote center dir div dl fieldset form h1 h2 h3 h4 h5 h6
hr isindex menu noframes noscript ol p pre table ul dd dt frameset
li tbody td tfoot th thead tr applet button del iframe ins map object
script
'''.split()
class EscapedString(unicode):
    """Marker subclass of ``unicode``; it adds no behaviour of its own."""
def escape_char(unichar):
    """Return the ASCII/HTML representation of a single character.

    * Something that already looks like an entity (``&...;``) is returned
      unchanged.
    * Characters with a named HTML entity become ``&name;`` (this includes
      ``&``, ``<``, ``>`` and ``"``).
    * Remaining ASCII characters are returned as they are.
    * Remaining non-ASCII characters become a numeric character reference
      (``&#NNNN;``) so that no text is silently lost.
    * NUL is dropped (empty string).

    :param unichar: a one-character string, or a complete entity reference.
    :returns: an ASCII-only ``str``.
    :raises TypeError: from ``ord()`` if *unichar* is longer than one
        character and is not an entity reference.
    """
    if len(unichar) > 1 and unichar[0] == '&' and unichar[-1] == ';':
        # Already an entity reference: do not escape it a second time.
        return str(unichar)
    codepoint = ord(unichar)
    entity = _h.codepoint2name.get(codepoint)
    if entity is not None:
        return '&' + entity + ';'
    if 0 < codepoint < 128:
        return str(unichar)
    if codepoint >= 128:
        # No named entity available: fall back to a numeric reference
        # instead of throwing the character away.
        return '&#%d;' % codepoint
    return ''
def escaped_array(s):
    """Escape every character of ``s`` individually.

    Returns a list with one entry per character of ``s``; each entry is
    whatever ``escape_char`` produces for that character (a plain ASCII
    character or an entity reference such as ``&oslash;``).
    """
    escaped = []
    for character in s:
        escaped.append(escape_char(character))
    return escaped
def escape(s, enc=None):
    """Turn ``s`` into an ASCII string using HTML entity references.

    ``None`` gives the empty string.  A non-unicode ``s`` is first decoded
    with ``enc`` when an encoding is supplied; otherwise it is processed
    as it is, item by item.  Every item is passed through ``escape_char``
    (producing e.g. ``&oslash;`` or ``&aelig;``) and the results are
    concatenated.
    """
    if s is None:
        return ''
    must_decode = enc is not None and not isinstance(s, unicode)
    chars = s.decode(enc) if must_decode else s
    pieces = [escape_char(c) for c in chars]
    return ''.join(pieces)
def u8escape(s):
    """Shorthand for ``escape`` with the encoding fixed to ``'u8'``."""
    return escape(s, enc='u8')
def rawstr2unicode(s):
    """Decode the raw string ``s`` to unicode.

    The encodings in ``raw_string_encodings`` are tried one after another
    and the first successful decoding is returned.

    :raises UnicodeError: if none of the encodings can decode ``s``.
    """
    for codec in raw_string_encodings:
        try:
            decoded = unicode(s, codec)
        except UnicodeDecodeError:
            continue
        else:
            return decoded
    raise UnicodeError("Could not decode raw string.")
def normalize(v):
    """Return a unicode version of ``v``.

    * ``unicode`` objects are returned unchanged.
    * Byte strings are decoded with :func:`rawstr2unicode`.
    * Any other object is converted with ``str()`` and the resulting byte
      string is decoded the same way, so a ``__str__`` that returns
      non-ASCII bytes no longer raises ``UnicodeDecodeError``.

    :raises UnicodeError: if the byte string cannot be decoded by
        :func:`rawstr2unicode`.
    """
    if isinstance(v, unicode):
        return v
    if not isinstance(v, basestring):
        # all 'other' objects: call their __str__ method
        v = str(v)
    # str objects: try to find encoding
    return rawstr2unicode(v)
def quote(v):
    '''Wrap ``v`` in double quotes for use as an HTML attribute value.

    Double quotes inside ``v`` are replaced by ``&quot;`` so they cannot
    terminate the attribute value early; single quotes are left alone.

    >>> quote(u"Bjorn's")
    u'"Bjorn\\'s"'
    >>> quote(u'the "best"')
    u'"the &quot;best&quot;"'
    '''  # '
    return u'"%s"' % v.replace(u'"', u'&quot;')
def norm_attr_name(a):
    """Convert a Python identifier to the HTML attribute name it stands for.

    ``_foo_bar => _foo_bar``, ``class_ => class``, ``max_height => max-height``

    Names with a leading underscore (and the empty string) are returned
    unchanged.  Otherwise one trailing underscore is removed and every
    remaining underscore becomes a hyphen.

    >>> norm_attr_name(u'class_')
    u'class'
    >>> norm_attr_name(u'z_index')
    u'z-index'
    """
    # ``not a`` guards the empty string, which has no first/last character.
    if not a or a.startswith(u'_'):
        return a
    if a.endswith(u'_'):
        a = a[:-1]
    return a.replace(u'_', u'-')
class xtag(object):
    """x(ml-style)tag: a tag without content or a closing tag.

    E.g. ``<br/>`` would be ``xtag('br')``.

    HTML attributes are passed as keyword arguments or assigned as object
    attributes; their names are normalised with ``norm_attr_name``.
    Names starting with an underscore are ordinary instance attributes.

    .. note:: [2009-03-11] w3 validator complains that 4.01 loose should not
              use ``<foo />`` but ``<foo>``.
    """

    def __init__(self, tag_name, **kw):
        self._attr = {}
        self._name = tag_name
        self._nlafter = ''
        for k, v in kw.items():
            self._attr[norm_attr_name(k)] = v

    def __getattr__(self, name):
        """Look up ``name`` among the HTML attributes.

        Only called when normal attribute lookup fails.

        :raises AttributeError: if no such HTML attribute has been set.
        """
        try:
            # Go through __dict__ rather than ``self._attr``: if ``_attr``
            # does not exist yet, ``self._attr`` would re-enter this method
            # and recurse without bound.
            return self.__dict__['_attr'][norm_attr_name(name)]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        """Store ``value`` as an instance attribute or as an HTML attribute.

        Underscore-prefixed names and names that already resolve on the
        object (e.g. properties) are set as regular attributes; anything
        else is stored as an HTML attribute.
        """
        name = norm_attr_name(name)
        if name.startswith('_'):
            object.__setattr__(self, name, value)
        elif name in self._attr:
            self._attr[name] = value
        elif hasattr(self, name):
            object.__setattr__(self, name, value)
        else:
            self._attr[name] = value

    def attributes(self):
        """Return the HTML attributes as a string like `` key="val"``.

        Values are converted with ``normalize`` (``css`` objects via
        ``str`` first), entity-escaped and quoted.  Attributes whose
        normalised value is empty are left out.
        """
        res = []
        for k, v in self._attr.items():
            if isinstance(v, css):
                v = str(v)
            v = normalize(v)
            if v:
                res.append(u' %s=%s' % (k, quote(escape(v))))
        return ''.join(res)

    def _flatten(self):
        yield self

    def flatten(self):
        """Yield the pieces this tag renders to: just the tag itself."""
        yield self

    def __unicode__(self):
        return u'<' + self._name + self.attributes() + u'>'

    def __xxstr__(self):
        # Not named __str__, so Python does not use it for str().
        return unicode(self).encode('u8')

    def __repr__(self):
        return repr(unicode(self))
class stag(xtag):
    """s(ingle)tag: rendered as a lone ``<name attributes>`` with no
    content and no closing tag.
    """

    def __unicode__(self):
        pieces = [u'<', self._name, self.attributes(), u'>']
        return u''.join(pieces)
class tag(xtag):
    """Regular tag: outputs an open tag with attributes, followed by its
    contents, followed by a closing tag.

    Attributes can be set either as keyword arguments in the constructor
    or by assigning to attributes of the object.

    Content can be any combination of items, iterables, and generators:

    >> table(tr(td(i) for i in range(5)), tr(td(i**i) for i in range(5)))

    NB: Attributes that conflict with Python keywords have an underline
    appended, e.g.: ``mytag.class_ = ...``
    """

    def __init__(self, tag_name, *content, **kw):
        xtag.__init__(self, tag_name, **kw)
        if len(content) == 1 and isinstance(content[0], _types.GeneratorType):
            # A single generator is consumed now, so the tag can be
            # rendered more than once.
            self._content = list(content[0])
        else:
            self._content = content

    def xcontent():
        # property-from-locals idiom: the returned dict supplies the
        # fget/fset keyword arguments of property().
        def fget(self):
            return self._content

        def fset(self, v):
            self._content = v
        return locals()
    xcontent = property(**xcontent())

    def _flatten(self, lst):
        """Yield the leaves of ``lst``, descending into tags and iterables.

        Strings and numbers are yielded as they are, tags contribute the
        items of their own ``flatten()``, any other iterable is flattened
        recursively and non-iterable objects are yielded unchanged.
        """
        for item in lst:
            if isinstance(item, (str, unicode, int, long, float)):
                yield item
            elif isinstance(item, xtag):
                for subitem in item.flatten():
                    yield subitem
            else:
                # Only the iterability probe is guarded.  A TypeError
                # raised while *consuming* the iterable is a real error and
                # must propagate rather than make us yield the
                # half-consumed iterable itself.
                try:
                    iterator = iter(item)
                except TypeError:
                    yield item
                else:
                    for subitem in self._flatten(iterator):
                        yield subitem

    def flatten(self, lst=None):
        """Yield the open tag, the flattened content and the close tag.

        :param lst: content to render instead of the tag's own content.
        """
        if lst is None:
            lst = self._content
        yield self.open_tag()
        for item in self._flatten(lst):
            yield item
        yield self.close_tag()

    def open_tag(self):
        """Return the opening tag, including attributes."""
        return u'<' + self._name + self.attributes() + u'>'

    def close_tag(self):
        """Return the closing tag plus the optional trailing newline."""
        return u'</' + self._name + u'>' + self._nlafter

    def __unicode__(self):
        return ''.join([unicode_repr(item) for item in self.flatten()])
class opentag(tag):
    """Tag that renders only its opening tag; content is not emitted."""

    def flatten(self, lst=None):
        opening = self.open_tag()
        yield opening
class closetag(tag):
    """Tag that renders only its closing tag; content is not emitted."""

    def flatten(self, lst=None):
        closing = self.close_tag()
        yield closing
class text(tag):
    """text tag: renders its contents only, with no surrounding open or
    close tag.  Useful for grouping several items at the top level.
    """

    def __init__(self, *content):
        super(text, self).__init__('text', *content)

    def flatten(self):
        items = self._content
        return self._flatten(items)
class lines(text):
    """like text, except each item in content is separated with a <br> tag.

    Empty content renders as nothing (previously it raised ``IndexError``).
    """

    def flatten(self):
        content = []
        for position, item in enumerate(self._content):
            if position:
                # separator goes between items, never before the first
                content.append('<br>')
            content.append(item)
        return self._flatten(content)
class dtag(tag):
    """d(issappearing)tag: if the content is empty, i.e. the content is
    ``()`` or ``('',)``, this tag doesn't output anything at all.  Useful
    for legends, table captions, etc.
    """

    def __unicode__(self):
        content = self._content
        if not content:
            return u''
        if len(content) == 1 and content[0] == u'':
            return u''
        # Delegate to tag.__unicode__.  Calling __str__ here would end up in
        # object.__str__ -> __repr__ -> __unicode__ and recurse forever.
        return super(dtag, self).__unicode__()
def _add(a, b):
t = {}
t.update(a)
t.update(b)
return t
def mktag(name, _parent=tag, _nlafter=False, **attrs):
    """Create and return a tag class named ``name``.

    The class derives from ``_parent``.  Instances get ``attrs`` as default
    attributes (keyword arguments given at instantiation win) and, when
    ``_nlafter`` is true, a newline as their ``_nlafter`` suffix.
    """
    class _tmp(_parent):
        def __init__(self, *content, **kw):
            merged = _add(attrs, kw)
            _parent.__init__(self, name, *content, **merged)
            if _nlafter:
                self._nlafter = '\n'
            else:
                self._nlafter = ''

    _tmp.__name__ = name
    return _tmp
def mkxtag(name, **attrs):
    """Create and return an ``xtag`` subclass named ``name``.

    ``attrs`` are default attributes; keyword arguments given when the
    class is instantiated take precedence over them.
    """
    class _tmp(xtag):
        def __init__(self, **kw):
            merged = _add(attrs, kw)
            xtag.__init__(self, name, **merged)

    _tmp.__name__ = name
    return _tmp
def mkdtag(name, **attrs):
    """Create a tag class named ``name`` that derives from ``dtag``."""
    tag_class = mktag(name, _parent=dtag, **attrs)
    return tag_class
def mkstag(name):
    """Create a tag class named ``name`` that derives from ``stag``."""
    tag_class = mktag(name, _parent=stag)
    return tag_class
# Doctype declarations.  Each one is a tag class built with mkstag() whose
# "tag name" is the complete text of the declaration.
doctype401strict = mkstag(
'!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n'
' "http://www.w3.org/TR/html4/strict.dtd"')
doctype401transitional = mkstag(
'!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"\n'
' "http://www.w3.org/TR/html4/loose.dtd"')
doctype401frameset = mkstag(
'!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"\n'
' "http://www.w3.org/TR/html4/frameset.dtd"')
# Default doctype: HTML 4.01 strict.
doctype = doctype401strict
# Tags without content or closing tag: one xtag subclass per name, published
# as a module-level name.
xtags = "br hr img input link col meta".split()
for t in xtags:
    globals()[t] = mkxtag(t)

# Regular tags: one tag subclass per name.  Several of these names
# deliberately shadow builtins inside this module (e.g. ``map``).
tags = '''
    a abbr acronym address applet area b base basefont bdo big blockquote
    body button center cite code colgroup dd dfn div dl dt em
    fieldset font form frame frameset h1 h2 h3 h4 h5 h6 head html i
    iframe ins kbd label li map menu nobr noframes noscript ol
    optgroup option p param pre q s samp small span strike strong sub
    sup table tbody td textarea tfoot th thead title tr tt u ul var
    '''.split()

# Tags whose closing tag is followed by a newline in the output.
_nlafter = '''
    blockquote body center div dl dt fieldset form frame h1 h2 h3 h4 h5 h6
    head html iframe legend li ol option p pre table tbody title tr ul
    col colgroup
    '''.split()

for t in tags:
    globals()[t] = mktag(t, tag, t in _nlafter)

# Earlier versions misspelt ``basefont`` as ``bsefont``; keep the old name
# importable as an alias of the correctly named class.
bsefont = globals()['basefont']

# Tags that render nothing at all when their content is empty.
dtags = "caption legend".split()
for t in dtags:
    globals()[t] = mkdtag(t)
# special case (del is a keyword); dir and object get a trailing underscore
# as well, which keeps the builtins of the same name usable in this module.
del_ = mktag('del')
dir_ = mktag('dir')
object_ = mktag('object')
# <link> variants with preset attributes.  Note that ``next`` rebinds the
# builtin of the same name at module level.
start = mkxtag('link', rel='start')
prev = mkxtag('link', rel='prev')
next = mkxtag('link', rel='next')
stylesheet = mkxtag('link', rel='stylesheet', type='text/css', media='screen')
# Alternate-language links (hreflang/lang: nn, nb, no, en).
nynorsk = mkxtag('link', rel='alternate', hreflang='nn', lang='nn')
bokmaal = mkxtag('link', rel='alternate', hreflang='nb', lang='nb')
norsk = mkxtag('link', rel='alternate', hreflang='no', lang='no')
english = mkxtag('link', rel='alternate', hreflang='en', lang='en')
pdf = mkxtag('link', rel='alternate', type='application/pdf', media='print')
# <script> and <style> with a default type attribute.
script = mktag('script', type='text/javascript')
style = mktag('style', type='text/css')
# <input> variants, one per preset type attribute.
text_input = mkxtag('input', type='text')
hidden_input = mkxtag('input', type='hidden')
password_input = mkxtag('input', type='password')
checkbox_input = mkxtag('input', type='checkbox')
radio_input = mkxtag('input', type='radio')
submit_button = mkxtag('input', type='submit')
class select(tag):
    """``<select>`` element whose ``<option>`` children are built from data.

    :param options: iterable of values, or of ``(value, label)`` pairs.
        Whether pairs are used is decided by looking at the first item.
    :param selected: value of the option to mark as selected, or ``None``.
    :param kw: HTML attributes.  If ``id`` is missing and ``name`` is
        given, ``id`` defaults to ``'id_' + name``.
    """

    def __init__(self, options, selected=None, **kw):
        # Derive a default id only when there is a name to derive it from.
        if 'id' not in kw and 'name' in kw:
            kw['id'] = 'id_' + kw['name']
        super(select, self).__init__('select', **kw)
        # The backing fields must exist before the properties are assigned:
        # xtag.__setattr__ uses hasattr() to tell properties from HTML
        # attributes, and hasattr() is false while the getter still fails.
        self._options = None
        self._selected = selected
        self.options = options
        if selected is not None:
            selected = u8(selected)
        content = []
        for k, v in self.options:
            if u8(k) == selected:
                opt = option(v, value=k, selected='selected')
            else:
                opt = option(v, value=k)
            content.append(opt)
        self._content = tuple(content)

    @staticmethod
    def _is_pair(item):
        """True if ``item`` is a two-element non-string sequence."""
        if isinstance(item, basestring):
            return False
        try:
            return len(item) == 2
        except TypeError:
            # e.g. numbers: no length, so certainly not a pair
            return False

    def _get_options(self):
        return self._options

    def _set_options(self, options):
        options = list(options)
        if options and self._is_pair(options[0]):
            self._options = [(unicode_repr(k), unicode_repr(v))
                             for (k, v) in options]
        else:
            self._options = [(unicode_repr(o), unicode_repr(o))
                             for o in options]

    # List of (value, label) pairs; both parts go through unicode_repr.
    options = property(_get_options, _set_options)

    def _get_selected(self):
        return self._selected

    def _set_selected(self, v):
        if v not in self.values:
            raise ValueError("Only valid options can be selected.")
        self._selected = v

    # NOTE: assigning only records the value; the option elements built in
    # __init__ are not regenerated.
    selected = property(_get_selected, _set_selected)

    def _get_values(self):
        return [k for (k, v) in self.options]

    # Read-only list of the option values.
    values = property(_get_values)
class tabledesc(object):
    """Holds the positional arguments it was created with as ``cols``."""

    def __init__(self, *cols):
        # ``cols`` is the tuple of all positional arguments, in order.
        self.cols = cols
def test_doctest():
    """
    ::

        >>> br()
        u'<br>'
        >>> div('hello', b('world'))
        u'<div>hello<b>world</b></div>\\n'
        >>> print select(options=[u'a', u'b'], name='foo')
        u'<select name="foo" id="id_foo"><option value="a">a</option>\\n<option value="b">b</option>\\n</select>'
    """
    # The docstring above is doctest input, not just documentation.
    from doctest import testmod
    testmod()
if __name__ == "__main__":
    # Run the module's doctests when executed as a script.  (The previous
    # call target, ``_test``, is not defined anywhere in this module.)
    test_doctest()