# -*- encoding: utf8 -*- # # $Id: html.py 5409 2011-06-29 07:07:25Z rjones $ # $HeadURL: svn+ssh://svn/svn/trunk/api/eklib/html.py $ # '''Simple, elegant HTML, XHTML and XML generation. Constructing your HTML ---------------------- To construct HTML start with an instance of ``html.HTML()``. Add tags by accessing the tag's attribute on that object. For example: >>> from html import HTML >>> h = HTML() >>> h.p('Hello, world!') >>> print h # or print(h) in python 3+
<p>Hello, world!</p>
You may supply a tag name and some text contents when creating a HTML instance: >>> h = HTML('html', 'text') >>> print h <html>text</html> You may also append text content later using the tag's ``.text()`` method or using augmented addition ``+=``. Any HTML-specific characters (``<>&"``) in the text will be escaped for HTML safety as appropriate unless ``escape=False`` is passed. Each of the following examples uses a new ``HTML`` instance: >>> p = h.p('hello world!\\n') >>> p.br >>> p.text('more &rarr; text', escape=False) >>> p += ' ... augmented' >>> h.p >>> print h
<p>hello, world!
<br>more &rarr; text ... augmented</p>
<p>
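Note also that the top-level ``HTML`` object adds newlines between tags by default. Finally in the above you'll see an empty paragraph tag - tags with no contents get no closing tag. If the tag should have sub-tags you have two options. You may either add the sub-tags directly on the tag: >>> l = h.ol >>> l.li('item 1') >>> l.li.b('item 2 > 1') >>> print h
<ol>
<li>item 1</li>
<li><b>item 2 &gt; 1</b></li>
</ol>
Lists and tables get a newline between their sub-tags by default. Tag attributes may be passed in as keyword arguments:
>>> t = h.table(border='1')
>>> for i in range(2):
...   r = t.tr
...   r.td('column 1')
...   r.td('column 2')
>>> print t
<table border="1">
<tr><td>column 1</td><td>column 2</td></tr>
<tr><td>column 1</td><td>column 2</td></tr>
</table>
Alternatively, use the tag as a context manager and add the sub-tags inside the ``with`` block. Since ``class`` is a reserved word in Python, pass ``klass`` to set the ``class`` attribute:
>>> print h.p('content', klass="styled")
<p class="styled">content</p>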
Unicode ------- ``HTML`` will work with either regular strings **or** unicode strings, but not **both at the same time**. Obtain the final unicode string by calling ``unicode()`` on the ``HTML`` instance: >>> h = HTML() >>> h.p(u'Some Euro: €1.14') >>> unicode(h) u'<p>Some Euro: €1.14</p>
' If (under Python 2.x) you add non-unicode strings or attempt to get the resultant HTML source through any means other than ``unicode()`` then you will most likely get one of the following errors raised: UnicodeDecodeError Probably means you've added non-unicode strings to your HTML. UnicodeEncodeError Probably means you're trying to get the resultant HTML using ``print`` or ``str()`` (or ``%s``). How generation works -------------------- The HTML document is generated when the ``HTML`` instance is "stringified". This could be done either by invoking ``str()`` on it, or just printing it. It may also be returned directly as the "iterable content" from a WSGI app function. You may also render any tag or sub-tag at any time by stringifying it. Tags with no contents (either text or sub-tags) will have no closing tag. There is no "special list" of tags that must always have closing tags, so if you need to force a closing tag you'll need to provide some content, even if it's just a single space character. Rendering doesn't affect the HTML document's state, so you can add to or otherwise manipulate the HTML after you've stringified it. Creating XHTML -------------- To construct XHTML start with an instance of ``html.XHTML()`` and use it as you would an ``HTML`` instance. Empty elements will now be rendered with the appropriate XHTML minimized tag syntax. For example: >>> from html import XHTML >>> h = XHTML() >>> h.p >>> h.br >>> print hsome text
If a name is not passed in then the instance becomes a container for other tags that itself generates no tag: >>> h = HTML() >>> h.p('text') >>> h.p('text') >>> print h
<p>text</p>
<p>text</p>
def _escape(text, quote=False):
    '''Replace the HTML-special characters in ``text`` with entities.

    Drop-in stand-in for ``cgi.escape``, which was removed in Python 3.8.
    The stdlib ``html.escape`` is deliberately not used: this module is
    itself documented as being imported as ``html``, so ``import html``
    from here could resolve to this very file.

    "&" is replaced first so that the entities produced for "<" and ">"
    are not escaped a second time. The double quote is only replaced when
    ``quote`` is true (used for attribute values).
    '''
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;').replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
    return text


class HTML(object):
    '''Easily generate HTML.

    Sub-tags are created by attribute access (``h.p``), customised by
    calling them with text and/or attributes (``h.p('text', klass='x')``)
    and rendered by stringifying a tag or the whole document.

    >>> print(HTML('html', 'some text', newlines=False))
    <html>some text</html>

    If a name is not passed in then the instance becomes a container for
    other tags that itself generates no tag:

    >>> h = HTML()
    >>> _ = h.p('text')
    >>> _ = h.p('text')
    >>> print(h)
    <p>text</p>
    <p>text</p>

    Used as a context manager (``with h.table: ...``) a tag becomes the
    target for tags subsequently created through the top-level instance.
    '''
    # tags whose children are separated by newlines unless told otherwise
    newline_default_on = set('table ol ul dl'.split())

    # how a tag with no content is written; subclasses override this
    _empty_tag = '<%s>'

    def __init__(self, name=None, text=None, stack=None, newlines=True,
            escape=True):
        '''Create a tag called ``name`` (or an anonymous container).

        ``text`` is optional initial content, escaped unless ``escape`` is
        false. ``stack`` is the shared list of "currently open" tags; it is
        only passed when a tag is created from an existing one, which also
        marks the new tag as not being the top-level instance. ``newlines``
        is honoured for the top-level instance only; sub-tags take their
        default from ``newline_default_on``.
        '''
        self._name = name
        self._content = []
        self._attrs = {}
        # insert newlines between content?
        if stack is None:
            stack = [self]
            self._top = True
            self._newlines = newlines
        else:
            self._top = False
            self._newlines = name in self.newline_default_on
        self._stack = stack
        if text is not None:
            self.text(text, escape)

    def _append(self, item):
        # The top-level instance adds to whichever tag is currently open
        # (innermost "with" block); every other tag adds to itself.
        if self._top:
            self._stack[-1]._content.append(item)
        else:
            self._content.append(item)

    def __getattr__(self, name):
        '''Create, attach and return the sub-tag called ``name``.

        ``newline`` is special: it adds (and returns) a literal "\\n".
        Dunder names raise AttributeError so that protocol probes such as
        ``hasattr(h, '__html__')``, ``copy`` or ``pickle`` do not silently
        add bogus tags to the document.
        '''
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        # adding a new tag or newline
        if name == 'newline':
            e = '\n'
        else:
            e = self.__class__(name, stack=self._stack)
        self._append(e)
        return e

    def __iadd__(self, other):
        '''Append ``other`` (text or another tag) as-is, without escaping.'''
        self._append(other)
        return self

    def text(self, text, escape=True):
        '''Add text to the document. If "escape" is True any characters
        special to HTML will be escaped.
        '''
        if escape:
            text = _escape(text)
        # adding text
        self._append(text)

    def raw_text(self, text):
        '''Add raw, unescaped text to the document. This is useful for
        explicitly adding HTML code or entities.
        '''
        return self.text(text, escape=False)

    def __call__(self, *content, **kw):
        '''Set this tag's content and attributes; return the tag.

        Positional arguments replace any existing content (escaped unless
        ``escape=False`` is passed). ``newlines=`` switches newline
        insertion for this tag. Every other keyword becomes an attribute,
        with ``klass`` standing in for the reserved word ``class``.
        Attribute values are always escaped, including double quotes.

        Raises TypeError when the call looks like ``read()`` / ``read(n)``
        on a tag named "read", i.e. the instance was mistaken for a file.
        '''
        if self._name == 'read':
            if len(content) == 1 and isinstance(content[0], int):
                raise TypeError('you appear to be calling read(%d) on '
                    'a HTML instance' % content)
            elif len(content) == 0:
                raise TypeError('you appear to be calling read() on a '
                    'HTML instance')

        # customising a tag with content or attributes
        escape = kw.pop('escape', True)
        if content:
            if escape:
                self._content = [_escape(item) for item in content]
            else:
                # must stay a list: text() and += append to it later
                self._content = list(content)
        if 'newlines' in kw:
            # special-case to allow control over newlines
            self._newlines = kw.pop('newlines')
        for k in kw:
            attr = 'class' if k == 'klass' else k
            self._attrs[attr] = _escape(kw[k], True)
        return self

    def __enter__(self):
        # we're now adding tags to me!
        self._stack.append(self)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # we're done adding tags to me!
        self._stack.pop()

    def __repr__(self):
        return '<HTML %s 0x%x>' % (self._name, id(self))

    def _force_end_tag(self):
        # Should a tag with no content still be written as an open/close
        # pair? Never for HTML: it is written as a lone opening tag.
        return False

    def _stringify(self, str_type):
        '''Render this tag and its content, converting each piece of
        content with ``str_type``.
        '''
        join = '\n' if self._newlines else ''
        if self._name is None:
            # anonymous container: just the content
            return join.join(map(str_type, self._content))
        attrs = ['%s="%s"' % item for item in self._attrs.items()]
        start = ' '.join([self._name] + attrs)
        if self._content or self._force_end_tag():
            body = join.join(map(str_type, self._content))
            return ('<%s>%s' % (start, join) + body + join +
                '</%s>' % self._name)
        return self._empty_tag % start + join

    def __str__(self):
        return self._stringify(str)

    def __unicode__(self):
        # "unicode" only exists on Python 2; fall back to str elsewhere
        try:
            text_type = unicode
        except NameError:
            text_type = str
        return self._stringify(text_type)

    def __iter__(self):
        # yields the whole rendered document as a single chunk
        return iter([str(self)])


class XHTML(HTML):
    '''Easily generate XHTML.

    Elements listed in ``empty_elements`` are written in minimized form
    (``<br />``) when they have no content; any other tag without content
    is written as an explicit open/close pair (``<p></p>``).
    '''
    empty_elements = set('base meta link hr br param img area input col '
        'colgroup basefont isindex frame'.split())

    _empty_tag = '<%s />'

    def _force_end_tag(self):
        # only the known empty elements may be minimized
        return self._name.lower() not in self.empty_elements


class XML(XHTML):
    '''Easily generate XML.

    All tags with no contents are reduced to self-terminating tags.
    '''
    newline_default_on = set()          # no tags are special

    def _force_end_tag(self):
        # every tag without content is minimized
        return False


# NOTE(review): the unit tests start at this point in the file, but markup
# stripping destroyed the opening of the class - its first test is cut off in
# the middle of the expected-value literal. The surviving text is kept here
# verbatim; restore the real tests from version control rather than guessing:
#   class TestCase(unittest.TestCase): def test_empty_tag(self):
#   'generation of an empty HTML tag' self.assertEquals(str(HTML().br), 'hello
') def test_escape(self): 'escaping of special HTML characters in text' h = HTML() h.text('<>&') self.assertEquals(str(h), '<>&') def test_no_escape(self): 'no escaping of special HTML characters in text' h = HTML() h.text('<>&', False) self.assertEquals(str(h), '<>&') def test_escape_attr(self): 'escaping of special HTML characters in attributes' h = HTML() h.br(id='<>&"') self.assertEquals(str(h), 'hello, world!\nmore text
') def test_add_text_newlines(self): 'add text to a tag with newlines for prettiness' h = HTML() p = h.p('hello, world!', newlines=True) p.text('more text') self.assertEquals(str(h), '\nhello, world!\nmore text\n
') def test_doc_newlines(self): 'default document adding newlines between tags' h = HTML() h.br h.br self.assertEquals(str(h), '%s
' % TEST) def test_table(self): 'multiple "with" context blocks' h = HTML() with h.table(border='1'): for i in range(2): with h.tr: h.td('column 1') h.td('column 2') self.assertEquals(str(h), '''column 1 | column 2 |
column 1 | column 2 |