
# -*- test-case-name: twisted.python.test.test_htmlizer -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
HTML rendering of Python source.
"""

from twisted.python.compat import _tokenize, escape

import tokenize, keyword

from . import reflect
from twisted.python._oldstyle import _oldStyle


@_oldStyle
class TokenPrinter:
    """
    Format a stream of tokens and intermediate whitespace, for
    pretty-printing.
    """

    currentCol, currentLine = 0, 1
    lastIdentifier = parameters = 0
    encoding = "utf-8"

    def __init__(self, writer):
        self.writer = writer
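
    # The writer callable is invoked as writer(text) for bare whitespace
    # and as writer(token, type) for classified tokens; HTMLWriter.write
    # below matches that call shape.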

    def printtoken(self, type, token, sCoordinates, eCoordinates, line):
        if hasattr(tokenize, "ENCODING") and type == tokenize.ENCODING:
            self.encoding = token
            return

        (srow, scol) = sCoordinates
        (erow, ecol) = eCoordinates
        if self.currentLine < srow:
            self.writer('\n' * (srow - self.currentLine))
            self.currentLine, self.currentCol = srow, 0
        self.writer(' ' * (scol - self.currentCol))
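
        # Classify the token: a name right after 'def' or 'class' is an
        # identifier, names inside the following parameter list are
        # parameters, Python keywords are keywords, and anything else
        # falls back to its lowercased tokenize type name.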
        if self.lastIdentifier:
            type = "identifier"
            self.parameters = 1
        elif type == tokenize.NAME:
            if keyword.iskeyword(token):
                type = 'keyword'
            else:
                if self.parameters:
                    type = 'parameter'
                else:
                    type = 'variable'
        else:
            type = tokenize.tok_name.get(type).lower()

        self.writer(token, type)
        self.currentCol = ecol
        self.currentLine += token.count('\n')
        if self.currentLine != erow:
            self.currentCol = 0
        self.lastIdentifier = token in ('def', 'class')
        if token == ':':
            self.parameters = 0
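
# For illustration (a sketch of the call sequence, not captured output):
# feeding "def f():" through TokenPrinter drives calls roughly like
# writer('def', 'keyword'), writer(' '), writer('f', 'identifier'),
# writer('(', 'op'), writer(')', 'op'), writer(':', 'op').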

@_oldStyle
class HTMLWriter:
    """
    Write the stream of tokens and whitespace from L{TokenPrinter},
    formatting tokens as HTML spans.
    """

    noSpan = []

    def __init__(self, writer):
        self.writer = writer
        noSpan = []
        reflect.accumulateClassList(self.__class__, "noSpan", noSpan)
        self.noSpan = noSpan

    def write(self, token, type=None):
        if isinstance(token, bytes):
            token = token.decode("utf-8")
        token = escape(token)
        token = token.encode("utf-8")
        if (type is None) or (type in self.noSpan):
            self.writer(token)
        else:
            self.writer(
                b'<span class="py-src-' + type.encode("utf-8") + b'">' +
                token + b'</span>')
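
    # For example, write('def', 'keyword') emits
    # b'<span class="py-src-keyword">def</span>'; whitespace and any type
    # listed in noSpan comes out as bare escaped text with no span.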


class SmallerHTMLWriter(HTMLWriter):
    """
    HTMLWriter that doesn't generate spans for some junk.

    Results in much smaller HTML output.
    """

    noSpan = ["endmarker", "indent", "dedent", "op", "newline", "nl"]


def filter(inp, out, writer=HTMLWriter):
    out.write(b'<pre>')
    printer = TokenPrinter(writer(out.write).write).printtoken
    try:
        for token in _tokenize(inp.readline):
            (tokenType, string, start, end, line) = token
            printer(tokenType, string, start, end, line)
    except tokenize.TokenError:
        pass
    out.write(b'</pre>\n')
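
# Example (a hedged sketch, not part of the module): rendering a source
# string to HTML entirely in memory, using binary streams as main() does.
#
#     import io
#     source = io.BytesIO(b"def f(x):\n    return x\n")
#     html = io.BytesIO()
#     filter(source, html)
#     html.getvalue()  # b'<pre>...</pre>\n'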


def main():
    import sys

    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    with open(sys.argv[1], "rb") as f:
        filter(f, stdout)


if __name__ == '__main__':
    main()