12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184 |
- # coding: utf-8
- """
- mistune
- ~~~~~~~
- The fastest markdown parser in pure Python with renderer feature.
- :copyright: (c) 2014 - 2018 by Hsiaoming Yang.
- """
- import re
- import inspect
# Package metadata.
__version__ = '0.8.4'
__author__ = 'Hsiaoming Yang <me@lepture.com>'

# Public names exported by a star-import of this module.
__all__ = [
    'BlockGrammar', 'BlockLexer',
    'InlineGrammar', 'InlineLexer',
    'Renderer', 'Markdown',
    'markdown', 'escape',
]
# Runs of whitespace inside a link/footnote key (collapsed by ``_keyify``).
_key_pattern = re.compile(r'\s+')
_nonalpha_pattern = re.compile(r'\W')
# An ampersand that does not already start an entity such as ``&amp;``.
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows / old-Mac line endings, normalised to ``\n`` by ``preprocessing``.
_newline_pattern = re.compile(r'\r\n|\r')
# Leading ``>`` marker of every block-quote line.
_block_quote_leading_pattern = re.compile(r'^ *> ?', re.MULTILINE)
# Leading four-space indent of every indented-code line.
_block_code_leading_pattern = re.compile(r'^ {4}', re.MULTILINE)

# Tag names treated as inline-level HTML; any other tag name is block-level.
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
# Block tags whose content is never parsed as inline Markdown.
_pre_tags = ['pre', 'script', 'style']

# Regex fragments shared by the block and inline HTML grammars.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''\s*[a-zA-Z\-](?:\s*\=\s*(?:"[^"]*"|'[^']*'|[^\s'">]+))?'''
_block_tag = (
    r'(?!(?:' + '|'.join(_inline_tags) + r')\b)\w+' + _valid_end
)

# URL schemes that ``escape_link`` refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
def _pure_pattern(regex):
    """Return the source of a compiled regex without one leading ``^``.

    Used to embed one grammar rule inside another pattern, where the
    start-of-string anchor would be wrong.
    """
    source = regex.pattern
    return source[1:] if source.startswith('^') else source
def _keyify(key):
    """Normalise a link/footnote key for dictionary lookup.

    The key is lower-cased, HTML-escaped (quotes included) and every run of
    whitespace is collapsed to a single space.
    """
    escaped = escape(key.lower(), quote=True)
    return _key_pattern.sub(' ', escaped)
def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the text to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped.
    :returns: the escaped text.
    """
    # The ampersand must be handled first, otherwise the entities produced
    # by the replacements below would be escaped a second time.
    if smart_amp:
        # Leave ampersands that already start an entity untouched.
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
        text = text.replace("'", '&#39;')
    return text
def escape_link(url):
    """Remove dangerous URL schemes like javascript: and escape afterwards."""
    trimmed = url.lower().strip('\x00\x1a \n\r\t')
    # Drop every character that cannot be part of a scheme prefix so that
    # obfuscated spellings are still recognised.
    compact = re.sub(r'[^A-Za-z0-9\/:]+', '', trimmed)
    if any(compact.startswith(scheme) for scheme in _scheme_blacklist):
        return ''
    return escape(url, quote=True, smart_amp=False)
def preprocessing(text, tab=4):
    """Normalise raw input before lexing.

    Line endings become ``\\n``, tabs are expanded to ``tab`` columns, the
    ``\\u2424`` sequence is replaced by a newline and lines made only of
    spaces are emptied.
    """
    normalized = _newline_pattern.sub('\n', text).expandtabs(tab)
    normalized = normalized.replace('\u2424', '\n')
    return re.sub(r'^ +$', '', normalized, flags=re.M)
class BlockGrammar(object):
    """Grammars for block level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; ``BlockLexer`` looks rules up by attribute name.
    """

    # Link definition ``[key]: <link> "title"``.
    # Groups: 1 = key, 2 = link, 3 = optional title.
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # Footnote definition ``[^key]: text`` plus any following lines that are
    # indented by at least one space. Groups: 1 = key, 2 = body.
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    # One or more blank lines.
    newline = re.compile(r'^\n+')
    # Consecutive lines indented by four spaces.
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    # Fenced code. Groups: 1 = fence marker, 2 = optional language,
    # 3 = code body. The closing fence must repeat the opening marker (\1).
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *([^`\s]+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    # Three or more ``-``, ``*`` or ``_`` characters on a line of their own.
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # ATX heading. Groups: 1 = the ``#`` run (1-6), 2 = heading text.
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # Setext heading: a text line underlined with ``=`` or ``-``.
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    # Lines starting with ``>`` together with their continuation lines.
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # A whole list. Groups: 1 = indent, 2 = first bullet, 3 = bullet char
    # (only set for unordered bullets). The alternatives below list the
    # places where the list stops.
    list_block = re.compile(
        r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n+(?=\1(?(3)\d+\.|[*+-]) )'  # heterogeneous bullet
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # A single list item: the bullet line plus every following line that
    # does not start a sibling bullet at the same indent (group 2).
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    # The bullet (and surrounding spaces) at the start of an item.
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # Consecutive non-empty lines, stopping before a line on which one of
    # the embedded block rules would match. The embedded patterns sit inside
    # this pattern's own group 1, hence the backreference renumbering.
    # NOTE(review): ``fences`` contributes three groups, so the ``\3`` target
    # used for ``list_block`` looks inconsistent with the resulting group
    # numbering - confirm before relying on it.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # Block HTML: a comment, a paired block tag (groups 1 = tag,
    # 2 = attributes, 3 = inner text) or a single block tag, followed by a
    # blank line or the end of the text.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    # Table whose rows start with a pipe.
    # Groups: 1 = header row, 2 = alignment row, 3 = body rows.
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # Table without leading pipes; same group layout as ``table``.
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    # Fallback: a single non-empty line.
    text = re.compile(r'^[^\n]+')
class BlockLexer(object):
    """Block level lexer for block grammars.

    Turns Markdown source into a flat list of token dicts (``self.tokens``)
    and collects link and footnote definitions in ``self.def_links`` and
    ``self.def_footnotes``.
    """

    grammar_class = BlockGrammar

    # Rule names are tried in order; the first pattern that matches wins.
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rules used for the content of a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rules used for the content of a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """Create a lexer.

        :param rules: grammar object holding the compiled patterns; an
                      instance of ``grammar_class`` is created when omitted.
        :param max_recursive_depth: nesting limit for lists and block
                                    quotes (default 6).
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules
        self._max_recursive_depth = kwargs.get('max_recursive_depth', 6)
        self._list_depth = 0
        self._blockquote_depth = 0

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenise ``text`` and return ``self.tokens``.

        Tokens are appended to the existing list, which is what lets the
        ``parse_*`` handlers recurse into nested content.

        :param text: source text to consume.
        :param rules: names of the rules to try, in order; defaults to
                      ``default_rules``.
        :raises RuntimeError: when no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # Try every rule in order and dispatch the first match.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                # Drop the consumed prefix and continue with the rest.
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        # A single newline only separates blocks; two or more are recorded.
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        """Emit the tokens of a list, recursing into its items."""
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            'ordered': '.' in bull,
        })
        self._list_depth += 1
        try:
            if self._list_depth > self._max_recursive_depth:
                # Too deep: keep the raw text as one item instead of
                # recursing any further.
                self.tokens.append({'type': 'list_item_start'})
                self.parse_text(m)
                self.tokens.append({'type': 'list_item_end'})
            else:
                self._process_list_item(m.group(0), bull)
            self.tokens.append({'type': 'list_end'})
        finally:
            # Restore the counter even if a nested parse raised, so the
            # lexer instance stays usable.
            self._list_depth -= 1

    def _process_list_item(self, cap, bull):
        """Split the list text ``cap`` into items and tokenise each one."""
        items = self.rules.list_item.findall(cap)

        _next = False
        last = len(items) - 1

        for i, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent: strip up to the bullet's width from continuation lines
            if '\n ' in item:
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            # A trailing newline makes this item and the next one loose.
            if i != last and item:
                _next = item.endswith('\n')
                if not loose:
                    loose = _next

            if loose:
                t = 'loose_item_start'
            else:
                t = 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        """Emit the tokens of a block quote, recursing into its content."""
        self.tokens.append({'type': 'block_quote_start'})
        self._blockquote_depth += 1
        try:
            if self._blockquote_depth > self._max_recursive_depth:
                self.parse_text(m)
            else:
                # clean leading >
                cap = _block_quote_leading_pattern.sub('', m.group(0))
                self.parse(cap)
            self.tokens.append({'type': 'block_quote_end'})
        finally:
            # See parse_list_block: never leave the counter raised.
            self._blockquote_depth -= 1

    def parse_def_links(self, m):
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        """Record a footnote definition and tokenise its body."""
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 means "defined but not referenced yet".
        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # Smallest non-zero indent among the continuation lines.
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            newlines = [lines[0]]
            for line in lines[1:]:
                newlines.append(line[whitespace:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            # Strip the outer pipes, then split on unescaped pipes.
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def parse_nptable(self, m):
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the table token (header and alignment) from a match."""
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def _process_cells(self, cells):
        """Replace escaped pipes (``\\|``) in every cell, in place.

        :param cells: list of rows, each a list of cell strings.
        :returns: the same ``cells`` object.
        """
        for row in cells:
            for index, cell in enumerate(row):
                # de-escape any pipe inside the cell here
                row[index] = cell.replace('\\|', '|')
        return cells

    def parse_block_html(self, m):
        tag = m.group(1)
        if not tag:
            # Comment or single tag: nothing to parse inside.
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Every attribute is a compiled pattern anchored at the start of the
    remaining text; ``InlineLexer`` looks rules up by attribute name.
    """

    # Backslash escape of a punctuation character; group 1 = the character.
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # Inline HTML: a comment, a paired tag (groups 1 = tag, 2 = attributes,
    # 3 = inner text) or a single tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (
                _valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    # ``<target>``; group 2 is ``@`` for e-mail addresses and ``:`` for URLs.
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # ``[text](link "title")`` or ``![alt](src "title")``.
    # Groups: 1 = text, 2 = optional ``<``, 3 = link, 4 = optional title.
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # ``[text][key]``; groups: 1 = text, 2 = key (may be empty).
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # ``[key]`` on its own; group 1 = key.
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # Bare http(s) URL that does not end in trailing punctuation.
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    # Group 1 is set for the ``__`` form, group 2 for the ``**`` form.
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    # Group 1 is set for the ``_`` form, group 2 for the ``*`` form.
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    # Groups: 1 = backtick run, 2 = code text.
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # Two or more trailing spaces before a newline (not at the very end).
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    # ``[^key]`` footnote reference; group 1 = key.
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text up to the next character that may start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance only; the
        class-level patterns are left unchanged.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    Consumes a piece of text rule by rule and concatenates whatever the
    renderer returns for each match.
    """

    grammar_class = InlineGrammar

    # Rule names are tried in order; the first handler that returns a
    # value other than ``None`` wins.
    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    # Rules used for text found inside HTML tags (no footnotes).
    inline_html_rules = [
        'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """Create a lexer.

        :param renderer: renderer whose methods produce the output; its
                         ``options`` override the keyword options.
        :param rules: grammar object; an instance of ``grammar_class`` is
                      created when omitted.
        :param hard_wrap: switch the grammar to hard-wrap line breaks.
        :param parse_inline_html: parse Markdown inside inline HTML tags.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        kwargs.update(self.renderer.options)
        if kwargs.get('hard_wrap'):
            rules.hard_wrap()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions found by the block
        lexer and restart footnote numbering."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render ``text`` with the given rule names.

        :param text: inline source text.
        :param rules: names of the rules to try, in order; defaults to
                      ``default_rules``.
        :returns: the accumulated renderer output.
        :raises RuntimeError: when no rule consumes the remaining text.
        """
        text = text.rstrip('\n')
        if not rules:
            rules = self.default_rules

        if self._in_footnote and 'footnote' in rules:
            # Footnotes cannot be nested. Build a new list rather than
            # calling ``remove`` so a caller-supplied sequence is never
            # mutated and tuples are accepted as well.
            rules = [key for key in rules if key != 'footnote']

        output = self.renderer.placeholder()

        def manipulate(text):
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                # ``None`` means the handler declined; try the next rule.
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        while text:
            ret = manipulate(text)
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def _output_in_link(self, text, rules=None):
        """Render ``text`` as the content of a link.

        The previous ``_in_link`` value is restored afterwards, so a link
        nested inside another one does not clear the flag for the rest of
        the outer link, and the flag is not left set if rendering raises.
        """
        previous = self._in_link
        self._in_link = True
        try:
            return self.output(text, rules)
        finally:
            self._in_link = previous

    def output_escape(self, m):
        text = m.group(1)
        return self.renderer.escape(text)

    def output_autolink(self, m):
        link = m.group(1)
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        if self._in_link:
            # Already inside a link: do not create a nested anchor.
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                text = self._output_in_link(text, self.inline_html_rules)
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            # Already referenced once; only the first reference is linked.
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        # An empty key (``[text][]``) falls back to the link text.
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a link, or an image when the match starts with ``!``."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        text = self._output_in_link(text)
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
class Renderer(object):
    """The default HTML renderer for rendering Markdown.

    Keyword options are stored in ``self.options`` and read by the
    individual rendering methods.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string
        with the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # The language comes straight from the document and ends up inside
        # an attribute value, so it has to be escaped as well.
        lang = escape(lang, quote=True, smart_amp=False)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
                html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ol' if ordered else 'ul'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        # Missing flags mean an unaligned body cell.
        tag = 'th' if flags.get('header') else 'td'
        align = flags.get('align')
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        if self.options.get('parse_block_html'):
            return text
        return escape(text)

    def escape(self, text):
        """Rendering escape sequence.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = link = escape_link(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = escape_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = escape_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" class="footnote">↩</a>'
        ) % escape(key)
        text = text.rstrip()
        closing = '</p>'
        if text.endswith(closing):
            # Insert the back-reference before the final ``</p>``. Plain
            # slicing is used because the key may contain backslashes,
            # which ``re.sub`` would interpret in a replacement template.
            text = text[:-len(closing)] + back + closing
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
class Markdown(object):
    """The Markdown parser.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    """

    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)
        else:
            # Options set on an explicit renderer take precedence.
            kwargs.update(renderer.options)

        self.renderer = renderer

        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            self.inline = InlineLexer(renderer, **kwargs)

        if not block:
            # Forward the options (e.g. ``max_recursive_depth``) to the
            # default block lexer, as is done for a lexer class above.
            # ``rules`` is dropped because the grammar is passed explicitly.
            block_options = dict(kwargs)
            block_options.pop('rules', None)
            block = BlockLexer(BlockGrammar(), **block_options)
        self.block = block

        self.footnotes = []
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render ``text`` and append the referenced footnotes.

        :param text: markdown formatted text content.
        :returns: the rendered output.
        """
        out = self.output(preprocessing(text))

        # Maps each footnote key to the index of its first reference
        # (0 when it was never referenced).
        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # Keep only referenced footnotes, sorted so that popping from the
        # end yields them in reference order.
        footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
        self.footnotes = sorted(
            footnotes, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Remove and return the next token, or ``None`` when exhausted.

        The token is also stored in ``self.token``.
        """
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without consuming it, or ``None``."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenise ``text`` with the block lexer and render every token.

        :param text: preprocessed source text.
        :param rules: optional block rule names passed to the block lexer.
        """
        self.tokens = self.block(text, rules)
        # Reversed so that ``pop`` yields the tokens in document order.
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Render the current token through its ``output_*`` method."""
        t = self.token['type']

        # special cases: ``*_start`` tokens share the handler of their base
        # type
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge the current text token with the text tokens that follow
        it and render the result with the inline lexer."""
        text = self.token['text']
        while True:
            upcoming = self.peek()
            # ``peek`` returns None once the tokens are exhausted.
            if upcoming is None or upcoming['type'] != 'text':
                break
            text += '\n' + self.pop()['text']
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            # Columns beyond the alignment row are left unaligned.
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for i, row in enumerate(self.token['cells']):
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            if self.token['type'] == 'text':
                # Tight item: text is rendered inline, without ``<p>``.
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        # Footnote bodies are collected and emitted by ``parse`` at the end
        # of the document; nothing is rendered in place.
        self.inline._in_footnote = True
        body = self.renderer.placeholder()
        key = self.token['key']
        while self.pop()['type'] != 'footnote_end':
            body += self.tok()
        self.footnotes.append({'key': key, 'text': body})
        self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
def markdown(text, escape=True, **kwargs):
    """Render markdown formatted text to html.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)
|