123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- # Author: John MacFarlane <jgm@berkeley.edu>
- # Copyright: (C) 2013 John MacFarlane
- # License: BSD3
- """
- Functions to aid writing python scripts that process the pandoc
- AST serialized as JSON.
- """
- import codecs
- import hashlib
- import io
- import json
- import os
- import sys
# Utility functions that make it easier to write your own filters.
def get_filename4code(module, content, ext=None):
    """Build a content-derived file path inside the filter's image directory.

    The directory is named `module` with "-images" appended, and the file
    name is the SHA-1 hex digest of `content` (encoded with the filesystem
    encoding), optionally followed by "." and `ext`.

    The function tries to create the directory so that the file can be
    written afterwards.  When the directory is newly created a notice is
    written to stderr; an OSError while doing so (typically because the
    directory already exists) is ignored.

    Example:
        filename = get_filename4code("myfilter", code)
    """
    encoded = content.encode(sys.getfilesystemencoding())
    basename = hashlib.sha1(encoded).hexdigest()
    target_dir = module + "-images"
    try:
        os.mkdir(target_dir)
        sys.stderr.write('Created directory ' + target_dir + '\n')
    except OSError:
        # Best effort only: the directory is usually already there.
        pass
    if ext:
        basename = basename + "." + ext
    return os.path.join(target_dir, basename)
def get_value(kv, key, value=None):
    """Extract the value for `key` from a list of key/value pairs.

    Returns a tuple `(value, rest)`.  `value` is the value of the last pair
    whose key equals `key`, or the `value` argument when no pair matches.
    `rest` is a new list of `[k, v]` lists holding every non-matching pair
    in its original order.
    """
    found = value
    remaining = []
    for pair in kv:
        name, val = pair
        if name != key:
            remaining.append([name, val])
            continue
        # A later duplicate overrides an earlier one.
        found = val
    return found, remaining
def get_caption(kv):
    """Pull a "caption" entry out of the keyvalues (options).

    Returns a tuple `(caption, typef, rest)`:
      * when a "caption" key is present (with a value other than None),
        `caption` is `[Str(value)]` and `typef` is "fig:";
      * otherwise `caption` is `[]` and `typef` is "".
    `rest` is the key/value list with the "caption" entries removed.

    Example:
        if key == 'CodeBlock':
            [[ident, classes, keyvals], code] = value
            caption, typef, keyvals = get_caption(keyvals)
            ...
            return Para([Image([ident, [], keyvals], caption, [filename, typef])])
    """
    text, remaining = get_value(kv, u"caption")
    if text is None:
        return [], "", remaining
    return [Str(text)], "fig:", remaining
def get_extension(format, default, **alternates):
    """Choose the file extension to use for an output format.

    `alternates` maps format names to extensions; when `format` has no
    entry there, `default` is returned.

    Example:
        filetype = get_extension(format, "png", html="svg", latex="eps")
    """
    return alternates.get(format, default)
- # end of utilities
def walk(x, action, format, meta):
    """Walk a tree, applying an action to every object.

    Returns a modified tree.  An action is a function of the form
    `action(key, value, format, meta)`, where:

    * `key` is the type of the pandoc object (e.g. 'Str', 'Para')
    * `value` is the contents of the object (e.g. a string for 'Str', a
      list of inline elements for 'Para'), or None when the object has
      no 'c' entry
    * `format` is the target output format (as supplied by the
      `format` argument of `walk`)
    * `meta` is the document's metadata

    The return of an action is either:

    * `None`: this means that the object should remain unchanged
    * a pandoc object: this will replace the original object
    * a list of pandoc objects: these will replace the original object;
      the list is merged with the neighbors of the original object
      (spliced into the list the original object belongs to); returning
      an empty list deletes the object

    The action is only invoked for dicts carrying a 't' key that are
    members of a list.  Lists are rebuilt as new lists; dicts are updated
    in place and returned; any other value is returned as is.
    """
    if isinstance(x, list):
        rebuilt = []
        for node in x:
            is_element = isinstance(node, dict) and 't' in node
            if not is_element:
                rebuilt.append(walk(node, action, format, meta))
                continue
            outcome = action(node['t'], node.get('c'), format, meta)
            if outcome is None:
                # Unchanged: still descend into the original object.
                rebuilt.append(walk(node, action, format, meta))
            elif isinstance(outcome, list):
                # Splice the replacements into the surrounding list.
                rebuilt.extend(
                    walk(part, action, format, meta) for part in outcome)
            else:
                rebuilt.append(walk(outcome, action, format, meta))
        return rebuilt
    if isinstance(x, dict):
        for field in x:
            x[field] = walk(x[field], action, format, meta)
    return x
def toJSONFilter(action):
    """Run a single action as a JSON filter.

    Equivalent to calling `toJSONFilters` with a one-element list
    containing `action`.
    """
    single_action = [action]
    toJSONFilters(single_action)
def toJSONFilters(actions):
    """Generate a JSON-to-JSON filter from stdin to stdout.

    The filter:

    * reads a JSON-formatted pandoc document from stdin (decoded as UTF-8)
    * transforms it by walking the tree and performing the actions
    * writes the new JSON-formatted pandoc document to stdout

    The argument `actions` is a list of functions of the form
    `action(key, value, format, meta)`, as described in more
    detail under `walk`.

    This function calls `applyJSONFilters`, with the `format`
    argument taken from the first command-line argument, or ""
    when there is none.  (Pandoc sets this by default when calling
    filters.)
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
        reader = codecs.getreader("utf-8")(sys.stdin)
    document = reader.read()

    target = sys.argv[1] if len(sys.argv) > 1 else ""
    sys.stdout.write(applyJSONFilters(actions, document, target))
def applyJSONFilters(actions, source, format=""):
    """Walk through a JSON structure and apply filters.

    This:

    * reads a JSON-formatted pandoc document from a source string
    * transforms it by walking the tree and performing the actions
    * returns a new JSON-formatted pandoc document as a string

    The `actions` argument is a list of functions (see `walk`
    for a full description); they are applied in order, each one to
    the result of the previous one.

    The argument `source` is a string-encoded JSON document.

    The argument `format` is a string describing the output format.

    The metadata handed to the actions is taken from the document:

    * a JSON object supplies its 'meta' entry, or `{}` when it has none
    * a non-empty JSON array (old API) supplies the 'unMeta' entry of its
      first element, provided that element is not empty
    * anything else gets `{}`

    Returns the new JSON-formatted pandoc document.
    Raises whatever `json.loads` raises when `source` is not valid JSON.
    """
    doc = json.loads(source)

    if isinstance(doc, dict):
        # Object-shaped document.  Use .get so that a document without
        # 'meta' falls back to empty metadata instead of being indexed
        # with 0 (which raised KeyError).
        meta = doc.get('meta', {})
    elif isinstance(doc, list) and doc and doc[0]:  # old API
        meta = doc[0]['unMeta']
    else:
        # Empty array, empty first element, or a non-container value.
        meta = {}

    altered = doc
    for action in actions:
        altered = walk(altered, action, format, meta)
    return json.dumps(altered)
def stringify(x):
    """Walk the tree `x` and return its concatenated string content,
    leaving out all formatting.

    'Str' and 'MetaString' contribute their value, 'Code' and 'Math'
    contribute the second item of their value, and 'LineBreak',
    'SoftBreak' and 'Space' each contribute a single space.  Every other
    object contributes nothing itself (its children are still visited).
    """
    pieces = []
    blank_keys = ('LineBreak', 'SoftBreak', 'Space')

    def collect(key, val, format, meta):
        # Always returns None, so walk leaves every object in place.
        if key == 'Str' or key == 'MetaString':
            pieces.append(val)
        elif key == 'Code' or key == 'Math':
            pieces.append(val[1])
        elif key in blank_keys:
            pieces.append(" ")

    walk(x, collect, "", {})
    return ''.join(pieces)
def attributes(attrs):
    """Return an attribute list built from the dictionary `attrs`.

    The result is `[ident, classes, keyvals]`, where `ident` is
    `attrs["id"]` (default ""), `classes` is `attrs["classes"]`
    (default `[]`), and `keyvals` is a list of `[key, value]` lists for
    every other entry.  A falsy `attrs` (e.g. None) is treated as an
    empty dictionary.
    """
    if not attrs:
        attrs = {}
    ident = attrs.get("id", "")
    classes = attrs.get("classes", [])
    keyvals = []
    for name in attrs:
        if name == "id" or name == "classes":
            continue
        keyvals.append([name, attrs[name]])
    return [ident, classes, keyvals]
def elt(eltType, numargs):
    """Create a constructor function for elements of type `eltType`.

    The returned function requires exactly `numargs` positional arguments
    and raises ValueError otherwise.  It returns a dict
    `{'t': eltType, 'c': contents}`, where `contents` is:

    * `[]` when `numargs` is 0
    * the argument itself when there is exactly one
    * a list of the arguments otherwise
    """
    def fun(*args):
        given = len(args)
        if given != numargs:
            message = (eltType + ' expects ' + str(numargs) +
                       ' arguments, but given ' + str(given))
            raise ValueError(message)
        if numargs == 0:
            contents = []
        elif given == 1:
            # A single argument is stored bare, not wrapped in a list.
            contents = args[0]
        else:
            contents = list(args)
        return {'t': eltType, 'c': contents}
    return fun
# Constructors for block elements
#
# Each name below is bound to a function built by elt().  Calling it with
# exactly the number of arguments given here returns a dict of the form
# {'t': <element name>, 'c': <contents>}; any other argument count raises
# ValueError.
Plain = elt('Plain', 1)
Para = elt('Para', 1)
CodeBlock = elt('CodeBlock', 2)
RawBlock = elt('RawBlock', 2)
BlockQuote = elt('BlockQuote', 1)
OrderedList = elt('OrderedList', 2)
BulletList = elt('BulletList', 1)
DefinitionList = elt('DefinitionList', 1)
Header = elt('Header', 3)
HorizontalRule = elt('HorizontalRule', 0)
Table = elt('Table', 5)
Div = elt('Div', 2)
Null = elt('Null', 0)

# Constructors for inline elements
#
# Built the same way as the block constructors above; the second argument
# to elt() is the exact number of arguments the constructor accepts.
Str = elt('Str', 1)
Emph = elt('Emph', 1)
Strong = elt('Strong', 1)
Strikeout = elt('Strikeout', 1)
Superscript = elt('Superscript', 1)
Subscript = elt('Subscript', 1)
SmallCaps = elt('SmallCaps', 1)
Quoted = elt('Quoted', 2)
Cite = elt('Cite', 2)
Code = elt('Code', 2)
Space = elt('Space', 0)
LineBreak = elt('LineBreak', 0)
Math = elt('Math', 2)
RawInline = elt('RawInline', 2)
Link = elt('Link', 3)
Image = elt('Image', 3)
Note = elt('Note', 1)
SoftBreak = elt('SoftBreak', 0)
Span = elt('Span', 2)
|