dicttoxml.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. """
  4. Converts a Python dictionary or other native data type into a valid XML string.
  5. Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and dict-like objects) data types, with arbitrary nesting for the collections. Items with a `datetime` type are converted to ISO format strings. Items with a `None` type become empty XML elements.
  6. This module works with both Python 2 and 3.
  7. """
  8. from __future__ import unicode_literals
  9. __version__ = '1.7.4'
  10. version = __version__
import collections
import logging
import numbers
from random import randint
from xml.dom.minidom import parseString

# The ABC aliases in `collections` were removed in Python 3.10, and Python 2
# has no `collections.abc`, so try the modern location first.
try:
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable
  16. LOG = logging.getLogger("dicttoxml")
  17. # python 3 doesn't have a unicode type
  18. try:
  19. unicode
  20. except:
  21. unicode = str
  22. # python 3 doesn't have a long type
  23. try:
  24. long
  25. except:
  26. long = int
  27. def set_debug(debug=True, filename='dicttoxml.log'):
  28. if debug:
  29. import datetime
  30. print('Debug mode is on. Events are logged at: %s' % (filename))
  31. logging.basicConfig(filename=filename, level=logging.INFO)
  32. LOG.info('\nLogging session starts: %s' % (
  33. str(datetime.datetime.today()))
  34. )
  35. else:
  36. logging.basicConfig(level=logging.WARNING)
  37. print('Debug mode is off.')
  38. def unicode_me(something):
  39. """Converts strings with non-ASCII characters to unicode for LOG.
  40. Python 3 doesn't have a `unicode()` function, so `unicode()` is an alias
  41. for `str()`, but `str()` doesn't take a second argument, hence this kludge.
  42. """
  43. try:
  44. return unicode(something, 'utf-8')
  45. except:
  46. return unicode(something)
  47. ids = [] # initialize list of unique ids
  48. def make_id(element, start=100000, end=999999):
  49. """Returns a random integer"""
  50. return '%s_%s' % (element, randint(start, end))
  51. def get_unique_id(element):
  52. """Returns a unique id for a given element"""
  53. this_id = make_id(element)
  54. dup = True
  55. while dup:
  56. if this_id not in ids:
  57. dup = False
  58. ids.append(this_id)
  59. else:
  60. this_id = make_id(element)
  61. return ids[-1]
  62. def get_xml_type(val):
  63. """Returns the data type for the xml type attribute"""
  64. if type(val).__name__ in ('str', 'unicode'):
  65. return 'str'
  66. if type(val).__name__ in ('int', 'long'):
  67. return 'int'
  68. if type(val).__name__ == 'float':
  69. return 'float'
  70. if type(val).__name__ == 'bool':
  71. return 'bool'
  72. if isinstance(val, numbers.Number):
  73. return 'number'
  74. if type(val).__name__ == 'NoneType':
  75. return 'null'
  76. if isinstance(val, dict):
  77. return 'dict'
  78. if isinstance(val, collections.Iterable):
  79. return 'list'
  80. return type(val).__name__
  81. def escape_xml(s):
  82. if type(s) in (str, unicode):
  83. s = unicode_me(s) # avoid UnicodeDecodeError
  84. s = s.replace('&', '&')
  85. s = s.replace('"', '"')
  86. s = s.replace('\'', ''')
  87. s = s.replace('<', '&lt;')
  88. s = s.replace('>', '&gt;')
  89. return s
  90. def make_attrstring(attr):
  91. """Returns an attribute string in the form key="val" """
  92. attrstring = ' '.join(['%s="%s"' % (k, v) for k, v in attr.items()])
  93. return '%s%s' % (' ' if attrstring != '' else '', attrstring)
  94. def key_is_valid_xml(key):
  95. """Checks that a key is a valid XML name"""
  96. LOG.info('Inside key_is_valid_xml(). Testing "%s"' % (unicode_me(key)))
  97. test_xml = '<?xml version="1.0" encoding="UTF-8" ?><%s>foo</%s>' % (key, key)
  98. try:
  99. parseString(test_xml)
  100. return True
  101. except Exception: # minidom does not implement exceptions well
  102. return False
  103. def make_valid_xml_name(key, attr):
  104. """Tests an XML name and fixes it if invalid"""
  105. LOG.info('Inside make_valid_xml_name(). Testing key "%s" with attr "%s"' % (
  106. unicode_me(key), unicode_me(attr))
  107. )
  108. key = escape_xml(key)
  109. attr = escape_xml(attr)
  110. # pass through if key is already valid
  111. if key_is_valid_xml(key):
  112. return key, attr
  113. # prepend a lowercase n if the key is numeric
  114. if key.isdigit():
  115. return 'n%s' % (key), attr
  116. # replace spaces with underscores if that fixes the problem
  117. if key_is_valid_xml(key.replace(' ', '_')):
  118. return key.replace(' ', '_'), attr
  119. # key is still invalid - move it into a name attribute
  120. attr['name'] = key
  121. key = 'key'
  122. return key, attr
  123. def wrap_cdata(s):
  124. """Wraps a string into CDATA sections"""
  125. s = unicode_me(s).replace(']]>', ']]]]><![CDATA[>')
  126. return '<![CDATA[' + s + ']]>'
  127. def default_item_func(parent):
  128. return 'item'
  129. def convert(obj, ids, attr_type, item_func, cdata, parent='root'):
  130. """Routes the elements of an object to the right function to convert them
  131. based on their data type"""
  132. LOG.info('Inside convert(). obj type is: "%s", obj="%s"' % (type(obj).__name__, unicode_me(obj)))
  133. item_name = item_func(parent)
  134. if isinstance(obj, numbers.Number) or type(obj) in (str, unicode):
  135. return convert_kv(item_name, obj, attr_type, cdata)
  136. if hasattr(obj, 'isoformat'):
  137. return convert_kv(item_name, obj.isoformat(), attr_type, cdata)
  138. if type(obj) == bool:
  139. return convert_bool(item_name, obj, attr_type, cdata)
  140. if obj is None:
  141. return convert_none(item_name, '', attr_type, cdata)
  142. if isinstance(obj, dict):
  143. return convert_dict(obj, ids, parent, attr_type, item_func, cdata)
  144. if isinstance(obj, collections.Iterable):
  145. return convert_list(obj, ids, parent, attr_type, item_func, cdata)
  146. raise TypeError('Unsupported data type: %s (%s)' % (obj, type(obj).__name__))
  147. def convert_dict(obj, ids, parent, attr_type, item_func, cdata):
  148. """Converts a dict into an XML string."""
  149. LOG.info('Inside convert_dict(): obj type is: "%s", obj="%s"' % (
  150. type(obj).__name__, unicode_me(obj))
  151. )
  152. output = []
  153. addline = output.append
  154. item_name = item_func(parent)
  155. for key, val in obj.items():
  156. LOG.info('Looping inside convert_dict(): key="%s", val="%s", type(val)="%s"' % (
  157. unicode_me(key), unicode_me(val), type(val).__name__)
  158. )
  159. attr = {} if not ids else {'id': '%s' % (get_unique_id(parent)) }
  160. key, attr = make_valid_xml_name(key, attr)
  161. if isinstance(val, numbers.Number) or type(val) in (str, unicode):
  162. addline(convert_kv(key, val, attr_type, attr, cdata))
  163. elif hasattr(val, 'isoformat'): # datetime
  164. addline(convert_kv(key, val.isoformat(), attr_type, attr, cdata))
  165. elif type(val) == bool:
  166. addline(convert_bool(key, val, attr_type, attr, cdata))
  167. elif isinstance(val, dict):
  168. if attr_type:
  169. attr['type'] = get_xml_type(val)
  170. addline('<%s%s>%s</%s>' % (
  171. key, make_attrstring(attr),
  172. convert_dict(val, ids, key, attr_type, item_func, cdata),
  173. key
  174. )
  175. )
  176. elif isinstance(val, collections.Iterable):
  177. if attr_type:
  178. attr['type'] = get_xml_type(val)
  179. addline('<%s%s>%s</%s>' % (
  180. key,
  181. make_attrstring(attr),
  182. convert_list(val, ids, key, attr_type, item_func, cdata),
  183. key
  184. )
  185. )
  186. elif val is None:
  187. addline(convert_none(key, val, attr_type, attr, cdata))
  188. else:
  189. raise TypeError('Unsupported data type: %s (%s)' % (
  190. val, type(val).__name__)
  191. )
  192. return ''.join(output)
  193. def convert_list(items, ids, parent, attr_type, item_func, cdata):
  194. """Converts a list into an XML string."""
  195. LOG.info('Inside convert_list()')
  196. output = []
  197. addline = output.append
  198. item_name = item_func(parent)
  199. if ids:
  200. this_id = get_unique_id(parent)
  201. for i, item in enumerate(items):
  202. LOG.info('Looping inside convert_list(): item="%s", item_name="%s", type="%s"' % (
  203. unicode_me(item), item_name, type(item).__name__)
  204. )
  205. attr = {} if not ids else { 'id': '%s_%s' % (this_id, i+1) }
  206. if isinstance(item, numbers.Number) or type(item) in (str, unicode):
  207. addline(convert_kv(item_name, item, attr_type, attr, cdata))
  208. elif hasattr(item, 'isoformat'): # datetime
  209. addline(convert_kv(item_name, item.isoformat(), attr_type, attr, cdata))
  210. elif type(item) == bool:
  211. addline(convert_bool(item_name, item, attr_type, attr, cdata))
  212. elif isinstance(item, dict):
  213. if not attr_type:
  214. addline('<%s>%s</%s>' % (
  215. item_name,
  216. convert_dict(item, ids, parent, attr_type, item_func, cdata),
  217. item_name,
  218. )
  219. )
  220. else:
  221. addline('<%s type="dict">%s</%s>' % (
  222. item_name,
  223. convert_dict(item, ids, parent, attr_type, item_func, cdata),
  224. item_name,
  225. )
  226. )
  227. elif isinstance(item, collections.Iterable):
  228. if not attr_type:
  229. addline('<%s %s>%s</%s>' % (
  230. item_name, make_attrstring(attr),
  231. convert_list(item, ids, item_name, attr_type, item_func, cdata),
  232. item_name,
  233. )
  234. )
  235. else:
  236. addline('<%s type="list"%s>%s</%s>' % (
  237. item_name, make_attrstring(attr),
  238. convert_list(item, ids, item_name, attr_type, item_func, cdata),
  239. item_name,
  240. )
  241. )
  242. elif item is None:
  243. addline(convert_none(item_name, None, attr_type, attr, cdata))
  244. else:
  245. raise TypeError('Unsupported data type: %s (%s)' % (
  246. item, type(item).__name__)
  247. )
  248. return ''.join(output)
  249. def convert_kv(key, val, attr_type, attr={}, cdata=False):
  250. """Converts a number or string into an XML element"""
  251. LOG.info('Inside convert_kv(): key="%s", val="%s", type(val) is: "%s"' % (
  252. unicode_me(key), unicode_me(val), type(val).__name__)
  253. )
  254. key, attr = make_valid_xml_name(key, attr)
  255. if attr_type:
  256. attr['type'] = get_xml_type(val)
  257. attrstring = make_attrstring(attr)
  258. return '<%s%s>%s</%s>' % (
  259. key, attrstring,
  260. wrap_cdata(val) if cdata == True else escape_xml(val),
  261. key
  262. )
  263. def convert_bool(key, val, attr_type, attr={}, cdata=False):
  264. """Converts a boolean into an XML element"""
  265. LOG.info('Inside convert_bool(): key="%s", val="%s", type(val) is: "%s"' % (
  266. unicode_me(key), unicode_me(val), type(val).__name__)
  267. )
  268. key, attr = make_valid_xml_name(key, attr)
  269. if attr_type:
  270. attr['type'] = get_xml_type(val)
  271. attrstring = make_attrstring(attr)
  272. return '<%s%s>%s</%s>' % (key, attrstring, unicode(val).lower(), key)
  273. def convert_none(key, val, attr_type, attr={}, cdata=False):
  274. """Converts a null value into an XML element"""
  275. LOG.info('Inside convert_none(): key="%s"' % (unicode_me(key)))
  276. key, attr = make_valid_xml_name(key, attr)
  277. if attr_type:
  278. attr['type'] = get_xml_type(val)
  279. attrstring = make_attrstring(attr)
  280. return '<%s%s></%s>' % (key, attrstring, key)
  281. def dicttoxml(obj, root=True, custom_root='root', ids=False, attr_type=True,
  282. item_func=default_item_func, cdata=False):
  283. """Converts a python object into XML.
  284. Arguments:
  285. - root specifies whether the output is wrapped in an XML root element
  286. Default is True
  287. - custom_root allows you to specify a custom root element.
  288. Default is 'root'
  289. - ids specifies whether elements get unique ids.
  290. Default is False
  291. - attr_type specifies whether elements get a data type attribute.
  292. Default is True
  293. - item_func specifies what function should generate the element name for
  294. items in a list.
  295. Default is 'item'
  296. - cdata specifies whether string values should be wrapped in CDATA sections.
  297. Default is False
  298. """
  299. LOG.info('Inside dicttoxml(): type(obj) is: "%s", obj="%s"' % (type(obj).__name__, unicode_me(obj)))
  300. output = []
  301. addline = output.append
  302. if root == True:
  303. addline('<?xml version="1.0" encoding="UTF-8" ?>')
  304. addline('<%s>%s</%s>' % (
  305. custom_root,
  306. convert(obj, ids, attr_type, item_func, cdata, parent=custom_root),
  307. custom_root,
  308. )
  309. )
  310. else:
  311. addline(convert(obj, ids, attr_type, item_func, cdata, parent=''))
  312. return ''.join(output).encode('utf-8')