etree_defs.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #ifndef HAS_ETREE_DEFS_H
  2. #define HAS_ETREE_DEFS_H
  3. /* quick check for Python/libxml2/libxslt devel setup */
  4. #include "Python.h"
  5. #ifndef PY_VERSION_HEX
  6. # error the development package of Python (header files etc.) is not installed correctly
  7. #else
  8. # if PY_VERSION_HEX < 0x02060000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03020000
  9. # error this version of lxml requires Python 2.6, 2.7, 3.2 or later
  10. # endif
  11. #endif
  12. #include "libxml/xmlversion.h"
  13. #ifndef LIBXML_VERSION
  14. # error the development package of libxml2 (header files etc.) is not installed correctly
  15. #else
  16. #if LIBXML_VERSION < 20700
  17. # error minimum required version of libxml2 is 2.7.0
  18. #endif
  19. #endif
  20. #include "libxslt/xsltconfig.h"
  21. #ifndef LIBXSLT_VERSION
  22. # error the development package of libxslt (header files etc.) is not installed correctly
  23. #else
  24. #if LIBXSLT_VERSION < 10123
  25. # error minimum required version of libxslt is 1.1.23
  26. #endif
  27. #endif
  /* Typed shortcuts for fetching the next variadic argument from a va_list. */
  #define va_int(ap)      va_arg(ap, int)
  #define va_charptr(ap)  va_arg(ap, char *)

  /* IS_PYPY is 1 when PYPY_VERSION is defined by the Python headers, else 0. */
  #if defined(PYPY_VERSION)
  # define IS_PYPY 1
  #else
  # define IS_PYPY 0
  #endif

  /* Major version switches, derived from PY_MAJOR_VERSION.
   * Exactly one of IS_PYTHON2 / IS_PYTHON3 is 1. */
  #if PY_MAJOR_VERSION < 3
  # define IS_PYTHON2 1  /* prefer for special casing Python 2.x */
  # define IS_PYTHON3 0  /* avoid */
  #else
  # define IS_PYTHON2 0
  # define IS_PYTHON3 1
  #endif
  /* LXML_UNICODE_STRINGS: outside Python 2 it is always forced to 1 (any
   * earlier definition is discarded).  On Python 2 a definition supplied by
   * the build is kept, and the default is 0. */
  #if !IS_PYTHON2
  # undef LXML_UNICODE_STRINGS
  # define LXML_UNICODE_STRINGS 1
  #elif !defined(LXML_UNICODE_STRINGS)
  # define LXML_UNICODE_STRINGS 0
  #endif
  #if IS_PYPY
  /* Threading is not currently supported by PyPy */
  # if !defined(WITHOUT_THREADING)
  #  define WITHOUT_THREADING
  # endif
  #else
  /* When not building for PyPy, PyWeakref_LockObject() is stubbed out and
   * always evaluates to NULL (its argument is not evaluated). */
  # define PyWeakref_LockObject(obj) (NULL)
  #endif
  /* Stubs for C-API calls that are replaced by constants on some targets.
   * The argument of each stub is ignored. */
  #if IS_PYPY
  /* PyPy: replace whatever the headers provide by constant stubs */
  # undef PyByteArray_Check
  # define PyByteArray_Check(o) (0)
  # undef PyFile_AsFile
  # define PyFile_AsFile(o) (NULL)
  #else
  # if !IS_PYTHON2
  /* Python 3+ doesn't have PyFile_*() anymore */
  #  define PyFile_AsFile(o) (NULL)
  # endif
  #endif
  /* Fallbacks for the PEP 393 style accessors: used when Cython did not
   * enable PEP 393 support and PY_VERSION_HEX is at most 0x03030000.
   * They map onto the Py_UNICODE based API. */
  #if !(defined(CYTHON_PEP393_ENABLED) && CYTHON_PEP393_ENABLED) && (PY_VERSION_HEX <= 0x03030000)
  # define PyUnicode_IS_READY(op)   (0)
  # define PyUnicode_KIND(u)        (sizeof(Py_UNICODE))
  # define PyUnicode_GET_LENGTH(u)  PyUnicode_GET_SIZE(u)
  # define PyUnicode_DATA(u)        ((void*)PyUnicode_AS_UNICODE(u))
  #endif
  /* PyPy only: make sure PyUnicode_FromFormat and (outside Python 2)
   * PyBytes_FromFormat exist, aliasing them to PyString_FromFormat where that
   * macro is available and falling back to a local implementation otherwise. */
  #if IS_PYPY
  # if !defined(PyUnicode_FromFormat)
  #  define PyUnicode_FromFormat PyString_FromFormat
  # endif
  # if !IS_PYTHON2 && !defined(PyBytes_FromFormat)
  #  if defined(PyString_FromFormat)
  #   define PyBytes_FromFormat PyString_FromFormat
  #  else
  #include <stdarg.h>
  /* Format into a Unicode string, then encode the result as UTF-8 bytes.
   * Returns a new bytes object, or NULL with a Python exception set when
   * formatting or encoding fails or does not yield an exact bytes object. */
  static PyObject* PyBytes_FromFormat(const char* format, ...) {
      PyObject *formatted;
      PyObject *encoded;
      va_list vargs;

  #ifdef HAVE_STDARG_PROTOTYPES
      va_start(vargs, format);
  #else
      va_start(vargs);
  #endif
      formatted = PyUnicode_FromFormatV(format, vargs);
      va_end(vargs);
      if (!formatted)
          return NULL;

      if (PyUnicode_Check(formatted)) {
          /* swap the text object for its UTF-8 encoded form */
          encoded = PyUnicode_AsUTF8String(formatted);
          Py_DECREF(formatted);
          if (!encoded)
              return NULL;
          formatted = encoded;
      }

      if (PyBytes_CheckExact(formatted))
          return formatted;

      Py_DECREF(formatted);
      PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object");
      return NULL;
  }
  #  endif
  # endif
  #endif
  /* PySlice_GetIndicesEx() has wrong signature in Py<=3.1:
   * before Python 3.2 the slice argument has to be passed as a
   * PySliceObject*, so it is cast here.  The argument is parenthesized before
   * the cast so that expression arguments are converted as a whole. */
  #if PY_VERSION_HEX >= 0x03020000
  # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(o, l, b, e, s, sl)
  #else
  # define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(((PySliceObject*)(o)), l, b, e, s, sl)
  #endif
  /* ENABLE_THREADING is 0 when WITHOUT_THREADING is defined, else 1.
   * In the non-threaded build, the thread state / GIL calls are replaced by
   * stubs that do nothing beyond referencing their argument. */
  #if defined(WITHOUT_THREADING)
  # define ENABLE_THREADING 0
  # undef PyEval_SaveThread
  # define PyEval_SaveThread() (NULL)
  # undef PyEval_RestoreThread
  # define PyEval_RestoreThread(state)  if (state); else {}
  # undef PyGILState_Ensure
  # define PyGILState_Ensure() (PyGILState_UNLOCKED)
  # undef PyGILState_Release
  # define PyGILState_Release(state)  if (state); else {}
  # undef Py_UNBLOCK_THREADS
  # define Py_UNBLOCK_THREADS  _save = NULL;
  # undef Py_BLOCK_THREADS
  # define Py_BLOCK_THREADS  if (_save); else {}
  #else
  # define ENABLE_THREADING 1
  #endif
  #if 20704 > LIBXML_VERSION
  /* FIXME: hack to make new error reporting compile in old libxml2 versions.
   * The user data argument of xmlXIncludeProcessTreeFlagsData() is dropped
   * and xmlStructuredErrorContext reads as NULL. */
  # define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o)
  # define xmlStructuredErrorContext NULL
  #endif
  /* schematron was added in libxml2 2.6.21
   * ENABLE_SCHEMATRON mirrors LIBXML_SCHEMATRON_ENABLED.  Without schematron
   * support, the types become void, the output flags become 0 and the API
   * calls become stubs, so code using them still compiles. */
  #if !defined(LIBXML_SCHEMATRON_ENABLED)
  # define ENABLE_SCHEMATRON 0
  /* output option flags */
  # define XML_SCHEMATRON_OUT_QUIET 0
  # define XML_SCHEMATRON_OUT_XML 0
  # define XML_SCHEMATRON_OUT_ERROR 0
  /* opaque placeholder types */
  typedef void xmlSchematron;
  typedef void xmlSchematronParserCtxt;
  typedef void xmlSchematronValidCtxt;
  /* constructors and parser evaluate to NULL */
  # define xmlSchematronNewDocParserCtxt(doc) NULL
  # define xmlSchematronNewParserCtxt(file) NULL
  # define xmlSchematronParse(ctxt) NULL
  # define xmlSchematronNewValidCtxt(schema, options) NULL
  /* validation evaluates to 0 */
  # define xmlSchematronValidateDoc(ctxt, doc) 0
  /* destructors and setters expand to nothing */
  # define xmlSchematronFreeParserCtxt(ctxt)
  # define xmlSchematronFree(schema)
  # define xmlSchematronFreeValidCtxt(ctxt)
  # define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
  #else
  # define ENABLE_SCHEMATRON 1
  #endif
  /* Parser option values that are defined here when the installed libxml2
   * version is older than the given threshold. */
  #if 20708 > LIBXML_VERSION
  # define HTML_PARSE_NODEFDTD 4
  #endif

  #if 20900 > LIBXML_VERSION
  # define XML_PARSE_BIG_LINES 4194304
  #endif
  166. #include "libxml/tree.h"
  167. #ifndef LIBXML2_NEW_BUFFER
  168. typedef xmlBuffer xmlBuf;
  169. # define xmlBufContent(buf) xmlBufferContent(buf)
  170. # define xmlBufUse(buf) xmlBufferLength(buf)
  171. #endif
  /* libexslt 1.1.25+ support EXSLT functions in XPath.
   * For older versions, the registration calls expand to nothing. */
  #if 10125 > LIBXSLT_VERSION
  # define exsltDateXpathCtxtRegister(ctxt, prefix)
  # define exsltMathXpathCtxtRegister(ctxt, prefix)
  # define exsltSetsXpathCtxtRegister(ctxt, prefix)
  # define exsltStrXpathCtxtRegister(ctxt, prefix)
  #endif
  /* work around MSDEV 6.0 */
  #if defined(_MSC_VER) && (_MSC_VER == 1200) && (WINVER < 0x0500)
  long _ftol( double ); /* defined by VC6 C libs */
  long _ftol2( double dblSource ) { return _ftol( dblSource ); }
  #endif

  /* unlikely_condition(x): evaluates to x.  On GCC newer than 2.95 the
   * expression is additionally marked as expected to be false. */
  #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
  # define unlikely_condition(x) __builtin_expect((x), 0)
  #else
  # define unlikely_condition(x) (x)
  #endif
  /* Py_TYPE(ob): type pointer of an object; only defined here if the Python
   * headers did not provide it. */
  #if !defined(Py_TYPE)
  # define Py_TYPE(ob)  (((PyObject*)(ob))->ob_type)
  #endif

  /* PY_NEW(T): call the tp_new slot of type T with the module's empty tuple
   * as positional arguments and no keyword arguments. */
  #define PY_NEW(T) \
      (((PyTypeObject*)(T))->tp_new((PyTypeObject*)(T), __pyx_empty_tuple, NULL))

  /* _fqtypename(o): the tp_name string of o's type. */
  #define _fqtypename(o)  ((Py_TYPE(o))->tp_name)
  /* Overflow-checked wrappers around the PyMem_* allocator.
   *
   * lxml_malloc(count, item_size) and lxml_realloc(mem, count, item_size)
   * evaluate to NULL, without calling the allocator, when 'count' is larger
   * than PY_SSIZE_T_MAX / item_size.  Otherwise they request
   * count * item_size bytes from PyMem_Malloc() / PyMem_Realloc().
   *
   * All macro arguments are parenthesized, so expressions such as
   * 'sizeof(a) + sizeof(b)' are handled as a single value, and the product is
   * computed in size_t.  'count' and 'item_size' are evaluated twice;
   * 'item_size' must not be zero.
   */
  #define lxml_malloc(count, item_size) \
      (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
       (PyMem_Malloc((size_t)(count) * (size_t)(item_size))))
  #define lxml_realloc(mem, count, item_size) \
      (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
       (PyMem_Realloc((mem), (size_t)(count) * (size_t)(item_size))))
  /* Release memory obtained from lxml_malloc() / lxml_realloc(). */
  #define lxml_free(mem) PyMem_Free(mem)
  /* _isString(obj): true if obj is a byte string or Unicode string object. */
  #if PY_MAJOR_VERSION >= 3
  /* builtin subtype type checks are almost as fast as exact checks in Py2.7+
   * and Unicode is more common in Py3 */
  # define _isString(obj)  (PyUnicode_Check(obj) || PyBytes_Check(obj))
  #else
  /* Python 2: try the exact type checks first, then fall back to the
   * basestring subtype test */
  # define _isString(obj)  (PyString_CheckExact(obj) || \
                            PyUnicode_CheckExact(obj) || \
                            PyType_IsSubtype(Py_TYPE(obj), &PyBaseString_Type))
  #endif
  /* _isElement(c_node): true for element, comment, entity reference and
   * processing instruction nodes. */
  #define _isElement(c_node) (                    \
      ((c_node)->type == XML_ELEMENT_NODE) ||     \
      ((c_node)->type == XML_COMMENT_NODE) ||     \
      ((c_node)->type == XML_ENTITY_REF_NODE) ||  \
      ((c_node)->type == XML_PI_NODE))

  /* _isElementOrXInclude(c_node): as _isElement(), but also true for the
   * XInclude start/end marker nodes. */
  #define _isElementOrXInclude(c_node) (          \
      _isElement(c_node) ||                       \
      ((c_node)->type == XML_XINCLUDE_START) ||   \
      ((c_node)->type == XML_XINCLUDE_END))

  /* _getNs(c_node): the namespace href of the node, or 0 if it has no
   * namespace. */
  #define _getNs(c_node) \
      (((c_node)->ns != 0) ? ((c_node)->ns->href) : 0)
  /* PyCapsule was added in Py2.7 */
  #if PY_VERSION_HEX >= 0x02070000
  #include "string.h"
  /* Extract the xmlDoc pointer from a capsule named "libxml2:xmlDoc".
   *
   * capsule:  the capsule object to unpack.
   * is_owned: output flag; set to 1 when ownership of the document was taken
   *           over from the capsule, 0 otherwise.
   *
   * Ownership is taken only if the capsule context is the string
   * "destructor:xmlFreeDoc".  In that case the capsule destructor is cleared
   * and the capsule is invalidated by clearing its name.
   *
   * Returns the document pointer, or NULL with a Python exception set if the
   * capsule is invalid, the pointer is not an XML/HTML document node, or one
   * of the capsule calls fails.
   */
  static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
      xmlDoc *c_doc;
      void *context;
      *is_owned = 0;
      if (unlikely_condition(!PyCapsule_IsValid(capsule, (const char*)"libxml2:xmlDoc"))) {
          PyErr_SetString(
              PyExc_TypeError,
              "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc");
          return NULL;
      }
      c_doc = (xmlDoc*) PyCapsule_GetPointer(capsule, (const char*)"libxml2:xmlDoc");
      if (unlikely_condition(!c_doc)) return NULL;
      if (unlikely_condition(c_doc->type != XML_DOCUMENT_NODE && c_doc->type != XML_HTML_DOCUMENT_NODE)) {
          PyErr_Format(
              PyExc_ValueError,
              "Illegal document provided: expected XML or HTML, found %d", (int)c_doc->type);
          return NULL;
      }
      context = PyCapsule_GetContext(capsule);
      /* a NULL context is legal, so only treat it as failure if an error is set */
      if (unlikely_condition(!context && PyErr_Occurred())) return NULL;
      if (context && strcmp((const char*) context, "destructor:xmlFreeDoc") == 0) {
          /* take ownership by setting destructor to NULL */
          if (unlikely_condition(PyCapsule_SetDestructor(capsule, NULL) != 0)) {
              /* The call failed and left an exception set.  The capsule still
               * owns the document, so report the error instead of returning
               * a result with a pending exception. */
              return NULL;
          }
          /* ownership transferred => invalidate capsule by clearing its name */
          if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) {
              /* this should never happen since everything above succeeded */
              xmlFreeDoc(c_doc);
              return NULL;
          }
          *is_owned = 1;
      }
      return c_doc;
  }
  #else
  /* No capsule support: always evaluates to NULL (both arguments are only
   * referenced to avoid unused-variable warnings). */
  # define lxml_unpack_xmldoc_capsule(capsule, is_owned) (((capsule) || (is_owned)) ? NULL : NULL)
  #endif
  268. /* Macro pair implementation of a depth first tree walker
  269. *
  270. * Calls the code block between the BEGIN and END macros for all elements
  271. * below c_tree_top (exclusively), starting at c_node (inclusively iff
  272. * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
  273. * that match _isElement(); the normal variants will stop on every node,
  274. * regardless of its type (text nodes included).
  275. *
  276. * To traverse the node and all of its children and siblings in Pyrex, call
  277. * cdef xmlNode* some_node
  278. * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
  279. * # do something with some_node
  280. * END_FOR_EACH_ELEMENT_FROM(some_node)
  281. *
  282. * To traverse only the children and siblings of a node, call
  283. * cdef xmlNode* some_node
  284. * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
  285. * # do something with some_node
  286. * END_FOR_EACH_ELEMENT_FROM(some_node)
  287. *
  288. * To traverse only the children, do:
  289. * cdef xmlNode* some_node
  290. * some_node = parent_node.children
  291. * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
  292. * # do something with some_node
  293. * END_FOR_EACH_ELEMENT_FROM(some_node)
  294. *
  295. * NOTE: 'some_node' MUST be a plain 'xmlNode*' !
  296. *
  297. * NOTE: parent modification during the walk can divert the iterator, but
  298. * should not segfault !
  299. */
  /* True if c_node is to be visited: any node when only_elements is 0,
   * otherwise only nodes matching _isElement(). */
  #define _LX__ELEMENT_MATCH(c_node, only_elements) \
      (!(only_elements) || _isElement(c_node))

  /* Move c_node forward along its sibling chain until it points to a matching
   * node or becomes 0.  Expands to a complete statement (no ';' needed). */
  #define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
      while ((c_node != 0) && !_LX__ELEMENT_MATCH(c_node, only_elements)) \
          c_node = c_node->next;

  /* Replace c_node by the next matching node in document order, or by 0 when
   * the walk is finished.  c_stop_node marks the top of the traversed subtree:
   * its siblings and ancestors are never visited. */
  #define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
  { \
      /* step 1: descend to the first matching child */ \
      xmlNode* _lx__next = c_node->children; \
      if (_lx__next != 0) { \
          if (c_node->type != XML_ENTITY_REF_NODE && \
                  c_node->type != XML_DTD_NODE) { \
              _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
          } else { \
              /* do not walk into entity references or DTDs */ \
              _lx__next = 0; \
          } \
      } \
      if ((_lx__next == 0) && (c_node != c_stop_node)) { \
          /* step 2: no child to visit, try the following siblings */ \
          _lx__next = c_node->next; \
          _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
          /* step 3: no sibling either, back off through the parents */ \
          while (_lx__next == 0) { \
              c_node = c_node->parent; \
              if ((c_node == 0) || (c_node == c_stop_node)) \
                  break; \
              if ((only_elements) && !_isElement(c_node)) \
                  break; \
              /* the parent itself was traversed already -> its siblings */ \
              _lx__next = c_node->next; \
              _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
          } \
      } \
      c_node = _lx__next; \
  }
  /* Opening half of the tree walker loop.  Positions c_node on the first node
   * to visit and opens the loop body; must be closed by the matching
   * _LX__END_FOR_EACH_FROM().  Nothing is executed if c_node is 0. */
  #define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
  { \
      if (c_node != 0) { \
          const xmlNode* _lx__tree_top = (c_tree_top); \
          const int _lx__only_elements = (only_elements); \
          if (_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
              if (! (inclusive)) { \
                  /* the start node matches but is excluded: skip it */ \
                  _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
              } \
          } else if (c_node == _lx__tree_top) { \
              /* the start node does not match, so 'inclusive' is */ \
              /* irrelevant; at the tree top there is nothing to traverse */ \
              c_node = 0; \
          } else { \
              /* make sure we start at a matching node */ \
              c_node = c_node->next; \
              _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
          } \
          \
          /* now run the user code on the nodes we find */ \
          while (c_node != 0) { \
              /* here goes the code to be run for each node */

  /* Closing half of the tree walker loop: steps to the next node and closes
   * the three scopes opened by _LX__BEGIN_FOR_EACH_FROM(). */
  #define _LX__END_FOR_EACH_FROM(c_node) \
              _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
          } \
      } \
  }

  /* Walker that only stops on nodes matching _isElement(). */
  #define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
      _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
  #define END_FOR_EACH_ELEMENT_FROM(c_node) \
      _LX__END_FOR_EACH_FROM(c_node)

  /* Walker that stops on every node. */
  #define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
      _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
  #define END_FOR_EACH_FROM(c_node) \
      _LX__END_FOR_EACH_FROM(c_node)
  371. #endif /* HAS_ETREE_DEFS_H */