# scanner.py (4.5 KB)
  1. """JSON token scanner
  2. """
  3. import re
  4. def _import_c_make_scanner():
  5. try:
  6. from simplejson._speedups import make_scanner
  7. return make_scanner
  8. except ImportError:
  9. return None
  10. c_make_scanner = _import_c_make_scanner()
  11. __all__ = ['make_scanner', 'JSONDecodeError']
  12. NUMBER_RE = re.compile(
  13. r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
  14. (re.VERBOSE | re.MULTILINE | re.DOTALL))
  15. class JSONDecodeError(ValueError):
  16. """Subclass of ValueError with the following additional properties:
  17. msg: The unformatted error message
  18. doc: The JSON document being parsed
  19. pos: The start index of doc where parsing failed
  20. end: The end index of doc where parsing failed (may be None)
  21. lineno: The line corresponding to pos
  22. colno: The column corresponding to pos
  23. endlineno: The line corresponding to end (may be None)
  24. endcolno: The column corresponding to end (may be None)
  25. """
  26. # Note that this exception is used from _speedups
  27. def __init__(self, msg, doc, pos, end=None):
  28. ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
  29. self.msg = msg
  30. self.doc = doc
  31. self.pos = pos
  32. self.end = end
  33. self.lineno, self.colno = linecol(doc, pos)
  34. if end is not None:
  35. self.endlineno, self.endcolno = linecol(doc, end)
  36. else:
  37. self.endlineno, self.endcolno = None, None
  38. def __reduce__(self):
  39. return self.__class__, (self.msg, self.doc, self.pos, self.end)
  40. def linecol(doc, pos):
  41. lineno = doc.count('\n', 0, pos) + 1
  42. if lineno == 1:
  43. colno = pos + 1
  44. else:
  45. colno = pos - doc.rindex('\n', 0, pos)
  46. return lineno, colno
  47. def errmsg(msg, doc, pos, end=None):
  48. lineno, colno = linecol(doc, pos)
  49. msg = msg.replace('%r', repr(doc[pos:pos + 1]))
  50. if end is None:
  51. fmt = '%s: line %d column %d (char %d)'
  52. return fmt % (msg, lineno, colno, pos)
  53. endlineno, endcolno = linecol(doc, end)
  54. fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
  55. return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
  56. def py_make_scanner(context):
  57. parse_object = context.parse_object
  58. parse_array = context.parse_array
  59. parse_string = context.parse_string
  60. match_number = NUMBER_RE.match
  61. encoding = context.encoding
  62. strict = context.strict
  63. parse_float = context.parse_float
  64. parse_int = context.parse_int
  65. parse_constant = context.parse_constant
  66. object_hook = context.object_hook
  67. object_pairs_hook = context.object_pairs_hook
  68. memo = context.memo
  69. def _scan_once(string, idx):
  70. errmsg = 'Expecting value'
  71. try:
  72. nextchar = string[idx]
  73. except IndexError:
  74. raise JSONDecodeError(errmsg, string, idx)
  75. if nextchar == '"':
  76. return parse_string(string, idx + 1, encoding, strict)
  77. elif nextchar == '{':
  78. return parse_object((string, idx + 1), encoding, strict,
  79. _scan_once, object_hook, object_pairs_hook, memo)
  80. elif nextchar == '[':
  81. return parse_array((string, idx + 1), _scan_once)
  82. elif nextchar == 'n' and string[idx:idx + 4] == 'null':
  83. return None, idx + 4
  84. elif nextchar == 't' and string[idx:idx + 4] == 'true':
  85. return True, idx + 4
  86. elif nextchar == 'f' and string[idx:idx + 5] == 'false':
  87. return False, idx + 5
  88. m = match_number(string, idx)
  89. if m is not None:
  90. integer, frac, exp = m.groups()
  91. if frac or exp:
  92. res = parse_float(integer + (frac or '') + (exp or ''))
  93. else:
  94. res = parse_int(integer)
  95. return res, m.end()
  96. elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
  97. return parse_constant('NaN'), idx + 3
  98. elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
  99. return parse_constant('Infinity'), idx + 8
  100. elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
  101. return parse_constant('-Infinity'), idx + 9
  102. else:
  103. raise JSONDecodeError(errmsg, string, idx)
  104. def scan_once(string, idx):
  105. if idx < 0:
  106. # Ensure the same behavior as the C speedup, otherwise
  107. # this would work for *some* negative string indices due
  108. # to the behavior of __getitem__ for strings. #98
  109. raise JSONDecodeError('Expecting value', string, idx)
  110. try:
  111. return _scan_once(string, idx)
  112. finally:
  113. memo.clear()
  114. return scan_once
  115. make_scanner = c_make_scanner or py_make_scanner