test_ujson.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129
  1. # -*- coding: utf-8 -*-
  2. try:
  3. import json
  4. except ImportError:
  5. import simplejson as json
  6. import calendar
  7. import datetime
  8. import decimal
  9. from functools import partial
  10. import locale
  11. import math
  12. import re
  13. import time
  14. import dateutil
  15. import numpy as np
  16. import pytest
  17. import pytz
  18. import pandas._libs.json as ujson
  19. from pandas._libs.tslib import Timestamp
  20. import pandas.compat as compat
  21. from pandas.compat import StringIO, range, u
  22. from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, date_range
  23. import pandas.util.testing as tm
  24. json_unicode = (json.dumps if compat.PY3
  25. else partial(json.dumps, encoding="utf-8"))
  26. def _clean_dict(d):
  27. """
  28. Sanitize dictionary for JSON by converting all keys to strings.
  29. Parameters
  30. ----------
  31. d : dict
  32. The dictionary to convert.
  33. Returns
  34. -------
  35. cleaned_dict : dict
  36. """
  37. return {str(k): v for k, v in compat.iteritems(d)}
  38. @pytest.fixture(params=[
  39. None, # Column indexed by default.
  40. "split",
  41. "records",
  42. "values",
  43. "index"])
  44. def orient(request):
  45. return request.param
  46. @pytest.fixture(params=[None, True])
  47. def numpy(request):
  48. return request.param
  49. class TestUltraJSONTests(object):
  50. @pytest.mark.skipif(compat.is_platform_32bit(),
  51. reason="not compliant on 32-bit, xref #15865")
  52. def test_encode_decimal(self):
  53. sut = decimal.Decimal("1337.1337")
  54. encoded = ujson.encode(sut, double_precision=15)
  55. decoded = ujson.decode(encoded)
  56. assert decoded == 1337.1337
  57. sut = decimal.Decimal("0.95")
  58. encoded = ujson.encode(sut, double_precision=1)
  59. assert encoded == "1.0"
  60. decoded = ujson.decode(encoded)
  61. assert decoded == 1.0
  62. sut = decimal.Decimal("0.94")
  63. encoded = ujson.encode(sut, double_precision=1)
  64. assert encoded == "0.9"
  65. decoded = ujson.decode(encoded)
  66. assert decoded == 0.9
  67. sut = decimal.Decimal("1.95")
  68. encoded = ujson.encode(sut, double_precision=1)
  69. assert encoded == "2.0"
  70. decoded = ujson.decode(encoded)
  71. assert decoded == 2.0
  72. sut = decimal.Decimal("-1.95")
  73. encoded = ujson.encode(sut, double_precision=1)
  74. assert encoded == "-2.0"
  75. decoded = ujson.decode(encoded)
  76. assert decoded == -2.0
  77. sut = decimal.Decimal("0.995")
  78. encoded = ujson.encode(sut, double_precision=2)
  79. assert encoded == "1.0"
  80. decoded = ujson.decode(encoded)
  81. assert decoded == 1.0
  82. sut = decimal.Decimal("0.9995")
  83. encoded = ujson.encode(sut, double_precision=3)
  84. assert encoded == "1.0"
  85. decoded = ujson.decode(encoded)
  86. assert decoded == 1.0
  87. sut = decimal.Decimal("0.99999999999999944")
  88. encoded = ujson.encode(sut, double_precision=15)
  89. assert encoded == "1.0"
  90. decoded = ujson.decode(encoded)
  91. assert decoded == 1.0
  92. @pytest.mark.parametrize("ensure_ascii", [True, False])
  93. def test_encode_string_conversion(self, ensure_ascii):
  94. string_input = "A string \\ / \b \f \n \r \t </script> &"
  95. not_html_encoded = ('"A string \\\\ \\/ \\b \\f \\n '
  96. '\\r \\t <\\/script> &"')
  97. html_encoded = ('"A string \\\\ \\/ \\b \\f \\n \\r \\t '
  98. '\\u003c\\/script\\u003e \\u0026"')
  99. def helper(expected_output, **encode_kwargs):
  100. output = ujson.encode(string_input,
  101. ensure_ascii=ensure_ascii,
  102. **encode_kwargs)
  103. assert output == expected_output
  104. assert string_input == json.loads(output)
  105. assert string_input == ujson.decode(output)
  106. # Default behavior assumes encode_html_chars=False.
  107. helper(not_html_encoded)
  108. # Make sure explicit encode_html_chars=False works.
  109. helper(not_html_encoded, encode_html_chars=False)
  110. # Make sure explicit encode_html_chars=True does the encoding.
  111. helper(html_encoded, encode_html_chars=True)
  112. @pytest.mark.parametrize("long_number", [
  113. -4342969734183514, -12345678901234.56789012, -528656961.4399388
  114. ])
  115. def test_double_long_numbers(self, long_number):
  116. sut = {u("a"): long_number}
  117. encoded = ujson.encode(sut, double_precision=15)
  118. decoded = ujson.decode(encoded)
  119. assert sut == decoded
  120. def test_encode_non_c_locale(self):
  121. lc_category = locale.LC_NUMERIC
  122. # We just need one of these locales to work.
  123. for new_locale in ("it_IT.UTF-8", "Italian_Italy"):
  124. if tm.can_set_locale(new_locale, lc_category):
  125. with tm.set_locale(new_locale, lc_category):
  126. assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60
  127. assert ujson.loads("4.78", precise_float=True) == 4.78
  128. break
  129. def test_decimal_decode_test_precise(self):
  130. sut = {u("a"): 4.56}
  131. encoded = ujson.encode(sut)
  132. decoded = ujson.decode(encoded, precise_float=True)
  133. assert sut == decoded
  134. @pytest.mark.skipif(compat.is_platform_windows() and not compat.PY3,
  135. reason="buggy on win-64 for py2")
  136. def test_encode_double_tiny_exponential(self):
  137. num = 1e-40
  138. assert num == ujson.decode(ujson.encode(num))
  139. num = 1e-100
  140. assert num == ujson.decode(ujson.encode(num))
  141. num = -1e-45
  142. assert num == ujson.decode(ujson.encode(num))
  143. num = -1e-145
  144. assert np.allclose(num, ujson.decode(ujson.encode(num)))
  145. @pytest.mark.parametrize("unicode_key", [
  146. u("key1"), u("بن")
  147. ])
  148. def test_encode_dict_with_unicode_keys(self, unicode_key):
  149. unicode_dict = {unicode_key: u("value1")}
  150. assert unicode_dict == ujson.decode(ujson.encode(unicode_dict))
  151. @pytest.mark.parametrize("double_input", [
  152. math.pi,
  153. -math.pi # Should work with negatives too.
  154. ])
  155. def test_encode_double_conversion(self, double_input):
  156. output = ujson.encode(double_input)
  157. assert round(double_input, 5) == round(json.loads(output), 5)
  158. assert round(double_input, 5) == round(ujson.decode(output), 5)
  159. def test_encode_with_decimal(self):
  160. decimal_input = 1.0
  161. output = ujson.encode(decimal_input)
  162. assert output == "1.0"
  163. def test_encode_array_of_nested_arrays(self):
  164. nested_input = [[[[]]]] * 20
  165. output = ujson.encode(nested_input)
  166. assert nested_input == json.loads(output)
  167. assert nested_input == ujson.decode(output)
  168. nested_input = np.array(nested_input)
  169. tm.assert_numpy_array_equal(nested_input, ujson.decode(
  170. output, numpy=True, dtype=nested_input.dtype))
  171. def test_encode_array_of_doubles(self):
  172. doubles_input = [31337.31337, 31337.31337,
  173. 31337.31337, 31337.31337] * 10
  174. output = ujson.encode(doubles_input)
  175. assert doubles_input == json.loads(output)
  176. assert doubles_input == ujson.decode(output)
  177. tm.assert_numpy_array_equal(np.array(doubles_input),
  178. ujson.decode(output, numpy=True))
  179. def test_double_precision(self):
  180. double_input = 30.012345678901234
  181. output = ujson.encode(double_input, double_precision=15)
  182. assert double_input == json.loads(output)
  183. assert double_input == ujson.decode(output)
  184. for double_precision in (3, 9):
  185. output = ujson.encode(double_input,
  186. double_precision=double_precision)
  187. rounded_input = round(double_input, double_precision)
  188. assert rounded_input == json.loads(output)
  189. assert rounded_input == ujson.decode(output)
  190. @pytest.mark.parametrize("invalid_val", [
  191. 20, -1, "9", None
  192. ])
  193. def test_invalid_double_precision(self, invalid_val):
  194. double_input = 30.12345678901234567890
  195. expected_exception = (ValueError if isinstance(invalid_val, int)
  196. else TypeError)
  197. with pytest.raises(expected_exception):
  198. ujson.encode(double_input, double_precision=invalid_val)
  199. def test_encode_string_conversion2(self):
  200. string_input = "A string \\ / \b \f \n \r \t"
  201. output = ujson.encode(string_input)
  202. assert string_input == json.loads(output)
  203. assert string_input == ujson.decode(output)
  204. assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
  205. @pytest.mark.parametrize("unicode_input", [
  206. "Räksmörgås اسامة بن محمد بن عوض بن لادن",
  207. "\xe6\x97\xa5\xd1\x88"
  208. ])
  209. def test_encode_unicode_conversion(self, unicode_input):
  210. enc = ujson.encode(unicode_input)
  211. dec = ujson.decode(enc)
  212. assert enc == json_unicode(unicode_input)
  213. assert dec == json.loads(enc)
  214. def test_encode_control_escaping(self):
  215. escaped_input = "\x19"
  216. enc = ujson.encode(escaped_input)
  217. dec = ujson.decode(enc)
  218. assert escaped_input == dec
  219. assert enc == json_unicode(escaped_input)
  220. def test_encode_unicode_surrogate_pair(self):
  221. surrogate_input = "\xf0\x90\x8d\x86"
  222. enc = ujson.encode(surrogate_input)
  223. dec = ujson.decode(enc)
  224. assert enc == json_unicode(surrogate_input)
  225. assert dec == json.loads(enc)
  226. def test_encode_unicode_4bytes_utf8(self):
  227. four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL"
  228. enc = ujson.encode(four_bytes_input)
  229. dec = ujson.decode(enc)
  230. assert enc == json_unicode(four_bytes_input)
  231. assert dec == json.loads(enc)
  232. def test_encode_unicode_4bytes_utf8highest(self):
  233. four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL"
  234. enc = ujson.encode(four_bytes_input)
  235. dec = ujson.decode(enc)
  236. assert enc == json_unicode(four_bytes_input)
  237. assert dec == json.loads(enc)
  238. def test_encode_array_in_array(self):
  239. arr_in_arr_input = [[[[]]]]
  240. output = ujson.encode(arr_in_arr_input)
  241. assert arr_in_arr_input == json.loads(output)
  242. assert output == json.dumps(arr_in_arr_input)
  243. assert arr_in_arr_input == ujson.decode(output)
  244. tm.assert_numpy_array_equal(np.array(arr_in_arr_input),
  245. ujson.decode(output, numpy=True))
  246. @pytest.mark.parametrize("num_input", [
  247. 31337,
  248. -31337, # Negative number.
  249. -9223372036854775808 # Large negative number.
  250. ])
  251. def test_encode_num_conversion(self, num_input):
  252. output = ujson.encode(num_input)
  253. assert num_input == json.loads(output)
  254. assert output == json.dumps(num_input)
  255. assert num_input == ujson.decode(output)
  256. def test_encode_list_conversion(self):
  257. list_input = [1, 2, 3, 4]
  258. output = ujson.encode(list_input)
  259. assert list_input == json.loads(output)
  260. assert list_input == ujson.decode(output)
  261. tm.assert_numpy_array_equal(np.array(list_input),
  262. ujson.decode(output, numpy=True))
  263. def test_encode_dict_conversion(self):
  264. dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
  265. output = ujson.encode(dict_input)
  266. assert dict_input == json.loads(output)
  267. assert dict_input == ujson.decode(output)
  268. @pytest.mark.parametrize("builtin_value", [None, True, False])
  269. def test_encode_builtin_values_conversion(self, builtin_value):
  270. output = ujson.encode(builtin_value)
  271. assert builtin_value == json.loads(output)
  272. assert output == json.dumps(builtin_value)
  273. assert builtin_value == ujson.decode(output)
  274. def test_encode_datetime_conversion(self):
  275. datetime_input = datetime.datetime.fromtimestamp(time.time())
  276. output = ujson.encode(datetime_input, date_unit="s")
  277. expected = calendar.timegm(datetime_input.utctimetuple())
  278. assert int(expected) == json.loads(output)
  279. assert int(expected) == ujson.decode(output)
  280. def test_encode_date_conversion(self):
  281. date_input = datetime.date.fromtimestamp(time.time())
  282. output = ujson.encode(date_input, date_unit="s")
  283. tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0)
  284. expected = calendar.timegm(tup)
  285. assert int(expected) == json.loads(output)
  286. assert int(expected) == ujson.decode(output)
  287. @pytest.mark.parametrize("test", [
  288. datetime.time(),
  289. datetime.time(1, 2, 3),
  290. datetime.time(10, 12, 15, 343243),
  291. ])
  292. def test_encode_time_conversion_basic(self, test):
  293. output = ujson.encode(test)
  294. expected = '"{iso}"'.format(iso=test.isoformat())
  295. assert expected == output
  296. def test_encode_time_conversion_pytz(self):
  297. # see gh-11473: to_json segfaults with timezone-aware datetimes
  298. test = datetime.time(10, 12, 15, 343243, pytz.utc)
  299. output = ujson.encode(test)
  300. expected = '"{iso}"'.format(iso=test.isoformat())
  301. assert expected == output
  302. def test_encode_time_conversion_dateutil(self):
  303. # see gh-11473: to_json segfaults with timezone-aware datetimes
  304. test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc())
  305. output = ujson.encode(test)
  306. expected = '"{iso}"'.format(iso=test.isoformat())
  307. assert expected == output
  308. @pytest.mark.parametrize("decoded_input", [
  309. NaT,
  310. np.datetime64("NaT"),
  311. np.nan,
  312. np.inf,
  313. -np.inf
  314. ])
  315. def test_encode_as_null(self, decoded_input):
  316. assert ujson.encode(decoded_input) == "null", "Expected null"
  317. def test_datetime_units(self):
  318. val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
  319. stamp = Timestamp(val)
  320. roundtrip = ujson.decode(ujson.encode(val, date_unit='s'))
  321. assert roundtrip == stamp.value // 10**9
  322. roundtrip = ujson.decode(ujson.encode(val, date_unit='ms'))
  323. assert roundtrip == stamp.value // 10**6
  324. roundtrip = ujson.decode(ujson.encode(val, date_unit='us'))
  325. assert roundtrip == stamp.value // 10**3
  326. roundtrip = ujson.decode(ujson.encode(val, date_unit='ns'))
  327. assert roundtrip == stamp.value
  328. msg = "Invalid value 'foo' for option 'date_unit'"
  329. with pytest.raises(ValueError, match=msg):
  330. ujson.encode(val, date_unit='foo')
  331. def test_encode_to_utf8(self):
  332. unencoded = "\xe6\x97\xa5\xd1\x88"
  333. enc = ujson.encode(unencoded, ensure_ascii=False)
  334. dec = ujson.decode(enc)
  335. assert enc == json_unicode(unencoded, ensure_ascii=False)
  336. assert dec == json.loads(enc)
  337. def test_decode_from_unicode(self):
  338. unicode_input = u("{\"obj\": 31337}")
  339. dec1 = ujson.decode(unicode_input)
  340. dec2 = ujson.decode(str(unicode_input))
  341. assert dec1 == dec2
  342. def test_encode_recursion_max(self):
  343. # 8 is the max recursion depth
  344. class O2(object):
  345. member = 0
  346. pass
  347. class O1(object):
  348. member = 0
  349. pass
  350. decoded_input = O1()
  351. decoded_input.member = O2()
  352. decoded_input.member.member = decoded_input
  353. with pytest.raises(OverflowError):
  354. ujson.encode(decoded_input)
  355. def test_decode_jibberish(self):
  356. jibberish = "fdsa sda v9sa fdsa"
  357. with pytest.raises(ValueError):
  358. ujson.decode(jibberish)
  359. @pytest.mark.parametrize("broken_json", [
  360. "[", # Broken array start.
  361. "{", # Broken object start.
  362. "]", # Broken array end.
  363. "}", # Broken object end.
  364. ])
  365. def test_decode_broken_json(self, broken_json):
  366. with pytest.raises(ValueError):
  367. ujson.decode(broken_json)
  368. @pytest.mark.parametrize("too_big_char", [
  369. "[",
  370. "{",
  371. ])
  372. def test_decode_depth_too_big(self, too_big_char):
  373. with pytest.raises(ValueError):
  374. ujson.decode(too_big_char * (1024 * 1024))
  375. @pytest.mark.parametrize("bad_string", [
  376. "\"TESTING", # Unterminated.
  377. "\"TESTING\\\"", # Unterminated escape.
  378. "tru", # Broken True.
  379. "fa", # Broken False.
  380. "n", # Broken None.
  381. ])
  382. def test_decode_bad_string(self, bad_string):
  383. with pytest.raises(ValueError):
  384. ujson.decode(bad_string)
  385. @pytest.mark.parametrize("broken_json", [
  386. '{{1337:""}}',
  387. '{{"key":"}',
  388. '[[[true',
  389. ])
  390. def test_decode_broken_json_leak(self, broken_json):
  391. for _ in range(1000):
  392. with pytest.raises(ValueError):
  393. ujson.decode(broken_json)
  394. @pytest.mark.parametrize("invalid_dict", [
  395. "{{{{31337}}}}", # No key.
  396. "{{{{\"key\":}}}}", # No value.
  397. "{{{{\"key\"}}}}", # No colon or value.
  398. ])
  399. def test_decode_invalid_dict(self, invalid_dict):
  400. with pytest.raises(ValueError):
  401. ujson.decode(invalid_dict)
  402. @pytest.mark.parametrize("numeric_int_as_str", [
  403. "31337", "-31337" # Should work with negatives.
  404. ])
  405. def test_decode_numeric_int(self, numeric_int_as_str):
  406. assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str)
  407. @pytest.mark.skipif(compat.PY3, reason="only PY2")
  408. def test_encode_unicode_4bytes_utf8_fail(self):
  409. with pytest.raises(OverflowError):
  410. ujson.encode("\xfd\xbf\xbf\xbf\xbf\xbf")
  411. def test_encode_null_character(self):
  412. wrapped_input = "31337 \x00 1337"
  413. output = ujson.encode(wrapped_input)
  414. assert wrapped_input == json.loads(output)
  415. assert output == json.dumps(wrapped_input)
  416. assert wrapped_input == ujson.decode(output)
  417. alone_input = "\x00"
  418. output = ujson.encode(alone_input)
  419. assert alone_input == json.loads(output)
  420. assert output == json.dumps(alone_input)
  421. assert alone_input == ujson.decode(output)
  422. assert '" \\u0000\\r\\n "' == ujson.dumps(u(" \u0000\r\n "))
  423. def test_decode_null_character(self):
  424. wrapped_input = "\"31337 \\u0000 31337\""
  425. assert ujson.decode(wrapped_input) == json.loads(wrapped_input)
  426. def test_encode_list_long_conversion(self):
  427. long_input = [9223372036854775807, 9223372036854775807,
  428. 9223372036854775807, 9223372036854775807,
  429. 9223372036854775807, 9223372036854775807]
  430. output = ujson.encode(long_input)
  431. assert long_input == json.loads(output)
  432. assert long_input == ujson.decode(output)
  433. tm.assert_numpy_array_equal(np.array(long_input),
  434. ujson.decode(output, numpy=True,
  435. dtype=np.int64))
  436. def test_encode_long_conversion(self):
  437. long_input = 9223372036854775807
  438. output = ujson.encode(long_input)
  439. assert long_input == json.loads(output)
  440. assert output == json.dumps(long_input)
  441. assert long_input == ujson.decode(output)
  442. @pytest.mark.parametrize("int_exp", [
  443. "1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"
  444. ])
  445. def test_decode_numeric_int_exp(self, int_exp):
  446. assert ujson.decode(int_exp) == json.loads(int_exp)
  447. def test_dump_to_file(self):
  448. f = StringIO()
  449. ujson.dump([1, 2, 3], f)
  450. assert "[1,2,3]" == f.getvalue()
  451. def test_dump_to_file_like(self):
  452. class FileLike(object):
  453. def __init__(self):
  454. self.bytes = ''
  455. def write(self, data_bytes):
  456. self.bytes += data_bytes
  457. f = FileLike()
  458. ujson.dump([1, 2, 3], f)
  459. assert "[1,2,3]" == f.bytes
  460. def test_dump_file_args_error(self):
  461. with pytest.raises(TypeError):
  462. ujson.dump([], "")
  463. def test_load_file(self):
  464. data = "[1,2,3,4]"
  465. exp_data = [1, 2, 3, 4]
  466. f = StringIO(data)
  467. assert exp_data == ujson.load(f)
  468. f = StringIO(data)
  469. tm.assert_numpy_array_equal(np.array(exp_data),
  470. ujson.load(f, numpy=True))
  471. def test_load_file_like(self):
  472. class FileLike(object):
  473. def read(self):
  474. try:
  475. self.end
  476. except AttributeError:
  477. self.end = True
  478. return "[1,2,3,4]"
  479. exp_data = [1, 2, 3, 4]
  480. f = FileLike()
  481. assert exp_data == ujson.load(f)
  482. f = FileLike()
  483. tm.assert_numpy_array_equal(np.array(exp_data),
  484. ujson.load(f, numpy=True))
  485. def test_load_file_args_error(self):
  486. with pytest.raises(TypeError):
  487. ujson.load("[]")
  488. def test_version(self):
  489. assert re.match(r'^\d+\.\d+(\.\d+)?$', ujson.__version__), \
  490. "ujson.__version__ must be a string like '1.4.0'"
  491. def test_encode_numeric_overflow(self):
  492. with pytest.raises(OverflowError):
  493. ujson.encode(12839128391289382193812939)
  494. def test_encode_numeric_overflow_nested(self):
  495. class Nested(object):
  496. x = 12839128391289382193812939
  497. for _ in range(0, 100):
  498. with pytest.raises(OverflowError):
  499. ujson.encode(Nested())
  500. @pytest.mark.parametrize("val", [
  501. 3590016419, 2**31, 2**32, (2**32) - 1
  502. ])
  503. def test_decode_number_with_32bit_sign_bit(self, val):
  504. # Test that numbers that fit within 32 bits but would have the
  505. # sign bit set (2**31 <= x < 2**32) are decoded properly.
  506. doc = '{{"id": {val}}}'.format(val=val)
  507. assert ujson.decode(doc)["id"] == val
  508. def test_encode_big_escape(self):
  509. # Make sure no Exception is raised.
  510. for _ in range(10):
  511. base = '\u00e5'.encode("utf-8") if compat.PY3 else "\xc3\xa5"
  512. escape_input = base * 1024 * 1024 * 2
  513. ujson.encode(escape_input)
  514. def test_decode_big_escape(self):
  515. # Make sure no Exception is raised.
  516. for _ in range(10):
  517. base = '\u00e5'.encode("utf-8") if compat.PY3 else "\xc3\xa5"
  518. quote = compat.str_to_bytes("\"")
  519. escape_input = quote + (base * 1024 * 1024 * 2) + quote
  520. ujson.decode(escape_input)
  521. def test_to_dict(self):
  522. d = {u("key"): 31337}
  523. class DictTest(object):
  524. def toDict(self):
  525. return d
  526. o = DictTest()
  527. output = ujson.encode(o)
  528. dec = ujson.decode(output)
  529. assert dec == d
  530. def test_default_handler(self):
  531. class _TestObject(object):
  532. def __init__(self, val):
  533. self.val = val
  534. @property
  535. def recursive_attr(self):
  536. return _TestObject("recursive_attr")
  537. def __str__(self):
  538. return str(self.val)
  539. msg = "Maximum recursion level reached"
  540. with pytest.raises(OverflowError, match=msg):
  541. ujson.encode(_TestObject("foo"))
  542. assert '"foo"' == ujson.encode(_TestObject("foo"),
  543. default_handler=str)
  544. def my_handler(_):
  545. return "foobar"
  546. assert '"foobar"' == ujson.encode(_TestObject("foo"),
  547. default_handler=my_handler)
  548. def my_handler_raises(_):
  549. raise TypeError("I raise for anything")
  550. with pytest.raises(TypeError, match="I raise for anything"):
  551. ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)
  552. def my_int_handler(_):
  553. return 42
  554. assert ujson.decode(ujson.encode(_TestObject("foo"),
  555. default_handler=my_int_handler)) == 42
  556. def my_obj_handler(_):
  557. return datetime.datetime(2013, 2, 3)
  558. assert (ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))) ==
  559. ujson.decode(ujson.encode(_TestObject("foo"),
  560. default_handler=my_obj_handler)))
  561. obj_list = [_TestObject("foo"), _TestObject("bar")]
  562. assert (json.loads(json.dumps(obj_list, default=str)) ==
  563. ujson.decode(ujson.encode(obj_list, default_handler=str)))
  564. class TestNumpyJSONTests(object):
  565. @pytest.mark.parametrize("bool_input", [True, False])
  566. def test_bool(self, bool_input):
  567. b = np.bool(bool_input)
  568. assert ujson.decode(ujson.encode(b)) == b
  569. def test_bool_array(self):
  570. bool_array = np.array([
  571. True, False, True, True,
  572. False, True, False, False], dtype=np.bool)
  573. output = np.array(ujson.decode(
  574. ujson.encode(bool_array)), dtype=np.bool)
  575. tm.assert_numpy_array_equal(bool_array, output)
  576. def test_int(self, any_int_dtype):
  577. klass = np.dtype(any_int_dtype).type
  578. num = klass(1)
  579. assert klass(ujson.decode(ujson.encode(num))) == num
  580. def test_int_array(self, any_int_dtype):
  581. arr = np.arange(100, dtype=np.int)
  582. arr_input = arr.astype(any_int_dtype)
  583. arr_output = np.array(ujson.decode(ujson.encode(arr_input)),
  584. dtype=any_int_dtype)
  585. tm.assert_numpy_array_equal(arr_input, arr_output)
  586. def test_int_max(self, any_int_dtype):
  587. if any_int_dtype in ("int64", "uint64") and compat.is_platform_32bit():
  588. pytest.skip("Cannot test 64-bit integer on 32-bit platform")
  589. klass = np.dtype(any_int_dtype).type
  590. # uint64 max will always overflow,
  591. # as it's encoded to signed.
  592. if any_int_dtype == "uint64":
  593. num = np.iinfo("int64").max
  594. else:
  595. num = np.iinfo(any_int_dtype).max
  596. assert klass(ujson.decode(ujson.encode(num))) == num
  597. def test_float(self, float_dtype):
  598. klass = np.dtype(float_dtype).type
  599. num = klass(256.2013)
  600. assert klass(ujson.decode(ujson.encode(num))) == num
  601. def test_float_array(self, float_dtype):
  602. arr = np.arange(12.5, 185.72, 1.7322, dtype=np.float)
  603. float_input = arr.astype(float_dtype)
  604. float_output = np.array(ujson.decode(
  605. ujson.encode(float_input, double_precision=15)),
  606. dtype=float_dtype)
  607. tm.assert_almost_equal(float_input, float_output)
  608. def test_float_max(self, float_dtype):
  609. klass = np.dtype(float_dtype).type
  610. num = klass(np.finfo(float_dtype).max / 10)
  611. tm.assert_almost_equal(klass(ujson.decode(
  612. ujson.encode(num, double_precision=15))), num)
  613. def test_array_basic(self):
  614. arr = np.arange(96)
  615. arr = arr.reshape((2, 2, 2, 2, 3, 2))
  616. tm.assert_numpy_array_equal(
  617. np.array(ujson.decode(ujson.encode(arr))), arr)
  618. tm.assert_numpy_array_equal(ujson.decode(
  619. ujson.encode(arr), numpy=True), arr)
  620. @pytest.mark.parametrize("shape", [
  621. (10, 10),
  622. (5, 5, 4),
  623. (100, 1),
  624. ])
  625. def test_array_reshaped(self, shape):
  626. arr = np.arange(100)
  627. arr = arr.reshape(shape)
  628. tm.assert_numpy_array_equal(
  629. np.array(ujson.decode(ujson.encode(arr))), arr)
  630. tm.assert_numpy_array_equal(ujson.decode(
  631. ujson.encode(arr), numpy=True), arr)
  632. def test_array_list(self):
  633. arr_list = ["a", list(), dict(), dict(), list(),
  634. 42, 97.8, ["a", "b"], {"key": "val"}]
  635. arr = np.array(arr_list)
  636. tm.assert_numpy_array_equal(
  637. np.array(ujson.decode(ujson.encode(arr))), arr)
  638. def test_array_float(self):
  639. dtype = np.float32
  640. arr = np.arange(100.202, 200.202, 1, dtype=dtype)
  641. arr = arr.reshape((5, 5, 4))
  642. arr_out = np.array(ujson.decode(ujson.encode(arr)), dtype=dtype)
  643. tm.assert_almost_equal(arr, arr_out)
  644. arr_out = ujson.decode(ujson.encode(arr), numpy=True, dtype=dtype)
  645. tm.assert_almost_equal(arr, arr_out)
  646. def test_0d_array(self):
  647. with pytest.raises(TypeError):
  648. ujson.encode(np.array(1))
  649. @pytest.mark.parametrize("bad_input,exc_type,kwargs", [
  650. ([{}, []], ValueError, {}),
  651. ([42, None], TypeError, {}),
  652. ([["a"], 42], ValueError, {}),
  653. ([42, {}, "a"], TypeError, {}),
  654. ([42, ["a"], 42], ValueError, {}),
  655. (["a", "b", [], "c"], ValueError, {}),
  656. ([{"a": "b"}], ValueError, dict(labelled=True)),
  657. ({"a": {"b": {"c": 42}}}, ValueError, dict(labelled=True)),
  658. ([{"a": 42, "b": 23}, {"c": 17}], ValueError, dict(labelled=True))
  659. ])
  660. def test_array_numpy_except(self, bad_input, exc_type, kwargs):
  661. with pytest.raises(exc_type):
  662. ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs)
  def test_array_numpy_labelled(self):
      """Check the result of ``loads(..., numpy=True, labelled=True)``.

      Every case inspects entries 0, 1 and 2 of the decoded result and
      compares each against an expected array or ``None``.
      """
      decode_opts = {"numpy": True, "labelled": True}

      # A dict whose single key maps to an empty list.
      result = ujson.loads(ujson.dumps({"a": []}), **decode_opts)
      assert (np.empty((1, 0)) == result[0]).all()
      assert (np.array(["a"]) == result[1]).all()
      assert result[2] is None

      # A list holding one single-key dict.
      result = ujson.loads(ujson.dumps([{"a": 42}]), **decode_opts)
      assert (np.array([u("a")]) == result[2]).all()
      assert (np.array([42]) == result[0]).all()
      assert result[1] is None

      # Both remaining cases expect the same 3x2 block of values; the
      # float 2.4 is cast to an integer by ``dtype=int``.
      expected_vals = np.array(
          [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))

      # see gh-10837: write out the dump explicitly
      # so there is no dependency on iteration order
      list_of_records = ('[{"a": 42, "b":31}, {"a": 24, "c": 99}, '
                         '{"a": 2.4, "b": 78}]')
      result = ujson.loads(list_of_records, **decode_opts)
      assert (expected_vals == result[0]).all()
      assert result[1] is None
      assert (np.array([u("a"), "b"]) == result[2]).all()

      dict_of_records = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, '
                         '"3": {"a": 2.4, "b": 78}}')
      result = ujson.loads(dict_of_records, **decode_opts)
      assert (expected_vals == result[0]).all()
      assert (np.array(["1", "2", "3"]) == result[1]).all()
      assert (np.array(["a", "b"]) == result[2]).all()
  694. class TestPandasJSONTests(object):
  def test_dataframe(self, orient, numpy):
      """Round-trip a small DataFrame through ``ujson`` encode/decode.

      ``orient`` and ``numpy`` arrive as arguments (presumably pytest
      fixtures defined elsewhere -- confirm); a value of ``None`` means the
      matching keyword is not passed to encode/decode at all.
      """
      if numpy and orient == "records":
          pytest.skip("Not idiomatic pandas")

      frame = DataFrame([[1, 2, 3], [4, 5, 6]],
                        index=["a", "b"], columns=["x", "y", "z"])

      encode_kwargs = dict(orient=orient) if orient is not None else {}
      decode_kwargs = dict(numpy=numpy) if numpy is not None else {}

      encoded = ujson.encode(frame, **encode_kwargs)
      decoded = ujson.decode(encoded, **decode_kwargs)

      # Ensure proper DataFrame initialization.
      if orient == "split":
          result = DataFrame(**_clean_dict(decoded))
      else:
          result = DataFrame(decoded)

      # Corrections to enable DataFrame comparison.
      if orient == "values":
          frame.columns = [0, 1, 2]
          frame.index = [0, 1]
      elif orient == "records":
          frame.index = [0, 1]
      elif orient == "index":
          frame = frame.transpose()

      tm.assert_frame_equal(result, frame, check_dtype=False)
  def test_dataframe_nested(self, orient):
      """A dict of DataFrames must decode to a dict of the decoded frames."""
      frame = DataFrame([[1, 2, 3], [4, 5, 6]],
                        index=["a", "b"], columns=["x", "y", "z"])
      kwargs = dict(orient=orient) if orient is not None else {}

      nested = {"df1": frame, "df2": frame.copy()}

      # Expected value: the frame round-tripped on its own, once per key.
      exp = {key: ujson.decode(ujson.encode(frame, **kwargs))
             for key in ("df1", "df2")}

      round_tripped = ujson.decode(ujson.encode(nested, **kwargs))
      assert round_tripped == exp
  def test_dataframe_numpy_labelled(self, orient):
      """Round-trip a DataFrame via ``decode(..., numpy=True, labelled=True)``.

      The decoded result is unpacked positionally into the ``DataFrame``
      constructor and compared against the input frame, after adjusting
      the input for the orients whose layout differs.
      """
      if orient in ("split", "values"):
          pytest.skip("Incompatible with labelled=True")

      # Use the builtin ``int``: ``np.int`` was only an alias for it,
      # deprecated in NumPy 1.20 and removed in 1.24, where accessing it
      # raises AttributeError.
      df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"],
                     columns=["x", "y", "z"], dtype=int)
      kwargs = {} if orient is None else dict(orient=orient)

      output = DataFrame(*ujson.decode(ujson.encode(df, **kwargs),
                                       numpy=True, labelled=True))

      # Adjust the expected frame to match what each orient decodes to.
      if orient is None:
          df = df.T
      elif orient == "records":
          df.index = [0, 1]

      tm.assert_frame_equal(output, df)
  def test_series(self, orient, numpy):
      """Round-trip a named, sorted Series through ``ujson`` encode/decode.

      ``orient`` / ``numpy`` of ``None`` mean the corresponding keyword is
      omitted from the encode / decode call.
      """
      ser = Series([10, 20, 30, 40, 50, 60], name="series",
                   index=[6, 7, 8, 9, 10, 15]).sort_values()

      encode_kwargs = dict(orient=orient) if orient is not None else {}
      decode_kwargs = dict(numpy=numpy) if numpy is not None else {}

      encoded = ujson.encode(ser, **encode_kwargs)
      decoded = ujson.decode(encoded, **decode_kwargs)

      if orient == "split":
          result = Series(**_clean_dict(decoded))
      else:
          result = Series(decoded)

      # Adjust the expected Series to what each orient is compared against.
      if orient in (None, "index"):
          ser.name = None
          result = result.sort_values()
          ser.index = ["6", "7", "8", "9", "10", "15"]
      elif orient in ("records", "values"):
          ser.name = None
          ser.index = [0, 1, 2, 3, 4, 5]

      tm.assert_series_equal(result, ser, check_dtype=False)
  def test_series_nested(self, orient):
      """A dict of Series must decode to a dict of the decoded Series."""
      ser = Series([10, 20, 30, 40, 50, 60], name="series",
                   index=[6, 7, 8, 9, 10, 15]).sort_values()
      kwargs = dict(orient=orient) if orient is not None else {}

      nested = {"s1": ser, "s2": ser.copy()}

      # Expected value: the Series round-tripped on its own, once per key.
      exp = {key: ujson.decode(ujson.encode(ser, **kwargs))
             for key in ("s1", "s2")}

      round_tripped = ujson.decode(ujson.encode(nested, **kwargs))
      assert round_tripped == exp
  def test_index(self):
      """Round-trip a named Index for every orient, with and without numpy.

      Each encoding is decoded twice: once with default decode arguments
      and once with ``numpy=True``.
      """
      idx = Index([23, 45, 18, 98, 43, 11], name="index")
      decode_variants = ({}, {"numpy": True})

      # Column indexed.
      for decode_kwargs in decode_variants:
          decoded = ujson.decode(ujson.encode(idx), **decode_kwargs)
          result = Index(decoded, name="index")
          tm.assert_index_equal(idx, result)

      # "split" decodes to a dict of constructor arguments, so the name
      # is carried by the payload and checked explicitly.
      for decode_kwargs in decode_variants:
          encoded = ujson.encode(idx, orient="split")
          ctor_args = _clean_dict(ujson.decode(encoded, **decode_kwargs))
          result = Index(**ctor_args)
          tm.assert_index_equal(idx, result)
          assert idx.name == result.name

      for orient in ("values", "records", "index"):
          for decode_kwargs in decode_variants:
              encoded = ujson.encode(idx, orient=orient)
              decoded = ujson.decode(encoded, **decode_kwargs)
              result = Index(decoded, name="index")
              tm.assert_index_equal(idx, result)
  def test_datetime_index(self):
      """Round-trip a DatetimeIndex, and a Series indexed by it, at ns unit."""
      unit_kwargs = {"date_unit": "ns"}
      dates = date_range("1/1/2000", periods=20)

      # The bare index: decode and rebuild as a DatetimeIndex.
      raw_index = ujson.decode(ujson.encode(dates, **unit_kwargs))
      rebuilt_index = DatetimeIndex(np.array(raw_index))
      tm.assert_index_equal(dates, rebuilt_index)

      # A Series over that index: the decoded index is cast to int64 and
      # converted back to a DatetimeIndex before comparing.
      ts = Series(np.random.randn(len(dates)), index=dates)
      rebuilt_series = Series(ujson.decode(ujson.encode(ts, **unit_kwargs)))
      int_index = rebuilt_series.index.values.astype(np.int64)
      rebuilt_series.index = DatetimeIndex(int_index)
      tm.assert_series_equal(ts, rebuilt_series)
  @pytest.mark.parametrize("invalid_arr", [
      "[31337,]",  # Trailing comma.
      "[,31337]",  # Leading comma.
      "[]]",  # Unmatched bracket.
      "[,]",  # Only comma.
  ])
  def test_decode_invalid_array(self, invalid_arr):
      """Decoding a malformed array literal must raise ``ValueError``."""
      with pytest.raises(ValueError):
          ujson.decode(invalid_arr)
  @pytest.mark.parametrize("arr", [[], [31337]])
  def test_decode_array(self, arr):
      """Decoding ``str(arr)`` must give back the original list."""
      text = str(arr)
      assert arr == ujson.decode(text)
  @pytest.mark.parametrize("extreme_num", [
      9223372036854775807,
      -9223372036854775808,
  ])
  def test_decode_extreme_numbers(self, extreme_num):
      """The parametrized extreme integers must decode to themselves."""
      text = str(extreme_num)
      assert extreme_num == ujson.decode(text)
  @pytest.mark.parametrize("too_extreme_num", [
      "9223372036854775808",
      "-90223372036854775809",
  ])
  def test_decode_too_extreme_numbers(self, too_extreme_num):
      """Decoding these out-of-range integer literals raises ``ValueError``."""
      with pytest.raises(ValueError):
          ujson.decode(too_extreme_num)
  def test_decode_with_trailing_whitespaces(self):
      """Whitespace after the document does not affect the decoded value."""
      decoded = ujson.decode("{}\n\t ")
      assert {} == decoded
  def test_decode_with_trailing_non_whitespaces(self):
      """A non-whitespace character after the document raises ``ValueError``."""
      trailing_garbage = "{}\n\t a"
      with pytest.raises(ValueError):
          ujson.decode(trailing_garbage)
  def test_decode_array_with_big_int(self):
      """Loading an array holding 18446098363113800555 raises ``ValueError``."""
      oversized = "[18446098363113800555]"
      with pytest.raises(ValueError):
          ujson.loads(oversized)
  @pytest.mark.parametrize("float_number", [
      1.1234567893, 1.234567893, 1.34567893,
      1.4567893, 1.567893, 1.67893,
      1.7893, 1.893, 1.3,
  ])
  @pytest.mark.parametrize("sign", [-1, 1])
  def test_decode_floating_point(self, sign, float_number):
      """Load ``str`` of a signed float and compare it with the original."""
      signed_value = float_number * sign
      loaded = ujson.loads(str(signed_value))
      tm.assert_almost_equal(signed_value, loaded,
                             check_less_precise=15)
  def test_encode_big_set(self):
      """Encoding a 100000-element set must complete without raising."""
      # Build the set directly from the range instead of adding the
      # elements one at a time in a Python-level loop.
      s = set(range(100000))

      # Make sure no Exception is raised.
      ujson.encode(s)
  def test_encode_empty_set(self):
      """An empty set encodes to the JSON text ``[]``."""
      encoded = ujson.encode(set())
      assert "[]" == encoded
  def test_encode_set(self):
      """Every value decoded from an encoded set is a member of that set."""
      members = {1, 2, 3, 4, 5, 6, 7, 8, 9}
      decoded = ujson.decode(ujson.encode(members))

      assert all(value in members for value in decoded)