- # -*- coding: utf-8 -*-
- # pylint: disable-msg=W0612,E1101
- from datetime import timedelta
- import json
- import os
- import numpy as np
- import pytest
- from pandas.compat import (
- OrderedDict, StringIO, is_platform_32bit, lrange, range)
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- DataFrame, DatetimeIndex, Series, Timestamp, compat, read_json)
- import pandas.util.testing as tm
- from pandas.util.testing import (
- assert_almost_equal, assert_frame_equal, assert_index_equal,
- assert_series_equal, ensure_clean, network)
- _seriesd = tm.getSeriesData()
- _tsd = tm.getTimeSeriesData()
- _frame = DataFrame(_seriesd)
- _frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])
- _intframe = DataFrame({k: v.astype(np.int64)
- for k, v in compat.iteritems(_seriesd)})
- _tsframe = DataFrame(_tsd)
- _cat_frame = _frame.copy()
- cat = ['bah'] * 5 + ['bar'] * 5 + ['baz'] * \
- 5 + ['foo'] * (len(_cat_frame) - 15)
- _cat_frame.index = pd.CategoricalIndex(cat, name='E')
- _cat_frame['E'] = list(reversed(cat))
- _cat_frame['sort'] = np.arange(len(_cat_frame), dtype='int64')
- _mixed_frame = _frame.copy()
- class TestPandasContainer(object):
- @pytest.fixture(scope="function", autouse=True)
- def setup(self, datapath):
- self.dirpath = datapath("io", "json", "data")
- self.ts = tm.makeTimeSeries()
- self.ts.name = 'ts'
- self.series = tm.makeStringSeries()
- self.series.name = 'series'
- self.objSeries = tm.makeObjectSeries()
- self.objSeries.name = 'objects'
- self.empty_series = Series([], index=[])
- self.empty_frame = DataFrame({})
- self.frame = _frame.copy()
- self.frame2 = _frame2.copy()
- self.intframe = _intframe.copy()
- self.tsframe = _tsframe.copy()
- self.mixed_frame = _mixed_frame.copy()
- self.categorical = _cat_frame.copy()
- yield
- del self.dirpath
- del self.ts
- del self.series
- del self.objSeries
- del self.empty_series
- del self.empty_frame
- del self.frame
- del self.frame2
- del self.intframe
- del self.tsframe
- del self.mixed_frame
- del self.categorical
- def test_frame_double_encoded_labels(self):
- df = DataFrame([['a', 'b'], ['c', 'd']],
- index=['index " 1', 'index / 2'],
- columns=['a \\ b', 'y / z'])
- assert_frame_equal(df, read_json(df.to_json(orient='split'),
- orient='split'))
- assert_frame_equal(df, read_json(df.to_json(orient='columns'),
- orient='columns'))
- assert_frame_equal(df, read_json(df.to_json(orient='index'),
- orient='index'))
- df_unser = read_json(df.to_json(orient='records'), orient='records')
- assert_index_equal(df.columns, df_unser.columns)
- tm.assert_numpy_array_equal(df.values, df_unser.values)
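- # Illustrative sketch, not part of the original suite: the raw payload
- # shape each orient produces for a tiny frame, as context for the
- # roundtrip assertions above. Uses only the public to_json API.
- def _demo_orient_payloads():
-     df = DataFrame([[1, 2]], index=['r'], columns=['x', 'y'])
-     # e.g. orient='split' gives
-     # '{"columns":["x","y"],"index":["r"],"data":[[1,2]]}'
-     return {orient: df.to_json(orient=orient)
-             for orient in ['split', 'records', 'index',
-                            'columns', 'values']}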
- def test_frame_non_unique_index(self):
- df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
- columns=['x', 'y'])
- msg = "DataFrame index must be unique for orient='index'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient='index')
- msg = "DataFrame index must be unique for orient='columns'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient='columns')
- assert_frame_equal(df, read_json(df.to_json(orient='split'),
- orient='split'))
- unser = read_json(df.to_json(orient='records'), orient='records')
- tm.assert_index_equal(df.columns, unser.columns)
- tm.assert_almost_equal(df.values, unser.values)
- unser = read_json(df.to_json(orient='values'), orient='values')
- tm.assert_numpy_array_equal(df.values, unser.values)
- def test_frame_non_unique_columns(self):
- df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
- columns=['x', 'x'])
- msg = "DataFrame columns must be unique for orient='index'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient='index')
- msg = "DataFrame columns must be unique for orient='columns'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient='columns')
- msg = "DataFrame columns must be unique for orient='records'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient='records')
- assert_frame_equal(df, read_json(df.to_json(orient='split'),
- orient='split', dtype=False))
- unser = read_json(df.to_json(orient='values'), orient='values')
- tm.assert_numpy_array_equal(df.values, unser.values)
- # GH4377; duplicate columns not processing correctly
- df = DataFrame([['a', 'b'], ['c', 'd']], index=[
- 1, 2], columns=['x', 'y'])
- result = read_json(df.to_json(orient='split'), orient='split')
- assert_frame_equal(result, df)
- def _check(df):
- result = read_json(df.to_json(orient='split'), orient='split',
- convert_dates=['x'])
- assert_frame_equal(result, df)
- for o in [[['a', 'b'], ['c', 'd']],
- [[1.5, 2.5], [3.5, 4.5]],
- [[1, 2.5], [3, 4.5]],
- [[Timestamp('20130101'), 3.5],
- [Timestamp('20130102'), 4.5]]]:
- _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
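- # Illustrative sketch, not part of the original suite: duplicate column
- # labels would collide as repeated JSON object keys under
- # orient='index'/'columns'/'records', hence the ValueError checks above;
- # orient='split' stores labels and data separately, so it can roundtrip.
- def _demo_duplicate_columns_roundtrip():
-     df = DataFrame([['a', 'b']], index=[1], columns=['x', 'x'])
-     return read_json(df.to_json(orient='split'), orient='split',
-                      dtype=False)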
- def test_frame_from_json_to_json(self):
- def _check_orient(df, orient, dtype=None, numpy=False,
- convert_axes=True, check_dtype=True, raise_ok=None,
- sort=None, check_index_type=True,
- check_column_type=True, check_numpy_dtype=False):
- if sort is not None:
- df = df.sort_values(sort)
- else:
- df = df.sort_index()
- # if the index or columns are not unique, check that ValueError is
- # raised for the affected orients
- if not df.index.is_unique and orient in ['index', 'columns']:
- msg = ("DataFrame index must be unique for orient='{}'"
- .format(orient))
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient)
- return
- if (not df.columns.is_unique and
- orient in ['index', 'columns', 'records']):
- # TODO: this branch is not reached by the cases below; fix that.
- msg = ("DataFrame columns must be unique for orient='{}'"
- .format(orient))
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient)
- return
- dfjson = df.to_json(orient=orient)
- try:
- unser = read_json(dfjson, orient=orient, dtype=dtype,
- numpy=numpy, convert_axes=convert_axes)
- except Exception as detail:
- if raise_ok is not None:
- if isinstance(detail, raise_ok):
- return
- raise
- if sort is not None and sort in unser.columns:
- unser = unser.sort_values(sort)
- else:
- unser = unser.sort_index()
- if dtype is False:
- check_dtype = False
- if not convert_axes and df.index.dtype.type == np.datetime64:
- unser.index = DatetimeIndex(
- unser.index.values.astype('i8') * 1e6)
- if orient == "records":
- # index is not captured in this orientation
- tm.assert_almost_equal(df.values, unser.values,
- check_dtype=check_numpy_dtype)
- tm.assert_index_equal(df.columns, unser.columns,
- exact=check_column_type)
- elif orient == "values":
- # index and cols are not captured in this orientation
- if numpy is True and df.shape == (0, 0):
- assert unser.shape[0] == 0
- else:
- tm.assert_almost_equal(df.values, unser.values,
- check_dtype=check_numpy_dtype)
- elif orient == "split":
- # index and col labels might not be strings
- unser.index = [str(i) for i in unser.index]
- unser.columns = [str(i) for i in unser.columns]
- if sort is None:
- unser = unser.sort_index()
- tm.assert_almost_equal(df.values, unser.values,
- check_dtype=check_numpy_dtype)
- else:
- if convert_axes:
- tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
- check_index_type=check_index_type,
- check_column_type=check_column_type)
- else:
- tm.assert_frame_equal(df, unser, check_less_precise=False,
- check_dtype=check_dtype)
- def _check_all_orients(df, dtype=None, convert_axes=True,
- raise_ok=None, sort=None, check_index_type=True,
- check_column_type=True):
- # numpy=False
- if convert_axes:
- _check_orient(df, "columns", dtype=dtype, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "records", dtype=dtype, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "split", dtype=dtype, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "index", dtype=dtype, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "values", dtype=dtype, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "columns", dtype=dtype,
- convert_axes=False, sort=sort)
- _check_orient(df, "records", dtype=dtype,
- convert_axes=False, sort=sort)
- _check_orient(df, "split", dtype=dtype,
- convert_axes=False, sort=sort)
- _check_orient(df, "index", dtype=dtype,
- convert_axes=False, sort=sort)
- _check_orient(df, "values", dtype=dtype,
- convert_axes=False, sort=sort)
- # numpy=True; raise_ok may not be None, in which case that error type is ignored
- if convert_axes:
- _check_orient(df, "columns", dtype=dtype, numpy=True,
- raise_ok=raise_ok, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "records", dtype=dtype, numpy=True,
- raise_ok=raise_ok, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "split", dtype=dtype, numpy=True,
- raise_ok=raise_ok, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "index", dtype=dtype, numpy=True,
- raise_ok=raise_ok, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "values", dtype=dtype, numpy=True,
- raise_ok=raise_ok, sort=sort,
- check_index_type=False, check_column_type=False)
- _check_orient(df, "columns", dtype=dtype, numpy=True,
- convert_axes=False, raise_ok=raise_ok, sort=sort)
- _check_orient(df, "records", dtype=dtype, numpy=True,
- convert_axes=False, raise_ok=raise_ok, sort=sort)
- _check_orient(df, "split", dtype=dtype, numpy=True,
- convert_axes=False, raise_ok=raise_ok, sort=sort)
- _check_orient(df, "index", dtype=dtype, numpy=True,
- convert_axes=False, raise_ok=raise_ok, sort=sort)
- _check_orient(df, "values", dtype=dtype, numpy=True,
- convert_axes=False, raise_ok=raise_ok, sort=sort)
- # basic
- _check_all_orients(self.frame)
- assert self.frame.to_json() == self.frame.to_json(orient="columns")
- _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
- _check_all_orients(self.intframe, dtype=False)
- # big one
- # index and columns are strings as all unserialised JSON object keys
- # are assumed to be strings
- biggie = DataFrame(np.zeros((200, 4)),
- columns=[str(i) for i in range(4)],
- index=[str(i) for i in range(200)])
- _check_all_orients(biggie, dtype=False, convert_axes=False)
- # dtypes
- _check_all_orients(DataFrame(biggie, dtype=np.float64),
- dtype=np.float64, convert_axes=False)
- _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
- convert_axes=False)
- _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
- convert_axes=False, raise_ok=ValueError)
- # categorical
- _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError)
- # empty
- _check_all_orients(self.empty_frame, check_index_type=False,
- check_column_type=False)
- # time series data
- _check_all_orients(self.tsframe)
- # mixed data
- index = pd.Index(['a', 'b', 'c', 'd', 'e'])
- data = {'A': [0., 1., 2., 3., 4.],
- 'B': [0., 1., 0., 1., 0.],
- 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
- 'D': [True, False, True, False, True]}
- df = DataFrame(data=data, index=index)
- _check_orient(df, "split", check_dtype=False)
- _check_orient(df, "records", check_dtype=False)
- _check_orient(df, "values", check_dtype=False)
- _check_orient(df, "columns", check_dtype=False)
- # orient='index' is problematic: the frame is read back transposed,
- # so the columns are interpreted as mixed data and given object
- # dtype; force everything to object dtype beforehand
- _check_orient(df.transpose().transpose(), "index", dtype=False)
- def test_frame_from_json_bad_data(self):
- with pytest.raises(ValueError, match='Expected object or value'):
- read_json(StringIO('{"key":b:a:d}'))
- # too few indices
- json = StringIO('{"columns":["A","B"],'
- '"index":["2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
- msg = r"Shape of passed values is \(3, 2\), indices imply \(2, 2\)"
- with pytest.raises(ValueError, match=msg):
- read_json(json, orient="split")
- # too many columns
- json = StringIO('{"columns":["A","B","C"],'
- '"index":["1","2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
- msg = "3 columns passed, passed data had 2 columns"
- with pytest.raises(AssertionError, match=msg):
- read_json(json, orient="split")
- # bad key
- json = StringIO('{"badkey":["A","B"],'
- '"index":["2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}')
- with pytest.raises(ValueError, match=r"unexpected key\(s\): badkey"):
- read_json(json, orient="split")
- def test_frame_from_json_nones(self):
- df = DataFrame([[1, 2], [4, 5, 6]])
- unser = read_json(df.to_json())
- assert np.isnan(unser[2][0])
- df = DataFrame([['1', '2'], ['4', '5', '6']])
- unser = read_json(df.to_json())
- assert np.isnan(unser[2][0])
- unser = read_json(df.to_json(), dtype=False)
- assert unser[2][0] is None
- unser = read_json(df.to_json(), convert_axes=False, dtype=False)
- assert unser['2']['0'] is None
- unser = read_json(df.to_json(), numpy=False)
- assert np.isnan(unser[2][0])
- unser = read_json(df.to_json(), numpy=False, dtype=False)
- assert unser[2][0] is None
- unser = read_json(df.to_json(), numpy=False,
- convert_axes=False, dtype=False)
- assert unser['2']['0'] is None
- # infinities get mapped to nulls which get mapped to NaNs during
- # deserialisation
- df = DataFrame([[1, 2], [4, 5, 6]])
- df.loc[0, 2] = np.inf
- unser = read_json(df.to_json())
- assert np.isnan(unser[2][0])
- unser = read_json(df.to_json(), dtype=False)
- assert np.isnan(unser[2][0])
- df.loc[0, 2] = np.NINF
- unser = read_json(df.to_json())
- assert np.isnan(unser[2][0])
- unser = read_json(df.to_json(), dtype=False)
- assert np.isnan(unser[2][0])
- @pytest.mark.skipif(is_platform_32bit(),
- reason="not compliant on 32-bit, xref #15865")
- def test_frame_to_json_float_precision(self):
- df = pd.DataFrame([dict(a_float=0.95)])
- encoded = df.to_json(double_precision=1)
- assert encoded == '{"a_float":{"0":1.0}}'
- df = pd.DataFrame([dict(a_float=1.95)])
- encoded = df.to_json(double_precision=1)
- assert encoded == '{"a_float":{"0":2.0}}'
- df = pd.DataFrame([dict(a_float=-1.95)])
- encoded = df.to_json(double_precision=1)
- assert encoded == '{"a_float":{"0":-2.0}}'
- df = pd.DataFrame([dict(a_float=0.995)])
- encoded = df.to_json(double_precision=2)
- assert encoded == '{"a_float":{"0":1.0}}'
- df = pd.DataFrame([dict(a_float=0.9995)])
- encoded = df.to_json(double_precision=3)
- assert encoded == '{"a_float":{"0":1.0}}'
- df = pd.DataFrame([dict(a_float=0.99999999999999944)])
- encoded = df.to_json(double_precision=15)
- assert encoded == '{"a_float":{"0":1.0}}'
- def test_frame_to_json_except(self):
- df = DataFrame([1, 2, 3])
- msg = "Invalid value 'garbage' for option 'orient'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient="garbage")
- def test_frame_empty(self):
- df = DataFrame(columns=['jim', 'joe'])
- assert not df._is_mixed_type
- assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
- check_index_type=False)
- # GH 7445
- result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
- expected = '{"test":{}}'
- assert result == expected
- def test_frame_empty_mixedtype(self):
- # mixed type
- df = DataFrame(columns=['jim', 'joe'])
- df['joe'] = df['joe'].astype('i8')
- assert df._is_mixed_type
- assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
- check_index_type=False)
- def test_frame_mixedtype_orient(self): # GH10289
- vals = [[10, 1, 'foo', .1, .01],
- [20, 2, 'bar', .2, .02],
- [30, 3, 'baz', .3, .03],
- [40, 4, 'qux', .4, .04]]
- df = DataFrame(vals, index=list('abcd'),
- columns=['1st', '2nd', '3rd', '4th', '5th'])
- assert df._is_mixed_type
- right = df.copy()
- for orient in ['split', 'index', 'columns']:
- inp = df.to_json(orient=orient)
- left = read_json(inp, orient=orient, convert_axes=False)
- assert_frame_equal(left, right)
- right.index = np.arange(len(df))
- inp = df.to_json(orient='records')
- left = read_json(inp, orient='records', convert_axes=False)
- assert_frame_equal(left, right)
- right.columns = np.arange(df.shape[1])
- inp = df.to_json(orient='values')
- left = read_json(inp, orient='values', convert_axes=False)
- assert_frame_equal(left, right)
- def test_v12_compat(self):
- df = DataFrame(
- [[1.56808523, 0.65727391, 1.81021139, -0.17251653],
- [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
- [1.51493992, 0.11805825, 1.629455, -1.31506612],
- [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
- [0.05951614, -2.69652057, 1.28163262, 0.34703478]],
- columns=['A', 'B', 'C', 'D'],
- index=pd.date_range('2000-01-03', '2000-01-07'))
- df['date'] = pd.Timestamp('19920106 18:21:32.12')
- df.iloc[3, df.columns.get_loc('date')] = pd.Timestamp('20130101')
- df['modified'] = df['date']
- df.iloc[1, df.columns.get_loc('modified')] = pd.NaT
- v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
- df_unser = pd.read_json(v12_json)
- assert_frame_equal(df, df_unser)
- df_iso = df.drop(['modified'], axis=1)
- v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
- df_unser_iso = pd.read_json(v12_iso_json)
- assert_frame_equal(df_iso, df_unser_iso)
- def test_blocks_compat_GH9037(self):
- index = pd.date_range('20000101', periods=10, freq='H')
- df_mixed = DataFrame(OrderedDict(
- float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
- -0.60316077, 0.24653374, 0.28668979, -2.51969012,
- 0.95748401, -1.02970536],
- int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
- 40314334, 21290235, 4991321, 41903419, 16008365],
- str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
- 'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
- float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
- -0.48217572, 0.86229683, 1.08935819, 0.93898739,
- -0.03030452, 1.43366348],
- str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
- '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
- int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
- 34193846, 10561746, 24867120, 76131025]
- ), index=index)
- # JSON deserialisation always creates unicode strings
- df_mixed.columns = df_mixed.columns.astype('unicode')
- df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
- orient='split')
- assert_frame_equal(df_mixed, df_roundtrip,
- check_index_type=True,
- check_column_type=True,
- check_frame_type=True,
- by_blocks=True,
- check_exact=True)
- def test_frame_nonprintable_bytes(self):
- # GH14256: a failing column caused segfaults when it was not the last one
- class BinaryThing(object):
- def __init__(self, hexed):
- self.hexed = hexed
- if compat.PY2:
- self.binary = hexed.decode('hex')
- else:
- self.binary = bytes.fromhex(hexed)
- def __str__(self):
- return self.hexed
- hexed = '574b4454ba8c5eb4f98a8f45'
- binthing = BinaryThing(hexed)
- # verify the proper conversion of printable content
- df_printable = DataFrame({'A': [binthing.hexed]})
- assert df_printable.to_json() == \
- '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
- # check if non-printable content throws appropriate Exception
- df_nonprintable = DataFrame({'A': [binthing]})
- msg = "Unsupported UTF-8 sequence length when encoding string"
- with pytest.raises(OverflowError, match=msg):
- df_nonprintable.to_json()
- # the same with multiple columns threw segfaults
- df_mixed = DataFrame({'A': [binthing], 'B': [1]},
- columns=['A', 'B'])
- with pytest.raises(OverflowError):
- df_mixed.to_json()
- # default_handler should resolve exceptions for non-string types
- assert df_nonprintable.to_json(default_handler=str) == \
- '{{"A":{{"0":"{hex}"}}}}'.format(hex=hexed)
- assert df_mixed.to_json(default_handler=str) == \
- '{{"A":{{"0":"{hex}"}},"B":{{"0":1}}}}'.format(hex=hexed)
- def test_label_overflow(self):
- # GH14256: buffer length not checked when writing label
- df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]})
- assert df.to_json() == \
- '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
- bar=('bar' * 100000))
- def test_series_non_unique_index(self):
- s = Series(['a', 'b'], index=[1, 1])
- msg = "Series index must be unique for orient='index'"
- with pytest.raises(ValueError, match=msg):
- s.to_json(orient='index')
- assert_series_equal(s, read_json(s.to_json(orient='split'),
- orient='split', typ='series'))
- unser = read_json(s.to_json(orient='records'),
- orient='records', typ='series')
- tm.assert_numpy_array_equal(s.values, unser.values)
- def test_series_from_json_to_json(self):
- def _check_orient(series, orient, dtype=None, numpy=False,
- check_index_type=True):
- series = series.sort_index()
- unser = read_json(series.to_json(orient=orient),
- typ='series', orient=orient, numpy=numpy,
- dtype=dtype)
- unser = unser.sort_index()
- if orient == "records" or orient == "values":
- assert_almost_equal(series.values, unser.values)
- else:
- if orient == "split":
- assert_series_equal(series, unser,
- check_index_type=check_index_type)
- else:
- assert_series_equal(series, unser, check_names=False,
- check_index_type=check_index_type)
- def _check_all_orients(series, dtype=None, check_index_type=True):
- _check_orient(series, "columns", dtype=dtype,
- check_index_type=check_index_type)
- _check_orient(series, "records", dtype=dtype,
- check_index_type=check_index_type)
- _check_orient(series, "split", dtype=dtype,
- check_index_type=check_index_type)
- _check_orient(series, "index", dtype=dtype,
- check_index_type=check_index_type)
- _check_orient(series, "values", dtype=dtype)
- _check_orient(series, "columns", dtype=dtype, numpy=True,
- check_index_type=check_index_type)
- _check_orient(series, "records", dtype=dtype, numpy=True,
- check_index_type=check_index_type)
- _check_orient(series, "split", dtype=dtype, numpy=True,
- check_index_type=check_index_type)
- _check_orient(series, "index", dtype=dtype, numpy=True,
- check_index_type=check_index_type)
- _check_orient(series, "values", dtype=dtype, numpy=True,
- check_index_type=check_index_type)
- # basic
- _check_all_orients(self.series)
- assert self.series.to_json() == self.series.to_json(orient="index")
- objSeries = Series([str(d) for d in self.objSeries],
- index=self.objSeries.index,
- name=self.objSeries.name)
- _check_all_orients(objSeries, dtype=False)
- # empty_series has empty index with object dtype
- # which cannot be reverted
- assert self.empty_series.index.dtype == np.object_
- _check_all_orients(self.empty_series, check_index_type=False)
- _check_all_orients(self.ts)
- # dtype
- s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
- _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
- _check_all_orients(Series(s, dtype=np.int), dtype=np.int)
- def test_series_to_json_except(self):
- s = Series([1, 2, 3])
- msg = "Invalid value 'garbage' for option 'orient'"
- with pytest.raises(ValueError, match=msg):
- s.to_json(orient="garbage")
- def test_series_from_json_precise_float(self):
- s = Series([4.56, 4.56, 4.56])
- result = read_json(s.to_json(), typ='series', precise_float=True)
- assert_series_equal(result, s, check_index_type=False)
- def test_series_with_dtype(self):
- # GH 21986
- s = Series([4.56, 4.56, 4.56])
- result = read_json(s.to_json(), typ='series', dtype=np.int64)
- expected = Series([4] * 3)
- assert_series_equal(result, expected)
- def test_frame_from_json_precise_float(self):
- df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
- result = read_json(df.to_json(), precise_float=True)
- assert_frame_equal(result, df, check_index_type=False,
- check_column_type=False)
- def test_typ(self):
- s = Series(lrange(6), index=['a', 'b', 'c',
- 'd', 'e', 'f'], dtype='int64')
- result = read_json(s.to_json(), typ=None)
- assert_series_equal(result, s)
- def test_reconstruction_index(self):
- df = DataFrame([[1, 2, 3], [4, 5, 6]])
- result = read_json(df.to_json())
- assert_frame_equal(result, df)
- df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
- result = read_json(df.to_json())
- assert_frame_equal(result, df)
- def test_path(self):
- with ensure_clean('test.json') as path:
- for df in [self.frame, self.frame2, self.intframe, self.tsframe,
- self.mixed_frame]:
- df.to_json(path)
- read_json(path)
- def test_axis_dates(self):
- # frame
- json = self.tsframe.to_json()
- result = read_json(json)
- assert_frame_equal(result, self.tsframe)
- # series
- json = self.ts.to_json()
- result = read_json(json, typ='series')
- assert_series_equal(result, self.ts, check_names=False)
- assert result.name is None
- def test_convert_dates(self):
- # frame
- df = self.tsframe.copy()
- df['date'] = Timestamp('20130101')
- json = df.to_json()
- result = read_json(json)
- assert_frame_equal(result, df)
- df['foo'] = 1.
- json = df.to_json(date_unit='ns')
- result = read_json(json, convert_dates=False)
- expected = df.copy()
- expected['date'] = expected['date'].values.view('i8')
- expected['foo'] = expected['foo'].astype('int64')
- assert_frame_equal(result, expected)
- # series
- ts = Series(Timestamp('20130101'), index=self.ts.index)
- json = ts.to_json()
- result = read_json(json, typ='series')
- assert_series_equal(result, ts)
- def test_convert_dates_infer(self):
- # GH10747
- from pandas.io.json import dumps
- infer_words = ['trade_time', 'date', 'datetime', 'sold_at',
- 'modified', 'timestamp', 'timestamps']
- for infer_word in infer_words:
- data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}]
- expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]],
- columns=['id', infer_word])
- result = read_json(dumps(data))[['id', infer_word]]
- assert_frame_equal(result, expected)
- def test_date_format_frame(self):
- df = self.tsframe.copy()
- def test_w_date(date, date_unit=None):
- df['date'] = Timestamp(date)
- df.iloc[1, df.columns.get_loc('date')] = pd.NaT
- df.iloc[5, df.columns.get_loc('date')] = pd.NaT
- if date_unit:
- json = df.to_json(date_format='iso', date_unit=date_unit)
- else:
- json = df.to_json(date_format='iso')
- result = read_json(json)
- assert_frame_equal(result, df)
- test_w_date('20130101 20:43:42.123')
- test_w_date('20130101 20:43:42', date_unit='s')
- test_w_date('20130101 20:43:42.123', date_unit='ms')
- test_w_date('20130101 20:43:42.123456', date_unit='us')
- test_w_date('20130101 20:43:42.123456789', date_unit='ns')
- msg = "Invalid value 'foo' for option 'date_unit'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(date_format='iso', date_unit='foo')
- def test_date_format_series(self):
- def test_w_date(date, date_unit=None):
- ts = Series(Timestamp(date), index=self.ts.index)
- ts.iloc[1] = pd.NaT
- ts.iloc[5] = pd.NaT
- if date_unit:
- json = ts.to_json(date_format='iso', date_unit=date_unit)
- else:
- json = ts.to_json(date_format='iso')
- result = read_json(json, typ='series')
- assert_series_equal(result, ts)
- test_w_date('20130101 20:43:42.123')
- test_w_date('20130101 20:43:42', date_unit='s')
- test_w_date('20130101 20:43:42.123', date_unit='ms')
- test_w_date('20130101 20:43:42.123456', date_unit='us')
- test_w_date('20130101 20:43:42.123456789', date_unit='ns')
- ts = Series(Timestamp('20130101 20:43:42.123'), index=self.ts.index)
- msg = "Invalid value 'foo' for option 'date_unit'"
- with pytest.raises(ValueError, match=msg):
- ts.to_json(date_format='iso', date_unit='foo')
- def test_date_unit(self):
- df = self.tsframe.copy()
- df['date'] = Timestamp('20130101 20:43:42')
- dl = df.columns.get_loc('date')
- df.iloc[1, dl] = Timestamp('19710101 20:43:42')
- df.iloc[2, dl] = Timestamp('21460101 20:43:42')
- df.iloc[4, dl] = pd.NaT
- for unit in ('s', 'ms', 'us', 'ns'):
- json = df.to_json(date_format='epoch', date_unit=unit)
- # force date unit
- result = read_json(json, date_unit=unit)
- assert_frame_equal(result, df)
- # detect date unit
- result = read_json(json, date_unit=None)
- assert_frame_equal(result, df)
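- # Illustrative sketch, not part of the original suite: epoch-encoded
- # timestamps carry no unit marker, so read_json(date_unit=None) must
- # infer the unit from the magnitude of the values, which is the
- # detection path exercised above.
- def _demo_epoch_unit_inference():
-     df = DataFrame({'date': [Timestamp('20130101 20:43:42')]})
-     js = df.to_json(date_format='epoch', date_unit='ms')
-     return read_json(js, date_unit=None)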
- def test_weird_nested_json(self):
- # this used to core dump the parser
- s = r'''{
- "status": "success",
- "data": {
- "posts": [
- {
- "id": 1,
- "title": "A blog post",
- "body": "Some useful content"
- },
- {
- "id": 2,
- "title": "Another blog post",
- "body": "More content"
- }
- ]
- }
- }'''
- read_json(s)
- def test_doc_example(self):
- dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
- dfj2['date'] = Timestamp('20130101')
- dfj2['ints'] = lrange(5)
- dfj2['bools'] = True
- dfj2.index = pd.date_range('20130101', periods=5)
- json = dfj2.to_json()
- result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})
- assert_frame_equal(result, dfj2)
- def test_misc_example(self):
- # with numpy=True, parsing unordered records produces a wrong index
- result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- error_msg = """DataFrame\\.index are different
- DataFrame\\.index values are different \\(100\\.0 %\\)
- \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\)
- \\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)"""
- with pytest.raises(AssertionError, match=error_msg):
- assert_frame_equal(result, expected, check_index_type=False)
- result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]')
- expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- assert_frame_equal(result, expected)
- @network
- @pytest.mark.single
- def test_round_trip_exception_(self):
- # GH 3867
- csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv'
- df = pd.read_csv(csv)
- s = df.to_json()
- result = pd.read_json(s)
- assert_frame_equal(result.reindex(
- index=df.index, columns=df.columns), df)
- @network
- @pytest.mark.single
- def test_url(self):
- url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa
- result = read_json(url, convert_dates=True)
- for c in ['created_at', 'closed_at', 'updated_at']:
- assert result[c].dtype == 'datetime64[ns]'
- def test_timedelta(self):
- converter = lambda x: pd.to_timedelta(x, unit='ms')
- s = Series([timedelta(23), timedelta(seconds=5)])
- assert s.dtype == 'timedelta64[ns]'
- result = pd.read_json(s.to_json(), typ='series').apply(converter)
- assert_series_equal(result, s)
- s = Series([timedelta(23), timedelta(seconds=5)],
- index=pd.Index([0, 1]))
- assert s.dtype == 'timedelta64[ns]'
- result = pd.read_json(s.to_json(), typ='series').apply(converter)
- assert_series_equal(result, s)
- frame = DataFrame([timedelta(23), timedelta(seconds=5)])
- assert frame[0].dtype == 'timedelta64[ns]'
- assert_frame_equal(frame, pd.read_json(frame.to_json())
- .apply(converter))
- frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
- 'b': [1, 2],
- 'c': pd.date_range(start='20130101', periods=2)})
- result = pd.read_json(frame.to_json(date_unit='ns'))
- result['a'] = pd.to_timedelta(result.a, unit='ns')
- result['c'] = pd.to_datetime(result.c)
- assert_frame_equal(frame, result)
- def test_mixed_timedelta_datetime(self):
- frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
- dtype=object)
- expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
- pd.Timestamp(frame.a[1]).value]})
- result = pd.read_json(frame.to_json(date_unit='ns'),
- dtype={'a': 'int64'})
- assert_frame_equal(result, expected, check_index_type=False)
- def test_default_handler(self):
- value = object()
- frame = DataFrame({'a': [7, value]})
- expected = DataFrame({'a': [7, str(value)]})
- result = pd.read_json(frame.to_json(default_handler=str))
- assert_frame_equal(expected, result, check_index_type=False)
- def test_default_handler_indirect(self):
- from pandas.io.json import dumps
- def default(obj):
- if isinstance(obj, complex):
- return [('mathjs', 'Complex'),
- ('re', obj.real),
- ('im', obj.imag)]
- return str(obj)
- df_list = [9, DataFrame({'a': [1, 'STR', complex(4, -5)],
- 'b': [float('nan'), None, 'N/A']},
- columns=['a', 'b'])]
- expected = ('[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
- '["re",4.0],["im",-5.0]],"N\\/A"]]]')
- assert dumps(df_list, default_handler=default,
- orient="values") == expected
- def test_default_handler_numpy_unsupported_dtype(self):
- # GH12554 to_json raises 'Unhandled numpy dtype 15'
- df = DataFrame({'a': [1, 2.3, complex(4, -5)],
- 'b': [float('nan'), None, complex(1.2, 0)]},
- columns=['a', 'b'])
- expected = ('[["(1+0j)","(nan+0j)"],'
- '["(2.3+0j)","(nan+0j)"],'
- '["(4-5j)","(1.2+0j)"]]')
- assert df.to_json(default_handler=str, orient="values") == expected
- def test_default_handler_raises(self):
- msg = "raisin"
- def my_handler_raises(obj):
- raise TypeError(msg)
- with pytest.raises(TypeError, match=msg):
- DataFrame({'a': [1, 2, object()]}).to_json(
- default_handler=my_handler_raises)
- with pytest.raises(TypeError, match=msg):
- DataFrame({'a': [1, 2, complex(4, -5)]}).to_json(
- default_handler=my_handler_raises)
- def test_categorical(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
- df["B"] = df["A"]
- expected = df.to_json()
- df["B"] = df["A"].astype('category')
- assert expected == df.to_json()
- s = df["A"]
- sc = df["B"]
- assert s.to_json() == sc.to_json()
- def test_datetime_tz(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- tz_range = pd.date_range('20130101', periods=3, tz='US/Eastern')
- tz_naive = tz_range.tz_convert('utc').tz_localize(None)
- df = DataFrame({
- 'A': tz_range,
- 'B': pd.date_range('20130101', periods=3)})
- df_naive = df.copy()
- df_naive['A'] = tz_naive
- expected = df_naive.to_json()
- assert expected == df.to_json()
- stz = Series(tz_range)
- s_naive = Series(tz_naive)
- assert stz.to_json() == s_naive.to_json()
- def test_sparse(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- df = pd.DataFrame(np.random.randn(10, 4))
- df.loc[:8] = np.nan
- sdf = df.to_sparse()
- expected = df.to_json()
- assert expected == sdf.to_json()
- s = pd.Series(np.random.randn(10))
- s.loc[:8] = np.nan
- ss = s.to_sparse()
- expected = s.to_json()
- assert expected == ss.to_json()
- def test_tz_is_utc(self):
- from pandas.io.json import dumps
- exp = '"2013-01-10T05:00:00.000Z"'
- ts = Timestamp('2013-01-10 05:00:00Z')
- assert dumps(ts, iso_dates=True) == exp
- dt = ts.to_pydatetime()
- assert dumps(dt, iso_dates=True) == exp
- ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern')
- assert dumps(ts, iso_dates=True) == exp
- dt = ts.to_pydatetime()
- assert dumps(dt, iso_dates=True) == exp
- ts = Timestamp('2013-01-10 00:00:00-0500')
- assert dumps(ts, iso_dates=True) == exp
- dt = ts.to_pydatetime()
- assert dumps(dt, iso_dates=True) == exp
- def test_tz_range_is_utc(self):
- from pandas.io.json import dumps
- exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
- dfexp = ('{"DT":{'
- '"0":"2013-01-01T05:00:00.000Z",'
- '"1":"2013-01-02T05:00:00.000Z"}}')
- tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2)
- assert dumps(tz_range, iso_dates=True) == exp
- dti = pd.DatetimeIndex(tz_range)
- assert dumps(dti, iso_dates=True) == exp
- df = DataFrame({'DT': dti})
- result = dumps(df, iso_dates=True)
- assert result == dfexp
- tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
- tz='US/Eastern')
- assert dumps(tz_range, iso_dates=True) == exp
- dti = pd.DatetimeIndex(tz_range)
- assert dumps(dti, iso_dates=True) == exp
- df = DataFrame({'DT': dti})
- assert dumps(df, iso_dates=True) == dfexp
- tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2)
- assert dumps(tz_range, iso_dates=True) == exp
- dti = pd.DatetimeIndex(tz_range)
- assert dumps(dti, iso_dates=True) == exp
- df = DataFrame({'DT': dti})
- assert dumps(df, iso_dates=True) == dfexp
- def test_read_inline_jsonl(self):
- # GH9180
- result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- assert_frame_equal(result, expected)
- @td.skip_if_not_us_locale
- def test_read_s3_jsonl(self, s3_resource):
- # GH17200
- result = read_json('s3n://pandas-test/items.jsonl', lines=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- assert_frame_equal(result, expected)
- def test_read_local_jsonl(self):
- # GH17200
- with ensure_clean('tmp_items.json') as path:
- with open(path, 'w') as infile:
- infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
- result = read_json(path, lines=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- assert_frame_equal(result, expected)
- def test_read_jsonl_unicode_chars(self):
- # GH15132: non-ascii unicode characters
- # \u201d == RIGHT DOUBLE QUOTATION MARK
- # simulate file handle
- json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
- json = StringIO(json)
- result = read_json(json, lines=True)
- expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
- columns=['a', 'b'])
- assert_frame_equal(result, expected)
- # simulate string
- json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
- result = read_json(json, lines=True)
- expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]],
- columns=['a', 'b'])
- assert_frame_equal(result, expected)
- def test_read_json_large_numbers(self):
- # GH18842
- json = '{"articleId": "1404366058080022500245"}'
- json = StringIO(json)
- result = read_json(json, typ="series")
- expected = Series(1.404366e+21, index=['articleId'])
- assert_series_equal(result, expected)
- json = '{"0": {"articleId": "1404366058080022500245"}}'
- json = StringIO(json)
- result = read_json(json)
- expected = DataFrame(1.404366e+21, index=['articleId'], columns=[0])
- assert_frame_equal(result, expected)
- def test_to_jsonl(self):
- # GH9180
- df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
- result = df.to_json(orient="records", lines=True)
- expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
- assert result == expected
- df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
- result = df.to_json(orient="records", lines=True)
- expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
- assert result == expected
- assert_frame_equal(pd.read_json(result, lines=True), df)
- # GH15096: escaped characters in columns and data
- df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]],
- columns=["a\\", 'b'])
- result = df.to_json(orient="records", lines=True)
- expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n'
- '{"a\\\\":"foo\\"","b":"bar"}')
- assert result == expected
- assert_frame_equal(pd.read_json(result, lines=True), df)
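- # Illustrative sketch, not part of the original suite: lines=True emits
- # one record per line with no enclosing array, so escaped quotes and
- # backslashes (tested above) must not break the line-based parse.
- def _demo_jsonl_roundtrip():
-     df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
-     js = df.to_json(orient='records', lines=True)
-     # js == '{"a":1,"b":2}\n{"a":3,"b":4}'
-     return pd.read_json(js, lines=True)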
- def test_latin_encoding(self):
- if compat.PY2:
- pytest.skip("[unicode] is not implemented as a table column")
- # GH 13774
- pytest.skip("encoding not implemented in .to_json(), "
- "xref #13774")
- values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
- [b'E\xc9, 17', b'a', b'b', b'c'],
- [b'EE, 17', b'', b'a', b'b', b'c'],
- [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
- [b'', b'a', b'b', b'c'],
- [b'\xf8\xfc', b'a', b'b', b'c'],
- [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
- [np.nan, b'', b'b', b'c'],
- [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]
- def _try_decode(x, encoding='latin-1'):
- try:
- return x.decode(encoding)
- except AttributeError:
- return x
- # not sure how to remove latin-1 from code in python 2 and 3
- values = [[_try_decode(x) for x in y] for y in values]
- examples = []
- for dtype in ['category', object]:
- for val in values:
- examples.append(Series(val, dtype=dtype))
- def roundtrip(s, encoding='latin-1'):
- with ensure_clean('test.json') as path:
- s.to_json(path, encoding=encoding)
- retr = read_json(path, encoding=encoding)
- assert_series_equal(s, retr, check_categorical=False)
- for s in examples:
- roundtrip(s)
- def test_data_frame_size_after_to_json(self):
- # GH15344
- df = DataFrame({'a': [str(1)]})
- size_before = df.memory_usage(index=True, deep=True).sum()
- df.to_json()
- size_after = df.memory_usage(index=True, deep=True).sum()
- assert size_before == size_after
- @pytest.mark.parametrize('data, expected', [
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
- {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo'),
- {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
- index=[['a', 'b'], ['c', 'd']]),
- {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
- (Series([1, 2, 3], name='A'),
- {'name': 'A', 'data': [1, 2, 3]}),
- (Series([1, 2, 3], name='A').rename_axis('foo'),
- {'name': 'A', 'data': [1, 2, 3]}),
- (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']]),
- {'name': 'A', 'data': [1, 2]}),
- ])
- def test_index_false_to_json_split(self, data, expected):
- # GH 17394
- # Testing index=False in to_json with orient='split'
- result = data.to_json(orient='split', index=False)
- result = json.loads(result)
- assert result == expected
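- # Illustrative sketch, not part of the original suite: index=False
- # simply drops the "index" key from the split payload, matching the
- # parametrized expectations above.
- def _demo_split_without_index():
-     df = DataFrame([[1, 2]], columns=['a', 'b'])
-     # '{"columns":["a","b"],"data":[[1,2]]}'
-     return df.to_json(orient='split', index=False)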
- @pytest.mark.parametrize('data', [
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])),
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']).rename_axis('foo')),
- (DataFrame([[1, 2], [4, 5]], columns=['a', 'b'],
- index=[['a', 'b'], ['c', 'd']])),
- (Series([1, 2, 3], name='A')),
- (Series([1, 2, 3], name='A').rename_axis('foo')),
- (Series([1, 2], name='A', index=[['a', 'b'], ['c', 'd']])),
- ])
- def test_index_false_to_json_table(self, data):
- # GH 17394
- # Testing index=False in to_json with orient='table'
- result = data.to_json(orient='table', index=False)
- result = json.loads(result)
- expected = {
- 'schema': pd.io.json.build_table_schema(data, index=False),
- 'data': DataFrame(data).to_dict(orient='records')
- }
- assert result == expected
- @pytest.mark.parametrize('orient', [
- 'records', 'index', 'columns', 'values'
- ])
- def test_index_false_error_to_json(self, orient):
- # GH 17394
- # Testing error message from to_json with index=False
- df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b'])
- msg = ("'index=False' is only valid when "
- "'orient' is 'split' or 'table'")
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient, index=False)
- @pytest.mark.parametrize('orient', ['split', 'table'])
- @pytest.mark.parametrize('index', [True, False])
- def test_index_false_from_json_to_json(self, orient, index):
- # GH25170
- # Test that index=False survives a to_json/read_json roundtrip
- expected = DataFrame({'a': [1, 2], 'b': [3, 4]})
- dfjson = expected.to_json(orient=orient, index=index)
- result = read_json(dfjson, orient=orient)
- assert_frame_equal(result, expected)