test_constructors.py 87 KB


  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime, timedelta
  4. import functools
  5. import itertools
  6. import numpy as np
  7. import numpy.ma as ma
  8. import pytest
  9. from pandas.compat import (
  10. PY2, PY3, PY36, OrderedDict, is_platform_little_endian, lmap, long, lrange,
  11. lzip, range, zip)
  12. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  13. from pandas.core.dtypes.common import is_integer_dtype
  14. import pandas as pd
  15. from pandas import (
  16. Categorical, DataFrame, Index, MultiIndex, Series, Timedelta, Timestamp,
  17. _np_version_under1p13, compat, date_range, isna)
  18. from pandas.tests.frame.common import TestData
  19. import pandas.util.testing as tm
  20. MIXED_FLOAT_DTYPES = ['float16', 'float32', 'float64']
  21. MIXED_INT_DTYPES = ['uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16',
  22. 'int32', 'int64']
  23. class TestDataFrameConstructors(TestData):
  24. def test_constructor(self):
  25. df = DataFrame()
  26. assert len(df.index) == 0
  27. df = DataFrame(data={})
  28. assert len(df.index) == 0
  def test_constructor_mixed(self):
      """Build frames from a mixed-type dict, with and without an index."""
      index, data = tm.getMixedTypeDict()

      # TODO(wesm), incomplete test?
      # Both frames are only constructed; their contents are not inspected.
      DataFrame(data, index=index)
      DataFrame(data)

      assert self.mixed_frame['foo'].dtype == np.object_
  def test_constructor_cast_failure(self):
      """Un-castable strings stay object; odd 2-D assignments (GH 3010)."""
      frame = DataFrame({'a': ['a', 'b', 'c']}, dtype=np.float64)
      assert frame['a'].dtype == object

      # GH 3010, constructing with odd arrays
      df = DataFrame(np.ones((4, 2)))

      # a list of rows is accepted as a single column
      df['foo'] = np.ones((4, 2)).tolist()

      # a 2-D ndarray under a tuple key is rejected
      with pytest.raises(ValueError):
          df[tuple(['test'])] = np.ones((4, 2))

      # the list form is still accepted afterwards
      df['foo2'] = np.ones((4, 2)).tolist()
  47. def test_constructor_dtype_copy(self):
  48. orig_df = DataFrame({
  49. 'col1': [1.],
  50. 'col2': [2.],
  51. 'col3': [3.]})
  52. new_df = pd.DataFrame(orig_df, dtype=float, copy=True)
  53. new_df['col1'] = 200.
  54. assert orig_df['col1'][0] == 1.
  def test_constructor_dtype_nocast_view(self):
      """Re-wrapping with the same dtype lets writes reach the source."""
      base = DataFrame([[1, 2]])

      # frame -> frame, dtype unchanged
      view = DataFrame(base, dtype=base[0].dtype)
      view[0][0] = 99
      assert base.values[0, 0] == 99

      # ndarray -> frame, dtype unchanged
      view = DataFrame(base.values, dtype=base[0].dtype)
      view[0][0] = 97
      assert base.values[0, 0] == 97
  63. def test_constructor_dtype_list_data(self):
  64. df = DataFrame([[1, '2'],
  65. [None, 'a']], dtype=object)
  66. assert df.loc[1, 0] is None
  67. assert df.loc[0, 1] == '2'
  def test_constructor_list_frames(self):
      """A one-element list holding a DataFrame gives a one-row frame."""
      # see gh-3243
      from_empty = DataFrame([DataFrame([])])
      assert from_empty.shape == (1, 0)

      nested = DataFrame([DataFrame(dict(A=lrange(5)))])
      assert isinstance(nested.iloc[0, 0], DataFrame)
  def test_constructor_mixed_dtypes(self):
      """Columns of differing int/float widths keep their own dtypes."""

      def _make_mixed_dtypes_df(typ, ad=None):
          """Build a frame with one column per dtype of the family ``typ``.

          ``typ`` must be 'int' or 'float'.  ``ad`` optionally supplies
          extra columns; it is updated in place with the generated arrays,
          keyed by dtype name.
          """
          if typ == 'int':
              dtypes = MIXED_INT_DTYPES
              # draw integers: floats from rand() would truncate to all 0
              arrays = [np.array(np.random.randint(10, size=10), dtype=d)
                        for d in dtypes]
          elif typ == 'float':
              dtypes = MIXED_FLOAT_DTYPES
              arrays = [np.array(np.random.rand(10), dtype=d)
                        for d in dtypes]
          else:
              # previously fell through to a NameError on ``dtypes``
              raise ValueError("typ must be 'int' or 'float', "
                               "got {typ!r}".format(typ=typ))

          for d, a in zip(dtypes, arrays):
              assert a.dtype == d
          if ad is None:
              ad = dict()
          ad.update(zip(dtypes, arrays))
          return DataFrame(ad)

      def _check_mixed_dtypes(df, dtypes=None):
          """Assert each dtype-named column present in ``df`` has that dtype."""
          if dtypes is None:
              dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES
          for d in dtypes:
              if d in df:
                  assert df.dtypes[d] == d

      # mixed floating and integer coexist in the same frame
      df = _make_mixed_dtypes_df('float')
      _check_mixed_dtypes(df)

      # add lots of types
      df = _make_mixed_dtypes_df('float', dict(A=1, B='foo', C='bar'))
      _check_mixed_dtypes(df)

      # GH 622
      df = _make_mixed_dtypes_df('int')
      _check_mixed_dtypes(df)
  106. def test_constructor_complex_dtypes(self):
  107. # GH10952
  108. a = np.random.rand(10).astype(np.complex64)
  109. b = np.random.rand(10).astype(np.complex128)
  110. df = DataFrame({'a': a, 'b': b})
  111. assert a.dtype == df.a.dtype
  112. assert b.dtype == df.b.dtype
  def test_constructor_dtype_str_na_values(self, string_dtype):
      """None and NaN remain missing when ``string_dtype`` is requested."""
      # https://github.com/pandas-dev/pandas/issues/21083
      with_none = DataFrame({'A': ['x', None]}, dtype=string_dtype)
      tm.assert_frame_equal(with_none.isna(),
                            DataFrame({"A": [False, True]}))
      assert with_none.iloc[1, 0] is None

      with_nan = DataFrame({'A': ['x', np.nan]}, dtype=string_dtype)
      assert np.isnan(with_nan.iloc[1, 0])
  def test_constructor_rec(self):
      """Record arrays name the columns; index and columns can be given."""
      records = self.frame.to_records(index=False)
      if PY3:
          # unicode error under PY2
          records.dtype.names = list(records.dtype.names)[::-1]

      index = self.frame.index

      plain = DataFrame(records)
      tm.assert_index_equal(plain.columns, pd.Index(records.dtype.names))

      indexed = DataFrame(records, index=index)
      tm.assert_index_equal(indexed.columns, pd.Index(records.dtype.names))
      tm.assert_index_equal(indexed.index, index)

      # explicit columns must match building first and reindexing after
      rng = np.arange(len(records))[::-1]
      result = DataFrame(records, index=rng, columns=['C', 'B'])
      expected = DataFrame(records, index=rng).reindex(columns=['C', 'B'])
      tm.assert_frame_equal(result, expected)
  137. def test_constructor_bool(self):
  138. df = DataFrame({0: np.ones(10, dtype=bool),
  139. 1: np.zeros(10, dtype=bool)})
  140. assert df.values.dtype == np.bool_
  def test_constructor_overflow_int64(self):
      """uint64 data keeps its dtype, from a dict or a record array."""
      # see gh-14881
      big = np.array([2 ** 64 - i for i in range(1, 10)],
                     dtype=np.uint64)
      assert DataFrame({'a': big})['a'].dtype == np.uint64

      # see gh-2355
      data_scores = [(6311132704823138710, 273), (2685045978526272070, 23),
                     (8921811264899370420, 45),
                     (long(17019687244989530680), 270),
                     (long(9930107427299601010), 273)]
      record_dtype = [('uid', 'u8'), ('score', 'u8')]
      data = np.zeros((len(data_scores),), dtype=record_dtype)
      data[:] = data_scores

      df_crawls = DataFrame(data)
      assert df_crawls['uid'].dtype == np.uint64
  @pytest.mark.parametrize("values", [
      np.array([2**64], dtype=object),
      np.array([2**65]),
      [2**64 + 1],
      np.array([-2**63 - 4], dtype=object),
      np.array([-2**64 - 1]),
      [-2**65 - 2]])
  def test_constructor_int_overflow(self, values):
      """Integers outside the 64-bit range land in an object column."""
      # see gh-18584
      first = values[0]

      column = DataFrame(values)[0]

      assert column.dtype == object
      assert column[0] == first
  def test_constructor_ordereddict(self):
      """Column order follows the key order of an OrderedDict."""
      import random

      nitems = 100
      shuffled = lrange(nitems)
      random.shuffle(shuffled)

      labels = ['A%d' % i for i in shuffled]
      df = DataFrame(OrderedDict(zip(labels, [[0]] * nitems)))

      assert labels == list(df.columns)
  def test_constructor_dict(self):
      """Exercise dict input: alignment, column selection, corner cases."""
      frame = DataFrame({'col1': self.ts1,
                         'col2': self.ts2})

      # col2 is padded with NaN
      assert len(self.ts1) == 30
      assert len(self.ts2) == 25

      tm.assert_series_equal(self.ts1, frame['col1'], check_names=False)

      padded = np.concatenate([[np.nan] * 5, self.ts2.values])
      exp = pd.Series(padded, index=self.ts1.index, name='col2')
      tm.assert_series_equal(exp, frame['col2'])

      # explicit columns: 'col1' is left out, 'col3' has no data
      frame = DataFrame({'col1': self.ts1,
                         'col2': self.ts2},
                        columns=['col2', 'col3', 'col4'])

      assert len(frame) == len(self.ts2)
      assert 'col1' not in frame
      assert isna(frame['col3']).all()

      # Corner cases
      assert len(DataFrame({})) == 0

      # mix dict and array, wrong size - no spec for which error should raise
      # first
      with pytest.raises(ValueError):
          DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']})

      # Length-one dict micro-optimization
      frame = DataFrame({'A': {'1': 1, '2': 2}})
      tm.assert_index_equal(frame.index, pd.Index(['1', '2']))

      # empty dict plus index
      idx = Index([0, 1, 2])
      frame = DataFrame({}, index=idx)
      assert frame.index is idx

      # empty with index and columns
      idx = Index([0, 1, 2])
      frame = DataFrame({}, index=idx, columns=idx)
      assert frame.index is idx
      assert frame.columns is idx
      assert len(frame._series) == 3

      # with dict of empty list and Series
      frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B'])
      tm.assert_index_equal(frame.index, Index([], dtype=np.int64))

      # GH 14381
      # Dict with None value
      frame_none = DataFrame(dict(a=None), index=[0])
      frame_none_list = DataFrame(dict(a=[None]), index=[0])
      for none_frame in (frame_none, frame_none_list):
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              assert none_frame.get_value(0, 'a') is None
      tm.assert_frame_equal(frame_none, frame_none_list)

      # GH10856
      # dict with scalar values should raise error, even if columns passed
      msg = 'If using all scalar values, you must pass an index'
      for extra in ({}, {'columns': ['a']}):
          with pytest.raises(ValueError, match=msg):
              DataFrame({'a': 0.7}, **extra)
  @pytest.mark.parametrize("scalar", [2, np.nan, None, 'D'])
  def test_constructor_invalid_items_unused(self, scalar):
      """A scalar under a key that ``columns`` does not select is ignored."""
      # No error if invalid (scalar) value is in fact not used:
      tm.assert_frame_equal(DataFrame({'a': scalar}, columns=['b']),
                            DataFrame(columns=['b']))
  @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
  def test_constructor_dict_nan_key(self, value):
      """``value`` (often NaN-like) works as a column and index label."""
      # GH 18455
      cols = [1, value, 3]
      idx = ['a', value]
      values = [[0, 3], [1, 4], [2, 5]]
      data = {col: Series(vals, index=idx)
              for col, vals in zip(cols, values)}

      expected = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                           index=idx, columns=cols)

      # no axes given: sort rows, then columns, into the expected layout
      result = DataFrame(data).sort_values(1).sort_values('a', axis=1)
      tm.assert_frame_equal(result, expected)

      # index given: only the columns need sorting
      result = DataFrame(data, index=idx).sort_values('a', axis=1)
      tm.assert_frame_equal(result, expected)

      # both axes given
      result = DataFrame(data, index=idx, columns=cols)
      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize("value", [np.nan, None, float('nan')])
  def test_constructor_dict_nan_tuple_key(self, value):
      """Tuple labels containing ``value`` work on both axes."""
      # GH 18455
      cols = Index([(11, 21), (value, 22), (13, value)])
      idx = Index([('a', value), (value, 2)])
      values = [[0, 3], [1, 4], [2, 5]]
      data = {cols[c]: Series(values[c], index=idx) for c in range(3)}

      expected = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                           index=idx, columns=cols)

      # no axes given: sort rows, then columns, into the expected layout
      unsorted = DataFrame(data)
      result = (unsorted
                .sort_values((11, 21))
                .sort_values(('a', value), axis=1))
      tm.assert_frame_equal(result, expected)

      # index given: only the columns need sorting
      result = DataFrame(data, index=idx).sort_values(('a', value), axis=1)
      tm.assert_frame_equal(result, expected)

      # both axes given
      result = DataFrame(data, index=idx, columns=cols)
      tm.assert_frame_equal(result, expected)
  @pytest.mark.skipif(not PY36, reason='Insertion order for Python>=3.6')
  def test_constructor_dict_order_insertion(self):
      """On Python >= 3.6 the columns follow dict insertion order."""
      # GH19018
      # initialization ordering: by insertion order if python>= 3.6
      d = {'b': self.ts2, 'a': self.ts1}
      tm.assert_frame_equal(DataFrame(data=d),
                            DataFrame(data=d, columns=list('ba')))
  @pytest.mark.skipif(PY36, reason='order by value for Python<3.6')
  def test_constructor_dict_order_by_values(self):
      """Below Python 3.6 the columns come out as ['a', 'b']."""
      # GH19018
      # initialization ordering: by value if python<3.6
      d = {'b': self.ts2, 'a': self.ts1}
      tm.assert_frame_equal(DataFrame(data=d),
                            DataFrame(data=d, columns=list('ab')))
  285. def test_constructor_multi_index(self):
  286. # GH 4078
  287. # construction error with mi and all-nan frame
  288. tuples = [(2, 3), (3, 3), (3, 3)]
  289. mi = MultiIndex.from_tuples(tuples)
  290. df = DataFrame(index=mi, columns=mi)
  291. assert pd.isna(df).values.ravel().all()
  292. tuples = [(3, 3), (2, 3), (3, 3)]
  293. mi = MultiIndex.from_tuples(tuples)
  294. df = DataFrame(index=mi, columns=mi)
  295. assert pd.isna(df).values.ravel().all()
  def test_constructor_error_msgs(self):
      """Check the ValueError text raised for each kind of bad input."""
      # passing an empty array with columns specified.
      msg = "Empty data passed with indices specified."
      with pytest.raises(ValueError, match=msg):
          DataFrame(np.empty(0), columns=list('abc'))

      # mix dict and array, wrong size
      msg = "Mixing dicts with non-Series may lead to ambiguous ordering."
      with pytest.raises(ValueError, match=msg):
          DataFrame({'A': {'a': 'a', 'b': 'b'},
                     'B': ['a', 'b', 'c']})

      # wrong size ndarray, GH 3105
      msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(np.arange(12).reshape((4, 3)),
                    columns=['foo', 'bar', 'baz'],
                    index=pd.date_range('2000-01-01', periods=3))

      # one row of three values against four column labels
      arr = np.array([[4, 5, 6]])
      msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(index=[0], columns=range(0, 4), data=arr)

      # the same values as a 1-D array
      arr = np.array([4, 5, 6])
      msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(index=[0], columns=range(0, 4), data=arr)

      # higher dim raise exception
      with pytest.raises(ValueError, match='Must pass 2-d input'):
          DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1])

      # wrong size axis labels
      msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1])

      msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)"
      with pytest.raises(ValueError, match=msg):
          DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2])

      # all-scalar dict without an index
      msg = "If using all scalar values, you must pass an index"
      with pytest.raises(ValueError, match=msg):
          DataFrame({'a': False, 'b': True})
  def test_constructor_with_embedded_frames(self):
      """Frames placed in a list are stored as cells and read back intact."""
      # embedded data frames
      inner = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]})
      outer = DataFrame([inner, inner + 10])

      # smoke checks only: the results are discarded
      outer.dtypes
      str(outer)

      tm.assert_frame_equal(outer.loc[0, 0], inner)
      tm.assert_frame_equal(outer.loc[1, 0], inner + 10)
  def test_constructor_subclass_dict(self):
      """dict subclasses give the same frame as the equivalent plain dicts."""
      # Test for passing dict subclass to constructor
      data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)),
              'col2': tm.TestSubDict((x, 20.0 * x) for x in range(10))}
      df = DataFrame(data)
      plain_columns = {col: dict(compat.iteritems(val))
                       for col, val in compat.iteritems(data)}
      refdf = DataFrame(plain_columns)
      tm.assert_frame_equal(refdf, df)

      # the outer mapping may be a subclass as well
      data = tm.TestSubDict(compat.iteritems(data))
      df = DataFrame(data)
      tm.assert_frame_equal(refdf, df)

      # try with defaultdict
      from collections import defaultdict
      data = {}
      self.frame['B'][:10] = np.nan
      for name, column in compat.iteritems(self.frame):
          as_default = defaultdict(dict)
          as_default.update(column.to_dict())
          data[name] = as_default
      frame = DataFrame(data)
      tm.assert_frame_equal(self.frame.sort_index(), frame)
  def test_constructor_dict_block(self):
      """``values`` follows the explicit ``columns`` order for dict input."""
      df = DataFrame({'d': [4.], 'c': [3.], 'b': [2.], 'a': [1.]},
                     columns=['d', 'c', 'b', 'a'])
      expected = np.array([[4., 3., 2., 1.]])
      tm.assert_numpy_array_equal(df.values, expected)
  def test_constructor_dict_cast(self):
      """dtype=float casts castable dict columns and leaves the rest."""
      # cast float tests
      castable = {
          'A': {'1': 1, '2': 2},
          'B': {'1': '1', '2': '2', '3': '3'},
      }

      frame = DataFrame(castable, dtype=float)
      assert len(frame) == 3
      assert frame['B'].dtype == np.float64
      assert frame['A'].dtype == np.float64

      # without a dtype the string column stays object
      frame = DataFrame(castable)
      assert len(frame) == 3
      assert frame['B'].dtype == np.object_
      assert frame['A'].dtype == np.float64

      # can't cast to float
      uncastable = {
          'A': dict(zip(range(20), tm.makeStringIndex(20))),
          'B': dict(zip(range(15), np.random.randn(15)))
      }
      frame = DataFrame(uncastable, dtype=float)
      assert len(frame) == 20
      assert frame['A'].dtype == np.object_
      assert frame['B'].dtype == np.float64
  def test_constructor_dict_dont_upcast(self):
      """Scalars in mixed columns keep their Python types."""
      nested = {'Col1': {'Row1': 'A String', 'Row2': np.nan}}
      df = DataFrame(nested)
      assert isinstance(df['Col1']['Row2'], float)

      dm = DataFrame([[1, 2], ['a', 'b']], index=[1, 2], columns=[1, 2])
      assert isinstance(dm[1][1], int)
  def test_constructor_dict_of_tuples(self):
      """Tuple values give the same frame as list values, dtype aside."""
      # GH #1491
      data = {'a': (1, 2, 3), 'b': (4, 5, 6)}

      result = DataFrame(data)
      as_lists = {k: list(v) for k, v in compat.iteritems(data)}
      expected = DataFrame(as_lists)

      tm.assert_frame_equal(result, expected, check_dtype=False)
  def test_constructor_dict_multiindex(self):
      """Tuple keys in nested dicts become labels on both axes."""

      def check(result, expected):
          """Compare frames with every strictness flag switched on."""
          return tm.assert_frame_equal(result, expected, check_dtype=True,
                                       check_index_type=True,
                                       check_column_type=True,
                                       check_names=True)

      d = {('a', 'a'): {('i', 'i'): 0, ('i', 'j'): 1, ('j', 'i'): 2},
           ('b', 'a'): {('i', 'i'): 6, ('i', 'j'): 5, ('j', 'i'): 4},
           ('b', 'c'): {('i', 'i'): 7, ('i', 'j'): 8, ('j', 'i'): 9}}
      _d = sorted(d.items())

      df = DataFrame(d)
      outer_keys = [key for key, _ in _d]
      inner_dicts = [inner for _, inner in _d]
      expected = DataFrame(
          inner_dicts,
          index=MultiIndex.from_tuples(outer_keys)).T
      expected.index = MultiIndex.from_tuples(expected.index)
      check(df, expected)

      # add a string key next to the tuple keys
      d['z'] = {'y': 123., ('i', 'i'): 111, ('i', 'j'): 111, ('j', 'i'): 111}
      _d.insert(0, ('z', d['z']))
      outer_keys = [key for key, _ in _d]
      inner_dicts = [inner for _, inner in _d]
      expected = DataFrame(
          inner_dicts,
          index=Index(outer_keys, tupleize_cols=False)).T
      expected.index = Index(expected.index, tupleize_cols=False)

      df = DataFrame(d)
      df = df.reindex(columns=expected.columns, index=expected.index)
      check(df, expected)
  def test_constructor_dict_datetime64_index(self):
      """datetime64, datetime and Timestamp keys give the same frame."""
      # GH 10160
      dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']

      def create_data(constructor):
          """Map i -> {constructor(date_i): 2 * i} for every date string."""
          return {i: {constructor(s): 2 * i}
                  for i, s in enumerate(dates_as_str)}

      key_makers = [np.datetime64,
                    lambda x: datetime.strptime(x, '%Y-%m-%d'),
                    Timestamp]
      inputs = [create_data(make_key) for make_key in key_makers]

      expected = DataFrame([{0: 0, 1: None, 2: None, 3: None},
                            {0: None, 1: 2, 2: None, 3: None},
                            {0: None, 1: None, 2: 4, 3: None},
                            {0: None, 1: None, 2: None, 3: 6}],
                           index=[Timestamp(dt) for dt in dates_as_str])

      for data in inputs:
          tm.assert_frame_equal(DataFrame(data), expected)
  def test_constructor_dict_timedelta64_index(self):
      """timedelta64, timedelta and Timedelta keys give the same frame."""
      # GH 10160
      td_as_int = [1, 2, 3, 4]

      def create_data(constructor):
          """Map i -> {constructor(days_i): 2 * i} for every day count."""
          return {i: {constructor(s): 2 * i}
                  for i, s in enumerate(td_as_int)}

      key_makers = [lambda x: np.timedelta64(x, 'D'),
                    lambda x: timedelta(days=x),
                    lambda x: Timedelta(x, 'D')]
      inputs = [create_data(make_key) for make_key in key_makers]

      expected = DataFrame([{0: 0, 1: None, 2: None, 3: None},
                            {0: None, 1: 2, 2: None, 3: None},
                            {0: None, 1: None, 2: 4, 3: None},
                            {0: None, 1: None, 2: None, 3: 6}],
                           index=[Timedelta(td, 'D') for td in td_as_int])

      for data in inputs:
          tm.assert_frame_equal(DataFrame(data), expected)
  475. def test_constructor_period(self):
  476. # PeriodIndex
  477. a = pd.PeriodIndex(['2012-01', 'NaT', '2012-04'], freq='M')
  478. b = pd.PeriodIndex(['2012-02-01', '2012-03-01', 'NaT'], freq='D')
  479. df = pd.DataFrame({'a': a, 'b': b})
  480. assert df['a'].dtype == a.dtype
  481. assert df['b'].dtype == b.dtype
  482. # list of periods
  483. df = pd.DataFrame({'a': a.astype(object).tolist(),
  484. 'b': b.astype(object).tolist()})
  485. assert df['a'].dtype == a.dtype
  486. assert df['b'].dtype == b.dtype
  def test_nested_dict_frame_constructor(self):
      """Rebuild a period-column frame from column- and row-keyed dicts."""
      rng = pd.period_range('1/1/2000', periods=5)
      df = DataFrame(np.random.randn(10, 5), columns=rng)

      # outer key = column, inner key = row
      data = {}
      for col, row in itertools.product(df.columns, df.index):
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              data.setdefault(col, {})[row] = df.get_value(row, col)

      result = DataFrame(data, columns=rng)
      tm.assert_frame_equal(result, df)

      # outer key = row, inner key = column; transpose to compare
      data = {}
      for col, row in itertools.product(df.columns, df.index):
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              data.setdefault(row, {})[col] = df.get_value(row, col)

      result = DataFrame(data, index=rng).T
      tm.assert_frame_equal(result, df)
  def _check_basic_constructor(self, empty):
      """Run the shared array-input checks against an array factory.

      ``empty`` is called as ``empty(shape)`` or ``empty(shape, dtype=...)``
      and must return a sized array-like; callers in this class pass
      ``np.ones`` and ``ma.masked_all``.
      """
      # the main input is a 2-D array with shape (2, 3)
      mat = empty((2, 3), dtype=float)

      # 2-D input
      frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2])
      assert len(frame.index) == 2
      assert len(frame.columns) == 3

      # 1-D input
      frame = DataFrame(empty((3,)), columns=['A'], index=[1, 2, 3])
      assert len(frame.index) == 3
      assert len(frame.columns) == 1

      # cast type
      frame = DataFrame(mat, columns=['A', 'B', 'C'],
                        index=[1, 2], dtype=np.int64)
      assert frame.values.dtype == np.int64

      # wrong size axis labels
      msg = r'Shape of passed values is \(2, 3\), indices imply \(1, 3\)'
      with pytest.raises(ValueError, match=msg):
          DataFrame(mat, columns=['A', 'B', 'C'], index=[1])
      msg = r'Shape of passed values is \(2, 3\), indices imply \(2, 2\)'
      with pytest.raises(ValueError, match=msg):
          DataFrame(mat, columns=['A', 'B'], index=[1, 2])

      # higher dim raise exception
      with pytest.raises(ValueError, match='Must pass 2-d input'):
          DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'],
                    index=[1])

      # automatic labeling
      default_rows = pd.Index(lrange(2))
      default_cols = pd.Index(lrange(3))

      frame = DataFrame(mat)
      tm.assert_index_equal(frame.index, default_rows)
      tm.assert_index_equal(frame.columns, default_cols)

      frame = DataFrame(mat, index=[1, 2])
      tm.assert_index_equal(frame.columns, default_cols)

      frame = DataFrame(mat, columns=['A', 'B', 'C'])
      tm.assert_index_equal(frame.index, default_rows)

      # 0-length axis
      frame = DataFrame(empty((0, 3)))
      assert len(frame.index) == 0

      frame = DataFrame(empty((3, 0)))
      assert len(frame.columns) == 0
  def test_constructor_ndarray(self):
      """Run the basic checks on ``np.ones``, then try a list of strings."""
      self._check_basic_constructor(np.ones)

      strings = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A'])
      assert len(strings) == 2
  @pytest.mark.skipif(PY2 and _np_version_under1p13,
                      reason="old numpy & py2")
  def test_constructor_maskedarray(self):
      """Run the basic checks on masked arrays; unmasked cells survive."""
      self._check_basic_constructor(ma.masked_all)

      # Check non-masked values
      partly_masked = ma.masked_all((2, 3), dtype=float)
      partly_masked[0, 0] = 1.0
      partly_masked[1, 2] = 2.0
      frame = DataFrame(partly_masked, columns=['A', 'B', 'C'],
                        index=[1, 2])
      assert 1.0 == frame['A'][1]
      assert 2.0 == frame['C'][2]

      # what is this even checking??
      # (asserts that no cell of a fully masked frame equals itself)
      fully_masked = ma.masked_all((2, 3), dtype=float)
      frame = DataFrame(fully_masked, columns=['A', 'B', 'C'],
                        index=[1, 2])
      assert np.all(~np.asarray(frame == frame))
  @pytest.mark.skipif(PY2 and _np_version_under1p13,
                      reason="old numpy & py2")
  def test_constructor_maskedarray_nonfloat(self):
      """Masked int, datetime64 and bool arrays as constructor input."""
      labels = ['A', 'B', 'C']
      rows = [1, 2]

      # masked int promoted to float
      mat = ma.masked_all((2, 3), dtype=int)
      # 2-D input
      frame = DataFrame(mat, columns=labels, index=rows)

      assert len(frame.index) == 2
      assert len(frame.columns) == 3
      assert np.all(~np.asarray(frame == frame))

      # cast type
      frame = DataFrame(mat, columns=labels,
                        index=rows, dtype=np.float64)
      assert frame.values.dtype == np.float64

      # Check non-masked values
      mat2 = ma.copy(mat)
      mat2[0, 0] = 1
      mat2[1, 2] = 2
      frame = DataFrame(mat2, columns=labels, index=rows)
      assert 1 == frame['A'][1]
      assert 2 == frame['C'][2]

      # masked np.datetime64 stays (use NaT as null)
      mat = ma.masked_all((2, 3), dtype='M8[ns]')
      # 2-D input
      frame = DataFrame(mat, columns=labels, index=rows)

      assert len(frame.index) == 2
      assert len(frame.columns) == 3
      assert isna(frame).values.all()

      # cast type
      frame = DataFrame(mat, columns=labels,
                        index=rows, dtype=np.int64)
      assert frame.values.dtype == np.int64

      # Check non-masked values
      mat2 = ma.copy(mat)
      mat2[0, 0] = 1
      mat2[1, 2] = 2
      frame = DataFrame(mat2, columns=labels, index=rows)
      assert 1 == frame['A'].view('i8')[1]
      assert 2 == frame['C'].view('i8')[2]

      # masked bool promoted to object
      mat = ma.masked_all((2, 3), dtype=bool)
      # 2-D input
      frame = DataFrame(mat, columns=labels, index=rows)

      assert len(frame.index) == 2
      assert len(frame.columns) == 3
      assert np.all(~np.asarray(frame == frame))

      # cast type
      frame = DataFrame(mat, columns=labels,
                        index=rows, dtype=object)
      assert frame.values.dtype == object

      # Check non-masked values
      mat2 = ma.copy(mat)
      mat2[0, 0] = True
      mat2[1, 2] = False
      frame = DataFrame(mat2, columns=labels, index=rows)
      assert frame['A'][1] is True
      assert frame['C'][2] is False
  @pytest.mark.skipif(PY2 and _np_version_under1p13,
                      reason="old numpy & py2")
  def test_constructor_maskedarray_hardened(self):
      """Hard-masked arrays are accepted, masked or not (GH24574)."""
      # Check numpy masked arrays with hard masks -- from GH24574
      all_masked = ma.masked_all((2, 2), dtype=float).harden_mask()
      result = pd.DataFrame(all_masked, columns=['A', 'B'], index=[1, 2])
      expected = pd.DataFrame(
          {'A': [np.nan, np.nan], 'B': [np.nan, np.nan]},
          columns=['A', 'B'],
          index=[1, 2],
          dtype=float)
      tm.assert_frame_equal(result, expected)

      # Check case where mask is hard but no data are masked
      none_masked = ma.ones((2, 2), dtype=float).harden_mask()
      result = pd.DataFrame(none_masked, columns=['A', 'B'], index=[1, 2])
      expected = pd.DataFrame(
          {'A': [1.0, 1.0], 'B': [1.0, 1.0]},
          columns=['A', 'B'],
          index=[1, 2],
          dtype=float)
      tm.assert_frame_equal(result, expected)
  @pytest.mark.skipif(PY2 and _np_version_under1p13,
                      reason="old numpy & py2")
  def test_constructor_maskedrecarray_dtype(self):
      """An explicit dtype is applied to masked record array input."""
      # Ensure constructor honors dtype
      fields = [('date', '<f8'), ('price', '<f8')]
      data = np.ma.array(np.ma.zeros(5, dtype=fields), mask=[False] * 5)
      data = data.view(ma.mrecords.mrecarray)

      result = pd.DataFrame(data, dtype=int)
      expected = pd.DataFrame(np.zeros((5, 2), dtype=int),
                              columns=['date', 'price'])
      tm.assert_frame_equal(result, expected)
  @pytest.mark.skipif(PY2 and _np_version_under1p13,
                      reason="old numpy & py2")
  def test_constructor_mrecarray(self):
      """An mrecarray gives the same frame as a dict of its filled arrays."""
      # Ensure mrecarray produces frame identical to dict of masked arrays
      # from GH3479

      assert_fr_equal = functools.partial(tm.assert_frame_equal,
                                          check_index_type=True,
                                          check_column_type=True,
                                          check_frame_type=True)
      arrays = [
          ('float', np.array([1.5, 2.0])),
          ('int', np.array([1, 2])),
          ('str', np.array(['abc', 'def'])),
      ]
      # iterate over a copy: masked variants are appended inside the loop
      for name, arr in arrays[:]:
          masked = np.ma.masked_array(arr, mask=[False, True])
          arrays.append(('masked1_' + name, masked))
      arrays.append(('masked_all', np.ma.masked_all((2,))))
      arrays.append(('masked_none',
                     np.ma.masked_array([1.0, 2.5], mask=False)))

      # call assert_frame_equal for all selections of 3 arrays
      for comb in itertools.combinations(arrays, 3):
          names, data = zip(*comb)
          mrecs = ma.mrecords.fromarrays(data, names=names)

          # fill the comb
          filled = {k: (v.filled() if hasattr(v, 'filled') else v)
                    for k, v in comb}

          expected = DataFrame(filled, columns=names)
          result = DataFrame(mrecs)
          assert_fr_equal(result, expected)

          # specify columns
          expected = DataFrame(filled, columns=names[::-1])
          result = DataFrame(mrecs, columns=names[::-1])
          assert_fr_equal(result, expected)

          # specify index
          expected = DataFrame(filled, columns=names, index=[1, 2])
          result = DataFrame(mrecs, index=[1, 2])
          assert_fr_equal(result, expected)
  695. def test_constructor_corner_shape(self):
  696. df = DataFrame(index=[])
  697. assert df.values.shape == (0, 0)
  @pytest.mark.parametrize("data, index, columns, dtype, expected", [
      (None, lrange(10), ['a', 'b'], object, np.object_),
      (None, None, ['a', 'b'], 'int64', np.dtype('int64')),
      (None, lrange(10), ['a', 'b'], int, np.dtype('float64')),
      ({}, None, ['foo', 'bar'], None, np.object_),
      ({'b': 1}, lrange(10), list('abc'), int, np.dtype('float64'))
  ])
  def test_constructor_dtype(self, data, index, columns, dtype, expected):
      """``values.dtype`` matches ``expected`` for each parametrized case."""
      built = DataFrame(data, index, columns, dtype)
      assert built.values.dtype == expected
  def test_constructor_scalar_inference(self):
      """Each scalar broadcast over the index gets its natural dtype."""
      data = {'int': 1, 'bool': True,
              'float': 3., 'complex': 4j, 'object': 'foo'}
      df = DataFrame(data, index=np.arange(10))

      wanted = [('int', np.int64),
                ('bool', np.bool_),
                ('float', np.float64),
                ('complex', np.complex128),
                ('object', np.object_)]
      for column, dtype in wanted:
          assert df[column].dtype == dtype
  def test_constructor_arrays_and_scalars(self):
      """A scalar next to an array is repeated; scalars alone raise."""
      df = DataFrame({'a': np.random.randn(10), 'b': True})
      exp = DataFrame({'a': df['a'].values, 'b': [True] * 10})
      tm.assert_frame_equal(df, exp)

      # nothing here supplies a length
      with pytest.raises(ValueError, match='must pass an index'):
          DataFrame({'a': False, 'b': True})
  def test_constructor_DataFrame(self):
      """Re-wrapping a frame gives an equal one; ``dtype`` casts it."""
      rewrapped = DataFrame(self.frame)
      tm.assert_frame_equal(rewrapped, self.frame)

      as_int = DataFrame(self.frame, dtype=np.int64)
      assert as_int.values.dtype == np.int64
  def test_constructor_more(self):
      """Assorted checks: 1-D input, data-less frames, failed casts."""
      # used to be in test_matrix.py
      one_dim = np.random.randn(10)
      dm = DataFrame(one_dim, columns=['A'], index=np.arange(10))
      assert dm.values.ndim == 2

      # a zero-length 1-D array still gives 2-D values
      dm = DataFrame(np.random.randn(0))
      assert dm.values.ndim == 2

      # no data specified
      dm = DataFrame(columns=['A', 'B'], index=np.arange(10))
      assert dm.values.shape == (10, 2)

      dm = DataFrame(columns=['A', 'B'])
      assert dm.values.shape == (0, 2)

      dm = DataFrame(index=np.arange(10))
      assert dm.values.shape == (10, 0)

      # can't cast
      mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1)
      with pytest.raises(ValueError, match='cast'):
          DataFrame(mat, index=[0, 1], columns=[0], dtype=float)

      # build from the frame's ``_series`` mapping, then re-wrap
      dm = DataFrame(DataFrame(self.frame._series))
      tm.assert_frame_equal(dm, self.frame)

      # int cast
      dm = DataFrame({'A': np.ones(10, dtype=int),
                      'B': np.ones(10, dtype=np.float64)},
                     index=np.arange(10))

      assert len(dm.columns) == 2
      assert dm.values.dtype == np.float64
  756. def test_constructor_empty_list(self):
  757. df = DataFrame([], index=[])
  758. expected = DataFrame(index=[])
  759. tm.assert_frame_equal(df, expected)
  760. # GH 9939
  761. df = DataFrame([], columns=['A', 'B'])
  762. expected = DataFrame({}, columns=['A', 'B'])
  763. tm.assert_frame_equal(df, expected)
  764. # Empty generator: list(empty_gen()) == []
  765. def empty_gen():
  766. return
  767. yield
  768. df = DataFrame(empty_gen(), columns=['A', 'B'])
  769. tm.assert_frame_equal(df, expected)
  770. def test_constructor_list_of_lists(self):
  771. # GH #484
  772. df = DataFrame(data=[[1, 'a'], [2, 'b']], columns=["num", "str"])
  773. assert is_integer_dtype(df['num'])
  774. assert df['str'].dtype == np.object_
  775. # GH 4851
  776. # list of 0-dim ndarrays
  777. expected = DataFrame({0: np.arange(10)})
  778. data = [np.array(x) for x in range(10)]
  779. result = DataFrame(data)
  780. tm.assert_frame_equal(result, expected)
  def test_constructor_sequence_like(self):
      """DataFrame accepts generic Sequence rows and ``array.array`` data."""
      # GH 3783
      # collections.Sequence like

      class DummyContainer(compat.Sequence):
          """Minimal Sequence that delegates to a wrapped list."""

          def __init__(self, lst):
              self._lst = lst

          def __getitem__(self, n):
              return self._lst.__getitem__(n)

          def __len__(self):
              # ``len()`` passes only ``self``; the extra positional
              # parameter this method used to declare made
              # ``len(container)`` raise TypeError.
              return self._lst.__len__()

      lst_containers = [DummyContainer([1, 'a']), DummyContainer([2, 'b'])]
      columns = ["num", "str"]
      result = DataFrame(lst_containers, columns=columns)
      expected = DataFrame([[1, 'a'], [2, 'b']], columns=columns)
      tm.assert_frame_equal(result, expected, check_dtype=False)

      # GH 4297
      # support Array
      import array
      result = DataFrame({'A': array.array('i', range(10))})
      expected = DataFrame({'A': list(range(10))})
      tm.assert_frame_equal(result, expected, check_dtype=False)

      expected = DataFrame([list(range(10)), list(range(10))])
      result = DataFrame([array.array('i', range(10)),
                          array.array('i', range(10))])
      tm.assert_frame_equal(result, expected, check_dtype=False)
  806. def test_constructor_iterable(self):
  807. # GH 21987
  808. class Iter():
  809. def __iter__(self):
  810. for i in range(10):
  811. yield [1, 2, 3]
  812. expected = DataFrame([[1, 2, 3]] * 10)
  813. result = DataFrame(Iter())
  814. tm.assert_frame_equal(result, expected)
  815. def test_constructor_iterator(self):
  816. expected = DataFrame([list(range(10)), list(range(10))])
  817. result = DataFrame([range(10), range(10)])
  818. tm.assert_frame_equal(result, expected)
  819. def test_constructor_generator(self):
  820. # related #2305
  821. gen1 = (i for i in range(10))
  822. gen2 = (i for i in range(10))
  823. expected = DataFrame([list(range(10)), list(range(10))])
  824. result = DataFrame([gen1, gen2])
  825. tm.assert_frame_equal(result, expected)
  826. gen = ([i, 'a'] for i in range(10))
  827. result = DataFrame(gen)
  828. expected = DataFrame({0: range(10), 1: 'a'})
  829. tm.assert_frame_equal(result, expected, check_dtype=False)
  def test_constructor_list_of_dicts(self):
      """Row dicts with differing keys match ``from_dict(orient='index')``."""
      rows = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
              OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
              OrderedDict([['a', 1.5], ['d', 6]]),
              OrderedDict(),
              OrderedDict([['a', 1.5], ['b', 3], ['c', 4]]),
              OrderedDict([['b', 3], ['c', 4], ['d', 6]])]

      result = DataFrame(rows)
      rows_by_position = dict(zip(range(len(rows)), rows))
      expected = DataFrame.from_dict(rows_by_position, orient='index')
      tm.assert_frame_equal(result, expected.reindex(result.index))

      # a lone empty dict is compared with a frame that has only index [0]
      result = DataFrame([{}])
      expected = DataFrame(index=[0])
      tm.assert_frame_equal(result, expected)
  844. def test_constructor_ordered_dict_preserve_order(self):
  845. # see gh-13304
  846. expected = DataFrame([[2, 1]], columns=['b', 'a'])
  847. data = OrderedDict()
  848. data['b'] = [2]
  849. data['a'] = [1]
  850. result = DataFrame(data)
  851. tm.assert_frame_equal(result, expected)
  852. data = OrderedDict()
  853. data['b'] = 2
  854. data['a'] = 1
  855. result = DataFrame([data])
  856. tm.assert_frame_equal(result, expected)
  857. def test_constructor_ordered_dict_conflicting_orders(self):
  858. # the first dict element sets the ordering for the DataFrame,
  859. # even if there are conflicting orders from subsequent ones
  860. row_one = OrderedDict()
  861. row_one['b'] = 2
  862. row_one['a'] = 1
  863. row_two = OrderedDict()
  864. row_two['a'] = 1
  865. row_two['b'] = 2
  866. row_three = {'b': 2, 'a': 1}
  867. expected = DataFrame([[2, 1], [2, 1]], columns=['b', 'a'])
  868. result = DataFrame([row_one, row_two])
  869. tm.assert_frame_equal(result, expected)
  870. expected = DataFrame([[2, 1], [2, 1], [2, 1]], columns=['b', 'a'])
  871. result = DataFrame([row_one, row_two, row_three])
  872. tm.assert_frame_equal(result, expected)
  def test_constructor_list_of_series(self):
      """Lists of Series (named, partly named, unnamed) become rows."""
      row_dicts = [OrderedDict([['a', 1.5], ['b', 3.0], ['c', 4.0]]),
                   OrderedDict([['a', 1.5], ['b', 3.0], ['c', 6.0]])]
      idx = Index(['a', 'b', 'c'])

      # all named
      rows = [Series([1.5, 3, 4], idx, dtype='O', name='x'),
              Series([1.5, 3, 6], idx, name='y')]
      result = DataFrame(rows)
      by_name = OrderedDict(zip(['x', 'y'], row_dicts))
      expected = DataFrame.from_dict(by_name, orient='index')
      tm.assert_frame_equal(result, expected)

      # some unnamed
      rows = [Series([1.5, 3, 4], idx, dtype='O', name='x'),
              Series([1.5, 3, 6], idx)]
      result = DataFrame(rows)

      by_name = OrderedDict(zip(['x', 'Unnamed 0'], row_dicts))
      expected = DataFrame.from_dict(by_name, orient='index')
      tm.assert_frame_equal(result.sort_index(), expected)

      # none named
      ragged_dicts = [
          OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
          OrderedDict([['a', 1.5], ['b', 3], ['d', 6]]),
          OrderedDict([['a', 1.5], ['d', 6]]),
          OrderedDict(),
          OrderedDict([['a', 1.5], ['b', 3], ['c', 4]]),
          OrderedDict([['b', 3], ['c', 4], ['d', 6]])]
      ragged_series = [Series(d) for d in ragged_dicts]

      result = DataFrame(ragged_series)
      by_position = OrderedDict(zip(range(len(ragged_series)),
                                    ragged_series))
      expected = DataFrame.from_dict(by_position, orient='index')
      tm.assert_frame_equal(result, expected.reindex(result.index))

      # passing the positional index explicitly changes nothing
      result2 = DataFrame(ragged_series, index=np.arange(6))
      tm.assert_frame_equal(result, result2)

      result = DataFrame([Series({})])
      expected = DataFrame(index=[0])
      tm.assert_frame_equal(result, expected)

      # unnamed Series sharing one index
      row_dicts = [OrderedDict([['a', 1.5], ['b', 3.0], ['c', 4.0]]),
                   OrderedDict([['a', 1.5], ['b', 3.0], ['c', 6.0]])]
      by_position = OrderedDict(zip(range(len(row_dicts)), row_dicts))

      idx = Index(['a', 'b', 'c'])
      rows = [Series([1.5, 3, 4], idx, dtype='O'),
              Series([1.5, 3, 6], idx)]
      result = DataFrame(rows)
      expected = DataFrame.from_dict(by_position, orient='index')
      tm.assert_frame_equal(result, expected)
  917. def test_constructor_list_of_series_aligned_index(self):
  918. series = [pd.Series(i, index=['b', 'a', 'c'], name=str(i))
  919. for i in range(3)]
  920. result = pd.DataFrame(series)
  921. expected = pd.DataFrame({'b': [0, 1, 2],
  922. 'a': [0, 1, 2],
  923. 'c': [0, 1, 2]},
  924. columns=['b', 'a', 'c'],
  925. index=['0', '1', '2'])
  926. tm.assert_frame_equal(result, expected)
  927. def test_constructor_list_of_derived_dicts(self):
  928. class CustomDict(dict):
  929. pass
  930. d = {'a': 1.5, 'b': 3}
  931. data_custom = [CustomDict(d)]
  932. data = [d]
  933. result_custom = DataFrame(data_custom)
  934. result = DataFrame(data)
  935. tm.assert_frame_equal(result, result_custom)
  def test_constructor_ragged(self):
      """Columns of different lengths are rejected with ValueError."""
      ragged = {'A': np.random.randn(10),
                'B': np.random.randn(8)}
      msg = 'arrays must all be same length'
      with pytest.raises(ValueError, match=msg):
          DataFrame(ragged)
  def test_constructor_scalar(self):
      """A scalar dict value is repeated for each label of the index."""
      idx = Index(lrange(3))
      result = DataFrame({"a": 0}, index=idx)
      expected = DataFrame({"a": [0, 0, 0]}, index=idx)
      # only the values are compared, not the dtypes
      tm.assert_frame_equal(result, expected, check_dtype=False)
  def test_constructor_Series_copy_bug(self):
      """Copying a frame built from one Series column must not raise."""
      column = self.frame['A']
      frame = DataFrame(column, index=self.frame.index, columns=['A'])
      # smoke check only: the copy itself is what is exercised
      frame.copy()
  def test_constructor_mixed_dict_and_Series(self):
      """Dict and Series columns align; list plus dict is ambiguous."""
      columns = {}
      columns['A'] = {'foo': 1, 'bar': 2, 'baz': 3}
      columns['B'] = Series([4, 3, 2, 1],
                            index=['bar', 'qux', 'baz', 'foo'])

      result = DataFrame(columns)
      assert result.index.is_monotonic

      # ordering ambiguous, raise exception
      with pytest.raises(ValueError, match='ambiguous ordering'):
          DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}})

      # this is OK though
      labelled = Series(['a', 'b'], index=['a', 'b'])
      result = DataFrame({'A': ['a', 'b'], 'B': labelled})
      expected = DataFrame({'A': ['a', 'b'], 'B': ['a', 'b']},
                           index=['a', 'b'])
      tm.assert_frame_equal(result, expected)
  964. def test_constructor_mixed_type_rows(self):
  965. # Issue 25075
  966. data = [[1, 2], (3, 4)]
  967. result = DataFrame(data)
  968. expected = DataFrame([[1, 2], [3, 4]])
  969. tm.assert_frame_equal(result, expected)
  970. def test_constructor_tuples(self):
  971. result = DataFrame({'A': [(1, 2), (3, 4)]})
  972. expected = DataFrame({'A': Series([(1, 2), (3, 4)])})
  973. tm.assert_frame_equal(result, expected)
  974. def test_constructor_namedtuples(self):
  975. # GH11181
  976. from collections import namedtuple
  977. named_tuple = namedtuple("Pandas", list('ab'))
  978. tuples = [named_tuple(1, 3), named_tuple(2, 4)]
  979. expected = DataFrame({'a': [1, 2], 'b': [3, 4]})
  980. result = DataFrame(tuples)
  981. tm.assert_frame_equal(result, expected)
  982. # with columns
  983. expected = DataFrame({'y': [1, 2], 'z': [3, 4]})
  984. result = DataFrame(tuples, columns=['y', 'z'])
  985. tm.assert_frame_equal(result, expected)
  def test_constructor_orient(self):
      """``from_dict(orient='index')`` treats each dict value as a row."""
      transposed_columns = self.mixed_frame.T._series
      rebuilt = DataFrame.from_dict(transposed_columns, orient='index')
      tm.assert_frame_equal(rebuilt, self.mixed_frame.sort_index())

      # dict of sequence
      sequences = {'hi': [32, 3, 3],
                   'there': [3, 5, 3]}
      by_index = DataFrame.from_dict(sequences, orient='index')
      by_columns = DataFrame.from_dict(sequences)
      expected = by_columns.T.reindex(list(sequences.keys()))
      tm.assert_frame_equal(by_index, expected)
  def test_from_dict_columns_parameter(self):
      """``columns`` is accepted by ``from_dict`` only for orient='index'."""
      # GH 18529
      # Test new columns parameter for from_dict that was added to make
      # from_items(..., orient='index', columns=[...]) easier to replicate
      ordered = OrderedDict([('A', [1, 2]), ('B', [4, 5])])
      result = DataFrame.from_dict(ordered, orient='index',
                                   columns=['one', 'two'])
      expected = DataFrame([[1, 2], [4, 5]], index=['A', 'B'],
                           columns=['one', 'two'])
      tm.assert_frame_equal(result, expected)

      msg = "cannot use columns parameter with orient='columns'"

      # explicit orient='columns'
      with pytest.raises(ValueError, match=msg):
          DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]),
                              orient='columns', columns=['one', 'two'])

      # orient left at its default
      with pytest.raises(ValueError, match=msg):
          DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]),
                              columns=['one', 'two'])
  def test_constructor_Series_named(self):
      """A Series name becomes the column label when one is present."""
      named = Series([1, 2, 3], index=['a', 'b', 'c'], name='x')
      frame = DataFrame(named)
      assert frame.columns[0] == 'x'
      tm.assert_index_equal(frame.index, named.index)

      # ndarray like
      arr = np.random.randn(10)
      ser = Series(arr, name='x')
      tm.assert_frame_equal(DataFrame(ser), DataFrame(dict(x=ser)))

      # unnamed Series is compared against a column labelled 0
      ser = Series(arr, index=range(3, 13))
      tm.assert_frame_equal(DataFrame(ser), DataFrame({0: ser}))

      with pytest.raises(ValueError):
          DataFrame(ser, columns=[1, 2])

      # #2234
      empty_named = Series([], name='x')
      frame = DataFrame(empty_named)
      assert frame.columns[0] == 'x'

      # series with name and w/o
      s1 = Series(arr, name='x')
      frame = DataFrame([s1, arr]).T
      expected = DataFrame({'x': s1, 'Unnamed 0': arr},
                           columns=['x', 'Unnamed 0'])
      tm.assert_frame_equal(frame, expected)

      # this is a bit non-intuitive here; the series collapse down to arrays
      frame = DataFrame([arr, s1]).T
      expected = DataFrame({1: s1, 0: arr}, columns=[0, 1])
      tm.assert_frame_equal(frame, expected)
  1044. def test_constructor_Series_named_and_columns(self):
  1045. # GH 9232 validation
  1046. s0 = Series(range(5), name=0)
  1047. s1 = Series(range(5), name=1)
  1048. # matching name and column gives standard frame
  1049. tm.assert_frame_equal(pd.DataFrame(s0, columns=[0]),
  1050. s0.to_frame())
  1051. tm.assert_frame_equal(pd.DataFrame(s1, columns=[1]),
  1052. s1.to_frame())
  1053. # non-matching produces empty frame
  1054. assert pd.DataFrame(s0, columns=[1]).empty
  1055. assert pd.DataFrame(s1, columns=[0]).empty
  1056. def test_constructor_Series_differently_indexed(self):
  1057. # name
  1058. s1 = Series([1, 2, 3], index=['a', 'b', 'c'], name='x')
  1059. # no name
  1060. s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
  1061. other_index = Index(['a', 'b'])
  1062. df1 = DataFrame(s1, index=other_index)
  1063. exp1 = DataFrame(s1.reindex(other_index))
  1064. assert df1.columns[0] == 'x'
  1065. tm.assert_frame_equal(df1, exp1)
  1066. df2 = DataFrame(s2, index=other_index)
  1067. exp2 = DataFrame(s2.reindex(other_index))
  1068. assert df2.columns[0] == 0
  1069. tm.assert_index_equal(df2.index, other_index)
  1070. tm.assert_frame_equal(df2, exp2)
  def test_constructor_manager_resize(self):
      """Passing ``self.frame._data`` with fewer labels uses those labels."""
      row_labels = list(self.frame.index[:5])
      column_labels = list(self.frame.columns[:3])

      result = DataFrame(self.frame._data, index=row_labels,
                         columns=column_labels)

      tm.assert_index_equal(result.index, Index(row_labels))
      tm.assert_index_equal(result.columns, Index(column_labels))
  def test_constructor_from_items(self):
      """``from_items`` rebuilds frames while emitting a FutureWarning."""

      def from_items(*args, **kwargs):
          # every call is expected to emit the FutureWarning
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              return DataFrame.from_items(*args, **kwargs)

      items = [(c, self.frame[c]) for c in self.frame.columns]
      recons = from_items(items)
      tm.assert_frame_equal(recons, self.frame)

      # pass some columns
      recons = from_items(items, columns=['C', 'B', 'A'])
      tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']])

      # orient='index'

      row_items = [(idx, self.mixed_frame.xs(idx))
                   for idx in self.mixed_frame.index]
      recons = from_items(row_items,
                          columns=self.mixed_frame.columns,
                          orient='index')
      tm.assert_frame_equal(recons, self.mixed_frame)
      assert recons['A'].dtype == np.float64

      msg = "Must pass columns with orient='index'"
      with pytest.raises(TypeError, match=msg):
          from_items(row_items, orient='index')

      # orient='index', but thar be tuples
      arr = construct_1d_object_array_from_listlike(
          [('bar', 'baz')] * len(self.mixed_frame))
      self.mixed_frame['foo'] = arr
      row_items = [(idx, list(self.mixed_frame.xs(idx)))
                   for idx in self.mixed_frame.index]
      recons = from_items(row_items,
                          columns=self.mixed_frame.columns,
                          orient='index')
      tm.assert_frame_equal(recons, self.mixed_frame)
      assert isinstance(recons['foo'][0], tuple)

      rs = from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
                      orient='index',
                      columns=['one', 'two', 'three'])
      xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'],
                     columns=['one', 'two', 'three'])
      tm.assert_frame_equal(rs, xp)
  def test_constructor_from_items_scalars(self):
      """Scalar values in ``from_items`` pairs raise ValueError."""
      # GH 17312
      msg = (r'The value in each \(key, value\) '
             'pair must be an array, Series, or dict')

      # default orient
      with pytest.raises(ValueError, match=msg):
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              DataFrame.from_items([('A', 1), ('B', 4)])

      # orient='index' with explicit columns
      with pytest.raises(ValueError, match=msg):
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              DataFrame.from_items([('A', 1), ('B', 2)],
                                   columns=['col1'],
                                   orient='index')
  def test_from_items_deprecation(self):
      """Both ``from_items`` call shapes emit a FutureWarning."""
      # GH 17320
      items = [('A', [1, 2, 3]), ('B', [4, 5, 6])]

      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          DataFrame.from_items(items)

      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          DataFrame.from_items(items,
                               columns=['col1', 'col2', 'col3'],
                               orient='index')
  def test_constructor_mix_series_nonseries(self):
      """A Series column and a list column combine if lengths agree."""
      b_values = list(self.frame['B'])
      result = DataFrame({'A': self.frame['A'], 'B': b_values},
                         columns=['A', 'B'])
      tm.assert_frame_equal(result, self.frame.loc[:, ['A', 'B']])

      # a list two items short of the Series index is rejected
      msg = 'does not match index length'
      with pytest.raises(ValueError, match=msg):
          DataFrame({'A': self.frame['A'],
                     'B': list(self.frame['B'])[:-2]})
  def test_constructor_miscast_na_int_dtype(self):
      """Asking for int64 on NaN data must equal the no-dtype result."""
      rows = [[np.nan, 1], [1, 0]]
      with_int_dtype = DataFrame(rows, dtype=np.int64)
      without_dtype = DataFrame(rows)
      tm.assert_frame_equal(with_int_dtype, without_dtype)
  def test_constructor_column_duplicates(self):
      """Duplicate column labels are accepted by the constructors."""
      # it works! #2079
      duplicated = ['a', 'a']

      expected = DataFrame([[8, 5]])
      expected.columns = duplicated

      result = DataFrame([[8, 5]], columns=duplicated)
      tm.assert_frame_equal(result, expected)

      from_records = DataFrame.from_records([(8, 5)], columns=duplicated)
      tm.assert_frame_equal(from_records, expected)

      # the scalar-valued mapping is expected to raise
      scalars = OrderedDict([('b', 8), ('a', 5), ('a', 6)])
      with pytest.raises(ValueError):
          DataFrame.from_dict(scalars)
  def test_constructor_empty_with_string_dtype(self):
      """String-like dtypes on a data-less frame give an object frame."""
      # GH 9428
      expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object)

      for string_dtype in (str, np.str_, np.unicode_, 'U5'):
          result = DataFrame(index=[0, 1], columns=[0, 1],
                             dtype=string_dtype)
          tm.assert_frame_equal(result, expected)
  def test_constructor_single_value(self):
      """One scalar fills the whole index x columns grid."""
      # expecting single value upcasting here
      for fill, dtype_name in [(0., 'float64'), (0, 'int64')]:
          result = DataFrame(fill, index=[1, 2, 3], columns=['a', 'b', 'c'])
          zeros = np.zeros(result.shape).astype(dtype_name)
          expected = DataFrame(zeros, result.index, result.columns)
          tm.assert_frame_equal(result, expected)

      result = DataFrame('a', index=[1, 2], columns=['a', 'c'])
      filled = np.array([['a', 'a'], ['a', 'a']], dtype=object)
      expected = DataFrame(filled, index=[1, 2], columns=['a', 'c'])
      tm.assert_frame_equal(result, expected)

      # a scalar needs both axes to be given
      with pytest.raises(ValueError):
          DataFrame('a', [1, 2])
      with pytest.raises(ValueError):
          DataFrame('a', columns=['a', 'c'])

      msg = 'incompatible data and dtype'
      with pytest.raises(TypeError, match=msg):
          DataFrame('a', [1, 2], ['a', 'c'], float)
  def test_constructor_with_datetimes(self):
      """Check dtype counts and tz handling for datetime-like inputs."""
      intname = np.dtype(np.int_).name
      floatname = np.dtype(np.float_).name
      datetime64name = np.dtype('M8[ns]').name
      objectname = np.dtype(np.object_).name

      # single item
      df = DataFrame({'A': 1, 'B': 'foo', 'C': 'bar',
                      'D': Timestamp("20010101"),
                      'E': datetime(2001, 1, 2, 0, 0)},
                     index=np.arange(10))
      result = df.get_dtype_counts()
      expected = Series({'int64': 1, datetime64name: 2, objectname: 2})
      # sort_index returns a new Series, so the result must be rebound;
      # otherwise the comparison depends on incidental ordering
      result = result.sort_index()
      expected = expected.sort_index()
      tm.assert_series_equal(result, expected)

      # check with ndarray construction ndim==0 (e.g. we are passing a ndim 0
      # ndarray with a dtype specified)
      df = DataFrame({'a': 1., 'b': 2, 'c': 'foo',
                      floatname: np.array(1., dtype=floatname),
                      intname: np.array(1, dtype=intname)},
                     index=np.arange(10))
      result = df.get_dtype_counts()
      expected = {objectname: 1}
      if intname == 'int64':
          expected['int64'] = 2
      else:
          expected['int64'] = 1
          expected[intname] = 1
      if floatname == 'float64':
          expected['float64'] = 2
      else:
          expected['float64'] = 1
          expected[floatname] = 1

      result = result.sort_index()
      expected = Series(expected).sort_index()
      tm.assert_series_equal(result, expected)

      # check with ndarray construction ndim>0
      # (compared against the same expected counts as the ndim==0 case)
      df = DataFrame({'a': 1., 'b': 2, 'c': 'foo',
                      floatname: np.array([1.] * 10, dtype=floatname),
                      intname: np.array([1] * 10, dtype=intname)},
                     index=np.arange(10))
      result = df.get_dtype_counts()
      result = result.sort_index()
      tm.assert_series_equal(result, expected)

      # GH 2809
      ind = date_range(start="2000-01-01", freq="D", periods=10)
      datetimes = [ts.to_pydatetime() for ts in ind]
      datetime_s = Series(datetimes)
      assert datetime_s.dtype == 'M8[ns]'
      df = DataFrame({'datetime_s': datetime_s})
      result = df.get_dtype_counts()
      expected = Series({datetime64name: 1})
      result = result.sort_index()
      expected = expected.sort_index()
      tm.assert_series_equal(result, expected)

      # GH 2810
      ind = date_range(start="2000-01-01", freq="D", periods=10)
      datetimes = [ts.to_pydatetime() for ts in ind]
      dates = [ts.date() for ts in ind]
      df = DataFrame({'datetimes': datetimes, 'dates': dates})
      result = df.get_dtype_counts()
      expected = Series({datetime64name: 1, objectname: 1})
      result = result.sort_index()
      expected = expected.sort_index()
      tm.assert_series_equal(result, expected)

      # GH 7594
      # don't coerce tz-aware
      import pytz
      tz = pytz.timezone('US/Eastern')
      dt = tz.localize(datetime(2012, 1, 1))

      df = DataFrame({'End Date': dt}, index=[0])
      assert df.iat[0, 0] == dt
      tm.assert_series_equal(df.dtypes, Series(
          {'End Date': 'datetime64[ns, US/Eastern]'}))

      df = DataFrame([{'End Date': dt}])
      assert df.iat[0, 0] == dt
      tm.assert_series_equal(df.dtypes, Series(
          {'End Date': 'datetime64[ns, US/Eastern]'}))

      # tz-aware (UTC and other tz's)
      # GH 8411
      dr = date_range('20130101', periods=3)
      df = DataFrame({'value': dr})
      assert df.iat[0, 0].tz is None
      dr = date_range('20130101', periods=3, tz='UTC')
      df = DataFrame({'value': dr})
      assert str(df.iat[0, 0].tz) == 'UTC'
      dr = date_range('20130101', periods=3, tz='US/Eastern')
      df = DataFrame({'value': dr})
      assert str(df.iat[0, 0].tz) == 'US/Eastern'

      # GH 7822
      # preserve an index with a tz on dict construction
      i = date_range('1/1/2011', periods=5, freq='10s', tz='US/Eastern')

      expected = DataFrame(
          {'a': i.to_series(keep_tz=True).reset_index(drop=True)})
      df = DataFrame()
      df['a'] = i
      tm.assert_frame_equal(df, expected)

      df = DataFrame({'a': i})
      tm.assert_frame_equal(df, expected)

      # multiples
      i_no_tz = date_range('1/1/2011', periods=5, freq='10s')
      df = DataFrame({'a': i, 'b': i_no_tz})
      expected = DataFrame({'a': i.to_series(keep_tz=True)
                            .reset_index(drop=True), 'b': i_no_tz})
      tm.assert_frame_equal(df, expected)
  def test_constructor_datetimes_with_nulls(self):
      """Object arrays of None plus one datetime count as datetime64[ns]."""
      # gh-15869
      mostly_null_arrays = [
          np.array([None, None, None, None,
                    datetime.now(), None]),
          np.array([None, None, datetime.now(), None]),
      ]
      for values in mostly_null_arrays:
          counts = DataFrame(values).get_dtype_counts()
          expected = Series({'datetime64[ns]': 1})
          tm.assert_series_equal(counts, expected)
  def test_constructor_for_list_with_dtypes(self):
      """Check dtype counts for list, dict and scalar inputs."""
      # TODO(wesm): unused
      intname = np.dtype(np.int_).name  # noqa
      floatname = np.dtype(np.float_).name  # noqa
      datetime64name = np.dtype('M8[ns]').name
      objectname = np.dtype(np.object_).name

      # test list of lists/ndarrays
      # NOTE(review): the two result/expected pairs below are computed but
      # never compared; confirm the intended dtypes before asserting them
      frame = DataFrame([np.arange(5) for x in range(5)])
      counts = frame.get_dtype_counts()
      expected = Series({'int64': 5})

      frame = DataFrame([np.array(np.arange(5), dtype='int32')
                         for x in range(5)])
      counts = frame.get_dtype_counts()
      expected = Series({'int32': 5})

      # overflow issue? (we always expect int64 upcasting here)
      frame = DataFrame({'a': [2 ** 31, 2 ** 31 + 1]})
      counts = frame.get_dtype_counts()
      expected = Series({'int64': 1})
      tm.assert_series_equal(counts, expected)

      # GH #2751 (construction with no index specified), make sure we cast to
      # platform values
      frame = DataFrame([1, 2])
      counts = frame.get_dtype_counts()
      expected = Series({'int64': 1})
      tm.assert_series_equal(counts, expected)

      frame = DataFrame([1., 2.])
      counts = frame.get_dtype_counts()
      expected = Series({'float64': 1})
      tm.assert_series_equal(counts, expected)

      frame = DataFrame({'a': [1, 2]})
      counts = frame.get_dtype_counts()
      expected = Series({'int64': 1})
      tm.assert_series_equal(counts, expected)

      frame = DataFrame({'a': [1., 2.]})
      counts = frame.get_dtype_counts()
      expected = Series({'float64': 1})
      tm.assert_series_equal(counts, expected)

      frame = DataFrame({'a': 1}, index=lrange(3))
      counts = frame.get_dtype_counts()
      expected = Series({'int64': 1})
      tm.assert_series_equal(counts, expected)

      frame = DataFrame({'a': 1.}, index=lrange(3))
      counts = frame.get_dtype_counts()
      expected = Series({'float64': 1})
      tm.assert_series_equal(counts, expected)

      # with object list
      frame = DataFrame({'a': [1, 2, 4, 7], 'b': [1.2, 2.3, 5.1, 6.3],
                         'c': list('abcd'),
                         'd': [datetime(2000, 1, 1) for i in range(4)],
                         'e': [1., 2, 4., 7]})
      counts = frame.get_dtype_counts()
      expected = Series(
          {'int64': 1, 'float64': 2, datetime64name: 1, objectname: 1})
      counts = counts.sort_index()
      expected = expected.sort_index()
      tm.assert_series_equal(counts, expected)
  def test_constructor_frame_copy(self):
      """With ``copy=True`` writes to the new frame leave the source alone."""
      copied = DataFrame(self.frame, copy=True)
      copied['A'] = 5

      copy_is_five = (copied['A'] == 5).all()
      source_is_five = (self.frame['A'] == 5).all()
      assert copy_is_five
      assert not source_is_five
  def test_constructor_ndarray_copy(self):
      """Without ``copy`` the ndarray is shared; ``copy=True`` detaches it."""
      # no copy requested: a later write to the source shows up in the frame
      shared = DataFrame(self.frame.values)
      self.frame.values[5] = 5
      assert (shared.values[5] == 5).all()

      # copy requested: a later write to the source must not show up
      detached = DataFrame(self.frame.values, copy=True)
      self.frame.values[6] = 6
      assert not (detached.values[6] == 6).all()
  def test_constructor_series_copy(self):
      """Writing into a frame built from a Series leaves the Series alone."""
      source_columns = self.frame._series
      frame = DataFrame({'A': source_columns['A']})
      frame['A'][:] = 5

      source_is_five = (source_columns['A'] == 5).all()
      assert not source_is_five
  def test_constructor_with_nas(self):
      """Frames with NaN labels stay addressable by position and by NaN."""
      # GH 5016
      # na's in indices

      def check(df):
          n_columns = len(df.columns)
          # every column must be reachable by position
          for i in range(n_columns):
              df.iloc[:, i]

          nan_positions = np.arange(n_columns)[isna(df.columns)]
          n_nan = len(nan_positions)

          if n_nan == 0:
              # No NaN found -> error
              with pytest.raises(TypeError):
                  df.loc[:, np.nan]
          elif n_nan == 1:
              # single nan should result in Series
              tm.assert_series_equal(df.iloc[:, nan_positions[0]],
                                     df.loc[:, np.nan])
          else:
              # multiple nans should result in DataFrame
              tm.assert_frame_equal(df.iloc[:, nan_positions],
                                    df.loc[:, np.nan])

      check(DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]))

      check(DataFrame([[1, 2, 3], [4, 5, 6]],
                      columns=[1.1, 2.2, np.nan]))

      check(DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]],
                      columns=[np.nan, 1.1, 2.2, np.nan]))

      check(DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]],
                      columns=[np.nan, 1.1, 2.2, np.nan]))

      # GH 21428 (non-unique columns)
      check(DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]],
                      columns=[np.nan, 1, 2, 2]))
  1421. def test_constructor_lists_to_object_dtype(self):
  1422. # from #1074
  1423. d = DataFrame({'a': [np.nan, False]})
  1424. assert d['a'].dtype == np.object_
  1425. assert not d['a'][1]
  def test_constructor_categorical(self):
      """Categorical data is accepted as dict, Series and list input."""
      # GH8626

      # dict creation
      frame = DataFrame({'A': list('abc')}, dtype='category')
      expected = Series(list('abc'), dtype='category', name='A')
      tm.assert_series_equal(frame['A'], expected)

      # to_frame
      cat_series = Series(list('abc'), dtype='category')
      result = cat_series.to_frame()
      expected = Series(list('abc'), dtype='category', name=0)
      tm.assert_series_equal(result[0], expected)
      result = cat_series.to_frame(name='foo')
      expected = Series(list('abc'), dtype='category', name='foo')
      tm.assert_series_equal(result['foo'], expected)

      # list-like creation
      frame = DataFrame(list('abc'), dtype='category')
      expected = Series(list('abc'), dtype='category', name=0)
      tm.assert_series_equal(frame[0], expected)

      # ndim != 1
      frame = DataFrame([Categorical(list('abc'))])
      expected = DataFrame({0: Series(list('abc'), dtype='category')})
      tm.assert_frame_equal(frame, expected)

      frame = DataFrame([Categorical(list('abc')),
                         Categorical(list('abd'))])
      expected = DataFrame({0: Series(list('abc'), dtype='category'),
                            1: Series(list('abd'), dtype='category')},
                           columns=[0, 1])
      tm.assert_frame_equal(frame, expected)

      # mixed
      frame = DataFrame([Categorical(list('abc')), list('def')])
      expected = DataFrame({0: Series(list('abc'), dtype='category'),
                            1: list('def')}, columns=[0, 1])
      tm.assert_frame_equal(frame, expected)

      # invalid (shape)
      with pytest.raises(ValueError):
          DataFrame([Categorical(list('abc')),
                     Categorical(list('abdefg'))])

      # ndim > 1
      with pytest.raises(NotImplementedError):
          Categorical(np.array([list('abcd')]))
  1465. def test_constructor_categorical_series(self):
  1466. items = [1, 2, 3, 1]
  1467. exp = Series(items).astype('category')
  1468. res = Series(items, dtype='category')
  1469. tm.assert_series_equal(res, exp)
  1470. items = ["a", "b", "c", "a"]
  1471. exp = Series(items).astype('category')
  1472. res = Series(items, dtype='category')
  1473. tm.assert_series_equal(res, exp)
  1474. # insert into frame with different index
  1475. # GH 8076
  1476. index = date_range('20000101', periods=3)
  1477. expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
  1478. categories=['a', 'b', 'c']))
  1479. expected.index = index
  1480. expected = DataFrame({'x': expected})
  1481. df = DataFrame(
  1482. {'x': Series(['a', 'b', 'c'], dtype='category')}, index=index)
  1483. tm.assert_frame_equal(df, expected)
  def test_from_records_to_records(self):
      """Round-trip a NumPy record array via from_records/to_records."""
      # from numpy documentation
      rec = np.zeros((2,), dtype=('i4,f4,a10'))
      rec[:] = [(1, 2., 'Hello'), (2, 3., "World")]

      # TODO(wesm): result unused, this only checks the call succeeds
      DataFrame.from_records(rec)

      reversed_index = pd.Index(np.arange(len(rec))[::-1])
      by_index = DataFrame.from_records(rec, index=reversed_index)
      tm.assert_index_equal(by_index.index, reversed_index)

      # without names, it should go to last ditch
      plain = np.zeros((2, 3))
      tm.assert_frame_equal(DataFrame.from_records(plain),
                            DataFrame(plain))

      # wrong length
      msg = r'Shape of passed values is \(2, 3\), indices imply \(1, 3\)'
      with pytest.raises(ValueError, match=msg):
          DataFrame.from_records(rec, index=reversed_index[:-1])

      by_field = DataFrame.from_records(rec, index='f1')

      # what to do?
      with_index = by_field.to_records()
      assert len(with_index.dtype.names) == 3

      without_index = by_field.to_records(index=False)
      assert len(without_index.dtype.names) == 2
      assert 'index' not in without_index.dtype.names
  1507. def test_from_records_nones(self):
  1508. tuples = [(1, 2, None, 3),
  1509. (1, 2, None, 3),
  1510. (None, 2, 5, 3)]
  1511. df = DataFrame.from_records(tuples, columns=['a', 'b', 'c', 'd'])
  1512. assert np.isnan(df['c'][0])
  1513. def test_from_records_iterator(self):
  1514. arr = np.array([(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5., 5., 6, 6),
  1515. (7., 7., 8, 8)],
  1516. dtype=[('x', np.float64), ('u', np.float32),
  1517. ('y', np.int64), ('z', np.int32)])
  1518. df = DataFrame.from_records(iter(arr), nrows=2)
  1519. xp = DataFrame({'x': np.array([1.0, 3.0], dtype=np.float64),
  1520. 'u': np.array([1.0, 3.0], dtype=np.float32),
  1521. 'y': np.array([2, 4], dtype=np.int64),
  1522. 'z': np.array([2, 4], dtype=np.int32)})
  1523. tm.assert_frame_equal(df.reindex_like(xp), xp)
  1524. # no dtypes specified here, so just compare with the default
  1525. arr = [(1.0, 2), (3.0, 4), (5., 6), (7., 8)]
  1526. df = DataFrame.from_records(iter(arr), columns=['x', 'y'],
  1527. nrows=2)
  1528. tm.assert_frame_equal(df, xp.reindex(columns=['x', 'y']),
  1529. check_dtype=False)
  1530. def test_from_records_tuples_generator(self):
  1531. def tuple_generator(length):
  1532. for i in range(length):
  1533. letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  1534. yield (i, letters[i % len(letters)], i / length)
  1535. columns_names = ['Integer', 'String', 'Float']
  1536. columns = [[i[j] for i in tuple_generator(
  1537. 10)] for j in range(len(columns_names))]
  1538. data = {'Integer': columns[0],
  1539. 'String': columns[1], 'Float': columns[2]}
  1540. expected = DataFrame(data, columns=columns_names)
  1541. generator = tuple_generator(10)
  1542. result = DataFrame.from_records(generator, columns=columns_names)
  1543. tm.assert_frame_equal(result, expected)
  1544. def test_from_records_lists_generator(self):
  1545. def list_generator(length):
  1546. for i in range(length):
  1547. letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  1548. yield [i, letters[i % len(letters)], i / length]
  1549. columns_names = ['Integer', 'String', 'Float']
  1550. columns = [[i[j] for i in list_generator(
  1551. 10)] for j in range(len(columns_names))]
  1552. data = {'Integer': columns[0],
  1553. 'String': columns[1], 'Float': columns[2]}
  1554. expected = DataFrame(data, columns=columns_names)
  1555. generator = list_generator(10)
  1556. result = DataFrame.from_records(generator, columns=columns_names)
  1557. tm.assert_frame_equal(result, expected)
  1558. def test_from_records_columns_not_modified(self):
  1559. tuples = [(1, 2, 3),
  1560. (1, 2, 3),
  1561. (2, 5, 3)]
  1562. columns = ['a', 'b', 'c']
  1563. original_columns = list(columns)
  1564. df = DataFrame.from_records(tuples, columns=columns, index='a') # noqa
  1565. assert columns == original_columns
  1566. def test_from_records_decimal(self):
  1567. from decimal import Decimal
  1568. tuples = [(Decimal('1.5'),), (Decimal('2.5'),), (None,)]
  1569. df = DataFrame.from_records(tuples, columns=['a'])
  1570. assert df['a'].dtype == object
  1571. df = DataFrame.from_records(tuples, columns=['a'], coerce_float=True)
  1572. assert df['a'].dtype == np.float64
  1573. assert np.isnan(df['a'].values[-1])
  1574. def test_from_records_duplicates(self):
  1575. result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)],
  1576. columns=['a', 'b', 'a'])
  1577. expected = DataFrame([(1, 2, 3), (4, 5, 6)],
  1578. columns=['a', 'b', 'a'])
  1579. tm.assert_frame_equal(result, expected)
  1580. def test_from_records_set_index_name(self):
  1581. def create_dict(order_id):
  1582. return {'order_id': order_id, 'quantity': np.random.randint(1, 10),
  1583. 'price': np.random.randint(1, 10)}
  1584. documents = [create_dict(i) for i in range(10)]
  1585. # demo missing data
  1586. documents.append({'order_id': 10, 'quantity': 5})
  1587. result = DataFrame.from_records(documents, index='order_id')
  1588. assert result.index.name == 'order_id'
  1589. # MultiIndex
  1590. result = DataFrame.from_records(documents,
  1591. index=['order_id', 'quantity'])
  1592. assert result.index.names == ('order_id', 'quantity')
  1593. def test_from_records_misc_brokenness(self):
  1594. # #2179
  1595. data = {1: ['foo'], 2: ['bar']}
  1596. result = DataFrame.from_records(data, columns=['a', 'b'])
  1597. exp = DataFrame(data, columns=['a', 'b'])
  1598. tm.assert_frame_equal(result, exp)
  1599. # overlap in index/index_names
  1600. data = {'a': [1, 2, 3], 'b': [4, 5, 6]}
  1601. result = DataFrame.from_records(data, index=['a', 'b', 'c'])
  1602. exp = DataFrame(data, index=['a', 'b', 'c'])
  1603. tm.assert_frame_equal(result, exp)
  1604. # GH 2623
  1605. rows = []
  1606. rows.append([datetime(2010, 1, 1), 1])
  1607. rows.append([datetime(2010, 1, 2), 'hi']) # test col upconverts to obj
  1608. df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])
  1609. results = df2_obj.get_dtype_counts()
  1610. expected = Series({'datetime64[ns]': 1, 'object': 1})
  1611. rows = []
  1612. rows.append([datetime(2010, 1, 1), 1])
  1613. rows.append([datetime(2010, 1, 2), 1])
  1614. df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])
  1615. results = df2_obj.get_dtype_counts().sort_index()
  1616. expected = Series({'datetime64[ns]': 1, 'int64': 1})
  1617. tm.assert_series_equal(results, expected)
  1618. def test_from_records_empty(self):
  1619. # 3562
  1620. result = DataFrame.from_records([], columns=['a', 'b', 'c'])
  1621. expected = DataFrame(columns=['a', 'b', 'c'])
  1622. tm.assert_frame_equal(result, expected)
  1623. result = DataFrame.from_records([], columns=['a', 'b', 'b'])
  1624. expected = DataFrame(columns=['a', 'b', 'b'])
  1625. tm.assert_frame_equal(result, expected)
  1626. def test_from_records_empty_with_nonempty_fields_gh3682(self):
  1627. a = np.array([(1, 2)], dtype=[('id', np.int64), ('value', np.int64)])
  1628. df = DataFrame.from_records(a, index='id')
  1629. tm.assert_index_equal(df.index, Index([1], name='id'))
  1630. assert df.index.name == 'id'
  1631. tm.assert_index_equal(df.columns, Index(['value']))
  1632. b = np.array([], dtype=[('id', np.int64), ('value', np.int64)])
  1633. df = DataFrame.from_records(b, index='id')
  1634. tm.assert_index_equal(df.index, Index([], name='id'))
  1635. assert df.index.name == 'id'
  def test_from_records_with_datetimes(self):
      """Record arrays with datetime fields and a null convert cleanly."""
      # this may fail on certain platforms because of a numpy issue
      # related GH6140
      if not is_platform_little_endian():
          pytest.skip("known failure of test on non-little endian")

      stamp = datetime(2005, 3, 1, 0, 0)

      # construction with a null in a recarray
      # GH 6140
      expected = DataFrame({'EXPIRY': [stamp, None]})

      ns_fields = [('EXPIRY', '<M8[ns]')]
      try:
          ns_records = np.core.records.fromarrays(
              [np.array([stamp, None])], dtype=ns_fields)
      except ValueError:
          pytest.skip("known failure of numpy rec array creation")

      tm.assert_frame_equal(DataFrame.from_records(ns_records), expected)

      # coercion should work too
      minute_fields = [('EXPIRY', '<M8[m]')]
      minute_records = np.core.records.fromarrays(
          [np.array([stamp, None])], dtype=minute_fields)
      tm.assert_frame_equal(DataFrame.from_records(minute_records),
                            expected)
  def test_from_records_sequencelike(self):
      """Check ``from_records`` on tuples, lists and record arrays.

      A mixed-dtype frame is flattened into row tuples, row lists and two
      record arrays; each is fed back through ``DataFrame.from_records``
      and compared with the original frame. Also covers ``exclude`` and
      empty input.
      """
      df = DataFrame({'A': np.array(np.random.randn(6), dtype=np.float64),
                      'A1': np.array(np.random.randn(6), dtype=np.float64),
                      'B': np.array(np.arange(6), dtype=np.int64),
                      'C': ['foo'] * 6,
                      'D': np.array([True, False] * 3, dtype=bool),
                      'E': np.array(np.random.randn(6), dtype=np.float32),
                      'E1': np.array(np.random.randn(6), dtype=np.float32),
                      'F': np.array(np.arange(6), dtype=np.int32)})

      # this is actually tricky to create the recordlike arrays and
      # have the dtypes be intact
      # NOTE(review): presumably a mapping of dtype name -> sub-frame of the
      # columns with that dtype; confirm against _to_dict_of_blocks
      blocks = df._to_dict_of_blocks()
      tuples = []
      columns = []
      dtypes = []
      # collect column names and (name, dtype-string) pairs block by block
      for dtype, b in compat.iteritems(blocks):
          columns.extend(b.columns)
          dtypes.extend([(c, np.dtype(dtype).descr[0][1])
                         for c in b.columns])
      # build one tuple per row; the blocks are iterated the same way as
      # above, so the values are expected to line up with ``columns``
      for i in range(len(df.index)):
          tup = []
          for _, b in compat.iteritems(blocks):
              tup.extend(b.iloc[i].values)
          tuples.append(tuple(tup))

      recarray = np.array(tuples, dtype=dtypes).view(np.recarray)
      recarray2 = df.to_records()
      lists = [list(x) for x in tuples]

      # tuples (lose the dtype info)
      result = (DataFrame.from_records(tuples, columns=columns)
                .reindex(columns=df.columns))

      # created recarray and with to_records recarray (have dtype info)
      result2 = (DataFrame.from_records(recarray, columns=columns)
                 .reindex(columns=df.columns))
      result3 = (DataFrame.from_records(recarray2, columns=columns)
                 .reindex(columns=df.columns))

      # list of lists (no dtype info)
      result4 = (DataFrame.from_records(lists, columns=columns)
                 .reindex(columns=df.columns))

      # inputs without dtype info are compared with check_dtype=False
      tm.assert_frame_equal(result, df, check_dtype=False)
      tm.assert_frame_equal(result2, df)
      tm.assert_frame_equal(result3, df)
      tm.assert_frame_equal(result4, df, check_dtype=False)

      # tuples is in the order of the columns
      result = DataFrame.from_records(tuples)
      tm.assert_index_equal(result.columns, pd.Index(lrange(8)))

      # test exclude parameter & we are casting the results here (as we don't
      # have dtype info to recover)
      columns_to_test = [columns.index('C'), columns.index('E1')]

      # exclude every positional column except the two under test
      exclude = list(set(range(8)) - set(columns_to_test))
      result = DataFrame.from_records(tuples, exclude=exclude)
      result.columns = [columns[i] for i in sorted(columns_to_test)]
      tm.assert_series_equal(result['C'], df['C'])
      tm.assert_series_equal(result['E1'], df['E1'].astype('float64'))

      # empty case
      result = DataFrame.from_records([], columns=['foo', 'bar', 'baz'])
      assert len(result) == 0
      tm.assert_index_equal(result.columns,
                            pd.Index(['foo', 'bar', 'baz']))

      result = DataFrame.from_records([])
      assert len(result) == 0
      assert len(result.columns) == 0
  def test_from_records_dictlike(self):
      """``from_records`` on dicts of Series and dicts of ndarrays."""
      # test the dict methods
      df = DataFrame({'A': np.array(np.random.randn(6), dtype=np.float64),
                      'A1': np.array(np.random.randn(6), dtype=np.float64),
                      'B': np.array(np.arange(6), dtype=np.int64),
                      'C': ['foo'] * 6,
                      'D': np.array([True, False] * 3, dtype=bool),
                      'E': np.array(np.random.randn(6), dtype=np.float32),
                      'E1': np.array(np.random.randn(6), dtype=np.float32),
                      'F': np.array(np.arange(6), dtype=np.int32)})

      # columns is in a different order here than the actual items iterated
      # from the dict
      columns = []
      for _, block in compat.iteritems(df._to_dict_of_blocks()):
          columns.extend(block.columns)

      asdict = dict(compat.iteritems(df))
      asdict2 = dict((label, col.values)
                     for label, col in compat.iteritems(df))

      # dict of series & dict of ndarrays (have dtype info)
      candidates = [
          DataFrame.from_records(asdict),
          DataFrame.from_records(asdict, columns=columns),
          DataFrame.from_records(asdict2, columns=columns),
      ]
      for built in candidates:
          tm.assert_frame_equal(built.reindex(columns=df.columns), df)
  1747. def test_from_records_with_index_data(self):
  1748. df = DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
  1749. data = np.random.randn(10)
  1750. df1 = DataFrame.from_records(df, index=data)
  1751. tm.assert_index_equal(df1.index, Index(data))
  def test_from_records_bad_index_column(self):
      """Valid column labels work as ``index``; unknown ones raise."""
      df = DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])

      # should pass
      for index_arg in (['C'], 'C'):
          built = DataFrame.from_records(df, index=index_arg)
          tm.assert_index_equal(built.index, Index(df.C))

      # should fail
      with pytest.raises(ValueError):
          DataFrame.from_records(df, index=[2])
      with pytest.raises(KeyError):
          DataFrame.from_records(df, index=2)
  1762. def test_from_records_non_tuple(self):
  1763. class Record(object):
  1764. def __init__(self, *args):
  1765. self.args = args
  1766. def __getitem__(self, i):
  1767. return self.args[i]
  1768. def __iter__(self):
  1769. return iter(self.args)
  1770. recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]
  1771. tups = lmap(tuple, recs)
  1772. result = DataFrame.from_records(recs)
  1773. expected = DataFrame.from_records(tups)
  1774. tm.assert_frame_equal(result, expected)
  1775. def test_from_records_len0_with_columns(self):
  1776. # #2633
  1777. result = DataFrame.from_records([], index='foo',
  1778. columns=['foo', 'bar'])
  1779. expected = Index(['bar'])
  1780. assert len(result) == 0
  1781. assert result.index.name == 'foo'
  1782. tm.assert_index_equal(result.columns, expected)
  1783. def test_to_frame_with_falsey_names(self):
  1784. # GH 16114
  1785. result = Series(name=0).to_frame().dtypes
  1786. expected = Series({0: np.float64})
  1787. tm.assert_series_equal(result, expected)
  1788. result = DataFrame(Series(name=0)).dtypes
  1789. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize('dtype', [None, 'uint8', 'category'])
  def test_constructor_range_dtype(self, dtype):
      """A ``range`` column is built like the equivalent list of ints."""
      # GH 16804
      # without an explicit dtype the reference frame uses int64
      reference_dtype = dtype or 'int64'
      expected = DataFrame({'A': [0, 1, 2, 3, 4]}, dtype=reference_dtype)

      result = DataFrame({'A': range(5)}, dtype=dtype)

      tm.assert_frame_equal(result, expected)
  1796. def test_frame_from_list_subclass(self):
  1797. # GH21226
  1798. class List(list):
  1799. pass
  1800. expected = DataFrame([[1, 2, 3], [4, 5, 6]])
  1801. result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])]))
  1802. tm.assert_frame_equal(result, expected)
  1803. class TestDataFrameConstructorWithDatetimeTZ(TestData):
  1804. def test_from_dict(self):
  1805. # 8260
  1806. # support datetime64 with tz
  1807. idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
  1808. name='foo')
  1809. dr = date_range('20130110', periods=3)
  1810. # construction
  1811. df = DataFrame({'A': idx, 'B': dr})
  1812. assert df['A'].dtype, 'M8[ns, US/Eastern'
  1813. assert df['A'].name == 'A'
  1814. tm.assert_series_equal(df['A'], Series(idx, name='A'))
  1815. tm.assert_series_equal(df['B'], Series(dr, name='B'))
  1816. def test_from_index(self):
  1817. # from index
  1818. idx2 = date_range('20130101', periods=3, tz='US/Eastern', name='foo')
  1819. df2 = DataFrame(idx2)
  1820. tm.assert_series_equal(df2['foo'], Series(idx2, name='foo'))
  1821. df2 = DataFrame(Series(idx2))
  1822. tm.assert_series_equal(df2['foo'], Series(idx2, name='foo'))
  1823. idx2 = date_range('20130101', periods=3, tz='US/Eastern')
  1824. df2 = DataFrame(idx2)
  1825. tm.assert_series_equal(df2[0], Series(idx2, name=0))
  1826. df2 = DataFrame(Series(idx2))
  1827. tm.assert_series_equal(df2[0], Series(idx2, name=0))
  1828. def test_frame_dict_constructor_datetime64_1680(self):
  1829. dr = date_range('1/1/2012', periods=10)
  1830. s = Series(dr, index=dr)
  1831. # it works!
  1832. DataFrame({'a': 'foo', 'b': s}, index=dr)
  1833. DataFrame({'a': 'foo', 'b': s.values}, index=dr)
  1834. def test_frame_datetime64_mixed_index_ctor_1681(self):
  1835. dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
  1836. ts = Series(dr)
  1837. # it works!
  1838. d = DataFrame({'A': 'foo', 'B': ts}, index=dr)
  1839. assert d['B'].isna().all()
  1840. def test_frame_timeseries_to_records(self):
  1841. index = date_range('1/1/2000', periods=10)
  1842. df = DataFrame(np.random.randn(10, 3), index=index,
  1843. columns=['a', 'b', 'c'])
  1844. result = df.to_records()
  1845. result['index'].dtype == 'M8[ns]'
  1846. result = df.to_records(index=False)
  1847. def test_frame_timeseries_column(self):
  1848. # GH19157
  1849. dr = date_range(start='20130101T10:00:00', periods=3, freq='T',
  1850. tz='US/Eastern')
  1851. result = DataFrame(dr, columns=['timestamps'])
  1852. expected = DataFrame({'timestamps': [
  1853. Timestamp('20130101T10:00:00', tz='US/Eastern'),
  1854. Timestamp('20130101T10:01:00', tz='US/Eastern'),
  1855. Timestamp('20130101T10:02:00', tz='US/Eastern')]})
  1856. tm.assert_frame_equal(result, expected)
  1857. def test_nested_dict_construction(self):
  1858. # GH22227
  1859. columns = ['Nevada', 'Ohio']
  1860. pop = {'Nevada': {2001: 2.4, 2002: 2.9},
  1861. 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
  1862. result = pd.DataFrame(pop, index=[2001, 2002, 2003], columns=columns)
  1863. expected = pd.DataFrame(
  1864. [(2.4, 1.7), (2.9, 3.6), (np.nan, np.nan)],
  1865. columns=columns,
  1866. index=pd.Index([2001, 2002, 2003])
  1867. )
  1868. tm.assert_frame_equal(result, expected)