# test_timeseries.py (31 KB)
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime, time
  4. import numpy as np
  5. import pytest
  6. from pandas.compat import product
  7. import pandas as pd
  8. from pandas import (
  9. DataFrame, DatetimeIndex, Index, MultiIndex, Series, Timestamp, date_range,
  10. period_range, to_datetime)
  11. from pandas.tests.frame.common import TestData
  12. import pandas.util.testing as tm
  13. from pandas.util.testing import (
  14. assert_frame_equal, assert_index_equal, assert_series_equal)
  15. import pandas.tseries.offsets as offsets
  @pytest.fixture(params=[(True, True), (True, False),
                          (False, True), (False, False)])
  def close_open_fixture(request):
      """Provide each combination of two boolean flags as a 2-tuple.

      ``test_between_time`` unpacks the tuple as
      ``(inc_start, inc_end)``.
      """
      return request.param
  19. class TestDataFrameTimeSeriesMethods(TestData):
  def test_diff(self):
      """Check ``DataFrame.diff`` on float, large-int, float32 and axis=1 data."""
      frame = self.tsframe
      diffed = frame.diff(1)
      assert_series_equal(diffed['A'], frame['A'] - frame['A'].shift(1))

      # int dtype
      big = 10000000000000000
      ints = Series([big, big + 1])
      int_diff = DataFrame({'s': ints}).diff()
      assert int_diff.s[1] == 1

      # mixed numeric
      frame32 = self.tsframe.astype('float32')
      diffed32 = frame32.diff(1)
      assert_series_equal(diffed32['A'],
                          frame32['A'] - frame32['A'].shift(1))

      # issue 10907
      df = pd.DataFrame({'y': pd.Series([2]), 'z': pd.Series([3])})
      df.insert(0, 'x', 1)
      result = df.diff(axis=1)
      expected = pd.DataFrame({'x': np.nan,
                               'y': pd.Series(1),
                               'z': pd.Series(1)}).astype('float64')
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('tz', [None, 'UTC'])
  def test_diff_datetime_axis0(self, tz):
      """Row-wise diff of two datetime columns is compared to timedeltas."""
      # GH 18578
      df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                      for col in (0, 1)})

      result = df.diff(axis=0)

      expected = DataFrame({col: pd.TimedeltaIndex(['NaT', '1 days'])
                            for col in (0, 1)})
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('tz', [None, 'UTC'])
  def test_diff_datetime_axis1(self, tz):
      """Column-wise diff of datetime columns; tz-aware data must raise."""
      # GH 18578
      df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                      for col in (0, 1)})

      if tz is not None:
          # the test expects the tz-aware case to be rejected
          with pytest.raises(NotImplementedError):
              df.diff(axis=1)
          return

      result = df.diff(axis=1)
      expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
                            1: pd.TimedeltaIndex(['0 days', '0 days'])})
      assert_frame_equal(result, expected)
  65. def test_diff_timedelta(self):
  66. # GH 4533
  67. df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
  68. Timestamp('20130101 9:02')],
  69. value=[1.0, 2.0]))
  70. res = df.diff()
  71. exp = DataFrame([[pd.NaT, np.nan],
  72. [pd.Timedelta('00:01:00'), 1]],
  73. columns=['time', 'value'])
  74. assert_frame_equal(res, exp)
  75. def test_diff_mixed_dtype(self):
  76. df = DataFrame(np.random.randn(5, 3))
  77. df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)
  78. result = df.diff()
  79. assert result[0].dtype == np.float64
  80. def test_diff_neg_n(self):
  81. rs = self.tsframe.diff(-1)
  82. xp = self.tsframe - self.tsframe.shift(-1)
  83. assert_frame_equal(rs, xp)
  84. def test_diff_float_n(self):
  85. rs = self.tsframe.diff(1.)
  86. xp = self.tsframe.diff(1)
  87. assert_frame_equal(rs, xp)
  88. def test_diff_axis(self):
  89. # GH 9727
  90. df = DataFrame([[1., 2.], [3., 4.]])
  91. assert_frame_equal(df.diff(axis=1), DataFrame(
  92. [[np.nan, 1.], [np.nan, 1.]]))
  93. assert_frame_equal(df.diff(axis=0), DataFrame(
  94. [[np.nan, np.nan], [2., 2.]]))
  95. def test_pct_change(self):
  96. rs = self.tsframe.pct_change(fill_method=None)
  97. assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1)
  98. rs = self.tsframe.pct_change(2)
  99. filled = self.tsframe.fillna(method='pad')
  100. assert_frame_equal(rs, filled / filled.shift(2) - 1)
  101. rs = self.tsframe.pct_change(fill_method='bfill', limit=1)
  102. filled = self.tsframe.fillna(method='bfill', limit=1)
  103. assert_frame_equal(rs, filled / filled.shift(1) - 1)
  104. rs = self.tsframe.pct_change(freq='5D')
  105. filled = self.tsframe.fillna(method='pad')
  106. assert_frame_equal(rs,
  107. (filled / filled.shift(freq='5D') - 1)
  108. .reindex_like(filled))
  109. def test_pct_change_shift_over_nas(self):
  110. s = Series([1., 1.5, np.nan, 2.5, 3.])
  111. df = DataFrame({'a': s, 'b': s})
  112. chg = df.pct_change()
  113. expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2])
  114. edf = DataFrame({'a': expected, 'b': expected})
  115. assert_frame_equal(chg, edf)
  @pytest.mark.parametrize("freq, periods, fill_method, limit",
                           [('5B', 5, None, None),
                            ('3B', 3, None, None),
                            ('3B', 3, 'bfill', None),
                            ('7B', 7, 'pad', 1),
                            ('7B', 7, 'bfill', 3),
                            ('14B', 14, None, None)])
  def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
      """``pct_change(freq=...)`` must match the paired ``periods`` call."""
      # GH 7292
      fill_kwargs = dict(fill_method=fill_method, limit=limit)
      empty_ts = DataFrame(index=self.tsframe.index,
                           columns=self.tsframe.columns)

      # same comparison on the populated frame, then on an all-missing one
      for frame in (self.tsframe, empty_ts):
          by_freq = frame.pct_change(freq=freq, **fill_kwargs)
          by_periods = frame.pct_change(periods, **fill_kwargs)
          assert_frame_equal(by_freq, by_periods)
  141. def test_frame_ctor_datetime64_column(self):
  142. rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
  143. dates = np.asarray(rng)
  144. df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates})
  145. assert np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))
  146. def test_frame_append_datetime64_column(self):
  147. rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
  148. df = DataFrame(index=np.arange(len(rng)))
  149. df['A'] = rng
  150. assert np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))
  151. def test_frame_datetime64_pre1900_repr(self):
  152. df = DataFrame({'year': date_range('1/1/1700', periods=50,
  153. freq='A-DEC')})
  154. # it works!
  155. repr(df)
  def test_frame_append_datetime64_col_other_units(self):
      """Assign datetime64 arrays of several units as a new or existing column."""
      count = 100
      nano = np.dtype('M8[ns]')
      unit_codes = ['h', 'm', 's', 'ms', 'D', 'M', 'Y']

      def values_in(unit):
          # reinterpret the integers 0..count-1 as datetime64 of ``unit``
          return np.arange(count, dtype=np.int64).view(
              np.dtype('M8[%s]' % unit))

      for unit in unit_codes:
          vals = values_in(unit)

          df = DataFrame({'ints': np.arange(count)}, index=np.arange(count))
          df[unit] = vals

          ex_vals = to_datetime(vals.astype('O')).values

          assert df[unit].dtype == nano
          assert (df[unit].values == ex_vals).all()

      # Test insertion into existing datetime64 column
      df = DataFrame({'ints': np.arange(count)}, index=np.arange(count))
      df['dates'] = np.arange(count, dtype=np.int64).view(nano)

      for unit in unit_codes:
          vals = values_in(unit)

          tmp = df.copy()
          tmp['dates'] = vals
          ex_vals = to_datetime(vals.astype('O')).values

          assert (tmp['dates'].values == ex_vals).all()
  def test_shift(self):
      """Exercise ``DataFrame.shift`` by periods, by offset and along columns."""
      frame = self.tsframe

      # naive shift, forwards then backwards
      for periods in (5, -5):
          moved = frame.shift(periods)
          tm.assert_index_equal(moved.index, frame.index)
          assert_series_equal(moved['A'], frame['A'].shift(periods))

      # shift by 0
      assert_frame_equal(frame.shift(0), frame)

      # shift by DateOffset
      by_offset = frame.shift(5, freq=offsets.BDay())
      assert len(by_offset) == len(frame)

      by_alias = frame.shift(5, freq='B')
      assert_frame_equal(by_offset, by_alias)

      first = frame.index[0]
      first_shifted = first + offsets.BDay(5)
      assert_series_equal(frame.xs(first),
                          by_offset.xs(first_shifted), check_names=False)

      # shift int frame (result is not inspected)
      self.intframe.shift(1)

      # Shifting with PeriodIndex
      ps = tm.makePeriodFrame()
      forward = ps.shift(1)
      back_again = forward.shift(-1)
      tm.assert_index_equal(forward.index, ps.index)
      tm.assert_index_equal(back_again.index, ps.index)
      tm.assert_numpy_array_equal(back_again.iloc[:, 0].dropna().values,
                                  ps.iloc[:-1, 0].values)

      via_alias = ps.shift(1, 'B')
      via_offset = ps.shift(1, offsets.BDay())
      assert_frame_equal(via_alias, via_offset)
      assert_frame_equal(ps, via_alias.shift(-1, 'B'))

      msg = 'does not match PeriodIndex freq'
      with pytest.raises(ValueError, match=msg):
          ps.shift(freq='D')

      # shift other axis (GH 6371), first by number and then by name
      for axis in (1, 'columns'):
          df = DataFrame(np.random.rand(10, 5))
          nan_column = DataFrame(np.nan, index=df.index, columns=[0])
          expected = pd.concat([nan_column, df.iloc[:, 0:-1]],
                               ignore_index=True, axis=1)
          result = df.shift(1, axis=axis)
          assert_frame_equal(result, expected)
  234. def test_shift_bool(self):
  235. df = DataFrame({'high': [True, False],
  236. 'low': [False, False]})
  237. rs = df.shift(1)
  238. xp = DataFrame(np.array([[np.nan, np.nan],
  239. [True, False]], dtype=object),
  240. columns=['high', 'low'])
  241. assert_frame_equal(rs, xp)
  242. def test_shift_categorical(self):
  243. # GH 9416
  244. s1 = pd.Series(['a', 'b', 'c'], dtype='category')
  245. s2 = pd.Series(['A', 'B', 'C'], dtype='category')
  246. df = DataFrame({'one': s1, 'two': s2})
  247. rs = df.shift(1)
  248. xp = DataFrame({'one': s1.shift(1), 'two': s2.shift(1)})
  249. assert_frame_equal(rs, xp)
  250. def test_shift_fill_value(self):
  251. # GH #24128
  252. df = DataFrame([1, 2, 3, 4, 5],
  253. index=date_range('1/1/2000', periods=5, freq='H'))
  254. exp = DataFrame([0, 1, 2, 3, 4],
  255. index=date_range('1/1/2000', periods=5, freq='H'))
  256. result = df.shift(1, fill_value=0)
  257. assert_frame_equal(result, exp)
  258. exp = DataFrame([0, 0, 1, 2, 3],
  259. index=date_range('1/1/2000', periods=5, freq='H'))
  260. result = df.shift(2, fill_value=0)
  261. assert_frame_equal(result, exp)
  262. def test_shift_empty(self):
  263. # Regression test for #8019
  264. df = DataFrame({'foo': []})
  265. rs = df.shift(-1)
  266. assert_frame_equal(df, rs)
  267. def test_shift_duplicate_columns(self):
  268. # GH 9092; verify that position-based shifting works
  269. # in the presence of duplicate columns
  270. column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]]
  271. data = np.random.randn(20, 5)
  272. shifted = []
  273. for columns in column_lists:
  274. df = pd.DataFrame(data.copy(), columns=columns)
  275. for s in range(5):
  276. df.iloc[:, s] = df.iloc[:, s].shift(s + 1)
  277. df.columns = range(5)
  278. shifted.append(df)
  279. # sanity check the base case
  280. nulls = shifted[0].isna().sum()
  281. assert_series_equal(nulls, Series(range(1, 6), dtype='int64'))
  282. # check all answers are the same
  283. assert_frame_equal(shifted[0], shifted[1])
  284. assert_frame_equal(shifted[0], shifted[2])
  def test_tshift(self):
      """Exercise ``tshift`` on period, datetime and frequency-less indexes."""
      # PeriodIndex
      ps = tm.makePeriodFrame()
      forward = ps.tshift(1)
      assert_frame_equal(forward.tshift(-1), ps)

      assert_frame_equal(forward, ps.tshift(freq='B'))
      assert_frame_equal(forward, ps.tshift(freq=offsets.BDay()))

      with pytest.raises(ValueError, match='does not match'):
          ps.tshift(freq='M')

      # DatetimeIndex
      forward = self.tsframe.tshift(1)
      assert_frame_equal(self.tsframe, forward.tshift(-1))

      with_freq = self.tsframe.tshift(freq=self.tsframe.index.freq)
      assert_frame_equal(forward, with_freq)

      # same data, but the index is rebuilt from a plain ndarray
      inferred_ts = DataFrame(self.tsframe.values,
                              Index(np.asarray(self.tsframe.index)),
                              columns=self.tsframe.columns)
      forward = inferred_ts.tshift(1)
      round_trip = forward.tshift(-1)
      assert_frame_equal(forward, self.tsframe.tshift(1))
      assert_frame_equal(round_trip, inferred_ts)

      # irregularly spaced rows
      no_freq = self.tsframe.iloc[[0, 5, 7], :]
      with pytest.raises(ValueError):
          no_freq.tshift()
  def test_truncate(self):
      """Check ``truncate`` with bounds that are present in or absent from the index."""
      full_index = self.tsframe.index
      ts = self.tsframe[::3]

      start, end = full_index[3], full_index[6]
      start_missing = full_index[2]
      end_missing = full_index[7]

      # neither specified
      assert_frame_equal(ts.truncate(), ts)

      # both specified
      expected = ts[1:3]
      assert_frame_equal(ts.truncate(start, end), expected)
      assert_frame_equal(ts.truncate(start_missing, end_missing), expected)

      # start specified
      expected = ts[1:]
      assert_frame_equal(ts.truncate(before=start), expected)
      assert_frame_equal(ts.truncate(before=start_missing), expected)

      # end specified
      expected = ts[:3]
      assert_frame_equal(ts.truncate(after=end), expected)
      assert_frame_equal(ts.truncate(after=end_missing), expected)

      # bounds are computed first so only truncate() runs under raises
      before = ts.index[-1] - ts.index.freq
      after = ts.index[0] + ts.index.freq
      with pytest.raises(ValueError):
          ts.truncate(before=before, after=after)
  341. def test_truncate_copy(self):
  342. index = self.tsframe.index
  343. truncated = self.tsframe.truncate(index[5], index[10])
  344. truncated.values[:] = 5.
  345. assert not (self.tsframe.values[5:11] == 5).any()
  def test_truncate_nonsortedindex(self):
      """``truncate`` must raise on an unsorted index, on either axis."""
      # GH 17935
      msg = 'truncate requires a sorted index'

      # integer row labels out of order
      df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']},
                        index=[5, 3, 2, 9, 0])
      with pytest.raises(ValueError, match=msg):
          df.truncate(before=3, after=9)

      # datetime rows reordered by sorting on a random column
      rng = pd.date_range('2011-01-01', '2012-01-01', freq='W')
      ts = pd.DataFrame({'A': np.random.randn(len(rng)),
                         'B': np.random.randn(len(rng))},
                        index=rng)
      with pytest.raises(ValueError, match=msg):
          ts.sort_values('A', ascending=False).truncate(before='2011-11',
                                                        after='2011-12')

      # column labels out of order
      df = pd.DataFrame({3: np.random.randn(5),
                         20: np.random.randn(5),
                         2: np.random.randn(5),
                         0: np.random.randn(5)},
                        columns=[3, 20, 2, 0])
      with pytest.raises(ValueError, match=msg):
          df.truncate(before=2, after=20, axis=1)
  def test_asfreq(self):
      """Check ``asfreq`` by offset, by alias, when upsampling, and on no rows.

      The padded upsample was previously computed twice and never
      inspected; it is now computed once and checked.
      """
      offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd())
      rule_monthly = self.tsframe.asfreq('BM')

      tm.assert_almost_equal(offset_monthly['A'], rule_monthly['A'])

      # Upsampling with padding must keep a datetime index and leave
      # every row that already existed unchanged.
      filled = rule_monthly.asfreq('B', method='pad')
      assert isinstance(filled.index, DatetimeIndex)
      assert_frame_equal(filled.reindex(rule_monthly.index), rule_monthly)

      # test does not blow up on length-0 DataFrame
      zero_length = self.tsframe.reindex([])
      result = zero_length.asfreq('BM')
      assert result is not zero_length
  381. def test_asfreq_datetimeindex(self):
  382. df = DataFrame({'A': [1, 2, 3]},
  383. index=[datetime(2011, 11, 1), datetime(2011, 11, 2),
  384. datetime(2011, 11, 3)])
  385. df = df.asfreq('B')
  386. assert isinstance(df.index, DatetimeIndex)
  387. ts = df['A'].asfreq('B')
  388. assert isinstance(ts.index, DatetimeIndex)
  389. def test_asfreq_fillvalue(self):
  390. # test for fill value during upsampling, related to issue 3715
  391. # setup
  392. rng = pd.date_range('1/1/2016', periods=10, freq='2S')
  393. ts = pd.Series(np.arange(len(rng)), index=rng)
  394. df = pd.DataFrame({'one': ts})
  395. # insert pre-existing missing value
  396. df.loc['2016-01-01 00:00:08', 'one'] = None
  397. actual_df = df.asfreq(freq='1S', fill_value=9.0)
  398. expected_df = df.asfreq(freq='1S').fillna(9.0)
  399. expected_df.loc['2016-01-01 00:00:08', 'one'] = None
  400. assert_frame_equal(expected_df, actual_df)
  401. expected_series = ts.asfreq(freq='1S').fillna(9.0)
  402. actual_series = ts.asfreq(freq='1S', fill_value=9.0)
  403. assert_series_equal(expected_series, actual_series)
  @pytest.mark.parametrize("data,idx,expected_first,expected_last", [
      ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
      ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
      ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
      ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
      ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
      ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
  def test_first_last_valid(self, data, idx,
                            expected_first, expected_last):
      """Check ``first_valid_index`` / ``last_valid_index`` in several setups."""
      n_rows = len(self.frame.index)

      # five missing values at each end of an otherwise random column
      values = np.random.randn(n_rows)
      values[:5] = np.nan
      values[-5:] = np.nan

      frame = DataFrame({'foo': values}, index=self.frame.index)
      assert frame.first_valid_index() == frame.index[5]
      assert frame.last_valid_index() == frame.index[-6]

      # GH12800
      empty = DataFrame()
      assert empty.last_valid_index() is None
      assert empty.first_valid_index() is None

      # GH17400: no valid entries
      frame[:] = np.nan
      assert frame.last_valid_index() is None
      assert frame.first_valid_index() is None

      # GH20499: its preserves freq with holes
      frame.index = date_range("20110101", periods=n_rows, freq="B")
      frame.iloc[1] = 1
      frame.iloc[-2] = 1
      first, last = frame.first_valid_index(), frame.last_valid_index()
      assert first == frame.index[1]
      assert last == frame.index[-2]
      assert frame.first_valid_index().freq == frame.index.freq
      assert frame.last_valid_index().freq == frame.index.freq

      # GH 21441
      df = DataFrame(data, index=idx)
      assert expected_first == df.first_valid_index()
      assert expected_last == df.last_valid_index()
  def test_first_subset(self):
      """``DataFrame.first`` with day and month offsets on generated frames."""
      half_daily = tm.makeTimeDataFrame(freq='12h')
      assert len(half_daily.first('10d')) == 20

      ts = tm.makeTimeDataFrame(freq='D')
      assert len(ts.first('10d')) == 10

      # (offset passed to first(), equivalent slice of ts)
      cases = [('3M', ts[:'3/31/2000']),
               ('21D', ts[:21])]
      for offset, expected in cases:
          result = ts.first(offset)
          assert_frame_equal(result, expected)

      # empty input gives empty output
      result = ts[:0].first('3M')
      assert_frame_equal(result, ts[:0])
  def test_first_raises(self):
      """``first`` must raise ``TypeError`` on a default integer index."""
      # GH20725
      rows = [[1, 2, 3], [4, 5, 6]]
      df = pd.DataFrame(rows)
      with pytest.raises(TypeError):  # index is not a DatetimeIndex
          df.first('1D')
  def test_last_subset(self):
      """``DataFrame.last`` with day and month offsets on generated frames."""
      half_daily = tm.makeTimeDataFrame(freq='12h')
      assert len(half_daily.last('10d')) == 20

      ts = tm.makeTimeDataFrame(nper=30, freq='D')
      assert len(ts.last('10d')) == 10

      # the same 21-day tail, expressed by label and then by position
      by_label = ts['2000-01-10':]
      assert_frame_equal(ts.last('21D'), by_label)

      by_position = ts[-21:]
      assert_frame_equal(ts.last('21D'), by_position)

      # empty input gives empty output
      result = ts[:0].last('3M')
      assert_frame_equal(result, ts[:0])
  def test_last_raises(self):
      """``last`` must raise ``TypeError`` on a default integer index."""
      # GH20725
      rows = [[1, 2, 3], [4, 5, 6]]
      df = pd.DataFrame(rows)
      with pytest.raises(TypeError):  # index is not a DatetimeIndex
          df.last('1D')
  482. def test_at_time(self):
  483. rng = date_range('1/1/2000', '1/5/2000', freq='5min')
  484. ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
  485. rs = ts.at_time(rng[1])
  486. assert (rs.index.hour == rng[1].hour).all()
  487. assert (rs.index.minute == rng[1].minute).all()
  488. assert (rs.index.second == rng[1].second).all()
  489. result = ts.at_time('9:30')
  490. expected = ts.at_time(time(9, 30))
  491. assert_frame_equal(result, expected)
  492. result = ts.loc[time(9, 30)]
  493. expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
  494. assert_frame_equal(result, expected)
  495. # midnight, everything
  496. rng = date_range('1/1/2000', '1/31/2000')
  497. ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
  498. result = ts.at_time(time(0, 0))
  499. assert_frame_equal(result, ts)
  500. # time doesn't exist
  501. rng = date_range('1/1/2012', freq='23Min', periods=384)
  502. ts = DataFrame(np.random.randn(len(rng), 2), rng)
  503. rs = ts.at_time('16:00')
  504. assert len(rs) == 0
  def test_at_time_raises(self):
      """``at_time`` must raise ``TypeError`` on a default integer index."""
      # GH20725
      rows = [[1, 2, 3], [4, 5, 6]]
      df = pd.DataFrame(rows)
      with pytest.raises(TypeError):  # index is not a DatetimeIndex
          df.at_time('00:00')
  @pytest.mark.parametrize('axis', ['index', 'columns', 0, 1])
  def test_at_time_axis(self, axis):
      """``at_time`` with ``axis`` selects along rows or along columns."""
      # issue 8839
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = DataFrame(np.random.randn(len(rng), len(rng)))
      ts.index, ts.columns = rng, rng

      at_half_nine = (rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)
      indices = rng[at_half_nine]

      if axis in ['index', 0]:
          expected = ts.loc[indices, :]
      elif axis in ['columns', 1]:
          expected = ts.loc[:, indices]

      result = ts.at_time('9:30', axis=axis)
      assert_frame_equal(result, expected)
  def test_between_time(self, close_open_fixture):
      """``between_time`` with each inclusive/exclusive endpoint combination."""
      inc_start, inc_end = close_open_fixture

      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
      stime, etime = time(0, 0), time(1, 0)

      filtered = ts.between_time(stime, etime, inc_start, inc_end)
      # expected row count, reduced for each excluded endpoint
      exp_len = 13 * 4 + 1
      exp_len -= 0 if inc_start else 5
      exp_len -= 0 if inc_end else 4
      assert len(filtered) == exp_len

      for stamp in filtered.index:
          t = stamp.time()
          assert (t >= stime) if inc_start else (t > stime)
          assert (t <= etime) if inc_end else (t < etime)

      # string times behave like datetime.time objects
      result = ts.between_time('00:00', '01:00')
      expected = ts.between_time(stime, etime)
      assert_frame_equal(result, expected)

      # across midnight
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
      stime, etime = time(22, 0), time(9, 0)

      filtered = ts.between_time(stime, etime, inc_start, inc_end)
      exp_len = (12 * 11 + 1) * 4 + 1
      exp_len -= 0 if inc_start else 4
      exp_len -= 0 if inc_end else 4
      assert len(filtered) == exp_len

      for stamp in filtered.index:
          t = stamp.time()
          if inc_start:
              assert (t >= stime) or (t <= etime)
          else:
              assert (t > stime) or (t <= etime)

          if inc_end:
              assert (t <= etime) or (t >= stime)
          else:
              assert (t < etime) or (t >= stime)
  def test_between_time_raises(self):
      """``between_time`` must raise ``TypeError`` on a default integer index."""
      # GH20725
      rows = [[1, 2, 3], [4, 5, 6]]
      df = pd.DataFrame(rows)
      with pytest.raises(TypeError):  # index is not a DatetimeIndex
          df.between_time(start_time='00:00', end_time='12:00')
  576. def test_between_time_axis(self, axis):
  577. # issue 8839
  578. rng = date_range('1/1/2000', periods=100, freq='10min')
  579. ts = DataFrame(np.random.randn(len(rng), len(rng)))
  580. stime, etime = ('08:00:00', '09:00:00')
  581. exp_len = 7
  582. if axis in ['index', 0]:
  583. ts.index = rng
  584. assert len(ts.between_time(stime, etime)) == exp_len
  585. assert len(ts.between_time(stime, etime, axis=0)) == exp_len
  586. if axis in ['columns', 1]:
  587. ts.columns = rng
  588. selected = ts.between_time(stime, etime, axis=1).columns
  589. assert len(selected) == exp_len
  def test_between_time_axis_raises(self, axis):
      """``between_time`` must raise when the chosen axis is not datetime-like.

      ``axis`` is not parametrized in this block; it is presumably
      supplied by a shared fixture defined elsewhere (TODO confirm).
      """
      # issue 8839
      rng = date_range('1/1/2000', periods=100, freq='10min')
      int_labels = np.arange(0, len(rng))
      rand_data = np.random.randn(len(rng), len(rng))
      ts = DataFrame(rand_data, index=rng, columns=rng)
      stime, etime = ('08:00:00', '09:00:00')

      if axis in ['columns', 1]:
          # rows lose their datetimes, so row-wise selection must fail
          ts.index = int_labels
          with pytest.raises(TypeError):
              ts.between_time(stime, etime)
          with pytest.raises(TypeError):
              ts.between_time(stime, etime, axis=0)

      if axis in ['index', 0]:
          # columns lose their datetimes, so column-wise selection must fail
          ts.columns = int_labels
          with pytest.raises(TypeError):
              ts.between_time(stime, etime, axis=1)
  604. def test_operation_on_NaT(self):
  605. # Both NaT and Timestamp are in DataFrame.
  606. df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
  607. pd.Timestamp('2012-05-01')]})
  608. res = df.min()
  609. exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
  610. tm.assert_series_equal(res, exp)
  611. res = df.max()
  612. exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
  613. tm.assert_series_equal(res, exp)
  614. # GH12941, only NaTs are in DataFrame.
  615. df = pd.DataFrame({'foo': [pd.NaT, pd.NaT]})
  616. res = df.min()
  617. exp = pd.Series([pd.NaT], index=["foo"])
  618. tm.assert_series_equal(res, exp)
  619. res = df.max()
  620. exp = pd.Series([pd.NaT], index=["foo"])
  621. tm.assert_series_equal(res, exp)
  def test_datetime_assignment_with_NaT_and_diff_time_units(self):
      """Assign datetime64 arrays containing NaT in ns and then in s units."""
      # GH 7492
      nanos = np.array([1, 'nat'], dtype='datetime64[ns]')
      result = pd.Series(nanos).to_frame()
      result['new'] = nanos
      expected = pd.DataFrame({0: [1, None], 'new': [1, None]},
                              dtype='datetime64[ns]')
      tm.assert_frame_equal(result, expected)

      # OutOfBoundsDatetime error shouldn't occur
      seconds = np.array([1, 'nat'], dtype='datetime64[s]')
      result['new'] = seconds
      expected = pd.DataFrame({0: [1, None], 'new': [1e9, None]},
                              dtype='datetime64[ns]')
      tm.assert_frame_equal(result, expected)
  def test_frame_to_period(self):
      """``to_period`` on the row axis, on the column axis, and on a bad axis."""
      n_cols = 5

      dr = date_range('1/1/2000', '1/1/2001')
      pr = period_range('1/1/2000', '1/1/2001')
      df = DataFrame(np.random.randn(len(dr), n_cols), index=dr)
      df['mix'] = 'a'

      # rows
      expected = df.copy()
      expected.index = pr
      assert_frame_equal(df.to_period(), expected)

      monthly = df.to_period('M')
      tm.assert_index_equal(monthly.index, expected.index.asfreq('M'))

      # columns, using the transposed frame
      df = df.T
      expected = df.copy()
      expected.columns = pr
      assert_frame_equal(df.to_period(axis=1), expected)

      monthly = df.to_period('M', axis=1)
      tm.assert_index_equal(monthly.columns, expected.columns.asfreq('M'))

      with pytest.raises(ValueError):
          df.to_period(axis=2)
  @pytest.mark.parametrize("fn", ['tz_localize', 'tz_convert'])
  def test_tz_convert_and_localize(self, fn):
      """Apply ``tz_localize`` / ``tz_convert`` to flat and MultiIndex frames.

      ``fn`` is the name of the method under test; it is looked up with
      ``getattr`` on both the index (to build expectations) and the frame.

      The result for the int/datetime MultiIndex (``df5``) used to be
      computed but never inspected: the assertions after it re-checked
      ``df3``. It is now checked directly.
      """
      l0 = date_range('20140701', periods=5, freq='D')
      l1 = date_range('20140701', periods=5, freq='D')

      int_idx = Index(range(5))

      if fn == 'tz_convert':
          # tz_convert is applied to already-localized data here
          l0 = l0.tz_localize('UTC')
          l1 = l1.tz_localize('UTC')

      for idx in [l0, l1]:

          l0_expected = getattr(idx, fn)('US/Pacific')
          l1_expected = getattr(idx, fn)('US/Pacific')

          df1 = DataFrame(np.ones(5), index=l0)
          df1 = getattr(df1, fn)('US/Pacific')
          assert_index_equal(df1.index, l0_expected)

          # MultiIndex
          # GH7846
          df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))

          # only level 0 is changed
          df3 = getattr(df2, fn)('US/Pacific', level=0)
          assert not df3.index.levels[0].equals(l0)
          assert_index_equal(df3.index.levels[0], l0_expected)
          assert_index_equal(df3.index.levels[1], l1)
          assert not df3.index.levels[1].equals(l1_expected)

          # only level 1 is changed
          df3 = getattr(df2, fn)('US/Pacific', level=1)
          assert_index_equal(df3.index.levels[0], l0)
          assert not df3.index.levels[0].equals(l0_expected)
          assert_index_equal(df3.index.levels[1], l1_expected)
          assert not df3.index.levels[1].equals(l1)

          # integer level 0 stays as is; datetime level 1 is changed
          df4 = DataFrame(np.ones(5),
                          MultiIndex.from_arrays([int_idx, l0]))
          df5 = getattr(df4, fn)('US/Pacific', level=1)
          assert df5.index.levels[0].equals(int_idx)
          assert_index_equal(df5.index.levels[1], l0_expected)
          assert not df5.index.levels[1].equals(l0)

      # Bad Inputs
      # Frames are built outside the ``raises`` blocks so that only the
      # call under test can satisfy the expected exception.

      # Not DatetimeIndex / PeriodIndex
      df = DataFrame(index=int_idx)
      with pytest.raises(TypeError, match='DatetimeIndex'):
          getattr(df, fn)('US/Pacific')

      # Not DatetimeIndex / PeriodIndex
      df = DataFrame(np.ones(5),
                     MultiIndex.from_arrays([int_idx, l0]))
      with pytest.raises(TypeError, match='DatetimeIndex'):
          getattr(df, fn)('US/Pacific', level=0)

      # Invalid level
      df = DataFrame(index=l0)
      with pytest.raises(ValueError, match='not valid'):
          getattr(df, fn)('US/Pacific', level=1)