# Source listing: test_datetime.py (11 KB)
from datetime import datetime, timedelta

from dateutil import tz
import numpy as np
import pandas as pd
from pandas import DataFrame, Index, Series, Timestamp, date_range
from pandas.util import testing as tm
  7. class TestDatetimeIndex(object):
  8. def test_setitem_with_datetime_tz(self):
  9. # 16889
  10. # support .loc with alignment and tz-aware DatetimeIndex
  11. mask = np.array([True, False, True, False])
  12. idx = date_range('20010101', periods=4, tz='UTC')
  13. df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')
  14. result = df.copy()
  15. result.loc[mask, :] = df.loc[mask, :]
  16. tm.assert_frame_equal(result, df)
  17. result = df.copy()
  18. result.loc[mask] = df.loc[mask]
  19. tm.assert_frame_equal(result, df)
  20. idx = date_range('20010101', periods=4)
  21. df = DataFrame({'a': np.arange(4)}, index=idx).astype('float64')
  22. result = df.copy()
  23. result.loc[mask, :] = df.loc[mask, :]
  24. tm.assert_frame_equal(result, df)
  25. result = df.copy()
  26. result.loc[mask] = df.loc[mask]
  27. tm.assert_frame_equal(result, df)
  28. def test_indexing_with_datetime_tz(self):
  29. # 8260
  30. # support datetime64 with tz
  31. idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
  32. name='foo')
  33. dr = date_range('20130110', periods=3)
  34. df = DataFrame({'A': idx, 'B': dr})
  35. df['C'] = idx
  36. df.iloc[1, 1] = pd.NaT
  37. df.iloc[1, 2] = pd.NaT
  38. # indexing
  39. result = df.iloc[1]
  40. expected = Series([Timestamp('2013-01-02 00:00:00-0500',
  41. tz='US/Eastern'), np.nan, np.nan],
  42. index=list('ABC'), dtype='object', name=1)
  43. tm.assert_series_equal(result, expected)
  44. result = df.loc[1]
  45. expected = Series([Timestamp('2013-01-02 00:00:00-0500',
  46. tz='US/Eastern'), np.nan, np.nan],
  47. index=list('ABC'), dtype='object', name=1)
  48. tm.assert_series_equal(result, expected)
  49. # indexing - fast_xs
  50. df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')})
  51. result = df.iloc[5]
  52. expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D')
  53. assert result == expected
  54. result = df.loc[5]
  55. assert result == expected
  56. # indexing - boolean
  57. result = df[df.a > df.a[3]]
  58. expected = df.iloc[4:]
  59. tm.assert_frame_equal(result, expected)
  60. # indexing - setting an element
  61. df = DataFrame(data=pd.to_datetime(
  62. ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time'])
  63. df['new_col'] = ['new', 'old']
  64. df.time = df.set_index('time').index.tz_localize('UTC')
  65. v = df[df.new_col == 'new'].set_index('time').index.tz_convert(
  66. 'US/Pacific')
  67. # trying to set a single element on a part of a different timezone
  68. # this converts to object
  69. df2 = df.copy()
  70. df2.loc[df2.new_col == 'new', 'time'] = v
  71. expected = Series([v[0], df.loc[1, 'time']], name='time')
  72. tm.assert_series_equal(df2.time, expected)
  73. v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s')
  74. df.loc[df.new_col == 'new', 'time'] = v
  75. tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v)
  76. def test_consistency_with_tz_aware_scalar(self):
  77. # xef gh-12938
  78. # various ways of indexing the same tz-aware scalar
  79. df = Series([Timestamp('2016-03-30 14:35:25',
  80. tz='Europe/Brussels')]).to_frame()
  81. df = pd.concat([df, df]).reset_index(drop=True)
  82. expected = Timestamp('2016-03-30 14:35:25+0200',
  83. tz='Europe/Brussels')
  84. result = df[0][0]
  85. assert result == expected
  86. result = df.iloc[0, 0]
  87. assert result == expected
  88. result = df.loc[0, 0]
  89. assert result == expected
  90. result = df.iat[0, 0]
  91. assert result == expected
  92. result = df.at[0, 0]
  93. assert result == expected
  94. result = df[0].loc[0]
  95. assert result == expected
  96. result = df[0].at[0]
  97. assert result == expected
  98. def test_indexing_with_datetimeindex_tz(self):
  99. # GH 12050
  100. # indexing on a series with a datetimeindex with tz
  101. index = date_range('2015-01-01', periods=2, tz='utc')
  102. ser = Series(range(2), index=index, dtype='int64')
  103. # list-like indexing
  104. for sel in (index, list(index)):
  105. # getitem
  106. tm.assert_series_equal(ser[sel], ser)
  107. # setitem
  108. result = ser.copy()
  109. result[sel] = 1
  110. expected = Series(1, index=index)
  111. tm.assert_series_equal(result, expected)
  112. # .loc getitem
  113. tm.assert_series_equal(ser.loc[sel], ser)
  114. # .loc setitem
  115. result = ser.copy()
  116. result.loc[sel] = 1
  117. expected = Series(1, index=index)
  118. tm.assert_series_equal(result, expected)
  119. # single element indexing
  120. # getitem
  121. assert ser[index[1]] == 1
  122. # setitem
  123. result = ser.copy()
  124. result[index[1]] = 5
  125. expected = Series([0, 5], index=index)
  126. tm.assert_series_equal(result, expected)
  127. # .loc getitem
  128. assert ser.loc[index[1]] == 1
  129. # .loc setitem
  130. result = ser.copy()
  131. result.loc[index[1]] = 5
  132. expected = Series([0, 5], index=index)
  133. tm.assert_series_equal(result, expected)
  134. def test_partial_setting_with_datetimelike_dtype(self):
  135. # GH9478
  136. # a datetimeindex alignment issue with partial setting
  137. df = DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'),
  138. index=date_range('1/1/2000', periods=3, freq='1H'))
  139. expected = df.copy()
  140. expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT]
  141. mask = df.A < 1
  142. df.loc[mask, 'C'] = df.loc[mask].index
  143. tm.assert_frame_equal(df, expected)
  144. def test_loc_setitem_datetime(self):
  145. # GH 9516
  146. dt1 = Timestamp('20130101 09:00:00')
  147. dt2 = Timestamp('20130101 10:00:00')
  148. for conv in [lambda x: x, lambda x: x.to_datetime64(),
  149. lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]:
  150. df = DataFrame()
  151. df.loc[conv(dt1), 'one'] = 100
  152. df.loc[conv(dt2), 'one'] = 200
  153. expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2])
  154. tm.assert_frame_equal(df, expected)
  155. def test_series_partial_set_datetime(self):
  156. # GH 11497
  157. idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
  158. ser = Series([0.1, 0.2], index=idx, name='s')
  159. result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]]
  160. exp = Series([0.1, 0.2], index=idx, name='s')
  161. tm.assert_series_equal(result, exp, check_index_type=True)
  162. keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'),
  163. Timestamp('2011-01-01')]
  164. exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'),
  165. name='s')
  166. tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
  167. keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'),
  168. Timestamp('2011-01-03')]
  169. exp = Series([np.nan, 0.2, np.nan],
  170. index=pd.DatetimeIndex(keys, name='idx'), name='s')
  171. with tm.assert_produces_warning(FutureWarning,
  172. check_stacklevel=False):
  173. tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
  174. def test_series_partial_set_period(self):
  175. # GH 11497
  176. idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx')
  177. ser = Series([0.1, 0.2], index=idx, name='s')
  178. result = ser.loc[[pd.Period('2011-01-01', freq='D'),
  179. pd.Period('2011-01-02', freq='D')]]
  180. exp = Series([0.1, 0.2], index=idx, name='s')
  181. tm.assert_series_equal(result, exp, check_index_type=True)
  182. keys = [pd.Period('2011-01-02', freq='D'),
  183. pd.Period('2011-01-02', freq='D'),
  184. pd.Period('2011-01-01', freq='D')]
  185. exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'),
  186. name='s')
  187. tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
  188. keys = [pd.Period('2011-01-03', freq='D'),
  189. pd.Period('2011-01-02', freq='D'),
  190. pd.Period('2011-01-03', freq='D')]
  191. exp = Series([np.nan, 0.2, np.nan],
  192. index=pd.PeriodIndex(keys, name='idx'), name='s')
  193. with tm.assert_produces_warning(FutureWarning,
  194. check_stacklevel=False):
  195. result = ser.loc[keys]
  196. tm.assert_series_equal(result, exp)
  197. def test_nanosecond_getitem_setitem_with_tz(self):
  198. # GH 11679
  199. data = ['2016-06-28 08:30:00.123456789']
  200. index = pd.DatetimeIndex(data, dtype='datetime64[ns, America/Chicago]')
  201. df = DataFrame({'a': [10]}, index=index)
  202. result = df.loc[df.index[0]]
  203. expected = Series(10, index=['a'], name=df.index[0])
  204. tm.assert_series_equal(result, expected)
  205. result = df.copy()
  206. result.loc[df.index[0], 'a'] = -1
  207. expected = DataFrame(-1, index=index, columns=['a'])
  208. tm.assert_frame_equal(result, expected)
  209. def test_loc_getitem_across_dst(self):
  210. # GH 21846
  211. idx = pd.date_range('2017-10-29 01:30:00',
  212. tz='Europe/Berlin', periods=5, freq='30 min')
  213. series2 = pd.Series([0, 1, 2, 3, 4],
  214. index=idx)
  215. t_1 = pd.Timestamp('2017-10-29 02:30:00+02:00', tz='Europe/Berlin',
  216. freq='30min')
  217. t_2 = pd.Timestamp('2017-10-29 02:00:00+01:00', tz='Europe/Berlin',
  218. freq='30min')
  219. result = series2.loc[t_1:t_2]
  220. expected = pd.Series([2, 3], index=idx[2:4])
  221. tm.assert_series_equal(result, expected)
  222. result = series2[t_1]
  223. expected = 2
  224. assert result == expected
  225. def test_loc_incremental_setitem_with_dst(self):
  226. # GH 20724
  227. base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific"))
  228. idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
  229. result = pd.Series([0], index=[idxs[0]])
  230. for ts in idxs:
  231. result.loc[ts] = 1
  232. expected = pd.Series(1, index=idxs)
  233. tm.assert_series_equal(result, expected)
  234. def test_loc_setitem_with_existing_dst(self):
  235. # GH 18308
  236. start = pd.Timestamp('2017-10-29 00:00:00+0200', tz='Europe/Madrid')
  237. end = pd.Timestamp('2017-10-29 03:00:00+0100', tz='Europe/Madrid')
  238. ts = pd.Timestamp('2016-10-10 03:00:00', tz='Europe/Madrid')
  239. idx = pd.date_range(start, end, closed='left', freq="H")
  240. result = pd.DataFrame(index=idx, columns=['value'])
  241. result.loc[ts, 'value'] = 12
  242. expected = pd.DataFrame([np.nan] * len(idx) + [12],
  243. index=idx.append(pd.DatetimeIndex([ts])),
  244. columns=['value'],
  245. dtype=object)
  246. tm.assert_frame_equal(result, expected)