1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import DataFrame, MultiIndex, date_range
- import pandas.util.testing as tm
def test_partial_string_timestamp_multiindex():
    """Exercise partial-string datetime selection on MultiIndex frames.

    A 15-row frame indexed by (timestamp, letter) is queried with date
    strings of varying resolution, both when the datetime level comes
    first and when the levels have been swapped.  A second, daily
    frame covers month-resolution slicing (GH12685).
    """
    # GH10331
    stamps = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
    letters = ['a', 'b', 'c']
    frame = pd.DataFrame(
        {'c1': range(15)},
        index=pd.MultiIndex.from_product([stamps, letters]))
    idx = pd.IndexSlice

    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # partial string matching on a single index: every spelling of the
    # level swap is checked, each after sorting the swapped index.
    swap_variants = (frame.swaplevel(),
                     frame.swaplevel(0),
                     frame.swaplevel(0, 1))
    for variant in swap_variants:
        swapped = variant.sort_index()
        only_a = swapped.loc['a']
        result = only_a.loc['2016-01-01']
        expected = frame.loc[idx[:, 'a'], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)

    # NOTE: ``swapped`` intentionally keeps its value from the final
    # loop iteration (``swaplevel(0, 1)`` + ``sort_index``) below.

    # indexing with IndexSlice
    result = frame.loc[idx['2016-01-01':'2016-02-01', :], :]
    tm.assert_frame_equal(result, frame)

    # match on secondary index; with letters outermost each letter owns
    # five consecutive rows, the first two of which fall on 2016-01-01
    result = swapped.loc[idx[:, '2016-01-01':'2016-01-01'], :]
    tm.assert_frame_equal(result, swapped.iloc[[0, 1, 5, 6, 10, 11]])

    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    with pytest.raises(KeyError):
        frame['2016-01-01']

    # partial string match on year only
    result = frame.loc['2016']
    tm.assert_frame_equal(result, frame)

    # partial string match on date
    result = frame.loc['2016-01-01']
    tm.assert_frame_equal(result, frame.iloc[:6])

    # partial string match on date and hour, from middle
    result = frame.loc['2016-01-02 12']
    tm.assert_frame_equal(result, frame.iloc[9:12])

    # partial string match on secondary index
    result = swapped.loc[idx[:, '2016-01-02'], :]
    tm.assert_frame_equal(result, swapped.iloc[[2, 3, 7, 8, 12, 13]])

    # tuple selector with partial string match on date
    result = frame.loc[('2016-01-01', 'a'), :]
    tm.assert_frame_equal(result, frame.iloc[[0, 3]])

    # Slicing date on first level should break (of course)
    with pytest.raises(KeyError):
        swapped.loc['2016-01-01']

    # GH12685 (partial string with daily resolution or below)
    days = date_range('2013-01-01', periods=100, freq='D')
    daily_index = MultiIndex.from_product([days, ['a', 'b']])
    daily = DataFrame(np.random.randn(200, 1), columns=['A'],
                      index=daily_index)
    result = daily.loc[idx['2013-03':'2013-03', :], :]
    # Two rows per day: January + February 2013 cover 59 days (rows
    # 0-117) and March's 31 days occupy rows 118-179.
    tm.assert_frame_equal(result, daily.iloc[118:180])
|