test_setops.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import Index, PeriodIndex, date_range, period_range
  5. import pandas.core.indexes.period as period
  6. import pandas.util.testing as tm
  7. def _permute(obj):
  8. return obj.take(np.random.permutation(len(obj)))
  9. class TestPeriodIndex(object):
  10. def test_joins(self, join_type):
  11. index = period_range('1/1/2000', '1/20/2000', freq='D')
  12. joined = index.join(index[:-5], how=join_type)
  13. assert isinstance(joined, PeriodIndex)
  14. assert joined.freq == index.freq
  15. def test_join_self(self, join_type):
  16. index = period_range('1/1/2000', '1/20/2000', freq='D')
  17. res = index.join(index, how=join_type)
  18. assert index is res
  19. def test_join_does_not_recur(self):
  20. df = tm.makeCustomDataframe(
  21. 3, 2, data_gen_f=lambda *args: np.random.randint(2),
  22. c_idx_type='p', r_idx_type='dt')
  23. s = df.iloc[:2, 0]
  24. res = s.index.join(df.columns, how='outer')
  25. expected = Index([s.index[0], s.index[1],
  26. df.columns[0], df.columns[1]], object)
  27. tm.assert_index_equal(res, expected)
  28. @pytest.mark.parametrize("sort", [None, False])
  29. def test_union(self, sort):
  30. # union
  31. other1 = pd.period_range('1/1/2000', freq='D', periods=5)
  32. rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
  33. expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
  34. rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
  35. other2 = pd.period_range('1/4/2000', freq='D', periods=5)
  36. expected2 = pd.period_range('1/1/2000', freq='D', periods=8)
  37. rng3 = pd.period_range('1/1/2000', freq='D', periods=5)
  38. other3 = pd.PeriodIndex([], freq='D')
  39. expected3 = pd.period_range('1/1/2000', freq='D', periods=5)
  40. rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5)
  41. other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
  42. expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00',
  43. '2000-01-01 11:00', '2000-01-01 12:00',
  44. '2000-01-01 13:00', '2000-01-02 09:00',
  45. '2000-01-02 10:00', '2000-01-02 11:00',
  46. '2000-01-02 12:00', '2000-01-02 13:00'],
  47. freq='H')
  48. rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
  49. '2000-01-01 09:05'], freq='T')
  50. other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05'
  51. '2000-01-01 09:08'],
  52. freq='T')
  53. expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03',
  54. '2000-01-01 09:05', '2000-01-01 09:08'],
  55. freq='T')
  56. rng6 = pd.period_range('2000-01-01', freq='M', periods=7)
  57. other6 = pd.period_range('2000-04-01', freq='M', periods=7)
  58. expected6 = pd.period_range('2000-01-01', freq='M', periods=10)
  59. rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
  60. other7 = pd.period_range('1998-01-01', freq='A', periods=8)
  61. expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
  62. rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
  63. '1/5/2000', '1/4/2000'], freq='D')
  64. other8 = pd.period_range('1/6/2000', freq='D', periods=5)
  65. expected8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
  66. '1/5/2000', '1/4/2000', '1/6/2000',
  67. '1/7/2000', '1/8/2000', '1/9/2000',
  68. '1/10/2000'], freq='D')
  69. for rng, other, expected in [(rng1, other1, expected1),
  70. (rng2, other2, expected2),
  71. (rng3, other3, expected3),
  72. (rng4, other4, expected4),
  73. (rng5, other5, expected5),
  74. (rng6, other6, expected6),
  75. (rng7, other7, expected7),
  76. (rng8, other8, expected8)]:
  77. result_union = rng.union(other, sort=sort)
  78. if sort is None:
  79. expected = expected.sort_values()
  80. tm.assert_index_equal(result_union, expected)
  81. @pytest.mark.parametrize("sort", [None, False])
  82. def test_union_misc(self, sort):
  83. index = period_range('1/1/2000', '1/20/2000', freq='D')
  84. result = index[:-5].union(index[10:], sort=sort)
  85. tm.assert_index_equal(result, index)
  86. # not in order
  87. result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort)
  88. if sort is None:
  89. tm.assert_index_equal(result, index)
  90. assert tm.equalContents(result, index)
  91. # raise if different frequencies
  92. index = period_range('1/1/2000', '1/20/2000', freq='D')
  93. index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
  94. with pytest.raises(period.IncompatibleFrequency):
  95. index.union(index2, sort=sort)
  96. msg = 'can only call with other PeriodIndex-ed objects'
  97. with pytest.raises(ValueError, match=msg):
  98. index.join(index.to_timestamp())
  99. index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
  100. with pytest.raises(period.IncompatibleFrequency):
  101. index.join(index3)
  102. def test_union_dataframe_index(self):
  103. rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M')
  104. s1 = pd.Series(np.random.randn(len(rng1)), rng1)
  105. rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M')
  106. s2 = pd.Series(np.random.randn(len(rng2)), rng2)
  107. df = pd.DataFrame({'s1': s1, 's2': s2})
  108. exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
  109. tm.assert_index_equal(df.index, exp)
  110. @pytest.mark.parametrize("sort", [None, False])
  111. def test_intersection(self, sort):
  112. index = period_range('1/1/2000', '1/20/2000', freq='D')
  113. result = index[:-5].intersection(index[10:], sort=sort)
  114. tm.assert_index_equal(result, index[10:-5])
  115. # not in order
  116. left = _permute(index[:-5])
  117. right = _permute(index[10:])
  118. result = left.intersection(right, sort=sort)
  119. if sort is None:
  120. tm.assert_index_equal(result, index[10:-5])
  121. assert tm.equalContents(result, index[10:-5])
  122. # raise if different frequencies
  123. index = period_range('1/1/2000', '1/20/2000', freq='D')
  124. index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED')
  125. with pytest.raises(period.IncompatibleFrequency):
  126. index.intersection(index2, sort=sort)
  127. index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
  128. with pytest.raises(period.IncompatibleFrequency):
  129. index.intersection(index3, sort=sort)
  130. @pytest.mark.parametrize("sort", [None, False])
  131. def test_intersection_cases(self, sort):
  132. base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')
  133. # if target has the same name, it is preserved
  134. rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
  135. expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
  136. name='idx')
  137. # if target name is different, it will be reset
  138. rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
  139. expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
  140. name=None)
  141. rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
  142. expected4 = PeriodIndex([], name='idx', freq='D')
  143. for (rng, expected) in [(rng2, expected2), (rng3, expected3),
  144. (rng4, expected4)]:
  145. result = base.intersection(rng, sort=sort)
  146. tm.assert_index_equal(result, expected)
  147. assert result.name == expected.name
  148. assert result.freq == expected.freq
  149. # non-monotonic
  150. base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
  151. '2011-01-03'], freq='D', name='idx')
  152. rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
  153. '2011-02-02', '2011-02-03'],
  154. freq='D', name='idx')
  155. expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
  156. name='idx')
  157. rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
  158. '2011-02-03'],
  159. freq='D', name='other')
  160. expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
  161. name=None)
  162. rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
  163. expected4 = PeriodIndex([], freq='D', name='idx')
  164. for (rng, expected) in [(rng2, expected2), (rng3, expected3),
  165. (rng4, expected4)]:
  166. result = base.intersection(rng, sort=sort)
  167. if sort is None:
  168. expected = expected.sort_values()
  169. tm.assert_index_equal(result, expected)
  170. assert result.name == expected.name
  171. assert result.freq == 'D'
  172. # empty same freq
  173. rng = date_range('6/1/2000', '6/15/2000', freq='T')
  174. result = rng[0:0].intersection(rng)
  175. assert len(result) == 0
  176. result = rng.intersection(rng[0:0])
  177. assert len(result) == 0
  178. @pytest.mark.parametrize("sort", [None, False])
  179. def test_difference(self, sort):
  180. # diff
  181. period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000',
  182. '1/4/2000']
  183. rng1 = pd.PeriodIndex(period_rng, freq='D')
  184. other1 = pd.period_range('1/6/2000', freq='D', periods=5)
  185. expected1 = rng1
  186. rng2 = pd.PeriodIndex(period_rng, freq='D')
  187. other2 = pd.period_range('1/4/2000', freq='D', periods=5)
  188. expected2 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000'],
  189. freq='D')
  190. rng3 = pd.PeriodIndex(period_rng, freq='D')
  191. other3 = pd.PeriodIndex([], freq='D')
  192. expected3 = rng3
  193. period_rng = ['2000-01-01 10:00', '2000-01-01 09:00',
  194. '2000-01-01 12:00', '2000-01-01 11:00',
  195. '2000-01-01 13:00']
  196. rng4 = pd.PeriodIndex(period_rng, freq='H')
  197. other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5)
  198. expected4 = rng4
  199. rng5 = pd.PeriodIndex(['2000-01-01 09:03', '2000-01-01 09:01',
  200. '2000-01-01 09:05'], freq='T')
  201. other5 = pd.PeriodIndex(
  202. ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T')
  203. expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T')
  204. period_rng = ['2000-02-01', '2000-01-01', '2000-06-01',
  205. '2000-07-01', '2000-05-01', '2000-03-01',
  206. '2000-04-01']
  207. rng6 = pd.PeriodIndex(period_rng, freq='M')
  208. other6 = pd.period_range('2000-04-01', freq='M', periods=7)
  209. expected6 = pd.PeriodIndex(['2000-02-01', '2000-01-01', '2000-03-01'],
  210. freq='M')
  211. period_rng = ['2003', '2007', '2006', '2005', '2004']
  212. rng7 = pd.PeriodIndex(period_rng, freq='A')
  213. other7 = pd.period_range('1998-01-01', freq='A', periods=8)
  214. expected7 = pd.PeriodIndex(['2007', '2006'], freq='A')
  215. for rng, other, expected in [(rng1, other1, expected1),
  216. (rng2, other2, expected2),
  217. (rng3, other3, expected3),
  218. (rng4, other4, expected4),
  219. (rng5, other5, expected5),
  220. (rng6, other6, expected6),
  221. (rng7, other7, expected7), ]:
  222. result_difference = rng.difference(other, sort=sort)
  223. if sort is None:
  224. expected = expected.sort_values()
  225. tm.assert_index_equal(result_difference, expected)