test_setops.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame, DatetimeIndex, Index, Int64Index, Series, bdate_range,
  8. date_range, to_datetime)
  9. import pandas.util.testing as tm
  10. from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd
  11. START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
  12. class TestDatetimeIndexSetOps(object):
  13. tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore',
  14. 'dateutil/US/Pacific']
  15. # TODO: moved from test_datetimelike; dedup with version below
  16. def test_union2(self):
  17. everything = tm.makeDateIndex(10)
  18. first = everything[:5]
  19. second = everything[5:]
  20. union = first.union(second)
  21. assert tm.equalContents(union, everything)
  22. # GH 10149
  23. cases = [klass(second.values) for klass in [np.array, Series, list]]
  24. for case in cases:
  25. result = first.union(case)
  26. assert tm.equalContents(result, everything)
  27. @pytest.mark.parametrize("tz", tz)
  28. def test_union(self, tz):
  29. rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
  30. other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
  31. expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz)
  32. rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
  33. other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
  34. expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz)
  35. rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
  36. other3 = pd.DatetimeIndex([], tz=tz)
  37. expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz)
  38. for rng, other, expected in [(rng1, other1, expected1),
  39. (rng2, other2, expected2),
  40. (rng3, other3, expected3)]:
  41. result_union = rng.union(other)
  42. tm.assert_index_equal(result_union, expected)
  43. def test_union_coverage(self):
  44. idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
  45. ordered = DatetimeIndex(idx.sort_values(), freq='infer')
  46. result = ordered.union(idx)
  47. tm.assert_index_equal(result, ordered)
  48. result = ordered[:0].union(ordered)
  49. tm.assert_index_equal(result, ordered)
  50. assert result.freq == ordered.freq
  51. def test_union_bug_1730(self):
  52. rng_a = date_range('1/1/2012', periods=4, freq='3H')
  53. rng_b = date_range('1/1/2012', periods=4, freq='4H')
  54. result = rng_a.union(rng_b)
  55. exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
  56. tm.assert_index_equal(result, exp)
  57. def test_union_bug_1745(self):
  58. left = DatetimeIndex(['2012-05-11 15:19:49.695000'])
  59. right = DatetimeIndex(['2012-05-29 13:04:21.322000',
  60. '2012-05-11 15:27:24.873000',
  61. '2012-05-11 15:31:05.350000'])
  62. result = left.union(right)
  63. exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
  64. tm.assert_index_equal(result, exp)
  65. def test_union_bug_4564(self):
  66. from pandas import DateOffset
  67. left = date_range("2013-01-01", "2013-02-01")
  68. right = left + DateOffset(minutes=15)
  69. result = left.union(right)
  70. exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
  71. tm.assert_index_equal(result, exp)
  72. def test_union_freq_both_none(self):
  73. # GH11086
  74. expected = bdate_range('20150101', periods=10)
  75. expected.freq = None
  76. result = expected.union(expected)
  77. tm.assert_index_equal(result, expected)
  78. assert result.freq is None
  79. def test_union_dataframe_index(self):
  80. rng1 = date_range('1/1/1999', '1/1/2012', freq='MS')
  81. s1 = Series(np.random.randn(len(rng1)), rng1)
  82. rng2 = date_range('1/1/1980', '12/1/2001', freq='MS')
  83. s2 = Series(np.random.randn(len(rng2)), rng2)
  84. df = DataFrame({'s1': s1, 's2': s2})
  85. exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS')
  86. tm.assert_index_equal(df.index, exp)
  87. def test_union_with_DatetimeIndex(self):
  88. i1 = Int64Index(np.arange(0, 20, 2))
  89. i2 = date_range(start='2012-01-03 00:00:00', periods=10, freq='D')
  90. i1.union(i2) # Works
  91. i2.union(i1) # Fails with "AttributeError: can't set attribute"
  92. # TODO: moved from test_datetimelike; de-duplicate with version below
  93. def test_intersection2(self):
  94. first = tm.makeDateIndex(10)
  95. second = first[5:]
  96. intersect = first.intersection(second)
  97. assert tm.equalContents(intersect, second)
  98. # GH 10149
  99. cases = [klass(second.values) for klass in [np.array, Series, list]]
  100. for case in cases:
  101. result = first.intersection(case)
  102. assert tm.equalContents(result, second)
  103. third = Index(['a', 'b', 'c'])
  104. result = first.intersection(third)
  105. expected = pd.Index([], dtype=object)
  106. tm.assert_index_equal(result, expected)
  107. @pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern',
  108. 'dateutil/US/Pacific'])
  109. @pytest.mark.parametrize("sort", [None, False])
  110. def test_intersection(self, tz, sort):
  111. # GH 4690 (with tz)
  112. base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')
  113. # if target has the same name, it is preserved
  114. rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx')
  115. expected2 = date_range('6/1/2000', '6/20/2000', freq='D', name='idx')
  116. # if target name is different, it will be reset
  117. rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other')
  118. expected3 = date_range('6/1/2000', '6/20/2000', freq='D', name=None)
  119. rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx')
  120. expected4 = DatetimeIndex([], name='idx')
  121. for (rng, expected) in [(rng2, expected2), (rng3, expected3),
  122. (rng4, expected4)]:
  123. result = base.intersection(rng)
  124. tm.assert_index_equal(result, expected)
  125. assert result.name == expected.name
  126. assert result.freq == expected.freq
  127. assert result.tz == expected.tz
  128. # non-monotonic
  129. base = DatetimeIndex(['2011-01-05', '2011-01-04',
  130. '2011-01-02', '2011-01-03'],
  131. tz=tz, name='idx')
  132. rng2 = DatetimeIndex(['2011-01-04', '2011-01-02',
  133. '2011-02-02', '2011-02-03'],
  134. tz=tz, name='idx')
  135. expected2 = DatetimeIndex(['2011-01-04', '2011-01-02'],
  136. tz=tz, name='idx')
  137. rng3 = DatetimeIndex(['2011-01-04', '2011-01-02',
  138. '2011-02-02', '2011-02-03'],
  139. tz=tz, name='other')
  140. expected3 = DatetimeIndex(['2011-01-04', '2011-01-02'],
  141. tz=tz, name=None)
  142. # GH 7880
  143. rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz,
  144. name='idx')
  145. expected4 = DatetimeIndex([], tz=tz, name='idx')
  146. for (rng, expected) in [(rng2, expected2), (rng3, expected3),
  147. (rng4, expected4)]:
  148. result = base.intersection(rng, sort=sort)
  149. if sort is None:
  150. expected = expected.sort_values()
  151. tm.assert_index_equal(result, expected)
  152. assert result.name == expected.name
  153. assert result.freq is None
  154. assert result.tz == expected.tz
  155. def test_intersection_empty(self):
  156. # empty same freq GH2129
  157. rng = date_range('6/1/2000', '6/15/2000', freq='T')
  158. result = rng[0:0].intersection(rng)
  159. assert len(result) == 0
  160. result = rng.intersection(rng[0:0])
  161. assert len(result) == 0
  162. def test_intersection_bug_1708(self):
  163. from pandas import DateOffset
  164. index_1 = date_range('1/1/2012', periods=4, freq='12H')
  165. index_2 = index_1 + DateOffset(hours=1)
  166. result = index_1 & index_2
  167. assert len(result) == 0
  168. @pytest.mark.parametrize("tz", tz)
  169. @pytest.mark.parametrize("sort", [None, False])
  170. def test_difference(self, tz, sort):
  171. rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000',
  172. '1/5/2000']
  173. rng1 = pd.DatetimeIndex(rng_dates, tz=tz)
  174. other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz)
  175. expected1 = pd.DatetimeIndex(rng_dates, tz=tz)
  176. rng2 = pd.DatetimeIndex(rng_dates, tz=tz)
  177. other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz)
  178. expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz)
  179. rng3 = pd.DatetimeIndex(rng_dates, tz=tz)
  180. other3 = pd.DatetimeIndex([], tz=tz)
  181. expected3 = pd.DatetimeIndex(rng_dates, tz=tz)
  182. for rng, other, expected in [(rng1, other1, expected1),
  183. (rng2, other2, expected2),
  184. (rng3, other3, expected3)]:
  185. result_diff = rng.difference(other, sort)
  186. if sort is None:
  187. expected = expected.sort_values()
  188. tm.assert_index_equal(result_diff, expected)
  189. @pytest.mark.parametrize("sort", [None, False])
  190. def test_difference_freq(self, sort):
  191. # GH14323: difference of DatetimeIndex should not preserve frequency
  192. index = date_range("20160920", "20160925", freq="D")
  193. other = date_range("20160921", "20160924", freq="D")
  194. expected = DatetimeIndex(["20160920", "20160925"], freq=None)
  195. idx_diff = index.difference(other, sort)
  196. tm.assert_index_equal(idx_diff, expected)
  197. tm.assert_attr_equal('freq', idx_diff, expected)
  198. other = date_range("20160922", "20160925", freq="D")
  199. idx_diff = index.difference(other, sort)
  200. expected = DatetimeIndex(["20160920", "20160921"], freq=None)
  201. tm.assert_index_equal(idx_diff, expected)
  202. tm.assert_attr_equal('freq', idx_diff, expected)
  203. @pytest.mark.parametrize("sort", [None, False])
  204. def test_datetimeindex_diff(self, sort):
  205. dti1 = date_range(freq='Q-JAN', start=datetime(1997, 12, 31),
  206. periods=100)
  207. dti2 = date_range(freq='Q-JAN', start=datetime(1997, 12, 31),
  208. periods=98)
  209. assert len(dti1.difference(dti2, sort)) == 2
  210. def test_datetimeindex_union_join_empty(self):
  211. dti = date_range(start='1/1/2001', end='2/1/2001', freq='D')
  212. empty = Index([])
  213. result = dti.union(empty)
  214. assert isinstance(result, DatetimeIndex)
  215. assert result is result
  216. result = dti.join(empty)
  217. assert isinstance(result, DatetimeIndex)
  218. def test_join_nonunique(self):
  219. idx1 = to_datetime(['2012-11-06 16:00:11.477563',
  220. '2012-11-06 16:00:11.477563'])
  221. idx2 = to_datetime(['2012-11-06 15:11:09.006507',
  222. '2012-11-06 15:11:09.006507'])
  223. rs = idx1.join(idx2, how='outer')
  224. assert rs.is_monotonic
  225. class TestBusinessDatetimeIndex(object):
  226. def setup_method(self, method):
  227. self.rng = bdate_range(START, END)
  228. def test_union(self):
  229. # overlapping
  230. left = self.rng[:10]
  231. right = self.rng[5:10]
  232. the_union = left.union(right)
  233. assert isinstance(the_union, DatetimeIndex)
  234. # non-overlapping, gap in middle
  235. left = self.rng[:5]
  236. right = self.rng[10:]
  237. the_union = left.union(right)
  238. assert isinstance(the_union, Index)
  239. # non-overlapping, no gap
  240. left = self.rng[:5]
  241. right = self.rng[5:10]
  242. the_union = left.union(right)
  243. assert isinstance(the_union, DatetimeIndex)
  244. # order does not matter
  245. tm.assert_index_equal(right.union(left), the_union)
  246. # overlapping, but different offset
  247. rng = date_range(START, END, freq=BMonthEnd())
  248. the_union = self.rng.union(rng)
  249. assert isinstance(the_union, DatetimeIndex)
  250. def test_outer_join(self):
  251. # should just behave as union
  252. # overlapping
  253. left = self.rng[:10]
  254. right = self.rng[5:10]
  255. the_join = left.join(right, how='outer')
  256. assert isinstance(the_join, DatetimeIndex)
  257. # non-overlapping, gap in middle
  258. left = self.rng[:5]
  259. right = self.rng[10:]
  260. the_join = left.join(right, how='outer')
  261. assert isinstance(the_join, DatetimeIndex)
  262. assert the_join.freq is None
  263. # non-overlapping, no gap
  264. left = self.rng[:5]
  265. right = self.rng[5:10]
  266. the_join = left.join(right, how='outer')
  267. assert isinstance(the_join, DatetimeIndex)
  268. # overlapping, but different offset
  269. rng = date_range(START, END, freq=BMonthEnd())
  270. the_join = self.rng.join(rng, how='outer')
  271. assert isinstance(the_join, DatetimeIndex)
  272. assert the_join.freq is None
  273. def test_union_not_cacheable(self):
  274. rng = date_range('1/1/2000', periods=50, freq=Minute())
  275. rng1 = rng[10:]
  276. rng2 = rng[:25]
  277. the_union = rng1.union(rng2)
  278. tm.assert_index_equal(the_union, rng)
  279. rng1 = rng[10:]
  280. rng2 = rng[15:35]
  281. the_union = rng1.union(rng2)
  282. expected = rng[10:]
  283. tm.assert_index_equal(the_union, expected)
  284. def test_intersection(self):
  285. rng = date_range('1/1/2000', periods=50, freq=Minute())
  286. rng1 = rng[10:]
  287. rng2 = rng[:25]
  288. the_int = rng1.intersection(rng2)
  289. expected = rng[10:25]
  290. tm.assert_index_equal(the_int, expected)
  291. assert isinstance(the_int, DatetimeIndex)
  292. assert the_int.freq == rng.freq
  293. the_int = rng1.intersection(rng2.view(DatetimeIndex))
  294. tm.assert_index_equal(the_int, expected)
  295. # non-overlapping
  296. the_int = rng[:10].intersection(rng[10:])
  297. expected = DatetimeIndex([])
  298. tm.assert_index_equal(the_int, expected)
  299. def test_intersection_bug(self):
  300. # GH #771
  301. a = bdate_range('11/30/2011', '12/31/2011')
  302. b = bdate_range('12/10/2011', '12/20/2011')
  303. result = a.intersection(b)
  304. tm.assert_index_equal(result, b)
  305. def test_month_range_union_tz_pytz(self):
  306. from pytz import timezone
  307. tz = timezone('US/Eastern')
  308. early_start = datetime(2011, 1, 1)
  309. early_end = datetime(2011, 3, 1)
  310. late_start = datetime(2011, 3, 1)
  311. late_end = datetime(2011, 5, 1)
  312. early_dr = date_range(start=early_start, end=early_end, tz=tz,
  313. freq=MonthEnd())
  314. late_dr = date_range(start=late_start, end=late_end, tz=tz,
  315. freq=MonthEnd())
  316. early_dr.union(late_dr)
  317. @td.skip_if_windows_python_3
  318. def test_month_range_union_tz_dateutil(self):
  319. from pandas._libs.tslibs.timezones import dateutil_gettz
  320. tz = dateutil_gettz('US/Eastern')
  321. early_start = datetime(2011, 1, 1)
  322. early_end = datetime(2011, 3, 1)
  323. late_start = datetime(2011, 3, 1)
  324. late_end = datetime(2011, 5, 1)
  325. early_dr = date_range(start=early_start, end=early_end, tz=tz,
  326. freq=MonthEnd())
  327. late_dr = date_range(start=late_start, end=late_end, tz=tz,
  328. freq=MonthEnd())
  329. early_dr.union(late_dr)
  330. class TestCustomDatetimeIndex(object):
  331. def setup_method(self, method):
  332. self.rng = bdate_range(START, END, freq='C')
  333. def test_union(self):
  334. # overlapping
  335. left = self.rng[:10]
  336. right = self.rng[5:10]
  337. the_union = left.union(right)
  338. assert isinstance(the_union, DatetimeIndex)
  339. # non-overlapping, gap in middle
  340. left = self.rng[:5]
  341. right = self.rng[10:]
  342. the_union = left.union(right)
  343. assert isinstance(the_union, Index)
  344. # non-overlapping, no gap
  345. left = self.rng[:5]
  346. right = self.rng[5:10]
  347. the_union = left.union(right)
  348. assert isinstance(the_union, DatetimeIndex)
  349. # order does not matter
  350. tm.assert_index_equal(right.union(left), the_union)
  351. # overlapping, but different offset
  352. rng = date_range(START, END, freq=BMonthEnd())
  353. the_union = self.rng.union(rng)
  354. assert isinstance(the_union, DatetimeIndex)
  355. def test_outer_join(self):
  356. # should just behave as union
  357. # overlapping
  358. left = self.rng[:10]
  359. right = self.rng[5:10]
  360. the_join = left.join(right, how='outer')
  361. assert isinstance(the_join, DatetimeIndex)
  362. # non-overlapping, gap in middle
  363. left = self.rng[:5]
  364. right = self.rng[10:]
  365. the_join = left.join(right, how='outer')
  366. assert isinstance(the_join, DatetimeIndex)
  367. assert the_join.freq is None
  368. # non-overlapping, no gap
  369. left = self.rng[:5]
  370. right = self.rng[5:10]
  371. the_join = left.join(right, how='outer')
  372. assert isinstance(the_join, DatetimeIndex)
  373. # overlapping, but different offset
  374. rng = date_range(START, END, freq=BMonthEnd())
  375. the_join = self.rng.join(rng, how='outer')
  376. assert isinstance(the_join, DatetimeIndex)
  377. assert the_join.freq is None
  378. def test_intersection_bug(self):
  379. # GH #771
  380. a = bdate_range('11/30/2011', '12/31/2011', freq='C')
  381. b = bdate_range('12/10/2011', '12/20/2011', freq='C')
  382. result = a.intersection(b)
  383. tm.assert_index_equal(result, b)