common.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. from pandas._libs.tslib import iNaT
  5. import pandas.compat as compat
  6. from pandas.compat import PY3
  7. from pandas.core.dtypes.dtypes import CategoricalDtype
  8. import pandas as pd
  9. from pandas import (
  10. CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index,
  11. IntervalIndex, MultiIndex, PeriodIndex, RangeIndex, Series, TimedeltaIndex,
  12. UInt64Index, isna)
  13. from pandas.core.indexes.base import InvalidIndexError
  14. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  15. import pandas.util.testing as tm
  16. class Base(object):
  17. """ base class for index sub-class tests """
  18. _holder = None
  19. _compat_props = ['shape', 'ndim', 'size', 'nbytes']
  20. def setup_indices(self):
  21. for name, idx in self.indices.items():
  22. setattr(self, name, idx)
  23. def test_pickle_compat_construction(self):
  24. # need an object to create with
  25. pytest.raises(TypeError, self._holder)
  26. def test_to_series(self):
  27. # assert that we are creating a copy of the index
  28. idx = self.create_index()
  29. s = idx.to_series()
  30. assert s.values is not idx.values
  31. assert s.index is not idx
  32. assert s.name == idx.name
  33. def test_to_series_with_arguments(self):
  34. # GH18699
  35. # index kwarg
  36. idx = self.create_index()
  37. s = idx.to_series(index=idx)
  38. assert s.values is not idx.values
  39. assert s.index is idx
  40. assert s.name == idx.name
  41. # name kwarg
  42. idx = self.create_index()
  43. s = idx.to_series(name='__test')
  44. assert s.values is not idx.values
  45. assert s.index is not idx
  46. assert s.name != idx.name
  47. @pytest.mark.parametrize("name", [None, "new_name"])
  48. def test_to_frame(self, name):
  49. # see GH-15230, GH-22580
  50. idx = self.create_index()
  51. if name:
  52. idx_name = name
  53. else:
  54. idx_name = idx.name or 0
  55. df = idx.to_frame(name=idx_name)
  56. assert df.index is idx
  57. assert len(df.columns) == 1
  58. assert df.columns[0] == idx_name
  59. assert df[idx_name].values is not idx.values
  60. df = idx.to_frame(index=False, name=idx_name)
  61. assert df.index is not idx
  62. def test_shift(self):
  63. # GH8083 test the base class for shift
  64. idx = self.create_index()
  65. pytest.raises(NotImplementedError, idx.shift, 1)
  66. pytest.raises(NotImplementedError, idx.shift, 1, 2)
  67. def test_create_index_existing_name(self):
  68. # GH11193, when an existing index is passed, and a new name is not
  69. # specified, the new index should inherit the previous object name
  70. expected = self.create_index()
  71. if not isinstance(expected, MultiIndex):
  72. expected.name = 'foo'
  73. result = pd.Index(expected)
  74. tm.assert_index_equal(result, expected)
  75. result = pd.Index(expected, name='bar')
  76. expected.name = 'bar'
  77. tm.assert_index_equal(result, expected)
  78. else:
  79. expected.names = ['foo', 'bar']
  80. result = pd.Index(expected)
  81. tm.assert_index_equal(
  82. result, Index(Index([('foo', 'one'), ('foo', 'two'),
  83. ('bar', 'one'), ('baz', 'two'),
  84. ('qux', 'one'), ('qux', 'two')],
  85. dtype='object'),
  86. names=['foo', 'bar']))
  87. result = pd.Index(expected, names=['A', 'B'])
  88. tm.assert_index_equal(
  89. result,
  90. Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
  91. ('baz', 'two'), ('qux', 'one'), ('qux', 'two')],
  92. dtype='object'), names=['A', 'B']))
  93. def test_numeric_compat(self):
  94. idx = self.create_index()
  95. with pytest.raises(TypeError, match="cannot perform __mul__"):
  96. idx * 1
  97. with pytest.raises(TypeError, match="cannot perform __rmul__"):
  98. 1 * idx
  99. div_err = ("cannot perform __truediv__" if PY3
  100. else "cannot perform __div__")
  101. with pytest.raises(TypeError, match=div_err):
  102. idx / 1
  103. div_err = div_err.replace(' __', ' __r')
  104. with pytest.raises(TypeError, match=div_err):
  105. 1 / idx
  106. with pytest.raises(TypeError, match="cannot perform __floordiv__"):
  107. idx // 1
  108. with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
  109. 1 // idx
  110. def test_logical_compat(self):
  111. idx = self.create_index()
  112. with pytest.raises(TypeError, match='cannot perform all'):
  113. idx.all()
  114. with pytest.raises(TypeError, match='cannot perform any'):
  115. idx.any()
  116. def test_boolean_context_compat(self):
  117. # boolean context compat
  118. idx = self.create_index()
  119. with pytest.raises(ValueError, match='The truth value of a'):
  120. if idx:
  121. pass
  122. def test_reindex_base(self):
  123. idx = self.create_index()
  124. expected = np.arange(idx.size, dtype=np.intp)
  125. actual = idx.get_indexer(idx)
  126. tm.assert_numpy_array_equal(expected, actual)
  127. with pytest.raises(ValueError, match='Invalid fill method'):
  128. idx.get_indexer(idx, method='invalid')
  129. def test_get_indexer_consistency(self):
  130. # See GH 16819
  131. for name, index in self.indices.items():
  132. if isinstance(index, IntervalIndex):
  133. continue
  134. if index.is_unique or isinstance(index, CategoricalIndex):
  135. indexer = index.get_indexer(index[0:2])
  136. assert isinstance(indexer, np.ndarray)
  137. assert indexer.dtype == np.intp
  138. else:
  139. e = "Reindexing only valid with uniquely valued Index objects"
  140. with pytest.raises(InvalidIndexError, match=e):
  141. index.get_indexer(index[0:2])
  142. indexer, _ = index.get_indexer_non_unique(index[0:2])
  143. assert isinstance(indexer, np.ndarray)
  144. assert indexer.dtype == np.intp
  145. def test_ndarray_compat_properties(self):
  146. idx = self.create_index()
  147. assert idx.T.equals(idx)
  148. assert idx.transpose().equals(idx)
  149. values = idx.values
  150. for prop in self._compat_props:
  151. assert getattr(idx, prop) == getattr(values, prop)
  152. # test for validity
  153. idx.nbytes
  154. idx.values.nbytes
  155. def test_repr_roundtrip(self):
  156. idx = self.create_index()
  157. tm.assert_index_equal(eval(repr(idx)), idx)
  158. def test_str(self):
  159. # test the string repr
  160. idx = self.create_index()
  161. idx.name = 'foo'
  162. assert "'foo'" in str(idx)
  163. assert idx.__class__.__name__ in str(idx)
  164. def test_repr_max_seq_item_setting(self):
  165. # GH10182
  166. idx = self.create_index()
  167. idx = idx.repeat(50)
  168. with pd.option_context("display.max_seq_items", None):
  169. repr(idx)
  170. assert '...' not in str(idx)
  171. def test_copy_name(self):
  172. # gh-12309: Check that the "name" argument
  173. # passed at initialization is honored.
  174. for name, index in compat.iteritems(self.indices):
  175. if isinstance(index, MultiIndex):
  176. continue
  177. first = index.__class__(index, copy=True, name='mario')
  178. second = first.__class__(first, copy=False)
  179. # Even though "copy=False", we want a new object.
  180. assert first is not second
  181. # Not using tm.assert_index_equal() since names differ.
  182. assert index.equals(first)
  183. assert first.name == 'mario'
  184. assert second.name == 'mario'
  185. s1 = Series(2, index=first)
  186. s2 = Series(3, index=second[:-1])
  187. if not isinstance(index, CategoricalIndex):
  188. # See gh-13365
  189. s3 = s1 * s2
  190. assert s3.index.name == 'mario'
  191. def test_ensure_copied_data(self):
  192. # Check the "copy" argument of each Index.__new__ is honoured
  193. # GH12309
  194. for name, index in compat.iteritems(self.indices):
  195. init_kwargs = {}
  196. if isinstance(index, PeriodIndex):
  197. # Needs "freq" specification:
  198. init_kwargs['freq'] = index.freq
  199. elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
  200. # RangeIndex cannot be initialized from data
  201. # MultiIndex and CategoricalIndex are tested separately
  202. continue
  203. index_type = index.__class__
  204. result = index_type(index.values, copy=True, **init_kwargs)
  205. tm.assert_index_equal(index, result)
  206. tm.assert_numpy_array_equal(index._ndarray_values,
  207. result._ndarray_values,
  208. check_same='copy')
  209. if isinstance(index, PeriodIndex):
  210. # .values an object array of Period, thus copied
  211. result = index_type(ordinal=index.asi8, copy=False,
  212. **init_kwargs)
  213. tm.assert_numpy_array_equal(index._ndarray_values,
  214. result._ndarray_values,
  215. check_same='same')
  216. elif isinstance(index, IntervalIndex):
  217. # checked in test_interval.py
  218. pass
  219. else:
  220. result = index_type(index.values, copy=False, **init_kwargs)
  221. tm.assert_numpy_array_equal(index.values, result.values,
  222. check_same='same')
  223. tm.assert_numpy_array_equal(index._ndarray_values,
  224. result._ndarray_values,
  225. check_same='same')
  226. def test_memory_usage(self):
  227. for name, index in compat.iteritems(self.indices):
  228. result = index.memory_usage()
  229. if len(index):
  230. index.get_loc(index[0])
  231. result2 = index.memory_usage()
  232. result3 = index.memory_usage(deep=True)
  233. # RangeIndex, IntervalIndex
  234. # don't have engines
  235. if not isinstance(index, (RangeIndex, IntervalIndex)):
  236. assert result2 > result
  237. if index.inferred_type == 'object':
  238. assert result3 > result2
  239. else:
  240. # we report 0 for no-length
  241. assert result == 0
  242. def test_argsort(self):
  243. for k, ind in self.indices.items():
  244. # separately tested
  245. if k in ['catIndex']:
  246. continue
  247. result = ind.argsort()
  248. expected = np.array(ind).argsort()
  249. tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  250. def test_numpy_argsort(self):
  251. for k, ind in self.indices.items():
  252. result = np.argsort(ind)
  253. expected = ind.argsort()
  254. tm.assert_numpy_array_equal(result, expected)
  255. # these are the only two types that perform
  256. # pandas compatibility input validation - the
  257. # rest already perform separate (or no) such
  258. # validation via their 'values' attribute as
  259. # defined in pandas.core.indexes/base.py - they
  260. # cannot be changed at the moment due to
  261. # backwards compatibility concerns
  262. if isinstance(type(ind), (CategoricalIndex, RangeIndex)):
  263. msg = "the 'axis' parameter is not supported"
  264. with pytest.raises(ValueError, match=msg):
  265. np.argsort(ind, axis=1)
  266. msg = "the 'kind' parameter is not supported"
  267. with pytest.raises(ValueError, match=msg):
  268. np.argsort(ind, kind='mergesort')
  269. msg = "the 'order' parameter is not supported"
  270. with pytest.raises(ValueError, match=msg):
  271. np.argsort(ind, order=('a', 'b'))
  272. def test_take(self):
  273. indexer = [4, 3, 0, 2]
  274. for k, ind in self.indices.items():
  275. # separate
  276. if k in ['boolIndex', 'tuples', 'empty']:
  277. continue
  278. result = ind.take(indexer)
  279. expected = ind[indexer]
  280. assert result.equals(expected)
  281. if not isinstance(ind,
  282. (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
  283. # GH 10791
  284. with pytest.raises(AttributeError):
  285. ind.freq
  286. def test_take_invalid_kwargs(self):
  287. idx = self.create_index()
  288. indices = [1, 2]
  289. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  290. with pytest.raises(TypeError, match=msg):
  291. idx.take(indices, foo=2)
  292. msg = "the 'out' parameter is not supported"
  293. with pytest.raises(ValueError, match=msg):
  294. idx.take(indices, out=indices)
  295. msg = "the 'mode' parameter is not supported"
  296. with pytest.raises(ValueError, match=msg):
  297. idx.take(indices, mode='clip')
  298. def test_repeat(self):
  299. rep = 2
  300. i = self.create_index()
  301. expected = pd.Index(i.values.repeat(rep), name=i.name)
  302. tm.assert_index_equal(i.repeat(rep), expected)
  303. i = self.create_index()
  304. rep = np.arange(len(i))
  305. expected = pd.Index(i.values.repeat(rep), name=i.name)
  306. tm.assert_index_equal(i.repeat(rep), expected)
  307. def test_numpy_repeat(self):
  308. rep = 2
  309. i = self.create_index()
  310. expected = i.repeat(rep)
  311. tm.assert_index_equal(np.repeat(i, rep), expected)
  312. msg = "the 'axis' parameter is not supported"
  313. with pytest.raises(ValueError, match=msg):
  314. np.repeat(i, rep, axis=0)
  315. @pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
  316. def test_where(self, klass):
  317. i = self.create_index()
  318. cond = [True] * len(i)
  319. result = i.where(klass(cond))
  320. expected = i
  321. tm.assert_index_equal(result, expected)
  322. cond = [False] + [True] * len(i[1:])
  323. expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype)
  324. result = i.where(klass(cond))
  325. tm.assert_index_equal(result, expected)
  326. @pytest.mark.parametrize("case", [0.5, "xxx"])
  327. @pytest.mark.parametrize("method", ["intersection", "union",
  328. "difference", "symmetric_difference"])
  329. def test_set_ops_error_cases(self, case, method):
  330. for name, idx in compat.iteritems(self.indices):
  331. # non-iterable input
  332. msg = "Input must be Index or array-like"
  333. with pytest.raises(TypeError, match=msg):
  334. getattr(idx, method)(case)
  335. def test_intersection_base(self):
  336. for name, idx in compat.iteritems(self.indices):
  337. first = idx[:5]
  338. second = idx[:3]
  339. intersect = first.intersection(second)
  340. if isinstance(idx, CategoricalIndex):
  341. pass
  342. else:
  343. assert tm.equalContents(intersect, second)
  344. # GH 10149
  345. cases = [klass(second.values)
  346. for klass in [np.array, Series, list]]
  347. for case in cases:
  348. if isinstance(idx, PeriodIndex):
  349. msg = "can only call with other PeriodIndex-ed objects"
  350. with pytest.raises(ValueError, match=msg):
  351. first.intersection(case)
  352. elif isinstance(idx, CategoricalIndex):
  353. pass
  354. else:
  355. result = first.intersection(case)
  356. assert tm.equalContents(result, second)
  357. if isinstance(idx, MultiIndex):
  358. msg = "other must be a MultiIndex or a list of tuples"
  359. with pytest.raises(TypeError, match=msg):
  360. first.intersection([1, 2, 3])
  361. def test_union_base(self):
  362. for name, idx in compat.iteritems(self.indices):
  363. first = idx[3:]
  364. second = idx[:5]
  365. everything = idx
  366. union = first.union(second)
  367. assert tm.equalContents(union, everything)
  368. # GH 10149
  369. cases = [klass(second.values)
  370. for klass in [np.array, Series, list]]
  371. for case in cases:
  372. if isinstance(idx, PeriodIndex):
  373. msg = "can only call with other PeriodIndex-ed objects"
  374. with pytest.raises(ValueError, match=msg):
  375. first.union(case)
  376. elif isinstance(idx, CategoricalIndex):
  377. pass
  378. else:
  379. result = first.union(case)
  380. assert tm.equalContents(result, everything)
  381. if isinstance(idx, MultiIndex):
  382. msg = "other must be a MultiIndex or a list of tuples"
  383. with pytest.raises(TypeError, match=msg):
  384. first.union([1, 2, 3])
  385. @pytest.mark.parametrize("sort", [None, False])
  386. def test_difference_base(self, sort):
  387. for name, idx in compat.iteritems(self.indices):
  388. first = idx[2:]
  389. second = idx[:4]
  390. answer = idx[4:]
  391. result = first.difference(second, sort)
  392. if isinstance(idx, CategoricalIndex):
  393. pass
  394. else:
  395. assert tm.equalContents(result, answer)
  396. # GH 10149
  397. cases = [klass(second.values)
  398. for klass in [np.array, Series, list]]
  399. for case in cases:
  400. if isinstance(idx, PeriodIndex):
  401. msg = "can only call with other PeriodIndex-ed objects"
  402. with pytest.raises(ValueError, match=msg):
  403. first.difference(case, sort)
  404. elif isinstance(idx, CategoricalIndex):
  405. pass
  406. elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
  407. assert result.__class__ == answer.__class__
  408. tm.assert_numpy_array_equal(result.sort_values().asi8,
  409. answer.sort_values().asi8)
  410. else:
  411. result = first.difference(case, sort)
  412. assert tm.equalContents(result, answer)
  413. if isinstance(idx, MultiIndex):
  414. msg = "other must be a MultiIndex or a list of tuples"
  415. with pytest.raises(TypeError, match=msg):
  416. first.difference([1, 2, 3], sort)
  417. def test_symmetric_difference(self):
  418. for name, idx in compat.iteritems(self.indices):
  419. first = idx[1:]
  420. second = idx[:-1]
  421. if isinstance(idx, CategoricalIndex):
  422. pass
  423. else:
  424. answer = idx[[0, -1]]
  425. result = first.symmetric_difference(second)
  426. assert tm.equalContents(result, answer)
  427. # GH 10149
  428. cases = [klass(second.values)
  429. for klass in [np.array, Series, list]]
  430. for case in cases:
  431. if isinstance(idx, PeriodIndex):
  432. msg = "can only call with other PeriodIndex-ed objects"
  433. with pytest.raises(ValueError, match=msg):
  434. first.symmetric_difference(case)
  435. elif isinstance(idx, CategoricalIndex):
  436. pass
  437. else:
  438. result = first.symmetric_difference(case)
  439. assert tm.equalContents(result, answer)
  440. if isinstance(idx, MultiIndex):
  441. msg = "other must be a MultiIndex or a list of tuples"
  442. with pytest.raises(TypeError, match=msg):
  443. first.symmetric_difference([1, 2, 3])
  444. def test_insert_base(self):
  445. for name, idx in compat.iteritems(self.indices):
  446. result = idx[1:4]
  447. if not len(idx):
  448. continue
  449. # test 0th element
  450. assert idx[0:4].equals(result.insert(0, idx[0]))
  451. def test_delete_base(self):
  452. for name, idx in compat.iteritems(self.indices):
  453. if not len(idx):
  454. continue
  455. if isinstance(idx, RangeIndex):
  456. # tested in class
  457. continue
  458. expected = idx[1:]
  459. result = idx.delete(0)
  460. assert result.equals(expected)
  461. assert result.name == expected.name
  462. expected = idx[:-1]
  463. result = idx.delete(-1)
  464. assert result.equals(expected)
  465. assert result.name == expected.name
  466. with pytest.raises((IndexError, ValueError)):
  467. # either depending on numpy version
  468. idx.delete(len(idx))
  469. def test_equals(self):
  470. for name, idx in compat.iteritems(self.indices):
  471. assert idx.equals(idx)
  472. assert idx.equals(idx.copy())
  473. assert idx.equals(idx.astype(object))
  474. assert not idx.equals(list(idx))
  475. assert not idx.equals(np.array(idx))
  476. # Cannot pass in non-int64 dtype to RangeIndex
  477. if not isinstance(idx, RangeIndex):
  478. same_values = Index(idx, dtype=object)
  479. assert idx.equals(same_values)
  480. assert same_values.equals(idx)
  481. if idx.nlevels == 1:
  482. # do not test MultiIndex
  483. assert not idx.equals(pd.Series(idx))
  484. def test_equals_op(self):
  485. # GH9947, GH10637
  486. index_a = self.create_index()
  487. if isinstance(index_a, PeriodIndex):
  488. pytest.skip('Skip check for PeriodIndex')
  489. n = len(index_a)
  490. index_b = index_a[0:-1]
  491. index_c = index_a[0:-1].append(index_a[-2:-1])
  492. index_d = index_a[0:1]
  493. msg = "Lengths must match|could not be broadcast"
  494. with pytest.raises(ValueError, match=msg):
  495. index_a == index_b
  496. expected1 = np.array([True] * n)
  497. expected2 = np.array([True] * (n - 1) + [False])
  498. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  499. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  500. # test comparisons with numpy arrays
  501. array_a = np.array(index_a)
  502. array_b = np.array(index_a[0:-1])
  503. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  504. array_d = np.array(index_a[0:1])
  505. with pytest.raises(ValueError, match=msg):
  506. index_a == array_b
  507. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  508. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  509. # test comparisons with Series
  510. series_a = Series(array_a)
  511. series_b = Series(array_b)
  512. series_c = Series(array_c)
  513. series_d = Series(array_d)
  514. with pytest.raises(ValueError, match=msg):
  515. index_a == series_b
  516. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  517. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  518. # cases where length is 1 for one of them
  519. with pytest.raises(ValueError, match="Lengths must match"):
  520. index_a == index_d
  521. with pytest.raises(ValueError, match="Lengths must match"):
  522. index_a == series_d
  523. with pytest.raises(ValueError, match="Lengths must match"):
  524. index_a == array_d
  525. msg = "Can only compare identically-labeled Series objects"
  526. with pytest.raises(ValueError, match=msg):
  527. series_a == series_d
  528. with pytest.raises(ValueError, match="Lengths must match"):
  529. series_a == array_d
  530. # comparing with a scalar should broadcast; note that we are excluding
  531. # MultiIndex because in this case each item in the index is a tuple of
  532. # length 2, and therefore is considered an array of length 2 in the
  533. # comparison instead of a scalar
  534. if not isinstance(index_a, MultiIndex):
  535. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  536. # assuming the 2nd to last item is unique in the data
  537. item = index_a[-2]
  538. tm.assert_numpy_array_equal(index_a == item, expected3)
  539. tm.assert_series_equal(series_a == item, Series(expected3))
  540. def test_numpy_ufuncs(self):
  541. # test ufuncs of numpy, see:
  542. # http://docs.scipy.org/doc/numpy/reference/ufuncs.html
  543. for name, idx in compat.iteritems(self.indices):
  544. for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10,
  545. np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin,
  546. np.arccos, np.arctan, np.sinh, np.cosh, np.tanh,
  547. np.arcsinh, np.arccosh, np.arctanh, np.deg2rad,
  548. np.rad2deg]:
  549. if isinstance(idx, DatetimeIndexOpsMixin):
  550. # raise TypeError or ValueError (PeriodIndex)
  551. # PeriodIndex behavior should be changed in future version
  552. with pytest.raises(Exception):
  553. with np.errstate(all='ignore'):
  554. func(idx)
  555. elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)):
  556. # coerces to float (e.g. np.sin)
  557. with np.errstate(all='ignore'):
  558. result = func(idx)
  559. exp = Index(func(idx.values), name=idx.name)
  560. tm.assert_index_equal(result, exp)
  561. assert isinstance(result, pd.Float64Index)
  562. else:
  563. # raise AttributeError or TypeError
  564. if len(idx) == 0:
  565. continue
  566. else:
  567. with pytest.raises(Exception):
  568. with np.errstate(all='ignore'):
  569. func(idx)
  570. for func in [np.isfinite, np.isinf, np.isnan, np.signbit]:
  571. if isinstance(idx, DatetimeIndexOpsMixin):
  572. # raise TypeError or ValueError (PeriodIndex)
  573. with pytest.raises(Exception):
  574. func(idx)
  575. elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)):
  576. # Results in bool array
  577. result = func(idx)
  578. assert isinstance(result, np.ndarray)
  579. assert not isinstance(result, Index)
  580. else:
  581. if len(idx) == 0:
  582. continue
  583. else:
  584. with pytest.raises(Exception):
  585. func(idx)
  586. def test_hasnans_isnans(self):
  587. # GH 11343, added tests for hasnans / isnans
  588. for name, index in self.indices.items():
  589. if isinstance(index, MultiIndex):
  590. pass
  591. else:
  592. idx = index.copy()
  593. # cases in indices doesn't include NaN
  594. expected = np.array([False] * len(idx), dtype=bool)
  595. tm.assert_numpy_array_equal(idx._isnan, expected)
  596. assert idx.hasnans is False
  597. idx = index.copy()
  598. values = np.asarray(idx.values)
  599. if len(index) == 0:
  600. continue
  601. elif isinstance(index, DatetimeIndexOpsMixin):
  602. values[1] = iNaT
  603. elif isinstance(index, (Int64Index, UInt64Index)):
  604. continue
  605. else:
  606. values[1] = np.nan
  607. if isinstance(index, PeriodIndex):
  608. idx = index.__class__(values, freq=index.freq)
  609. else:
  610. idx = index.__class__(values)
  611. expected = np.array([False] * len(idx), dtype=bool)
  612. expected[1] = True
  613. tm.assert_numpy_array_equal(idx._isnan, expected)
  614. assert idx.hasnans is True
  615. def test_fillna(self):
  616. # GH 11343
  617. for name, index in self.indices.items():
  618. if len(index) == 0:
  619. pass
  620. elif isinstance(index, MultiIndex):
  621. idx = index.copy()
  622. msg = "isna is not defined for MultiIndex"
  623. with pytest.raises(NotImplementedError, match=msg):
  624. idx.fillna(idx[0])
  625. else:
  626. idx = index.copy()
  627. result = idx.fillna(idx[0])
  628. tm.assert_index_equal(result, idx)
  629. assert result is not idx
  630. msg = "'value' must be a scalar, passed: "
  631. with pytest.raises(TypeError, match=msg):
  632. idx.fillna([idx[0]])
  633. idx = index.copy()
  634. values = np.asarray(idx.values)
  635. if isinstance(index, DatetimeIndexOpsMixin):
  636. values[1] = iNaT
  637. elif isinstance(index, (Int64Index, UInt64Index)):
  638. continue
  639. else:
  640. values[1] = np.nan
  641. if isinstance(index, PeriodIndex):
  642. idx = index.__class__(values, freq=index.freq)
  643. else:
  644. idx = index.__class__(values)
  645. expected = np.array([False] * len(idx), dtype=bool)
  646. expected[1] = True
  647. tm.assert_numpy_array_equal(idx._isnan, expected)
  648. assert idx.hasnans is True
  649. def test_nulls(self):
  650. # this is really a smoke test for the methods
  651. # as these are adequately tested for function elsewhere
  652. for name, index in self.indices.items():
  653. if len(index) == 0:
  654. tm.assert_numpy_array_equal(
  655. index.isna(), np.array([], dtype=bool))
  656. elif isinstance(index, MultiIndex):
  657. idx = index.copy()
  658. msg = "isna is not defined for MultiIndex"
  659. with pytest.raises(NotImplementedError, match=msg):
  660. idx.isna()
  661. else:
  662. if not index.hasnans:
  663. tm.assert_numpy_array_equal(
  664. index.isna(), np.zeros(len(index), dtype=bool))
  665. tm.assert_numpy_array_equal(
  666. index.notna(), np.ones(len(index), dtype=bool))
  667. else:
  668. result = isna(index)
  669. tm.assert_numpy_array_equal(index.isna(), result)
  670. tm.assert_numpy_array_equal(index.notna(), ~result)
  671. def test_empty(self):
  672. # GH 15270
  673. index = self.create_index()
  674. assert not index.empty
  675. assert index[:0].empty
  676. def test_join_self_unique(self, join_type):
  677. index = self.create_index()
  678. if index.is_unique:
  679. joined = index.join(index, how=join_type)
  680. assert (index == joined).all()
  681. def test_map(self):
  682. # callable
  683. index = self.create_index()
  684. # we don't infer UInt64
  685. if isinstance(index, pd.UInt64Index):
  686. expected = index.astype('int64')
  687. else:
  688. expected = index
  689. result = index.map(lambda x: x)
  690. tm.assert_index_equal(result, expected)
  691. @pytest.mark.parametrize(
  692. "mapper",
  693. [
  694. lambda values, index: {i: e for e, i in zip(values, index)},
  695. lambda values, index: pd.Series(values, index)])
  696. def test_map_dictlike(self, mapper):
  697. index = self.create_index()
  698. if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
  699. pytest.skip("skipping tests for {}".format(type(index)))
  700. identity = mapper(index.values, index)
  701. # we don't infer to UInt64 for a dict
  702. if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
  703. expected = index.astype('int64')
  704. else:
  705. expected = index
  706. result = index.map(identity)
  707. tm.assert_index_equal(result, expected)
  708. # empty mappable
  709. expected = pd.Index([np.nan] * len(index))
  710. result = index.map(mapper(expected, index))
  711. tm.assert_index_equal(result, expected)
  712. def test_putmask_with_wrong_mask(self):
  713. # GH18368
  714. index = self.create_index()
  715. with pytest.raises(ValueError):
  716. index.putmask(np.ones(len(index) + 1, np.bool), 1)
  717. with pytest.raises(ValueError):
  718. index.putmask(np.ones(len(index) - 1, np.bool), 1)
  719. with pytest.raises(ValueError):
  720. index.putmask('foo', 1)
  721. @pytest.mark.parametrize('copy', [True, False])
  722. @pytest.mark.parametrize('name', [None, 'foo'])
  723. @pytest.mark.parametrize('ordered', [True, False])
  724. def test_astype_category(self, copy, name, ordered):
  725. # GH 18630
  726. index = self.create_index()
  727. if name:
  728. index = index.rename(name)
  729. # standard categories
  730. dtype = CategoricalDtype(ordered=ordered)
  731. result = index.astype(dtype, copy=copy)
  732. expected = CategoricalIndex(index.values, name=name, ordered=ordered)
  733. tm.assert_index_equal(result, expected)
  734. # non-standard categories
  735. dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
  736. result = index.astype(dtype, copy=copy)
  737. expected = CategoricalIndex(index.values, name=name, dtype=dtype)
  738. tm.assert_index_equal(result, expected)
  739. if ordered is False:
  740. # dtype='category' defaults to ordered=False, so only test once
  741. result = index.astype('category', copy=copy)
  742. expected = CategoricalIndex(index.values, name=name)
  743. tm.assert_index_equal(result, expected)
  744. def test_is_unique(self):
  745. # initialize a unique index
  746. index = self.create_index().drop_duplicates()
  747. assert index.is_unique is True
  748. # empty index should be unique
  749. index_empty = index[:0]
  750. assert index_empty.is_unique is True
  751. # test basic dupes
  752. index_dup = index.insert(0, index[0])
  753. assert index_dup.is_unique is False
  754. # single NA should be unique
  755. index_na = index.insert(0, np.nan)
  756. assert index_na.is_unique is True
  757. # multiple NA should not be unique
  758. index_na_dup = index_na.insert(0, np.nan)
  759. assert index_na_dup.is_unique is False