test_base.py 99 KB


  1. # -*- coding: utf-8 -*-
  2. from collections import defaultdict
  3. from datetime import datetime, timedelta
  4. import math
  5. import operator
  6. import sys
  7. import numpy as np
  8. import pytest
  9. from pandas._libs.tslib import Timestamp
  10. from pandas.compat import (
  11. PY3, PY35, PY36, StringIO, lrange, lzip, range, text_type, u, zip)
  12. from pandas.compat.numpy import np_datetime64_compat
  13. from pandas.core.dtypes.common import is_unsigned_integer_dtype
  14. from pandas.core.dtypes.generic import ABCIndex
  15. import pandas as pd
  16. from pandas import (
  17. CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, Int64Index,
  18. PeriodIndex, RangeIndex, Series, TimedeltaIndex, UInt64Index, date_range,
  19. isna, period_range)
  20. import pandas.core.config as cf
  21. from pandas.core.index import _get_combined_index, ensure_index_from_sequences
  22. from pandas.core.indexes.api import Index, MultiIndex
  23. from pandas.core.sorting import safe_sort
  24. from pandas.tests.indexes.common import Base
  25. import pandas.util.testing as tm
  26. from pandas.util.testing import assert_almost_equal
  27. class TestIndex(Base):
  28. _holder = Index
  def setup_method(self, method):
      """
      Build the dictionary of named indexes that the tests draw from.

      ``method`` is accepted for the setup hook signature but is not
      used here.
      """
      self.indices = {
          'unicodeIndex': tm.makeUnicodeIndex(100),
          'strIndex': tm.makeStringIndex(100),
          'dateIndex': tm.makeDateIndex(100),
          'periodIndex': tm.makePeriodIndex(100),
          'tdIndex': tm.makeTimedeltaIndex(100),
          'intIndex': tm.makeIntIndex(100),
          'uintIndex': tm.makeUIntIndex(100),
          'rangeIndex': tm.makeRangeIndex(100),
          'floatIndex': tm.makeFloatIndex(100),
          'boolIndex': Index([True, False]),
          'catIndex': tm.makeCategoricalIndex(100),
          'empty': Index([]),
          'tuples': MultiIndex.from_tuples(
              lzip(['foo', 'bar', 'baz'], [1, 2, 3])),
          'repeats': Index([0, 0, 1, 1, 2, 2]),
      }
      # NOTE(review): setup_indices is defined outside this view; the
      # tests read e.g. self.strIndex, so it presumably exposes every
      # entry of self.indices as an attribute -- confirm against Base.
      self.setup_indices()
  def create_index(self):
      """Return an Index holding the five labels 'a' through 'e'."""
      labels = list('abcde')
      return Index(labels)
  def generate_index_types(self, skip_index_keys=None):
      """
      Return a generator of the various index types, leaving
      out the ones with a key in skip_index_keys

      Parameters
      ----------
      skip_index_keys : container of str, optional
          Keys of ``self.indices`` to leave out. ``None`` (the default)
          skips nothing.

      Yields
      ------
      tuple
          ``(key, index)`` for every entry of ``self.indices`` whose key
          is not in ``skip_index_keys``.
      """
      # A None sentinel replaces the former mutable ``[]`` default, which
      # is a single list object shared by every call.
      skipped = () if skip_index_keys is None else skip_index_keys
      for key, index in self.indices.items():
          if key not in skipped:
              yield key, index
  def test_can_hold_identifiers(self):
      """The index from create_index accepts its own first label."""
      idx = self.create_index()
      first_label = idx[0]
      outcome = idx._can_hold_identifiers_and_holds_name(first_label)
      assert outcome is True
  def test_new_axis(self):
      """Indexing with ``[None, :]`` gives a 2-D numpy array."""
      expanded = self.dateIndex[None, :]
      assert expanded.ndim == 2
      assert isinstance(expanded, np.ndarray)
  def test_copy_and_deepcopy(self):
      """Copying the int index with ``dtype=int`` keeps an integer kind."""
      copied = self.intIndex.copy(dtype=int)
      kind = copied.dtype.kind
      assert kind == 'i'
  @pytest.mark.parametrize("attr", ['strIndex', 'dateIndex'])
  def test_constructor_regular(self, attr):
      """A regularly created index contains all of its own elements."""
      created = getattr(self, attr)
      tm.assert_contains_all(created, created)
  def test_constructor_casting(self):
      """An Index built from a numpy array of the string index matches it."""
      as_array = np.array(self.strIndex)
      rebuilt = Index(as_array)
      tm.assert_contains_all(as_array, rebuilt)
      tm.assert_index_equal(self.strIndex, rebuilt)
  def test_constructor_copy(self):
      """``Index(arr, copy=True)`` is unaffected by later writes to ``arr``."""
      source = np.array(self.strIndex)
      copied = Index(source, copy=True, name='name')
      assert isinstance(copied, Index)
      assert copied.name == 'name'
      tm.assert_numpy_array_equal(source, copied.values)
      # write to the source afterwards; the copy must not see it
      source[0] = "SOMEBIGLONGSTRING"
      assert copied[0] != "SOMEBIGLONGSTRING"
      # TODO: undecided -- what to do here?
      #   arr = np.array(5.)
      #   pytest.raises(Exception, arr.view, Index)
  def test_constructor_corner(self):
      """Corner case: a bare scalar is rejected with TypeError."""
      with pytest.raises(TypeError):
          Index(0)
  @pytest.mark.parametrize("index_vals", [
      [('A', 1), 'B'],
      ['B', ('A', 1)],
  ])
  def test_construction_list_mixed_tuples(self, index_vals):
      # see gh-10697: a list mixing tuples and non-tuples must give a
      # plain Index whichever element comes first.
      built = Index(index_vals)
      assert isinstance(built, Index)
      is_multi = isinstance(built, MultiIndex)
      assert not is_multi
  @pytest.mark.parametrize('na_value', [None, np.nan])
  @pytest.mark.parametrize('vtype', [list, tuple, iter])
  def test_construction_list_tuples_nan(self, na_value, vtype):
      # GH 18505 : valid tuples containing NaN
      pairs = [(1, 'two'), (3., na_value)]
      container = vtype(pairs)
      result = Index(container)
      expected = MultiIndex.from_tuples(pairs)
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("cast_as_obj", [True, False])
  @pytest.mark.parametrize("index", [
      pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                    tz='US/Eastern', name='Green Eggs & Ham'),  # DTI with tz
      pd.date_range('2015-01-01 10:00', freq='D', periods=3),  # DTI no tz
      pd.timedelta_range('1 days', freq='D', periods=3),  # td
      pd.period_range('2015-01-01', freq='D', periods=3)  # period
  ])
  def test_constructor_from_index_dtlike(self, cast_as_obj, index):
      """pd.Index built from a datetime-like index (or its object cast)."""
      source = index.astype(object) if cast_as_obj else index
      result = pd.Index(source)
      tm.assert_index_equal(result, index)

      if not isinstance(index, pd.DatetimeIndex):
          return

      assert result.tz == index.tz
      if cast_as_obj:
          # GH#23524 check that Index(dti, dtype=object) does not
          # incorrectly raise ValueError, and that nanoseconds are not
          # dropped
          index += pd.Timedelta(nanoseconds=50)
          result = pd.Index(index, dtype=object)
          assert result.dtype == np.object_
          assert list(result) == list(index)
  @pytest.mark.parametrize("index,has_tz", [
      (pd.date_range('2015-01-01 10:00', freq='D', periods=3,
                     tz='US/Eastern'), True),  # datetimetz
      (pd.timedelta_range('1 days', freq='D', periods=3), False),  # td
      (pd.period_range('2015-01-01', freq='D', periods=3), False)  # period
  ])
  def test_constructor_from_series_dtlike(self, index, has_tz):
      """pd.Index built from a Series wrapping a datetime-like index."""
      wrapped = pd.Series(index)
      result = pd.Index(wrapped)
      tm.assert_index_equal(result, index)

      if not has_tz:
          return
      assert result.tz == index.tz
  @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
  def test_constructor_from_series(self, klass):
      """Both constructors turn a Series of Timestamps into a DatetimeIndex."""
      dates = ('20110101', '20120101', '20130101')
      expected = DatetimeIndex([Timestamp(d) for d in dates])
      ser = Series([Timestamp(d) for d in dates])
      result = klass(ser)
      tm.assert_index_equal(result, expected)
  def test_constructor_from_series_freq(self):
      # GH 6273
      # create from a series, passing a freq
      date_strings = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                      '5-1-1990']
      expected = DatetimeIndex(date_strings, freq='MS')

      parsed = pd.to_datetime(date_strings)
      ser = Series(parsed)
      result = DatetimeIndex(ser, freq='MS')
      tm.assert_index_equal(result, expected)
  def test_constructor_from_frame_series_freq(self):
      # GH 6273
      # create from a frame column (a series), passing a freq
      date_strings = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
                      '5-1-1990']
      expected_index = DatetimeIndex(date_strings, freq='MS')

      frame = pd.DataFrame(np.random.rand(5, 3))
      frame['date'] = date_strings
      result = DatetimeIndex(frame['date'], freq='MS')

      # the column itself holds the raw strings as objects
      assert frame['date'].dtype == object
      expected_index.name = 'date'
      tm.assert_index_equal(result, expected_index)

      expected_column = pd.Series(date_strings, name='date')
      tm.assert_series_equal(frame['date'], expected_column)

      # GH 6274
      # infer freq of same
      inferred = pd.infer_freq(frame['date'])
      assert inferred == 'MS'
  @pytest.mark.parametrize("array", [
      np.arange(5),
      np.array(['a', 'b', 'c']),
      date_range('2000-01-01', periods=3).values,
  ])
  def test_constructor_ndarray_like(self, array):
      # GH 5460#issuecomment-44474502
      # it should be possible to convert any object that satisfies the numpy
      # ndarray interface directly into an Index
      class Wrapper(object):
          """Minimal object exposing only ``__array__``."""

          def __init__(self, array):
              self.array = array

          def __array__(self, dtype=None):
              return self.array

      wrapped = Wrapper(array)
      expected = pd.Index(array)
      result = pd.Index(wrapped)
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize('dtype', [
      int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32',
      'uint16', 'uint8'])
  def test_constructor_int_dtype_float(self, dtype):
      # GH 18400
      unsigned = is_unsigned_integer_dtype(dtype)
      index_type = UInt64Index if unsigned else Int64Index

      expected = index_type([0, 1, 2, 3])
      floats = [0., 1., 2., 3.]
      result = Index(floats, dtype=dtype)
      tm.assert_index_equal(result, expected)
  def test_constructor_int_dtype_nan(self):
      # see gh-15187
      nan_only = [np.nan]
      expected = Float64Index(nan_only)
      result = Index(nan_only, dtype='float')
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("dtype", ['int64', 'uint64'])
  def test_constructor_int_dtype_nan_raises(self, dtype):
      # see gh-15187
      nan_only = [np.nan]
      with pytest.raises(ValueError, match="cannot convert"):
          Index(nan_only, dtype=dtype)
  def test_constructor_no_pandas_array(self):
      """pd.Index built from ``Series.array`` equals one built from a list."""
      values = [1, 2, 3]
      backing = pd.Series(values).array
      result = pd.Index(backing)
      expected = pd.Index(values)
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("klass,dtype,na_val", [
      (pd.Float64Index, np.float64, np.nan),
      (pd.DatetimeIndex, 'datetime64[ns]', pd.NaT)
  ])
  def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val):
      # GH 13467
      missing = [na_val, na_val]
      expected = klass(missing)
      assert expected.dtype == dtype

      # from a plain list
      from_list = Index(missing)
      tm.assert_index_equal(from_list, expected)

      # from an ndarray of the same values
      from_array = Index(np.array(missing))
      tm.assert_index_equal(from_array, expected)
  @pytest.mark.parametrize("pos", [0, 1])
  @pytest.mark.parametrize("klass,dtype,ctor", [
      (pd.DatetimeIndex, 'datetime64[ns]', np.datetime64('nat')),
      (pd.TimedeltaIndex, 'timedelta64[ns]', np.timedelta64('nat'))
  ])
  def test_index_ctor_infer_nat_dt_like(self, pos, klass, dtype, ctor,
                                        nulls_fixture):
      """A typed NaT paired with any null infers the matching index class."""
      expected = klass([pd.NaT, pd.NaT])
      assert expected.dtype == dtype

      # place the generic null before or after the typed NaT
      mixed = [ctor]
      mixed.insert(pos, nulls_fixture)

      from_list = Index(mixed)
      tm.assert_index_equal(from_list, expected)

      from_array = Index(np.array(mixed, dtype=object))
      tm.assert_index_equal(from_array, expected)
  @pytest.mark.parametrize("swap_objs", [True, False])
  def test_index_ctor_nat_result(self, swap_objs):
      # mixed np.datetime64/timedelta64 nat results in object
      nats = [np.datetime64('nat'), np.timedelta64('nat')]
      if swap_objs:
          nats = list(reversed(nats))

      expected = pd.Index(nats, dtype=object)
      from_list = Index(nats)
      tm.assert_index_equal(from_list, expected)
      from_array = Index(np.array(nats, dtype=object))
      tm.assert_index_equal(from_array, expected)
  def test_index_ctor_infer_periodindex(self):
      """Index(PeriodIndex) stays a PeriodIndex equal to the input."""
      periods = period_range('2012-1-1', freq='M', periods=3)
      rebuilt = Index(periods)
      tm.assert_index_equal(rebuilt, periods)
      assert isinstance(rebuilt, PeriodIndex)
  @pytest.mark.parametrize("vals,dtype", [
      ([1, 2, 3, 4, 5], 'int'),
      ([1.1, np.nan, 2.2, 3.0], 'float'),
      (['A', 'B', 'C', np.nan], 'obj')
  ])
  def test_constructor_simple_new(self, vals, dtype):
      """``_simple_new`` on an index's own values rebuilds an equal index."""
      # the parametrized ``dtype`` string is used as the index name here
      original = Index(vals, name=dtype)
      rebuilt = original._simple_new(original.values, dtype)
      tm.assert_index_equal(rebuilt, original)
  @pytest.mark.parametrize("vals", [
      [1, 2, 3], np.array([1, 2, 3]), np.array([1, 2, 3], dtype=int),
      # below should coerce
      [1., 2., 3.], np.array([1., 2., 3.], dtype=float)
  ])
  def test_constructor_dtypes_to_int64(self, vals):
      """``dtype=int`` yields an Int64Index for int and whole-float input."""
      built = Index(vals, dtype=int)
      assert isinstance(built, Int64Index)
  @pytest.mark.parametrize("vals", [
      [1, 2, 3], [1., 2., 3.], np.array([1., 2., 3.]),
      np.array([1, 2, 3], dtype=int), np.array([1., 2., 3.], dtype=float)
  ])
  def test_constructor_dtypes_to_float64(self, vals):
      """``dtype=float`` yields a Float64Index for int and float input."""
      built = Index(vals, dtype=float)
      assert isinstance(built, Float64Index)
  @pytest.mark.parametrize("cast_index", [True, False])
  @pytest.mark.parametrize("vals", [
      [True, False, True], np.array([True, False, True], dtype=bool)
  ])
  def test_constructor_dtypes_to_object(self, cast_index, vals):
      """Boolean values end up in an object-dtype Index either way."""
      built = Index(vals, dtype=bool) if cast_index else Index(vals)

      assert isinstance(built, Index)
      assert built.dtype == object
  @pytest.mark.parametrize("vals", [
      [1, 2, 3], np.array([1, 2, 3], dtype=int),
      np.array([np_datetime64_compat('2011-01-01'),
                np_datetime64_compat('2011-01-02')]),
      [datetime(2011, 1, 1), datetime(2011, 1, 2)]
  ])
  def test_constructor_dtypes_to_categorical(self, vals):
      """``dtype='category'`` always produces a CategoricalIndex."""
      built = Index(vals, dtype='category')
      assert isinstance(built, CategoricalIndex)
  @pytest.mark.parametrize("cast_index", [True, False])
  @pytest.mark.parametrize("vals", [
      Index(np.array([np_datetime64_compat('2011-01-01'),
                      np_datetime64_compat('2011-01-02')])),
      Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])
  ])
  def test_constructor_dtypes_to_datetime(self, cast_index, vals):
      """Datetime input infers DatetimeIndex unless object is requested."""
      if not cast_index:
          inferred = Index(vals)
          assert isinstance(inferred, DatetimeIndex)
          return

      as_object = Index(vals, dtype=object)
      assert isinstance(as_object, Index)
      assert as_object.dtype == object
  @pytest.mark.parametrize("cast_index", [True, False])
  @pytest.mark.parametrize("vals", [
      np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]),
      [timedelta(1), timedelta(1)]
  ])
  def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
      """Timedelta input infers TimedeltaIndex unless object is requested."""
      if not cast_index:
          inferred = Index(vals)
          assert isinstance(inferred, TimedeltaIndex)
          return

      as_object = Index(vals, dtype=object)
      assert isinstance(as_object, Index)
      assert as_object.dtype == object
  @pytest.mark.parametrize("attr, utc", [
      ['values', False],
      ['asi8', True]])
  @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
  def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, utc,
                                       klass):
      # Test constructing with a datetimetz dtype
      # .values produces numpy datetimes, so these are considered naive
      # .asi8 produces integers, so these are considered epoch timestamps
      # ^the above will be true in a later version. Right now we `.view`
      # the i8 values as NS_DTYPE, effectively treating them as wall times.
      naive = pd.date_range('2011-01-01', periods=5)
      arg = getattr(naive, attr)
      index = naive.tz_localize(tz_naive_fixture)
      dtype = index.dtype

      # TODO(GH-24559): Remove the sys.modules and warnings
      # not sure what this is from. It's Py2 only.
      modules = [sys.modules['pandas.core.indexes.base']]

      warns = (tz_naive_fixture and attr == "asi8" and
               str(tz_naive_fixture) not in ('UTC', 'tzutc()'))
      ex_warn = FutureWarning if warns else None

      # stacklevel is checked elsewhere. We don't do it here since
      # Index will have a frame, throwing off the expected.
      with tm.assert_produces_warning(ex_warn, check_stacklevel=False,
                                      clear=modules):
          from_tz = klass(arg, tz=tz_naive_fixture)
      tm.assert_index_equal(from_tz, index)

      with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
          from_dtype = klass(arg, dtype=dtype)
      tm.assert_index_equal(from_dtype, index)

      # same two constructions, starting from a plain list
      with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
          list_tz = klass(list(arg), tz=tz_naive_fixture)
      tm.assert_index_equal(list_tz, index)

      with tm.assert_produces_warning(ex_warn, check_stacklevel=False):
          list_dtype = klass(list(arg), dtype=dtype)
      tm.assert_index_equal(list_dtype, index)
  @pytest.mark.parametrize("attr", ['values', 'asi8'])
  @pytest.mark.parametrize("klass", [pd.Index, pd.TimedeltaIndex])
  def test_constructor_dtypes_timedelta(self, attr, klass):
      """Rebuilding from ``values``/``asi8`` with the dtype round-trips."""
      index = pd.timedelta_range('1 days', periods=5)
      raw = getattr(index, attr)
      td_dtype = index.dtype

      from_array = klass(raw, dtype=td_dtype)
      tm.assert_index_equal(from_array, index)

      from_list = klass(list(raw), dtype=td_dtype)
      tm.assert_index_equal(from_list, index)
  @pytest.mark.parametrize("value", [[], iter([]), (x for x in [])])
  @pytest.mark.parametrize("klass",
                           [Index, Float64Index, Int64Index, UInt64Index,
                            CategoricalIndex, DatetimeIndex, TimedeltaIndex])
  def test_constructor_empty(self, value, klass):
      """Empty input gives an empty instance of the requested class."""
      built = klass(value)
      assert isinstance(built, klass)
      size = len(built)
      assert not size
  @pytest.mark.parametrize("empty,klass", [
      (PeriodIndex([], freq='B'), PeriodIndex),
      (PeriodIndex(iter([]), freq='B'), PeriodIndex),
      (PeriodIndex((x for x in []), freq='B'), PeriodIndex),
      (RangeIndex(step=1), pd.RangeIndex),
      (MultiIndex(levels=[[1, 2], ['blue', 'red']],
                  codes=[[], []]), MultiIndex)
  ])
  def test_constructor_empty_special(self, empty, klass):
      """Pre-built empty indexes have the expected class and zero length."""
      assert isinstance(empty, klass)
      size = len(empty)
      assert not size
  def test_constructor_overflow_int64(self):
      # see gh-15832
      too_big = np.iinfo(np.uint64).max - 1
      msg = ("The elements provided in the data cannot "
             "all be casted to the dtype int64")
      with pytest.raises(OverflowError, match=msg):
          Index([too_big], dtype="int64")
  @pytest.mark.xfail(reason="see GH#21311: Index "
                            "doesn't enforce dtype argument")
  def test_constructor_cast(self):
      """Strings with ``dtype=float`` should raise (currently xfail)."""
      letters = ["a", "b", "c"]
      with pytest.raises(ValueError,
                         match="could not convert string to float"):
          Index(letters, dtype=float)
  def test_view_with_args(self):
      """``view('i8')`` raises TypeError for the restricted indexes only."""
      restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',
                    'empty']

      for key in restricted:
          ind = self.indices[key]
          # with arguments
          with pytest.raises(TypeError):
              ind.view('i8')

      # these are ok
      allowed = set(self.indices.keys()) - set(restricted)
      for key in list(allowed):
          # with arguments
          self.indices[key].view('i8')
  def test_astype(self):
      """``astype('i8')`` gives a usable index and carries the name over."""
      converted = self.intIndex.astype('i8')

      # it works!
      converted.get_loc(5)

      # pass on name
      self.intIndex.name = 'foobar'
      renamed = self.intIndex.astype('i8')
      assert renamed.name == 'foobar'
  def test_equals_object(self):
      """Two indexes with the same labels compare equal."""
      # same
      left = Index(['a', 'b', 'c'])
      right = Index(['a', 'b', 'c'])
      assert left.equals(right)
  @pytest.mark.parametrize("comp", [
      Index(['a', 'b']),
      Index(['a', 'b', 'd']),
      ['a', 'b', 'c'],
  ])
  def test_not_equals_object(self, comp):
      """Shorter, different, or non-Index operands do not compare equal."""
      reference = Index(['a', 'b', 'c'])
      same = reference.equals(comp)
      assert not same
  def test_insert(self):
      # GH 7256
      # validate neg/pos inserts
      base = Index(['b', 'c', 'd'])

      # test 0th element
      at_front = base.insert(0, 'a')
      tm.assert_index_equal(Index(['a', 'b', 'c', 'd']), at_front)

      # test Nth element that follows Python list behavior
      before_last = base.insert(-1, 'e')
      tm.assert_index_equal(Index(['b', 'c', 'e', 'd']), before_last)

      # test loc +/- neq (0, -1)
      positive = base.insert(1, 'z')
      negative = base.insert(-2, 'z')
      tm.assert_index_equal(positive, negative)

      # test empty
      null_index = Index([])
      tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
  def test_insert_missing(self, nulls_fixture):
      # GH 22295
      # test there is no mangling of NA values
      letters = Index(list('abc'))
      result = letters.insert(1, nulls_fixture)
      expected = Index(['a', nulls_fixture, 'b', 'c'])
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("pos,expected", [
      (0, Index(['b', 'c', 'd'], name='index')),
      (-1, Index(['a', 'b', 'c'], name='index'))
  ])
  def test_delete(self, pos, expected):
      """Deleting the first or last position keeps the rest and the name."""
      labels = Index(['a', 'b', 'c', 'd'], name='index')
      trimmed = labels.delete(pos)
      tm.assert_index_equal(trimmed, expected)
      assert trimmed.name == expected.name
  def test_delete_raises(self):
      """Deleting an out-of-bounds position raises."""
      labels = Index(['a', 'b', 'c', 'd'], name='index')
      # either depending on numpy version
      acceptable = (IndexError, ValueError)
      with pytest.raises(acceptable):
          labels.delete(5)
  def test_identical(self):
      """``identical`` also compares names and index type, unlike ``equals``."""
      # index
      left = Index(['a', 'b', 'c'])
      right = Index(['a', 'b', 'c'])
      assert left.identical(right)

      # naming only one side: still equal, no longer identical
      left = left.rename('foo')
      assert left.equals(right)
      assert not left.identical(right)

      # naming the other side the same restores identity
      right = right.rename('foo')
      assert left.identical(right)

      pairs = [('a', 'a'), ('a', 'b'), ('b', 'a')]
      tupleized = Index(list(pairs))
      flat = Index(list(pairs), tupleize_cols=False)
      assert not tupleized.identical(flat)
  def test_is_(self):
      """``is_`` holds for an index and its views, not for copies or slices."""
      base = Index(range(10))
      assert base.is_(base)
      assert base.is_(base.view().view().view().view())

      # anything that is not a view of ``base`` must fail
      assert not base.is_(Index(range(10)))
      assert not base.is_(base.copy())
      assert not base.is_(base.copy(deep=False))
      assert not base.is_(base[:])
      assert not base.is_(np.array(range(10)))

      # quasi-implementation dependent
      assert base.is_(base.view())
      named_view = base.view()
      named_view.name = 'bob'
      assert base.is_(named_view)
      assert named_view.is_(base)

      # doesn't matter if Indices are *actually* views of underlying data,
      assert not base.is_(Index(base.values))
      shared = np.array(range(1, 11))
      first = Index(shared, copy=False)
      second = Index(shared, copy=False)
      assert not first.is_(second)
  def test_asof(self):
      """``asof`` at, before, and after the range of the date index."""
      dates = self.dateIndex
      one_day = timedelta(1)

      earliest = dates[0]
      assert dates.asof(earliest) == earliest
      assert isna(dates.asof(earliest - one_day))

      latest = dates[-1]
      assert dates.asof(latest + one_day) == latest

      as_pydatetime = dates[0].to_pydatetime()
      assert isinstance(dates.asof(as_pydatetime), Timestamp)
  def test_asof_datetime_partial(self):
      """``asof`` with a partial date string returns a scalar Timestamp."""
      month_ends = pd.date_range('2010-01-01', periods=2, freq='m')
      expected = Timestamp('2010-02-28')

      found = month_ends.asof('2010-02')
      assert found == expected
      assert not isinstance(found, Index)
  def test_nanosecond_index_access(self):
      """Lookup on an index whose stamps are one nanosecond apart."""
      base_ns = Series([Timestamp('20130101')]).values.view('i8')[0]
      stamps = DatetimeIndex([base_ns + 50 + i for i in range(100)])
      series = Series(np.random.randn(100), index=stamps)

      first_value = series.asof(series.index[0])

      # this does not yet work, as parsing strings is done via dateutil
      # assert first_value == x['2013-01-01 00:00:00.000000050+0000']

      expected_ts = np_datetime64_compat('2013-01-01 00:00:00.000000050+'
                                         '0000', 'ns')
      key = Timestamp(expected_ts)
      assert first_value == series[key]
  def test_booleanindex(self):
      """Boolean masking (ndarray or list) keeps ``get_loc`` positional."""
      mask = np.repeat(True, len(self.strIndex)).astype(bool)
      mask[5:30:2] = False

      # ndarray mask
      selected = self.strIndex[mask]
      for position, label in enumerate(selected):
          assert selected.get_loc(label) == position

      # the same mask as a plain list
      selected = self.strIndex[list(mask)]
      for position, label in enumerate(selected):
          assert selected.get_loc(label) == position
  def test_fancy(self):
      """Each label of an integer-list selection round-trips via get_loc."""
      picked = self.strIndex[[1, 2, 3]]
      for label in picked:
          position = picked.get_loc(label)
          assert label == picked[position]
  @pytest.mark.parametrize("attr", [
      'strIndex', 'intIndex', 'floatIndex'])
  @pytest.mark.parametrize("dtype", [np.int_, np.bool_])
  def test_empty_fancy(self, attr, dtype):
      """Empty list or empty int/bool array selects an identical empty index."""
      index = getattr(self, attr)
      no_rows = np.array([], dtype=dtype)
      expected = index.__class__([])

      assert index[[]].identical(expected)
      assert index[no_rows].identical(expected)
  @pytest.mark.parametrize("attr", [
      'strIndex', 'intIndex', 'floatIndex'])
  def test_empty_fancy_raises(self, attr):
      # pd.DatetimeIndex is excluded, because it overrides getitem and should
      # be tested separately.
      index = getattr(self, attr)
      float_indexer = np.array([], dtype=np.float_)
      expected = index.__class__([])

      assert index[[]].identical(expected)
      # np.ndarray only accepts ndarray of int & bool dtypes, so should Index
      with pytest.raises(IndexError):
          index[float_indexer]
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersection(self, sort):
      """Intersection of overlapping slices, and of an index with itself."""
      larger = self.strIndex[:20]
      smaller = self.strIndex[:10]

      common = larger.intersection(smaller, sort=sort)
      if sort is None:
          tm.assert_index_equal(common, smaller.sort_values())
      assert tm.equalContents(common, smaller)

      # Corner cases
      with_self = larger.intersection(larger, sort=sort)
      assert with_self is larger
  @pytest.mark.parametrize("index2,keeps_name", [
      (Index([3, 4, 5, 6, 7], name="index"), True),  # preserve same name
      (Index([3, 4, 5, 6, 7], name="other"), False),  # drop diff names
      (Index([3, 4, 5, 6, 7]), False)])
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersection_name_preservation(self, index2, keeps_name, sort):
      """
      The intersection keeps a name only when both operands share it.

      ``keeps_name`` says whether the result is expected to be named
      'index'; ``sort`` is forwarded to ``Index.intersection``.
      """
      index1 = Index([1, 2, 3, 4, 5], name='index')
      expected = Index([3, 4, 5])
      # pass ``sort`` by keyword, as every other intersection test in this
      # class does, so the call does not rely on parameter position
      result = index1.intersection(index2, sort=sort)

      if keeps_name:
          expected.name = 'index'

      assert result.name == expected.name
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("first_name,second_name,expected_name", [
      ('A', 'A', 'A'), ('A', 'B', None), (None, 'B', None)])
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersection_name_preservation2(self, first_name, second_name,
                                           expected_name, sort):
      """Name handling when intersecting two named slices of strIndex."""
      left = self.strIndex[5:20]
      right = self.strIndex[:10]
      left.name = first_name
      right.name = second_name

      common = left.intersection(right, sort=sort)
      assert common.name == expected_name
  @pytest.mark.parametrize("index2,keeps_name", [
      (Index([4, 7, 6, 5, 3], name='index'), True),
      (Index([4, 7, 6, 5, 3], name='other'), False)])
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersection_monotonic(self, index2, keeps_name, sort):
      """Intersection of two unsorted integer indexes."""
      index1 = Index([5, 3, 2, 4, 1], name='index')
      unsorted_expected = Index([5, 3, 4])
      if keeps_name:
          unsorted_expected.name = "index"

      result = index1.intersection(index2, sort=sort)

      if sort is None:
          expected = unsorted_expected.sort_values()
      else:
          expected = unsorted_expected
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("index2,expected_arr", [
      (Index(['B', 'D']), ['B']),
      (Index(['B', 'D', 'A']), ['A', 'B', 'A'])])
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersection_non_monotonic_non_unique(self, index2, expected_arr,
                                                 sort):
      # non-monotonic non-unique
      index1 = Index(['A', 'B', 'A', 'C'])
      unsorted_expected = Index(expected_arr, dtype='object')

      result = index1.intersection(index2, sort=sort)

      if sort is None:
          expected = unsorted_expected.sort_values()
      else:
          expected = unsorted_expected
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("sort", [None, False])
  def test_intersect_str_dates(self, sort):
      """Object indexes of strings and of datetimes have nothing in common."""
      date_index = Index([datetime(2012, 2, 9), datetime(2012, 2, 22)],
                         dtype=object)
      str_index = Index(['aa'], dtype=object)

      common = str_index.intersection(date_index, sort=sort)
      assert len(common) == 0
  def test_intersect_nosort(self):
      """Default intersection with a list keeps the calling index's order."""
      left = pd.Index(['c', 'b', 'a'])
      result = left.intersection(['b', 'a'])
      expected = pd.Index(['b', 'a'])
      tm.assert_index_equal(result, expected)
  def test_intersection_equal_sort(self):
      """Self-intersection is unchanged for both sort=False and sort=None."""
      idx = pd.Index(['c', 'a', 'b'])
      unsorted_result = idx.intersection(idx, sort=False)
      tm.assert_index_equal(unsorted_result, idx)
      default_result = idx.intersection(idx, sort=None)
      tm.assert_index_equal(default_result, idx)
  @pytest.mark.xfail(reason="Not implemented")
  def test_intersection_equal_sort_true(self):
      # TODO decide on True behaviour
      idx = pd.Index(['c', 'a', 'b'])
      expected = pd.Index(['a', 'b', 'c'])
      result = idx.intersection(idx, sort=True)
      tm.assert_index_equal(result, expected)
  @pytest.mark.parametrize("sort", [None, False])
  def test_chained_union(self, sort):
      # Chained unions handles names correctly
      a = Index([1, 2], name='i1')
      b = Index([5, 6], name='i2')
      c = Index([3, 4], name='i3')
      nested = a.union(b.union(c, sort=sort), sort=sort)
      chained = a.union(b, sort=sort).union(c, sort=sort)
      tm.assert_index_equal(nested, chained)

      # same again with empty right-hand operands
      x = Index([1, 2], name='j1')
      y = Index([], name='j2')
      z = Index([], name='j3')
      nested = x.union(y.union(z, sort=sort), sort=sort)
      chained = x.union(y, sort=sort).union(z, sort=sort)
      tm.assert_index_equal(nested, chained)
  @pytest.mark.parametrize("sort", [None, False])
  def test_union(self, sort):
      """Union of two overlapping slices covers the combined range."""
      # TODO: Replace with fixture
      left = self.strIndex[5:20]
      right = self.strIndex[:10]
      combined = self.strIndex[:20]

      result = left.union(right, sort=sort)
      if sort is None:
          tm.assert_index_equal(result, combined.sort_values())
      assert tm.equalContents(result, combined)
  @pytest.mark.parametrize('slice_', [slice(None), slice(0)])
  def test_union_sort_other_special(self, slice_):
      # https://github.com/pandas-dev/pandas/issues/24959
      idx = pd.Index([1, 0, 2])
      other = idx[slice_]

      # default, sort=None
      forward = idx.union(other)
      tm.assert_index_equal(forward, idx)
      backward = other.union(idx)
      tm.assert_index_equal(backward, idx)

      # sort=False
      unsorted_result = idx.union(other, sort=False)
      tm.assert_index_equal(unsorted_result, idx)
  @pytest.mark.xfail(reason="Not implemented")
  @pytest.mark.parametrize('slice_', [slice(None), slice(0)])
  def test_union_sort_special_true(self, slice_):
      # TODO decide on True behaviour
      # sort=True
      idx = pd.Index([1, 0, 2])
      other = idx[slice_]

      expected = pd.Index([0, 1, 2])
      result = idx.union(other, sort=True)
      tm.assert_index_equal(result, expected)
  def test_union_sort_other_incomparable(self):
      # https://github.com/pandas-dev/pandas/issues/24959
      idx = pd.Index([1, pd.Timestamp('2000')])
      head = idx[:1]

      # default (sort=None)
      with tm.assert_produces_warning(RuntimeWarning):
          by_default = idx.union(head)
      tm.assert_index_equal(by_default, idx)

      # sort=None
      with tm.assert_produces_warning(RuntimeWarning):
          explicit_none = idx.union(head, sort=None)
      tm.assert_index_equal(explicit_none, idx)

      # sort=False
      unsorted_result = idx.union(head, sort=False)
      tm.assert_index_equal(unsorted_result, idx)
  @pytest.mark.xfail(reason="Not implemented")
  def test_union_sort_other_incomparable_true(self):
      # TODO decide on True behaviour
      # sort=True
      idx = pd.Index([1, pd.Timestamp('2000')])
      head = idx[:1]
      with pytest.raises(TypeError, match='.*'):
          idx.union(head, sort=True)
  @pytest.mark.parametrize("klass", [
      np.array, Series, list])
  @pytest.mark.parametrize("sort", [None, False])
  def test_union_from_iterables(self, klass, sort):
      # GH 10149
      # TODO: Replace with fixture
      left = self.strIndex[5:20]
      right = self.strIndex[:10]
      combined = self.strIndex[:20]

      other = klass(right.values)
      result = left.union(other, sort=sort)
      if sort is None:
          tm.assert_index_equal(result, combined.sort_values())
      assert tm.equalContents(result, combined)
  @pytest.mark.parametrize("sort", [None, False])
  def test_union_identity(self, sort):
      """Union with itself or with an empty operand returns ``first``."""
      # TODO: replace with fixture
      first = self.strIndex[5:20]

      # i.e. identity is not preserved when sort is True
      with_self = first.union(first, sort=sort)
      assert (with_self is first) is (not sort)

      with_empty_list = first.union([], sort=sort)
      assert (with_empty_list is first) is (not sort)

      from_empty_index = Index([]).union(first, sort=sort)
      assert (from_empty_index is first) is (not sort)
  741. @pytest.mark.parametrize("first_list", [list('ba'), list()])
  742. @pytest.mark.parametrize("second_list", [list('ab'), list()])
  743. @pytest.mark.parametrize("first_name, second_name, expected_name", [
  744. ('A', 'B', None), (None, 'B', None), ('A', None, None)])
  745. @pytest.mark.parametrize("sort", [None, False])
  746. def test_union_name_preservation(self, first_list, second_list, first_name,
  747. second_name, expected_name, sort):
  748. first = Index(first_list, name=first_name)
  749. second = Index(second_list, name=second_name)
  750. union = first.union(second, sort=sort)
  751. vals = set(first_list).union(second_list)
  752. if sort is None and len(first_list) > 0 and len(second_list) > 0:
  753. expected = Index(sorted(vals), name=expected_name)
  754. tm.assert_index_equal(union, expected)
  755. else:
  756. expected = Index(vals, name=expected_name)
  757. assert tm.equalContents(union, expected)
  758. @pytest.mark.parametrize("sort", [None, False])
  759. def test_union_dt_as_obj(self, sort):
  760. # TODO: Replace with fixturesult
  761. firstCat = self.strIndex.union(self.dateIndex)
  762. secondCat = self.strIndex.union(self.strIndex)
  763. if self.dateIndex.dtype == np.object_:
  764. appended = np.append(self.strIndex, self.dateIndex)
  765. else:
  766. appended = np.append(self.strIndex, self.dateIndex.astype('O'))
  767. assert tm.equalContents(firstCat, appended)
  768. assert tm.equalContents(secondCat, self.strIndex)
  769. tm.assert_contains_all(self.strIndex, firstCat)
  770. tm.assert_contains_all(self.strIndex, secondCat)
  771. tm.assert_contains_all(self.dateIndex, firstCat)
  772. @pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
  773. 'symmetric_difference'])
  774. def test_setops_disallow_true(self, method):
  775. idx1 = pd.Index(['a', 'b'])
  776. idx2 = pd.Index(['b', 'c'])
  777. with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
  778. getattr(idx1, method)(idx2, sort=True)
  779. def test_map_identity_mapping(self):
  780. # GH 12766
  781. # TODO: replace with fixture
  782. for name, cur_index in self.indices.items():
  783. tm.assert_index_equal(cur_index, cur_index.map(lambda x: x))
  784. def test_map_with_tuples(self):
  785. # GH 12766
  786. # Test that returning a single tuple from an Index
  787. # returns an Index.
  788. index = tm.makeIntIndex(3)
  789. result = tm.makeIntIndex(3).map(lambda x: (x,))
  790. expected = Index([(i,) for i in index])
  791. tm.assert_index_equal(result, expected)
  792. # Test that returning a tuple from a map of a single index
  793. # returns a MultiIndex object.
  794. result = index.map(lambda x: (x, x == 1))
  795. expected = MultiIndex.from_tuples([(i, i == 1) for i in index])
  796. tm.assert_index_equal(result, expected)
  797. def test_map_with_tuples_mi(self):
  798. # Test that returning a single object from a MultiIndex
  799. # returns an Index.
  800. first_level = ['foo', 'bar', 'baz']
  801. multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3]))
  802. reduced_index = multi_index.map(lambda x: x[0])
  803. tm.assert_index_equal(reduced_index, Index(first_level))
  804. @pytest.mark.parametrize("attr", [
  805. 'makeDateIndex', 'makePeriodIndex', 'makeTimedeltaIndex'])
  806. def test_map_tseries_indices_return_index(self, attr):
  807. index = getattr(tm, attr)(10)
  808. expected = Index([1] * 10)
  809. result = index.map(lambda x: 1)
  810. tm.assert_index_equal(expected, result)
  811. def test_map_tseries_indices_accsr_return_index(self):
  812. date_index = tm.makeDateIndex(24, freq='h', name='hourly')
  813. expected = Index(range(24), name='hourly')
  814. tm.assert_index_equal(expected, date_index.map(lambda x: x.hour))
  815. @pytest.mark.parametrize(
  816. "mapper",
  817. [
  818. lambda values, index: {i: e for e, i in zip(values, index)},
  819. lambda values, index: pd.Series(values, index)])
  820. def test_map_dictlike(self, mapper):
  821. # GH 12756
  822. expected = Index(['foo', 'bar', 'baz'])
  823. index = tm.makeIntIndex(3)
  824. result = index.map(mapper(expected.values, index))
  825. tm.assert_index_equal(result, expected)
  826. # TODO: replace with fixture
  827. for name in self.indices.keys():
  828. if name == 'catIndex':
  829. # Tested in test_categorical
  830. continue
  831. elif name == 'repeats':
  832. # Cannot map duplicated index
  833. continue
  834. index = self.indices[name]
  835. expected = Index(np.arange(len(index), 0, -1))
  836. # to match proper result coercion for uints
  837. if name == 'empty':
  838. expected = Index([])
  839. result = index.map(mapper(expected, index))
  840. tm.assert_index_equal(result, expected)
  841. @pytest.mark.parametrize("mapper", [
  842. Series(['foo', 2., 'baz'], index=[0, 2, -1]),
  843. {0: 'foo', 2: 2.0, -1: 'baz'}])
  844. def test_map_with_non_function_missing_values(self, mapper):
  845. # GH 12756
  846. expected = Index([2., np.nan, 'foo'])
  847. result = Index([2, 1, 0]).map(mapper)
  848. tm.assert_index_equal(expected, result)
  849. def test_map_na_exclusion(self):
  850. index = Index([1.5, np.nan, 3, np.nan, 5])
  851. result = index.map(lambda x: x * 2, na_action='ignore')
  852. expected = index * 2
  853. tm.assert_index_equal(result, expected)
  854. def test_map_defaultdict(self):
  855. index = Index([1, 2, 3])
  856. default_dict = defaultdict(lambda: 'blank')
  857. default_dict[1] = 'stuff'
  858. result = index.map(default_dict)
  859. expected = Index(['stuff', 'blank', 'blank'])
  860. tm.assert_index_equal(result, expected)
  861. def test_append_multiple(self):
  862. index = Index(['a', 'b', 'c', 'd', 'e', 'f'])
  863. foos = [index[:2], index[2:4], index[4:]]
  864. result = foos[0].append(foos[1:])
  865. tm.assert_index_equal(result, index)
  866. # empty
  867. result = index.append([])
  868. tm.assert_index_equal(result, index)
  869. @pytest.mark.parametrize("name,expected", [
  870. ('foo', 'foo'), ('bar', None)])
  871. def test_append_empty_preserve_name(self, name, expected):
  872. left = Index([], name='foo')
  873. right = Index([1, 2, 3], name=name)
  874. result = left.append(right)
  875. assert result.name == expected
  876. @pytest.mark.parametrize("second_name,expected", [
  877. (None, None), ('name', 'name')])
  878. @pytest.mark.parametrize("sort", [None, False])
  879. def test_difference_name_preservation(self, second_name, expected, sort):
  880. # TODO: replace with fixturesult
  881. first = self.strIndex[5:20]
  882. second = self.strIndex[:10]
  883. answer = self.strIndex[10:20]
  884. first.name = 'name'
  885. second.name = second_name
  886. result = first.difference(second, sort=sort)
  887. assert tm.equalContents(result, answer)
  888. if expected is None:
  889. assert result.name is None
  890. else:
  891. assert result.name == expected
  892. @pytest.mark.parametrize("sort", [None, False])
  893. def test_difference_empty_arg(self, sort):
  894. first = self.strIndex[5:20]
  895. first.name == 'name'
  896. result = first.difference([], sort)
  897. assert tm.equalContents(result, first)
  898. assert result.name == first.name
  899. @pytest.mark.parametrize("sort", [None, False])
  900. def test_difference_identity(self, sort):
  901. first = self.strIndex[5:20]
  902. first.name == 'name'
  903. result = first.difference(first, sort)
  904. assert len(result) == 0
  905. assert result.name == first.name
  906. @pytest.mark.parametrize("sort", [None, False])
  907. def test_difference_sort(self, sort):
  908. first = self.strIndex[5:20]
  909. second = self.strIndex[:10]
  910. result = first.difference(second, sort)
  911. expected = self.strIndex[10:20]
  912. if sort is None:
  913. expected = expected.sort_values()
  914. tm.assert_index_equal(result, expected)
  915. @pytest.mark.parametrize("sort", [None, False])
  916. def test_symmetric_difference(self, sort):
  917. # smoke
  918. index1 = Index([5, 2, 3, 4], name='index1')
  919. index2 = Index([2, 3, 4, 1])
  920. result = index1.symmetric_difference(index2, sort=sort)
  921. expected = Index([5, 1])
  922. assert tm.equalContents(result, expected)
  923. assert result.name is None
  924. if sort is None:
  925. expected = expected.sort_values()
  926. tm.assert_index_equal(result, expected)
  927. # __xor__ syntax
  928. expected = index1 ^ index2
  929. assert tm.equalContents(result, expected)
  930. assert result.name is None
  931. @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference'])
  932. def test_difference_incomparable(self, opname):
  933. a = pd.Index([3, pd.Timestamp('2000'), 1])
  934. b = pd.Index([2, pd.Timestamp('1999'), 1])
  935. op = operator.methodcaller(opname, b)
  936. # sort=None, the default
  937. result = op(a)
  938. expected = pd.Index([3, pd.Timestamp('2000'), 2, pd.Timestamp('1999')])
  939. if opname == 'difference':
  940. expected = expected[:2]
  941. tm.assert_index_equal(result, expected)
  942. # sort=False
  943. op = operator.methodcaller(opname, b, sort=False)
  944. result = op(a)
  945. tm.assert_index_equal(result, expected)
  946. @pytest.mark.xfail(reason="Not implemented")
  947. @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference'])
  948. def test_difference_incomparable_true(self, opname):
  949. # TODO decide on True behaviour
  950. # # sort=True, raises
  951. a = pd.Index([3, pd.Timestamp('2000'), 1])
  952. b = pd.Index([2, pd.Timestamp('1999'), 1])
  953. op = operator.methodcaller(opname, b, sort=True)
  954. with pytest.raises(TypeError, match='Cannot compare'):
  955. op(a)
  956. @pytest.mark.parametrize("sort", [None, False])
  957. def test_symmetric_difference_mi(self, sort):
  958. index1 = MultiIndex.from_tuples(self.tuples)
  959. index2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
  960. result = index1.symmetric_difference(index2, sort=sort)
  961. expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)])
  962. if sort is None:
  963. expected = expected.sort_values()
  964. tm.assert_index_equal(result, expected)
  965. assert tm.equalContents(result, expected)
  966. @pytest.mark.parametrize("index2,expected", [
  967. (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])),
  968. (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0]))])
  969. @pytest.mark.parametrize("sort", [None, False])
  970. def test_symmetric_difference_missing(self, index2, expected, sort):
  971. # GH 13514 change: {nan} - {nan} == {}
  972. # (GH 6444, sorting of nans, is no longer an issue)
  973. index1 = Index([1, np.nan, 2, 3])
  974. result = index1.symmetric_difference(index2, sort=sort)
  975. if sort is None:
  976. expected = expected.sort_values()
  977. tm.assert_index_equal(result, expected)
  978. @pytest.mark.parametrize("sort", [None, False])
  979. def test_symmetric_difference_non_index(self, sort):
  980. index1 = Index([1, 2, 3, 4], name='index1')
  981. index2 = np.array([2, 3, 4, 5])
  982. expected = Index([1, 5])
  983. result = index1.symmetric_difference(index2, sort=sort)
  984. assert tm.equalContents(result, expected)
  985. assert result.name == 'index1'
  986. result = index1.symmetric_difference(index2, result_name='new_name',
  987. sort=sort)
  988. assert tm.equalContents(result, expected)
  989. assert result.name == 'new_name'
  990. @pytest.mark.parametrize("sort", [None, False])
  991. def test_difference_type(self, sort):
  992. # GH 20040
  993. # If taking difference of a set and itself, it
  994. # needs to preserve the type of the index
  995. skip_index_keys = ['repeats']
  996. for key, index in self.generate_index_types(skip_index_keys):
  997. result = index.difference(index, sort=sort)
  998. expected = index.drop(index)
  999. tm.assert_index_equal(result, expected)
  1000. @pytest.mark.parametrize("sort", [None, False])
  1001. def test_intersection_difference(self, sort):
  1002. # GH 20040
  1003. # Test that the intersection of an index with an
  1004. # empty index produces the same index as the difference
  1005. # of an index with itself. Test for all types
  1006. skip_index_keys = ['repeats']
  1007. for key, index in self.generate_index_types(skip_index_keys):
  1008. inter = index.intersection(index.drop(index))
  1009. diff = index.difference(index, sort=sort)
  1010. tm.assert_index_equal(inter, diff)
  1011. @pytest.mark.parametrize("attr,expected", [
  1012. ('strIndex', False), ('boolIndex', False), ('catIndex', False),
  1013. ('intIndex', True), ('dateIndex', False), ('floatIndex', True)])
  1014. def test_is_numeric(self, attr, expected):
  1015. assert getattr(self, attr).is_numeric() == expected
  1016. @pytest.mark.parametrize("attr,expected", [
  1017. ('strIndex', True), ('boolIndex', True), ('catIndex', False),
  1018. ('intIndex', False), ('dateIndex', False), ('floatIndex', False)])
  1019. def test_is_object(self, attr, expected):
  1020. assert getattr(self, attr).is_object() == expected
  1021. @pytest.mark.parametrize("attr,expected", [
  1022. ('strIndex', False), ('boolIndex', False), ('catIndex', False),
  1023. ('intIndex', False), ('dateIndex', True), ('floatIndex', False)])
  1024. def test_is_all_dates(self, attr, expected):
  1025. assert getattr(self, attr).is_all_dates == expected
  1026. def test_summary(self):
  1027. self._check_method_works(Index._summary)
  1028. # GH3869
  1029. ind = Index(['{other}%s', "~:{range}:0"], name='A')
  1030. result = ind._summary()
  1031. # shouldn't be formatted accidentally.
  1032. assert '~:{range}:0' in result
  1033. assert '{other}%s' in result
  1034. # GH18217
  1035. def test_summary_deprecated(self):
  1036. ind = Index(['{other}%s', "~:{range}:0"], name='A')
  1037. with tm.assert_produces_warning(FutureWarning):
  1038. ind.summary()
  1039. def test_format(self):
  1040. self._check_method_works(Index.format)
  1041. # GH 14626
  1042. # windows has different precision on datetime.datetime.now (it doesn't
  1043. # include us since the default for Timestamp shows these but Index
  1044. # formatting does not we are skipping)
  1045. now = datetime.now()
  1046. if not str(now).endswith("000"):
  1047. index = Index([now])
  1048. formatted = index.format()
  1049. expected = [str(index[0])]
  1050. assert formatted == expected
  1051. self.strIndex[:0].format()
  1052. @pytest.mark.parametrize("vals", [
  1053. [1, 2.0 + 3.0j, 4.], ['a', 'b', 'c']])
  1054. def test_format_missing(self, vals, nulls_fixture):
  1055. # 2845
  1056. vals = list(vals) # Copy for each iteration
  1057. vals.append(nulls_fixture)
  1058. index = Index(vals)
  1059. formatted = index.format()
  1060. expected = [str(index[0]), str(index[1]), str(index[2]), u('NaN')]
  1061. assert formatted == expected
  1062. assert index[3] is nulls_fixture
  1063. def test_format_with_name_time_info(self):
  1064. # bug I fixed 12/20/2011
  1065. inc = timedelta(hours=4)
  1066. dates = Index([dt + inc for dt in self.dateIndex], name='something')
  1067. formatted = dates.format(name=True)
  1068. assert formatted[0] == 'something'
  1069. def test_format_datetime_with_time(self):
  1070. t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
  1071. result = t.format()
  1072. expected = ['2012-02-07 00:00:00', '2012-02-07 23:00:00']
  1073. assert len(result) == 2
  1074. assert result == expected
  1075. @pytest.mark.parametrize("op", ['any', 'all'])
  1076. def test_logical_compat(self, op):
  1077. index = self.create_index()
  1078. assert getattr(index, op)() == getattr(index.values, op)()
  1079. def _check_method_works(self, method):
  1080. # TODO: make this a dedicated test with parametrized methods
  1081. method(self.empty)
  1082. method(self.dateIndex)
  1083. method(self.unicodeIndex)
  1084. method(self.strIndex)
  1085. method(self.intIndex)
  1086. method(self.tuples)
  1087. method(self.catIndex)
  1088. def test_get_indexer(self):
  1089. index1 = Index([1, 2, 3, 4, 5])
  1090. index2 = Index([2, 4, 6])
  1091. r1 = index1.get_indexer(index2)
  1092. e1 = np.array([1, 3, -1], dtype=np.intp)
  1093. assert_almost_equal(r1, e1)
  1094. @pytest.mark.parametrize("reverse", [True, False])
  1095. @pytest.mark.parametrize("expected,method", [
  1096. (np.array([-1, 0, 0, 1, 1], dtype=np.intp), 'pad'),
  1097. (np.array([-1, 0, 0, 1, 1], dtype=np.intp), 'ffill'),
  1098. (np.array([0, 0, 1, 1, 2], dtype=np.intp), 'backfill'),
  1099. (np.array([0, 0, 1, 1, 2], dtype=np.intp), 'bfill')])
  1100. def test_get_indexer_methods(self, reverse, expected, method):
  1101. index1 = Index([1, 2, 3, 4, 5])
  1102. index2 = Index([2, 4, 6])
  1103. if reverse:
  1104. index1 = index1[::-1]
  1105. expected = expected[::-1]
  1106. result = index2.get_indexer(index1, method=method)
  1107. assert_almost_equal(result, expected)
  1108. def test_get_indexer_invalid(self):
  1109. # GH10411
  1110. index = Index(np.arange(10))
  1111. with pytest.raises(ValueError, match='tolerance argument'):
  1112. index.get_indexer([1, 0], tolerance=1)
  1113. with pytest.raises(ValueError, match='limit argument'):
  1114. index.get_indexer([1, 0], limit=1)
  1115. @pytest.mark.parametrize(
  1116. 'method, tolerance, indexer, expected',
  1117. [
  1118. ('pad', None, [0, 5, 9], [0, 5, 9]),
  1119. ('backfill', None, [0, 5, 9], [0, 5, 9]),
  1120. ('nearest', None, [0, 5, 9], [0, 5, 9]),
  1121. ('pad', 0, [0, 5, 9], [0, 5, 9]),
  1122. ('backfill', 0, [0, 5, 9], [0, 5, 9]),
  1123. ('nearest', 0, [0, 5, 9], [0, 5, 9]),
  1124. ('pad', None, [0.2, 1.8, 8.5], [0, 1, 8]),
  1125. ('backfill', None, [0.2, 1.8, 8.5], [1, 2, 9]),
  1126. ('nearest', None, [0.2, 1.8, 8.5], [0, 2, 9]),
  1127. ('pad', 1, [0.2, 1.8, 8.5], [0, 1, 8]),
  1128. ('backfill', 1, [0.2, 1.8, 8.5], [1, 2, 9]),
  1129. ('nearest', 1, [0.2, 1.8, 8.5], [0, 2, 9]),
  1130. ('pad', 0.2, [0.2, 1.8, 8.5], [0, -1, -1]),
  1131. ('backfill', 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]),
  1132. ('nearest', 0.2, [0.2, 1.8, 8.5], [0, 2, -1])])
  1133. def test_get_indexer_nearest(self, method, tolerance, indexer, expected):
  1134. index = Index(np.arange(10))
  1135. actual = index.get_indexer(indexer, method=method, tolerance=tolerance)
  1136. tm.assert_numpy_array_equal(actual, np.array(expected,
  1137. dtype=np.intp))
  1138. @pytest.mark.parametrize('listtype', [list, tuple, Series, np.array])
  1139. @pytest.mark.parametrize(
  1140. 'tolerance, expected',
  1141. list(zip([[0.3, 0.3, 0.1], [0.2, 0.1, 0.1],
  1142. [0.1, 0.5, 0.5]],
  1143. [[0, 2, -1], [0, -1, -1],
  1144. [-1, 2, 9]])))
  1145. def test_get_indexer_nearest_listlike_tolerance(self, tolerance,
  1146. expected, listtype):
  1147. index = Index(np.arange(10))
  1148. actual = index.get_indexer([0.2, 1.8, 8.5], method='nearest',
  1149. tolerance=listtype(tolerance))
  1150. tm.assert_numpy_array_equal(actual, np.array(expected,
  1151. dtype=np.intp))
  1152. def test_get_indexer_nearest_error(self):
  1153. index = Index(np.arange(10))
  1154. with pytest.raises(ValueError, match='limit argument'):
  1155. index.get_indexer([1, 0], method='nearest', limit=1)
  1156. with pytest.raises(ValueError, match='tolerance size must match'):
  1157. index.get_indexer([1, 0], method='nearest',
  1158. tolerance=[1, 2, 3])
  1159. @pytest.mark.parametrize("method,expected", [
  1160. ('pad', [8, 7, 0]), ('backfill', [9, 8, 1]), ('nearest', [9, 7, 0])])
  1161. def test_get_indexer_nearest_decreasing(self, method, expected):
  1162. index = Index(np.arange(10))[::-1]
  1163. actual = index.get_indexer([0, 5, 9], method=method)
  1164. tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp))
  1165. actual = index.get_indexer([0.2, 1.8, 8.5], method=method)
  1166. tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
  1167. @pytest.mark.parametrize("method,expected", [
  1168. ('pad', np.array([-1, 0, 1, 1], dtype=np.intp)),
  1169. ('backfill', np.array([0, 0, 1, -1], dtype=np.intp))])
  1170. def test_get_indexer_strings(self, method, expected):
  1171. index = pd.Index(['b', 'c'])
  1172. actual = index.get_indexer(['a', 'b', 'c', 'd'], method=method)
  1173. tm.assert_numpy_array_equal(actual, expected)
  1174. def test_get_indexer_strings_raises(self):
  1175. index = pd.Index(['b', 'c'])
  1176. with pytest.raises(TypeError):
  1177. index.get_indexer(['a', 'b', 'c', 'd'], method='nearest')
  1178. with pytest.raises(TypeError):
  1179. index.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2)
  1180. with pytest.raises(TypeError):
  1181. index.get_indexer(['a', 'b', 'c', 'd'], method='pad',
  1182. tolerance=[2, 2, 2, 2])
  1183. def test_get_indexer_numeric_index_boolean_target(self):
  1184. # GH 16877
  1185. numeric_index = pd.Index(range(4))
  1186. result = numeric_index.get_indexer([True, False, True])
  1187. expected = np.array([-1, -1, -1], dtype=np.intp)
  1188. tm.assert_numpy_array_equal(result, expected)
  1189. def test_get_indexer_with_NA_values(self, unique_nulls_fixture,
  1190. unique_nulls_fixture2):
  1191. # GH 22332
  1192. # check pairwise, that no pair of na values
  1193. # is mangled
  1194. if unique_nulls_fixture is unique_nulls_fixture2:
  1195. return # skip it, values are not unique
  1196. arr = np.array([unique_nulls_fixture,
  1197. unique_nulls_fixture2], dtype=np.object)
  1198. index = pd.Index(arr, dtype=np.object)
  1199. result = index.get_indexer([unique_nulls_fixture,
  1200. unique_nulls_fixture2, 'Unknown'])
  1201. expected = np.array([0, 1, -1], dtype=np.intp)
  1202. tm.assert_numpy_array_equal(result, expected)
  1203. @pytest.mark.parametrize("method", [None, 'pad', 'backfill', 'nearest'])
  1204. def test_get_loc(self, method):
  1205. index = pd.Index([0, 1, 2])
  1206. assert index.get_loc(1, method=method) == 1
  1207. if method:
  1208. assert index.get_loc(1, method=method, tolerance=0) == 1
  1209. @pytest.mark.parametrize("method", [None, 'pad', 'backfill', 'nearest'])
  1210. def test_get_loc_raises_bad_label(self, method):
  1211. index = pd.Index([0, 1, 2])
  1212. if method:
  1213. # Messages vary across versions
  1214. if PY36:
  1215. msg = 'not supported between'
  1216. elif PY35:
  1217. msg = 'unorderable types'
  1218. else:
  1219. if method == 'nearest':
  1220. msg = 'unsupported operand'
  1221. else:
  1222. msg = 'requires scalar valued input'
  1223. else:
  1224. msg = 'invalid key'
  1225. with pytest.raises(TypeError, match=msg):
  1226. index.get_loc([1, 2], method=method)
  1227. @pytest.mark.parametrize("method,loc", [
  1228. ('pad', 1), ('backfill', 2), ('nearest', 1)])
  1229. def test_get_loc_tolerance(self, method, loc):
  1230. index = pd.Index([0, 1, 2])
  1231. assert index.get_loc(1.1, method) == loc
  1232. assert index.get_loc(1.1, method, tolerance=1) == loc
  1233. @pytest.mark.parametrize("method", ['pad', 'backfill', 'nearest'])
  1234. def test_get_loc_outside_tolerance_raises(self, method):
  1235. index = pd.Index([0, 1, 2])
  1236. with pytest.raises(KeyError, match='1.1'):
  1237. index.get_loc(1.1, method, tolerance=0.05)
  1238. def test_get_loc_bad_tolerance_raises(self):
  1239. index = pd.Index([0, 1, 2])
  1240. with pytest.raises(ValueError, match='must be numeric'):
  1241. index.get_loc(1.1, 'nearest', tolerance='invalid')
  1242. def test_get_loc_tolerance_no_method_raises(self):
  1243. index = pd.Index([0, 1, 2])
  1244. with pytest.raises(ValueError, match='tolerance .* valid if'):
  1245. index.get_loc(1.1, tolerance=1)
  1246. def test_get_loc_raises_missized_tolerance(self):
  1247. index = pd.Index([0, 1, 2])
  1248. with pytest.raises(ValueError, match='tolerance size must match'):
  1249. index.get_loc(1.1, 'nearest', tolerance=[1, 1])
  1250. def test_get_loc_raises_object_nearest(self):
  1251. index = pd.Index(['a', 'c'])
  1252. with pytest.raises(TypeError, match='unsupported operand type'):
  1253. index.get_loc('a', method='nearest')
  1254. def test_get_loc_raises_object_tolerance(self):
  1255. index = pd.Index(['a', 'c'])
  1256. with pytest.raises(TypeError, match='unsupported operand type'):
  1257. index.get_loc('a', method='pad', tolerance='invalid')
  1258. @pytest.mark.parametrize("dtype", [int, float])
  1259. def test_slice_locs(self, dtype):
  1260. index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
  1261. n = len(index)
  1262. assert index.slice_locs(start=2) == (2, n)
  1263. assert index.slice_locs(start=3) == (3, n)
  1264. assert index.slice_locs(3, 8) == (3, 6)
  1265. assert index.slice_locs(5, 10) == (3, n)
  1266. assert index.slice_locs(end=8) == (0, 6)
  1267. assert index.slice_locs(end=9) == (0, 7)
  1268. # reversed
  1269. index2 = index[::-1]
  1270. assert index2.slice_locs(8, 2) == (2, 6)
  1271. assert index2.slice_locs(7, 3) == (2, 5)
  1272. def test_slice_float_locs(self):
  1273. index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float))
  1274. n = len(index)
  1275. assert index.slice_locs(5.0, 10.0) == (3, n)
  1276. assert index.slice_locs(4.5, 10.5) == (3, 8)
  1277. index2 = index[::-1]
  1278. assert index2.slice_locs(8.5, 1.5) == (2, 6)
  1279. assert index2.slice_locs(10.5, -1) == (0, n)
    @pytest.mark.xfail(reason="Assertions were not correct - see GH#20915")
    def test_slice_ints_with_floats_raises(self):
        """Slicing an integer index with float bounds is expected to raise.

        Marked xfail because the assertions are known to be incorrect
        (see GH#20915).
        """
        # int slicing with floats
        # GH 4892, these are all TypeErrors
        index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int))
        n = len(index)

        pytest.raises(TypeError,
                      lambda: index.slice_locs(5.0, 10.0))
        pytest.raises(TypeError,
                      lambda: index.slice_locs(4.5, 10.5))

        index2 = index[::-1]
        # NOTE(review): the trailing tuples are handed to ``pytest.raises`` as
        # extra positional arguments for the zero-argument lambdas -- confirm
        # whether they were meant to be expected return values instead.
        pytest.raises(TypeError,
                      lambda: index2.slice_locs(8.5, 1.5), (2, 6))
        pytest.raises(TypeError,
                      lambda: index2.slice_locs(10.5, -1), (0, n))
  1295. def test_slice_locs_dup(self):
  1296. index = Index(['a', 'a', 'b', 'c', 'd', 'd'])
  1297. assert index.slice_locs('a', 'd') == (0, 6)
  1298. assert index.slice_locs(end='d') == (0, 6)
  1299. assert index.slice_locs('a', 'c') == (0, 4)
  1300. assert index.slice_locs('b', 'd') == (2, 6)
  1301. index2 = index[::-1]
  1302. assert index2.slice_locs('d', 'a') == (0, 6)
  1303. assert index2.slice_locs(end='a') == (0, 6)
  1304. assert index2.slice_locs('d', 'b') == (0, 4)
  1305. assert index2.slice_locs('c', 'a') == (2, 6)
  1306. @pytest.mark.parametrize("dtype", [int, float])
  1307. def test_slice_locs_dup_numeric(self, dtype):
  1308. index = Index(np.array([10, 12, 12, 14], dtype=dtype))
  1309. assert index.slice_locs(12, 12) == (1, 3)
  1310. assert index.slice_locs(11, 13) == (1, 3)
  1311. index2 = index[::-1]
  1312. assert index2.slice_locs(12, 12) == (1, 3)
  1313. assert index2.slice_locs(13, 11) == (1, 3)
  1314. def test_slice_locs_na(self):
  1315. index = Index([np.nan, 1, 2])
  1316. assert index.slice_locs(1) == (1, 3)
  1317. assert index.slice_locs(np.nan) == (0, 3)
  1318. index = Index([0, np.nan, np.nan, 1, 2])
  1319. assert index.slice_locs(np.nan) == (1, 5)
  1320. def test_slice_locs_na_raises(self):
  1321. index = Index([np.nan, 1, 2])
  1322. with pytest.raises(KeyError, match=''):
  1323. index.slice_locs(start=1.5)
  1324. with pytest.raises(KeyError, match=''):
  1325. index.slice_locs(end=1.5)
  1326. @pytest.mark.parametrize("in_slice,expected", [
  1327. (pd.IndexSlice[::-1], 'yxdcb'), (pd.IndexSlice['b':'y':-1], ''),
  1328. (pd.IndexSlice['b'::-1], 'b'), (pd.IndexSlice[:'b':-1], 'yxdcb'),
  1329. (pd.IndexSlice[:'y':-1], 'y'), (pd.IndexSlice['y'::-1], 'yxdcb'),
  1330. (pd.IndexSlice['y'::-4], 'yb'),
  1331. # absent labels
  1332. (pd.IndexSlice[:'a':-1], 'yxdcb'), (pd.IndexSlice[:'a':-2], 'ydb'),
  1333. (pd.IndexSlice['z'::-1], 'yxdcb'), (pd.IndexSlice['z'::-3], 'yc'),
  1334. (pd.IndexSlice['m'::-1], 'dcb'), (pd.IndexSlice[:'m':-1], 'yx'),
  1335. (pd.IndexSlice['a':'a':-1], ''), (pd.IndexSlice['z':'z':-1], ''),
  1336. (pd.IndexSlice['m':'m':-1], '')
  1337. ])
  1338. def test_slice_locs_negative_step(self, in_slice, expected):
  1339. index = Index(list('bcdxy'))
  1340. s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop,
  1341. in_slice.step)
  1342. result = index[s_start:s_stop:in_slice.step]
  1343. expected = pd.Index(list(expected))
  1344. tm.assert_index_equal(result, expected)
  1345. def test_drop_by_str_label(self):
  1346. # TODO: Parametrize these after replacing self.strIndex with fixture
  1347. n = len(self.strIndex)
  1348. drop = self.strIndex[lrange(5, 10)]
  1349. dropped = self.strIndex.drop(drop)
  1350. expected = self.strIndex[lrange(5) + lrange(10, n)]
  1351. tm.assert_index_equal(dropped, expected)
  1352. dropped = self.strIndex.drop(self.strIndex[0])
  1353. expected = self.strIndex[1:]
  1354. tm.assert_index_equal(dropped, expected)
  1355. @pytest.mark.parametrize("keys", [['foo', 'bar'], ['1', 'bar']])
  1356. def test_drop_by_str_label_raises_missing_keys(self, keys):
  1357. with pytest.raises(KeyError, match=''):
  1358. self.strIndex.drop(keys)
  1359. def test_drop_by_str_label_errors_ignore(self):
  1360. # TODO: Parametrize these after replacing self.strIndex with fixture
  1361. # errors='ignore'
  1362. n = len(self.strIndex)
  1363. drop = self.strIndex[lrange(5, 10)]
  1364. mixed = drop.tolist() + ['foo']
  1365. dropped = self.strIndex.drop(mixed, errors='ignore')
  1366. expected = self.strIndex[lrange(5) + lrange(10, n)]
  1367. tm.assert_index_equal(dropped, expected)
  1368. dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore')
  1369. expected = self.strIndex[lrange(n)]
  1370. tm.assert_index_equal(dropped, expected)
  1371. def test_drop_by_numeric_label_loc(self):
  1372. # TODO: Parametrize numeric and str tests after self.strIndex fixture
  1373. index = Index([1, 2, 3])
  1374. dropped = index.drop(1)
  1375. expected = Index([2, 3])
  1376. tm.assert_index_equal(dropped, expected)
  1377. def test_drop_by_numeric_label_raises_missing_keys(self):
  1378. index = Index([1, 2, 3])
  1379. with pytest.raises(KeyError, match=''):
  1380. index.drop([3, 4])
  1381. @pytest.mark.parametrize("key,expected", [
  1382. (4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))])
  1383. def test_drop_by_numeric_label_errors_ignore(self, key, expected):
  1384. index = Index([1, 2, 3])
  1385. dropped = index.drop(key, errors='ignore')
  1386. tm.assert_index_equal(dropped, expected)
  1387. @pytest.mark.parametrize("values", [['a', 'b', ('c', 'd')],
  1388. ['a', ('c', 'd'), 'b'],
  1389. [('c', 'd'), 'a', 'b']])
  1390. @pytest.mark.parametrize("to_drop", [[('c', 'd'), 'a'], ['a', ('c', 'd')]])
  1391. def test_drop_tuple(self, values, to_drop):
  1392. # GH 18304
  1393. index = pd.Index(values)
  1394. expected = pd.Index(['b'])
  1395. result = index.drop(to_drop)
  1396. tm.assert_index_equal(result, expected)
  1397. removed = index.drop(to_drop[0])
  1398. for drop_me in to_drop[1], [to_drop[1]]:
  1399. result = removed.drop(drop_me)
  1400. tm.assert_index_equal(result, expected)
  1401. removed = index.drop(to_drop[1])
  1402. for drop_me in to_drop[1], [to_drop[1]]:
  1403. pytest.raises(KeyError, removed.drop, drop_me)
  1404. @pytest.mark.parametrize("method,expected,sort", [
  1405. ('intersection', np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
  1406. dtype=[('num', int), ('let', 'a1')]),
  1407. False),
  1408. ('intersection', np.array([(1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')],
  1409. dtype=[('num', int), ('let', 'a1')]),
  1410. None),
  1411. ('union', np.array([(1, 'A'), (1, 'B'), (1, 'C'), (2, 'A'), (2, 'B'),
  1412. (2, 'C')], dtype=[('num', int), ('let', 'a1')]),
  1413. None)
  1414. ])
  1415. def test_tuple_union_bug(self, method, expected, sort):
  1416. index1 = Index(np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
  1417. dtype=[('num', int), ('let', 'a1')]))
  1418. index2 = Index(np.array([(1, 'A'), (2, 'A'), (1, 'B'),
  1419. (2, 'B'), (1, 'C'), (2, 'C')],
  1420. dtype=[('num', int), ('let', 'a1')]))
  1421. result = getattr(index1, method)(index2, sort=sort)
  1422. assert result.ndim == 1
  1423. expected = Index(expected)
  1424. tm.assert_index_equal(result, expected)
  1425. @pytest.mark.parametrize("attr", [
  1426. 'is_monotonic_increasing', 'is_monotonic_decreasing',
  1427. '_is_strictly_monotonic_increasing',
  1428. '_is_strictly_monotonic_decreasing'])
  1429. def test_is_monotonic_incomparable(self, attr):
  1430. index = Index([5, datetime.now(), 7])
  1431. assert not getattr(index, attr)
  1432. def test_get_set_value(self):
  1433. # TODO: Remove function? GH 19728
  1434. values = np.random.randn(100)
  1435. date = self.dateIndex[67]
  1436. assert_almost_equal(self.dateIndex.get_value(values, date), values[67])
  1437. self.dateIndex.set_value(values, date, 10)
  1438. assert values[67] == 10
  1439. @pytest.mark.parametrize("values", [
  1440. ['foo', 'bar', 'quux'], {'foo', 'bar', 'quux'}])
  1441. @pytest.mark.parametrize("index,expected", [
  1442. (Index(['qux', 'baz', 'foo', 'bar']),
  1443. np.array([False, False, True, True])),
  1444. (Index([]), np.array([], dtype=bool)) # empty
  1445. ])
  1446. def test_isin(self, values, index, expected):
  1447. result = index.isin(values)
  1448. tm.assert_numpy_array_equal(result, expected)
  1449. def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2):
  1450. # Test cartesian product of null fixtures and ensure that we don't
  1451. # mangle the various types (save a corner case with PyPy)
  1452. # all nans are the same
  1453. if (isinstance(nulls_fixture, float) and
  1454. isinstance(nulls_fixture2, float) and
  1455. math.isnan(nulls_fixture) and
  1456. math.isnan(nulls_fixture2)):
  1457. tm.assert_numpy_array_equal(Index(['a', nulls_fixture]).isin(
  1458. [nulls_fixture2]), np.array([False, True]))
  1459. elif nulls_fixture is nulls_fixture2: # should preserve NA type
  1460. tm.assert_numpy_array_equal(Index(['a', nulls_fixture]).isin(
  1461. [nulls_fixture2]), np.array([False, True]))
  1462. else:
  1463. tm.assert_numpy_array_equal(Index(['a', nulls_fixture]).isin(
  1464. [nulls_fixture2]), np.array([False, False]))
  1465. def test_isin_nan_common_float64(self, nulls_fixture):
  1466. if nulls_fixture is pd.NaT:
  1467. pytest.skip("pd.NaT not compatible with Float64Index")
  1468. # Float64Index overrides isin, so must be checked separately
  1469. tm.assert_numpy_array_equal(Float64Index([1.0, nulls_fixture]).isin(
  1470. [np.nan]), np.array([False, True]))
  1471. # we cannot compare NaT with NaN
  1472. tm.assert_numpy_array_equal(Float64Index([1.0, nulls_fixture]).isin(
  1473. [pd.NaT]), np.array([False, False]))
  1474. @pytest.mark.parametrize("level", [0, -1])
  1475. @pytest.mark.parametrize("index", [
  1476. Index(['qux', 'baz', 'foo', 'bar']),
  1477. # Float64Index overrides isin, so must be checked separately
  1478. Float64Index([1.0, 2.0, 3.0, 4.0])])
  1479. def test_isin_level_kwarg(self, level, index):
  1480. values = index.tolist()[-2:] + ['nonexisting']
  1481. expected = np.array([False, False, True, True])
  1482. tm.assert_numpy_array_equal(expected, index.isin(values, level=level))
  1483. index.name = 'foobar'
  1484. tm.assert_numpy_array_equal(expected,
  1485. index.isin(values, level='foobar'))
  1486. @pytest.mark.parametrize("level", [1, 10, -2])
  1487. @pytest.mark.parametrize("index", [
  1488. Index(['qux', 'baz', 'foo', 'bar']),
  1489. # Float64Index overrides isin, so must be checked separately
  1490. Float64Index([1.0, 2.0, 3.0, 4.0])])
  1491. def test_isin_level_kwarg_raises_bad_index(self, level, index):
  1492. with pytest.raises(IndexError, match='Too many levels'):
  1493. index.isin([], level=level)
  1494. @pytest.mark.parametrize("level", [1.0, 'foobar', 'xyzzy', np.nan])
  1495. @pytest.mark.parametrize("index", [
  1496. Index(['qux', 'baz', 'foo', 'bar']),
  1497. Float64Index([1.0, 2.0, 3.0, 4.0])])
  1498. def test_isin_level_kwarg_raises_key(self, level, index):
  1499. with pytest.raises(KeyError, match='must be same as name'):
  1500. index.isin([], level=level)
  1501. @pytest.mark.parametrize("empty", [[], Series(), np.array([])])
  1502. def test_isin_empty(self, empty):
  1503. # see gh-16991
  1504. index = Index(["a", "b"])
  1505. expected = np.array([False, False])
  1506. result = index.isin(empty)
  1507. tm.assert_numpy_array_equal(expected, result)
  1508. @pytest.mark.parametrize("values", [
  1509. [1, 2, 3, 4],
  1510. [1., 2., 3., 4.],
  1511. [True, True, True, True],
  1512. ["foo", "bar", "baz", "qux"],
  1513. pd.date_range('2018-01-01', freq='D', periods=4)])
  1514. def test_boolean_cmp(self, values):
  1515. index = Index(values)
  1516. result = (index == values)
  1517. expected = np.array([True, True, True, True], dtype=bool)
  1518. tm.assert_numpy_array_equal(result, expected)
  1519. @pytest.mark.parametrize("name,level", [
  1520. (None, 0), ('a', 'a')])
  1521. def test_get_level_values(self, name, level):
  1522. expected = self.strIndex.copy()
  1523. if name:
  1524. expected.name = name
  1525. result = expected.get_level_values(level)
  1526. tm.assert_index_equal(result, expected)
  1527. def test_slice_keep_name(self):
  1528. index = Index(['a', 'b'], name='asdf')
  1529. assert index.name == index[1:].name
  1530. # instance attributes of the form self.<name>Index
  1531. @pytest.mark.parametrize('index_kind',
  1532. ['unicode', 'str', 'date', 'int', 'float'])
  1533. def test_join_self(self, join_type, index_kind):
  1534. res = getattr(self, '{0}Index'.format(index_kind))
  1535. joined = res.join(res, how=join_type)
  1536. assert res is joined
  1537. @pytest.mark.parametrize("method", ['strip', 'rstrip', 'lstrip'])
  1538. def test_str_attribute(self, method):
  1539. # GH9068
  1540. index = Index([' jack', 'jill ', ' jesse ', 'frank'])
  1541. expected = Index([getattr(str, method)(x) for x in index.values])
  1542. result = getattr(index.str, method)()
  1543. tm.assert_index_equal(result, expected)
  1544. @pytest.mark.parametrize("index", [
  1545. Index(range(5)), tm.makeDateIndex(10),
  1546. MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]),
  1547. period_range(start='2000', end='2010', freq='A')])
  1548. def test_str_attribute_raises(self, index):
  1549. with pytest.raises(AttributeError, match='only use .str accessor'):
  1550. index.str.repeat(2)
  1551. @pytest.mark.parametrize("expand,expected", [
  1552. (None, Index([['a', 'b', 'c'], ['d', 'e'], ['f']])),
  1553. (False, Index([['a', 'b', 'c'], ['d', 'e'], ['f']])),
  1554. (True, MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan),
  1555. ('f', np.nan, np.nan)]))])
  1556. def test_str_split(self, expand, expected):
  1557. index = Index(['a b c', 'd e', 'f'])
  1558. if expand is not None:
  1559. result = index.str.split(expand=expand)
  1560. else:
  1561. result = index.str.split()
  1562. tm.assert_index_equal(result, expected)
  1563. def test_str_bool_return(self):
  1564. # test boolean case, should return np.array instead of boolean Index
  1565. index = Index(['a1', 'a2', 'b1', 'b2'])
  1566. result = index.str.startswith('a')
  1567. expected = np.array([True, True, False, False])
  1568. tm.assert_numpy_array_equal(result, expected)
  1569. assert isinstance(result, np.ndarray)
  1570. def test_str_bool_series_indexing(self):
  1571. index = Index(['a1', 'a2', 'b1', 'b2'])
  1572. s = Series(range(4), index=index)
  1573. result = s[s.index.str.startswith('a')]
  1574. expected = Series(range(2), index=['a1', 'a2'])
  1575. tm.assert_series_equal(result, expected)
  1576. @pytest.mark.parametrize("index,expected", [
  1577. (Index(list('abcd')), True), (Index(range(4)), False)])
  1578. def test_tab_completion(self, index, expected):
  1579. # GH 9910
  1580. result = 'str' in dir(index)
  1581. assert result == expected
  1582. def test_indexing_doesnt_change_class(self):
  1583. index = Index([1, 2, 3, 'a', 'b', 'c'])
  1584. assert index[1:3].identical(pd.Index([2, 3], dtype=np.object_))
  1585. assert index[[0, 1]].identical(pd.Index([1, 2], dtype=np.object_))
  1586. def test_outer_join_sort(self):
  1587. left_index = Index(np.random.permutation(15))
  1588. right_index = tm.makeDateIndex(10)
  1589. with tm.assert_produces_warning(RuntimeWarning):
  1590. result = left_index.join(right_index, how='outer')
  1591. # right_index in this case because DatetimeIndex has join precedence
  1592. # over Int64Index
  1593. with tm.assert_produces_warning(RuntimeWarning):
  1594. expected = right_index.astype(object).union(
  1595. left_index.astype(object))
  1596. tm.assert_index_equal(result, expected)
  1597. def test_nan_first_take_datetime(self):
  1598. index = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')])
  1599. result = index.take([-1, 0, 1])
  1600. expected = Index([index[-1], index[0], index[1]])
  1601. tm.assert_index_equal(result, expected)
  1602. def test_take_fill_value(self):
  1603. # GH 12631
  1604. index = pd.Index(list('ABC'), name='xxx')
  1605. result = index.take(np.array([1, 0, -1]))
  1606. expected = pd.Index(list('BAC'), name='xxx')
  1607. tm.assert_index_equal(result, expected)
  1608. # fill_value
  1609. result = index.take(np.array([1, 0, -1]), fill_value=True)
  1610. expected = pd.Index(['B', 'A', np.nan], name='xxx')
  1611. tm.assert_index_equal(result, expected)
  1612. # allow_fill=False
  1613. result = index.take(np.array([1, 0, -1]), allow_fill=False,
  1614. fill_value=True)
  1615. expected = pd.Index(['B', 'A', 'C'], name='xxx')
  1616. tm.assert_index_equal(result, expected)
  1617. def test_take_fill_value_none_raises(self):
  1618. index = pd.Index(list('ABC'), name='xxx')
  1619. msg = ('When allow_fill=True and fill_value is not None, '
  1620. 'all indices must be >= -1')
  1621. with pytest.raises(ValueError, match=msg):
  1622. index.take(np.array([1, 0, -2]), fill_value=True)
  1623. with pytest.raises(ValueError, match=msg):
  1624. index.take(np.array([1, 0, -5]), fill_value=True)
  1625. def test_take_bad_bounds_raises(self):
  1626. index = pd.Index(list('ABC'), name='xxx')
  1627. with pytest.raises(IndexError, match='out of bounds'):
  1628. index.take(np.array([1, -5]))
  1629. @pytest.mark.parametrize("name", [None, 'foobar'])
  1630. @pytest.mark.parametrize("labels", [
  1631. [], np.array([]), ['A', 'B', 'C'], ['C', 'B', 'A'],
  1632. np.array(['A', 'B', 'C']), np.array(['C', 'B', 'A']),
  1633. # Must preserve name even if dtype changes
  1634. pd.date_range('20130101', periods=3).values,
  1635. pd.date_range('20130101', periods=3).tolist()])
  1636. def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name,
  1637. labels):
  1638. # GH6552
  1639. index = pd.Index([0, 1, 2])
  1640. index.name = name
  1641. assert index.reindex(labels)[0].name == name
  1642. @pytest.mark.parametrize("labels", [
  1643. [], np.array([]), np.array([], dtype=np.int64)])
  1644. def test_reindex_preserves_type_if_target_is_empty_list_or_array(self,
  1645. labels):
  1646. # GH7774
  1647. index = pd.Index(list('abc'))
  1648. assert index.reindex(labels)[0].dtype.type == np.object_
  1649. @pytest.mark.parametrize("labels,dtype", [
  1650. (pd.Int64Index([]), np.int64),
  1651. (pd.Float64Index([]), np.float64),
  1652. (pd.DatetimeIndex([]), np.datetime64)])
  1653. def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self,
  1654. labels,
  1655. dtype):
  1656. # GH7774
  1657. index = pd.Index(list('abc'))
  1658. assert index.reindex(labels)[0].dtype.type == dtype
  1659. def test_reindex_no_type_preserve_target_empty_mi(self):
  1660. index = pd.Index(list('abc'))
  1661. result = index.reindex(pd.MultiIndex(
  1662. [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0]
  1663. assert result.levels[0].dtype.type == np.int64
  1664. assert result.levels[1].dtype.type == np.float64
  1665. def test_groupby(self):
  1666. index = Index(range(5))
  1667. result = index.groupby(np.array([1, 1, 2, 2, 2]))
  1668. expected = {1: pd.Index([0, 1]), 2: pd.Index([2, 3, 4])}
  1669. tm.assert_dict_equal(result, expected)
  1670. @pytest.mark.parametrize("mi,expected", [
  1671. (MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])),
  1672. (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False]))])
  1673. def test_equals_op_multiindex(self, mi, expected):
  1674. # GH9785
  1675. # test comparisons of multiindex
  1676. df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
  1677. result = df.index == mi
  1678. tm.assert_numpy_array_equal(result, expected)
  1679. def test_equals_op_multiindex_identify(self):
  1680. df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
  1681. result = df.index == df.index
  1682. expected = np.array([True, True])
  1683. tm.assert_numpy_array_equal(result, expected)
  1684. @pytest.mark.parametrize("index", [
  1685. MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]),
  1686. Index(['foo', 'bar', 'baz'])])
  1687. def test_equals_op_mismatched_multiindex_raises(self, index):
  1688. df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1])
  1689. with pytest.raises(ValueError, match="Lengths must match"):
  1690. df.index == index
  1691. def test_equals_op_index_vs_mi_same_length(self):
  1692. mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
  1693. index = Index(['foo', 'bar', 'baz'])
  1694. result = mi == index
  1695. expected = np.array([False, False, False])
  1696. tm.assert_numpy_array_equal(result, expected)
  1697. @pytest.mark.parametrize("dt_conv", [
  1698. pd.to_datetime, pd.to_timedelta])
  1699. def test_dt_conversion_preserves_name(self, dt_conv):
  1700. # GH 10875
  1701. index = pd.Index(['01:02:03', '01:02:04'], name='label')
  1702. assert index.name == dt_conv(index).name
  1703. @pytest.mark.skipif(not PY3, reason="compat test")
  1704. @pytest.mark.parametrize("index,expected", [
  1705. # ASCII
  1706. # short
  1707. (pd.Index(['a', 'bb', 'ccc']),
  1708. u"""Index(['a', 'bb', 'ccc'], dtype='object')"""),
  1709. # multiple lines
  1710. (pd.Index(['a', 'bb', 'ccc'] * 10),
  1711. u"""\
  1712. Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
  1713. 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
  1714. 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
  1715. dtype='object')"""),
  1716. # truncated
  1717. (pd.Index(['a', 'bb', 'ccc'] * 100),
  1718. u"""\
  1719. Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
  1720. ...
  1721. 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
  1722. dtype='object', length=300)"""),
  1723. # Non-ASCII
  1724. # short
  1725. (pd.Index([u'あ', u'いい', u'ううう']),
  1726. u"""Index(['あ', 'いい', 'ううう'], dtype='object')"""),
  1727. # multiple lines
  1728. (pd.Index([u'あ', u'いい', u'ううう'] * 10),
  1729. (u"Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
  1730. u"'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
  1731. u" 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
  1732. u"'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
  1733. u" 'あ', 'いい', 'ううう', 'あ', 'いい', "
  1734. u"'ううう'],\n"
  1735. u" dtype='object')")),
  1736. # truncated
  1737. (pd.Index([u'あ', u'いい', u'ううう'] * 100),
  1738. (u"Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
  1739. u"'あ', 'いい', 'ううう', 'あ',\n"
  1740. u" ...\n"
  1741. u" 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
  1742. u"'ううう', 'あ', 'いい', 'ううう'],\n"
  1743. u" dtype='object', length=300)"))])
  1744. def test_string_index_repr(self, index, expected):
  1745. result = repr(index)
  1746. assert result == expected
  1747. @pytest.mark.skipif(PY3, reason="compat test")
  1748. @pytest.mark.parametrize("index,expected", [
  1749. # ASCII
  1750. # short
  1751. (pd.Index(['a', 'bb', 'ccc']),
  1752. u"""Index([u'a', u'bb', u'ccc'], dtype='object')"""),
  1753. # multiple lines
  1754. (pd.Index(['a', 'bb', 'ccc'] * 10),
  1755. u"""\
  1756. Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
  1757. u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb',
  1758. u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'],
  1759. dtype='object')"""),
  1760. # truncated
  1761. (pd.Index(['a', 'bb', 'ccc'] * 100),
  1762. u"""\
  1763. Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
  1764. ...
  1765. u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'],
  1766. dtype='object', length=300)"""),
  1767. # Non-ASCII
  1768. # short
  1769. (pd.Index([u'あ', u'いい', u'ううう']),
  1770. u"""Index([u'あ', u'いい', u'ううう'], dtype='object')"""),
  1771. # multiple lines
  1772. (pd.Index([u'あ', u'いい', u'ううう'] * 10),
  1773. (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', "
  1774. u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n"
  1775. u" u'いい', u'ううう', u'あ', u'いい', u'ううう', "
  1776. u"u'あ', u'いい', u'ううう', u'あ', u'いい',\n"
  1777. u" u'ううう', u'あ', u'いい', u'ううう', u'あ', "
  1778. u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n"
  1779. u" dtype='object')")),
  1780. # truncated
  1781. (pd.Index([u'あ', u'いい', u'ううう'] * 100),
  1782. (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', "
  1783. u"u'ううう', u'あ', u'いい', u'ううう', u'あ',\n"
  1784. u" ...\n"
  1785. u" u'ううう', u'あ', u'いい', u'ううう', u'あ', "
  1786. u"u'いい', u'ううう', u'あ', u'いい', u'ううう'],\n"
  1787. u" dtype='object', length=300)"))])
  1788. def test_string_index_repr_compat(self, index, expected):
  1789. result = unicode(index) # noqa
  1790. assert result == expected
  1791. @pytest.mark.skipif(not PY3, reason="compat test")
  1792. @pytest.mark.parametrize("index,expected", [
  1793. # short
  1794. (pd.Index([u'あ', u'いい', u'ううう']),
  1795. (u"Index(['あ', 'いい', 'ううう'], "
  1796. u"dtype='object')")),
  1797. # multiple lines
  1798. (pd.Index([u'あ', u'いい', u'ううう'] * 10),
  1799. (u"Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
  1800. u"'ううう', 'あ', 'いい', 'ううう',\n"
  1801. u" 'あ', 'いい', 'ううう', 'あ', 'いい', "
  1802. u"'ううう', 'あ', 'いい', 'ううう',\n"
  1803. u" 'あ', 'いい', 'ううう', 'あ', 'いい', "
  1804. u"'ううう', 'あ', 'いい', 'ううう',\n"
  1805. u" 'あ', 'いい', 'ううう'],\n"
  1806. u" dtype='object')""")),
  1807. # truncated
  1808. (pd.Index([u'あ', u'いい', u'ううう'] * 100),
  1809. (u"Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
  1810. u"'ううう', 'あ', 'いい', 'ううう',\n"
  1811. u" 'あ',\n"
  1812. u" ...\n"
  1813. u" 'ううう', 'あ', 'いい', 'ううう', 'あ', "
  1814. u"'いい', 'ううう', 'あ', 'いい',\n"
  1815. u" 'ううう'],\n"
  1816. u" dtype='object', length=300)"))])
  1817. def test_string_index_repr_with_unicode_option(self, index, expected):
  1818. # Enable Unicode option -----------------------------------------
  1819. with cf.option_context('display.unicode.east_asian_width', True):
  1820. result = repr(index)
  1821. assert result == expected
  1822. @pytest.mark.skipif(PY3, reason="compat test")
  1823. @pytest.mark.parametrize("index,expected", [
  1824. # short
  1825. (pd.Index([u'あ', u'いい', u'ううう']),
  1826. (u"Index([u'あ', u'いい', u'ううう'], "
  1827. u"dtype='object')")),
  1828. # multiple lines
  1829. (pd.Index([u'あ', u'いい', u'ううう'] * 10),
  1830. (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', "
  1831. u"u'ううう', u'あ', u'いい',\n"
  1832. u" u'ううう', u'あ', u'いい', u'ううう', "
  1833. u"u'あ', u'いい', u'ううう', u'あ',\n"
  1834. u" u'いい', u'ううう', u'あ', u'いい', "
  1835. u"u'ううう', u'あ', u'いい',\n"
  1836. u" u'ううう', u'あ', u'いい', u'ううう', "
  1837. u"u'あ', u'いい', u'ううう'],\n"
  1838. u" dtype='object')")),
  1839. # truncated
  1840. (pd.Index([u'あ', u'いい', u'ううう'] * 100),
  1841. (u"Index([u'あ', u'いい', u'ううう', u'あ', u'いい', "
  1842. u"u'ううう', u'あ', u'いい',\n"
  1843. u" u'ううう', u'あ',\n"
  1844. u" ...\n"
  1845. u" u'ううう', u'あ', u'いい', u'ううう', "
  1846. u"u'あ', u'いい', u'ううう', u'あ',\n"
  1847. u" u'いい', u'ううう'],\n"
  1848. u" dtype='object', length=300)"))])
  1849. def test_string_index_repr_with_unicode_option_compat(self, index,
  1850. expected):
  1851. # Enable Unicode option -----------------------------------------
  1852. with cf.option_context('display.unicode.east_asian_width', True):
  1853. result = unicode(index) # noqa
  1854. assert result == expected
  1855. def test_cached_properties_not_settable(self):
  1856. index = pd.Index([1, 2, 3])
  1857. with pytest.raises(AttributeError, match="Can't set attribute"):
  1858. index.is_unique = False
  1859. def test_get_duplicates_deprecated(self):
  1860. index = pd.Index([1, 2, 3])
  1861. with tm.assert_produces_warning(FutureWarning):
  1862. index.get_duplicates()
  1863. def test_tab_complete_warning(self, ip):
  1864. # https://github.com/pandas-dev/pandas/issues/16409
  1865. pytest.importorskip('IPython', minversion="6.0.0")
  1866. from IPython.core.completer import provisionalcompleter
  1867. code = "import pandas as pd; idx = pd.Index([1, 2])"
  1868. ip.run_code(code)
  1869. with tm.assert_produces_warning(None):
  1870. with provisionalcompleter('ignore'):
  1871. list(ip.Completer.completions('idx.', 4))
  1872. class TestMixedIntIndex(Base):
  1873. # Mostly the tests from common.py for which the results differ
  1874. # in py2 and py3 because ints and strings are uncomparable in py3
  1875. # (GH 13514)
  1876. _holder = Index
  1877. def setup_method(self, method):
  1878. self.indices = dict(mixedIndex=Index([0, 'a', 1, 'b', 2, 'c']))
  1879. self.setup_indices()
  1880. def create_index(self):
  1881. return self.mixedIndex
  1882. def test_argsort(self):
  1883. index = self.create_index()
  1884. if PY36:
  1885. with pytest.raises(TypeError, match="'>|<' not supported"):
  1886. result = index.argsort()
  1887. elif PY3:
  1888. with pytest.raises(TypeError, match="unorderable types"):
  1889. result = index.argsort()
  1890. else:
  1891. result = index.argsort()
  1892. expected = np.array(index).argsort()
  1893. tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  1894. def test_numpy_argsort(self):
  1895. index = self.create_index()
  1896. if PY36:
  1897. with pytest.raises(TypeError, match="'>|<' not supported"):
  1898. result = np.argsort(index)
  1899. elif PY3:
  1900. with pytest.raises(TypeError, match="unorderable types"):
  1901. result = np.argsort(index)
  1902. else:
  1903. result = np.argsort(index)
  1904. expected = index.argsort()
  1905. tm.assert_numpy_array_equal(result, expected)
  1906. def test_copy_name(self):
  1907. # Check that "name" argument passed at initialization is honoured
  1908. # GH12309
  1909. index = self.create_index()
  1910. first = index.__class__(index, copy=True, name='mario')
  1911. second = first.__class__(first, copy=False)
  1912. # Even though "copy=False", we want a new object.
  1913. assert first is not second
  1914. tm.assert_index_equal(first, second)
  1915. assert first.name == 'mario'
  1916. assert second.name == 'mario'
  1917. s1 = Series(2, index=first)
  1918. s2 = Series(3, index=second[:-1])
  1919. s3 = s1 * s2
  1920. assert s3.index.name == 'mario'
  1921. def test_copy_name2(self):
  1922. # Check that adding a "name" parameter to the copy is honored
  1923. # GH14302
  1924. index = pd.Index([1, 2], name='MyName')
  1925. index1 = index.copy()
  1926. tm.assert_index_equal(index, index1)
  1927. index2 = index.copy(name='NewName')
  1928. tm.assert_index_equal(index, index2, check_names=False)
  1929. assert index.name == 'MyName'
  1930. assert index2.name == 'NewName'
  1931. index3 = index.copy(names=['NewName'])
  1932. tm.assert_index_equal(index, index3, check_names=False)
  1933. assert index.name == 'MyName'
  1934. assert index.names == ['MyName']
  1935. assert index3.name == 'NewName'
  1936. assert index3.names == ['NewName']
  1937. def test_union_base(self):
  1938. index = self.create_index()
  1939. first = index[3:]
  1940. second = index[:5]
  1941. result = first.union(second)
  1942. expected = Index([0, 1, 2, 'a', 'b', 'c'])
  1943. tm.assert_index_equal(result, expected)
  1944. @pytest.mark.parametrize("klass", [
  1945. np.array, Series, list])
  1946. def test_union_different_type_base(self, klass):
  1947. # GH 10149
  1948. index = self.create_index()
  1949. first = index[3:]
  1950. second = index[:5]
  1951. result = first.union(klass(second.values))
  1952. assert tm.equalContents(result, index)
  1953. def test_unique_na(self):
  1954. idx = pd.Index([2, np.nan, 2, 1], name='my_index')
  1955. expected = pd.Index([2, np.nan, 1], name='my_index')
  1956. result = idx.unique()
  1957. tm.assert_index_equal(result, expected)
  1958. @pytest.mark.parametrize("sort", [None, False])
  1959. def test_intersection_base(self, sort):
  1960. # (same results for py2 and py3 but sortedness not tested elsewhere)
  1961. index = self.create_index()
  1962. first = index[:5]
  1963. second = index[:3]
  1964. expected = Index([0, 1, 'a']) if sort is None else Index([0, 'a', 1])
  1965. result = first.intersection(second, sort=sort)
  1966. tm.assert_index_equal(result, expected)
  1967. @pytest.mark.parametrize("klass", [
  1968. np.array, Series, list])
  1969. @pytest.mark.parametrize("sort", [None, False])
  1970. def test_intersection_different_type_base(self, klass, sort):
  1971. # GH 10149
  1972. index = self.create_index()
  1973. first = index[:5]
  1974. second = index[:3]
  1975. result = first.intersection(klass(second.values), sort=sort)
  1976. assert tm.equalContents(result, second)
  1977. @pytest.mark.parametrize("sort", [None, False])
  1978. def test_difference_base(self, sort):
  1979. # (same results for py2 and py3 but sortedness not tested elsewhere)
  1980. index = self.create_index()
  1981. first = index[:4]
  1982. second = index[3:]
  1983. result = first.difference(second, sort)
  1984. expected = Index([0, 'a', 1])
  1985. if sort is None:
  1986. expected = Index(safe_sort(expected))
  1987. tm.assert_index_equal(result, expected)
  1988. def test_symmetric_difference(self):
  1989. # (same results for py2 and py3 but sortedness not tested elsewhere)
  1990. index = self.create_index()
  1991. first = index[:4]
  1992. second = index[3:]
  1993. result = first.symmetric_difference(second)
  1994. expected = Index([0, 1, 2, 'a', 'c'])
  1995. tm.assert_index_equal(result, expected)
  1996. def test_logical_compat(self):
  1997. index = self.create_index()
  1998. assert index.all() == index.values.all()
  1999. assert index.any() == index.values.any()
  2000. @pytest.mark.parametrize("how", ['any', 'all'])
  2001. @pytest.mark.parametrize("dtype", [
  2002. None, object, 'category'])
  2003. @pytest.mark.parametrize("vals,expected", [
  2004. ([1, 2, 3], [1, 2, 3]), ([1., 2., 3.], [1., 2., 3.]),
  2005. ([1., 2., np.nan, 3.], [1., 2., 3.]),
  2006. (['A', 'B', 'C'], ['A', 'B', 'C']),
  2007. (['A', np.nan, 'B', 'C'], ['A', 'B', 'C'])])
  2008. def test_dropna(self, how, dtype, vals, expected):
  2009. # GH 6194
  2010. index = pd.Index(vals, dtype=dtype)
  2011. result = index.dropna(how=how)
  2012. expected = pd.Index(expected, dtype=dtype)
  2013. tm.assert_index_equal(result, expected)
  2014. @pytest.mark.parametrize("how", ['any', 'all'])
  2015. @pytest.mark.parametrize("index,expected", [
  2016. (pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03']),
  2017. pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'])),
  2018. (pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', pd.NaT]),
  2019. pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'])),
  2020. (pd.TimedeltaIndex(['1 days', '2 days', '3 days']),
  2021. pd.TimedeltaIndex(['1 days', '2 days', '3 days'])),
  2022. (pd.TimedeltaIndex([pd.NaT, '1 days', '2 days', '3 days', pd.NaT]),
  2023. pd.TimedeltaIndex(['1 days', '2 days', '3 days'])),
  2024. (pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M'),
  2025. pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M')),
  2026. (pd.PeriodIndex(['2012-02', '2012-04', 'NaT', '2012-05'], freq='M'),
  2027. pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M'))])
  2028. def test_dropna_dt_like(self, how, index, expected):
  2029. result = index.dropna(how=how)
  2030. tm.assert_index_equal(result, expected)
  2031. def test_dropna_invalid_how_raises(self):
  2032. msg = "invalid how option: xxx"
  2033. with pytest.raises(ValueError, match=msg):
  2034. pd.Index([1, 2, 3]).dropna(how='xxx')
  2035. def test_get_combined_index(self):
  2036. result = _get_combined_index([])
  2037. expected = Index([])
  2038. tm.assert_index_equal(result, expected)
  2039. def test_repeat(self):
  2040. repeats = 2
  2041. index = pd.Index([1, 2, 3])
  2042. expected = pd.Index([1, 1, 2, 2, 3, 3])
  2043. result = index.repeat(repeats)
  2044. tm.assert_index_equal(result, expected)
  2045. @pytest.mark.parametrize("index", [
  2046. pd.Index([np.nan]), pd.Index([np.nan, 1]),
  2047. pd.Index([1, 2, np.nan]), pd.Index(['a', 'b', np.nan]),
  2048. pd.to_datetime(['NaT']), pd.to_datetime(['NaT', '2000-01-01']),
  2049. pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']),
  2050. pd.to_timedelta(['1 day', 'NaT'])])
  2051. def test_is_monotonic_na(self, index):
  2052. assert index.is_monotonic_increasing is False
  2053. assert index.is_monotonic_decreasing is False
  2054. assert index._is_strictly_monotonic_increasing is False
  2055. assert index._is_strictly_monotonic_decreasing is False
  2056. def test_repr_summary(self):
  2057. with cf.option_context('display.max_seq_items', 10):
  2058. result = repr(pd.Index(np.arange(1000)))
  2059. assert len(result) < 200
  2060. assert "..." in result
  2061. @pytest.mark.parametrize("klass", [Series, DataFrame])
  2062. def test_int_name_format(self, klass):
  2063. index = Index(['a', 'b', 'c'], name=0)
  2064. result = klass(lrange(3), index=index)
  2065. assert '0' in repr(result)
  2066. def test_print_unicode_columns(self):
  2067. df = pd.DataFrame({u("\u05d0"): [1, 2, 3],
  2068. "\u05d1": [4, 5, 6],
  2069. "c": [7, 8, 9]})
  2070. repr(df.columns) # should not raise UnicodeDecodeError
  2071. @pytest.mark.parametrize("func,compat_func", [
  2072. (str, text_type), # unicode string
  2073. (bytes, str) # byte string
  2074. ])
  2075. def test_with_unicode(self, func, compat_func):
  2076. index = Index(lrange(1000))
  2077. if PY3:
  2078. func(index)
  2079. else:
  2080. compat_func(index)
  2081. def test_intersect_str_dates(self):
  2082. dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
  2083. index1 = Index(dt_dates, dtype=object)
  2084. index2 = Index(['aa'], dtype=object)
  2085. result = index2.intersection(index1)
  2086. expected = Index([], dtype=object)
  2087. tm.assert_index_equal(result, expected)
  2088. class TestIndexUtils(object):
  2089. @pytest.mark.parametrize('data, names, expected', [
  2090. ([[1, 2, 3]], None, Index([1, 2, 3])),
  2091. ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')),
  2092. ([['a', 'a'], ['c', 'd']], None,
  2093. MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])),
  2094. ([['a', 'a'], ['c', 'd']], ['L1', 'L2'],
  2095. MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]],
  2096. names=['L1', 'L2'])),
  2097. ])
  2098. def test_ensure_index_from_sequences(self, data, names, expected):
  2099. result = ensure_index_from_sequences(data, names)
  2100. tm.assert_index_equal(result, expected)
  2101. @pytest.mark.parametrize('opname', ['eq', 'ne', 'le', 'lt', 'ge', 'gt',
  2102. 'add', 'radd', 'sub', 'rsub',
  2103. 'mul', 'rmul', 'truediv', 'rtruediv',
  2104. 'floordiv', 'rfloordiv',
  2105. 'pow', 'rpow', 'mod', 'divmod'])
  2106. def test_generated_op_names(opname, indices):
  2107. index = indices
  2108. if isinstance(index, ABCIndex) and opname == 'rsub':
  2109. # pd.Index.__rsub__ does not exist; though the method does exist
  2110. # for subclasses. see GH#19723
  2111. return
  2112. opname = '__{name}__'.format(name=opname)
  2113. method = getattr(index, opname)
  2114. assert method.__name__ == opname
  2115. @pytest.mark.parametrize('index_maker', tm.index_subclass_makers_generator())
  2116. def test_index_subclass_constructor_wrong_kwargs(index_maker):
  2117. # GH #19348
  2118. with pytest.raises(TypeError, match='unexpected keyword argument'):
  2119. index_maker(foo='bar')
  2120. def test_deprecated_fastpath():
  2121. with tm.assert_produces_warning(FutureWarning):
  2122. idx = pd.Index(
  2123. np.array(['a', 'b'], dtype=object), name='test', fastpath=True)
  2124. expected = pd.Index(['a', 'b'], name='test')
  2125. tm.assert_index_equal(idx, expected)
  2126. with tm.assert_produces_warning(FutureWarning):
  2127. idx = pd.Int64Index(
  2128. np.array([1, 2, 3], dtype='int64'), name='test', fastpath=True)
  2129. expected = pd.Index([1, 2, 3], name='test', dtype='int64')
  2130. tm.assert_index_equal(idx, expected)
  2131. with tm.assert_produces_warning(FutureWarning):
  2132. idx = pd.RangeIndex(0, 5, 2, name='test', fastpath=True)
  2133. expected = pd.RangeIndex(0, 5, 2, name='test')
  2134. tm.assert_index_equal(idx, expected)
  2135. with tm.assert_produces_warning(FutureWarning):
  2136. idx = pd.CategoricalIndex(['a', 'b', 'c'], name='test', fastpath=True)
  2137. expected = pd.CategoricalIndex(['a', 'b', 'c'], name='test')
  2138. tm.assert_index_equal(idx, expected)