test_construction.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. from __future__ import division
  2. from functools import partial
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import lzip
  6. from pandas.core.dtypes.common import is_categorical_dtype
  7. from pandas.core.dtypes.dtypes import IntervalDtype
  8. from pandas import (
  9. Categorical, CategoricalIndex, Float64Index, Index, Int64Index, Interval,
  10. IntervalIndex, date_range, notna, period_range, timedelta_range)
  11. from pandas.core.arrays import IntervalArray
  12. import pandas.core.common as com
  13. import pandas.util.testing as tm
  14. @pytest.fixture(params=[None, 'foo'])
  15. def name(request):
  16. return request.param
  17. class Base(object):
  18. """
  19. Common tests for all variations of IntervalIndex construction. Input data
  20. to be supplied in breaks format, then converted by the subclass method
  21. get_kwargs_from_breaks to the expected format.
  22. """
  23. @pytest.mark.parametrize('breaks', [
  24. [3, 14, 15, 92, 653],
  25. np.arange(10, dtype='int64'),
  26. Int64Index(range(-10, 11)),
  27. Float64Index(np.arange(20, 30, 0.5)),
  28. date_range('20180101', periods=10),
  29. date_range('20180101', periods=10, tz='US/Eastern'),
  30. timedelta_range('1 day', periods=10)])
  31. def test_constructor(self, constructor, breaks, closed, name):
  32. result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
  33. result = constructor(closed=closed, name=name, **result_kwargs)
  34. assert result.closed == closed
  35. assert result.name == name
  36. assert result.dtype.subtype == getattr(breaks, 'dtype', 'int64')
  37. tm.assert_index_equal(result.left, Index(breaks[:-1]))
  38. tm.assert_index_equal(result.right, Index(breaks[1:]))
  39. @pytest.mark.parametrize('breaks, subtype', [
  40. (Int64Index([0, 1, 2, 3, 4]), 'float64'),
  41. (Int64Index([0, 1, 2, 3, 4]), 'datetime64[ns]'),
  42. (Int64Index([0, 1, 2, 3, 4]), 'timedelta64[ns]'),
  43. (Float64Index([0, 1, 2, 3, 4]), 'int64'),
  44. (date_range('2017-01-01', periods=5), 'int64'),
  45. (timedelta_range('1 day', periods=5), 'int64')])
  46. def test_constructor_dtype(self, constructor, breaks, subtype):
  47. # GH 19262: conversion via dtype parameter
  48. expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
  49. expected = constructor(**expected_kwargs)
  50. result_kwargs = self.get_kwargs_from_breaks(breaks)
  51. iv_dtype = IntervalDtype(subtype)
  52. for dtype in (iv_dtype, str(iv_dtype)):
  53. result = constructor(dtype=dtype, **result_kwargs)
  54. tm.assert_index_equal(result, expected)
  55. @pytest.mark.parametrize('breaks', [
  56. [np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
  57. def test_constructor_nan(self, constructor, breaks, closed):
  58. # GH 18421
  59. result_kwargs = self.get_kwargs_from_breaks(breaks)
  60. result = constructor(closed=closed, **result_kwargs)
  61. expected_subtype = np.float64
  62. expected_values = np.array(breaks[:-1], dtype=object)
  63. assert result.closed == closed
  64. assert result.dtype.subtype == expected_subtype
  65. tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
  66. @pytest.mark.parametrize('breaks', [
  67. [],
  68. np.array([], dtype='int64'),
  69. np.array([], dtype='float64'),
  70. np.array([], dtype='datetime64[ns]'),
  71. np.array([], dtype='timedelta64[ns]')])
  72. def test_constructor_empty(self, constructor, breaks, closed):
  73. # GH 18421
  74. result_kwargs = self.get_kwargs_from_breaks(breaks)
  75. result = constructor(closed=closed, **result_kwargs)
  76. expected_values = np.array([], dtype=object)
  77. expected_subtype = getattr(breaks, 'dtype', np.int64)
  78. assert result.empty
  79. assert result.closed == closed
  80. assert result.dtype.subtype == expected_subtype
  81. tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
  82. @pytest.mark.parametrize('breaks', [
  83. tuple('0123456789'),
  84. list('abcdefghij'),
  85. np.array(list('abcdefghij'), dtype=object),
  86. np.array(list('abcdefghij'), dtype='<U1')])
  87. def test_constructor_string(self, constructor, breaks):
  88. # GH 19016
  89. msg = ('category, object, and string subtypes are not supported '
  90. 'for IntervalIndex')
  91. with pytest.raises(TypeError, match=msg):
  92. constructor(**self.get_kwargs_from_breaks(breaks))
  93. @pytest.mark.parametrize('cat_constructor', [
  94. Categorical, CategoricalIndex])
  95. def test_constructor_categorical_valid(self, constructor, cat_constructor):
  96. # GH 21243/21253
  97. if isinstance(constructor, partial) and constructor.func is Index:
  98. # Index is defined to create CategoricalIndex from categorical data
  99. pytest.skip()
  100. breaks = np.arange(10, dtype='int64')
  101. expected = IntervalIndex.from_breaks(breaks)
  102. cat_breaks = cat_constructor(breaks)
  103. result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
  104. result = constructor(**result_kwargs)
  105. tm.assert_index_equal(result, expected)
  106. def test_generic_errors(self, constructor):
  107. # filler input data to be used when supplying invalid kwargs
  108. filler = self.get_kwargs_from_breaks(range(10))
  109. # invalid closed
  110. msg = "invalid option for 'closed': invalid"
  111. with pytest.raises(ValueError, match=msg):
  112. constructor(closed='invalid', **filler)
  113. # unsupported dtype
  114. msg = 'dtype must be an IntervalDtype, got int64'
  115. with pytest.raises(TypeError, match=msg):
  116. constructor(dtype='int64', **filler)
  117. # invalid dtype
  118. msg = "data type 'invalid' not understood"
  119. with pytest.raises(TypeError, match=msg):
  120. constructor(dtype='invalid', **filler)
  121. # no point in nesting periods in an IntervalIndex
  122. periods = period_range('2000-01-01', periods=10)
  123. periods_kwargs = self.get_kwargs_from_breaks(periods)
  124. msg = 'Period dtypes are not supported, use a PeriodIndex instead'
  125. with pytest.raises(ValueError, match=msg):
  126. constructor(**periods_kwargs)
  127. # decreasing values
  128. decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
  129. msg = 'left side of interval must be <= right side'
  130. with pytest.raises(ValueError, match=msg):
  131. constructor(**decreasing_kwargs)
  132. class TestFromArrays(Base):
  133. """Tests specific to IntervalIndex.from_arrays"""
  134. @pytest.fixture
  135. def constructor(self):
  136. return IntervalIndex.from_arrays
  137. def get_kwargs_from_breaks(self, breaks, closed='right'):
  138. """
  139. converts intervals in breaks format to a dictionary of kwargs to
  140. specific to the format expected by IntervalIndex.from_arrays
  141. """
  142. return {'left': breaks[:-1], 'right': breaks[1:]}
  143. def test_constructor_errors(self):
  144. # GH 19016: categorical data
  145. data = Categorical(list('01234abcde'), ordered=True)
  146. msg = ('category, object, and string subtypes are not supported '
  147. 'for IntervalIndex')
  148. with pytest.raises(TypeError, match=msg):
  149. IntervalIndex.from_arrays(data[:-1], data[1:])
  150. # unequal length
  151. left = [0, 1, 2]
  152. right = [2, 3]
  153. msg = 'left and right must have the same length'
  154. with pytest.raises(ValueError, match=msg):
  155. IntervalIndex.from_arrays(left, right)
  156. @pytest.mark.parametrize('left_subtype, right_subtype', [
  157. (np.int64, np.float64), (np.float64, np.int64)])
  158. def test_mixed_float_int(self, left_subtype, right_subtype):
  159. """mixed int/float left/right results in float for both sides"""
  160. left = np.arange(9, dtype=left_subtype)
  161. right = np.arange(1, 10, dtype=right_subtype)
  162. result = IntervalIndex.from_arrays(left, right)
  163. expected_left = Float64Index(left)
  164. expected_right = Float64Index(right)
  165. expected_subtype = np.float64
  166. tm.assert_index_equal(result.left, expected_left)
  167. tm.assert_index_equal(result.right, expected_right)
  168. assert result.dtype.subtype == expected_subtype
  169. class TestFromBreaks(Base):
  170. """Tests specific to IntervalIndex.from_breaks"""
  171. @pytest.fixture
  172. def constructor(self):
  173. return IntervalIndex.from_breaks
  174. def get_kwargs_from_breaks(self, breaks, closed='right'):
  175. """
  176. converts intervals in breaks format to a dictionary of kwargs to
  177. specific to the format expected by IntervalIndex.from_breaks
  178. """
  179. return {'breaks': breaks}
  180. def test_constructor_errors(self):
  181. # GH 19016: categorical data
  182. data = Categorical(list('01234abcde'), ordered=True)
  183. msg = ('category, object, and string subtypes are not supported '
  184. 'for IntervalIndex')
  185. with pytest.raises(TypeError, match=msg):
  186. IntervalIndex.from_breaks(data)
  187. def test_length_one(self):
  188. """breaks of length one produce an empty IntervalIndex"""
  189. breaks = [0]
  190. result = IntervalIndex.from_breaks(breaks)
  191. expected = IntervalIndex.from_breaks([])
  192. tm.assert_index_equal(result, expected)
  193. class TestFromTuples(Base):
  194. """Tests specific to IntervalIndex.from_tuples"""
  195. @pytest.fixture
  196. def constructor(self):
  197. return IntervalIndex.from_tuples
  198. def get_kwargs_from_breaks(self, breaks, closed='right'):
  199. """
  200. converts intervals in breaks format to a dictionary of kwargs to
  201. specific to the format expected by IntervalIndex.from_tuples
  202. """
  203. if len(breaks) == 0:
  204. return {'data': breaks}
  205. tuples = lzip(breaks[:-1], breaks[1:])
  206. if isinstance(breaks, (list, tuple)):
  207. return {'data': tuples}
  208. elif is_categorical_dtype(breaks):
  209. return {'data': breaks._constructor(tuples)}
  210. return {'data': com.asarray_tuplesafe(tuples)}
  211. def test_constructor_errors(self):
  212. # non-tuple
  213. tuples = [(0, 1), 2, (3, 4)]
  214. msg = 'IntervalIndex.from_tuples received an invalid item, 2'
  215. with pytest.raises(TypeError, match=msg.format(t=tuples)):
  216. IntervalIndex.from_tuples(tuples)
  217. # too few/many items
  218. tuples = [(0, 1), (2,), (3, 4)]
  219. msg = 'IntervalIndex.from_tuples requires tuples of length 2, got {t}'
  220. with pytest.raises(ValueError, match=msg.format(t=tuples)):
  221. IntervalIndex.from_tuples(tuples)
  222. tuples = [(0, 1), (2, 3, 4), (5, 6)]
  223. with pytest.raises(ValueError, match=msg.format(t=tuples)):
  224. IntervalIndex.from_tuples(tuples)
  225. def test_na_tuples(self):
  226. # tuple (NA, NA) evaluates the same as NA as an elemenent
  227. na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
  228. idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
  229. idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
  230. tm.assert_index_equal(idx_na_tuple, idx_na_element)
  231. class TestClassConstructors(Base):
  232. """Tests specific to the IntervalIndex/Index constructors"""
  233. @pytest.fixture(params=[IntervalIndex, partial(Index, dtype='interval')],
  234. ids=['IntervalIndex', 'Index'])
  235. def constructor(self, request):
  236. return request.param
  237. def get_kwargs_from_breaks(self, breaks, closed='right'):
  238. """
  239. converts intervals in breaks format to a dictionary of kwargs to
  240. specific to the format expected by the IntervalIndex/Index constructors
  241. """
  242. if len(breaks) == 0:
  243. return {'data': breaks}
  244. ivs = [Interval(l, r, closed) if notna(l) else l
  245. for l, r in zip(breaks[:-1], breaks[1:])]
  246. if isinstance(breaks, list):
  247. return {'data': ivs}
  248. elif is_categorical_dtype(breaks):
  249. return {'data': breaks._constructor(ivs)}
  250. return {'data': np.array(ivs, dtype=object)}
  251. def test_generic_errors(self, constructor):
  252. """
  253. override the base class implementation since errors are handled
  254. differently; checks unnecessary since caught at the Interval level
  255. """
  256. pass
  257. def test_constructor_errors(self, constructor):
  258. # mismatched closed within intervals with no constructor override
  259. ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')]
  260. msg = 'intervals must all be closed on the same side'
  261. with pytest.raises(ValueError, match=msg):
  262. constructor(ivs)
  263. # scalar
  264. msg = (r'IntervalIndex\(...\) must be called with a collection of '
  265. 'some kind, 5 was passed')
  266. with pytest.raises(TypeError, match=msg):
  267. constructor(5)
  268. # not an interval
  269. msg = ("type <(class|type) 'numpy.int64'> with value 0 "
  270. "is not an interval")
  271. with pytest.raises(TypeError, match=msg):
  272. constructor([0, 1])
  273. @pytest.mark.parametrize('data, closed', [
  274. ([], 'both'),
  275. ([np.nan, np.nan], 'neither'),
  276. ([Interval(0, 3, closed='neither'),
  277. Interval(2, 5, closed='neither')], 'left'),
  278. ([Interval(0, 3, closed='left'),
  279. Interval(2, 5, closed='right')], 'neither'),
  280. (IntervalIndex.from_breaks(range(5), closed='both'), 'right')])
  281. def test_override_inferred_closed(self, constructor, data, closed):
  282. # GH 19370
  283. if isinstance(data, IntervalIndex):
  284. tuples = data.to_tuples()
  285. else:
  286. tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
  287. expected = IntervalIndex.from_tuples(tuples, closed=closed)
  288. result = constructor(data, closed=closed)
  289. tm.assert_index_equal(result, expected)
  290. @pytest.mark.parametrize('values_constructor', [
  291. list, np.array, IntervalIndex, IntervalArray])
  292. def test_index_object_dtype(self, values_constructor):
  293. # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
  294. intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
  295. values = values_constructor(intervals)
  296. result = Index(values, dtype=object)
  297. assert type(result) is Index
  298. tm.assert_numpy_array_equal(result.values, np.array(values))
  299. class TestFromIntervals(TestClassConstructors):
  300. """
  301. Tests for IntervalIndex.from_intervals, which is deprecated in favor of the
  302. IntervalIndex constructor. Same tests as the IntervalIndex constructor,
  303. plus deprecation test. Should only need to delete this class when removed.
  304. """
  305. @pytest.fixture
  306. def constructor(self):
  307. def from_intervals_ignore_warnings(*args, **kwargs):
  308. with tm.assert_produces_warning(FutureWarning,
  309. check_stacklevel=False):
  310. return IntervalIndex.from_intervals(*args, **kwargs)
  311. return from_intervals_ignore_warnings
  312. def test_deprecated(self):
  313. ivs = [Interval(0, 1), Interval(1, 2)]
  314. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  315. IntervalIndex.from_intervals(ivs)
  316. @pytest.mark.skip(reason='parent class test that is not applicable')
  317. def test_index_object_dtype(self):
  318. pass