  1. """
  2. Functions for preparing various inputs passed to the DataFrame or Series
  3. constructors before passing them to a BlockManager.
  4. """
  5. from collections import OrderedDict
  6. import numpy as np
  7. import numpy.ma as ma
  8. from pandas._libs import lib
  9. from pandas._libs.tslibs import IncompatibleFrequency
  10. import pandas.compat as compat
  11. from pandas.compat import (
  12. get_range_parameters, lmap, lrange, raise_with_traceback, range)
  13. from pandas.core.dtypes.cast import (
  14. construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na,
  15. construct_1d_object_array_from_listlike, infer_dtype_from_scalar,
  16. maybe_cast_to_datetime, maybe_cast_to_integer_array, maybe_castable,
  17. maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast)
  18. from pandas.core.dtypes.common import (
  19. is_categorical_dtype, is_datetime64tz_dtype, is_dtype_equal,
  20. is_extension_array_dtype, is_extension_type, is_float_dtype,
  21. is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype)
  22. from pandas.core.dtypes.generic import (
  23. ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPandasArray,
  24. ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex)
  25. from pandas.core.dtypes.missing import isna
  26. from pandas.core import algorithms, common as com
  27. from pandas.core.arrays import Categorical, ExtensionArray, period_array
  28. from pandas.core.index import (
  29. Index, _get_objs_combined_axis, _union_indexes, ensure_index)
  30. from pandas.core.indexes import base as ibase
  31. from pandas.core.internals import (
  32. create_block_manager_from_arrays, create_block_manager_from_blocks)
  33. from pandas.core.internals.arrays import extract_array

# ---------------------------------------------------------------------
# BlockManager Interface


def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.
    """
    # figure out the index, if necessary
    if index is None:
        index = extract_index(arrays)
    else:
        index = ensure_index(index)

    # don't force copy because getting jammed in an ndarray anyway
    arrays = _homogenize(arrays, index, dtype)

    # from BlockManager perspective
    axes = [ensure_index(columns), index]

    return create_block_manager_from_arrays(arrays, arr_names, axes)


def masked_rec_array_to_mgr(data, index, columns, dtype, copy):
    """
    Extract from a masked rec array and create the manager.
    """
    # essentially process a record array then fill it
    fill_value = data.fill_value
    fdata = ma.getdata(data)
    if index is None:
        index = get_names_from_index(fdata)
        if index is None:
            index = ibase.default_index(len(data))
    index = ensure_index(index)

    if columns is not None:
        columns = ensure_index(columns)
    arrays, arr_columns = to_arrays(fdata, columns)

    # fill if needed
    new_arrays = []
    for fv, arr, col in zip(fill_value, arrays, arr_columns):
        mask = ma.getmaskarray(data[col])
        if mask.any():
            arr, fv = maybe_upcast(arr, fill_value=fv, copy=True)
            arr[mask] = fv
        new_arrays.append(arr)

    # create the manager
    arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns)
    if columns is None:
        columns = arr_columns

    mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype)

    if copy:
        mgr = mgr.copy()
    return mgr
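
# Example (illustrative; assumes ``import pandas as pd`` and a masked field
# built from a masked array). Masked cells are upcast and filled by the loop
# above, so an int field with a masked entry comes out as float with NaN:
#
#   >>> from numpy.ma import mrecords
#   >>> arr = ma.array([1, 2], mask=[True, False])
#   >>> rec = mrecords.fromarrays([arr, ['x', 'y']], names='a,b')
#   >>> pd.DataFrame(rec)
#        a  b
#   0  NaN  x
#   1  2.0  y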


# ---------------------------------------------------------------------
# DataFrame Constructor Interface


def init_ndarray(values, index, columns, dtype=None, copy=False):
    # input must be a ndarray, list, Series, index

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = [values.name]
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    # we could have a categorical type passed or coerced to 'category'
    # recast this to an arrays_to_mgr
    if (is_categorical_dtype(getattr(values, 'dtype', None)) or
            is_categorical_dtype(dtype)):

        if not hasattr(values, 'dtype'):
            values = prep_ndarray(values, copy=copy)
            values = values.ravel()
        elif copy:
            values = values.copy()

        index, columns = _get_axes(len(values), 1, index, columns)
        return arrays_to_mgr([values], columns, index, columns,
                             dtype=dtype)
    elif (is_datetime64tz_dtype(values) or
          is_extension_array_dtype(values)):
        # GH#19157
        if columns is None:
            columns = [0]
        return arrays_to_mgr([values], columns, index, columns,
                             dtype=dtype)

    # by definition an array here
    # the dtypes will be coerced to a single dtype
    values = prep_ndarray(values, copy=copy)

    if dtype is not None:
        if not is_dtype_equal(values.dtype, dtype):
            try:
                values = values.astype(dtype)
            except Exception as orig:
                e = ValueError("failed to cast to '{dtype}' (Exception "
                               "was: {orig})".format(dtype=dtype,
                                                     orig=orig))
                raise_with_traceback(e)

    index, columns = _get_axes(*values.shape, index=index, columns=columns)
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values):
        values = maybe_infer_to_datetimelike(values)

    return create_block_manager_from_blocks([values], [columns, index])
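
# Illustrative note (behavior of this code, not a normative spec): for a
# plain 2-d ndarray the values are transposed once above, so each row of the
# transposed array is one column; e.g. pd.DataFrame(np.arange(6).reshape(2, 3))
# stores a single 3 x 2 int64 block, with default RangeIndex axes built by
# _get_axes.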


def init_dict(data, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.
    """
    if columns is not None:
        from pandas.core.series import Series
        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isnull()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or np.issubdtype(dtype, np.flexible):
                # GH#1783
                nan_dtype = object
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            arrays.loc[missing] = [val] * missing.sum()

    else:
        for key in data:
            if (isinstance(data[key], ABCDatetimeIndex) and
                    data[key].tz is not None):
                # GH#24096 need copy to be deep for datetime64tz case
                # TODO: See if we can avoid these copies
                data[key] = data[key].copy(deep=True)

        keys = com.dict_keys_to_ordered_list(data)
        columns = data_names = Index(keys)
        arrays = [data[k] for k in keys]

    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
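
# Example (illustrative; assumes ``import pandas as pd``): a requested column
# that is missing from the dict becomes an all-NaN column, object dtype
# unless an explicit non-flexible dtype was passed (GH#1783):
#
#   >>> pd.DataFrame({'a': [1, 2]}, columns=['a', 'b'])['b']
#   0    NaN
#   1    NaN
#   Name: b, dtype: object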


# ---------------------------------------------------------------------


def prep_ndarray(values, copy=True):
    if not isinstance(values, (np.ndarray, ABCSeries, Index)):
        if len(values) == 0:
            return np.empty((0, 0), dtype=object)

        def convert(v):
            return maybe_convert_platform(v)

        # we could have a 1-dim or 2-dim list here
        # this is equiv of np.asarray, but does object conversion
        # and platform dtype preservation
        try:
            if is_list_like(values[0]) or hasattr(values[0], 'len'):
                values = np.array([convert(v) for v in values])
            elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
                # GH#21861
                values = np.array([convert(v) for v in values])
            else:
                values = convert(values)
        except (ValueError, TypeError):
            values = convert(values)

    else:
        # drop subclass info, do not copy data
        values = np.asarray(values)
        if copy:
            values = values.copy()

    if values.ndim == 1:
        values = values.reshape((values.shape[0], 1))
    elif values.ndim != 2:
        raise ValueError('Must pass 2-d input')

    return values
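
# Examples (illustrative): 1-d input gains a trailing column axis, and
# anything beyond 2-d is rejected:
#
#   >>> prep_ndarray([1, 2, 3])
#   array([[1],
#          [2],
#          [3]])
#   >>> prep_ndarray(np.ones((2, 2, 2)))
#   Traceback (most recent call last):
#       ...
#   ValueError: Must pass 2-d input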


def _homogenize(data, index, dtype=None):
    oindex = None
    homogenized = []

    for val in data:
        if isinstance(val, ABCSeries):
            if dtype is not None:
                val = val.astype(dtype)
            if val.index is not index:
                # Forces alignment. No need to copy data since we
                # are putting it into an ndarray later
                val = val.reindex(index, copy=False)
        else:
            if isinstance(val, dict):
                if oindex is None:
                    oindex = index.astype('O')

                if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
                    val = com.dict_compat(val)
                else:
                    val = dict(val)
                val = lib.fast_multiget(val, oindex.values, default=np.nan)
            val = sanitize_array(val, index, dtype=dtype, copy=False,
                                 raise_cast_failure=False)

        homogenized.append(val)

    return homogenized
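
# Illustrative note: Series are realigned to ``index`` while dicts are turned
# into per-label lookups with NaN for missing labels, so homogenizing
# ``{'b': 20.0}`` against ``Index(['a', 'b'])`` yields ``array([nan, 20.])``.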


def extract_index(data):
    index = None
    if len(data) == 0:
        index = Index([])
    elif len(data) > 0:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_series = False
        have_dicts = False

        for val in data:
            if isinstance(val, ABCSeries):
                have_series = True
                indexes.append(val.index)
            elif isinstance(val, dict):
                have_dicts = True
                indexes.append(list(val.keys()))
            elif is_list_like(val) and getattr(val, 'ndim', 1) == 1:
                have_raw_arrays = True
                raw_lengths.append(len(val))

        if not indexes and not raw_lengths:
            raise ValueError('If using all scalar values, you must pass'
                             ' an index')

        if have_series or have_dicts:
            index = _union_indexes(indexes)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('arrays must all be same length')

            if have_dicts:
                raise ValueError('Mixing dicts with non-Series may lead to '
                                 'ambiguous ordering.')

            if have_series:
                if lengths[0] != len(index):
                    msg = ('array length {length} does not match index '
                           'length {idx_len}'
                           .format(length=lengths[0], idx_len=len(index)))
                    raise ValueError(msg)
            else:
                index = ibase.default_index(lengths[0])

    return ensure_index(index)
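
# Examples (illustrative; assumes ``import pandas as pd``):
#
#   >>> extract_index([pd.Series([1, 2], index=['a', 'b']),
#   ...                pd.Series([3], index=['c'])])
#   Index(['a', 'b', 'c'], dtype='object')
#   >>> extract_index([1, 2])
#   Traceback (most recent call last):
#       ...
#   ValueError: If using all scalar values, you must pass an index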


def reorder_arrays(arrays, arr_columns, columns):
    # reorder according to the columns
    if (columns is not None and len(columns) and arr_columns is not None and
            len(arr_columns)):
        indexer = ensure_index(arr_columns).get_indexer(columns)
        arr_columns = ensure_index([arr_columns[i] for i in indexer])
        arrays = [arrays[i] for i in indexer]
    return arrays, arr_columns


def get_names_from_index(data):
    has_some_name = any(getattr(s, 'name', None) is not None for s in data)
    if not has_some_name:
        return ibase.default_index(len(data))

    index = lrange(len(data))
    count = 0
    for i, s in enumerate(data):
        n = getattr(s, 'name', None)
        if n is not None:
            index[i] = n
        else:
            index[i] = 'Unnamed {count}'.format(count=count)
            count += 1

    return index
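
# Example (illustrative; assumes ``import pandas as pd``): unnamed entries
# are numbered by a running counter, not by their position:
#
#   >>> get_names_from_index([pd.Series([1], name='x'), pd.Series([2])])
#   ['x', 'Unnamed 0']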


def _get_axes(N, K, index, columns):
    # helper to create the axes as indexes
    # return axes or defaults

    if index is None:
        index = ibase.default_index(N)
    else:
        index = ensure_index(index)

    if columns is None:
        columns = ibase.default_index(K)
    else:
        columns = ensure_index(columns)
    return index, columns


# ---------------------------------------------------------------------
# Conversion of Inputs to Arrays


def to_arrays(data, columns, coerce_float=False, dtype=None):
    """
    Return list of arrays, columns.
    """
    if isinstance(data, ABCDataFrame):
        if columns is not None:
            arrays = [data._ixs(i, axis=1).values
                      for i, col in enumerate(data.columns) if col in columns]
        else:
            columns = data.columns
            arrays = [data._ixs(i, axis=1).values
                      for i in range(len(columns))]

        return arrays, columns

    if not len(data):
        if isinstance(data, np.ndarray):
            columns = data.dtype.names
            if columns is not None:
                return [[]] * len(columns), columns
        return [], []  # columns if columns is not None else []
    if isinstance(data[0], (list, tuple)):
        return _list_to_arrays(data, columns, coerce_float=coerce_float,
                               dtype=dtype)
    elif isinstance(data[0], compat.Mapping):
        return _list_of_dict_to_arrays(data, columns,
                                       coerce_float=coerce_float, dtype=dtype)
    elif isinstance(data[0], ABCSeries):
        return _list_of_series_to_arrays(data, columns,
                                         coerce_float=coerce_float,
                                         dtype=dtype)
    elif isinstance(data[0], Categorical):
        if columns is None:
            columns = ibase.default_index(len(data))
        return data, columns
    elif (isinstance(data, (np.ndarray, ABCSeries, Index)) and
          data.dtype.names is not None):

        columns = list(data.dtype.names)
        arrays = [data[k] for k in columns]
        return arrays, columns
    else:
        # last ditch effort
        data = lmap(tuple, data)
        return _list_to_arrays(data, columns, coerce_float=coerce_float,
                               dtype=dtype)
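
# Illustrative note: the dispatch above keys off the type of the *first*
# element, so a list of tuples, a list of dicts and a list of Series each
# take their own path, while a structured ndarray (``data.dtype.names`` set)
# is unpacked into one array per field with the field names as columns.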


def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
    if len(data) > 0 and isinstance(data[0], tuple):
        content = list(lib.to_object_array_tuples(data).T)
    else:
        # list of lists
        content = list(lib.to_object_array(data).T)
    return _convert_object_array(content, columns, dtype=dtype,
                                 coerce_float=coerce_float)


def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
    if columns is None:
        columns = _get_objs_combined_axis(data, sort=False)

    indexer_cache = {}

    aligned_values = []
    for s in data:
        index = getattr(s, 'index', None)
        if index is None:
            index = ibase.default_index(len(s))

        if id(index) in indexer_cache:
            indexer = indexer_cache[id(index)]
        else:
            indexer = indexer_cache[id(index)] = index.get_indexer(columns)

        values = com.values_from_object(s)
        aligned_values.append(algorithms.take_1d(values, indexer))

    values = np.vstack(aligned_values)

    if values.dtype == np.object_:
        content = list(values.T)
        return _convert_object_array(content, columns, dtype=dtype,
                                     coerce_float=coerce_float)
    else:
        return values.T, columns


def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
    if columns is None:
        gen = (list(x.keys()) for x in data)
        sort = not any(isinstance(d, OrderedDict) for d in data)
        columns = lib.fast_unique_multiple_list_gen(gen, sort=sort)

    # assure that they are of the base dict class and not of derived
    # classes
    data = [(type(d) is dict) and d or dict(d) for d in data]

    content = list(lib.dicts_to_array(data, list(columns)).T)
    return _convert_object_array(content, columns, dtype=dtype,
                                 coerce_float=coerce_float)
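
# Illustrative note: with plain dicts the inferred columns are sorted, so per
# this code ``pd.DataFrame([{'b': 1, 'a': 2}])`` has columns ['a', 'b'];
# including an OrderedDict in the list switches ``sort`` off and preserves
# key order instead.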


def _convert_object_array(content, columns, coerce_float=False, dtype=None):
    if columns is None:
        columns = ibase.default_index(len(content))
    else:
        if len(columns) != len(content):  # pragma: no cover
            # caller's responsibility to check for this...
            raise AssertionError('{col:d} columns passed, passed data had '
                                 '{con} columns'.format(col=len(columns),
                                                        con=len(content)))

    # provide soft conversion of object dtypes
    def convert(arr):
        if dtype != object and dtype != np.object:
            arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
            arr = maybe_cast_to_datetime(arr, dtype)
        return arr

    arrays = [convert(arr) for arr in content]

    return arrays, columns


# ---------------------------------------------------------------------
# Series-Based


def sanitize_index(data, index, copy=False):
    """
    Sanitize an index type to return an ndarray of the underlying, pass
    through a non-Index.
    """
    if index is None:
        return data

    if len(data) != len(index):
        raise ValueError('Length of values does not match length of index')

    if isinstance(data, ABCIndexClass) and not copy:
        pass
    elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)):
        data = data._values
        if copy:
            data = data.copy()

    elif isinstance(data, np.ndarray):

        # coerce datetimelike types
        if data.dtype.kind in ['M', 'm']:
            data = sanitize_array(data, index, copy=copy)

    return data


def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy,
                                       True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy,
                                   raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy by definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # because we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
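
# Examples (illustrative; assumes ``import pandas as pd``): a scalar is
# broadcast against the index, and mixed/string data stays object dtype:
#
#   >>> sanitize_array(5, index=pd.RangeIndex(3))
#   array([5, 5, 5])
#   >>> sanitize_array(['a', None], index=None)
#   array(['a', None], dtype=object)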


def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):

    # perf shortcut as this is the most common case
    if take_fast_path:
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            subarr = maybe_cast_to_integer_array(arr, dtype)

        subarr = maybe_cast_to_datetime(arr, dtype)
        # Take care in creating object arrays (but iterators are not
        # supported):
        if is_object_dtype(dtype) and (is_list_like(subarr) and
                                       not (is_iterator(subarr) or
                                            isinstance(subarr, np.ndarray))):
            subarr = construct_1d_object_array_from_listlike(subarr)
        elif not is_extension_type(subarr):
            subarr = construct_1d_ndarray_preserving_na(subarr, dtype,
                                                        copy=copy)
    except (ValueError, TypeError):
        if is_categorical_dtype(dtype):
            # We *do* allow casting to categorical, since we know
            # that Categorical is the only array type for 'category'.
            subarr = Categorical(arr, dtype.categories,
                                 ordered=dtype.ordered)
        elif is_extension_array_dtype(dtype):
            # create an extension array from its dtype
            array_type = dtype.construct_array_type()._from_sequence
            subarr = array_type(arr, dtype=dtype, copy=copy)
        elif dtype is not None and raise_cast_failure:
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
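
# Illustrative note: the integer pre-check above (GH#15832) is what makes a
# lossy float-to-int request raise instead of silently truncating, e.g.
# ``pd.Series([1.5, 2.5], dtype='int64')`` raises ValueError; a failed cast
# to a categorical or extension dtype is instead retried through that
# dtype's own constructor in the ``except`` branch.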