datetimes.py

from datetime import datetime, time
from functools import partial

import numpy as np

from pandas._libs import tslib, tslibs
from pandas._libs.tslibs import Timestamp, conversion, parsing
from pandas._libs.tslibs.parsing import (  # noqa
    DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.compat import zip

from pandas.core.dtypes.common import (
    ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
    is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype,
    is_list_like, is_numeric_dtype, is_object_dtype, is_scalar)
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import notna

from pandas import compat
from pandas.core import algorithms


def _guess_datetime_format_for_array(arr, **kwargs):
    # Try to guess the format based on the first non-NaN element
    non_nan_elements = notna(arr).nonzero()[0]
    if len(non_nan_elements):
        return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
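
# Illustrative note (not part of the original module): the helper above just
# delegates to ``_guess_datetime_format`` on the first non-NaN element, so for
# an object array such as np.array(['3/11/2000', '3/12/2000'], dtype=object)
# the guessed format would typically be '%m/%d/%Y' (or '%d/%m/%Y' with
# dayfirst=True); the exact guess depends on the parsing library version.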


def _maybe_cache(arg, format, cache, convert_listlike):
    """
    Create a cache of unique dates from an array of dates

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    format : string
        Strftime format to parse time
    cache : boolean
        True attempts to create a cache of converted values
    convert_listlike : function
        Conversion function to apply on dates

    Returns
    -------
    cache_array : Series
        Cache of converted, unique dates. Can be empty
    """
    from pandas import Series
    cache_array = Series()
    if cache:
        # Perform a quicker unique check
        from pandas import Index
        if not Index(arg).is_unique:
            unique_dates = algorithms.unique(arg)
            cache_dates = convert_listlike(unique_dates, True, format)
            cache_array = Series(cache_dates, index=unique_dates)
    return cache_array
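
# Illustrative sketch (not part of the original module): the cache is just a
# Series mapping each unique raw value to its converted date, which the
# callers then apply with Series.map, e.g.
#
#   >>> import pandas as pd
#   >>> pd.to_datetime(['2018-03-01', '2018-03-01', '2018-03-02'], cache=True)
#   DatetimeIndex(['2018-03-01', '2018-03-01', '2018-03-02'],
#                 dtype='datetime64[ns]', freq=None)
#
# Duplicated strings are converted only once and then looked up in the cache.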


def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
    """
    Convert array of dates with a cache and box the result

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    cache_array : Series
        Cache of converted, unique dates
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    errors : string
        'ignore' plus box=True will convert result to Index
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    result : datetime of converted dates
        Returns:

        - Index-like if box=True
        - ndarray if box=False
    """
    from pandas import Series, DatetimeIndex, Index
    result = Series(arg).map(cache_array)
    if box:
        if errors == 'ignore':
            return Index(result, name=name)
        else:
            return DatetimeIndex(result, name=name)
    return result.values


def _return_parsed_timezone_results(result, timezones, box, tz, name):
    """
    Return results from array_strptime if a %z or %Z directive was passed.

    Parameters
    ----------
    result : ndarray
        int64 date representations of the dates
    timezones : ndarray
        pytz timezone objects
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    tz : object
        None or pytz timezone object
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    tz_result : ndarray of parsed dates with timezone
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False
    """
    if tz is not None:
        raise ValueError("Cannot pass a tz argument when "
                         "parsing strings with timezone "
                         "information.")
    tz_results = np.array([Timestamp(res).tz_localize(zone) for res, zone
                           in zip(result, timezones)])
    if box:
        from pandas import Index
        return Index(tz_results, name=name)
    return tz_results
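
# Illustrative note (not part of the original module): when the format
# contains %z or %Z each element keeps its own fixed offset, so the boxed
# result is an object-dtype Index of tz-aware Timestamps rather than a
# DatetimeIndex, e.g.
#
#   >>> import pandas as pd
#   >>> pd.to_datetime(['2018-01-01 12:00 +0100', '2018-01-01 12:00 -0500'],
#   ...                format='%Y-%m-%d %H:%M %z')
#
# Combining such a format with utc=True raises ValueError, as enforced above.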


def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                                unit=None, errors=None,
                                infer_datetime_format=None, dayfirst=None,
                                yearfirst=None, exact=None):
    """
    Helper function for to_datetime. Performs the conversion of a 1D listlike
    of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behavior from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype, objects_to_datetime64ns)

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass
        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit,
                                                   errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
                # GH 23758: We may still need to localize the result with tz
                try:
                    return result.tz_localize(tz)
                except AttributeError:
                    return result

            return DatetimeIndex(result, tz=tz, name=name)
        return result

    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes the process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    elif errors == 'coerce':
                        result = np.empty(arg.shape, dtype='M8[ns]')
                        iresult = result.view('i8')
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        elif errors == 'coerce':
                            result = np.empty(arg.shape, dtype='M8[ns]')
                            iresult = result.view('i8')
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e

    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg, dayfirst=dayfirst, yearfirst=yearfirst,
            utc=utc, errors=errors, require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name,
                                             tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to a numpy array
            # of datetime objects
            result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
                      for ts in result]
            return np.array(result, dtype=object)

    if box:
        # Ensure we return an Index in all cases where box=True
        if is_datetime64_dtype(result):
            return DatetimeIndex(result, tz=tz, name=name)
        elif is_object_dtype(result):
            # e.g. an Index of datetime objects
            from pandas import Index
            return Index(result, name=name)

    return result
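
# Illustrative sketch (not part of the original module) of the branches above:
#
#   >>> import numpy as np
#   >>> import pandas as pd
#   >>> pd.to_datetime(np.array(['20180301', '20180302'], dtype=object),
#   ...                format='%Y%m%d')     # routed through _attempt_YYYYMMDD
#   # an inferred ISO 8601 format is discarded in favour of the ISO fast path:
#   >>> pd.to_datetime(['2018-03-01T12:00:00'], infer_datetime_format=True)
#
# A format containing %z/%Z is instead handed off to
# _return_parsed_timezone_results.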


def _adjust_to_origin(arg, origin, unit):
    """
    Helper function for to_datetime.
    Adjust input argument to the specified origin

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be adjusted
    origin : 'julian' or Timestamp
        origin offset for the arg
    unit : string
        passed unit from to_datetime, must be 'D'

    Returns
    -------
    ndarray or scalar of adjusted date(s)
    """
    if origin == 'julian':
        original = arg
        j0 = Timestamp(0).to_julian_date()
        if unit != 'D':
            raise ValueError("unit must be 'D' for origin='julian'")
        try:
            arg = arg - j0
        except TypeError:
            raise ValueError("incompatible 'arg' type for given "
                             "'origin'='julian'")

        # preemptively check this for a nice range
        j_max = Timestamp.max.to_julian_date() - j0
        j_min = Timestamp.min.to_julian_date() - j0
        if np.any(arg > j_max) or np.any(arg < j_min):
            raise tslibs.OutOfBoundsDatetime(
                "{original} is Out of Bounds for "
                "origin='julian'".format(original=original))
    else:
        # arg must be numeric
        if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or
                is_numeric_dtype(np.asarray(arg))):
            raise ValueError(
                "'{arg}' is not compatible with origin='{origin}'; "
                "it must be numeric with a unit specified ".format(
                    arg=arg,
                    origin=origin))

        # we are going to offset back to unix / epoch time
        try:
            offset = Timestamp(origin)
        except tslibs.OutOfBoundsDatetime:
            raise tslibs.OutOfBoundsDatetime(
                "origin {origin} is Out of Bounds".format(origin=origin))
        except ValueError:
            raise ValueError("origin {origin} cannot be converted "
                             "to a Timestamp".format(origin=origin))

        if offset.tz is not None:
            raise ValueError(
                "origin offset {} must be tz-naive".format(offset))
        offset -= Timestamp(0)

        # convert the offset to the unit of the arg
        # this should be lossless in terms of precision
        offset = offset // tslibs.Timedelta(1, unit=unit)

        # scalars & ndarray-like can handle the addition
        if is_list_like(arg) and not isinstance(
                arg, (ABCSeries, ABCIndexClass, np.ndarray)):
            arg = np.asarray(arg)
        arg = arg + offset
    return arg
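
# Illustrative arithmetic (not part of the original module): for a Timestamp
# origin the adjustment is a plain shift expressed in the caller's unit.
# With unit='D' and origin=pd.Timestamp('1960-01-01'):
#
#   offset = Timestamp('1960-01-01') - Timestamp(0)   # -3653 days
#   offset // Timedelta(1, unit='D')                  # -3653
#   [1, 2, 3] + (-3653)  ->  [-3652, -3651, -3650]    # days relative to epoch
#
# which is why the to_datetime docstring example below yields
# 1960-01-02 .. 1960-01-04.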


def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
                utc=None, box=True, format=None, exact=True,
                unit=None, infer_datetime_format=False, origin='unix',
                cache=False):
    """
    Convert argument to datetime.

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series

        .. versionadded:: 0.18.1

           or DataFrame/dict-like

    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input
    dayfirst : boolean, default False
        Specify a date parse order if `arg` is str or its list-likes.
        If True, parses dates with the day first, eg 10/11/12 is parsed as
        2012-11-10.
        Warning: dayfirst=True is not strict, but will prefer to parse
        with day first (this is a known bug, based on dateutil behavior).
    yearfirst : boolean, default False
        Specify a date parse order if `arg` is str or its list-likes.

        - If True parses dates with the year first, eg 10/11/12 is parsed as
          2010-11-12.
        - If both dayfirst and yearfirst are True, yearfirst takes precedence
          (same as dateutil).

        Warning: yearfirst=True is not strict, but will prefer to parse
        with year first (this is a known bug, based on dateutil behavior).

        .. versionadded:: 0.16.1

    utc : boolean, default None
        Return UTC DatetimeIndex if True (converting any tz-aware
        datetime.datetime objects as well).
    box : boolean, default True

        - If True returns a DatetimeIndex or Index-like object
        - If False returns ndarray of values.
    format : string, default None
        strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
        all the way up to nanoseconds.
    exact : boolean, True by default

        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.

    unit : string, default 'ns'
        Unit of the arg (D, s, ms, us, ns), which is an integer or float
        number. This will be based off the origin.
        Example, with unit='ms' and origin='unix' (the default), this
        would calculate the number of milliseconds to the unix epoch start.
    infer_datetime_format : boolean, default False
        If True and no `format` is given, attempt to infer the format of the
        datetime strings, and if it can be inferred, switch to a faster
        method of parsing them. In some cases this can increase the parsing
        speed by ~5-10x.
    origin : scalar, default 'unix'
        Define the reference date. The numeric values would be parsed as
        number of units (defined by `unit`) since this reference date.

        - If 'unix' (or POSIX) time; origin is set to 1970-01-01.
        - If 'julian', unit must be 'D', and origin is set to beginning of
          Julian Calendar. Julian day number 0 is assigned to the day starting
          at noon on January 1, 4713 BC.
        - If Timestamp convertible, origin is set to Timestamp identified by
          origin.

        .. versionadded:: 0.20.0
    cache : boolean, default False
        If True, use a cache of unique, converted dates to apply the datetime
        conversion. May produce significant speed-up when parsing duplicate
        date strings, especially ones with timezone offsets.

        .. versionadded:: 0.23.0

    Returns
    -------
    ret : datetime if parsing succeeded.
        Return type depends on input:

        - list-like: DatetimeIndex
        - Series: Series of datetime64 dtype
        - scalar: Timestamp

        In case when it is not possible to return designated types (e.g. when
        any element of input is before Timestamp.min or after Timestamp.max)
        return will have datetime.datetime type (or corresponding
        array/Series).

    See Also
    --------
    pandas.DataFrame.astype : Cast argument to a specified dtype.
    pandas.to_timedelta : Convert argument to timedelta.

    Examples
    --------
    Assembling a datetime from multiple columns of a DataFrame. The keys can
    be common abbreviations like ['year', 'month', 'day', 'minute', 'second',
    'ms', 'us', 'ns'] or plurals of the same

    >>> df = pd.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> pd.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]

    If a date does not meet the `timestamp limitations
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html
    #timeseries-timestamp-limits>`_, passing errors='ignore'
    will return the original input instead of raising any exception.

    Passing errors='coerce' will force an out-of-bounds date to NaT,
    in addition to forcing non-dates (or non-parseable dates) to NaT.

    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
    datetime.datetime(1300, 1, 1, 0, 0)
    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
    NaT

    Passing infer_datetime_format=True can often speed up parsing
    if the strings are not exactly ISO 8601 but are in a regular format.

    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000']*1000)
    >>> s.head()
    0    3/11/2000
    1    3/12/2000
    2    3/13/2000
    3    3/11/2000
    4    3/12/2000
    dtype: object

    >>> %timeit pd.to_datetime(s,infer_datetime_format=True)
    100 loops, best of 3: 10.4 ms per loop

    >>> %timeit pd.to_datetime(s,infer_datetime_format=False)
    1 loop, best of 3: 471 ms per loop

    Using a unix epoch time

    >>> pd.to_datetime(1490195805, unit='s')
    Timestamp('2017-03-22 15:16:45')
    >>> pd.to_datetime(1490195805433502912, unit='ns')
    Timestamp('2017-03-22 15:16:45.433502912')

    .. warning:: For float arg, precision rounding might happen. To prevent
        unexpected behavior use a fixed-width exact type.

    Using a non-unix epoch origin

    >>> pd.to_datetime([1, 2, 3], unit='D',
    ...                origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'],
                  dtype='datetime64[ns]', freq=None)
    """
    if arg is None:
        return None

    if origin != 'unix':
        arg = _adjust_to_origin(arg, origin, unit)

    tz = 'utc' if utc else None
    convert_listlike = partial(_convert_listlike_datetimes, tz=tz, unit=unit,
                               dayfirst=dayfirst, yearfirst=yearfirst,
                               errors=errors, exact=exact,
                               infer_datetime_format=infer_datetime_format)

    if isinstance(arg, Timestamp):
        result = arg
        if tz is not None:
            if arg.tz is not None:
                result = result.tz_convert(tz)
            else:
                result = result.tz_localize(tz)
    elif isinstance(arg, ABCSeries):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if not cache_array.empty:
            result = arg.map(cache_array)
        else:
            from pandas import Series
            values = convert_listlike(arg._values, True, format)
            result = Series(values, index=arg.index, name=arg.name)
    elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):
        result = _assemble_from_unit_mappings(arg, errors, box, tz)
    elif isinstance(arg, ABCIndexClass):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if not cache_array.empty:
            result = _convert_and_box_cache(arg, cache_array, box, errors,
                                            name=arg.name)
        else:
            convert_listlike = partial(convert_listlike, name=arg.name)
            result = convert_listlike(arg, box, format)
    elif is_list_like(arg):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if not cache_array.empty:
            result = _convert_and_box_cache(arg, cache_array, box, errors)
        else:
            result = convert_listlike(arg, box, format)
    else:
        result = convert_listlike(np.array([arg]), box, format)[0]

    return result
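
# Illustrative sketch (not part of the original module) of the dispatch above;
# exact reprs may vary by pandas/numpy version:
#
#   >>> import pandas as pd
#   >>> pd.to_datetime('2018-03-01')                # scalar -> Timestamp
#   Timestamp('2018-03-01 00:00:00')
#   >>> pd.to_datetime(['2018-03-01'], box=False)   # list-like, box=False
#   array(['2018-03-01T00:00:00.000000000'], dtype='datetime64[ns]')
#   >>> pd.to_datetime(pd.Series(['2018-03-01']))   # Series -> datetime64 Series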


# mappings for assembling units
_unit_map = {'year': 'year',
             'years': 'year',
             'month': 'month',
             'months': 'month',
             'day': 'day',
             'days': 'day',
             'hour': 'h',
             'hours': 'h',
             'minute': 'm',
             'minutes': 'm',
             'second': 's',
             'seconds': 's',
             'ms': 'ms',
             'millisecond': 'ms',
             'milliseconds': 'ms',
             'us': 'us',
             'microsecond': 'us',
             'microseconds': 'us',
             'ns': 'ns',
             'nanosecond': 'ns',
             'nanoseconds': 'ns'
             }


def _assemble_from_unit_mappings(arg, errors, box, tz):
    """
    assemble the unit specified fields from the arg (DataFrame)
    Return a Series for actual parsing

    Parameters
    ----------
    arg : DataFrame
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input
    box : boolean

        - If True, return a DatetimeIndex
        - If False, return an array
    tz : None or 'utc'

    Returns
    -------
    Series
    """
    from pandas import to_timedelta, to_numeric, DataFrame
    arg = DataFrame(arg)
    if not arg.columns.is_unique:
        raise ValueError("cannot assemble with duplicate keys")

    # replace passed unit with _unit_map
    def f(value):
        if value in _unit_map:
            return _unit_map[value]

        # m is case significant
        if value.lower() in _unit_map:
            return _unit_map[value.lower()]

        return value

    unit = {k: f(k) for k in arg.keys()}
    unit_rev = {v: k for k, v in unit.items()}

    # we require at least Ymd
    required = ['year', 'month', 'day']
    req = sorted(list(set(required) - set(unit_rev.keys())))
    if len(req):
        raise ValueError("to assemble mappings requires at least that "
                         "[year, month, day] be specified: [{required}] "
                         "is missing".format(required=','.join(req)))

    # keys we don't recognize
    excess = sorted(list(set(unit_rev.keys()) - set(_unit_map.values())))
    if len(excess):
        raise ValueError("extra keys have been passed "
                         "to the datetime assemblage: "
                         "[{excess}]".format(excess=','.join(excess)))

    def coerce(values):
        # we allow coercion if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype('int64', copy=False)
        return values

    values = (coerce(arg[unit_rev['year']]) * 10000 +
              coerce(arg[unit_rev['month']]) * 100 +
              coerce(arg[unit_rev['day']]))
    try:
        values = to_datetime(values, format='%Y%m%d', errors=errors, utc=tz)
    except (TypeError, ValueError) as e:
        raise ValueError("cannot assemble the "
                         "datetimes: {error}".format(error=e))

    for u in ['h', 'm', 's', 'ms', 'us', 'ns']:
        value = unit_rev.get(u)
        if value is not None and value in arg:
            try:
                values += to_timedelta(coerce(arg[value]),
                                       unit=u,
                                       errors=errors)
            except (TypeError, ValueError) as e:
                raise ValueError("cannot assemble the datetimes [{value}]: "
                                 "{error}".format(value=value, error=e))

    if not box:
        return values.values
    return values
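
# Illustrative sketch (not part of the original module): the assembly path
# accepts the singular/plural column names mapped through _unit_map, e.g.
#
#   >>> import pandas as pd
#   >>> df = pd.DataFrame({'years': [2015, 2016],
#   ...                    'months': [2, 3],
#   ...                    'days': [4, 5],
#   ...                    'hours': [6, 7]})
#   >>> pd.to_datetime(df)
#   0   2015-02-04 06:00:00
#   1   2016-03-05 07:00:00
#   dtype: datetime64[ns]
#
# whereas an unrecognized column name (e.g. 'weekday') raises the
# "extra keys" ValueError above.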


def _attempt_YYYYMMDD(arg, errors):
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like;
    arg is passed in as an object dtype, but could really be ints/strings
    with nan-like values, or floats (e.g. with nan)

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        parsed = parsing.try_parse_year_month_day(carg / 10000,
                                                  carg / 100 % 100,
                                                  carg % 100)
        return tslib.array_to_datetime(parsed, errors=errors)[0]

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[~mask] = tslibs.iNaT

        masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = masked_result.astype('M8[ns]')
        return result

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except ValueError:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notna(carg))
    except ValueError:
        pass

    # string with NaN-like
    try:
        mask = ~algorithms.isin(arg, list(tslib.nat_strings))
        return calc_with_mask(arg, mask)
    except ValueError:
        pass

    return None
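
# Illustrative note (not part of the original module): the three fallbacks
# above cover, in order, clean integers / int-like strings, floats carrying a
# real np.nan, and strings mixed with NaT-like tokens; e.g. an object array
# np.array(['20180301', 'NaT', '20180302'], dtype=object) fails the first two
# casts and is handled by the third branch via the nat_strings mask.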


# Fixed time formats for time parsing
_time_formats = ["%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
                 "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p"]


def _guess_time_format_for_array(arr):
    # Try to guess the format based on the first non-NaN element
    non_nan_elements = notna(arr).nonzero()[0]
    if len(non_nan_elements):
        element = arr[non_nan_elements[0]]
        for time_format in _time_formats:
            try:
                datetime.strptime(element, time_format)
                return time_format
            except ValueError:
                pass

    return None


def to_time(arg, format=None, infer_time_format=False, errors='raise'):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p")

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str, default None
        Format used to convert arg into a time object. If None, fixed formats
        are used.
    infer_time_format: bool, default False
        Infer the time format based on the first non-NaN element. If all
        strings are in the same format, this will speed up conversion.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    datetime.time
    """
    from pandas.core.series import Series

    def _convert_listlike(arg, format):

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = ensure_object(arg)

        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        times = []
        if format is not None:
            for element in arg:
                try:
                    times.append(datetime.strptime(element, format).time())
                except (ValueError, TypeError):
                    if errors == 'raise':
                        msg = ("Cannot convert {element} to a time with given "
                               "format {format}").format(element=element,
                                                         format=format)
                        raise ValueError(msg)
                    elif errors == 'ignore':
                        return arg
                    else:
                        times.append(None)
        else:
            formats = _time_formats[:]
            format_found = False
            for element in arg:
                time_object = None
                for time_format in formats:
                    try:
                        time_object = datetime.strptime(element,
                                                        time_format).time()
                        if not format_found:
                            # Put the found format in front
                            fmt = formats.pop(formats.index(time_format))
                            formats.insert(0, fmt)
                            format_found = True
                        break
                    except (ValueError, TypeError):
                        continue

                if time_object is not None:
                    times.append(time_object)
                elif errors == 'raise':
                    raise ValueError("Cannot convert arg {arg} to "
                                     "a time".format(arg=arg))
                elif errors == 'ignore':
                    return arg
                else:
                    times.append(None)

        return times

    if arg is None:
        return arg
    elif isinstance(arg, time):
        return arg
    elif isinstance(arg, Series):
        values = _convert_listlike(arg._values, format)
        return Series(values, index=arg.index, name=arg.name)
    elif isinstance(arg, ABCIndexClass):
        return _convert_listlike(arg, format)
    elif is_list_like(arg):
        return _convert_listlike(arg, format)

    return _convert_listlike(np.array([arg]), format)[0]
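
# Illustrative sketch (not part of the original module); the accepted
# spellings depend on the platform's strptime, so treat the values below as
# assumptions:
#
#   >>> to_time('14:30')                             # fixed-format fallback
#   datetime.time(14, 30)
#   >>> to_time(['11:15:30PM', 'bad'], errors='coerce')
#   [datetime.time(23, 15, 30), None]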