timedeltas.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. """
  2. timedelta support tools
  3. """
  4. import numpy as np
  5. from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit
  6. from pandas.core.dtypes.common import is_list_like
  7. from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
  8. import pandas as pd
  9. from pandas.core.arrays.timedeltas import sequence_to_td64ns
  10. def to_timedelta(arg, unit='ns', box=True, errors='raise'):
  11. """
  12. Convert argument to timedelta.
  13. Timedeltas are absolute differences in times, expressed in difference
  14. units (e.g. days, hours, minutes, seconds). This method converts
  15. an argument from a recognized timedelta format / value into
  16. a Timedelta type.
  17. Parameters
  18. ----------
  19. arg : str, timedelta, list-like or Series
  20. The data to be converted to timedelta.
  21. unit : str, default 'ns'
  22. Denotes the unit of the arg. Possible values:
  23. ('Y', 'M', 'W', 'D', 'days', 'day', 'hours', hour', 'hr',
  24. 'h', 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds',
  25. 'sec', 'second', 'ms', 'milliseconds', 'millisecond',
  26. 'milli', 'millis', 'L', 'us', 'microseconds', 'microsecond',
  27. 'micro', 'micros', 'U', 'ns', 'nanoseconds', 'nano', 'nanos',
  28. 'nanosecond', 'N').
  29. box : bool, default True
  30. - If True returns a Timedelta/TimedeltaIndex of the results.
  31. - If False returns a numpy.timedelta64 or numpy.darray of
  32. values of dtype timedelta64[ns].
  33. errors : {'ignore', 'raise', 'coerce'}, default 'raise'
  34. - If 'raise', then invalid parsing will raise an exception.
  35. - If 'coerce', then invalid parsing will be set as NaT.
  36. - If 'ignore', then invalid parsing will return the input.
  37. Returns
  38. -------
  39. timedelta64 or numpy.array of timedelta64
  40. Output type returned if parsing succeeded.
  41. See Also
  42. --------
  43. DataFrame.astype : Cast argument to a specified dtype.
  44. to_datetime : Convert argument to datetime.
  45. Examples
  46. --------
  47. Parsing a single string to a Timedelta:
  48. >>> pd.to_timedelta('1 days 06:05:01.00003')
  49. Timedelta('1 days 06:05:01.000030')
  50. >>> pd.to_timedelta('15.5us')
  51. Timedelta('0 days 00:00:00.000015')
  52. Parsing a list or array of strings:
  53. >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
  54. TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015', NaT],
  55. dtype='timedelta64[ns]', freq=None)
  56. Converting numbers by specifying the `unit` keyword argument:
  57. >>> pd.to_timedelta(np.arange(5), unit='s')
  58. TimedeltaIndex(['00:00:00', '00:00:01', '00:00:02',
  59. '00:00:03', '00:00:04'],
  60. dtype='timedelta64[ns]', freq=None)
  61. >>> pd.to_timedelta(np.arange(5), unit='d')
  62. TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
  63. dtype='timedelta64[ns]', freq=None)
  64. Returning an ndarray by using the 'box' keyword argument:
  65. >>> pd.to_timedelta(np.arange(5), box=False)
  66. array([0, 1, 2, 3, 4], dtype='timedelta64[ns]')
  67. """
  68. unit = parse_timedelta_unit(unit)
  69. if errors not in ('ignore', 'raise', 'coerce'):
  70. raise ValueError("errors must be one of 'ignore', "
  71. "'raise', or 'coerce'}")
  72. if arg is None:
  73. return arg
  74. elif isinstance(arg, ABCSeries):
  75. from pandas import Series
  76. values = _convert_listlike(arg._values, unit=unit,
  77. box=False, errors=errors)
  78. return Series(values, index=arg.index, name=arg.name)
  79. elif isinstance(arg, ABCIndexClass):
  80. return _convert_listlike(arg, unit=unit, box=box,
  81. errors=errors, name=arg.name)
  82. elif isinstance(arg, np.ndarray) and arg.ndim == 0:
  83. # extract array scalar and process below
  84. arg = arg.item()
  85. elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
  86. return _convert_listlike(arg, unit=unit, box=box, errors=errors)
  87. elif getattr(arg, 'ndim', 1) > 1:
  88. raise TypeError('arg must be a string, timedelta, list, tuple, '
  89. '1-d array, or Series')
  90. # ...so it must be a scalar value. Return scalar.
  91. return _coerce_scalar_to_timedelta_type(arg, unit=unit,
  92. box=box, errors=errors)
  93. def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
  94. """Convert string 'r' to a timedelta object."""
  95. try:
  96. result = Timedelta(r, unit)
  97. if not box:
  98. # explicitly view as timedelta64 for case when result is pd.NaT
  99. result = result.asm8.view('timedelta64[ns]')
  100. except ValueError:
  101. if errors == 'raise':
  102. raise
  103. elif errors == 'ignore':
  104. return r
  105. # coerce
  106. result = pd.NaT
  107. return result
  108. def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
  109. """Convert a list of objects to a timedelta index object."""
  110. if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
  111. # This is needed only to ensure that in the case where we end up
  112. # returning arg (errors == "ignore"), and where the input is a
  113. # generator, we return a useful list-like instead of a
  114. # used-up generator
  115. arg = np.array(list(arg), dtype=object)
  116. try:
  117. value = sequence_to_td64ns(arg, unit=unit,
  118. errors=errors, copy=False)[0]
  119. except ValueError:
  120. if errors == 'ignore':
  121. return arg
  122. else:
  123. # This else-block accounts for the cases when errors='raise'
  124. # and errors='coerce'. If errors == 'raise', these errors
  125. # should be raised. If errors == 'coerce', we shouldn't
  126. # expect any errors to be raised, since all parsing errors
  127. # cause coercion to pd.NaT. However, if an error / bug is
  128. # introduced that causes an Exception to be raised, we would
  129. # like to surface it.
  130. raise
  131. if box:
  132. from pandas import TimedeltaIndex
  133. value = TimedeltaIndex(value, unit='ns', name=name)
  134. return value