test_tools.py 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841
  1. """ test to_datetime """
  2. import calendar
  3. from datetime import datetime, time
  4. from distutils.version import LooseVersion
  5. import locale
  6. import dateutil
  7. from dateutil.parser import parse
  8. from dateutil.tz.tz import tzoffset
  9. import numpy as np
  10. import pytest
  11. import pytz
  12. from pandas._libs import tslib
  13. from pandas._libs.tslibs import iNaT, parsing
  14. from pandas.compat import PY3, lmap
  15. from pandas.errors import OutOfBoundsDatetime
  16. import pandas.util._test_decorators as td
  17. from pandas.core.dtypes.common import is_datetime64_ns_dtype
  18. import pandas as pd
  19. from pandas import (
  20. DataFrame, DatetimeIndex, Index, NaT, Series, Timestamp, compat,
  21. date_range, isna, to_datetime)
  22. from pandas.core.arrays import DatetimeArray
  23. from pandas.core.tools import datetimes as tools
  24. from pandas.util import testing as tm
  25. from pandas.util.testing import assert_series_equal
  26. class TestTimeConversionFormats(object):
  27. @pytest.mark.parametrize('cache', [True, False])
  28. def test_to_datetime_format(self, cache):
  29. values = ['1/1/2000', '1/2/2000', '1/3/2000']
  30. results1 = [Timestamp('20000101'), Timestamp('20000201'),
  31. Timestamp('20000301')]
  32. results2 = [Timestamp('20000101'), Timestamp('20000102'),
  33. Timestamp('20000103')]
  34. for vals, expecteds in [(values, (Index(results1), Index(results2))),
  35. (Series(values),
  36. (Series(results1), Series(results2))),
  37. (values[0], (results1[0], results2[0])),
  38. (values[1], (results1[1], results2[1])),
  39. (values[2], (results1[2], results2[2]))]:
  40. for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']):
  41. result = to_datetime(vals, format=fmt, cache=cache)
  42. expected = expecteds[i]
  43. if isinstance(expected, Series):
  44. assert_series_equal(result, Series(expected))
  45. elif isinstance(expected, Timestamp):
  46. assert result == expected
  47. else:
  48. tm.assert_index_equal(result, expected)
  49. @pytest.mark.parametrize('cache', [True, False])
  50. def test_to_datetime_format_YYYYMMDD(self, cache):
  51. s = Series([19801222, 19801222] + [19810105] * 5)
  52. expected = Series([Timestamp(x) for x in s.apply(str)])
  53. result = to_datetime(s, format='%Y%m%d', cache=cache)
  54. assert_series_equal(result, expected)
  55. result = to_datetime(s.apply(str), format='%Y%m%d', cache=cache)
  56. assert_series_equal(result, expected)
  57. # with NaT
  58. expected = Series([Timestamp("19801222"), Timestamp("19801222")] +
  59. [Timestamp("19810105")] * 5)
  60. expected[2] = np.nan
  61. s[2] = np.nan
  62. result = to_datetime(s, format='%Y%m%d', cache=cache)
  63. assert_series_equal(result, expected)
  64. # string with NaT
  65. s = s.apply(str)
  66. s[2] = 'nat'
  67. result = to_datetime(s, format='%Y%m%d', cache=cache)
  68. assert_series_equal(result, expected)
  69. # coercion
  70. # GH 7930
  71. s = Series([20121231, 20141231, 99991231])
  72. result = pd.to_datetime(s, format='%Y%m%d', errors='ignore',
  73. cache=cache)
  74. expected = Series([datetime(2012, 12, 31),
  75. datetime(2014, 12, 31), datetime(9999, 12, 31)],
  76. dtype=object)
  77. tm.assert_series_equal(result, expected)
  78. result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
  79. cache=cache)
  80. expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
  81. assert_series_equal(result, expected)
  82. @pytest.mark.parametrize('cache', [True, False])
  83. def test_to_datetime_format_integer(self, cache):
  84. # GH 10178
  85. s = Series([2000, 2001, 2002])
  86. expected = Series([Timestamp(x) for x in s.apply(str)])
  87. result = to_datetime(s, format='%Y', cache=cache)
  88. assert_series_equal(result, expected)
  89. s = Series([200001, 200105, 200206])
  90. expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str)
  91. ])
  92. result = to_datetime(s, format='%Y%m', cache=cache)
  93. assert_series_equal(result, expected)
  94. @pytest.mark.parametrize('cache', [True, False])
  95. def test_to_datetime_format_microsecond(self, cache):
  96. # these are locale dependent
  97. lang, _ = locale.getlocale()
  98. month_abbr = calendar.month_abbr[4]
  99. val = '01-{}-2011 00:00:01.978'.format(month_abbr)
  100. format = '%d-%b-%Y %H:%M:%S.%f'
  101. result = to_datetime(val, format=format, cache=cache)
  102. exp = datetime.strptime(val, format)
  103. assert result == exp
  104. @pytest.mark.parametrize('cache', [True, False])
  105. def test_to_datetime_format_time(self, cache):
  106. data = [
  107. ['01/10/2010 15:20', '%m/%d/%Y %H:%M',
  108. Timestamp('2010-01-10 15:20')],
  109. ['01/10/2010 05:43', '%m/%d/%Y %I:%M',
  110. Timestamp('2010-01-10 05:43')],
  111. ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S',
  112. Timestamp('2010-01-10 13:56:01')] # ,
  113. # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p',
  114. # Timestamp('2010-01-10 20:14')],
  115. # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p',
  116. # Timestamp('2010-01-10 07:40')],
  117. # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p',
  118. # Timestamp('2010-01-10 09:12:56')]
  119. ]
  120. for s, format, dt in data:
  121. assert to_datetime(s, format=format, cache=cache) == dt
  122. @td.skip_if_has_locale
  123. @pytest.mark.parametrize('cache', [True, False])
  124. def test_to_datetime_with_non_exact(self, cache):
  125. # GH 10834
  126. # 8904
  127. # exact kw
  128. s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00',
  129. '19MAY11 00:00:00Z'])
  130. result = to_datetime(s, format='%d%b%y', exact=False, cache=cache)
  131. expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False),
  132. format='%d%b%y', cache=cache)
  133. assert_series_equal(result, expected)
  134. @pytest.mark.parametrize('cache', [True, False])
  135. def test_parse_nanoseconds_with_formula(self, cache):
  136. # GH8989
  137. # trunctaing the nanoseconds when a format was provided
  138. for v in ["2012-01-01 09:00:00.000000001",
  139. "2012-01-01 09:00:00.000001",
  140. "2012-01-01 09:00:00.001",
  141. "2012-01-01 09:00:00.001000",
  142. "2012-01-01 09:00:00.001000000", ]:
  143. expected = pd.to_datetime(v, cache=cache)
  144. result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f",
  145. cache=cache)
  146. assert result == expected
  147. @pytest.mark.parametrize('cache', [True, False])
  148. def test_to_datetime_format_weeks(self, cache):
  149. data = [
  150. ['2009324', '%Y%W%w', Timestamp('2009-08-13')],
  151. ['2013020', '%Y%U%w', Timestamp('2013-01-13')]
  152. ]
  153. for s, format, dt in data:
  154. assert to_datetime(s, format=format, cache=cache) == dt
  155. @pytest.mark.parametrize("box,const", [
  156. [True, pd.Index],
  157. [False, np.array]])
  158. @pytest.mark.parametrize("fmt,dates,expected_dates", [
  159. ['%Y-%m-%d %H:%M:%S %Z',
  160. ['2010-01-01 12:00:00 UTC'] * 2,
  161. [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2],
  162. ['%Y-%m-%d %H:%M:%S %Z',
  163. ['2010-01-01 12:00:00 UTC',
  164. '2010-01-01 12:00:00 GMT',
  165. '2010-01-01 12:00:00 US/Pacific'],
  166. [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
  167. pd.Timestamp('2010-01-01 12:00:00', tz='GMT'),
  168. pd.Timestamp('2010-01-01 12:00:00', tz='US/Pacific')]],
  169. ['%Y-%m-%d %H:%M:%S%z',
  170. ['2010-01-01 12:00:00+0100'] * 2,
  171. [pd.Timestamp('2010-01-01 12:00:00',
  172. tzinfo=pytz.FixedOffset(60))] * 2],
  173. ['%Y-%m-%d %H:%M:%S %z',
  174. ['2010-01-01 12:00:00 +0100'] * 2,
  175. [pd.Timestamp('2010-01-01 12:00:00',
  176. tzinfo=pytz.FixedOffset(60))] * 2],
  177. ['%Y-%m-%d %H:%M:%S %z',
  178. ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100'],
  179. [pd.Timestamp('2010-01-01 12:00:00',
  180. tzinfo=pytz.FixedOffset(60)),
  181. pd.Timestamp('2010-01-01 12:00:00',
  182. tzinfo=pytz.FixedOffset(-60))]],
  183. ['%Y-%m-%d %H:%M:%S %z',
  184. ['2010-01-01 12:00:00 Z', '2010-01-01 12:00:00 Z'],
  185. [pd.Timestamp('2010-01-01 12:00:00',
  186. tzinfo=pytz.FixedOffset(0)), # pytz coerces to UTC
  187. pd.Timestamp('2010-01-01 12:00:00',
  188. tzinfo=pytz.FixedOffset(0))]]])
  189. def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
  190. fmt, dates, expected_dates):
  191. # GH 13486
  192. result = pd.to_datetime(dates, format=fmt, box=box)
  193. expected = const(expected_dates)
  194. tm.assert_equal(result, expected)
  195. with pytest.raises(ValueError):
  196. pd.to_datetime(dates, format=fmt, box=box, utc=True)
  197. @pytest.mark.parametrize('offset', [
  198. '+0', '-1foo', 'UTCbar', ':10', '+01:000:01', ''])
  199. def test_to_datetime_parse_timezone_malformed(self, offset):
  200. fmt = '%Y-%m-%d %H:%M:%S %z'
  201. date = '2010-01-01 12:00:00 ' + offset
  202. with pytest.raises(ValueError):
  203. pd.to_datetime([date], format=fmt)
  204. def test_to_datetime_parse_timezone_keeps_name(self):
  205. # GH 21697
  206. fmt = '%Y-%m-%d %H:%M:%S %z'
  207. arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo')
  208. result = pd.to_datetime(arg, format=fmt)
  209. expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC',
  210. name='foo')
  211. tm.assert_index_equal(result, expected)
  212. class TestToDatetime(object):
  213. @pytest.mark.parametrize('tz', [None, 'US/Central'])
  214. def test_to_datetime_dtarr(self, tz):
  215. # DatetimeArray
  216. dti = date_range('1965-04-03', periods=19, freq='2W', tz=tz)
  217. arr = DatetimeArray(dti)
  218. result = to_datetime(arr)
  219. assert result is arr
  220. result = to_datetime(arr, box=True)
  221. assert result is arr
  222. def test_to_datetime_pydatetime(self):
  223. actual = pd.to_datetime(datetime(2008, 1, 15))
  224. assert actual == datetime(2008, 1, 15)
  225. def test_to_datetime_YYYYMMDD(self):
  226. actual = pd.to_datetime('20080115')
  227. assert actual == datetime(2008, 1, 15)
  228. def test_to_datetime_unparseable_ignore(self):
  229. # unparseable
  230. s = 'Month 1, 1999'
  231. assert pd.to_datetime(s, errors='ignore') == s
  232. @td.skip_if_windows # `tm.set_timezone` does not work in windows
  233. def test_to_datetime_now(self):
  234. # See GH#18666
  235. with tm.set_timezone('US/Eastern'):
  236. npnow = np.datetime64('now').astype('datetime64[ns]')
  237. pdnow = pd.to_datetime('now')
  238. pdnow2 = pd.to_datetime(['now'])[0]
  239. # These should all be equal with infinite perf; this gives
  240. # a generous margin of 10 seconds
  241. assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10
  242. assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10
  243. assert pdnow.tzinfo is None
  244. assert pdnow2.tzinfo is None
  245. @td.skip_if_windows # `tm.set_timezone` does not work in windows
  246. def test_to_datetime_today(self):
  247. # See GH#18666
  248. # Test with one timezone far ahead of UTC and another far behind, so
  249. # one of these will _almost_ alawys be in a different day from UTC.
  250. # Unfortunately this test between 12 and 1 AM Samoa time
  251. # this both of these timezones _and_ UTC will all be in the same day,
  252. # so this test will not detect the regression introduced in #18666.
  253. with tm.set_timezone('Pacific/Auckland'): # 12-13 hours ahead of UTC
  254. nptoday = np.datetime64('today')\
  255. .astype('datetime64[ns]').astype(np.int64)
  256. pdtoday = pd.to_datetime('today')
  257. pdtoday2 = pd.to_datetime(['today'])[0]
  258. tstoday = pd.Timestamp('today')
  259. tstoday2 = pd.Timestamp.today()
  260. # These should all be equal with infinite perf; this gives
  261. # a generous margin of 10 seconds
  262. assert abs(pdtoday.normalize().value - nptoday) < 1e10
  263. assert abs(pdtoday2.normalize().value - nptoday) < 1e10
  264. assert abs(pdtoday.value - tstoday.value) < 1e10
  265. assert abs(pdtoday.value - tstoday2.value) < 1e10
  266. assert pdtoday.tzinfo is None
  267. assert pdtoday2.tzinfo is None
  268. with tm.set_timezone('US/Samoa'): # 11 hours behind UTC
  269. nptoday = np.datetime64('today')\
  270. .astype('datetime64[ns]').astype(np.int64)
  271. pdtoday = pd.to_datetime('today')
  272. pdtoday2 = pd.to_datetime(['today'])[0]
  273. # These should all be equal with infinite perf; this gives
  274. # a generous margin of 10 seconds
  275. assert abs(pdtoday.normalize().value - nptoday) < 1e10
  276. assert abs(pdtoday2.normalize().value - nptoday) < 1e10
  277. assert pdtoday.tzinfo is None
  278. assert pdtoday2.tzinfo is None
  279. def test_to_datetime_today_now_unicode_bytes(self):
  280. to_datetime([u'now'])
  281. to_datetime([u'today'])
  282. if not PY3:
  283. to_datetime(['now'])
  284. to_datetime(['today'])
  285. @pytest.mark.parametrize('cache', [True, False])
  286. def test_to_datetime_dt64s(self, cache):
  287. in_bound_dts = [
  288. np.datetime64('2000-01-01'),
  289. np.datetime64('2000-01-02'),
  290. ]
  291. for dt in in_bound_dts:
  292. assert pd.to_datetime(dt, cache=cache) == Timestamp(dt)
  293. oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ]
  294. for dt in oob_dts:
  295. pytest.raises(ValueError, pd.to_datetime, dt, errors='raise')
  296. pytest.raises(ValueError, Timestamp, dt)
  297. assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT
  298. @pytest.mark.parametrize('cache', [True, False])
  299. def test_to_datetime_array_of_dt64s(self, cache):
  300. dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ]
  301. # Assuming all datetimes are in bounds, to_datetime() returns
  302. # an array that is equal to Timestamp() parsing
  303. tm.assert_numpy_array_equal(
  304. pd.to_datetime(dts, box=False, cache=cache),
  305. np.array([Timestamp(x).asm8 for x in dts])
  306. )
  307. # A list of datetimes where the last one is out of bounds
  308. dts_with_oob = dts + [np.datetime64('9999-01-01')]
  309. pytest.raises(ValueError, pd.to_datetime, dts_with_oob,
  310. errors='raise')
  311. tm.assert_numpy_array_equal(
  312. pd.to_datetime(dts_with_oob, box=False, errors='coerce',
  313. cache=cache),
  314. np.array(
  315. [
  316. Timestamp(dts_with_oob[0]).asm8,
  317. Timestamp(dts_with_oob[1]).asm8,
  318. tslib.iNaT,
  319. ],
  320. dtype='M8'
  321. )
  322. )
  323. # With errors='ignore', out of bounds datetime64s
  324. # are converted to their .item(), which depending on the version of
  325. # numpy is either a python datetime.datetime or datetime.date
  326. tm.assert_numpy_array_equal(
  327. pd.to_datetime(dts_with_oob, box=False, errors='ignore',
  328. cache=cache),
  329. np.array(
  330. [dt.item() for dt in dts_with_oob],
  331. dtype='O'
  332. )
  333. )
  334. @pytest.mark.parametrize('cache', [True, False])
  335. def test_to_datetime_tz(self, cache):
  336. # xref 8260
  337. # uniform returns a DatetimeIndex
  338. arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
  339. pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]
  340. result = pd.to_datetime(arr, cache=cache)
  341. expected = DatetimeIndex(
  342. ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific')
  343. tm.assert_index_equal(result, expected)
  344. # mixed tzs will raise
  345. arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'),
  346. pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')]
  347. pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache))
  348. @pytest.mark.parametrize('cache', [True, False])
  349. def test_to_datetime_tz_pytz(self, cache):
  350. # see gh-8260
  351. us_eastern = pytz.timezone('US/Eastern')
  352. arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1,
  353. hour=3, minute=0)),
  354. us_eastern.localize(datetime(year=2000, month=6, day=1,
  355. hour=3, minute=0))],
  356. dtype=object)
  357. result = pd.to_datetime(arr, utc=True, cache=cache)
  358. expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
  359. '2000-06-01 07:00:00+00:00'],
  360. dtype='datetime64[ns, UTC]', freq=None)
  361. tm.assert_index_equal(result, expected)
  362. @pytest.mark.parametrize('cache', [True, False])
  363. @pytest.mark.parametrize("init_constructor, end_constructor, test_method",
  364. [(Index, DatetimeIndex, tm.assert_index_equal),
  365. (list, DatetimeIndex, tm.assert_index_equal),
  366. (np.array, DatetimeIndex, tm.assert_index_equal),
  367. (Series, Series, tm.assert_series_equal)])
  368. def test_to_datetime_utc_true(self,
  369. cache,
  370. init_constructor,
  371. end_constructor,
  372. test_method):
  373. # See gh-11934 & gh-6415
  374. data = ['20100102 121314', '20100102 121315']
  375. expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'),
  376. pd.Timestamp('2010-01-02 12:13:15', tz='utc')]
  377. result = pd.to_datetime(init_constructor(data),
  378. format='%Y%m%d %H%M%S',
  379. utc=True,
  380. cache=cache)
  381. expected = end_constructor(expected_data)
  382. test_method(result, expected)
  383. # Test scalar case as well
  384. for scalar, expected in zip(data, expected_data):
  385. result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True,
  386. cache=cache)
  387. assert result == expected
  388. @pytest.mark.parametrize('cache', [True, False])
  389. def test_to_datetime_utc_true_with_series_single_value(self, cache):
  390. # GH 15760 UTC=True with Series
  391. ts = 1.5e18
  392. result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache)
  393. expected = pd.Series([pd.Timestamp(ts, tz='utc')])
  394. tm.assert_series_equal(result, expected)
  395. @pytest.mark.parametrize('cache', [True, False])
  396. def test_to_datetime_utc_true_with_series_tzaware_string(self, cache):
  397. ts = '2013-01-01 00:00:00-01:00'
  398. expected_ts = '2013-01-01 01:00:00'
  399. data = pd.Series([ts] * 3)
  400. result = pd.to_datetime(data, utc=True, cache=cache)
  401. expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3)
  402. tm.assert_series_equal(result, expected)
  403. @pytest.mark.parametrize('cache', [True, False])
  404. @pytest.mark.parametrize('date, dtype',
  405. [('2013-01-01 01:00:00', 'datetime64[ns]'),
  406. ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')])
  407. def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date,
  408. dtype):
  409. expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')])
  410. result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True,
  411. cache=cache)
  412. tm.assert_series_equal(result, expected)
  413. @pytest.mark.parametrize('cache', [True, False])
  414. def test_to_datetime_tz_psycopg2(self, cache):
  415. # xref 8260
  416. try:
  417. import psycopg2
  418. except ImportError:
  419. pytest.skip("no psycopg2 installed")
  420. # misc cases
  421. tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)
  422. tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None)
  423. arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1),
  424. datetime(2000, 6, 1, 3, 0, tzinfo=tz2)],
  425. dtype=object)
  426. result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache)
  427. expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
  428. '2000-06-01 07:00:00+00:00'],
  429. dtype='datetime64[ns, UTC]', freq=None)
  430. tm.assert_index_equal(result, expected)
  431. # dtype coercion
  432. i = pd.DatetimeIndex([
  433. '2000-01-01 08:00:00'
  434. ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None))
  435. assert is_datetime64_ns_dtype(i)
  436. # tz coerceion
  437. result = pd.to_datetime(i, errors='coerce', cache=cache)
  438. tm.assert_index_equal(result, i)
  439. result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache)
  440. expected = pd.DatetimeIndex(['2000-01-01 13:00:00'],
  441. dtype='datetime64[ns, UTC]')
  442. tm.assert_index_equal(result, expected)
  443. @pytest.mark.parametrize(
  444. 'cache',
  445. [pytest.param(True,
  446. marks=pytest.mark.skipif(True, reason="GH 18111")),
  447. False])
  448. def test_datetime_bool(self, cache):
  449. # GH13176
  450. with pytest.raises(TypeError):
  451. to_datetime(False)
  452. assert to_datetime(False, errors="coerce", cache=cache) is NaT
  453. assert to_datetime(False, errors="ignore", cache=cache) is False
  454. with pytest.raises(TypeError):
  455. to_datetime(True)
  456. assert to_datetime(True, errors="coerce", cache=cache) is NaT
  457. assert to_datetime(True, errors="ignore", cache=cache) is True
  458. with pytest.raises(TypeError):
  459. to_datetime([False, datetime.today()], cache=cache)
  460. with pytest.raises(TypeError):
  461. to_datetime(['20130101', True], cache=cache)
  462. tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
  463. errors="coerce", cache=cache),
  464. DatetimeIndex([to_datetime(0, cache=cache),
  465. NaT,
  466. NaT,
  467. to_datetime(0, cache=cache)]))
  468. def test_datetime_invalid_datatype(self):
  469. # GH13176
  470. with pytest.raises(TypeError):
  471. pd.to_datetime(bool)
  472. with pytest.raises(TypeError):
  473. pd.to_datetime(pd.to_datetime)
  474. @pytest.mark.parametrize('value', ["a", "00:01:99"])
  475. @pytest.mark.parametrize('infer', [True, False])
  476. @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
  477. def test_datetime_invalid_scalar(self, value, format, infer):
  478. # GH24763
  479. res = pd.to_datetime(value, errors='ignore', format=format,
  480. infer_datetime_format=infer)
  481. assert res == value
  482. res = pd.to_datetime(value, errors='coerce', format=format,
  483. infer_datetime_format=infer)
  484. assert res is pd.NaT
  485. with pytest.raises(ValueError):
  486. pd.to_datetime(value, errors='raise', format=format,
  487. infer_datetime_format=infer)
  488. @pytest.mark.parametrize('value', ["3000/12/11 00:00:00"])
  489. @pytest.mark.parametrize('infer', [True, False])
  490. @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
  491. def test_datetime_outofbounds_scalar(self, value, format, infer):
  492. # GH24763
  493. res = pd.to_datetime(value, errors='ignore', format=format,
  494. infer_datetime_format=infer)
  495. assert res == value
  496. res = pd.to_datetime(value, errors='coerce', format=format,
  497. infer_datetime_format=infer)
  498. assert res is pd.NaT
  499. if format is not None:
  500. with pytest.raises(ValueError):
  501. pd.to_datetime(value, errors='raise', format=format,
  502. infer_datetime_format=infer)
  503. else:
  504. with pytest.raises(OutOfBoundsDatetime):
  505. pd.to_datetime(value, errors='raise', format=format,
  506. infer_datetime_format=infer)
  507. @pytest.mark.parametrize('values', [["a"], ["00:01:99"],
  508. ["a", "b", "99:00:00"]])
  509. @pytest.mark.parametrize('infer', [True, False])
  510. @pytest.mark.parametrize('format', [None, 'H%:M%:S%'])
  511. def test_datetime_invalid_index(self, values, format, infer):
  512. # GH24763
  513. res = pd.to_datetime(values, errors='ignore', format=format,
  514. infer_datetime_format=infer)
  515. tm.assert_index_equal(res, pd.Index(values))
  516. res = pd.to_datetime(values, errors='coerce', format=format,
  517. infer_datetime_format=infer)
  518. tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values)))
  519. with pytest.raises(ValueError):
  520. pd.to_datetime(values, errors='raise', format=format,
  521. infer_datetime_format=infer)
  522. @pytest.mark.parametrize("utc", [True, None])
  523. @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
  524. @pytest.mark.parametrize("box", [True, False])
  525. @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index])
  526. def test_to_datetime_cache(self, utc, format, box, constructor):
  527. date = '20130101 00:00:00'
  528. test_dates = [date] * 10**5
  529. data = constructor(test_dates)
  530. result = pd.to_datetime(data, utc=utc, format=format, box=box,
  531. cache=True)
  532. expected = pd.to_datetime(data, utc=utc, format=format, box=box,
  533. cache=False)
  534. if box:
  535. tm.assert_index_equal(result, expected)
  536. else:
  537. tm.assert_numpy_array_equal(result, expected)
  538. @pytest.mark.parametrize("utc", [True, None])
  539. @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
  540. def test_to_datetime_cache_series(self, utc, format):
  541. date = '20130101 00:00:00'
  542. test_dates = [date] * 10**5
  543. data = pd.Series(test_dates)
  544. result = pd.to_datetime(data, utc=utc, format=format, cache=True)
  545. expected = pd.to_datetime(data, utc=utc, format=format, cache=False)
  546. tm.assert_series_equal(result, expected)
  547. def test_to_datetime_cache_scalar(self):
  548. date = '20130101 00:00:00'
  549. result = pd.to_datetime(date, cache=True)
  550. expected = pd.Timestamp('20130101 00:00:00')
  551. assert result == expected
  552. @pytest.mark.parametrize('date, format',
  553. [('2017-20', '%Y-%W'),
  554. ('20 Sunday', '%W %A'),
  555. ('20 Sun', '%W %a'),
  556. ('2017-21', '%Y-%U'),
  557. ('20 Sunday', '%U %A'),
  558. ('20 Sun', '%U %a')])
  559. def test_week_without_day_and_calendar_year(self, date, format):
  560. # GH16774
  561. msg = "Cannot use '%W' or '%U' without day and year"
  562. with pytest.raises(ValueError, match=msg):
  563. pd.to_datetime(date, format=format)
  564. def test_iso_8601_strings_with_same_offset(self):
  565. # GH 17697, 11736
  566. ts_str = "2015-11-18 15:30:00+05:30"
  567. result = to_datetime(ts_str)
  568. expected = Timestamp(ts_str)
  569. assert result == expected
  570. expected = DatetimeIndex([Timestamp(ts_str)] * 2)
  571. result = to_datetime([ts_str] * 2)
  572. tm.assert_index_equal(result, expected)
  573. result = DatetimeIndex([ts_str] * 2)
  574. tm.assert_index_equal(result, expected)
  575. def test_iso_8601_strings_same_offset_no_box(self):
  576. # GH 22446
  577. data = ['2018-01-04 09:01:00+09:00', '2018-01-04 09:02:00+09:00']
  578. result = pd.to_datetime(data, box=False)
  579. expected = np.array([
  580. datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
  581. datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540))
  582. ],
  583. dtype=object)
  584. tm.assert_numpy_array_equal(result, expected)
  585. def test_iso_8601_strings_with_different_offsets(self):
  586. # GH 17697, 11736
  587. ts_strings = ["2015-11-18 15:30:00+05:30",
  588. "2015-11-18 16:30:00+06:30",
  589. NaT]
  590. result = to_datetime(ts_strings)
  591. expected = np.array([datetime(2015, 11, 18, 15, 30,
  592. tzinfo=tzoffset(None, 19800)),
  593. datetime(2015, 11, 18, 16, 30,
  594. tzinfo=tzoffset(None, 23400)),
  595. NaT],
  596. dtype=object)
  597. # GH 21864
  598. expected = Index(expected)
  599. tm.assert_index_equal(result, expected)
  600. result = to_datetime(ts_strings, utc=True)
  601. expected = DatetimeIndex([Timestamp(2015, 11, 18, 10),
  602. Timestamp(2015, 11, 18, 10),
  603. NaT], tz='UTC')
  604. tm.assert_index_equal(result, expected)
  605. def test_non_iso_strings_with_tz_offset(self):
  606. result = to_datetime(['March 1, 2018 12:00:00+0400'] * 2)
  607. expected = DatetimeIndex([datetime(2018, 3, 1, 12,
  608. tzinfo=pytz.FixedOffset(240))] * 2)
  609. tm.assert_index_equal(result, expected)
  610. @pytest.mark.parametrize('ts, expected', [
  611. (Timestamp('2018-01-01'),
  612. Timestamp('2018-01-01', tz='UTC')),
  613. (Timestamp('2018-01-01', tz='US/Pacific'),
  614. Timestamp('2018-01-01 08:00', tz='UTC'))])
  615. def test_timestamp_utc_true(self, ts, expected):
  616. # GH 24415
  617. result = to_datetime(ts, utc=True)
  618. assert result == expected
  619. class TestToDatetimeUnit(object):
  620. @pytest.mark.parametrize('cache', [True, False])
  621. def test_unit(self, cache):
  622. # GH 11758
  623. # test proper behavior with erros
  624. with pytest.raises(ValueError):
  625. to_datetime([1], unit='D', format='%Y%m%d', cache=cache)
  626. values = [11111111, 1, 1.0, iNaT, NaT, np.nan,
  627. 'NaT', '']
  628. result = to_datetime(values, unit='D', errors='ignore', cache=cache)
  629. expected = Index([11111111, Timestamp('1970-01-02'),
  630. Timestamp('1970-01-02'), NaT,
  631. NaT, NaT, NaT, NaT],
  632. dtype=object)
  633. tm.assert_index_equal(result, expected)
  634. result = to_datetime(values, unit='D', errors='coerce', cache=cache)
  635. expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
  636. 'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
  637. tm.assert_index_equal(result, expected)
  638. with pytest.raises(tslib.OutOfBoundsDatetime):
  639. to_datetime(values, unit='D', errors='raise', cache=cache)
  640. values = [1420043460000, iNaT, NaT, np.nan, 'NaT']
  641. result = to_datetime(values, errors='ignore', unit='s', cache=cache)
  642. expected = Index([1420043460000, NaT, NaT,
  643. NaT, NaT], dtype=object)
  644. tm.assert_index_equal(result, expected)
  645. result = to_datetime(values, errors='coerce', unit='s', cache=cache)
  646. expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
  647. tm.assert_index_equal(result, expected)
  648. with pytest.raises(tslib.OutOfBoundsDatetime):
  649. to_datetime(values, errors='raise', unit='s', cache=cache)
  650. # if we have a string, then we raise a ValueError
  651. # and NOT an OutOfBoundsDatetime
  652. for val in ['foo', Timestamp('20130101')]:
  653. try:
  654. to_datetime(val, errors='raise', unit='s', cache=cache)
  655. except tslib.OutOfBoundsDatetime:
  656. raise AssertionError("incorrect exception raised")
  657. except ValueError:
  658. pass
  659. @pytest.mark.parametrize('cache', [True, False])
  660. def test_unit_consistency(self, cache):
  661. # consistency of conversions
  662. expected = Timestamp('1970-05-09 14:25:11')
  663. result = pd.to_datetime(11111111, unit='s', errors='raise',
  664. cache=cache)
  665. assert result == expected
  666. assert isinstance(result, Timestamp)
  667. result = pd.to_datetime(11111111, unit='s', errors='coerce',
  668. cache=cache)
  669. assert result == expected
  670. assert isinstance(result, Timestamp)
  671. result = pd.to_datetime(11111111, unit='s', errors='ignore',
  672. cache=cache)
  673. assert result == expected
  674. assert isinstance(result, Timestamp)
  675. @pytest.mark.parametrize('cache', [True, False])
  676. def test_unit_with_numeric(self, cache):
  677. # GH 13180
  678. # coercions from floats/ints are ok
  679. expected = DatetimeIndex(['2015-06-19 05:33:20',
  680. '2015-05-27 22:33:20'])
  681. arr1 = [1.434692e+18, 1.432766e+18]
  682. arr2 = np.array(arr1).astype('int64')
  683. for errors in ['ignore', 'raise', 'coerce']:
  684. result = pd.to_datetime(arr1, errors=errors, cache=cache)
  685. tm.assert_index_equal(result, expected)
  686. result = pd.to_datetime(arr2, errors=errors, cache=cache)
  687. tm.assert_index_equal(result, expected)
  688. # but we want to make sure that we are coercing
  689. # if we have ints/strings
  690. expected = DatetimeIndex(['NaT',
  691. '2015-06-19 05:33:20',
  692. '2015-05-27 22:33:20'])
  693. arr = ['foo', 1.434692e+18, 1.432766e+18]
  694. result = pd.to_datetime(arr, errors='coerce', cache=cache)
  695. tm.assert_index_equal(result, expected)
  696. expected = DatetimeIndex(['2015-06-19 05:33:20',
  697. '2015-05-27 22:33:20',
  698. 'NaT',
  699. 'NaT'])
  700. arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
  701. result = pd.to_datetime(arr, errors='coerce', cache=cache)
  702. tm.assert_index_equal(result, expected)
  703. @pytest.mark.parametrize('cache', [True, False])
  704. def test_unit_mixed(self, cache):
  705. # mixed integers/datetimes
  706. expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
  707. arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
  708. result = pd.to_datetime(arr, errors='coerce', cache=cache)
  709. tm.assert_index_equal(result, expected)
  710. with pytest.raises(ValueError):
  711. pd.to_datetime(arr, errors='raise', cache=cache)
  712. expected = DatetimeIndex(['NaT',
  713. 'NaT',
  714. '2013-01-01'])
  715. arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
  716. result = pd.to_datetime(arr, errors='coerce', cache=cache)
  717. tm.assert_index_equal(result, expected)
  718. with pytest.raises(ValueError):
  719. pd.to_datetime(arr, errors='raise', cache=cache)
  720. @pytest.mark.parametrize('cache', [True, False])
  721. def test_unit_rounding(self, cache):
  722. # GH 14156: argument will incur floating point errors but no
  723. # premature rounding
  724. result = pd.to_datetime(1434743731.8770001, unit='s', cache=cache)
  725. expected = pd.Timestamp('2015-06-19 19:55:31.877000093')
  726. assert result == expected
  727. @pytest.mark.parametrize('cache', [True, False])
  728. def test_unit_ignore_keeps_name(self, cache):
  729. # GH 21697
  730. expected = pd.Index([15e9] * 2, name='name')
  731. result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
  732. cache=cache)
  733. tm.assert_index_equal(result, expected)
  734. @pytest.mark.parametrize('cache', [True, False])
  735. def test_dataframe(self, cache):
  736. df = DataFrame({'year': [2015, 2016],
  737. 'month': [2, 3],
  738. 'day': [4, 5],
  739. 'hour': [6, 7],
  740. 'minute': [58, 59],
  741. 'second': [10, 11],
  742. 'ms': [1, 1],
  743. 'us': [2, 2],
  744. 'ns': [3, 3]})
  745. result = to_datetime({'year': df['year'],
  746. 'month': df['month'],
  747. 'day': df['day']}, cache=cache)
  748. expected = Series([Timestamp('20150204 00:00:00'),
  749. Timestamp('20160305 00:0:00')])
  750. assert_series_equal(result, expected)
  751. # dict-like
  752. result = to_datetime(df[['year', 'month', 'day']].to_dict(),
  753. cache=cache)
  754. assert_series_equal(result, expected)
  755. # dict but with constructable
  756. df2 = df[['year', 'month', 'day']].to_dict()
  757. df2['month'] = 2
  758. result = to_datetime(df2, cache=cache)
  759. expected2 = Series([Timestamp('20150204 00:00:00'),
  760. Timestamp('20160205 00:0:00')])
  761. assert_series_equal(result, expected2)
  762. # unit mappings
  763. units = [{'year': 'years',
  764. 'month': 'months',
  765. 'day': 'days',
  766. 'hour': 'hours',
  767. 'minute': 'minutes',
  768. 'second': 'seconds'},
  769. {'year': 'year',
  770. 'month': 'month',
  771. 'day': 'day',
  772. 'hour': 'hour',
  773. 'minute': 'minute',
  774. 'second': 'second'},
  775. ]
  776. for d in units:
  777. result = to_datetime(df[list(d.keys())].rename(columns=d),
  778. cache=cache)
  779. expected = Series([Timestamp('20150204 06:58:10'),
  780. Timestamp('20160305 07:59:11')])
  781. assert_series_equal(result, expected)
  782. d = {'year': 'year',
  783. 'month': 'month',
  784. 'day': 'day',
  785. 'hour': 'hour',
  786. 'minute': 'minute',
  787. 'second': 'second',
  788. 'ms': 'ms',
  789. 'us': 'us',
  790. 'ns': 'ns'}
  791. result = to_datetime(df.rename(columns=d), cache=cache)
  792. expected = Series([Timestamp('20150204 06:58:10.001002003'),
  793. Timestamp('20160305 07:59:11.001002003')])
  794. assert_series_equal(result, expected)
  795. # coerce back to int
  796. result = to_datetime(df.astype(str), cache=cache)
  797. assert_series_equal(result, expected)
  798. # passing coerce
  799. df2 = DataFrame({'year': [2015, 2016],
  800. 'month': [2, 20],
  801. 'day': [4, 5]})
  802. msg = ("cannot assemble the datetimes: time data .+ does not "
  803. r"match format '%Y%m%d' \(match\)")
  804. with pytest.raises(ValueError, match=msg):
  805. to_datetime(df2, cache=cache)
  806. result = to_datetime(df2, errors='coerce', cache=cache)
  807. expected = Series([Timestamp('20150204 00:00:00'),
  808. NaT])
  809. assert_series_equal(result, expected)
  810. # extra columns
  811. msg = ("extra keys have been passed to the datetime assemblage: "
  812. r"\[foo\]")
  813. with pytest.raises(ValueError, match=msg):
  814. df2 = df.copy()
  815. df2['foo'] = 1
  816. to_datetime(df2, cache=cache)
  817. # not enough
  818. msg = (r'to assemble mappings requires at least that \[year, month, '
  819. r'day\] be specified: \[.+\] is missing')
  820. for c in [['year'],
  821. ['year', 'month'],
  822. ['year', 'month', 'second'],
  823. ['month', 'day'],
  824. ['year', 'day', 'second']]:
  825. with pytest.raises(ValueError, match=msg):
  826. to_datetime(df[c], cache=cache)
  827. # duplicates
  828. msg = 'cannot assemble with duplicate keys'
  829. df2 = DataFrame({'year': [2015, 2016],
  830. 'month': [2, 20],
  831. 'day': [4, 5]})
  832. df2.columns = ['year', 'year', 'day']
  833. with pytest.raises(ValueError, match=msg):
  834. to_datetime(df2, cache=cache)
  835. df2 = DataFrame({'year': [2015, 2016],
  836. 'month': [2, 20],
  837. 'day': [4, 5],
  838. 'hour': [4, 5]})
  839. df2.columns = ['year', 'month', 'day', 'day']
  840. with pytest.raises(ValueError, match=msg):
  841. to_datetime(df2, cache=cache)
  842. @pytest.mark.parametrize('cache', [True, False])
  843. def test_dataframe_dtypes(self, cache):
  844. # #13451
  845. df = DataFrame({'year': [2015, 2016],
  846. 'month': [2, 3],
  847. 'day': [4, 5]})
  848. # int16
  849. result = to_datetime(df.astype('int16'), cache=cache)
  850. expected = Series([Timestamp('20150204 00:00:00'),
  851. Timestamp('20160305 00:00:00')])
  852. assert_series_equal(result, expected)
  853. # mixed dtypes
  854. df['month'] = df['month'].astype('int8')
  855. df['day'] = df['day'].astype('int8')
  856. result = to_datetime(df, cache=cache)
  857. expected = Series([Timestamp('20150204 00:00:00'),
  858. Timestamp('20160305 00:00:00')])
  859. assert_series_equal(result, expected)
  860. # float
  861. df = DataFrame({'year': [2000, 2001],
  862. 'month': [1.5, 1],
  863. 'day': [1, 1]})
  864. with pytest.raises(ValueError):
  865. to_datetime(df, cache=cache)
  866. def test_dataframe_box_false(self):
  867. # GH 23760
  868. df = pd.DataFrame({'year': [2015, 2016],
  869. 'month': [2, 3],
  870. 'day': [4, 5]})
  871. result = pd.to_datetime(df, box=False)
  872. expected = np.array(['2015-02-04', '2016-03-05'],
  873. dtype='datetime64[ns]')
  874. tm.assert_numpy_array_equal(result, expected)
  875. def test_dataframe_utc_true(self):
  876. # GH 23760
  877. df = pd.DataFrame({'year': [2015, 2016],
  878. 'month': [2, 3],
  879. 'day': [4, 5]})
  880. result = pd.to_datetime(df, utc=True)
  881. expected = pd.Series(np.array(['2015-02-04', '2016-03-05'],
  882. dtype='datetime64[ns]')).dt.tz_localize('UTC')
  883. tm.assert_series_equal(result, expected)
  884. def test_to_datetime_errors_ignore_utc_true(self):
  885. # GH 23758
  886. result = pd.to_datetime([1], unit='s', box=True, utc=True,
  887. errors='ignore')
  888. expected = DatetimeIndex(['1970-01-01 00:00:01'], tz='UTC')
  889. tm.assert_index_equal(result, expected)
  890. class TestToDatetimeMisc(object):
  891. def test_to_datetime_barely_out_of_bounds(self):
  892. # GH#19529
  893. # GH#19382 close enough to bounds that dropping nanos would result
  894. # in an in-bounds datetime
  895. arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
  896. with pytest.raises(OutOfBoundsDatetime):
  897. to_datetime(arr)
  898. @pytest.mark.parametrize('cache', [True, False])
  899. def test_to_datetime_iso8601(self, cache):
  900. result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
  901. exp = Timestamp("2012-01-01 00:00:00")
  902. assert result[0] == exp
  903. result = to_datetime(['20121001'], cache=cache) # bad iso 8601
  904. exp = Timestamp('2012-10-01')
  905. assert result[0] == exp
  906. @pytest.mark.parametrize('cache', [True, False])
  907. def test_to_datetime_default(self, cache):
  908. rs = to_datetime('2001', cache=cache)
  909. xp = datetime(2001, 1, 1)
  910. assert rs == xp
  911. # dayfirst is essentially broken
  912. # to_datetime('01-13-2012', dayfirst=True)
  913. # pytest.raises(ValueError, to_datetime('01-13-2012',
  914. # dayfirst=True))
  915. @pytest.mark.parametrize('cache', [True, False])
  916. def test_to_datetime_on_datetime64_series(self, cache):
  917. # #2699
  918. s = Series(date_range('1/1/2000', periods=10))
  919. result = to_datetime(s, cache=cache)
  920. assert result[0] == s[0]
  921. @pytest.mark.parametrize('cache', [True, False])
  922. def test_to_datetime_with_space_in_series(self, cache):
  923. # GH 6428
  924. s = Series(['10/18/2006', '10/18/2008', ' '])
  925. pytest.raises(ValueError, lambda: to_datetime(s,
  926. errors='raise',
  927. cache=cache))
  928. result_coerce = to_datetime(s, errors='coerce', cache=cache)
  929. expected_coerce = Series([datetime(2006, 10, 18),
  930. datetime(2008, 10, 18),
  931. NaT])
  932. tm.assert_series_equal(result_coerce, expected_coerce)
  933. result_ignore = to_datetime(s, errors='ignore', cache=cache)
  934. tm.assert_series_equal(result_ignore, s)
  935. @td.skip_if_has_locale
  936. @pytest.mark.parametrize('cache', [True, False])
  937. def test_to_datetime_with_apply(self, cache):
  938. # this is only locale tested with US/None locales
  939. # GH 5195
  940. # with a format and coerce a single item to_datetime fails
  941. td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3])
  942. expected = pd.to_datetime(td, format='%b %y', cache=cache)
  943. result = td.apply(pd.to_datetime, format='%b %y', cache=cache)
  944. assert_series_equal(result, expected)
  945. td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3])
  946. pytest.raises(ValueError,
  947. lambda: pd.to_datetime(td, format='%b %y',
  948. errors='raise',
  949. cache=cache))
  950. pytest.raises(ValueError,
  951. lambda: td.apply(pd.to_datetime, format='%b %y',
  952. errors='raise', cache=cache))
  953. expected = pd.to_datetime(td, format='%b %y', errors='coerce',
  954. cache=cache)
  955. result = td.apply(
  956. lambda x: pd.to_datetime(x, format='%b %y', errors='coerce',
  957. cache=cache))
  958. assert_series_equal(result, expected)
  959. @pytest.mark.parametrize('cache', [True, False])
  960. def test_to_datetime_types(self, cache):
  961. # empty string
  962. result = to_datetime('', cache=cache)
  963. assert result is NaT
  964. result = to_datetime(['', ''], cache=cache)
  965. assert isna(result).all()
  966. # ints
  967. result = Timestamp(0)
  968. expected = to_datetime(0, cache=cache)
  969. assert result == expected
  970. # GH 3888 (strings)
  971. expected = to_datetime(['2012'], cache=cache)[0]
  972. result = to_datetime('2012', cache=cache)
  973. assert result == expected
  974. # array = ['2012','20120101','20120101 12:01:01']
  975. array = ['20120101', '20120101 12:01:01']
  976. expected = list(to_datetime(array, cache=cache))
  977. result = lmap(Timestamp, array)
  978. tm.assert_almost_equal(result, expected)
  979. # currently fails ###
  980. # result = Timestamp('2012')
  981. # expected = to_datetime('2012')
  982. # assert result == expected
  983. @pytest.mark.parametrize('cache', [True, False])
  984. @pytest.mark.parametrize('box, klass', [
  985. [True, Index],
  986. [False, np.array]
  987. ])
  988. def test_to_datetime_unprocessable_input(self, cache, box, klass):
  989. # GH 4928
  990. # GH 21864
  991. result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box)
  992. expected = klass(np.array([1, '1'], dtype='O'))
  993. tm.assert_equal(result, expected)
  994. pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise',
  995. cache=cache, box=box)
  996. def test_to_datetime_other_datetime64_units(self):
  997. # 5/25/2012
  998. scalar = np.int64(1337904000000000).view('M8[us]')
  999. as_obj = scalar.astype('O')
  1000. index = DatetimeIndex([scalar])
  1001. assert index[0] == scalar.astype('O')
  1002. value = Timestamp(scalar)
  1003. assert value == as_obj
  1004. def test_to_datetime_list_of_integers(self):
  1005. rng = date_range('1/1/2000', periods=20)
  1006. rng = DatetimeIndex(rng.values)
  1007. ints = list(rng.asi8)
  1008. result = DatetimeIndex(ints)
  1009. tm.assert_index_equal(rng, result)
  1010. def test_to_datetime_overflow(self):
  1011. # gh-17637
  1012. # we are overflowing Timedelta range here
  1013. with pytest.raises(OverflowError):
  1014. date_range(start='1/1/1700', freq='B', periods=100000)
  1015. @pytest.mark.parametrize('cache', [True, False])
  1016. def test_string_na_nat_conversion(self, cache):
  1017. # GH #999, #858
  1018. from pandas.compat import parse_date
  1019. strings = np.array(['1/1/2000', '1/2/2000', np.nan,
  1020. '1/4/2000, 12:34:56'], dtype=object)
  1021. expected = np.empty(4, dtype='M8[ns]')
  1022. for i, val in enumerate(strings):
  1023. if isna(val):
  1024. expected[i] = iNaT
  1025. else:
  1026. expected[i] = parse_date(val)
  1027. result = tslib.array_to_datetime(strings)[0]
  1028. tm.assert_almost_equal(result, expected)
  1029. result2 = to_datetime(strings, cache=cache)
  1030. assert isinstance(result2, DatetimeIndex)
  1031. tm.assert_numpy_array_equal(result, result2.values)
  1032. malformed = np.array(['1/100/2000', np.nan], dtype=object)
  1033. # GH 10636, default is now 'raise'
  1034. pytest.raises(ValueError,
  1035. lambda: to_datetime(malformed, errors='raise',
  1036. cache=cache))
  1037. result = to_datetime(malformed, errors='ignore', cache=cache)
  1038. # GH 21864
  1039. expected = Index(malformed)
  1040. tm.assert_index_equal(result, expected)
  1041. pytest.raises(ValueError, to_datetime, malformed, errors='raise',
  1042. cache=cache)
  1043. idx = ['a', 'b', 'c', 'd', 'e']
  1044. series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
  1045. '1/5/2000'], index=idx, name='foo')
  1046. dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan,
  1047. to_datetime('1/3/2000', cache=cache), np.nan,
  1048. to_datetime('1/5/2000', cache=cache)],
  1049. index=idx, name='foo')
  1050. result = to_datetime(series, cache=cache)
  1051. dresult = to_datetime(dseries, cache=cache)
  1052. expected = Series(np.empty(5, dtype='M8[ns]'), index=idx)
  1053. for i in range(5):
  1054. x = series[i]
  1055. if isna(x):
  1056. expected[i] = iNaT
  1057. else:
  1058. expected[i] = to_datetime(x, cache=cache)
  1059. assert_series_equal(result, expected, check_names=False)
  1060. assert result.name == 'foo'
  1061. assert_series_equal(dresult, expected, check_names=False)
  1062. assert dresult.name == 'foo'
  1063. @pytest.mark.parametrize('dtype', [
  1064. 'datetime64[h]', 'datetime64[m]',
  1065. 'datetime64[s]', 'datetime64[ms]',
  1066. 'datetime64[us]', 'datetime64[ns]'])
  1067. @pytest.mark.parametrize('cache', [True, False])
  1068. def test_dti_constructor_numpy_timeunits(self, cache, dtype):
  1069. # GH 9114
  1070. base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'],
  1071. cache=cache)
  1072. values = base.values.astype(dtype)
  1073. tm.assert_index_equal(DatetimeIndex(values), base)
  1074. tm.assert_index_equal(to_datetime(values, cache=cache), base)
  1075. @pytest.mark.parametrize('cache', [True, False])
  1076. def test_dayfirst(self, cache):
  1077. # GH 5917
  1078. arr = ['10/02/2014', '11/02/2014', '12/02/2014']
  1079. expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
  1080. datetime(2014, 2, 12)])
  1081. idx1 = DatetimeIndex(arr, dayfirst=True)
  1082. idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
  1083. idx3 = to_datetime(arr, dayfirst=True, cache=cache)
  1084. idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache)
  1085. idx5 = DatetimeIndex(Index(arr), dayfirst=True)
  1086. idx6 = DatetimeIndex(Series(arr), dayfirst=True)
  1087. tm.assert_index_equal(expected, idx1)
  1088. tm.assert_index_equal(expected, idx2)
  1089. tm.assert_index_equal(expected, idx3)
  1090. tm.assert_index_equal(expected, idx4)
  1091. tm.assert_index_equal(expected, idx5)
  1092. tm.assert_index_equal(expected, idx6)
  1093. class TestGuessDatetimeFormat(object):
  1094. @td.skip_if_not_us_locale
  1095. def test_guess_datetime_format_for_array(self):
  1096. expected_format = '%Y-%m-%d %H:%M:%S.%f'
  1097. dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
  1098. test_arrays = [
  1099. np.array([dt_string, dt_string, dt_string], dtype='O'),
  1100. np.array([np.nan, np.nan, dt_string], dtype='O'),
  1101. np.array([dt_string, 'random_string'], dtype='O'),
  1102. ]
  1103. for test_array in test_arrays:
  1104. assert tools._guess_datetime_format_for_array(
  1105. test_array) == expected_format
  1106. format_for_string_of_nans = tools._guess_datetime_format_for_array(
  1107. np.array(
  1108. [np.nan, np.nan, np.nan], dtype='O'))
  1109. assert format_for_string_of_nans is None
  1110. class TestToDatetimeInferFormat(object):
  1111. @pytest.mark.parametrize('cache', [True, False])
  1112. def test_to_datetime_infer_datetime_format_consistent_format(self, cache):
  1113. s = pd.Series(pd.date_range('20000101', periods=50, freq='H'))
  1114. test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f',
  1115. '%Y-%m-%dT%H:%M:%S.%f']
  1116. for test_format in test_formats:
  1117. s_as_dt_strings = s.apply(lambda x: x.strftime(test_format))
  1118. with_format = pd.to_datetime(s_as_dt_strings, format=test_format,
  1119. cache=cache)
  1120. no_infer = pd.to_datetime(s_as_dt_strings,
  1121. infer_datetime_format=False,
  1122. cache=cache)
  1123. yes_infer = pd.to_datetime(s_as_dt_strings,
  1124. infer_datetime_format=True,
  1125. cache=cache)
  1126. # Whether the format is explicitly passed, it is inferred, or
  1127. # it is not inferred, the results should all be the same
  1128. tm.assert_series_equal(with_format, no_infer)
  1129. tm.assert_series_equal(no_infer, yes_infer)
  1130. @pytest.mark.parametrize('cache', [True, False])
  1131. def test_to_datetime_infer_datetime_format_inconsistent_format(self,
  1132. cache):
  1133. s = pd.Series(np.array(['01/01/2011 00:00:00',
  1134. '01-02-2011 00:00:00',
  1135. '2011-01-03T00:00:00']))
  1136. # When the format is inconsistent, infer_datetime_format should just
  1137. # fallback to the default parsing
  1138. tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
  1139. cache=cache),
  1140. pd.to_datetime(s, infer_datetime_format=True,
  1141. cache=cache))
  1142. s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011']))
  1143. tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
  1144. cache=cache),
  1145. pd.to_datetime(s, infer_datetime_format=True,
  1146. cache=cache))
  1147. @pytest.mark.parametrize('cache', [True, False])
  1148. def test_to_datetime_infer_datetime_format_series_with_nans(self, cache):
  1149. s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan,
  1150. '01/03/2011 00:00:00', np.nan]))
  1151. tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
  1152. cache=cache),
  1153. pd.to_datetime(s, infer_datetime_format=True,
  1154. cache=cache))
  1155. @pytest.mark.parametrize('cache', [True, False])
  1156. def test_to_datetime_infer_datetime_format_series_start_with_nans(self,
  1157. cache):
  1158. s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00',
  1159. '01/02/2011 00:00:00', '01/03/2011 00:00:00']))
  1160. tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
  1161. cache=cache),
  1162. pd.to_datetime(s, infer_datetime_format=True,
  1163. cache=cache))
  1164. @pytest.mark.parametrize('cache', [True, False])
  1165. def test_to_datetime_iso8601_noleading_0s(self, cache):
  1166. # GH 11871
  1167. s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3'])
  1168. expected = pd.Series([pd.Timestamp('2014-01-01'),
  1169. pd.Timestamp('2014-02-02'),
  1170. pd.Timestamp('2015-03-03')])
  1171. tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected)
  1172. tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d',
  1173. cache=cache), expected)
  1174. class TestDaysInMonth(object):
  1175. # tests for issue #10154
  1176. @pytest.mark.parametrize('cache', [True, False])
  1177. def test_day_not_in_month_coerce(self, cache):
  1178. assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache))
  1179. assert isna(to_datetime('2015-02-29', format="%Y-%m-%d",
  1180. errors='coerce', cache=cache))
  1181. assert isna(to_datetime('2015-02-32', format="%Y-%m-%d",
  1182. errors='coerce', cache=cache))
  1183. assert isna(to_datetime('2015-04-31', format="%Y-%m-%d",
  1184. errors='coerce', cache=cache))
  1185. @pytest.mark.parametrize('cache', [True, False])
  1186. def test_day_not_in_month_raise(self, cache):
  1187. pytest.raises(ValueError, to_datetime, '2015-02-29',
  1188. errors='raise', cache=cache)
  1189. pytest.raises(ValueError, to_datetime, '2015-02-29',
  1190. errors='raise', format="%Y-%m-%d", cache=cache)
  1191. pytest.raises(ValueError, to_datetime, '2015-02-32',
  1192. errors='raise', format="%Y-%m-%d", cache=cache)
  1193. pytest.raises(ValueError, to_datetime, '2015-04-31',
  1194. errors='raise', format="%Y-%m-%d", cache=cache)
  1195. @pytest.mark.parametrize('cache', [True, False])
  1196. def test_day_not_in_month_ignore(self, cache):
  1197. assert to_datetime('2015-02-29', errors='ignore',
  1198. cache=cache) == '2015-02-29'
  1199. assert to_datetime('2015-02-29', errors='ignore',
  1200. format="%Y-%m-%d", cache=cache) == '2015-02-29'
  1201. assert to_datetime('2015-02-32', errors='ignore',
  1202. format="%Y-%m-%d", cache=cache) == '2015-02-32'
  1203. assert to_datetime('2015-04-31', errors='ignore',
  1204. format="%Y-%m-%d", cache=cache) == '2015-04-31'
  1205. class TestDatetimeParsingWrappers(object):
  1206. @pytest.mark.parametrize('date_str,expected', list({
  1207. '2011-01-01': datetime(2011, 1, 1),
  1208. '2Q2005': datetime(2005, 4, 1),
  1209. '2Q05': datetime(2005, 4, 1),
  1210. '2005Q1': datetime(2005, 1, 1),
  1211. '05Q1': datetime(2005, 1, 1),
  1212. '2011Q3': datetime(2011, 7, 1),
  1213. '11Q3': datetime(2011, 7, 1),
  1214. '3Q2011': datetime(2011, 7, 1),
  1215. '3Q11': datetime(2011, 7, 1),
  1216. # quarterly without space
  1217. '2000Q4': datetime(2000, 10, 1),
  1218. '00Q4': datetime(2000, 10, 1),
  1219. '4Q2000': datetime(2000, 10, 1),
  1220. '4Q00': datetime(2000, 10, 1),
  1221. '2000q4': datetime(2000, 10, 1),
  1222. '2000-Q4': datetime(2000, 10, 1),
  1223. '00-Q4': datetime(2000, 10, 1),
  1224. '4Q-2000': datetime(2000, 10, 1),
  1225. '4Q-00': datetime(2000, 10, 1),
  1226. '00q4': datetime(2000, 10, 1),
  1227. '2005': datetime(2005, 1, 1),
  1228. '2005-11': datetime(2005, 11, 1),
  1229. '2005 11': datetime(2005, 11, 1),
  1230. '11-2005': datetime(2005, 11, 1),
  1231. '11 2005': datetime(2005, 11, 1),
  1232. '200511': datetime(2020, 5, 11),
  1233. '20051109': datetime(2005, 11, 9),
  1234. '20051109 10:15': datetime(2005, 11, 9, 10, 15),
  1235. '20051109 08H': datetime(2005, 11, 9, 8, 0),
  1236. '2005-11-09 10:15': datetime(2005, 11, 9, 10, 15),
  1237. '2005-11-09 08H': datetime(2005, 11, 9, 8, 0),
  1238. '2005/11/09 10:15': datetime(2005, 11, 9, 10, 15),
  1239. '2005/11/09 08H': datetime(2005, 11, 9, 8, 0),
  1240. "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, 36, 28),
  1241. "Thu Sep 25 2003": datetime(2003, 9, 25),
  1242. "Sep 25 2003": datetime(2003, 9, 25),
  1243. "January 1 2014": datetime(2014, 1, 1),
  1244. # GHE10537
  1245. '2014-06': datetime(2014, 6, 1),
  1246. '06-2014': datetime(2014, 6, 1),
  1247. '2014-6': datetime(2014, 6, 1),
  1248. '6-2014': datetime(2014, 6, 1),
  1249. '20010101 12': datetime(2001, 1, 1, 12),
  1250. '20010101 1234': datetime(2001, 1, 1, 12, 34),
  1251. '20010101 123456': datetime(2001, 1, 1, 12, 34, 56)}.items()))
  1252. @pytest.mark.parametrize('cache', [True, False])
  1253. def test_parsers(self, date_str, expected, cache):
  1254. # dateutil >= 2.5.0 defaults to yearfirst=True
  1255. # https://github.com/dateutil/dateutil/issues/217
  1256. yearfirst = True
  1257. result1, _, _ = parsing.parse_time_string(date_str,
  1258. yearfirst=yearfirst)
  1259. result2 = to_datetime(date_str, yearfirst=yearfirst)
  1260. result3 = to_datetime([date_str], yearfirst=yearfirst)
  1261. # result5 is used below
  1262. result4 = to_datetime(np.array([date_str], dtype=object),
  1263. yearfirst=yearfirst, cache=cache)
  1264. result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
  1265. # result7 is used below
  1266. result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
  1267. result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)
  1268. for res in [result1, result2]:
  1269. assert res == expected
  1270. for res in [result3, result4, result6, result8, result9]:
  1271. exp = DatetimeIndex([pd.Timestamp(expected)])
  1272. tm.assert_index_equal(res, exp)
  1273. # these really need to have yearfirst, but we don't support
  1274. if not yearfirst:
  1275. result5 = Timestamp(date_str)
  1276. assert result5 == expected
  1277. result7 = date_range(date_str, freq='S', periods=1,
  1278. yearfirst=yearfirst)
  1279. assert result7 == expected
  1280. def test_parsers_nat(self):
  1281. # Test that each of several string-accepting methods return pd.NaT
  1282. result1, _, _ = parsing.parse_time_string('NaT')
  1283. result2 = to_datetime('NaT')
  1284. result3 = Timestamp('NaT')
  1285. result4 = DatetimeIndex(['NaT'])[0]
  1286. assert result1 is NaT
  1287. assert result2 is NaT
  1288. assert result3 is NaT
  1289. assert result4 is NaT
  1290. @pytest.mark.parametrize('cache', [True, False])
  1291. def test_parsers_dayfirst_yearfirst(self, cache):
  1292. # OK
  1293. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  1294. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00
  1295. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
  1296. # OK
  1297. # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  1298. # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  1299. # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
  1300. # bug fix in 2.5.2
  1301. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
  1302. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  1303. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
  1304. # OK
  1305. # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  1306. # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  1307. # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
  1308. # OK
  1309. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  1310. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  1311. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
  1312. # OK
  1313. # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  1314. # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  1315. # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
  1316. # revert of bug in 2.5.2
  1317. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  1318. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12
  1319. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
  1320. # OK
  1321. # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  1322. # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  1323. # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
  1324. is_lt_253 = LooseVersion(dateutil.__version__) < LooseVersion('2.5.3')
  1325. # str : dayfirst, yearfirst, expected
  1326. cases = {'10-11-12': [(False, False,
  1327. datetime(2012, 10, 11)),
  1328. (True, False,
  1329. datetime(2012, 11, 10)),
  1330. (False, True,
  1331. datetime(2010, 11, 12)),
  1332. (True, True,
  1333. datetime(2010, 12, 11))],
  1334. '20/12/21': [(False, False,
  1335. datetime(2021, 12, 20)),
  1336. (True, False,
  1337. datetime(2021, 12, 20)),
  1338. (False, True,
  1339. datetime(2020, 12, 21)),
  1340. (True, True,
  1341. datetime(2020, 12, 21))]}
  1342. for date_str, values in compat.iteritems(cases):
  1343. for dayfirst, yearfirst, expected in values:
  1344. # odd comparisons across version
  1345. # let's just skip
  1346. if dayfirst and yearfirst and is_lt_253:
  1347. continue
  1348. # compare with dateutil result
  1349. dateutil_result = parse(date_str, dayfirst=dayfirst,
  1350. yearfirst=yearfirst)
  1351. assert dateutil_result == expected
  1352. result1, _, _ = parsing.parse_time_string(date_str,
  1353. dayfirst=dayfirst,
  1354. yearfirst=yearfirst)
  1355. # we don't support dayfirst/yearfirst here:
  1356. if not dayfirst and not yearfirst:
  1357. result2 = Timestamp(date_str)
  1358. assert result2 == expected
  1359. result3 = to_datetime(date_str, dayfirst=dayfirst,
  1360. yearfirst=yearfirst, cache=cache)
  1361. result4 = DatetimeIndex([date_str], dayfirst=dayfirst,
  1362. yearfirst=yearfirst)[0]
  1363. assert result1 == expected
  1364. assert result3 == expected
  1365. assert result4 == expected
  1366. @pytest.mark.parametrize('cache', [True, False])
  1367. def test_parsers_timestring(self, cache):
  1368. # must be the same as dateutil result
  1369. cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)),
  1370. '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))}
  1371. for date_str, (exp_now, exp_def) in compat.iteritems(cases):
  1372. result1, _, _ = parsing.parse_time_string(date_str)
  1373. result2 = to_datetime(date_str)
  1374. result3 = to_datetime([date_str])
  1375. result4 = Timestamp(date_str)
  1376. result5 = DatetimeIndex([date_str])[0]
  1377. # parse time string return time string based on default date
  1378. # others are not, and can't be changed because it is used in
  1379. # time series plot
  1380. assert result1 == exp_def
  1381. assert result2 == exp_now
  1382. assert result3 == exp_now
  1383. assert result4 == exp_now
  1384. assert result5 == exp_now
  1385. @td.skip_if_has_locale
  1386. def test_parsers_time(self):
  1387. # GH11818
  1388. strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500",
  1389. "2:15:00pm", "021500pm", time(14, 15)]
  1390. expected = time(14, 15)
  1391. for time_string in strings:
  1392. assert tools.to_time(time_string) == expected
  1393. new_string = "14.15"
  1394. pytest.raises(ValueError, tools.to_time, new_string)
  1395. assert tools.to_time(new_string, format="%H.%M") == expected
  1396. arg = ["14:15", "20:20"]
  1397. expected_arr = [time(14, 15), time(20, 20)]
  1398. assert tools.to_time(arg) == expected_arr
  1399. assert tools.to_time(arg, format="%H:%M") == expected_arr
  1400. assert tools.to_time(arg, infer_time_format=True) == expected_arr
  1401. assert tools.to_time(arg, format="%I:%M%p",
  1402. errors="coerce") == [None, None]
  1403. res = tools.to_time(arg, format="%I:%M%p", errors="ignore")
  1404. tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_))
  1405. with pytest.raises(ValueError):
  1406. tools.to_time(arg, format="%I:%M%p", errors="raise")
  1407. tm.assert_series_equal(tools.to_time(Series(arg, name="test")),
  1408. Series(expected_arr, name="test"))
  1409. res = tools.to_time(np.array(arg))
  1410. assert isinstance(res, list)
  1411. assert res == expected_arr
  1412. @pytest.mark.parametrize('cache', [True, False])
  1413. @pytest.mark.parametrize('dt_string, tz, dt_string_repr', [
  1414. ('2013-01-01 05:45+0545', pytz.FixedOffset(345),
  1415. "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')"),
  1416. ('2013-01-01 05:30+0530', pytz.FixedOffset(330),
  1417. "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')")])
  1418. def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string,
  1419. tz, dt_string_repr):
  1420. # GH11708
  1421. base = to_datetime("2013-01-01 00:00:00", cache=cache)
  1422. base = base.tz_localize('UTC').tz_convert(tz)
  1423. dt_time = to_datetime(dt_string, cache=cache)
  1424. assert base == dt_time
  1425. assert dt_string_repr == repr(dt_time)
  1426. @pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
  1427. def units(request):
  1428. """Day and some time units.
  1429. * D
  1430. * s
  1431. * ms
  1432. * us
  1433. * ns
  1434. """
  1435. return request.param
  1436. @pytest.fixture
  1437. def epoch_1960():
  1438. """Timestamp at 1960-01-01."""
  1439. return Timestamp('1960-01-01')
  1440. @pytest.fixture
  1441. def units_from_epochs():
  1442. return list(range(5))
  1443. @pytest.fixture(params=['timestamp', 'pydatetime', 'datetime64', 'str_1960'])
  1444. def epochs(epoch_1960, request):
  1445. """Timestamp at 1960-01-01 in various forms.
  1446. * pd.Timestamp
  1447. * datetime.datetime
  1448. * numpy.datetime64
  1449. * str
  1450. """
  1451. assert request.param in {'timestamp', 'pydatetime', 'datetime64',
  1452. "str_1960"}
  1453. if request.param == 'timestamp':
  1454. return epoch_1960
  1455. elif request.param == 'pydatetime':
  1456. return epoch_1960.to_pydatetime()
  1457. elif request.param == "datetime64":
  1458. return epoch_1960.to_datetime64()
  1459. else:
  1460. return str(epoch_1960)
  1461. @pytest.fixture
  1462. def julian_dates():
  1463. return pd.date_range('2014-1-1', periods=10).to_julian_date().values
  1464. class TestOrigin(object):
  1465. def test_to_basic(self, julian_dates):
  1466. # gh-11276, gh-11745
  1467. # for origin as julian
  1468. result = Series(pd.to_datetime(
  1469. julian_dates, unit='D', origin='julian'))
  1470. expected = Series(pd.to_datetime(
  1471. julian_dates - pd.Timestamp(0).to_julian_date(), unit='D'))
  1472. assert_series_equal(result, expected)
  1473. result = Series(pd.to_datetime(
  1474. [0, 1, 2], unit='D', origin='unix'))
  1475. expected = Series([Timestamp('1970-01-01'),
  1476. Timestamp('1970-01-02'),
  1477. Timestamp('1970-01-03')])
  1478. assert_series_equal(result, expected)
  1479. # default
  1480. result = Series(pd.to_datetime(
  1481. [0, 1, 2], unit='D'))
  1482. expected = Series([Timestamp('1970-01-01'),
  1483. Timestamp('1970-01-02'),
  1484. Timestamp('1970-01-03')])
  1485. assert_series_equal(result, expected)
  1486. def test_julian_round_trip(self):
  1487. result = pd.to_datetime(2456658, origin='julian', unit='D')
  1488. assert result.to_julian_date() == 2456658
  1489. # out-of-bounds
  1490. with pytest.raises(ValueError):
  1491. pd.to_datetime(1, origin="julian", unit='D')
  1492. def test_invalid_unit(self, units, julian_dates):
  1493. # checking for invalid combination of origin='julian' and unit != D
  1494. if units != 'D':
  1495. with pytest.raises(ValueError):
  1496. pd.to_datetime(julian_dates, unit=units, origin='julian')
  1497. def test_invalid_origin(self):
  1498. # need to have a numeric specified
  1499. with pytest.raises(ValueError):
  1500. pd.to_datetime("2005-01-01", origin="1960-01-01")
  1501. with pytest.raises(ValueError):
  1502. pd.to_datetime("2005-01-01", origin="1960-01-01", unit='D')
  1503. def test_epoch(self, units, epochs, epoch_1960, units_from_epochs):
  1504. expected = Series(
  1505. [pd.Timedelta(x, unit=units) +
  1506. epoch_1960 for x in units_from_epochs])
  1507. result = Series(pd.to_datetime(
  1508. units_from_epochs, unit=units, origin=epochs))
  1509. assert_series_equal(result, expected)
  1510. @pytest.mark.parametrize("origin, exc",
  1511. [('random_string', ValueError),
  1512. ('epoch', ValueError),
  1513. ('13-24-1990', ValueError),
  1514. (datetime(1, 1, 1), tslib.OutOfBoundsDatetime)])
  1515. def test_invalid_origins(self, origin, exc, units, units_from_epochs):
  1516. with pytest.raises(exc):
  1517. pd.to_datetime(units_from_epochs, unit=units,
  1518. origin=origin)
  1519. def test_invalid_origins_tzinfo(self):
  1520. # GH16842
  1521. with pytest.raises(ValueError):
  1522. pd.to_datetime(1, unit='D',
  1523. origin=datetime(2000, 1, 1, tzinfo=pytz.utc))
  1524. def test_processing_order(self):
  1525. # make sure we handle out-of-bounds *before*
  1526. # constructing the dates
  1527. result = pd.to_datetime(200 * 365, unit='D')
  1528. expected = Timestamp('2169-11-13 00:00:00')
  1529. assert result == expected
  1530. result = pd.to_datetime(200 * 365, unit='D', origin='1870-01-01')
  1531. expected = Timestamp('2069-11-13 00:00:00')
  1532. assert result == expected
  1533. result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01')
  1534. expected = Timestamp('2169-10-20 00:00:00')
  1535. assert result == expected