test_array_to_datetime.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. # -*- coding: utf-8 -*-
  2. from datetime import date, datetime
  3. from dateutil.tz.tz import tzoffset
  4. import numpy as np
  5. import pytest
  6. import pytz
  7. from pandas._libs import iNaT, tslib
  8. from pandas.compat.numpy import np_array_datetime64_compat
  9. import pandas.util.testing as tm
  10. @pytest.mark.parametrize("data,expected", [
  11. (["01-01-2013", "01-02-2013"],
  12. ["2013-01-01T00:00:00.000000000-0000",
  13. "2013-01-02T00:00:00.000000000-0000"]),
  14. (["Mon Sep 16 2013", "Tue Sep 17 2013"],
  15. ["2013-09-16T00:00:00.000000000-0000",
  16. "2013-09-17T00:00:00.000000000-0000"])
  17. ])
  18. def test_parsing_valid_dates(data, expected):
  19. arr = np.array(data, dtype=object)
  20. result, _ = tslib.array_to_datetime(arr)
  21. expected = np_array_datetime64_compat(expected, dtype="M8[ns]")
  22. tm.assert_numpy_array_equal(result, expected)
  23. @pytest.mark.parametrize("dt_string, expected_tz", [
  24. ["01-01-2013 08:00:00+08:00", 480],
  25. ["2013-01-01T08:00:00.000000000+0800", 480],
  26. ["2012-12-31T16:00:00.000000000-0800", -480],
  27. ["12-31-2012 23:00:00-01:00", -60]
  28. ])
  29. def test_parsing_timezone_offsets(dt_string, expected_tz):
  30. # All of these datetime strings with offsets are equivalent
  31. # to the same datetime after the timezone offset is added.
  32. arr = np.array(["01-01-2013 00:00:00"], dtype=object)
  33. expected, _ = tslib.array_to_datetime(arr)
  34. arr = np.array([dt_string], dtype=object)
  35. result, result_tz = tslib.array_to_datetime(arr)
  36. tm.assert_numpy_array_equal(result, expected)
  37. assert result_tz is pytz.FixedOffset(expected_tz)
  38. def test_parsing_non_iso_timezone_offset():
  39. dt_string = "01-01-2013T00:00:00.000000000+0000"
  40. arr = np.array([dt_string], dtype=object)
  41. result, result_tz = tslib.array_to_datetime(arr)
  42. expected = np.array([np.datetime64("2013-01-01 00:00:00.000000000")])
  43. tm.assert_numpy_array_equal(result, expected)
  44. assert result_tz is pytz.FixedOffset(0)
  45. def test_parsing_different_timezone_offsets():
  46. # see gh-17697
  47. data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"]
  48. data = np.array(data, dtype=object)
  49. result, result_tz = tslib.array_to_datetime(data)
  50. expected = np.array([datetime(2015, 11, 18, 15, 30,
  51. tzinfo=tzoffset(None, 19800)),
  52. datetime(2015, 11, 18, 15, 30,
  53. tzinfo=tzoffset(None, 23400))],
  54. dtype=object)
  55. tm.assert_numpy_array_equal(result, expected)
  56. assert result_tz is None
  57. @pytest.mark.parametrize("data", [
  58. ["-352.737091", "183.575577"],
  59. ["1", "2", "3", "4", "5"]
  60. ])
  61. def test_number_looking_strings_not_into_datetime(data):
  62. # see gh-4601
  63. #
  64. # These strings don't look like datetimes, so
  65. # they shouldn't be attempted to be converted.
  66. arr = np.array(data, dtype=object)
  67. result, _ = tslib.array_to_datetime(arr, errors="ignore")
  68. tm.assert_numpy_array_equal(result, arr)
  69. @pytest.mark.parametrize("invalid_date", [
  70. date(1000, 1, 1),
  71. datetime(1000, 1, 1),
  72. "1000-01-01",
  73. "Jan 1, 1000",
  74. np.datetime64("1000-01-01")])
  75. @pytest.mark.parametrize("errors", ["coerce", "raise"])
  76. def test_coerce_outside_ns_bounds(invalid_date, errors):
  77. arr = np.array([invalid_date], dtype="object")
  78. kwargs = dict(values=arr, errors=errors)
  79. if errors == "raise":
  80. msg = "Out of bounds nanosecond timestamp"
  81. with pytest.raises(ValueError, match=msg):
  82. tslib.array_to_datetime(**kwargs)
  83. else: # coerce.
  84. result, _ = tslib.array_to_datetime(**kwargs)
  85. expected = np.array([iNaT], dtype="M8[ns]")
  86. tm.assert_numpy_array_equal(result, expected)
  87. def test_coerce_outside_ns_bounds_one_valid():
  88. arr = np.array(["1/1/1000", "1/1/2000"], dtype=object)
  89. result, _ = tslib.array_to_datetime(arr, errors="coerce")
  90. expected = [iNaT, "2000-01-01T00:00:00.000000000-0000"]
  91. expected = np_array_datetime64_compat(expected, dtype="M8[ns]")
  92. tm.assert_numpy_array_equal(result, expected)
  93. @pytest.mark.parametrize("errors", ["ignore", "coerce"])
  94. def test_coerce_of_invalid_datetimes(errors):
  95. arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)
  96. kwargs = dict(values=arr, errors=errors)
  97. if errors == "ignore":
  98. # Without coercing, the presence of any invalid
  99. # dates prevents any values from being converted.
  100. result, _ = tslib.array_to_datetime(**kwargs)
  101. tm.assert_numpy_array_equal(result, arr)
  102. else: # coerce.
  103. # With coercing, the invalid dates becomes iNaT
  104. result, _ = tslib.array_to_datetime(arr, errors="coerce")
  105. expected = ["2013-01-01T00:00:00.000000000-0000",
  106. iNaT,
  107. iNaT]
  108. tm.assert_numpy_array_equal(
  109. result,
  110. np_array_datetime64_compat(expected, dtype="M8[ns]"))
  111. def test_to_datetime_barely_out_of_bounds():
  112. # see gh-19382, gh-19529
  113. #
  114. # Close enough to bounds that dropping nanos
  115. # would result in an in-bounds datetime.
  116. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
  117. msg = "Out of bounds nanosecond timestamp: 2262-04-11 23:47:16"
  118. with pytest.raises(tslib.OutOfBoundsDatetime, match=msg):
  119. tslib.array_to_datetime(arr)