test_parsing.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. # -*- coding: utf-8 -*-
  2. """
  3. Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx
  4. """
  5. from datetime import datetime
  6. from dateutil.parser import parse
  7. import numpy as np
  8. import pytest
  9. from pandas._libs.tslibs import parsing
  10. from pandas._libs.tslibs.parsing import parse_time_string
  11. import pandas.util._test_decorators as td
  12. from pandas.util import testing as tm
  13. def test_parse_time_string():
  14. (date, parsed, reso) = parse_time_string("4Q1984")
  15. (date_lower, parsed_lower, reso_lower) = parse_time_string("4q1984")
  16. assert date == date_lower
  17. assert reso == reso_lower
  18. assert parsed == parsed_lower
  19. @pytest.mark.parametrize("dashed,normal", [
  20. ("1988-Q2", "1988Q2"),
  21. ("2Q-1988", "2Q1988")
  22. ])
  23. def test_parse_time_quarter_with_dash(dashed, normal):
  24. # see gh-9688
  25. (date_dash, parsed_dash, reso_dash) = parse_time_string(dashed)
  26. (date, parsed, reso) = parse_time_string(normal)
  27. assert date_dash == date
  28. assert parsed_dash == parsed
  29. assert reso_dash == reso
  30. @pytest.mark.parametrize("dashed", [
  31. "-2Q1992", "2-Q1992", "4-4Q1992"
  32. ])
  33. def test_parse_time_quarter_with_dash_error(dashed):
  34. msg = ("Unknown datetime string format, "
  35. "unable to parse: {dashed}".format(dashed=dashed))
  36. with pytest.raises(parsing.DateParseError, match=msg):
  37. parse_time_string(dashed)
  38. @pytest.mark.parametrize("date_string,expected", [
  39. ("123.1234", False),
  40. ("-50000", False),
  41. ("999", False),
  42. ("m", False),
  43. ("T", False),
  44. ("Mon Sep 16, 2013", True),
  45. ("2012-01-01", True),
  46. ("01/01/2012", True),
  47. ("01012012", True),
  48. ("0101", True),
  49. ("1-1", True)
  50. ])
  51. def test_does_not_convert_mixed_integer(date_string, expected):
  52. assert parsing._does_string_look_like_datetime(date_string) is expected
  53. @pytest.mark.parametrize("date_str,kwargs,msg", [
  54. ("2013Q5", dict(),
  55. ("Incorrect quarterly string is given, "
  56. "quarter must be between 1 and 4: 2013Q5")),
  57. # see gh-5418
  58. ("2013Q1", dict(freq="INVLD-L-DEC-SAT"),
  59. ("Unable to retrieve month information "
  60. "from given freq: INVLD-L-DEC-SAT"))
  61. ])
  62. def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg):
  63. with pytest.raises(parsing.DateParseError, match=msg):
  64. parsing.parse_time_string(date_str, **kwargs)
  65. @pytest.mark.parametrize("date_str,freq,expected", [
  66. ("2013Q2", None, datetime(2013, 4, 1)),
  67. ("2013Q2", "A-APR", datetime(2012, 8, 1)),
  68. ("2013-Q2", "A-DEC", datetime(2013, 4, 1))
  69. ])
  70. def test_parsers_quarterly_with_freq(date_str, freq, expected):
  71. result, _, _ = parsing.parse_time_string(date_str, freq=freq)
  72. assert result == expected
  73. @pytest.mark.parametrize("date_str", [
  74. "2Q 2005", "2Q-200A", "2Q-200",
  75. "22Q2005", "2Q200.", "6Q-20"
  76. ])
  77. def test_parsers_quarter_invalid(date_str):
  78. if date_str == "6Q-20":
  79. msg = ("Incorrect quarterly string is given, quarter "
  80. "must be between 1 and 4: {date_str}".format(date_str=date_str))
  81. else:
  82. msg = ("Unknown datetime string format, unable "
  83. "to parse: {date_str}".format(date_str=date_str))
  84. with pytest.raises(ValueError, match=msg):
  85. parsing.parse_time_string(date_str)
  86. @pytest.mark.parametrize("date_str,expected", [
  87. ("201101", datetime(2011, 1, 1, 0, 0)),
  88. ("200005", datetime(2000, 5, 1, 0, 0))
  89. ])
  90. def test_parsers_month_freq(date_str, expected):
  91. result, _, _ = parsing.parse_time_string(date_str, freq="M")
  92. assert result == expected
  93. @td.skip_if_not_us_locale
  94. @pytest.mark.parametrize("string,fmt", [
  95. ("20111230", "%Y%m%d"),
  96. ("2011-12-30", "%Y-%m-%d"),
  97. ("30-12-2011", "%d-%m-%Y"),
  98. ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"),
  99. ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"),
  100. ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f")
  101. ])
  102. def test_guess_datetime_format_with_parseable_formats(string, fmt):
  103. result = parsing._guess_datetime_format(string)
  104. assert result == fmt
  105. @pytest.mark.parametrize("dayfirst,expected", [
  106. (True, "%d/%m/%Y"),
  107. (False, "%m/%d/%Y")
  108. ])
  109. def test_guess_datetime_format_with_dayfirst(dayfirst, expected):
  110. ambiguous_string = "01/01/2011"
  111. result = parsing._guess_datetime_format(ambiguous_string,
  112. dayfirst=dayfirst)
  113. assert result == expected
  114. @td.skip_if_has_locale
  115. @pytest.mark.parametrize("string,fmt", [
  116. ("30/Dec/2011", "%d/%b/%Y"),
  117. ("30/December/2011", "%d/%B/%Y"),
  118. ("30/Dec/2011 00:00:00", "%d/%b/%Y %H:%M:%S")
  119. ])
  120. def test_guess_datetime_format_with_locale_specific_formats(string, fmt):
  121. result = parsing._guess_datetime_format(string)
  122. assert result == fmt
  123. @pytest.mark.parametrize("invalid_dt", [
  124. "2013", "01/2013", "12:00:00", "1/1/1/1",
  125. "this_is_not_a_datetime", "51a", 9,
  126. datetime(2011, 1, 1)
  127. ])
  128. def test_guess_datetime_format_invalid_inputs(invalid_dt):
  129. # A datetime string must include a year, month and a day for it to be
  130. # guessable, in addition to being a string that looks like a datetime.
  131. assert parsing._guess_datetime_format(invalid_dt) is None
  132. @pytest.mark.parametrize("string,fmt", [
  133. ("2011-1-1", "%Y-%m-%d"),
  134. ("1/1/2011", "%m/%d/%Y"),
  135. ("30-1-2011", "%d-%m-%Y"),
  136. ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"),
  137. ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S"),
  138. ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S")
  139. ])
  140. def test_guess_datetime_format_no_padding(string, fmt):
  141. # see gh-11142
  142. result = parsing._guess_datetime_format(string)
  143. assert result == fmt
  144. def test_try_parse_dates():
  145. arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object)
  146. result = parsing.try_parse_dates(arr, dayfirst=True)
  147. expected = np.array([parse(d, dayfirst=True) for d in arr])
  148. tm.assert_numpy_array_equal(result, expected)