__init__.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. # flake8: noqa
  2. """
  3. Expose public exceptions & warnings
  4. """
  5. from pandas._libs.tslibs import OutOfBoundsDatetime
  class PerformanceWarning(Warning):
      """
      Warning issued when an operation may have a performance impact.
      """
  class UnsupportedFunctionCall(ValueError):
      """
      Exception raised when a numpy function is called on a pandas
      object that does not support that function,
      e.g. ``np.cumsum(groupby_object)``.
      """
  class UnsortedIndexError(KeyError):
      """
      Error raised when slicing a MultiIndex whose index has not been
      lexsorted. Subclass of `KeyError`.

      .. versionadded:: 0.20.0
      """
  class ParserError(ValueError):
      """
      Exception raised when `pd.read_csv` encounters an error.
      """
  class DtypeWarning(Warning):
      """
      Warning raised when reading different dtypes in a column from a file.

      Raised for a dtype incompatibility. This can happen whenever
      `read_csv` or `read_table` encounter non-uniform dtypes in one or
      more columns of a given CSV file.

      See Also
      --------
      pandas.read_csv : Read CSV (comma-separated) file into a DataFrame.
      pandas.read_table : Read general delimited file into a DataFrame.

      Notes
      -----
      This warning is issued when dealing with larger files because the
      dtype checking happens per chunk read.

      Despite the warning, the CSV file is read with mixed types in a
      single column, which will be an object type. See the examples below
      to better understand this issue.

      Examples
      --------
      This example creates and reads a large CSV file with a column that
      contains `int` and `str`.

      >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
      ...                          ['1'] * 100000),
      ...                    'b': ['b'] * 300000})
      >>> df.to_csv('test.csv', index=False)
      >>> df2 = pd.read_csv('test.csv')
      ... # DtypeWarning: Columns (0) have mixed types

      It is important to notice that ``df2`` will contain both `str` and
      `int` for the same input, '1'.

      >>> df2.iloc[262140, 0]
      '1'
      >>> type(df2.iloc[262140, 0])
      <class 'str'>
      >>> df2.iloc[262150, 0]
      1
      >>> type(df2.iloc[262150, 0])
      <class 'int'>

      One way to solve this issue is to use the `dtype` parameter of the
      `read_csv` and `read_table` functions to make the conversion
      explicit:

      >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str})

      No warning was issued.

      >>> import os
      >>> os.remove('test.csv')
      """
  class EmptyDataError(ValueError):
      """
      Exception thrown in `pd.read_csv` (by both the C and Python
      engines) when empty data or an empty header is encountered.
      """
  class ParserWarning(Warning):
      """
      Warning raised when reading a file that doesn't use the default 'c'
      parser.

      Raised by `pd.read_csv` and `pd.read_table` when it is necessary to
      change parsers, generally from the default 'c' parser to 'python'.

      It happens due to a lack of support or functionality for parsing a
      particular attribute of a CSV file with the requested engine.

      Currently, 'c' unsupported options include the following parameters:

      1. `sep` other than a single character (e.g. regex separators)
      2. `skipfooter` higher than 0
      3. `sep=None` with `delim_whitespace=False`

      The warning can be avoided by adding `engine='python'` as a
      parameter in `pd.read_csv` and `pd.read_table` methods.

      See Also
      --------
      pd.read_csv : Read CSV (comma-separated) file into DataFrame.
      pd.read_table : Read general delimited file into DataFrame.

      Examples
      --------
      Using a `sep` in `pd.read_csv` other than a single character:

      >>> import io
      >>> csv = u'''a;b;c
      ... 1;1,8
      ... 1;2,1'''
      >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]')  # doctest: +SKIP
      ... # ParserWarning: Falling back to the 'python' engine...

      Adding `engine='python'` to `pd.read_csv` removes the Warning:

      >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
      """
  class MergeError(ValueError):
      """
      Error raised when merging fails because of problems with the
      input data. Subclass of `ValueError`.
      """
  class NullFrequencyError(ValueError):
      """
      Error raised when an operation that needs a non-null frequency is
      given a null `freq` attribute, particularly `DatetimeIndex.shift`,
      `TimedeltaIndex.shift` and `PeriodIndex.shift`.
      """
  class AccessorRegistrationWarning(Warning):
      """
      Warning for attribute conflicts in accessor registration.
      """
  class AbstractMethodError(NotImplementedError):
      """
      Raise this error instead of NotImplementedError for abstract methods
      while keeping compatibility with Python 2 and Python 3.

      Parameters
      ----------
      class_instance : object
          The instance whose class name is reported in the message, or the
          class itself when `methodtype` is 'classmethod'.
      methodtype : str, default 'method'
          One of 'method', 'classmethod', 'staticmethod' or 'property'.

      Raises
      ------
      ValueError
          If `methodtype` is not one of the supported values.
      """

      def __init__(self, class_instance, methodtype='method'):
          types = {'method', 'classmethod', 'staticmethod', 'property'}
          if methodtype not in types:
              # The allowed values fill the first placeholder and the
              # rejected value the second, matching the message template.
              msg = 'methodtype must be one of {}, got {} instead.'.format(
                  types, methodtype)
              raise ValueError(msg)
          self.methodtype = methodtype
          self.class_instance = class_instance

      def __str__(self):
          """Return a message naming the member kind and concrete class."""
          if self.methodtype == 'classmethod':
              # For classmethods `class_instance` is the class itself.
              name = self.class_instance.__name__
          else:
              name = self.class_instance.__class__.__name__
          msg = "This {methodtype} must be defined in the concrete class {name}"
          return (msg.format(methodtype=self.methodtype, name=name))