sasreader.py 2.4 KB

"""
Read SAS sas7bdat or xport files.
"""
  4. from pandas import compat
  5. from pandas.io.common import _stringify_path
  6. def read_sas(filepath_or_buffer, format=None, index=None, encoding=None,
  7. chunksize=None, iterator=False):
  8. """
  9. Read SAS files stored as either XPORT or SAS7BDAT format files.
  10. Parameters
  11. ----------
  12. filepath_or_buffer : string or file-like object
  13. Path to the SAS file.
  14. format : string {'xport', 'sas7bdat'} or None
  15. If None, file format is inferred from file extension. If 'xport' or
  16. 'sas7bdat', uses the corresponding format.
  17. index : identifier of index column, defaults to None
  18. Identifier of column that should be used as index of the DataFrame.
  19. encoding : string, default is None
  20. Encoding for text data. If None, text data are stored as raw bytes.
  21. chunksize : int
  22. Read file `chunksize` lines at a time, returns iterator.
  23. iterator : bool, defaults to False
  24. If True, returns an iterator for reading the file incrementally.
  25. Returns
  26. -------
  27. DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
  28. or XportReader
  29. """
  30. if format is None:
  31. buffer_error_msg = ("If this is a buffer object rather "
  32. "than a string name, you must specify "
  33. "a format string")
  34. filepath_or_buffer = _stringify_path(filepath_or_buffer)
  35. if not isinstance(filepath_or_buffer, compat.string_types):
  36. raise ValueError(buffer_error_msg)
  37. fname = filepath_or_buffer.lower()
  38. if fname.endswith(".xpt"):
  39. format = "xport"
  40. elif fname.endswith(".sas7bdat"):
  41. format = "sas7bdat"
  42. else:
  43. raise ValueError("unable to infer format of SAS file")
  44. if format.lower() == 'xport':
  45. from pandas.io.sas.sas_xport import XportReader
  46. reader = XportReader(filepath_or_buffer, index=index,
  47. encoding=encoding,
  48. chunksize=chunksize)
  49. elif format.lower() == 'sas7bdat':
  50. from pandas.io.sas.sas7bdat import SAS7BDATReader
  51. reader = SAS7BDATReader(filepath_or_buffer, index=index,
  52. encoding=encoding,
  53. chunksize=chunksize)
  54. else:
  55. raise ValueError('unknown SAS format')
  56. if iterator or chunksize:
  57. return reader
  58. data = reader.read()
  59. reader.close()
  60. return data