# _datasource.py
"""A file interface for handling local and remote data files.

The goal of datasource is to abstract some of the file system operations
when dealing with data files so the researcher doesn't have to know all the
low-level details.  Through datasource, a researcher can obtain and use a
file with one function call, regardless of location of the file.

DataSource is meant to augment standard python libraries, not replace them.
It should work seamlessly with standard file IO operations and the os
module.

DataSource files can originate locally or remotely:

- local files : '/home/guido/src/local/data.txt'
- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'

DataSource files can also be compressed or uncompressed.  Currently only
gzip, bz2 and xz are supported.

Example::

    >>> # Create a DataSource, use os.curdir (default) for local storage.
    >>> ds = datasource.DataSource()
    >>>
    >>> # Open a remote file.
    >>> # DataSource downloads the file, stores it locally in:
    >>> #     './www.google.com/index.html'
    >>> # opens the file and returns a file object.
    >>> fp = ds.open('http://www.google.com/index.html')
    >>>
    >>> # Use the file as you normally would
    >>> fp.read()
    >>> fp.close()

"""
from __future__ import division, absolute_import, print_function

import os
import sys
import warnings
import shutil
import io

from numpy.core.overrides import set_module

# Keep a reference to the builtin ``open``: this module defines its own
# module-level ``open`` function further down, which shadows the builtin
# name for all code in this file.
_open = open
  36. def _check_mode(mode, encoding, newline):
  37. """Check mode and that encoding and newline are compatible.
  38. Parameters
  39. ----------
  40. mode : str
  41. File open mode.
  42. encoding : str
  43. File encoding.
  44. newline : str
  45. Newline for text files.
  46. """
  47. if "t" in mode:
  48. if "b" in mode:
  49. raise ValueError("Invalid mode: %r" % (mode,))
  50. else:
  51. if encoding is not None:
  52. raise ValueError("Argument 'encoding' not supported in binary mode")
  53. if newline is not None:
  54. raise ValueError("Argument 'newline' not supported in binary mode")
  55. def _python2_bz2open(fn, mode, encoding, newline):
  56. """Wrapper to open bz2 in text mode.
  57. Parameters
  58. ----------
  59. fn : str
  60. File name
  61. mode : {'r', 'w'}
  62. File mode. Note that bz2 Text files are not supported.
  63. encoding : str
  64. Ignored, text bz2 files not supported in Python2.
  65. newline : str
  66. Ignored, text bz2 files not supported in Python2.
  67. """
  68. import bz2
  69. _check_mode(mode, encoding, newline)
  70. if "t" in mode:
  71. # BZ2File is missing necessary functions for TextIOWrapper
  72. warnings.warn("Assuming latin1 encoding for bz2 text file in Python2",
  73. RuntimeWarning, stacklevel=5)
  74. mode = mode.replace("t", "")
  75. return bz2.BZ2File(fn, mode)
  76. def _python2_gzipopen(fn, mode, encoding, newline):
  77. """ Wrapper to open gzip in text mode.
  78. Parameters
  79. ----------
  80. fn : str, bytes, file
  81. File path or opened file.
  82. mode : str
  83. File mode. The actual files are opened as binary, but will decoded
  84. using the specified `encoding` and `newline`.
  85. encoding : str
  86. Encoding to be used when reading/writing as text.
  87. newline : str
  88. Newline to be used when reading/writing as text.
  89. """
  90. import gzip
  91. # gzip is lacking read1 needed for TextIOWrapper
  92. class GzipWrap(gzip.GzipFile):
  93. def read1(self, n):
  94. return self.read(n)
  95. _check_mode(mode, encoding, newline)
  96. gz_mode = mode.replace("t", "")
  97. if isinstance(fn, (str, bytes)):
  98. binary_file = GzipWrap(fn, gz_mode)
  99. elif hasattr(fn, "read") or hasattr(fn, "write"):
  100. binary_file = GzipWrap(None, gz_mode, fileobj=fn)
  101. else:
  102. raise TypeError("filename must be a str or bytes object, or a file")
  103. if "t" in mode:
  104. return io.TextIOWrapper(binary_file, encoding, newline=newline)
  105. else:
  106. return binary_file
  107. # Using a class instead of a module-level dictionary
  108. # to reduce the initial 'import numpy' overhead by
  109. # deferring the import of lzma, bz2 and gzip until needed
  110. # TODO: .zip support, .tar support?
  111. class _FileOpeners(object):
  112. """
  113. Container for different methods to open (un-)compressed files.
  114. `_FileOpeners` contains a dictionary that holds one method for each
  115. supported file format. Attribute lookup is implemented in such a way
  116. that an instance of `_FileOpeners` itself can be indexed with the keys
  117. of that dictionary. Currently uncompressed files as well as files
  118. compressed with ``gzip``, ``bz2`` or ``xz`` compression are supported.
  119. Notes
  120. -----
  121. `_file_openers`, an instance of `_FileOpeners`, is made available for
  122. use in the `_datasource` module.
  123. Examples
  124. --------
  125. >>> np.lib._datasource._file_openers.keys()
  126. [None, '.bz2', '.gz', '.xz', '.lzma']
  127. >>> np.lib._datasource._file_openers['.gz'] is gzip.open
  128. True
  129. """
  130. def __init__(self):
  131. self._loaded = False
  132. self._file_openers = {None: io.open}
  133. def _load(self):
  134. if self._loaded:
  135. return
  136. try:
  137. import bz2
  138. if sys.version_info[0] >= 3:
  139. self._file_openers[".bz2"] = bz2.open
  140. else:
  141. self._file_openers[".bz2"] = _python2_bz2open
  142. except ImportError:
  143. pass
  144. try:
  145. import gzip
  146. if sys.version_info[0] >= 3:
  147. self._file_openers[".gz"] = gzip.open
  148. else:
  149. self._file_openers[".gz"] = _python2_gzipopen
  150. except ImportError:
  151. pass
  152. try:
  153. import lzma
  154. self._file_openers[".xz"] = lzma.open
  155. self._file_openers[".lzma"] = lzma.open
  156. except (ImportError, AttributeError):
  157. # There are incompatible backports of lzma that do not have the
  158. # lzma.open attribute, so catch that as well as ImportError.
  159. pass
  160. self._loaded = True
  161. def keys(self):
  162. """
  163. Return the keys of currently supported file openers.
  164. Parameters
  165. ----------
  166. None
  167. Returns
  168. -------
  169. keys : list
  170. The keys are None for uncompressed files and the file extension
  171. strings (i.e. ``'.gz'``, ``'.xz'``) for supported compression
  172. methods.
  173. """
  174. self._load()
  175. return list(self._file_openers.keys())
  176. def __getitem__(self, key):
  177. self._load()
  178. return self._file_openers[key]
  179. _file_openers = _FileOpeners()
  180. def open(path, mode='r', destpath=os.curdir, encoding=None, newline=None):
  181. """
  182. Open `path` with `mode` and return the file object.
  183. If ``path`` is an URL, it will be downloaded, stored in the
  184. `DataSource` `destpath` directory and opened from there.
  185. Parameters
  186. ----------
  187. path : str
  188. Local file path or URL to open.
  189. mode : str, optional
  190. Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
  191. append. Available modes depend on the type of object specified by
  192. path. Default is 'r'.
  193. destpath : str, optional
  194. Path to the directory where the source file gets downloaded to for
  195. use. If `destpath` is None, a temporary directory will be created.
  196. The default path is the current directory.
  197. encoding : {None, str}, optional
  198. Open text file with given encoding. The default encoding will be
  199. what `io.open` uses.
  200. newline : {None, str}, optional
  201. Newline to use when reading text file.
  202. Returns
  203. -------
  204. out : file object
  205. The opened file.
  206. Notes
  207. -----
  208. This is a convenience function that instantiates a `DataSource` and
  209. returns the file object from ``DataSource.open(path)``.
  210. """
  211. ds = DataSource(destpath)
  212. return ds.open(path, mode, encoding=encoding, newline=newline)
  213. @set_module('numpy')
  214. class DataSource(object):
  215. """
  216. DataSource(destpath='.')
  217. A generic data source file (file, http, ftp, ...).
  218. DataSources can be local files or remote files/URLs. The files may
  219. also be compressed or uncompressed. DataSource hides some of the
  220. low-level details of downloading the file, allowing you to simply pass
  221. in a valid file path (or URL) and obtain a file object.
  222. Parameters
  223. ----------
  224. destpath : str or None, optional
  225. Path to the directory where the source file gets downloaded to for
  226. use. If `destpath` is None, a temporary directory will be created.
  227. The default path is the current directory.
  228. Notes
  229. -----
  230. URLs require a scheme string (``http://``) to be used, without it they
  231. will fail::
  232. >>> repos = DataSource()
  233. >>> repos.exists('www.google.com/index.html')
  234. False
  235. >>> repos.exists('http://www.google.com/index.html')
  236. True
  237. Temporary directories are deleted when the DataSource is deleted.
  238. Examples
  239. --------
  240. ::
  241. >>> ds = DataSource('/home/guido')
  242. >>> urlname = 'http://www.google.com/index.html'
  243. >>> gfile = ds.open('http://www.google.com/index.html') # remote file
  244. >>> ds.abspath(urlname)
  245. '/home/guido/www.google.com/site/index.html'
  246. >>> ds = DataSource(None) # use with temporary file
  247. >>> ds.open('/home/guido/foobar.txt')
  248. <open file '/home/guido.foobar.txt', mode 'r' at 0x91d4430>
  249. >>> ds.abspath('/home/guido/foobar.txt')
  250. '/tmp/tmpy4pgsP/home/guido/foobar.txt'
  251. """
  252. def __init__(self, destpath=os.curdir):
  253. """Create a DataSource with a local path at destpath."""
  254. if destpath:
  255. self._destpath = os.path.abspath(destpath)
  256. self._istmpdest = False
  257. else:
  258. import tempfile # deferring import to improve startup time
  259. self._destpath = tempfile.mkdtemp()
  260. self._istmpdest = True
  261. def __del__(self):
  262. # Remove temp directories
  263. if hasattr(self, '_istmpdest') and self._istmpdest:
  264. shutil.rmtree(self._destpath)
  265. def _iszip(self, filename):
  266. """Test if the filename is a zip file by looking at the file extension.
  267. """
  268. fname, ext = os.path.splitext(filename)
  269. return ext in _file_openers.keys()
  270. def _iswritemode(self, mode):
  271. """Test if the given mode will open a file for writing."""
  272. # Currently only used to test the bz2 files.
  273. _writemodes = ("w", "+")
  274. for c in mode:
  275. if c in _writemodes:
  276. return True
  277. return False
  278. def _splitzipext(self, filename):
  279. """Split zip extension from filename and return filename.
  280. *Returns*:
  281. base, zip_ext : {tuple}
  282. """
  283. if self._iszip(filename):
  284. return os.path.splitext(filename)
  285. else:
  286. return filename, None
  287. def _possible_names(self, filename):
  288. """Return a tuple containing compressed filename variations."""
  289. names = [filename]
  290. if not self._iszip(filename):
  291. for zipext in _file_openers.keys():
  292. if zipext:
  293. names.append(filename+zipext)
  294. return names
  295. def _isurl(self, path):
  296. """Test if path is a net location. Tests the scheme and netloc."""
  297. # We do this here to reduce the 'import numpy' initial import time.
  298. if sys.version_info[0] >= 3:
  299. from urllib.parse import urlparse
  300. else:
  301. from urlparse import urlparse
  302. # BUG : URLs require a scheme string ('http://') to be used.
  303. # www.google.com will fail.
  304. # Should we prepend the scheme for those that don't have it and
  305. # test that also? Similar to the way we append .gz and test for
  306. # for compressed versions of files.
  307. scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
  308. return bool(scheme and netloc)
  309. def _cache(self, path):
  310. """Cache the file specified by path.
  311. Creates a copy of the file in the datasource cache.
  312. """
  313. # We import these here because importing urllib2 is slow and
  314. # a significant fraction of numpy's total import time.
  315. if sys.version_info[0] >= 3:
  316. from urllib.request import urlopen
  317. from urllib.error import URLError
  318. else:
  319. from urllib2 import urlopen
  320. from urllib2 import URLError
  321. upath = self.abspath(path)
  322. # ensure directory exists
  323. if not os.path.exists(os.path.dirname(upath)):
  324. os.makedirs(os.path.dirname(upath))
  325. # TODO: Doesn't handle compressed files!
  326. if self._isurl(path):
  327. try:
  328. openedurl = urlopen(path)
  329. f = _open(upath, 'wb')
  330. try:
  331. shutil.copyfileobj(openedurl, f)
  332. finally:
  333. f.close()
  334. openedurl.close()
  335. except URLError:
  336. raise URLError("URL not found: %s" % path)
  337. else:
  338. shutil.copyfile(path, upath)
  339. return upath
  340. def _findfile(self, path):
  341. """Searches for ``path`` and returns full path if found.
  342. If path is an URL, _findfile will cache a local copy and return the
  343. path to the cached file. If path is a local file, _findfile will
  344. return a path to that local file.
  345. The search will include possible compressed versions of the file
  346. and return the first occurrence found.
  347. """
  348. # Build list of possible local file paths
  349. if not self._isurl(path):
  350. # Valid local paths
  351. filelist = self._possible_names(path)
  352. # Paths in self._destpath
  353. filelist += self._possible_names(self.abspath(path))
  354. else:
  355. # Cached URLs in self._destpath
  356. filelist = self._possible_names(self.abspath(path))
  357. # Remote URLs
  358. filelist = filelist + self._possible_names(path)
  359. for name in filelist:
  360. if self.exists(name):
  361. if self._isurl(name):
  362. name = self._cache(name)
  363. return name
  364. return None
  365. def abspath(self, path):
  366. """
  367. Return absolute path of file in the DataSource directory.
  368. If `path` is an URL, then `abspath` will return either the location
  369. the file exists locally or the location it would exist when opened
  370. using the `open` method.
  371. Parameters
  372. ----------
  373. path : str
  374. Can be a local file or a remote URL.
  375. Returns
  376. -------
  377. out : str
  378. Complete path, including the `DataSource` destination directory.
  379. Notes
  380. -----
  381. The functionality is based on `os.path.abspath`.
  382. """
  383. # We do this here to reduce the 'import numpy' initial import time.
  384. if sys.version_info[0] >= 3:
  385. from urllib.parse import urlparse
  386. else:
  387. from urlparse import urlparse
  388. # TODO: This should be more robust. Handles case where path includes
  389. # the destpath, but not other sub-paths. Failing case:
  390. # path = /home/guido/datafile.txt
  391. # destpath = /home/alex/
  392. # upath = self.abspath(path)
  393. # upath == '/home/alex/home/guido/datafile.txt'
  394. # handle case where path includes self._destpath
  395. splitpath = path.split(self._destpath, 2)
  396. if len(splitpath) > 1:
  397. path = splitpath[1]
  398. scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
  399. netloc = self._sanitize_relative_path(netloc)
  400. upath = self._sanitize_relative_path(upath)
  401. return os.path.join(self._destpath, netloc, upath)
  402. def _sanitize_relative_path(self, path):
  403. """Return a sanitised relative path for which
  404. os.path.abspath(os.path.join(base, path)).startswith(base)
  405. """
  406. last = None
  407. path = os.path.normpath(path)
  408. while path != last:
  409. last = path
  410. # Note: os.path.join treats '/' as os.sep on Windows
  411. path = path.lstrip(os.sep).lstrip('/')
  412. path = path.lstrip(os.pardir).lstrip('..')
  413. drive, path = os.path.splitdrive(path) # for Windows
  414. return path
  415. def exists(self, path):
  416. """
  417. Test if path exists.
  418. Test if `path` exists as (and in this order):
  419. - a local file.
  420. - a remote URL that has been downloaded and stored locally in the
  421. `DataSource` directory.
  422. - a remote URL that has not been downloaded, but is valid and
  423. accessible.
  424. Parameters
  425. ----------
  426. path : str
  427. Can be a local file or a remote URL.
  428. Returns
  429. -------
  430. out : bool
  431. True if `path` exists.
  432. Notes
  433. -----
  434. When `path` is an URL, `exists` will return True if it's either
  435. stored locally in the `DataSource` directory, or is a valid remote
  436. URL. `DataSource` does not discriminate between the two, the file
  437. is accessible if it exists in either location.
  438. """
  439. # First test for local path
  440. if os.path.exists(path):
  441. return True
  442. # We import this here because importing urllib2 is slow and
  443. # a significant fraction of numpy's total import time.
  444. if sys.version_info[0] >= 3:
  445. from urllib.request import urlopen
  446. from urllib.error import URLError
  447. else:
  448. from urllib2 import urlopen
  449. from urllib2 import URLError
  450. # Test cached url
  451. upath = self.abspath(path)
  452. if os.path.exists(upath):
  453. return True
  454. # Test remote url
  455. if self._isurl(path):
  456. try:
  457. netfile = urlopen(path)
  458. netfile.close()
  459. del(netfile)
  460. return True
  461. except URLError:
  462. return False
  463. return False
  464. def open(self, path, mode='r', encoding=None, newline=None):
  465. """
  466. Open and return file-like object.
  467. If `path` is an URL, it will be downloaded, stored in the
  468. `DataSource` directory and opened from there.
  469. Parameters
  470. ----------
  471. path : str
  472. Local file path or URL to open.
  473. mode : {'r', 'w', 'a'}, optional
  474. Mode to open `path`. Mode 'r' for reading, 'w' for writing,
  475. 'a' to append. Available modes depend on the type of object
  476. specified by `path`. Default is 'r'.
  477. encoding : {None, str}, optional
  478. Open text file with given encoding. The default encoding will be
  479. what `io.open` uses.
  480. newline : {None, str}, optional
  481. Newline to use when reading text file.
  482. Returns
  483. -------
  484. out : file object
  485. File object.
  486. """
  487. # TODO: There is no support for opening a file for writing which
  488. # doesn't exist yet (creating a file). Should there be?
  489. # TODO: Add a ``subdir`` parameter for specifying the subdirectory
  490. # used to store URLs in self._destpath.
  491. if self._isurl(path) and self._iswritemode(mode):
  492. raise ValueError("URLs are not writeable")
  493. # NOTE: _findfile will fail on a new file opened for writing.
  494. found = self._findfile(path)
  495. if found:
  496. _fname, ext = self._splitzipext(found)
  497. if ext == 'bz2':
  498. mode.replace("+", "")
  499. return _file_openers[ext](found, mode=mode,
  500. encoding=encoding, newline=newline)
  501. else:
  502. raise IOError("%s not found." % path)
  503. class Repository (DataSource):
  504. """
  505. Repository(baseurl, destpath='.')
  506. A data repository where multiple DataSource's share a base
  507. URL/directory.
  508. `Repository` extends `DataSource` by prepending a base URL (or
  509. directory) to all the files it handles. Use `Repository` when you will
  510. be working with multiple files from one base URL. Initialize
  511. `Repository` with the base URL, then refer to each file by its filename
  512. only.
  513. Parameters
  514. ----------
  515. baseurl : str
  516. Path to the local directory or remote location that contains the
  517. data files.
  518. destpath : str or None, optional
  519. Path to the directory where the source file gets downloaded to for
  520. use. If `destpath` is None, a temporary directory will be created.
  521. The default path is the current directory.
  522. Examples
  523. --------
  524. To analyze all files in the repository, do something like this
  525. (note: this is not self-contained code)::
  526. >>> repos = np.lib._datasource.Repository('/home/user/data/dir/')
  527. >>> for filename in filelist:
  528. ... fp = repos.open(filename)
  529. ... fp.analyze()
  530. ... fp.close()
  531. Similarly you could use a URL for a repository::
  532. >>> repos = np.lib._datasource.Repository('http://www.xyz.edu/data')
  533. """
  534. def __init__(self, baseurl, destpath=os.curdir):
  535. """Create a Repository with a shared url or directory of baseurl."""
  536. DataSource.__init__(self, destpath=destpath)
  537. self._baseurl = baseurl
  538. def __del__(self):
  539. DataSource.__del__(self)
  540. def _fullpath(self, path):
  541. """Return complete path for path. Prepends baseurl if necessary."""
  542. splitpath = path.split(self._baseurl, 2)
  543. if len(splitpath) == 1:
  544. result = os.path.join(self._baseurl, path)
  545. else:
  546. result = path # path contains baseurl already
  547. return result
  548. def _findfile(self, path):
  549. """Extend DataSource method to prepend baseurl to ``path``."""
  550. return DataSource._findfile(self, self._fullpath(path))
  551. def abspath(self, path):
  552. """
  553. Return absolute path of file in the Repository directory.
  554. If `path` is an URL, then `abspath` will return either the location
  555. the file exists locally or the location it would exist when opened
  556. using the `open` method.
  557. Parameters
  558. ----------
  559. path : str
  560. Can be a local file or a remote URL. This may, but does not
  561. have to, include the `baseurl` with which the `Repository` was
  562. initialized.
  563. Returns
  564. -------
  565. out : str
  566. Complete path, including the `DataSource` destination directory.
  567. """
  568. return DataSource.abspath(self, self._fullpath(path))
  569. def exists(self, path):
  570. """
  571. Test if path exists prepending Repository base URL to path.
  572. Test if `path` exists as (and in this order):
  573. - a local file.
  574. - a remote URL that has been downloaded and stored locally in the
  575. `DataSource` directory.
  576. - a remote URL that has not been downloaded, but is valid and
  577. accessible.
  578. Parameters
  579. ----------
  580. path : str
  581. Can be a local file or a remote URL. This may, but does not
  582. have to, include the `baseurl` with which the `Repository` was
  583. initialized.
  584. Returns
  585. -------
  586. out : bool
  587. True if `path` exists.
  588. Notes
  589. -----
  590. When `path` is an URL, `exists` will return True if it's either
  591. stored locally in the `DataSource` directory, or is a valid remote
  592. URL. `DataSource` does not discriminate between the two, the file
  593. is accessible if it exists in either location.
  594. """
  595. return DataSource.exists(self, self._fullpath(path))
  596. def open(self, path, mode='r', encoding=None, newline=None):
  597. """
  598. Open and return file-like object prepending Repository base URL.
  599. If `path` is an URL, it will be downloaded, stored in the
  600. DataSource directory and opened from there.
  601. Parameters
  602. ----------
  603. path : str
  604. Local file path or URL to open. This may, but does not have to,
  605. include the `baseurl` with which the `Repository` was
  606. initialized.
  607. mode : {'r', 'w', 'a'}, optional
  608. Mode to open `path`. Mode 'r' for reading, 'w' for writing,
  609. 'a' to append. Available modes depend on the type of object
  610. specified by `path`. Default is 'r'.
  611. encoding : {None, str}, optional
  612. Open text file with given encoding. The default encoding will be
  613. what `io.open` uses.
  614. newline : {None, str}, optional
  615. Newline to use when reading text file.
  616. Returns
  617. -------
  618. out : file object
  619. File object.
  620. """
  621. return DataSource.open(self, self._fullpath(path), mode,
  622. encoding=encoding, newline=newline)
  623. def listdir(self):
  624. """
  625. List files in the source Repository.
  626. Returns
  627. -------
  628. files : list of str
  629. List of file names (not containing a directory part).
  630. Notes
  631. -----
  632. Does not currently work for remote repositories.
  633. """
  634. if self._isurl(self._baseurl):
  635. raise NotImplementedError(
  636. "Directory listing of URLs, not supported yet.")
  637. else:
  638. return os.listdir(self._baseurl)