filemanager.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. """A contents manager that uses the local file system for storage."""
  2. # Copyright (c) Jupyter Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from datetime import datetime
  5. import errno
  6. import io
  7. import os
  8. import shutil
  9. import stat
  10. import sys
  11. import warnings
  12. import mimetypes
  13. import nbformat
  14. from send2trash import send2trash
  15. from tornado import web
  16. from .filecheckpoints import FileCheckpoints
  17. from .fileio import FileManagerMixin
  18. from .manager import ContentsManager
  19. from ...utils import exists
  20. from ipython_genutils.importstring import import_item
  21. from traitlets import Any, Unicode, Bool, TraitError, observe, default, validate
  22. from ipython_genutils.py3compat import getcwd, string_types
  23. from notebook import _tz as tz
  24. from notebook.utils import (
  25. is_hidden, is_file_hidden,
  26. to_api_path,
  27. )
  28. from notebook.base.handlers import AuthenticatedFileHandler
  29. from notebook.transutils import _
  30. try:
  31. from os.path import samefile
  32. except ImportError:
  33. # windows + py2
  34. from notebook.utils import samefile_simple as samefile
  35. _script_exporter = None
  36. def _post_save_script(model, os_path, contents_manager, **kwargs):
  37. """convert notebooks to Python script after save with nbconvert
  38. replaces `jupyter notebook --script`
  39. """
  40. from nbconvert.exporters.script import ScriptExporter
  41. warnings.warn("`_post_save_script` is deprecated and will be removed in Notebook 5.0", DeprecationWarning)
  42. if model['type'] != 'notebook':
  43. return
  44. global _script_exporter
  45. if _script_exporter is None:
  46. _script_exporter = ScriptExporter(parent=contents_manager)
  47. log = contents_manager.log
  48. base, ext = os.path.splitext(os_path)
  49. script, resources = _script_exporter.from_filename(os_path)
  50. script_fname = base + resources.get('output_extension', '.txt')
  51. log.info("Saving script /%s", to_api_path(script_fname, contents_manager.root_dir))
  52. with io.open(script_fname, 'w', encoding='utf-8') as f:
  53. f.write(script)
  54. class FileContentsManager(FileManagerMixin, ContentsManager):
  55. root_dir = Unicode(config=True)
  56. @default('root_dir')
  57. def _default_root_dir(self):
  58. try:
  59. return self.parent.notebook_dir
  60. except AttributeError:
  61. return getcwd()
  62. save_script = Bool(False, config=True, help='DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0')
  63. @observe('save_script')
  64. def _update_save_script(self, change):
  65. if not change['new']:
  66. return
  67. self.log.warning("""
  68. `--script` is deprecated and will be removed in notebook 5.0.
  69. You can trigger nbconvert via pre- or post-save hooks:
  70. ContentsManager.pre_save_hook
  71. FileContentsManager.post_save_hook
  72. A post-save hook has been registered that calls:
  73. jupyter nbconvert --to script [notebook]
  74. which behaves similarly to `--script`.
  75. """)
  76. self.post_save_hook = _post_save_script
  77. post_save_hook = Any(None, config=True, allow_none=True,
  78. help="""Python callable or importstring thereof
  79. to be called on the path of a file just saved.
  80. This can be used to process the file on disk,
  81. such as converting the notebook to a script or HTML via nbconvert.
  82. It will be called as (all arguments passed by keyword)::
  83. hook(os_path=os_path, model=model, contents_manager=instance)
  84. - path: the filesystem path to the file just written
  85. - model: the model representing the file
  86. - contents_manager: this ContentsManager instance
  87. """
  88. )
  89. @validate('post_save_hook')
  90. def _validate_post_save_hook(self, proposal):
  91. value = proposal['value']
  92. if isinstance(value, string_types):
  93. value = import_item(value)
  94. if not callable(value):
  95. raise TraitError("post_save_hook must be callable")
  96. return value
  97. def run_post_save_hook(self, model, os_path):
  98. """Run the post-save hook if defined, and log errors"""
  99. if self.post_save_hook:
  100. try:
  101. self.log.debug("Running post-save hook on %s", os_path)
  102. self.post_save_hook(os_path=os_path, model=model, contents_manager=self)
  103. except Exception as e:
  104. self.log.error("Post-save hook failed o-n %s", os_path, exc_info=True)
  105. raise web.HTTPError(500, u'Unexpected error while running post hook save: %s' % e)
  106. @validate('root_dir')
  107. def _validate_root_dir(self, proposal):
  108. """Do a bit of validation of the root_dir."""
  109. value = proposal['value']
  110. if not os.path.isabs(value):
  111. # If we receive a non-absolute path, make it absolute.
  112. value = os.path.abspath(value)
  113. if not os.path.isdir(value):
  114. raise TraitError("%r is not a directory" % value)
  115. return value
  116. @default('checkpoints_class')
  117. def _checkpoints_class_default(self):
  118. return FileCheckpoints
  119. delete_to_trash = Bool(True, config=True,
  120. help="""If True (default), deleting files will send them to the
  121. platform's trash/recycle bin, where they can be recovered. If False,
  122. deleting files really deletes them.""")
  123. @default('files_handler_class')
  124. def _files_handler_class_default(self):
  125. return AuthenticatedFileHandler
  126. @default('files_handler_params')
  127. def _files_handler_params_default(self):
  128. return {'path': self.root_dir}
  129. def is_hidden(self, path):
  130. """Does the API style path correspond to a hidden directory or file?
  131. Parameters
  132. ----------
  133. path : string
  134. The path to check. This is an API path (`/` separated,
  135. relative to root_dir).
  136. Returns
  137. -------
  138. hidden : bool
  139. Whether the path exists and is hidden.
  140. """
  141. path = path.strip('/')
  142. os_path = self._get_os_path(path=path)
  143. return is_hidden(os_path, self.root_dir)
  144. def file_exists(self, path):
  145. """Returns True if the file exists, else returns False.
  146. API-style wrapper for os.path.isfile
  147. Parameters
  148. ----------
  149. path : string
  150. The relative path to the file (with '/' as separator)
  151. Returns
  152. -------
  153. exists : bool
  154. Whether the file exists.
  155. """
  156. path = path.strip('/')
  157. os_path = self._get_os_path(path)
  158. return os.path.isfile(os_path)
  159. def dir_exists(self, path):
  160. """Does the API-style path refer to an extant directory?
  161. API-style wrapper for os.path.isdir
  162. Parameters
  163. ----------
  164. path : string
  165. The path to check. This is an API path (`/` separated,
  166. relative to root_dir).
  167. Returns
  168. -------
  169. exists : bool
  170. Whether the path is indeed a directory.
  171. """
  172. path = path.strip('/')
  173. os_path = self._get_os_path(path=path)
  174. return os.path.isdir(os_path)
  175. def exists(self, path):
  176. """Returns True if the path exists, else returns False.
  177. API-style wrapper for os.path.exists
  178. Parameters
  179. ----------
  180. path : string
  181. The API path to the file (with '/' as separator)
  182. Returns
  183. -------
  184. exists : bool
  185. Whether the target exists.
  186. """
  187. path = path.strip('/')
  188. os_path = self._get_os_path(path=path)
  189. return exists(os_path)
  190. def _base_model(self, path):
  191. """Build the common base of a contents model"""
  192. os_path = self._get_os_path(path)
  193. info = os.lstat(os_path)
  194. try:
  195. # size of file
  196. size = info.st_size
  197. except (ValueError, OSError):
  198. self.log.warning('Unable to get size.')
  199. size = None
  200. try:
  201. last_modified = tz.utcfromtimestamp(info.st_mtime)
  202. except (ValueError, OSError):
  203. # Files can rarely have an invalid timestamp
  204. # https://github.com/jupyter/notebook/issues/2539
  205. # https://github.com/jupyter/notebook/issues/2757
  206. # Use the Unix epoch as a fallback so we don't crash.
  207. self.log.warning('Invalid mtime %s for %s', info.st_mtime, os_path)
  208. last_modified = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)
  209. try:
  210. created = tz.utcfromtimestamp(info.st_ctime)
  211. except (ValueError, OSError): # See above
  212. self.log.warning('Invalid ctime %s for %s', info.st_ctime, os_path)
  213. created = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)
  214. # Create the base model.
  215. model = {}
  216. model['name'] = path.rsplit('/', 1)[-1]
  217. model['path'] = path
  218. model['last_modified'] = last_modified
  219. model['created'] = created
  220. model['content'] = None
  221. model['format'] = None
  222. model['mimetype'] = None
  223. model['size'] = size
  224. try:
  225. model['writable'] = os.access(os_path, os.W_OK)
  226. except OSError:
  227. self.log.error("Failed to check write permissions on %s", os_path)
  228. model['writable'] = False
  229. return model
  230. def _dir_model(self, path, content=True):
  231. """Build a model for a directory
  232. if content is requested, will include a listing of the directory
  233. """
  234. os_path = self._get_os_path(path)
  235. four_o_four = u'directory does not exist: %r' % path
  236. if not os.path.isdir(os_path):
  237. raise web.HTTPError(404, four_o_four)
  238. elif is_hidden(os_path, self.root_dir) and not self.allow_hidden:
  239. self.log.info("Refusing to serve hidden directory %r, via 404 Error",
  240. os_path
  241. )
  242. raise web.HTTPError(404, four_o_four)
  243. model = self._base_model(path)
  244. model['type'] = 'directory'
  245. model['size'] = None
  246. if content:
  247. model['content'] = contents = []
  248. os_dir = self._get_os_path(path)
  249. for name in os.listdir(os_dir):
  250. try:
  251. os_path = os.path.join(os_dir, name)
  252. except UnicodeDecodeError as e:
  253. self.log.warning(
  254. "failed to decode filename '%s': %s", name, e)
  255. continue
  256. try:
  257. st = os.lstat(os_path)
  258. except OSError as e:
  259. # skip over broken symlinks in listing
  260. if e.errno == errno.ENOENT:
  261. self.log.warning("%s doesn't exist", os_path)
  262. else:
  263. self.log.warning("Error stat-ing %s: %s", os_path, e)
  264. continue
  265. if (not stat.S_ISLNK(st.st_mode)
  266. and not stat.S_ISREG(st.st_mode)
  267. and not stat.S_ISDIR(st.st_mode)):
  268. self.log.debug("%s not a regular file", os_path)
  269. continue
  270. if self.should_list(name) and not is_file_hidden(os_path, stat_res=st):
  271. contents.append(self.get(
  272. path='%s/%s' % (path, name),
  273. content=False)
  274. )
  275. model['format'] = 'json'
  276. return model
  277. def _file_model(self, path, content=True, format=None):
  278. """Build a model for a file
  279. if content is requested, include the file contents.
  280. format:
  281. If 'text', the contents will be decoded as UTF-8.
  282. If 'base64', the raw bytes contents will be encoded as base64.
  283. If not specified, try to decode as UTF-8, and fall back to base64
  284. """
  285. model = self._base_model(path)
  286. model['type'] = 'file'
  287. os_path = self._get_os_path(path)
  288. model['mimetype'] = mimetypes.guess_type(os_path)[0]
  289. if content:
  290. content, format = self._read_file(os_path, format)
  291. if model['mimetype'] is None:
  292. default_mime = {
  293. 'text': 'text/plain',
  294. 'base64': 'application/octet-stream'
  295. }[format]
  296. model['mimetype'] = default_mime
  297. model.update(
  298. content=content,
  299. format=format,
  300. )
  301. return model
  302. def _notebook_model(self, path, content=True):
  303. """Build a notebook model
  304. if content is requested, the notebook content will be populated
  305. as a JSON structure (not double-serialized)
  306. """
  307. model = self._base_model(path)
  308. model['type'] = 'notebook'
  309. os_path = self._get_os_path(path)
  310. if content:
  311. nb = self._read_notebook(os_path, as_version=4)
  312. self.mark_trusted_cells(nb, path)
  313. model['content'] = nb
  314. model['format'] = 'json'
  315. self.validate_notebook_model(model)
  316. return model
  317. def get(self, path, content=True, type=None, format=None):
  318. """ Takes a path for an entity and returns its model
  319. Parameters
  320. ----------
  321. path : str
  322. the API path that describes the relative path for the target
  323. content : bool
  324. Whether to include the contents in the reply
  325. type : str, optional
  326. The requested type - 'file', 'notebook', or 'directory'.
  327. Will raise HTTPError 400 if the content doesn't match.
  328. format : str, optional
  329. The requested format for file contents. 'text' or 'base64'.
  330. Ignored if this returns a notebook or directory model.
  331. Returns
  332. -------
  333. model : dict
  334. the contents model. If content=True, returns the contents
  335. of the file or directory as well.
  336. """
  337. path = path.strip('/')
  338. if not self.exists(path):
  339. raise web.HTTPError(404, u'No such file or directory: %s' % path)
  340. os_path = self._get_os_path(path)
  341. if os.path.isdir(os_path):
  342. if type not in (None, 'directory'):
  343. raise web.HTTPError(400,
  344. u'%s is a directory, not a %s' % (path, type), reason='bad type')
  345. model = self._dir_model(path, content=content)
  346. elif type == 'notebook' or (type is None and path.endswith('.ipynb')):
  347. model = self._notebook_model(path, content=content)
  348. else:
  349. if type == 'directory':
  350. raise web.HTTPError(400,
  351. u'%s is not a directory' % path, reason='bad type')
  352. model = self._file_model(path, content=content, format=format)
  353. return model
  354. def _save_directory(self, os_path, model, path=''):
  355. """create a directory"""
  356. if is_hidden(os_path, self.root_dir) and not self.allow_hidden:
  357. raise web.HTTPError(400, u'Cannot create hidden directory %r' % os_path)
  358. if not os.path.exists(os_path):
  359. with self.perm_to_403():
  360. os.mkdir(os_path)
  361. elif not os.path.isdir(os_path):
  362. raise web.HTTPError(400, u'Not a directory: %s' % (os_path))
  363. else:
  364. self.log.debug("Directory %r already exists", os_path)
  365. def save(self, model, path=''):
  366. """Save the file model and return the model with no content."""
  367. path = path.strip('/')
  368. if 'type' not in model:
  369. raise web.HTTPError(400, u'No file type provided')
  370. if 'content' not in model and model['type'] != 'directory':
  371. raise web.HTTPError(400, u'No file content provided')
  372. os_path = self._get_os_path(path)
  373. self.log.debug("Saving %s", os_path)
  374. self.run_pre_save_hook(model=model, path=path)
  375. try:
  376. if model['type'] == 'notebook':
  377. nb = nbformat.from_dict(model['content'])
  378. self.check_and_sign(nb, path)
  379. self._save_notebook(os_path, nb)
  380. # One checkpoint should always exist for notebooks.
  381. if not self.checkpoints.list_checkpoints(path):
  382. self.create_checkpoint(path)
  383. elif model['type'] == 'file':
  384. # Missing format will be handled internally by _save_file.
  385. self._save_file(os_path, model['content'], model.get('format'))
  386. elif model['type'] == 'directory':
  387. self._save_directory(os_path, model, path)
  388. else:
  389. raise web.HTTPError(400, "Unhandled contents type: %s" % model['type'])
  390. except web.HTTPError:
  391. raise
  392. except Exception as e:
  393. self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True)
  394. raise web.HTTPError(500, u'Unexpected error while saving file: %s %s' % (path, e))
  395. validation_message = None
  396. if model['type'] == 'notebook':
  397. self.validate_notebook_model(model)
  398. validation_message = model.get('message', None)
  399. model = self.get(path, content=False)
  400. if validation_message:
  401. model['message'] = validation_message
  402. self.run_post_save_hook(model=model, os_path=os_path)
  403. return model
  404. def delete_file(self, path):
  405. """Delete file at path."""
  406. path = path.strip('/')
  407. os_path = self._get_os_path(path)
  408. rm = os.unlink
  409. if not os.path.exists(os_path):
  410. raise web.HTTPError(404, u'File or directory does not exist: %s' % os_path)
  411. def _check_trash(os_path):
  412. if sys.platform in {'win32', 'darwin'}:
  413. return True
  414. # It's a bit more nuanced than this, but until we can better
  415. # distinguish errors from send2trash, assume that we can only trash
  416. # files on the same partition as the home directory.
  417. file_dev = os.stat(os_path).st_dev
  418. home_dev = os.stat(os.path.expanduser('~')).st_dev
  419. return file_dev == home_dev
  420. def is_non_empty_dir(os_path):
  421. if os.path.isdir(os_path):
  422. # A directory containing only leftover checkpoints is
  423. # considered empty.
  424. cp_dir = getattr(self.checkpoints, 'checkpoint_dir', None)
  425. if set(os.listdir(os_path)) - {cp_dir}:
  426. return True
  427. return False
  428. if self.delete_to_trash:
  429. if sys.platform == 'win32' and is_non_empty_dir(os_path):
  430. # send2trash can really delete files on Windows, so disallow
  431. # deleting non-empty files. See Github issue 3631.
  432. raise web.HTTPError(400, u'Directory %s not empty' % os_path)
  433. if _check_trash(os_path):
  434. self.log.debug("Sending %s to trash", os_path)
  435. # Looking at the code in send2trash, I don't think the errors it
  436. # raises let us distinguish permission errors from other errors in
  437. # code. So for now, just let them all get logged as server errors.
  438. send2trash(os_path)
  439. return
  440. else:
  441. self.log.warning("Skipping trash for %s, on different device "
  442. "to home directory", os_path)
  443. if os.path.isdir(os_path):
  444. # Don't permanently delete non-empty directories.
  445. if is_non_empty_dir(os_path):
  446. raise web.HTTPError(400, u'Directory %s not empty' % os_path)
  447. self.log.debug("Removing directory %s", os_path)
  448. with self.perm_to_403():
  449. shutil.rmtree(os_path)
  450. else:
  451. self.log.debug("Unlinking file %s", os_path)
  452. with self.perm_to_403():
  453. rm(os_path)
  454. def rename_file(self, old_path, new_path):
  455. """Rename a file."""
  456. old_path = old_path.strip('/')
  457. new_path = new_path.strip('/')
  458. if new_path == old_path:
  459. return
  460. new_os_path = self._get_os_path(new_path)
  461. old_os_path = self._get_os_path(old_path)
  462. # Should we proceed with the move?
  463. if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path):
  464. raise web.HTTPError(409, u'File already exists: %s' % new_path)
  465. # Move the file
  466. try:
  467. with self.perm_to_403():
  468. shutil.move(old_os_path, new_os_path)
  469. except web.HTTPError:
  470. raise
  471. except Exception as e:
  472. raise web.HTTPError(500, u'Unknown error renaming file: %s %s' % (old_path, e))
  473. def info_string(self):
  474. return _("Serving notebooks from local directory: %s") % self.root_dir
  475. def get_kernel_path(self, path, model=None):
  476. """Return the initial API path of a kernel associated with a given notebook"""
  477. if self.dir_exists(path):
  478. return path
  479. if '/' in path:
  480. parent_dir = path.rsplit('/', 1)[0]
  481. else:
  482. parent_dir = ''
  483. return parent_dir