validator.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. # Copyright (c) IPython Development Team.
  2. # Distributed under the terms of the Modified BSD License.
  3. from __future__ import print_function
  4. import json
  5. import os
  6. import pprint
  7. import sys
  8. import warnings
  9. try:
  10. from jsonschema import ValidationError
  11. from jsonschema import Draft4Validator as Validator
  12. except ImportError as e:
  13. verbose_msg = """
  14. Jupyter notebook format depends on the jsonschema package:
  15. https://pypi.python.org/pypi/jsonschema
  16. Please install it first.
  17. """
  18. raise ImportError(str(e) + verbose_msg)
  19. from ipython_genutils.importstring import import_item
# Module-level cache of Validator instances, filled in by get_validator().
validators = {}
  21. def _relax_additional_properties(obj):
  22. """relax any `additionalProperties`"""
  23. if isinstance(obj, dict):
  24. for key, value in obj.items():
  25. if key == 'additionalProperties':
  26. value = True
  27. else:
  28. value = _relax_additional_properties(value)
  29. obj[key] = value
  30. elif isinstance(obj, list):
  31. for i, value in enumerate(obj):
  32. obj[i] = _relax_additional_properties(value)
  33. return obj
  34. def _allow_undefined(schema):
  35. schema['definitions']['cell']['oneOf'].append(
  36. {"$ref": "#/definitions/unrecognized_cell"}
  37. )
  38. schema['definitions']['output']['oneOf'].append(
  39. {"$ref": "#/definitions/unrecognized_output"}
  40. )
  41. return schema
  42. def get_validator(version=None, version_minor=None, relax_add_props=False):
  43. """Load the JSON schema into a Validator"""
  44. if version is None:
  45. from .. import current_nbformat
  46. version = current_nbformat
  47. v = import_item("nbformat.v%s" % version)
  48. current_minor = getattr(v, 'nbformat_minor', 0)
  49. if version_minor is None:
  50. version_minor = current_minor
  51. version_tuple = (version, version_minor)
  52. if version_tuple not in validators:
  53. try:
  54. schema_json = _get_schema_json(v)
  55. except AttributeError:
  56. return None
  57. if current_minor < version_minor:
  58. # notebook from the future, relax all `additionalProperties: False` requirements
  59. schema_json = _relax_additional_properties(schema_json)
  60. # and allow undefined cell types and outputs
  61. schema_json = _allow_undefined(schema_json)
  62. validators[version_tuple] = Validator(schema_json)
  63. if relax_add_props:
  64. try:
  65. schema_json = _get_schema_json(v)
  66. except AttributeError:
  67. return None
  68. # this allows properties to be added for intermediate
  69. # representations while validating for all other kinds of errors
  70. schema_json = _relax_additional_properties(schema_json)
  71. validators[version_tuple] = Validator(schema_json)
  72. return validators[version_tuple]
  73. def _get_schema_json(v):
  74. """
  75. Gets the json schema from a given imported library a nbformat version.
  76. """
  77. schema_path = os.path.join(os.path.dirname(v.__file__), v.nbformat_schema)
  78. with open(schema_path) as f:
  79. schema_json = json.load(f)
  80. return schema_json
  81. def isvalid(nbjson, ref=None, version=None, version_minor=None):
  82. """Checks whether the given notebook JSON conforms to the current
  83. notebook format schema. Returns True if the JSON is valid, and
  84. False otherwise.
  85. To see the individual errors that were encountered, please use the
  86. `validate` function instead.
  87. """
  88. try:
  89. validate(nbjson, ref, version, version_minor)
  90. except ValidationError:
  91. return False
  92. else:
  93. return True
  94. def _format_as_index(indices):
  95. """
  96. (from jsonschema._utils.format_as_index, copied to avoid relying on private API)
  97. Construct a single string containing indexing operations for the indices.
  98. For example, [1, 2, "foo"] -> [1][2]["foo"]
  99. """
  100. if not indices:
  101. return ""
  102. return "[%s]" % "][".join(repr(index) for index in indices)
  103. _ITEM_LIMIT = 16
  104. _STR_LIMIT = 64
  105. def _truncate_obj(obj):
  106. """Truncate objects for use in validation tracebacks
  107. Cell and output lists are squashed, as are long strings, lists, and dicts.
  108. """
  109. if isinstance(obj, dict):
  110. truncated = { k:_truncate_obj(v) for k,v in list(obj.items())[:_ITEM_LIMIT] }
  111. if isinstance(truncated.get('cells'), list):
  112. truncated['cells'] = ['...%i cells...' % len(obj['cells'])]
  113. if isinstance(truncated.get('outputs'), list):
  114. truncated['outputs'] = ['...%i outputs...' % len(obj['outputs'])]
  115. if len(obj) > _ITEM_LIMIT:
  116. truncated['...'] = '%i keys truncated' % (len(obj) - _ITEM_LIMIT)
  117. return truncated
  118. elif isinstance(obj, list):
  119. truncated = [ _truncate_obj(item) for item in obj[:_ITEM_LIMIT] ]
  120. if len(obj) > _ITEM_LIMIT:
  121. truncated.append('...%i items truncated...' % (len(obj) - _ITEM_LIMIT))
  122. return truncated
  123. elif isinstance(obj, str):
  124. truncated = obj[:_STR_LIMIT]
  125. if len(obj) > _STR_LIMIT:
  126. truncated += '...'
  127. return truncated
  128. else:
  129. return obj
  130. class NotebookValidationError(ValidationError):
  131. """Schema ValidationError with truncated representation
  132. to avoid massive verbose tracebacks.
  133. """
  134. def __init__(self, original, ref=None):
  135. self.original = original
  136. self.ref = getattr(self.original, 'ref', ref)
  137. self.message = self.original.message
  138. def __getattr__(self, key):
  139. return getattr(self.original, key)
  140. def __unicode__(self):
  141. """Custom str for validation errors
  142. avoids dumping full schema and notebook to logs
  143. """
  144. error = self.original
  145. instance = _truncate_obj(error.instance)
  146. return u'\n'.join([
  147. error.message,
  148. u'',
  149. u"Failed validating %r in %s%s:" % (
  150. error.validator,
  151. self.ref or 'notebook',
  152. _format_as_index(list(error.relative_schema_path)[:-1])),
  153. u'',
  154. u'On instance%s:' % _format_as_index(error.relative_path),
  155. pprint.pformat(instance, width=78),
  156. ])
  157. if sys.version_info >= (3,):
  158. __str__ = __unicode__
  159. def better_validation_error(error, version, version_minor):
  160. """Get better ValidationError on oneOf failures
  161. oneOf errors aren't informative.
  162. if it's a cell type or output_type error,
  163. try validating directly based on the type for a better error message
  164. """
  165. key = error.schema_path[-1]
  166. ref = None
  167. if key.endswith('Of'):
  168. if isinstance(error.instance, dict):
  169. if 'cell_type' in error.instance:
  170. ref = error.instance['cell_type'] + "_cell"
  171. elif 'output_type' in error.instance:
  172. ref = error.instance['output_type']
  173. if ref:
  174. try:
  175. validate(error.instance,
  176. ref,
  177. version=version,
  178. version_minor=version_minor,
  179. )
  180. except ValidationError as sub_error:
  181. # keep extending relative path
  182. error.relative_path.extend(sub_error.relative_path)
  183. sub_error.relative_path = error.relative_path
  184. better = better_validation_error(sub_error, version, version_minor)
  185. if better.ref is None:
  186. better.ref = ref
  187. return better
  188. except Exception:
  189. # if it fails for some reason,
  190. # let the original error through
  191. pass
  192. return NotebookValidationError(error, ref)
  193. def validate(nbjson, ref=None, version=None, version_minor=None, relax_add_props=False):
  194. """Checks whether the given notebook JSON conforms to the current
  195. notebook format schema.
  196. Raises ValidationError if not valid.
  197. """
  198. if version is None:
  199. from .reader import get_version
  200. (version, version_minor) = get_version(nbjson)
  201. validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
  202. if validator is None:
  203. # no validator
  204. warnings.warn("No schema for validating v%s notebooks" % version, UserWarning)
  205. return
  206. try:
  207. if ref:
  208. return validator.validate(nbjson, {'$ref' : '#/definitions/%s' % ref})
  209. else:
  210. return validator.validate(nbjson)
  211. except ValidationError as e:
  212. raise better_validation_error(e, version, version_minor)