emitter.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140
  1. # Emitter expects events obeying the following grammar:
  2. # stream ::= STREAM-START document* STREAM-END
  3. # document ::= DOCUMENT-START node DOCUMENT-END
  4. # node ::= SCALAR | sequence | mapping
  5. # sequence ::= SEQUENCE-START node* SEQUENCE-END
  6. # mapping ::= MAPPING-START (node node)* MAPPING-END
  7. __all__ = ['Emitter', 'EmitterError']
  8. from error import YAMLError
  9. from events import *
  10. class EmitterError(YAMLError):
  11. pass
  12. class ScalarAnalysis(object):
  13. def __init__(self, scalar, empty, multiline,
  14. allow_flow_plain, allow_block_plain,
  15. allow_single_quoted, allow_double_quoted,
  16. allow_block):
  17. self.scalar = scalar
  18. self.empty = empty
  19. self.multiline = multiline
  20. self.allow_flow_plain = allow_flow_plain
  21. self.allow_block_plain = allow_block_plain
  22. self.allow_single_quoted = allow_single_quoted
  23. self.allow_double_quoted = allow_double_quoted
  24. self.allow_block = allow_block
  25. class Emitter(object):
  26. DEFAULT_TAG_PREFIXES = {
  27. u'!' : u'!',
  28. u'tag:yaml.org,2002:' : u'!!',
  29. }
  30. def __init__(self, stream, canonical=None, indent=None, width=None,
  31. allow_unicode=None, line_break=None):
  32. # The stream should have the methods `write` and possibly `flush`.
  33. self.stream = stream
  34. # Encoding can be overriden by STREAM-START.
  35. self.encoding = None
  36. # Emitter is a state machine with a stack of states to handle nested
  37. # structures.
  38. self.states = []
  39. self.state = self.expect_stream_start
  40. # Current event and the event queue.
  41. self.events = []
  42. self.event = None
  43. # The current indentation level and the stack of previous indents.
  44. self.indents = []
  45. self.indent = None
  46. # Flow level.
  47. self.flow_level = 0
  48. # Contexts.
  49. self.root_context = False
  50. self.sequence_context = False
  51. self.mapping_context = False
  52. self.simple_key_context = False
  53. # Characteristics of the last emitted character:
  54. # - current position.
  55. # - is it a whitespace?
  56. # - is it an indention character
  57. # (indentation space, '-', '?', or ':')?
  58. self.line = 0
  59. self.column = 0
  60. self.whitespace = True
  61. self.indention = True
  62. # Whether the document requires an explicit document indicator
  63. self.open_ended = False
  64. # Formatting details.
  65. self.canonical = canonical
  66. self.allow_unicode = allow_unicode
  67. self.best_indent = 2
  68. if indent and 1 < indent < 10:
  69. self.best_indent = indent
  70. self.best_width = 80
  71. if width and width > self.best_indent*2:
  72. self.best_width = width
  73. self.best_line_break = u'\n'
  74. if line_break in [u'\r', u'\n', u'\r\n']:
  75. self.best_line_break = line_break
  76. # Tag prefixes.
  77. self.tag_prefixes = None
  78. # Prepared anchor and tag.
  79. self.prepared_anchor = None
  80. self.prepared_tag = None
  81. # Scalar analysis and style.
  82. self.analysis = None
  83. self.style = None
  84. def dispose(self):
  85. # Reset the state attributes (to clear self-references)
  86. self.states = []
  87. self.state = None
  88. def emit(self, event):
  89. self.events.append(event)
  90. while not self.need_more_events():
  91. self.event = self.events.pop(0)
  92. self.state()
  93. self.event = None
  94. # In some cases, we wait for a few next events before emitting.
  95. def need_more_events(self):
  96. if not self.events:
  97. return True
  98. event = self.events[0]
  99. if isinstance(event, DocumentStartEvent):
  100. return self.need_events(1)
  101. elif isinstance(event, SequenceStartEvent):
  102. return self.need_events(2)
  103. elif isinstance(event, MappingStartEvent):
  104. return self.need_events(3)
  105. else:
  106. return False
  107. def need_events(self, count):
  108. level = 0
  109. for event in self.events[1:]:
  110. if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
  111. level += 1
  112. elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
  113. level -= 1
  114. elif isinstance(event, StreamEndEvent):
  115. level = -1
  116. if level < 0:
  117. return False
  118. return (len(self.events) < count+1)
  119. def increase_indent(self, flow=False, indentless=False):
  120. self.indents.append(self.indent)
  121. if self.indent is None:
  122. if flow:
  123. self.indent = self.best_indent
  124. else:
  125. self.indent = 0
  126. elif not indentless:
  127. self.indent += self.best_indent
  128. # States.
  129. # Stream handlers.
  130. def expect_stream_start(self):
  131. if isinstance(self.event, StreamStartEvent):
  132. if self.event.encoding and not getattr(self.stream, 'encoding', None):
  133. self.encoding = self.event.encoding
  134. self.write_stream_start()
  135. self.state = self.expect_first_document_start
  136. else:
  137. raise EmitterError("expected StreamStartEvent, but got %s"
  138. % self.event)
  139. def expect_nothing(self):
  140. raise EmitterError("expected nothing, but got %s" % self.event)
  141. # Document handlers.
  142. def expect_first_document_start(self):
  143. return self.expect_document_start(first=True)
  144. def expect_document_start(self, first=False):
  145. if isinstance(self.event, DocumentStartEvent):
  146. if (self.event.version or self.event.tags) and self.open_ended:
  147. self.write_indicator(u'...', True)
  148. self.write_indent()
  149. if self.event.version:
  150. version_text = self.prepare_version(self.event.version)
  151. self.write_version_directive(version_text)
  152. self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
  153. if self.event.tags:
  154. handles = self.event.tags.keys()
  155. handles.sort()
  156. for handle in handles:
  157. prefix = self.event.tags[handle]
  158. self.tag_prefixes[prefix] = handle
  159. handle_text = self.prepare_tag_handle(handle)
  160. prefix_text = self.prepare_tag_prefix(prefix)
  161. self.write_tag_directive(handle_text, prefix_text)
  162. implicit = (first and not self.event.explicit and not self.canonical
  163. and not self.event.version and not self.event.tags
  164. and not self.check_empty_document())
  165. if not implicit:
  166. self.write_indent()
  167. self.write_indicator(u'---', True)
  168. if self.canonical:
  169. self.write_indent()
  170. self.state = self.expect_document_root
  171. elif isinstance(self.event, StreamEndEvent):
  172. if self.open_ended:
  173. self.write_indicator(u'...', True)
  174. self.write_indent()
  175. self.write_stream_end()
  176. self.state = self.expect_nothing
  177. else:
  178. raise EmitterError("expected DocumentStartEvent, but got %s"
  179. % self.event)
  180. def expect_document_end(self):
  181. if isinstance(self.event, DocumentEndEvent):
  182. self.write_indent()
  183. if self.event.explicit:
  184. self.write_indicator(u'...', True)
  185. self.write_indent()
  186. self.flush_stream()
  187. self.state = self.expect_document_start
  188. else:
  189. raise EmitterError("expected DocumentEndEvent, but got %s"
  190. % self.event)
  191. def expect_document_root(self):
  192. self.states.append(self.expect_document_end)
  193. self.expect_node(root=True)
  194. # Node handlers.
  195. def expect_node(self, root=False, sequence=False, mapping=False,
  196. simple_key=False):
  197. self.root_context = root
  198. self.sequence_context = sequence
  199. self.mapping_context = mapping
  200. self.simple_key_context = simple_key
  201. if isinstance(self.event, AliasEvent):
  202. self.expect_alias()
  203. elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
  204. self.process_anchor(u'&')
  205. self.process_tag()
  206. if isinstance(self.event, ScalarEvent):
  207. self.expect_scalar()
  208. elif isinstance(self.event, SequenceStartEvent):
  209. if self.flow_level or self.canonical or self.event.flow_style \
  210. or self.check_empty_sequence():
  211. self.expect_flow_sequence()
  212. else:
  213. self.expect_block_sequence()
  214. elif isinstance(self.event, MappingStartEvent):
  215. if self.flow_level or self.canonical or self.event.flow_style \
  216. or self.check_empty_mapping():
  217. self.expect_flow_mapping()
  218. else:
  219. self.expect_block_mapping()
  220. else:
  221. raise EmitterError("expected NodeEvent, but got %s" % self.event)
  222. def expect_alias(self):
  223. if self.event.anchor is None:
  224. raise EmitterError("anchor is not specified for alias")
  225. self.process_anchor(u'*')
  226. self.state = self.states.pop()
  227. def expect_scalar(self):
  228. self.increase_indent(flow=True)
  229. self.process_scalar()
  230. self.indent = self.indents.pop()
  231. self.state = self.states.pop()
  232. # Flow sequence handlers.
  233. def expect_flow_sequence(self):
  234. self.write_indicator(u'[', True, whitespace=True)
  235. self.flow_level += 1
  236. self.increase_indent(flow=True)
  237. self.state = self.expect_first_flow_sequence_item
  238. def expect_first_flow_sequence_item(self):
  239. if isinstance(self.event, SequenceEndEvent):
  240. self.indent = self.indents.pop()
  241. self.flow_level -= 1
  242. self.write_indicator(u']', False)
  243. self.state = self.states.pop()
  244. else:
  245. if self.canonical or self.column > self.best_width:
  246. self.write_indent()
  247. self.states.append(self.expect_flow_sequence_item)
  248. self.expect_node(sequence=True)
  249. def expect_flow_sequence_item(self):
  250. if isinstance(self.event, SequenceEndEvent):
  251. self.indent = self.indents.pop()
  252. self.flow_level -= 1
  253. if self.canonical:
  254. self.write_indicator(u',', False)
  255. self.write_indent()
  256. self.write_indicator(u']', False)
  257. self.state = self.states.pop()
  258. else:
  259. self.write_indicator(u',', False)
  260. if self.canonical or self.column > self.best_width:
  261. self.write_indent()
  262. self.states.append(self.expect_flow_sequence_item)
  263. self.expect_node(sequence=True)
  264. # Flow mapping handlers.
  265. def expect_flow_mapping(self):
  266. self.write_indicator(u'{', True, whitespace=True)
  267. self.flow_level += 1
  268. self.increase_indent(flow=True)
  269. self.state = self.expect_first_flow_mapping_key
  270. def expect_first_flow_mapping_key(self):
  271. if isinstance(self.event, MappingEndEvent):
  272. self.indent = self.indents.pop()
  273. self.flow_level -= 1
  274. self.write_indicator(u'}', False)
  275. self.state = self.states.pop()
  276. else:
  277. if self.canonical or self.column > self.best_width:
  278. self.write_indent()
  279. if not self.canonical and self.check_simple_key():
  280. self.states.append(self.expect_flow_mapping_simple_value)
  281. self.expect_node(mapping=True, simple_key=True)
  282. else:
  283. self.write_indicator(u'?', True)
  284. self.states.append(self.expect_flow_mapping_value)
  285. self.expect_node(mapping=True)
  286. def expect_flow_mapping_key(self):
  287. if isinstance(self.event, MappingEndEvent):
  288. self.indent = self.indents.pop()
  289. self.flow_level -= 1
  290. if self.canonical:
  291. self.write_indicator(u',', False)
  292. self.write_indent()
  293. self.write_indicator(u'}', False)
  294. self.state = self.states.pop()
  295. else:
  296. self.write_indicator(u',', False)
  297. if self.canonical or self.column > self.best_width:
  298. self.write_indent()
  299. if not self.canonical and self.check_simple_key():
  300. self.states.append(self.expect_flow_mapping_simple_value)
  301. self.expect_node(mapping=True, simple_key=True)
  302. else:
  303. self.write_indicator(u'?', True)
  304. self.states.append(self.expect_flow_mapping_value)
  305. self.expect_node(mapping=True)
  306. def expect_flow_mapping_simple_value(self):
  307. self.write_indicator(u':', False)
  308. self.states.append(self.expect_flow_mapping_key)
  309. self.expect_node(mapping=True)
  310. def expect_flow_mapping_value(self):
  311. if self.canonical or self.column > self.best_width:
  312. self.write_indent()
  313. self.write_indicator(u':', True)
  314. self.states.append(self.expect_flow_mapping_key)
  315. self.expect_node(mapping=True)
  316. # Block sequence handlers.
  317. def expect_block_sequence(self):
  318. indentless = (self.mapping_context and not self.indention)
  319. self.increase_indent(flow=False, indentless=indentless)
  320. self.state = self.expect_first_block_sequence_item
  321. def expect_first_block_sequence_item(self):
  322. return self.expect_block_sequence_item(first=True)
  323. def expect_block_sequence_item(self, first=False):
  324. if not first and isinstance(self.event, SequenceEndEvent):
  325. self.indent = self.indents.pop()
  326. self.state = self.states.pop()
  327. else:
  328. self.write_indent()
  329. self.write_indicator(u'-', True, indention=True)
  330. self.states.append(self.expect_block_sequence_item)
  331. self.expect_node(sequence=True)
  332. # Block mapping handlers.
  333. def expect_block_mapping(self):
  334. self.increase_indent(flow=False)
  335. self.state = self.expect_first_block_mapping_key
  336. def expect_first_block_mapping_key(self):
  337. return self.expect_block_mapping_key(first=True)
  338. def expect_block_mapping_key(self, first=False):
  339. if not first and isinstance(self.event, MappingEndEvent):
  340. self.indent = self.indents.pop()
  341. self.state = self.states.pop()
  342. else:
  343. self.write_indent()
  344. if self.check_simple_key():
  345. self.states.append(self.expect_block_mapping_simple_value)
  346. self.expect_node(mapping=True, simple_key=True)
  347. else:
  348. self.write_indicator(u'?', True, indention=True)
  349. self.states.append(self.expect_block_mapping_value)
  350. self.expect_node(mapping=True)
  351. def expect_block_mapping_simple_value(self):
  352. self.write_indicator(u':', False)
  353. self.states.append(self.expect_block_mapping_key)
  354. self.expect_node(mapping=True)
  355. def expect_block_mapping_value(self):
  356. self.write_indent()
  357. self.write_indicator(u':', True, indention=True)
  358. self.states.append(self.expect_block_mapping_key)
  359. self.expect_node(mapping=True)
  360. # Checkers.
  361. def check_empty_sequence(self):
  362. return (isinstance(self.event, SequenceStartEvent) and self.events
  363. and isinstance(self.events[0], SequenceEndEvent))
  364. def check_empty_mapping(self):
  365. return (isinstance(self.event, MappingStartEvent) and self.events
  366. and isinstance(self.events[0], MappingEndEvent))
  367. def check_empty_document(self):
  368. if not isinstance(self.event, DocumentStartEvent) or not self.events:
  369. return False
  370. event = self.events[0]
  371. return (isinstance(event, ScalarEvent) and event.anchor is None
  372. and event.tag is None and event.implicit and event.value == u'')
  373. def check_simple_key(self):
  374. length = 0
  375. if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
  376. if self.prepared_anchor is None:
  377. self.prepared_anchor = self.prepare_anchor(self.event.anchor)
  378. length += len(self.prepared_anchor)
  379. if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \
  380. and self.event.tag is not None:
  381. if self.prepared_tag is None:
  382. self.prepared_tag = self.prepare_tag(self.event.tag)
  383. length += len(self.prepared_tag)
  384. if isinstance(self.event, ScalarEvent):
  385. if self.analysis is None:
  386. self.analysis = self.analyze_scalar(self.event.value)
  387. length += len(self.analysis.scalar)
  388. return (length < 128 and (isinstance(self.event, AliasEvent)
  389. or (isinstance(self.event, ScalarEvent)
  390. and not self.analysis.empty and not self.analysis.multiline)
  391. or self.check_empty_sequence() or self.check_empty_mapping()))
  392. # Anchor, Tag, and Scalar processors.
  393. def process_anchor(self, indicator):
  394. if self.event.anchor is None:
  395. self.prepared_anchor = None
  396. return
  397. if self.prepared_anchor is None:
  398. self.prepared_anchor = self.prepare_anchor(self.event.anchor)
  399. if self.prepared_anchor:
  400. self.write_indicator(indicator+self.prepared_anchor, True)
  401. self.prepared_anchor = None
  402. def process_tag(self):
  403. tag = self.event.tag
  404. if isinstance(self.event, ScalarEvent):
  405. if self.style is None:
  406. self.style = self.choose_scalar_style()
  407. if ((not self.canonical or tag is None) and
  408. ((self.style == '' and self.event.implicit[0])
  409. or (self.style != '' and self.event.implicit[1]))):
  410. self.prepared_tag = None
  411. return
  412. if self.event.implicit[0] and tag is None:
  413. tag = u'!'
  414. self.prepared_tag = None
  415. else:
  416. if (not self.canonical or tag is None) and self.event.implicit:
  417. self.prepared_tag = None
  418. return
  419. if tag is None:
  420. raise EmitterError("tag is not specified")
  421. if self.prepared_tag is None:
  422. self.prepared_tag = self.prepare_tag(tag)
  423. if self.prepared_tag:
  424. self.write_indicator(self.prepared_tag, True)
  425. self.prepared_tag = None
  426. def choose_scalar_style(self):
  427. if self.analysis is None:
  428. self.analysis = self.analyze_scalar(self.event.value)
  429. if self.event.style == '"' or self.canonical:
  430. return '"'
  431. if not self.event.style and self.event.implicit[0]:
  432. if (not (self.simple_key_context and
  433. (self.analysis.empty or self.analysis.multiline))
  434. and (self.flow_level and self.analysis.allow_flow_plain
  435. or (not self.flow_level and self.analysis.allow_block_plain))):
  436. return ''
  437. if self.event.style and self.event.style in '|>':
  438. if (not self.flow_level and not self.simple_key_context
  439. and self.analysis.allow_block):
  440. return self.event.style
  441. if not self.event.style or self.event.style == '\'':
  442. if (self.analysis.allow_single_quoted and
  443. not (self.simple_key_context and self.analysis.multiline)):
  444. return '\''
  445. return '"'
  446. def process_scalar(self):
  447. if self.analysis is None:
  448. self.analysis = self.analyze_scalar(self.event.value)
  449. if self.style is None:
  450. self.style = self.choose_scalar_style()
  451. split = (not self.simple_key_context)
  452. #if self.analysis.multiline and split \
  453. # and (not self.style or self.style in '\'\"'):
  454. # self.write_indent()
  455. if self.style == '"':
  456. self.write_double_quoted(self.analysis.scalar, split)
  457. elif self.style == '\'':
  458. self.write_single_quoted(self.analysis.scalar, split)
  459. elif self.style == '>':
  460. self.write_folded(self.analysis.scalar)
  461. elif self.style == '|':
  462. self.write_literal(self.analysis.scalar)
  463. else:
  464. self.write_plain(self.analysis.scalar, split)
  465. self.analysis = None
  466. self.style = None
  467. # Analyzers.
  468. def prepare_version(self, version):
  469. major, minor = version
  470. if major != 1:
  471. raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
  472. return u'%d.%d' % (major, minor)
  473. def prepare_tag_handle(self, handle):
  474. if not handle:
  475. raise EmitterError("tag handle must not be empty")
  476. if handle[0] != u'!' or handle[-1] != u'!':
  477. raise EmitterError("tag handle must start and end with '!': %r"
  478. % (handle.encode('utf-8')))
  479. for ch in handle[1:-1]:
  480. if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
  481. or ch in u'-_'):
  482. raise EmitterError("invalid character %r in the tag handle: %r"
  483. % (ch.encode('utf-8'), handle.encode('utf-8')))
  484. return handle
  485. def prepare_tag_prefix(self, prefix):
  486. if not prefix:
  487. raise EmitterError("tag prefix must not be empty")
  488. chunks = []
  489. start = end = 0
  490. if prefix[0] == u'!':
  491. end = 1
  492. while end < len(prefix):
  493. ch = prefix[end]
  494. if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
  495. or ch in u'-;/?!:@&=+$,_.~*\'()[]':
  496. end += 1
  497. else:
  498. if start < end:
  499. chunks.append(prefix[start:end])
  500. start = end = end+1
  501. data = ch.encode('utf-8')
  502. for ch in data:
  503. chunks.append(u'%%%02X' % ord(ch))
  504. if start < end:
  505. chunks.append(prefix[start:end])
  506. return u''.join(chunks)
  507. def prepare_tag(self, tag):
  508. if not tag:
  509. raise EmitterError("tag must not be empty")
  510. if tag == u'!':
  511. return tag
  512. handle = None
  513. suffix = tag
  514. prefixes = self.tag_prefixes.keys()
  515. prefixes.sort()
  516. for prefix in prefixes:
  517. if tag.startswith(prefix) \
  518. and (prefix == u'!' or len(prefix) < len(tag)):
  519. handle = self.tag_prefixes[prefix]
  520. suffix = tag[len(prefix):]
  521. chunks = []
  522. start = end = 0
  523. while end < len(suffix):
  524. ch = suffix[end]
  525. if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
  526. or ch in u'-;/?:@&=+$,_.~*\'()[]' \
  527. or (ch == u'!' and handle != u'!'):
  528. end += 1
  529. else:
  530. if start < end:
  531. chunks.append(suffix[start:end])
  532. start = end = end+1
  533. data = ch.encode('utf-8')
  534. for ch in data:
  535. chunks.append(u'%%%02X' % ord(ch))
  536. if start < end:
  537. chunks.append(suffix[start:end])
  538. suffix_text = u''.join(chunks)
  539. if handle:
  540. return u'%s%s' % (handle, suffix_text)
  541. else:
  542. return u'!<%s>' % suffix_text
  543. def prepare_anchor(self, anchor):
  544. if not anchor:
  545. raise EmitterError("anchor must not be empty")
  546. for ch in anchor:
  547. if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
  548. or ch in u'-_'):
  549. raise EmitterError("invalid character %r in the anchor: %r"
  550. % (ch.encode('utf-8'), anchor.encode('utf-8')))
  551. return anchor
  552. def analyze_scalar(self, scalar):
  553. # Empty scalar is a special case.
  554. if not scalar:
  555. return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
  556. allow_flow_plain=False, allow_block_plain=True,
  557. allow_single_quoted=True, allow_double_quoted=True,
  558. allow_block=False)
  559. # Indicators and special characters.
  560. block_indicators = False
  561. flow_indicators = False
  562. line_breaks = False
  563. special_characters = False
  564. # Important whitespace combinations.
  565. leading_space = False
  566. leading_break = False
  567. trailing_space = False
  568. trailing_break = False
  569. break_space = False
  570. space_break = False
  571. # Check document indicators.
  572. if scalar.startswith(u'---') or scalar.startswith(u'...'):
  573. block_indicators = True
  574. flow_indicators = True
  575. # First character or preceded by a whitespace.
  576. preceeded_by_whitespace = True
  577. # Last character or followed by a whitespace.
  578. followed_by_whitespace = (len(scalar) == 1 or
  579. scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
  580. # The previous character is a space.
  581. previous_space = False
  582. # The previous character is a break.
  583. previous_break = False
  584. index = 0
  585. while index < len(scalar):
  586. ch = scalar[index]
  587. # Check for indicators.
  588. if index == 0:
  589. # Leading indicators are special characters.
  590. if ch in u'#,[]{}&*!|>\'\"%@`':
  591. flow_indicators = True
  592. block_indicators = True
  593. if ch in u'?:':
  594. flow_indicators = True
  595. if followed_by_whitespace:
  596. block_indicators = True
  597. if ch == u'-' and followed_by_whitespace:
  598. flow_indicators = True
  599. block_indicators = True
  600. else:
  601. # Some indicators cannot appear within a scalar as well.
  602. if ch in u',?[]{}':
  603. flow_indicators = True
  604. if ch == u':':
  605. flow_indicators = True
  606. if followed_by_whitespace:
  607. block_indicators = True
  608. if ch == u'#' and preceeded_by_whitespace:
  609. flow_indicators = True
  610. block_indicators = True
  611. # Check for line breaks, special, and unicode characters.
  612. if ch in u'\n\x85\u2028\u2029':
  613. line_breaks = True
  614. if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
  615. if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
  616. or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
  617. unicode_characters = True
  618. if not self.allow_unicode:
  619. special_characters = True
  620. else:
  621. special_characters = True
  622. # Detect important whitespace combinations.
  623. if ch == u' ':
  624. if index == 0:
  625. leading_space = True
  626. if index == len(scalar)-1:
  627. trailing_space = True
  628. if previous_break:
  629. break_space = True
  630. previous_space = True
  631. previous_break = False
  632. elif ch in u'\n\x85\u2028\u2029':
  633. if index == 0:
  634. leading_break = True
  635. if index == len(scalar)-1:
  636. trailing_break = True
  637. if previous_space:
  638. space_break = True
  639. previous_space = False
  640. previous_break = True
  641. else:
  642. previous_space = False
  643. previous_break = False
  644. # Prepare for the next character.
  645. index += 1
  646. preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
  647. followed_by_whitespace = (index+1 >= len(scalar) or
  648. scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
  649. # Let's decide what styles are allowed.
  650. allow_flow_plain = True
  651. allow_block_plain = True
  652. allow_single_quoted = True
  653. allow_double_quoted = True
  654. allow_block = True
  655. # Leading and trailing whitespaces are bad for plain scalars.
  656. if (leading_space or leading_break
  657. or trailing_space or trailing_break):
  658. allow_flow_plain = allow_block_plain = False
  659. # We do not permit trailing spaces for block scalars.
  660. if trailing_space:
  661. allow_block = False
  662. # Spaces at the beginning of a new line are only acceptable for block
  663. # scalars.
  664. if break_space:
  665. allow_flow_plain = allow_block_plain = allow_single_quoted = False
  666. # Spaces followed by breaks, as well as special character are only
  667. # allowed for double quoted scalars.
  668. if space_break or special_characters:
  669. allow_flow_plain = allow_block_plain = \
  670. allow_single_quoted = allow_block = False
  671. # Although the plain scalar writer supports breaks, we never emit
  672. # multiline plain scalars.
  673. if line_breaks:
  674. allow_flow_plain = allow_block_plain = False
  675. # Flow indicators are forbidden for flow plain scalars.
  676. if flow_indicators:
  677. allow_flow_plain = False
  678. # Block indicators are forbidden for block plain scalars.
  679. if block_indicators:
  680. allow_block_plain = False
  681. return ScalarAnalysis(scalar=scalar,
  682. empty=False, multiline=line_breaks,
  683. allow_flow_plain=allow_flow_plain,
  684. allow_block_plain=allow_block_plain,
  685. allow_single_quoted=allow_single_quoted,
  686. allow_double_quoted=allow_double_quoted,
  687. allow_block=allow_block)
  688. # Writers.
  689. def flush_stream(self):
  690. if hasattr(self.stream, 'flush'):
  691. self.stream.flush()
  692. def write_stream_start(self):
  693. # Write BOM if needed.
  694. if self.encoding and self.encoding.startswith('utf-16'):
  695. self.stream.write(u'\uFEFF'.encode(self.encoding))
  696. def write_stream_end(self):
  697. self.flush_stream()
  698. def write_indicator(self, indicator, need_whitespace,
  699. whitespace=False, indention=False):
  700. if self.whitespace or not need_whitespace:
  701. data = indicator
  702. else:
  703. data = u' '+indicator
  704. self.whitespace = whitespace
  705. self.indention = self.indention and indention
  706. self.column += len(data)
  707. self.open_ended = False
  708. if self.encoding:
  709. data = data.encode(self.encoding)
  710. self.stream.write(data)
  711. def write_indent(self):
  712. indent = self.indent or 0
  713. if not self.indention or self.column > indent \
  714. or (self.column == indent and not self.whitespace):
  715. self.write_line_break()
  716. if self.column < indent:
  717. self.whitespace = True
  718. data = u' '*(indent-self.column)
  719. self.column = indent
  720. if self.encoding:
  721. data = data.encode(self.encoding)
  722. self.stream.write(data)
  def write_line_break(self, data=None):
      """Write a line break and reset the position tracking.

      data: the line break to write; ``self.best_line_break`` is used
          when it is ``None``.

      Sets ``self.whitespace`` and ``self.indention``, increments
      ``self.line`` and resets ``self.column`` to 0.
      """
      line_break = self.best_line_break if data is None else data
      self.whitespace = True
      self.indention = True
      self.line += 1
      self.column = 0
      encoding = self.encoding
      if encoding:
          line_break = line_break.encode(encoding)
      self.stream.write(line_break)
  def write_version_directive(self, version_text):
      """Write a ``%YAML`` directive for ``version_text`` plus a line break."""
      directive = u'%%YAML %s' % version_text
      encoding = self.encoding
      payload = directive.encode(encoding) if encoding else directive
      self.stream.write(payload)
      # write_line_break() resets the column, so it is not advanced here.
      self.write_line_break()
  def write_tag_directive(self, handle_text, prefix_text):
      """Write a ``%TAG`` directive for the handle and prefix plus a line break."""
      directive = u'%%TAG %s %s' % (handle_text, prefix_text)
      encoding = self.encoding
      if encoding:
          directive = directive.encode(encoding)
      self.stream.write(directive)
      # write_line_break() resets the column, so it is not advanced here.
      self.write_line_break()
  745. # Scalar streams.
  def write_single_quoted(self, text, split=True):
      """Write ``text`` as a single-quoted scalar.

      The text is scanned once; ``start`` marks the beginning of the
      pending run and ``end`` the character being examined.  Runs of
      spaces, runs of line breaks and runs of other characters are
      flushed separately, and each ``'`` is written doubled.

      text: the scalar content to write.
      split: when true, a single interior space may be replaced by a
          line break plus indentation once ``self.column`` exceeds
          ``self.best_width``.
      """
      self.write_indicator(u'\'', True)
      # State of the previous character: inside a run of spaces / breaks.
      spaces = False
      breaks = False
      start = end = 0
      # The loop runs one position past the end (ch is None there) so the
      # last pending run is flushed too.
      while end <= len(text):
          ch = None
          if end < len(text):
              ch = text[end]
          if spaces:
              # A run of spaces ends here.
              if ch is None or ch != u' ':
                  # Only a single space that is neither the first nor the
                  # last character may become a line break.
                  if start+1 == end and self.column > self.best_width and split \
                          and start != 0 and end != len(text):
                      self.write_indent()
                  else:
                      data = text[start:end]
                      self.column += len(data)
                      if self.encoding:
                          data = data.encode(self.encoding)
                      self.stream.write(data)
                  start = end
          elif breaks:
              # A run of line breaks ends here.
              if ch is None or ch not in u'\n\x85\u2028\u2029':
                  # One extra break is written when the run starts with
                  # '\n' -- presumably to survive line folding on reading;
                  # TODO confirm against the YAML spec.
                  if text[start] == u'\n':
                      self.write_line_break()
                  for br in text[start:end]:
                      if br == u'\n':
                          self.write_line_break()
                      else:
                          # Other break characters are written verbatim.
                          self.write_line_break(br)
                  self.write_indent()
                  start = end
          else:
              # Flush the pending run of ordinary characters before a
              # space, a line break, a quote, or the end of the text.
              if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                  if start < end:
                      data = text[start:end]
                      self.column += len(data)
                      if self.encoding:
                          data = data.encode(self.encoding)
                      self.stream.write(data)
                      start = end
          if ch == u'\'':
              # A quote inside the scalar is written as two quotes; the
              # next run starts after it.
              data = u'\'\''
              self.column += 2
              if self.encoding:
                  data = data.encode(self.encoding)
              self.stream.write(data)
              start = end + 1
          if ch is not None:
              spaces = (ch == u' ')
              breaks = (ch in u'\n\x85\u2028\u2029')
          end += 1
      self.write_indicator(u'\'', False)
  # Characters that write_double_quoted() writes as a backslash followed by
  # the mapped character instead of a numeric escape.
  ESCAPE_REPLACEMENTS = {
      u'\0': u'0',  # NUL
      u'\x07': u'a',  # bell
      u'\x08': u'b',  # backspace
      u'\x09': u't',  # horizontal tab
      u'\x0A': u'n',  # line feed
      u'\x0B': u'v',  # vertical tab
      u'\x0C': u'f',  # form feed
      u'\x0D': u'r',  # carriage return
      u'\x1B': u'e',  # escape
      u'\"': u'\"',  # double quote
      u'\\': u'\\',  # backslash
      u'\x85': u'N',  # next line (NEL)
      u'\xA0': u'_',  # no-break space
      u'\u2028': u'L',  # line separator
      u'\u2029': u'P',  # paragraph separator
  }
  def write_double_quoted(self, text, split=True):
      """Write ``text`` as a double-quoted scalar, escaping where needed.

      Runs of characters that can be written verbatim are flushed when
      they end; every other character is written as a backslash escape.

      text: the scalar content to write.
      split: when true, the output may be continued on the next line
          once the projected column exceeds ``self.best_width``.
      """
      self.write_indicator(u'"', True)
      start = end = 0
      # One extra pass with ch = None flushes whatever is still pending.
      while end <= len(text):
          ch = None
          if end < len(text):
              ch = text[end]
          # A character is escaped when it is a quote, a backslash, one of
          # \x85 \u2028 \u2029 \uFEFF, or lies outside printable ASCII
          # (\x20-\x7E) -- unless allow_unicode is set and it falls in
          # \xA0-\uD7FF or \uE000-\uFFFD.
          if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                  or not (u'\x20' <= ch <= u'\x7E'
                      or (self.allow_unicode
                          and (u'\xA0' <= ch <= u'\uD7FF'
                              or u'\uE000' <= ch <= u'\uFFFD'))):
              # Flush the verbatim run collected so far.
              if start < end:
                  data = text[start:end]
                  self.column += len(data)
                  if self.encoding:
                      data = data.encode(self.encoding)
                  self.stream.write(data)
                  start = end
              if ch is not None:
                  # Prefer the short named escape, otherwise pick the
                  # narrowest numeric form that fits the code point.
                  if ch in self.ESCAPE_REPLACEMENTS:
                      data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                  elif ch <= u'\xFF':
                      data = u'\\x%02X' % ord(ch)
                  elif ch <= u'\uFFFF':
                      data = u'\\u%04X' % ord(ch)
                  else:
                      data = u'\\U%08X' % ord(ch)
                  self.column += len(data)
                  if self.encoding:
                      data = data.encode(self.encoding)
                  self.stream.write(data)
                  start = end+1
          # Consider splitting the line at a space, or right after an escape
          # (start >= end), but never at the first or last character.
          if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
                  and self.column+(end-start) > self.best_width and split:
              # Write the pending text followed by a backslash, then
              # continue on a fresh, indented line.
              data = text[start:end]+u'\\'
              if start < end:
                  start = end
              self.column += len(data)
              if self.encoding:
                  data = data.encode(self.encoding)
              self.stream.write(data)
              self.write_indent()
              self.whitespace = False
              self.indention = False
              # If the continuation starts with a space, a backslash is
              # written before it -- presumably so the space is kept when
              # the scalar is read back; TODO confirm against the YAML spec.
              if text[start] == u' ':
                  data = u'\\'
                  self.column += len(data)
                  if self.encoding:
                      data = data.encode(self.encoding)
                  self.stream.write(data)
          end += 1
      self.write_indicator(u'"', False)
  def determine_block_hints(self, text):
      """Return the hint characters placed after a block scalar indicator.

      The result is built from up to two parts:

      * ``self.best_indent`` as text, when ``text`` starts with a space
        or a line break;
      * ``-`` when ``text`` does not end with a line break, or ``+`` when
        it ends with two line breaks or is a single line break.

      An empty ``text`` yields an empty string.
      """
      if not text:
          return u''
      line_breaks = u'\n\x85\u2028\u2029'
      hints = u''
      if text[0] in u' \n\x85\u2028\u2029':
          hints += unicode(self.best_indent)
      if text[-1] in line_breaks:
          # text[-2] is only looked at when there is more than one character.
          if len(text) == 1 or text[-2] in line_breaks:
              hints += u'+'
      else:
          hints += u'-'
      return hints
  def write_folded(self, text):
      """Write ``text`` as a folded block scalar (``>`` indicator).

      The header is ``>`` followed by the hints from
      ``determine_block_hints()``; a trailing ``+`` hint marks the output
      as open-ended.  The content is then scanned once, handling runs of
      line breaks, runs of spaces and runs of other characters separately.
      Lines are folded at a single space once ``self.column`` exceeds
      ``self.best_width``.
      """
      hints = self.determine_block_hints(text)
      self.write_indicator(u'>'+hints, True)
      if hints[-1:] == u'+':
          self.open_ended = True
      self.write_line_break()
      # Whether the line after the latest break run starts with a space.
      leading_space = True
      spaces = False
      # Start in the "breaks" state so the first content character is
      # preceded by write_indent().
      breaks = True
      start = end = 0
      # One extra pass with ch = None flushes whatever is still pending.
      while end <= len(text):
          ch = None
          if end < len(text):
              ch = text[end]
          if breaks:
              # A run of line breaks ends here.
              if ch is None or ch not in u'\n\x85\u2028\u2029':
                  # An extra break is written when the run starts with '\n'
                  # and neither the previous nor the next line starts with
                  # a space -- presumably because a lone break between such
                  # lines would be folded on reading; TODO confirm.
                  if not leading_space and ch is not None and ch != u' ' \
                          and text[start] == u'\n':
                      self.write_line_break()
                  leading_space = (ch == u' ')
                  for br in text[start:end]:
                      if br == u'\n':
                          self.write_line_break()
                      else:
                          # Other break characters are written verbatim.
                          self.write_line_break(br)
                  # No indentation after the final break run.
                  if ch is not None:
                      self.write_indent()
                  start = end
          elif spaces:
              # A run of spaces ends here.
              if ch != u' ':
                  # A single space past best_width becomes a fold point.
                  if start+1 == end and self.column > self.best_width:
                      self.write_indent()
                  else:
                      data = text[start:end]
                      self.column += len(data)
                      if self.encoding:
                          data = data.encode(self.encoding)
                      self.stream.write(data)
                  start = end
          else:
              # Flush the pending run of ordinary characters before a
              # space, a line break, or the end of the text.
              if ch is None or ch in u' \n\x85\u2028\u2029':
                  data = text[start:end]
                  self.column += len(data)
                  if self.encoding:
                      data = data.encode(self.encoding)
                  self.stream.write(data)
                  # Text that does not end in a break still gets a final
                  # line break.
                  if ch is None:
                      self.write_line_break()
                  start = end
          if ch is not None:
              breaks = (ch in u'\n\x85\u2028\u2029')
              spaces = (ch == u' ')
          end += 1
  def write_literal(self, text):
      """Write ``text`` as a literal block scalar (``|`` indicator).

      The header is ``|`` followed by the hints from
      ``determine_block_hints()``; a trailing ``+`` hint marks the output
      as open-ended.  Each content line is written verbatim after
      ``write_indent()``, and line breaks are passed through
      ``write_line_break()``.
      """
      line_breaks = u'\n\x85\u2028\u2029'
      hints = self.determine_block_hints(text)
      self.write_indicator(u'|'+hints, True)
      if hints[-1:] == u'+':
          self.open_ended = True
      self.write_line_break()
      length = len(text)
      # Start in the "breaks" state so the first content character is
      # preceded by write_indent().
      in_breaks = True
      chunk_start = 0
      # Visit one position past the end so the final run is flushed.
      for pos in range(length + 1):
          at_end = (pos == length)
          ch = None if at_end else text[pos]
          is_break = (not at_end) and ch in line_breaks
          if in_breaks:
              if not is_break:
                  # The run of line breaks is over: write each of them.
                  for br in text[chunk_start:pos]:
                      if br != u'\n':
                          # Other break characters are written verbatim.
                          self.write_line_break(br)
                      else:
                          self.write_line_break()
                  # No indentation after the final break run.
                  if not at_end:
                      self.write_indent()
                  chunk_start = pos
          elif at_end or is_break:
              # The run of ordinary characters is over: write it as is.
              data = text[chunk_start:pos]
              if self.encoding:
                  data = data.encode(self.encoding)
              self.stream.write(data)
              # Text that does not end in a break still gets a final one.
              if at_end:
                  self.write_line_break()
              chunk_start = pos
          if not at_end:
              in_breaks = is_break
  def write_plain(self, text, split=True):
      """Write ``text`` as a plain (unquoted) scalar.

      In the root context the output is marked open-ended, even for
      empty text; empty text writes nothing.  A separating space is
      written first unless ``self.whitespace`` is set.  The content is
      then scanned once, handling runs of spaces, runs of line breaks
      and runs of other characters separately.

      text: the scalar content to write.
      split: when true, a single space may be replaced by a line break
          plus indentation once ``self.column`` exceeds
          ``self.best_width``.
      """
      if self.root_context:
          self.open_ended = True
      if not text:
          return
      if not self.whitespace:
          data = u' '
          self.column += len(data)
          if self.encoding:
              data = data.encode(self.encoding)
          self.stream.write(data)
      self.whitespace = False
      self.indention = False
      # State of the previous character: inside a run of spaces / breaks.
      spaces = False
      breaks = False
      start = end = 0
      # One extra pass with ch = None flushes whatever is still pending.
      while end <= len(text):
          ch = None
          if end < len(text):
              ch = text[end]
          if spaces:
              # A run of spaces ends here.
              if ch != u' ':
                  # A single space past best_width becomes a line break.
                  if start+1 == end and self.column > self.best_width and split:
                      self.write_indent()
                      # write_indent() sets these flags; clear them again
                      # because scalar content follows.
                      self.whitespace = False
                      self.indention = False
                  else:
                      data = text[start:end]
                      self.column += len(data)
                      if self.encoding:
                          data = data.encode(self.encoding)
                      self.stream.write(data)
                  start = end
          elif breaks:
              # NOTE(review): unlike write_single_quoted(), this test has no
              # ``ch is None or`` guard, so text ending in a line break
              # raises TypeError here.  Confirm callers never pass such
              # text before relying on this branch.
              if ch not in u'\n\x85\u2028\u2029':
                  # One extra break is written when the run starts with
                  # '\n' -- presumably to survive line folding on reading;
                  # TODO confirm against the YAML spec.
                  if text[start] == u'\n':
                      self.write_line_break()
                  for br in text[start:end]:
                      if br == u'\n':
                          self.write_line_break()
                      else:
                          # Other break characters are written verbatim.
                          self.write_line_break(br)
                  self.write_indent()
                  self.whitespace = False
                  self.indention = False
                  start = end
          else:
              # Flush the pending run of ordinary characters before a
              # space, a line break, or the end of the text.
              if ch is None or ch in u' \n\x85\u2028\u2029':
                  data = text[start:end]
                  self.column += len(data)
                  if self.encoding:
                      data = data.encode(self.encoding)
                  self.stream.write(data)
                  start = end
          if ch is not None:
              spaces = (ch == u' ')
              breaks = (ch in u'\n\x85\u2028\u2029')
          end += 1