// rules.js
  1. const {
  2. noopTest,
  3. edit,
  4. merge
  5. } = require('./helpers.js');
  6. /**
  7. * Block-Level Grammar
  8. */
  9. const block = {
  10. newline: /^(?: *(?:\n|$))+/,
  11. code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
  12. fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
  13. hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  14. heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
  15. blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  16. list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
  17. html: '^ {0,3}(?:' // optional indentation
  18. + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
  19. + '|comment[^\\n]*(\\n+|$)' // (2)
  20. + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
  21. + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
  22. + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
  23. + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
  24. + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
  25. + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
  26. + ')',
  27. def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
  28. nptable: noopTest,
  29. table: noopTest,
  30. lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  31. // regex template, placeholders will be replaced according to different paragraph
  32. // interruption rules of commonmark and the original markdown spec:
  33. _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html| +\n)[^\n]+)*)/,
  34. text: /^[^\n]+/
  35. };
  36. block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
  37. block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
  38. block.def = edit(block.def)
  39. .replace('label', block._label)
  40. .replace('title', block._title)
  41. .getRegex();
  42. block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
  43. block.item = /^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/;
  44. block.item = edit(block.item, 'gm')
  45. .replace(/bull/g, block.bullet)
  46. .getRegex();
  47. block.listItemStart = edit(/^( *)(bull) */)
  48. .replace('bull', block.bullet)
  49. .getRegex();
  50. block.list = edit(block.list)
  51. .replace(/bull/g, block.bullet)
  52. .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
  53. .replace('def', '\\n+(?=' + block.def.source + ')')
  54. .getRegex();
  55. block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
  56. + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
  57. + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
  58. + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
  59. + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
  60. + '|track|ul';
  61. block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
  62. block.html = edit(block.html, 'i')
  63. .replace('comment', block._comment)
  64. .replace('tag', block._tag)
  65. .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
  66. .getRegex();
  67. block.paragraph = edit(block._paragraph)
  68. .replace('hr', block.hr)
  69. .replace('heading', ' {0,3}#{1,6} ')
  70. .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
  71. .replace('blockquote', ' {0,3}>')
  72. .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  73. .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  74. .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
  75. .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
  76. .getRegex();
  77. block.blockquote = edit(block.blockquote)
  78. .replace('paragraph', block.paragraph)
  79. .getRegex();
  80. /**
  81. * Normal Block Grammar
  82. */
  83. block.normal = merge({}, block);
  84. /**
  85. * GFM Block Grammar
  86. */
  87. block.gfm = merge({}, block.normal, {
  88. nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header
  89. + ' {0,3}([-:]+ *\\|[-| :]*)' // Align
  90. + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells
  91. table: '^ *\\|(.+)\\n' // Header
  92. + ' {0,3}\\|?( *[-:]+[-| :]*)' // Align
  93. + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
  94. });
  95. block.gfm.nptable = edit(block.gfm.nptable)
  96. .replace('hr', block.hr)
  97. .replace('heading', ' {0,3}#{1,6} ')
  98. .replace('blockquote', ' {0,3}>')
  99. .replace('code', ' {4}[^\\n]')
  100. .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  101. .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  102. .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
  103. .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
  104. .getRegex();
  105. block.gfm.table = edit(block.gfm.table)
  106. .replace('hr', block.hr)
  107. .replace('heading', ' {0,3}#{1,6} ')
  108. .replace('blockquote', ' {0,3}>')
  109. .replace('code', ' {4}[^\\n]')
  110. .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  111. .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  112. .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
  113. .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
  114. .getRegex();
  115. /**
  116. * Pedantic grammar (original John Gruber's loose markdown specification)
  117. */
  118. block.pedantic = merge({}, block.normal, {
  119. html: edit(
  120. '^ *(?:comment *(?:\\n|\\s*$)'
  121. + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
  122. + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
  123. .replace('comment', block._comment)
  124. .replace(/tag/g, '(?!(?:'
  125. + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
  126. + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
  127. + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
  128. .getRegex(),
  129. def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
  130. heading: /^(#{1,6})(.*)(?:\n+|$)/,
  131. fences: noopTest, // fences not supported
  132. paragraph: edit(block.normal._paragraph)
  133. .replace('hr', block.hr)
  134. .replace('heading', ' *#{1,6} *[^\n]')
  135. .replace('lheading', block.lheading)
  136. .replace('blockquote', ' {0,3}>')
  137. .replace('|fences', '')
  138. .replace('|list', '')
  139. .replace('|html', '')
  140. .getRegex()
  141. });
  142. /**
  143. * Inline-Level Grammar
  144. */
  145. const inline = {
  146. escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  147. autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  148. url: noopTest,
  149. tag: '^comment'
  150. + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
  151. + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
  152. + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
  153. + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
  154. + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
  155. link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  156. reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
  157. nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
  158. reflinkSearch: 'reflink|nolink(?!\\()',
  159. emStrong: {
  160. lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
  161. // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
  162. // () Skip other delimiter (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
  163. rDelimAst: /\_\_[^_*]*?\*[^_*]*?\_\_|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
  164. rDelimUnd: /\*\*[^_*]*?\_[^_*]*?\*\*|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
  165. },
  166. code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  167. br: /^( {2,}|\\)\n(?!\s*$)/,
  168. del: noopTest,
  169. text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
  170. punctuation: /^([\spunctuation])/
  171. };
  172. // list of punctuation marks from CommonMark spec
  173. // without * and _ to handle the different emphasis markers * and _
  174. inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
  175. inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
  176. // sequences em should skip over [title](link), `code`, <html>
  177. inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g;
  178. inline.escapedEmSt = /\\\*|\\_/g;
  179. inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
  180. inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
  181. .replace(/punct/g, inline._punctuation)
  182. .getRegex();
  183. inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g')
  184. .replace(/punct/g, inline._punctuation)
  185. .getRegex();
  186. inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g')
  187. .replace(/punct/g, inline._punctuation)
  188. .getRegex();
  189. inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
  190. inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
  191. inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
  192. inline.autolink = edit(inline.autolink)
  193. .replace('scheme', inline._scheme)
  194. .replace('email', inline._email)
  195. .getRegex();
  196. inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
  197. inline.tag = edit(inline.tag)
  198. .replace('comment', inline._comment)
  199. .replace('attribute', inline._attribute)
  200. .getRegex();
  201. inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
  202. inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
  203. inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;
  204. inline.link = edit(inline.link)
  205. .replace('label', inline._label)
  206. .replace('href', inline._href)
  207. .replace('title', inline._title)
  208. .getRegex();
  209. inline.reflink = edit(inline.reflink)
  210. .replace('label', inline._label)
  211. .getRegex();
  212. inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
  213. .replace('reflink', inline.reflink)
  214. .replace('nolink', inline.nolink)
  215. .getRegex();
  216. /**
  217. * Normal Inline Grammar
  218. */
  219. inline.normal = merge({}, inline);
  220. /**
  221. * Pedantic Inline Grammar
  222. */
  223. inline.pedantic = merge({}, inline.normal, {
  224. strong: {
  225. start: /^__|\*\*/,
  226. middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
  227. endAst: /\*\*(?!\*)/g,
  228. endUnd: /__(?!_)/g
  229. },
  230. em: {
  231. start: /^_|\*/,
  232. middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
  233. endAst: /\*(?!\*)/g,
  234. endUnd: /_(?!_)/g
  235. },
  236. link: edit(/^!?\[(label)\]\((.*?)\)/)
  237. .replace('label', inline._label)
  238. .getRegex(),
  239. reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
  240. .replace('label', inline._label)
  241. .getRegex()
  242. });
  243. /**
  244. * GFM Inline Grammar
  245. */
  246. inline.gfm = merge({}, inline.normal, {
  247. escape: edit(inline.escape).replace('])', '~|])').getRegex(),
  248. _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
  249. url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
  250. _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
  251. del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
  252. text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
  253. });
  254. inline.gfm.url = edit(inline.gfm.url, 'i')
  255. .replace('email', inline.gfm._extended_email)
  256. .getRegex();
  257. /**
  258. * GFM + Line Breaks Inline Grammar
  259. */
  260. inline.breaks = merge({}, inline.gfm, {
  261. br: edit(inline.br).replace('{2,}', '*').getRegex(),
  262. text: edit(inline.gfm.text)
  263. .replace('\\b_', '\\b_| {2,}\\n')
  264. .replace(/\{2,\}/g, '*')
  265. .getRegex()
  266. });
  267. module.exports = {
  268. block,
  269. inline
  270. };