Parser.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. "use strict";
  /**
   * Module-interop helper (TypeScript compiler output).
   *
   * Reuses an existing `this.__importDefault` when one is available; otherwise
   * defines a function that returns `mod` unchanged when it is flagged with
   * `__esModule`, and wraps any other value as `{ default: mod }`.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  5. Object.defineProperty(exports, "__esModule", { value: true });
  6. exports.Parser = void 0;
  7. var Tokenizer_1 = __importDefault(require("./Tokenizer"));
  // Shared sets of tag names used as values of `openImpliesClose` below.
  var formTags = new Set(["input", "option", "optgroup", "select", "button", "datalist", "textarea"]);
  var pTag = new Set(["p"]);
  var tableSectionTags = new Set(["thead", "tbody"]);
  var ddtTags = new Set(["dd", "dt"]);
  var rtpTags = new Set(["rt", "rp"]);

  /**
   * Maps the name of an opening tag to the set of tag names that it closes
   * implicitly: when the key is opened, elements on top of the parser's stack
   * whose name is in the associated set are closed first.
   */
  var openImpliesClose = (function () {
      var table = new Map();
      // Registers the same `closed` set (by reference) for every opener, in order.
      function register(openers, closed) {
          openers.forEach(function (tag) {
              table.set(tag, closed);
          });
      }
      register(["tr"], new Set(["tr", "th", "td"]));
      register(["th"], new Set(["th"]));
      register(["td"], new Set(["thead", "th", "td"]));
      register(["body"], new Set(["head", "link", "script"]));
      register(["li"], new Set(["li"]));
      register(["p", "h1", "h2", "h3", "h4", "h5", "h6"], pTag);
      register(["select", "input", "output", "button", "datalist", "textarea"], formTags);
      register(["option"], new Set(["option"]));
      register(["optgroup"], new Set(["optgroup", "option"]));
      register(["dd", "dt"], ddtTags);
      register([
          "address", "article", "aside", "blockquote", "details", "div", "dl",
          "fieldset", "figcaption", "figure", "footer", "form", "header", "hr",
          "main", "nav", "ol", "pre", "section", "table", "ul",
      ], pTag);
      register(["rt", "rp"], rtpTags);
      register(["tbody", "tfoot"], tableSectionTags);
      return table;
  }());

  /** Tag names the parser treats as void (never pushed on the stack) outside XML mode. */
  var voidElements = new Set([
      "area", "base", "basefont", "br", "col", "command", "embed", "frame",
      "hr", "img", "input", "isindex", "keygen", "link", "meta", "param",
      "source", "track", "wbr",
  ]);

  /** Opening one of these pushes `true` onto the parser's foreign-context stack. */
  var foreignContextElements = new Set(["math", "svg"]);

  /** Opening one of these pushes `false` onto the parser's foreign-context stack. */
  var htmlIntegrationElements = new Set([
      "mi", "mo", "mn", "ms", "mtext", "annotation-xml", "foreignobject", "desc", "title",
  ]);

  /** Matches the first whitespace character or slash, i.e. the end of an instruction name. */
  var reNameEnd = /\s|\//;
  var Parser = /** @class */ (function () {
      /**
       * Invokes the optional callback `event` on `handlers` with `args`,
       * using `handlers` as `this`. Does nothing when the callback is
       * `null` or `undefined`.
       */
      function callHandler(handlers, event, args) {
          var callback = handlers[event];
          if (callback !== null && callback !== undefined) {
              callback.apply(handlers, args);
          }
      }
      /** Returns `fallback` when `value` is `null` or `undefined`, else `value`. */
      function withDefault(value, fallback) {
          return value !== null && value !== undefined ? value : fallback;
      }
      /**
       * Turns tokenizer events into handler callbacks while tracking the
       * stack of open elements.
       *
       * @param cbs Handler object; every callback on it is optional.
       *     A nullish value is replaced by an empty object.
       * @param options Parser options. Read here: `xmlMode`, `lowerCaseTags`,
       *     `lowerCaseAttributeNames`, `Tokenizer`, `recognizeSelfClosing`
       *     and `recognizeCDATA`. Defaults to `{}`.
       */
      function Parser(cbs, options) {
          if (options === void 0) { options = {}; }
          this.options = options;
          /** The start index of the last event. */
          this.startIndex = 0;
          /** The end index of the last event. */
          this.endIndex = 0;
          /**
           * Store the start index of the current open tag,
           * so we can update the start index for attributes.
           */
          this.openTagStart = 0;
          /** Name of the tag currently being opened ("" when none). */
          this.tagname = "";
          /** Name and accumulated value of the attribute currently being read. */
          this.attribname = "";
          this.attribvalue = "";
          /** Attributes collected for the current open tag; only allocated when `onopentag` is set. */
          this.attribs = null;
          /** Names of the currently open (non-void) elements, innermost last. */
          this.stack = [];
          /** `true` entries come from `math`/`svg`, `false` entries from HTML integration elements. */
          this.foreignContext = [];
          this.cbs = withDefault(cbs, {});
          // Both case-folding switches default to "on" unless XML mode is enabled.
          this.lowerCaseTagNames = withDefault(options.lowerCaseTags, !options.xmlMode);
          this.lowerCaseAttributeNames = withDefault(options.lowerCaseAttributeNames, !options.xmlMode);
          // The default tokenizer is looked up lazily, only when none was injected.
          var TokenizerCtor = options.Tokenizer;
          if (TokenizerCtor === null || TokenizerCtor === undefined) {
              TokenizerCtor = Tokenizer_1.default;
          }
          this.tokenizer = new TokenizerCtor(this.options, this);
          callHandler(this.cbs, "onparserinit", [this]);
      }
      // Tokenizer event handlers
      /** @internal */
      Parser.prototype.ontext = function (data) {
          var idx = this.tokenizer.getAbsoluteIndex();
          this.endIndex = idx - 1;
          callHandler(this.cbs, "ontext", [data]);
          this.startIndex = idx;
      };
      /**
       * Tells whether `name` is a void element. Always `false` in XML mode.
       *
       * @param name Tag name to check.
       */
      Parser.prototype.isVoidElement = function (name) {
          return !this.options.xmlMode && voidElements.has(name);
      };
      /** @internal */
      Parser.prototype.onopentagname = function (name) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          this.emitOpenTag(name);
      };
      /**
       * Starts an open tag: implicitly closes elements that `name` cannot be
       * nested in (HTML mode only), pushes `name` on the stack unless it is
       * void, and fires `onopentagname`.
       *
       * @param name Tag name, already case-normalised by the caller.
       */
      Parser.prototype.emitOpenTag = function (name) {
          this.openTagStart = this.startIndex;
          this.tagname = name;
          var impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
          if (impliesClose) {
              while (this.stack.length > 0 &&
                  impliesClose.has(this.stack[this.stack.length - 1])) {
                  var closed = this.stack.pop();
                  callHandler(this.cbs, "onclosetag", [closed, true]);
              }
          }
          if (!this.isVoidElement(name)) {
              this.stack.push(name);
              if (foreignContextElements.has(name)) {
                  this.foreignContext.push(true);
              }
              else if (htmlIntegrationElements.has(name)) {
                  this.foreignContext.push(false);
              }
          }
          callHandler(this.cbs, "onopentagname", [name]);
          // Attributes are only collected when somebody listens for `onopentag`.
          if (this.cbs.onopentag) {
              this.attribs = {};
          }
      };
      /**
       * Finishes the current open tag: fires `onopentag` with the collected
       * attributes and, for void elements, an implied `onclosetag`.
       *
       * @param isImplied Passed through to `onopentag` as its third argument.
       */
      Parser.prototype.endOpenTag = function (isImplied) {
          this.startIndex = this.openTagStart;
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.attribs) {
              callHandler(this.cbs, "onopentag", [this.tagname, this.attribs, isImplied]);
              this.attribs = null;
          }
          if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
              this.cbs.onclosetag(this.tagname, true);
          }
          this.tagname = "";
      };
      /** @internal */
      Parser.prototype.onopentagend = function () {
          this.endOpenTag(false);
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.onclosetag = function (name) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          if (foreignContextElements.has(name) ||
              htmlIntegrationElements.has(name)) {
              this.foreignContext.pop();
          }
          if (!this.isVoidElement(name)) {
              var pos = this.stack.lastIndexOf(name);
              if (pos !== -1) {
                  if (this.cbs.onclosetag) {
                      // Close everything above the match (implied), then the match itself.
                      var count = this.stack.length - pos;
                      while (count--) {
                          // We know the stack has sufficient elements.
                          this.cbs.onclosetag(this.stack.pop(), count !== 0);
                      }
                  }
                  else {
                      this.stack.length = pos;
                  }
              }
              else if (!this.options.xmlMode && name === "p") {
                  // A stray closing `p` produces an implied, immediately closed `p`.
                  this.emitOpenTag(name);
                  this.closeCurrentTag(true);
              }
          }
          else if (!this.options.xmlMode && name === "br") {
              // We can't go through `emitOpenTag` here, as `br` would be implicitly closed.
              callHandler(this.cbs, "onopentagname", [name]);
              callHandler(this.cbs, "onopentag", [name, {}, true]);
              callHandler(this.cbs, "onclosetag", [name, false]);
          }
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.onselfclosingtag = function () {
          var honourSelfClosing = this.options.xmlMode ||
              this.options.recognizeSelfClosing ||
              this.foreignContext[this.foreignContext.length - 1];
          if (honourSelfClosing) {
              this.closeCurrentTag(false);
              // Set `startIndex` for next node
              this.startIndex = this.endIndex + 1;
          }
          else {
              // Ignore the fact that the tag is self-closing.
              this.onopentagend();
          }
      };
      /**
       * Ends the current open tag and closes it right away if it is on top
       * of the stack.
       *
       * @param isOpenImplied Whether the opening tag was implied.
       */
      Parser.prototype.closeCurrentTag = function (isOpenImplied) {
          // `endOpenTag` clears `tagname`, so remember it first.
          var name = this.tagname;
          this.endOpenTag(isOpenImplied);
          // Self-closing tags will be on the top of the stack
          if (this.stack[this.stack.length - 1] === name) {
              // If the opening tag isn't implied, the closing tag has to be implied.
              callHandler(this.cbs, "onclosetag", [name, !isOpenImplied]);
              this.stack.pop();
          }
      };
      /** @internal */
      Parser.prototype.onattribname = function (name) {
          this.startIndex = this.tokenizer.getAbsoluteSectionStart();
          if (this.lowerCaseAttributeNames) {
              name = name.toLowerCase();
          }
          this.attribname = name;
      };
      /** @internal */
      Parser.prototype.onattribdata = function (value) {
          this.attribvalue += value;
      };
      /** @internal */
      Parser.prototype.onattribend = function (quote) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          callHandler(this.cbs, "onattribute", [this.attribname, this.attribvalue, quote]);
          // The first occurrence of a duplicated attribute wins.
          if (this.attribs &&
              !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
              this.attribs[this.attribname] = this.attribvalue;
          }
          this.attribname = "";
          this.attribvalue = "";
      };
      /**
       * Extracts the name of a declaration or processing instruction: the
       * text before the first whitespace or slash, lower-cased when tag
       * names are lower-cased.
       *
       * @param value Raw instruction text.
       * @returns The instruction name.
       */
      Parser.prototype.getInstructionName = function (value) {
          var idx = value.search(reNameEnd);
          var name = idx < 0 ? value : value.slice(0, idx);
          if (this.lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          return name;
      };
      /** @internal */
      Parser.prototype.ondeclaration = function (value) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.cbs.onprocessinginstruction) {
              var name = this.getInstructionName(value);
              this.cbs.onprocessinginstruction("!" + name, "!" + value);
          }
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.onprocessinginstruction = function (value) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.cbs.onprocessinginstruction) {
              var name = this.getInstructionName(value);
              this.cbs.onprocessinginstruction("?" + name, "?" + value);
          }
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.oncomment = function (value) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          callHandler(this.cbs, "oncomment", [value]);
          callHandler(this.cbs, "oncommentend", []);
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.oncdata = function (value) {
          this.endIndex = this.tokenizer.getAbsoluteIndex();
          if (this.options.xmlMode || this.options.recognizeCDATA) {
              callHandler(this.cbs, "oncdatastart", []);
              callHandler(this.cbs, "ontext", [value]);
              callHandler(this.cbs, "oncdataend", []);
          }
          else {
              // CDATA is not recognised: report the section as a comment.
              callHandler(this.cbs, "oncomment", ["[CDATA[" + value + "]]"]);
              callHandler(this.cbs, "oncommentend", []);
          }
          // Set `startIndex` for next node
          this.startIndex = this.endIndex + 1;
      };
      /** @internal */
      Parser.prototype.onerror = function (err) {
          callHandler(this.cbs, "onerror", [err]);
      };
      /** @internal */
      Parser.prototype.onend = function () {
          if (this.cbs.onclosetag) {
              // Set the end index for all remaining tags
              this.endIndex = this.startIndex;
              // Report every still-open element as implicitly closed, innermost first.
              for (var i = this.stack.length - 1; i >= 0; i--) {
                  this.cbs.onclosetag(this.stack[i], true);
              }
          }
          callHandler(this.cbs, "onend", []);
      };
      /**
       * Resets the parser to a blank state, ready to parse a new HTML document
       */
      Parser.prototype.reset = function () {
          callHandler(this.cbs, "onreset", []);
          this.tokenizer.reset();
          this.tagname = "";
          this.attribname = "";
          // Drop any partially accumulated attribute value so it cannot leak
          // into the first attribute of the next document.
          this.attribvalue = "";
          this.attribs = null;
          this.stack = [];
          // Forget foreign-content state; otherwise a reset inside `svg`/`math`
          // would keep honouring self-closing tags in the next document.
          this.foreignContext = [];
          this.openTagStart = 0;
          this.startIndex = 0;
          this.endIndex = 0;
          callHandler(this.cbs, "onparserinit", [this]);
      };
      /**
       * Resets the parser, then parses a complete document and
       * pushes it to the handler.
       *
       * @param data Document to parse.
       */
      Parser.prototype.parseComplete = function (data) {
          this.reset();
          this.end(data);
      };
      /**
       * Parses a chunk of data and calls the corresponding callbacks.
       *
       * @param chunk Chunk to parse.
       */
      Parser.prototype.write = function (chunk) {
          this.tokenizer.write(chunk);
      };
      /**
       * Parses the end of the buffer and clears the stack, calls onend.
       *
       * @param chunk Optional final chunk to parse.
       */
      Parser.prototype.end = function (chunk) {
          this.tokenizer.end(chunk);
      };
      /**
       * Pauses parsing. The parser won't emit events until `resume` is called.
       */
      Parser.prototype.pause = function () {
          this.tokenizer.pause();
      };
      /**
       * Resumes parsing after `pause` was called.
       */
      Parser.prototype.resume = function () {
          this.tokenizer.resume();
      };
      /**
       * Alias of `write`, for backwards compatibility.
       *
       * @param chunk Chunk to parse.
       * @deprecated
       */
      Parser.prototype.parseChunk = function (chunk) {
          this.write(chunk);
      };
      /**
       * Alias of `end`, for backwards compatibility.
       *
       * @param chunk Optional final chunk to parse.
       * @deprecated
       */
      Parser.prototype.done = function (chunk) {
          this.end(chunk);
      };
      return Parser;
  }());
  424. exports.Parser = Parser;