1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- 'use strict';
- const { DomHandler, DomUtils, Parser } = require('htmlparser2');
- const escapeHTML = require('./escape_html');
- const nonWord = /^\s*[^a-zA-Z0-9]\s*$/; // exactly one non-alphanumeric character, optionally surrounded by whitespace; tocObj uses it to spot permalink symbols
/**
 * Parse an HTML string with htmlparser2 and return the resulting DOM.
 *
 * @param {string} html - Markup fed to the parser in a single `end()` call.
 * @returns {Array} The `dom` collected by the DomHandler.
 */
const parseHtml = html => {
  const domHandler = new DomHandler(null, {});
  const parser = new Parser(domHandler, {});
  parser.end(html);
  return domHandler.dom;
};
/**
 * Find the nearest `id` attribute, starting at the node itself and then
 * walking up through its ancestors.
 *
 * @param {Object} node - DOM node with optional `attribs` and `parent`.
 * @returns {string} The first truthy `attribs.id` found, or '' if none.
 */
const getId = node => {
  let current = node;
  do {
    const { attribs = {}, parent } = current;
    if (attribs.id) return attribs.id;
    current = parent;
  } while (current);
  return '';
};
/**
 * Tell whether a heading is marked as unnumbered.
 *
 * @param {Object} heading - DOM node with optional `attribs`.
 * @returns {boolean} true only when `data-toc-unnumbered` is the string 'true'.
 */
const isUnnumbered = heading => {
  const { attribs = {} } = heading;
  const flag = attribs['data-toc-unnumbered'];
  return flag === 'true';
};
/**
 * Collect the headings of an HTML string into a flat table-of-contents list.
 *
 * @param {string} str - HTML to scan.
 * @param {Object} [options]
 * @param {number} [options.min_depth=1] - Shallowest heading level to include.
 * @param {number} [options.max_depth=6] - Deepest heading level to include.
 * @returns {Array<Object>} One `{ text, id, level }` entry per heading, in
 *   document order; `unnumbered: true` is added for headings flagged with
 *   `data-toc-unnumbered="true"`. `text` has been passed through escapeHTML.
 */
function tocObj(str, options = {}) {
  const settings = Object.assign({
    min_depth: 1,
    max_depth: 6
  }, options);

  // Keep only the tag names that fall inside the requested depth range.
  const allowedTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    .slice(settings.min_depth - 1, settings.max_depth);
  const isWantedHeading = node => allowedTags.includes(node.tagName);
  const headings = DomUtils.find(isWantedHeading, parseHtml(str), true);

  return headings.map(heading => {
    // The digit in the tag name ("h3" -> 3) is the heading level.
    const level = +heading.name[1];
    const id = getId(heading);
    const unnumbered = isUnnumbered(heading);

    let text = '';
    for (const child of heading.children) {
      const childText = DomUtils.textContent(child);
      // A permalink is an <a> whose text is a single non-word character
      // (word = [a-zA-Z0-9]), possibly wrapped in whitespace; leave it out.
      const isPermalink = child.name === 'a' && nonWord.test(childText);
      if (!isPermalink) {
        text += escapeHTML(childText);
      }
    }
    // Nothing left after skipping permalinks: fall back to the full text.
    if (!text) text = escapeHTML(DomUtils.textContent(heading));

    const entry = { text, id, level };
    if (unnumbered) entry.unnumbered = true;
    return entry;
  });
}
- module.exports = tocObj;
|