123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
'use strict';

// Parser states used by striptags():
//   STATE_PLAINTEXT - outside any tag; characters are copied to the output.
//   STATE_HTML      - inside a '<...>' tag; characters are buffered, not emitted.
//   STATE_COMMENT   - inside a '<!-- ... -->' comment; everything is dropped.
const STATE_PLAINTEXT = Symbol('plaintext');
const STATE_HTML = Symbol('html');
const STATE_COMMENT = Symbol('comment');

/**
 * Removes HTML tags and comments from a string, keeping the text between them.
 *
 * The input is scanned one character at a time with a small state machine:
 * - a '<' starts a tag, which ends at the matching unquoted '>' (nested
 *   '<' ... '>' pairs inside the tag are balanced first);
 * - '>' and '<' inside a single- or double-quoted attribute value are ignored;
 * - '<!--' starts a comment, which ends at the first '-->';
 * - a '<' immediately followed by a space or newline is not a tag and is
 *   emitted as literal text together with that whitespace character.
 *
 * A tag or comment that is never closed swallows the rest of the input.
 *
 * @param {string|String} [html=''] - Markup to strip.
 * @returns {string} The input without tags/comments, or '' when `html` is
 *   neither a string primitive nor a String object.
 */
function striptags(html = '') {
  // Anything that is not a string yields an empty string rather than throwing.
  if (typeof html !== 'string' && !(html instanceof String)) {
    return '';
  }

  let state = STATE_PLAINTEXT;
  let tagBuffer = ''; // text of the tag/comment currently being skipped
  let depth = 0; // count of unmatched nested '<' inside the current tag
  let quoteChar = ''; // the quote character we are inside of, or ''
  let output = '';

  const { length } = html;
  for (let idx = 0; idx < length; idx++) {
    const char = html[idx];

    if (state === STATE_PLAINTEXT) {
      if (char === '<') {
        state = STATE_HTML;
        tagBuffer += char;
      } else {
        output += char;
      }
    } else if (state === STATE_HTML) {
      switch (char) {
        case '<':
          // A '<' inside a quoted value is plain data; otherwise it is nested.
          if (!quoteChar) {
            depth++;
          }
          break;

        case '>':
          // A '>' inside a quoted value does not close anything.
          if (quoteChar) {
            break;
          }
          // Closes a nested bracket first, e.g. the inner pair of '<<>>'.
          if (depth) {
            depth--;
            break;
          }
          // Closes the tag held in tagBuffer.
          quoteChar = '';
          state = STATE_PLAINTEXT;
          tagBuffer = '';
          break;

        case '"':
        case '\'':
          // The matching quote closes the value; the other quote type is
          // ignored while a value is already open.
          if (char === quoteChar) {
            quoteChar = '';
          } else {
            quoteChar = quoteChar || char;
          }
          tagBuffer += char;
          break;

        case '-':
          // Buffer is '<!-' and this is the second dash: '<!--' opens a comment.
          if (tagBuffer === '<!-') {
            state = STATE_COMMENT;
          }
          tagBuffer += char;
          break;

        case ' ':
        case '\n':
          if (tagBuffer === '<') {
            // '<' followed by whitespace is literal text, not a tag. Emit the
            // actual whitespace character (so a newline stays a newline) and
            // clear the nesting count so a stale value cannot make the next
            // real tag swallow the text that follows it.
            state = STATE_PLAINTEXT;
            output += `<${char}`;
            tagBuffer = '';
            depth = 0;
            break;
          }
          tagBuffer += char;
          break;

        default:
          tagBuffer += char;
          break;
      }
    } else if (state === STATE_COMMENT) {
      if (char === '>') {
        // Only '-->' ends the comment; any other '>' is part of its body.
        if (tagBuffer.slice(-2) === '--') {
          state = STATE_PLAINTEXT;
        }
        tagBuffer = '';
      } else {
        tagBuffer += char;
      }
    }
  }

  return output;
}
// CommonJS export: the striptags function is this module's entire export value.
- module.exports = striptags;
|