ParserStream = function (options) {
WritableStream.call(this);
this.parser = new Parser(options);
this.lastChunkWritten = false;
this.writeCallback = null;
this.pausedByScript = false;
this.document = this.parser.treeAdapter.createDocument();
this.pendingHtmlInsertions = [];
this._resume = this._resume.bind(this);
this._documentWrite = this._documentWrite.bind(this);
this._scriptHandler = this._scriptHandler.bind(this);
this.parser._bootstrap(this.document, null);
}...
* ```js
*
* const parse5 = require('parse5');
* const http = require('http');
*
* // Fetch the page content and obtain it's <head> node
* http.get('http://inikulin.github.io/parse5/', res => {
* const parser = new parse5.ParserStream();
*
* parser.once('finish', () => {
* console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
* });
*
* res.pipe(parser);
* });
...PlainTextConversionStream = function (options) {
ParserStream.call(this, options);
// NOTE: see https://html.spec.whatwg.org/#read-text
this.parser._insertFakeElement($.HTML);
this.parser._insertFakeElement($.HEAD);
this.parser.openElements.pop();
this.parser._insertFakeElement($.BODY);
this.parser._insertFakeElement($.PRE);
this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
this.parser.switchToPlaintextParsing();
}...
* @example
* ```js
*
* const parse5 = require('parse5');
* const fs = require('fs');
*
* const file = fs.createReadStream('war_and_peace.txt');
* const converter = new parse5.PlainTextConversionStream();
*
* converter.once('finish', () => {
* console.log(converter.document.childNodes[1].childNodes[0].tagName); //> 'head'
* });
*
* file.pipe(converter);
* ```
...SAXParser = function (options) {
TransformStream.call(this);
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.tokenizer = new Tokenizer(options);
this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.tokenizer);
this.pendingText = null;
this.currentTokenLocation = void 0;
this.lastChunkWritten = false;
this.stopped = false;
// NOTE: always pipe stream to the /dev/null stream to avoid
// `highWaterMark` hit even if we don't have consumers.
// (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
this.pipe(new DevNullStream());
}...
* ```js
*
* const parse5 = require('parse5');
* const http = require('http');
* const fs = require('fs');
*
* const file = fs.createWriteStream('/home/google.com.html');
* const parser = new parse5.SAXParser();
*
* parser.on('text', text => {
* // Handle page text content
* ...
* });
*
* http.get('http://google.com', res => {
...SerializerStream = function (node, options) {
ReadableStream.call(this);
this.serializer = new Serializer(node, options);
Object.defineProperty(this.serializer, 'html', {
//NOTE: To make `+=` concat operator work properly we define
//getter which always returns empty string
get: function () {
return '';
},
set: this.push.bind(this)
});
}...
*
* // Uses the default tree adapter for parsing.
* const document = parse5.parse('<div></div>', {
* treeAdapter: parse5.treeAdapters.default
* });
*
* // Uses the htmlparser2 tree adapter with the SerializerStream.
* const serializer = new parse5.SerializerStream(node, {
* treeAdapter: parse5.treeAdapters.htmlparser2
* });
* ```
*/
export var treeAdapters: {
/**
* Default tree format for parse5.
...dev_null_stream = function () {
WritableStream.call(this);
}n/a
formatting_element_list = function (treeAdapter) {
this.length = 0;
this.entries = [];
this.treeAdapter = treeAdapter;
this.bookmark = null;
}n/a
index = function (options) {
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
if (this.options.locationInfo)
locationInfoMixin.assign(this);
}n/a
open_element_stack = function (document, treeAdapter) {
this.stackTop = -1;
this.items = [];
this.current = document;
this.currentTagName = null;
this.currentTmplContent = null;
this.tmplCount = 0;
this.treeAdapter = treeAdapter;
}n/a
function parse(html, options) {
var parser = new Parser(options);
return parser.parse(html);
}...
*
* @example
*```js
*
* const parse5 = require('parse5');
*
* // Uses the default tree adapter for parsing.
* const document = parse5.parse('<div></div>', {
* treeAdapter: parse5.treeAdapters.default
* });
*
* // Uses the htmlparser2 tree adapter with the SerializerStream.
* const serializer = new parse5.SerializerStream(node, {
* treeAdapter: parse5.treeAdapters.htmlparser2
* });
...function parseFragment(fragmentContext, html, options) {
if (typeof fragmentContext === 'string') {
options = html;
html = fragmentContext;
fragmentContext = null;
}
var parser = new Parser(options);
return parser.parseFragment(html, fragmentContext);
}...
* @param options - Parsing options.
*
* @example
* ```js
*
* const parse5 = require('parse5');
*
* const documentFragment = parse5.parseFragment('<table></table
x3e;');
*
* console.log(documentFragment.childNodes[0].tagName); //> 'table'
*
* // Parses the html fragment in the context of the parsed <table> element.
* const trFragment = parser.parseFragment(documentFragment.childNodes[0], '<tr><td>Shake it, baby</
td></tr>');
*
* console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
...parser_feedback_simulator = function (tokenizer) {
this.tokenizer = tokenizer;
this.namespaceStack = [];
this.namespaceStackTop = -1;
this._enterNamespace(NS.HTML);
}n/a
preprocessor = function () {
this.html = null;
this.pos = -1;
this.lastGapPos = -1;
this.lastCharPos = -1;
this.droppedBufferSize = 0;
this.gapStack = [];
this.skipNextNewLine = false;
this.lastChunkWritten = false;
this.endOfChunkHit = false;
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
}n/a
serialize = function (node, options) {
var serializer = new Serializer(node, options);
return serializer.serialize();
}...
* ```js
*
* const parse5 = require('parse5');
*
* const document = parse5.parse('<!DOCTYPE html><html><head></head><body>
Hi there!</body></html>');
*
* // Serializes a document.
* const html = parse5.serialize(document);
*
* // Serializes the <html> element content.
* const str = parse5.serialize(document.childNodes[1]);
*
* console.log(str); //> '<head></head><body>Hi there!</body>'
* ```
*/
...ParserStream = function (options) {
WritableStream.call(this);
this.parser = new Parser(options);
this.lastChunkWritten = false;
this.writeCallback = null;
this.pausedByScript = false;
this.document = this.parser.treeAdapter.createDocument();
this.pendingHtmlInsertions = [];
this._resume = this._resume.bind(this);
this._documentWrite = this._documentWrite.bind(this);
this._scriptHandler = this._scriptHandler.bind(this);
this.parser._bootstrap(this.document, null);
}...
* ```js
*
* const parse5 = require('parse5');
* const http = require('http');
*
* // Fetch the page content and obtain it's <head> node
* http.get('http://inikulin.github.io/parse5/', res => {
* const parser = new parse5.ParserStream();
*
* parser.once('finish', () => {
* console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
* });
*
* res.pipe(parser);
* });
...function Writable(options) {
// Writable ctor is applied to Duplexes, too.
// `realHasInstance` is necessary because using plain `instanceof`
// would return false, as no `_writableState` property is attached.
// Trying to use the custom `instanceof` for Writable here will also break the
// Node.js LazyTransform implementation, which has a non-trivial getter for
// `_writableState` that would lead to infinite recursion.
if (!(realHasInstance.call(Writable, this)) &&
!(this instanceof Stream.Duplex)) {
return new Writable(options);
}
this._writableState = new WritableState(options, this);
// legacy.
this.writable = true;
if (options) {
if (typeof options.write === 'function')
this._write = options.write;
if (typeof options.writev === 'function')
this._writev = options.writev;
}
Stream.call(this);
}n/a
_documentWrite = function (html) {
if (!this.parser.stopped)
this.pendingHtmlInsertions.push(html);
}n/a
_resume = function () {
if (!this.pausedByScript)
throw new Error('Parser was already resumed');
while (this.pendingHtmlInsertions.length) {
var html = this.pendingHtmlInsertions.pop();
this.parser.tokenizer.insertHtmlAtCurrentPos(html);
}
this.pausedByScript = false;
//NOTE: keep parsing if we don't wait for the next input chunk
if (this.parser.tokenizer.active)
this._runParsingLoop();
}n/a
_runParsingLoop = function () {
this.parser.runParsingLoopForCurrentChunk(this.writeCallback, this._scriptHandler);
}...
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
..._scriptHandler = function (scriptElement) {
if (this.listeners('script').length) {
this.pausedByScript = true;
this.emit('script', scriptElement, this._documentWrite, this._resume);
}
else
this._runParsingLoop();
}n/a
_write = function (chunk, encoding, callback) {
this.writeCallback = callback;
this.parser.tokenizer.write(chunk.toString('utf8'), this.lastChunkWritten);
this._runParsingLoop();
}n/a
end = function (chunk, encoding, callback) {
this.lastChunkWritten = true;
WritableStream.prototype.end.call(this, chunk, encoding, callback);
}...
* // `document.write` implemented using `documentWrite`.
* ...
* // Then resume parsing.
* resume();
* });
* });
*
* parser.end('<script src="example.com/script.js"></script
>');
* ```
*/
on(event: 'script', listener: (scriptElement: AST.Element, documentWrite: (html: string) => void, resume: () =>
; void) => void): this;
/**
* WritableStream events
*/
on(event: string, listener: Function): this;
...PlainTextConversionStream = function (options) {
ParserStream.call(this, options);
// NOTE: see https://html.spec.whatwg.org/#read-text
this.parser._insertFakeElement($.HTML);
this.parser._insertFakeElement($.HEAD);
this.parser.openElements.pop();
this.parser._insertFakeElement($.BODY);
this.parser._insertFakeElement($.PRE);
this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
this.parser.switchToPlaintextParsing();
}...
* @example
* ```js
*
* const parse5 = require('parse5');
* const fs = require('fs');
*
* const file = fs.createReadStream('war_and_peace.txt');
* const converter = new parse5.PlainTextConversionStream();
*
* converter.once('finish', () => {
* console.log(converter.document.childNodes[1].childNodes[0].tagName); //> 'head'
* });
*
* file.pipe(converter);
* ```
...super_ = function (options) {
WritableStream.call(this);
this.parser = new Parser(options);
this.lastChunkWritten = false;
this.writeCallback = null;
this.pausedByScript = false;
this.document = this.parser.treeAdapter.createDocument();
this.pendingHtmlInsertions = [];
this._resume = this._resume.bind(this);
this._documentWrite = this._documentWrite.bind(this);
this._scriptHandler = this._scriptHandler.bind(this);
this.parser._bootstrap(this.document, null);
}n/a
SAXParser = function (options) {
TransformStream.call(this);
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.tokenizer = new Tokenizer(options);
this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.tokenizer);
this.pendingText = null;
this.currentTokenLocation = void 0;
this.lastChunkWritten = false;
this.stopped = false;
// NOTE: always pipe stream to the /dev/null stream to avoid
// `highWaterMark` hit even if we don't have consumers.
// (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
this.pipe(new DevNullStream());
}...
* ```js
*
* const parse5 = require('parse5');
* const http = require('http');
* const fs = require('fs');
*
* const file = fs.createWriteStream('/home/google.com.html');
* const parser = new parse5.SAXParser();
*
* parser.on('text', text => {
* // Handle page text content
* ...
* });
*
* http.get('http://google.com', res => {
...function Transform(options) {
if (!(this instanceof Transform))
return new Transform(options);
Duplex.call(this, options);
this._transformState = new TransformState(this);
var stream = this;
// start out asking for a readable event once data is transformed.
this._readableState.needReadable = true;
// we have implemented the _read method, and done the other things
// that Readable wants before the first _read call, so unset the
// sync guard flag.
this._readableState.sync = false;
if (options) {
if (typeof options.transform === 'function')
this._transform = options.transform;
if (typeof options.flush === 'function')
this._flush = options.flush;
}
// When the writable side finishes, then flush out anything remaining.
this.once('prefinish', function() {
if (typeof this._flush === 'function')
this._flush(function(er) {
done(stream, er);
});
else
done(stream);
});
}n/a
_emitPendingText = function () {
if (this.pendingText !== null) {
this.emit('text', this.pendingText, this.currentTokenLocation);
this.pendingText = null;
}
}...
this.currentTokenLocation.endOffset = token.location.endOffset;
}
this.pendingText = (this.pendingText || '') + token.chars;
}
else {
this._emitPendingText();
this._handleToken(token);
}
} while (!this.stopped && token.type !== Tokenizer.EOF_TOKEN);
};
SAXParser.prototype._handleToken = function (token) {
if (this.options.locationInfo)
..._flush = function (callback) {
callback();
}n/a
_handleToken = function (token) {
if (this.options.locationInfo)
this.currentTokenLocation = token.location;
if (token.type === Tokenizer.START_TAG_TOKEN)
this.emit('startTag', token.tagName, token.attrs, token.selfClosing, this.currentTokenLocation);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this.emit('endTag', token.tagName, this.currentTokenLocation);
else if (token.type === Tokenizer.COMMENT_TOKEN)
this.emit('comment', token.data, this.currentTokenLocation);
else if (token.type === Tokenizer.DOCTYPE_TOKEN)
this.emit('doctype', token.name, token.publicId, token.systemId, this.currentTokenLocation);
}...
}
this.pendingText = (this.pendingText || '') + token.chars;
}
else {
this._emitPendingText();
this._handleToken(token);
}
} while (!this.stopped && token.type !== Tokenizer.EOF_TOKEN);
};
SAXParser.prototype._handleToken = function (token) {
if (this.options.locationInfo)
this.currentTokenLocation = token.location;
..._runParsingLoop = function () {
do {
var token = this.parserFeedbackSimulator.getNextToken();
if (token.type === Tokenizer.HIBERNATION_TOKEN)
break;
if (token.type === Tokenizer.CHARACTER_TOKEN ||
token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN ||
token.type === Tokenizer.NULL_CHARACTER_TOKEN) {
if (this.options.locationInfo) {
if (this.pendingText === null)
this.currentTokenLocation = token.location;
else
this.currentTokenLocation.endOffset = token.location.endOffset;
}
this.pendingText = (this.pendingText || '') + token.chars;
}
else {
this._emitPendingText();
this._handleToken(token);
}
} while (!this.stopped && token.type !== Tokenizer.EOF_TOKEN);
}...
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
..._transform = function (chunk, encoding, callback) {
if (!this.stopped) {
this.tokenizer.write(chunk.toString('utf8'), this.lastChunkWritten);
this._runParsingLoop();
}
this.push(chunk);
callback();
}n/a
end = function (chunk, encoding, callback) {
this.lastChunkWritten = true;
TransformStream.prototype.end.call(this, chunk, encoding, callback);
}...
* // `document.write` implemented using `documentWrite`.
* ...
* // Then resume parsing.
* resume();
* });
* });
*
* parser.end('<script src="example.com/script.js"></script
>');
* ```
*/
on(event: 'script', listener: (scriptElement: AST.Element, documentWrite: (html: string) => void, resume: () =>
; void) => void): this;
/**
* WritableStream events
*/
on(event: string, listener: Function): this;
...stop = function () {
this.stopped = true;
}...
*
* const file = fs.createWriteStream('google.com.html');
* const parser = new parse5.SAXParser();
*
* parser.on('doctype', (name, publicId, systemId) => {
* // Process doctype info ans stop parsing
* ...
* parser.stop();
* });
*
* http.get('http://google.com', res => {
* // Despite the fact that parser.stop() was called whole
* // content of the page will be written to the file
* res.pipe(parser).pipe(file);
* });
...SerializerStream = function (node, options) {
ReadableStream.call(this);
this.serializer = new Serializer(node, options);
Object.defineProperty(this.serializer, 'html', {
//NOTE: To make `+=` concat operator work properly we define
//getter which always returns empty string
get: function () {
return '';
},
set: this.push.bind(this)
});
}...
*
* // Uses the default tree adapter for parsing.
* const document = parse5.parse('<div></div>', {
* treeAdapter: parse5.treeAdapters.default
* });
*
* // Uses the htmlparser2 tree adapter with the SerializerStream.
* const serializer = new parse5.SerializerStream(node, {
* treeAdapter: parse5.treeAdapters.htmlparser2
* });
* ```
*/
export var treeAdapters: {
/**
* Default tree format for parse5.
...function Readable(options) {
if (!(this instanceof Readable))
return new Readable(options);
this._readableState = new ReadableState(options, this);
// legacy
this.readable = true;
if (options && typeof options.read === 'function')
this._read = options.read;
Stream.call(this);
}n/a
_read = function () {
this.serializer.serialize();
this.push(null);
}n/a
adoptAttributes = function (recipient, attrs) {
var recipientAttrsMap = [];
for (var i = 0; i < recipient.attrs.length; i++)
recipientAttrsMap.push(recipient.attrs[i].name);
for (var j = 0; j < attrs.length; j++) {
if (recipientAttrsMap.indexOf(attrs[j].name) === -1)
recipient.attrs.push(attrs[j]);
}
}...
p._reconstructActiveFormattingElements();
p._insertCharacters(token);
p.framesetOk = false;
}
function htmlStartTagInBody(p, token) {
if (p.openElements.tmplCount === 0)
p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
}
function bodyStartTagInBody(p, token) {
var bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
if (bodyElement && p.openElements.tmplCount === 0) {
p.framesetOk = false;
...appendChild = function (parentNode, newNode) {
parentNode.childNodes.push(newNode);
newNode.parentNode = parentNode;
}...
Parser.prototype._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.appendChild(parent, element);
}
};
Parser.prototype._appendElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
...createCommentNode = function (data) {
return {
nodeName: '#comment',
data: data,
parentNode: null
};
}...
var element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
this.treeAdapter.appendChild(this.openElements.current, element);
this.openElements.push(element);
};
Parser.prototype._appendCommentNode = function (token, parent) {
var commentNode = this.treeAdapter.createCommentNode(token.data);
this.treeAdapter.appendChild(parent, commentNode);
};
Parser.prototype._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
...createDocument = function () {
return {
nodeName: '#document',
mode: DOCUMENT_MODE.NO_QUIRKS,
childNodes: []
};
}...
if (this.options.locationInfo)
locationInfoMixin.assign(this);
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
...createDocumentFragment = function () {
return {
nodeName: '#document-fragment',
childNodes: []
};
}...
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
};
//Bootstrap parser
...createElement = function (tagName, namespaceURI, attrs) {
return {
nodeName: tagName,
tagName: tagName,
attrs: attrs,
namespaceURI: namespaceURI,
childNodes: [],
parentNode: null
};
}...
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
if (!fragmentContext)
fragmentContext = this.treeAdapter.createElement($.TEMPLATE, NS.HTML, []);
//NOTE: create fake element which will be used as 'document' for fragment parsing.
//This is important for jsdom there 'document' can't be recreated, therefore
//fragment parsing causes messing of the main `document`.
var documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
this._bootstrap(documentMock, fragmentContext);
...detachNode = function (node) {
if (node.parentNode) {
var idx = node.parentNode.childNodes.indexOf(node);
node.parentNode.childNodes.splice(idx, 1);
node.parentNode = null;
}
}...
Parser.prototype._adoptNodes = function (donor, recipient) {
while (true) {
var child = this.treeAdapter.getFirstChild(donor);
if (!child)
break;
this.treeAdapter.detachNode(child);
this.treeAdapter.appendChild(recipient, child);
}
};
//Token processing
Parser.prototype._shouldProcessTokenInForeignContent = function (token) {
var current = this._getAdjustedCurrentElement();
...getAttrList = function (element) {
return element.attrs;
}...
*
* const parse = require('parse5');
* const http = require('http');
*
* const parser = new parse5.ParserStream();
*
* parser.on('script', (scriptElement, documentWrite, resume) => {
* const src = parse5.treeAdapters.default.getAttrList(scriptElement)[0].value;
*
* http.get(src, res => {
* // Fetch the script content, execute it with DOM built around `parser.document` and
* // `document.write` implemented using `documentWrite`.
* ...
* // Then resume parsing.
* resume();
...getChildNodes = function (node) {
return node.childNodes;
}...
};
//Doctype
parser._setDocumentType = function (token) {
parserProto._setDocumentType.call(this, token);
var documentChildren = this.treeAdapter.getChildNodes(this.document),
cnLength = documentChildren.length;
for (var i = 0; i < cnLength; i++) {
var node = documentChildren[i];
if (this.treeAdapter.isDocumentTypeNode(node)) {
node.__location = token.location;
...getCommentNodeContent = function (commentNode) {
return commentNode.data;
}...
this.html += content;
else
this.html += Serializer.escapeString(content, false);
};
Serializer.prototype._serializeCommentNode = function (node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) +
x27;-->';
};
Serializer.prototype._serializeDocumentTypeNode = function (node) {
var name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) + '>';
};
...getDocumentMode = function (document) {
return document.mode;
}...
//12.2.5.4.1 The "initial" insertion mode
//------------------------------------------------------------------
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = BEFORE_HTML_MODE;
}
function tokenInInitialMode(p, token) {
...getDocumentTypeNodeName = function (doctypeNode) {
return doctypeNode.name;
}...
};
Serializer.prototype._serializeCommentNode = function (node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->';
};
Serializer.prototype._serializeDocumentTypeNode = function (node) {
var name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) + '>';
};
...getDocumentTypeNodePublicId = function (doctypeNode) {
return doctypeNode.publicId;
}n/a
getDocumentTypeNodeSystemId = function (doctypeNode) {
return doctypeNode.systemId;
}n/a
getFirstChild = function (node) {
return node.childNodes[0];
}...
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
};
...getNamespaceURI = function (element) {
return element.namespaceURI;
}...
//lightweight heuristics without thorough attributes check.
FormattingElementList.prototype._getNoahArkConditionCandidates = function (newElement) {
var candidates = [];
if (this.length >= NOAH_ARK_CAPACITY) {
var neAttrsLength = this.treeAdapter.getAttrList(newElement).length,
neTagName = this.treeAdapter.getTagName(newElement),
neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
for (var i = this.length - 1; i >= 0; i--) {
var entry = this.entries[i];
if (entry.type === FormattingElementList.MARKER_ENTRY)
break;
...getParentNode = function (node) {
return node.parentNode;
}...
do {
if (this.treeAdapter.getTagName(node) === $.FORM) {
this.formElement = node;
break;
}
node = this.treeAdapter.getParentNode(node);
} while (node);
};
Parser.prototype._initTokenizerForFragmentParsing = function () {
if (this.treeAdapter.getNamespaceURI(this.fragmentContext) === NS.HTML) {
var tn = this.treeAdapter.getTagName(this.fragmentContext);
...getTagName = function (element) {
return element.tagName;
}...
if (loc.attrs)
loc.startTag.attrs = loc.attrs;
}
if (closingToken.location) {
var ctLocation = closingToken.location,
tn = treeAdapter.getTagName(element),
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing tag and
// for cases like <td> <p> </td> - 'p' closes without a closing tag
isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN &&
tn === closingToken.tagName;
if (isClosingEndTag) {
loc.endTag = {
...getTemplateContent = function (templateElement) {
return templateElement.content;
}...
parserProto._insertElement.call(this, token, namespaceURI);
};
parser._insertTemplate = function (token) {
attachableElementLocation = token.location;
parserProto._insertTemplate.call(this, token);
var tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);
tmplContent.__location = null;
};
parser._insertFakeRootElement = function () {
parserProto._insertFakeRootElement.call(this);
this.openElements.current.__location = null;
...getTextNodeContent = function (textNode) {
return textNode.value;
}...
this.html += attr.namespace + ':' + attr.name;
this.html += '="' + value + '"';
}
};
Serializer.prototype._serializeTextNode = function (node) {
var content = this.treeAdapter.getTextNodeContent(node),
parent = this.treeAdapter.getParentNode(node),
parentTn = void 0;
if (parent && this.treeAdapter.isElementNode(parent))
parentTn = this.treeAdapter.getTagName(parent);
if (parentTn === $.STYLE || parentTn === $.SCRIPT || parentTn === $.XMP || parentTn === $.IFRAME ||
...insertBefore = function (parentNode, newNode, referenceNode) {
var insertionIdx = parentNode.childNodes.indexOf(referenceNode);
parentNode.childNodes.splice(insertionIdx, 0, newNode);
newNode.parentNode = parentNode;
}...
return location;
};
Parser.prototype._fosterParentElement = function (element) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
else
this.treeAdapter.appendChild(location.parent, element);
};
Parser.prototype._fosterParentText = function (chars) {
var location = this._findFosterParentingLocation();
...insertText = function (parentNode, text) {
if (parentNode.childNodes.length) {
var prevNode = parentNode.childNodes[parentNode.childNodes.length - 1];
if (prevNode.nodeName === '#text') {
prevNode.value += text;
return;
}
}
appendChild(parentNode, createTextNode(text));
}...
Parser.prototype._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.insertText(parent, token.chars);
}
};
Parser.prototype._adoptNodes = function (donor, recipient) {
while (true) {
var child = this.treeAdapter.getFirstChild(donor);
...insertTextBefore = function (parentNode, text, referenceNode) {
var prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
if (prevNode && prevNode.nodeName === '#text')
prevNode.value += text;
else
insertBefore(parentNode, createTextNode(text), referenceNode);
}...
this.treeAdapter.appendChild(location.parent, element);
};
Parser.prototype._fosterParentText = function (chars) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertTextBefore(location.parent, chars, location.beforeElement);
else
this.treeAdapter.insertText(location.parent, chars);
};
//Special elements
Parser.prototype._isSpecialElement = function (element) {
var tn = this.treeAdapter.getTagName(element),
...isCommentNode = function (node) {
return node.nodeName === '#comment';
}...
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
else if (this.treeAdapter.isDocumentTypeNode(currentNode))
this._serializeDocumentTypeNode(currentNode);
}
}
};
...isDocumentTypeNode = function (node) {
return node.nodeName === '#documentType';
}...
var documentChildren = this.treeAdapter.getChildNodes(this.document),
cnLength = documentChildren.length;
for (var i = 0; i < cnLength; i++) {
var node = documentChildren[i];
if (this.treeAdapter.isDocumentTypeNode(node)) {
node.__location = token.location;
break;
}
}
};
...isElementNode = function (node) {
return !!node.tagName;
}...
Serializer.prototype._serializeChildNodes = function (parentNode) {
var childNodes = this.treeAdapter.getChildNodes(parentNode);
if (childNodes) {
for (var i = 0, cnLength = childNodes.length; i < cnLength; i++) {
var currentNode = childNodes[i];
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
...isTextNode = function (node) {
return node.nodeName === '#text';
}...
if (childNodes) {
for (var i = 0, cnLength = childNodes.length; i < cnLength; i++) {
var currentNode = childNodes[i];
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
else if (this.treeAdapter.isDocumentTypeNode(currentNode))
this._serializeDocumentTypeNode(currentNode);
...setDocumentMode = function (document, mode) {
document.mode = mode;
}...
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = BEFORE_HTML_MODE;
}
function tokenInInitialMode(p, token) {
p.treeAdapter.setDocumentMode(p.document, HTML.DOCUMENT_MODE.QUIRKS);
p.insertionMode = BEFORE_HTML_MODE;
...setDocumentType = function (document, name, publicId, systemId) {
var doctypeNode = null;
for (var i = 0; i < document.childNodes.length; i++) {
if (document.childNodes[i].nodeName === '#documentType') {
doctypeNode = document.childNodes[i];
break;
}
}
if (doctypeNode) {
doctypeNode.name = name;
doctypeNode.publicId = publicId;
doctypeNode.systemId = systemId;
}
else {
appendChild(document, {
nodeName: '#documentType',
name: name,
publicId: publicId,
systemId: systemId
});
}
}...
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
}
};
//Tree mutation
Parser.prototype._setDocumentType = function (token) {
this.treeAdapter.setDocumentType(this.document, token.name, token.publicId, token.systemId
);
};
Parser.prototype._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
...setTemplateContent = function (templateElement, contentElement) {
templateElement.content = contentElement;
}...
this.openElements.push(element);
};
Parser.prototype._insertTemplate = function (token) {
var tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs),
content = this.treeAdapter.createDocumentFragment();
this.treeAdapter.setTemplateContent(tmpl, content);
this._attachElementToTree(tmpl);
this.openElements.push(tmpl);
};
Parser.prototype._insertFakeRootElement = function () {
var element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
...dev_null_stream = function () {
WritableStream.call(this);
}n/a
function Writable(options) {
// Writable ctor is applied to Duplexes, too.
// `realHasInstance` is necessary because using plain `instanceof`
// would return false, as no `_writableState` property is attached.
// Trying to use the custom `instanceof` for Writable here will also break the
// Node.js LazyTransform implementation, which has a non-trivial getter for
// `_writableState` that would lead to infinite recursion.
if (!(realHasInstance.call(Writable, this)) &&
!(this instanceof Stream.Duplex)) {
return new Writable(options);
}
this._writableState = new WritableState(options, this);
// legacy.
this.writable = true;
if (options) {
if (typeof options.write === 'function')
this._write = options.write;
if (typeof options.writev === 'function')
this._writev = options.writev;
}
Stream.call(this);
}n/a
_write = function (chunk, encoding, cb) {
cb();
}n/a
getDocumentMode = function (name, publicId, systemId) {
if (name !== VALID_DOCTYPE_NAME)
return DOCUMENT_MODE.QUIRKS;
if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID)
return DOCUMENT_MODE.QUIRKS;
if (publicId !== null) {
publicId = publicId.toLowerCase();
if (QUIRKS_MODE_PUBLIC_IDS.indexOf(publicId) > -1)
return DOCUMENT_MODE.QUIRKS;
var prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
if (hasPrefix(publicId, prefixes))
return DOCUMENT_MODE.QUIRKS;
prefixes = systemId === null ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
if (hasPrefix(publicId, prefixes))
return DOCUMENT_MODE.LIMITED_QUIRKS;
}
return DOCUMENT_MODE.NO_QUIRKS;
}...
//12.2.5.4.1 The "initial" insertion mode
//------------------------------------------------------------------
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = BEFORE_HTML_MODE;
}
function tokenInInitialMode(p, token) {
...serializeContent = function (name, publicId, systemId) {
var str = '!DOCTYPE ';
if (name)
str += name;
if (publicId !== null)
str += ' PUBLIC ' + enquoteDoctypeId(publicId);
else if (systemId !== null)
str += ' SYSTEM';
if (systemId !== null)
str += ' ' + enquoteDoctypeId(systemId);
return str;
}...
Serializer.prototype._serializeCommentNode = function (node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->';
};
Serializer.prototype._serializeDocumentTypeNode = function (node) {
var name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) +
x27;>';
};
...adjustTokenMathMLAttrs = function (token) {
for (var i = 0; i < token.attrs.length; i++) {
if (token.attrs[i].name === DEFINITION_URL_ATTR) {
token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR;
break;
}
}
}...
p._insertElement(token, NS.HTML);
}
function mathStartTagInBody(p, token) {
p._reconstructActiveFormattingElements();
foreignContent.adjustTokenMathMLAttrs(token);
foreignContent.adjustTokenXMLAttrs(token);
if (token.selfClosing)
p._appendElement(token, NS.MATHML);
else
p._insertElement(token, NS.MATHML);
}
...adjustTokenSVGAttrs = function (token) {
for (var i = 0; i < token.attrs.length; i++) {
var adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
if (adjustedAttrName)
token.attrs[i].name = adjustedAttrName;
}
}...
else
p._insertElement(token, NS.MATHML);
}
function svgStartTagInBody(p, token) {
p._reconstructActiveFormattingElements();
foreignContent.adjustTokenSVGAttrs(token);
foreignContent.adjustTokenXMLAttrs(token);
if (token.selfClosing)
p._appendElement(token, NS.SVG);
else
p._insertElement(token, NS.SVG);
}
...adjustTokenSVGTagName = function (token) {
var adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName];
if (adjustedTagName)
token.tagName = adjustedTagName;
}...
var current = p._getAdjustedCurrentElement(),
currentNs = p.treeAdapter.getNamespaceURI(current);
if (currentNs === NS.MATHML)
foreignContent.adjustTokenMathMLAttrs(token);
else if (currentNs === NS.SVG) {
foreignContent.adjustTokenSVGTagName(token);
foreignContent.adjustTokenSVGAttrs(token);
}
foreignContent.adjustTokenXMLAttrs(token);
if (token.selfClosing)
p._appendElement(token, currentNs);
...adjustTokenXMLAttrs = function (token) {
for (var i = 0; i < token.attrs.length; i++) {
var adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name];
if (adjustedAttrEntry) {
token.attrs[i].prefix = adjustedAttrEntry.prefix;
token.attrs[i].name = adjustedAttrEntry.name;
token.attrs[i].namespace = adjustedAttrEntry.namespace;
}
}
}...
p._insertElement(token, NS.HTML);
}
function mathStartTagInBody(p, token) {
p._reconstructActiveFormattingElements();
foreignContent.adjustTokenMathMLAttrs(token);
foreignContent.adjustTokenXMLAttrs(token);
if (token.selfClosing)
p._appendElement(token, NS.MATHML);
else
p._insertElement(token, NS.MATHML);
}
...causesExit = function (startTagToken) {
var tn = startTagToken.tagName;
var isFontWithAttrs = tn === $.FONT && (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null ||
Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null);
return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn];
}...
function characterInForeignContent(p, token) {
p._insertCharacters(token);
p.framesetOk = false;
}
function startTagInForeignContent(p, token) {
if (foreignContent.causesExit(token) && !p.fragmentContext) {
while (p.treeAdapter.getNamespaceURI(p.openElements.current) !== NS.HTML && !p._isIntegrationPoint(p.openElements.
current))
p.openElements.pop();
p._processToken(token);
}
else {
...isIntegrationPoint = function (tn, ns, attrs, foreignNS) {
if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs))
return true;
if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns))
return true;
return false;
}...
//Integration points
Parser.prototype._isIntegrationPoint = function (element, foreignNS) {
var tn = this.treeAdapter.getTagName(element),
ns = this.treeAdapter.getNamespaceURI(element),
attrs = this.treeAdapter.getAttrList(element);
return foreignContent.isIntegrationPoint(tn, ns, attrs, foreignNS);
};
//Active formatting elements reconstruction
Parser.prototype._reconstructActiveFormattingElements = function () {
var listLength = this.activeFormattingElements.length;
if (listLength) {
...formatting_element_list = function (treeAdapter) {
this.length = 0;
this.entries = [];
this.treeAdapter = treeAdapter;
this.bookmark = null;
}n/a
_ensureNoahArkCondition = function (newElement) {
var candidates = this._getNoahArkConditionCandidates(newElement),
cLength = candidates.length;
if (cLength) {
var neAttrs = this.treeAdapter.getAttrList(newElement),
neAttrsLength = neAttrs.length,
neAttrsMap = Object.create(null);
//NOTE: build attrs map for the new element so we can perform fast lookups
for (var i = 0; i < neAttrsLength; i++) {
var neAttr = neAttrs[i];
neAttrsMap[neAttr.name] = neAttr.value;
}
for (i = 0; i < neAttrsLength; i++) {
for (var j = 0; j < cLength; j++) {
var cAttr = candidates[j].attrs[i];
if (neAttrsMap[cAttr.name] !== cAttr.value) {
candidates.splice(j, 1);
cLength--;
}
if (candidates.length < NOAH_ARK_CAPACITY)
return;
}
}
//NOTE: remove bottommost candidates until Noah's Ark condition will not be met
for (i = cLength - 1; i >= NOAH_ARK_CAPACITY - 1; i--) {
this.entries.splice(candidates[i].idx, 1);
this.length--;
}
}
}...
//Mutations
FormattingElementList.prototype.insertMarker = function () {
this.entries.push({type: FormattingElementList.MARKER_ENTRY});
this.length++;
};
FormattingElementList.prototype.pushElement = function (element, token) {
this._ensureNoahArkCondition(element);
this.entries.push({
type: FormattingElementList.ELEMENT_ENTRY,
element: element,
token: token
});
..._getNoahArkConditionCandidates = function (newElement) {
var candidates = [];
if (this.length >= NOAH_ARK_CAPACITY) {
var neAttrsLength = this.treeAdapter.getAttrList(newElement).length,
neTagName = this.treeAdapter.getTagName(newElement),
neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
for (var i = this.length - 1; i >= 0; i--) {
var entry = this.entries[i];
if (entry.type === FormattingElementList.MARKER_ENTRY)
break;
var element = entry.element,
elementAttrs = this.treeAdapter.getAttrList(element),
isCandidate = this.treeAdapter.getTagName(element) === neTagName &&
this.treeAdapter.getNamespaceURI(element) === neNamespaceURI &&
elementAttrs.length === neAttrsLength;
if (isCandidate)
candidates.push({idx: i, attrs: elementAttrs});
}
}
return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
}...
}
}
return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
};
FormattingElementList.prototype._ensureNoahArkCondition = function (newElement) {
var candidates = this._getNoahArkConditionCandidates(newElement),
cLength = candidates.length;
if (cLength) {
var neAttrs = this.treeAdapter.getAttrList(newElement),
neAttrsLength = neAttrs.length,
neAttrsMap = Object.create(null);
...clearToLastMarker = function () {
while (this.length) {
var entry = this.entries.pop();
this.length--;
if (entry.type === FormattingElementList.MARKER_ENTRY)
break;
}
}...
}
};
//Close elements
Parser.prototype._closeTableCell = function () {
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
};
Parser.prototype._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
};
...getElementEntry = function (element) {
for (var i = this.length - 1; i >= 0; i--) {
var entry = this.entries[i];
if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element)
return entry;
}
return null;
}...
var lastElement = furthestBlock,
nextElement = p.openElements.getCommonAncestor(furthestBlock);
for (var i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
//NOTE: store next element for the next loop iteration (it may be deleted from the stack by step 9.5)
nextElement = p.openElements.getCommonAncestor(element);
var elementEntry = p.activeFormattingElements.getElementEntry(element),
counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER,
shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
if (shouldRemoveFromOpenElements) {
if (counterOverflow)
p.activeFormattingElements.removeEntry(elementEntry);
...getElementEntryInScopeWithTagName = function (tagName) {
for (var i = this.length - 1; i >= 0; i--) {
var entry = this.entries[i];
if (entry.type === FormattingElementList.MARKER_ENTRY)
return null;
if (this.treeAdapter.getTagName(entry.element) === tagName)
return entry;
}
return null;
}...
//Adoption agency algorithm
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency)
//------------------------------------------------------------------
//Steps 5-8 of the algorithm
function aaObtainFormattingElementEntry(p, token) {
var formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
if (formattingElementEntry) {
if (!p.openElements.contains(formattingElementEntry.element)) {
p.activeFormattingElements.removeEntry(formattingElementEntry);
formattingElementEntry = null;
}
...insertElementAfterBookmark = function (element, token) {
var bookmarkIdx = this.length - 1;
for (; bookmarkIdx >= 0; bookmarkIdx--) {
if (this.entries[bookmarkIdx] === this.bookmark)
break;
}
this.entries.splice(bookmarkIdx + 1, 0, {
type: FormattingElementList.ELEMENT_ENTRY,
element: element,
token: token
});
this.length++;
}...
var ns = p.treeAdapter.getNamespaceURI(formattingElementEntry.element),
token = formattingElementEntry.token,
newElement = p.treeAdapter.createElement(token.tagName, ns, token.attrs);
p._adoptNodes(furthestBlock, newElement);
p.treeAdapter.appendChild(furthestBlock, newElement);
p.activeFormattingElements.insertElementAfterBookmark(newElement, formattingElementEntry
.token);
p.activeFormattingElements.removeEntry(formattingElementEntry);
p.openElements.remove(formattingElementEntry.element);
p.openElements.insertAfter(furthestBlock, newElement);
}
//Algorithm entry point
...insertMarker = function () {
this.entries.push({type: FormattingElementList.MARKER_ENTRY});
this.length++;
}...
p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
else if (tn === $.SCRIPT)
p._switchToTextParsing(token, Tokenizer.MODE.SCRIPT_DATA);
else if (tn === $.TEMPLATE) {
p._insertTemplate(token, NS.HTML);
p.activeFormattingElements.insertMarker();
p.framesetOk = false;
p.insertionMode = IN_TEMPLATE_MODE;
p._pushTmplInsertionMode(IN_TEMPLATE_MODE);
}
else if (tn !== $.HEAD)
tokenInHead(p, token);
...pushElement = function (element, token) {
this._ensureNoahArkCondition(element);
this.entries.push({
type: FormattingElementList.ELEMENT_ENTRY,
element: element,
token: token
});
this.length++;
}...
callAdoptionAgency(p, token);
p.openElements.remove(activeElementEntry.element);
p.activeFormattingElements.removeEntry(activeElementEntry);
}
p._reconstructActiveFormattingElements();
p._insertElement(token, NS.HTML);
p.activeFormattingElements.pushElement(p.openElements.current, token);
}
function bStartTagInBody(p, token) {
p._reconstructActiveFormattingElements();
p._insertElement(token, NS.HTML);
p.activeFormattingElements.pushElement(p.openElements.current, token);
}
...removeEntry = function (entry) {
for (var i = this.length - 1; i >= 0; i--) {
if (this.entries[i] === entry) {
this.entries.splice(i, 1);
this.length--;
break;
}
}
}...
//Steps 5-8 of the algorithm
function aaObtainFormattingElementEntry(p, token) {
var formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
if (formattingElementEntry) {
if (!p.openElements.contains(formattingElementEntry.element)) {
p.activeFormattingElements.removeEntry(formattingElementEntry);
formattingElementEntry = null;
}
else if (!p.openElements.hasInScope(token.tagName))
formattingElementEntry = null;
}
...adoptAttributes = function (recipient, attrs) {
for (var i = 0; i < attrs.length; i++) {
var attrName = attrs[i].name;
if (typeof recipient.attribs[attrName] === 'undefined') {
recipient.attribs[attrName] = attrs[i].value;
recipient['x-attribsNamespace'][attrName] = attrs[i].namespace;
recipient['x-attribsPrefix'][attrName] = attrs[i].prefix;
}
}
}...
p._reconstructActiveFormattingElements();
p._insertCharacters(token);
p.framesetOk = false;
}
function htmlStartTagInBody(p, token) {
if (p.openElements.tmplCount === 0)
p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
}
function bodyStartTagInBody(p, token) {
var bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
if (bodyElement && p.openElements.tmplCount === 0) {
p.framesetOk = false;
...appendChild = function (parentNode, newNode) {
var prev = parentNode.children[parentNode.children.length - 1];
if (prev) {
prev.next = newNode;
newNode.prev = prev;
}
parentNode.children.push(newNode);
newNode.parent = parentNode;
}...
Parser.prototype._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.appendChild(parent, element);
}
};
Parser.prototype._appendElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
...createCommentNode = function (data) {
return new Node({
type: 'comment',
data: data,
parent: null,
prev: null,
next: null
});
}...
var element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
this.treeAdapter.appendChild(this.openElements.current, element);
this.openElements.push(element);
};
Parser.prototype._appendCommentNode = function (token, parent) {
var commentNode = this.treeAdapter.createCommentNode(token.data);
this.treeAdapter.appendChild(parent, commentNode);
};
Parser.prototype._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
...createDocument = function () {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: [],
'x-mode': DOCUMENT_MODE.NO_QUIRKS
});
}...
if (this.options.locationInfo)
locationInfoMixin.assign(this);
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
...createDocumentFragment = function () {
return new Node({
type: 'root',
name: 'root',
parent: null,
prev: null,
next: null,
children: []
});
}...
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
};
//Bootstrap parser
...createElement = function (tagName, namespaceURI, attrs) {
var attribs = Object.create(null),
attribsNamespace = Object.create(null),
attribsPrefix = Object.create(null);
for (var i = 0; i < attrs.length; i++) {
var attrName = attrs[i].name;
attribs[attrName] = attrs[i].value;
attribsNamespace[attrName] = attrs[i].namespace;
attribsPrefix[attrName] = attrs[i].prefix;
}
return new Node({
type: tagName === 'script' || tagName === 'style' ? tagName : 'tag',
name: tagName,
namespace: namespaceURI,
attribs: attribs,
'x-attribsNamespace': attribsNamespace,
'x-attribsPrefix': attribsPrefix,
children: [],
parent: null,
prev: null,
next: null
});
}...
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
if (!fragmentContext)
fragmentContext = this.treeAdapter.createElement($.TEMPLATE, NS.HTML, []);
//NOTE: create fake element which will be used as 'document' for fragment parsing.
//This is important for jsdom there 'document' can't be recreated, therefore
//fragment parsing causes messing of the main `document`.
var documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
this._bootstrap(documentMock, fragmentContext);
...detachNode = function (node) {
if (node.parent) {
var idx = node.parent.children.indexOf(node),
prev = node.prev,
next = node.next;
node.prev = null;
node.next = null;
if (prev)
prev.next = next;
if (next)
next.prev = prev;
node.parent.children.splice(idx, 1);
node.parent = null;
}
}...
Parser.prototype._adoptNodes = function (donor, recipient) {
while (true) {
var child = this.treeAdapter.getFirstChild(donor);
if (!child)
break;
this.treeAdapter.detachNode(child);
this.treeAdapter.appendChild(recipient, child);
}
};
//Token processing
Parser.prototype._shouldProcessTokenInForeignContent = function (token) {
var current = this._getAdjustedCurrentElement();
...getAttrList = function (element) {
var attrList = [];
for (var name in element.attribs) {
attrList.push({
name: name,
value: element.attribs[name],
namespace: element['x-attribsNamespace'][name],
prefix: element['x-attribsPrefix'][name]
});
}
return attrList;
}...
*
* const parse = require('parse5');
* const http = require('http');
*
* const parser = new parse5.ParserStream();
*
* parser.on('script', (scriptElement, documentWrite, resume) => {
* const src = parse5.treeAdapters.default.getAttrList(scriptElement)[0].value;
*
* http.get(src, res => {
* // Fetch the script content, execute it with DOM built around `parser.document` and
* // `document.write` implemented using `documentWrite`.
* ...
* // Then resume parsing.
* resume();
...getChildNodes = function (node) {
return node.children;
}...
};
//Doctype
parser._setDocumentType = function (token) {
parserProto._setDocumentType.call(this, token);
var documentChildren = this.treeAdapter.getChildNodes(this.document),
cnLength = documentChildren.length;
for (var i = 0; i < cnLength; i++) {
var node = documentChildren[i];
if (this.treeAdapter.isDocumentTypeNode(node)) {
node.__location = token.location;
...getCommentNodeContent = function (commentNode) {
return commentNode.data;
}...
this.html += content;
else
this.html += Serializer.escapeString(content, false);
};
Serializer.prototype._serializeCommentNode = function (node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) +
x27;-->';
};
Serializer.prototype._serializeDocumentTypeNode = function (node) {
var name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) + '>';
};
...getDocumentMode = function (document) {
return document['x-mode'];
}...
//12.2.5.4.1 The "initial" insertion mode
//------------------------------------------------------------------
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = BEFORE_HTML_MODE;
}
function tokenInInitialMode(p, token) {
...getDocumentTypeNodeName = function (doctypeNode) {
return doctypeNode['x-name'];
}...
};
Serializer.prototype._serializeCommentNode = function (node) {
this.html += '<!--' + this.treeAdapter.getCommentNodeContent(node) + '-->';
};
Serializer.prototype._serializeDocumentTypeNode = function (node) {
var name = this.treeAdapter.getDocumentTypeNodeName(node);
this.html += '<' + doctype.serializeContent(name, null, null) + '>';
};
...getDocumentTypeNodePublicId = function (doctypeNode) {
return doctypeNode['x-publicId'];
}n/a
getDocumentTypeNodeSystemId = function (doctypeNode) {
return doctypeNode['x-systemId'];
}n/a
getFirstChild = function (node) {
return node.children[0];
}...
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
};
...getNamespaceURI = function (element) {
return element.namespace;
}...
//lightweight heuristics without thorough attributes check.
FormattingElementList.prototype._getNoahArkConditionCandidates = function (newElement) {
var candidates = [];
if (this.length >= NOAH_ARK_CAPACITY) {
var neAttrsLength = this.treeAdapter.getAttrList(newElement).length,
neTagName = this.treeAdapter.getTagName(newElement),
neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
for (var i = this.length - 1; i >= 0; i--) {
var entry = this.entries[i];
if (entry.type === FormattingElementList.MARKER_ENTRY)
break;
...getParentNode = function (node) {
return node.parent;
}...
do {
if (this.treeAdapter.getTagName(node) === $.FORM) {
this.formElement = node;
break;
}
node = this.treeAdapter.getParentNode(node);
} while (node);
};
Parser.prototype._initTokenizerForFragmentParsing = function () {
if (this.treeAdapter.getNamespaceURI(this.fragmentContext) === NS.HTML) {
var tn = this.treeAdapter.getTagName(this.fragmentContext);
...getTagName = function (element) {
return element.name;
}...
if (loc.attrs)
loc.startTag.attrs = loc.attrs;
}
if (closingToken.location) {
var ctLocation = closingToken.location,
tn = treeAdapter.getTagName(element),
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing tag and
// for cases like <td> <p> </td> - 'p' closes without a closing tag
isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN &&
tn === closingToken.tagName;
if (isClosingEndTag) {
loc.endTag = {
...getTemplateContent = function (templateElement) {
return templateElement.children[0];
}...
parserProto._insertElement.call(this, token, namespaceURI);
};
parser._insertTemplate = function (token) {
attachableElementLocation = token.location;
parserProto._insertTemplate.call(this, token);
var tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);
tmplContent.__location = null;
};
parser._insertFakeRootElement = function () {
parserProto._insertFakeRootElement.call(this);
this.openElements.current.__location = null;
...getTextNodeContent = function (textNode) {
return textNode.data;
}...
this.html += attr.namespace + ':' + attr.name;
this.html += '="' + value + '"';
}
};
Serializer.prototype._serializeTextNode = function (node) {
var content = this.treeAdapter.getTextNodeContent(node),
parent = this.treeAdapter.getParentNode(node),
parentTn = void 0;
if (parent && this.treeAdapter.isElementNode(parent))
parentTn = this.treeAdapter.getTagName(parent);
if (parentTn === $.STYLE || parentTn === $.SCRIPT || parentTn === $.XMP || parentTn === $.IFRAME ||
...insertBefore = function (parentNode, newNode, referenceNode) {
var insertionIdx = parentNode.children.indexOf(referenceNode),
prev = referenceNode.prev;
if (prev) {
prev.next = newNode;
newNode.prev = prev;
}
referenceNode.prev = newNode;
newNode.next = referenceNode;
parentNode.children.splice(insertionIdx, 0, newNode);
newNode.parent = parentNode;
}...
return location;
};
Parser.prototype._fosterParentElement = function (element) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
else
this.treeAdapter.appendChild(location.parent, element);
};
Parser.prototype._fosterParentText = function (chars) {
var location = this._findFosterParentingLocation();
...insertText = function (parentNode, text) {
var lastChild = parentNode.children[parentNode.children.length - 1];
if (lastChild && lastChild.type === 'text')
lastChild.data += text;
else
appendChild(parentNode, createTextNode(text));
}...
Parser.prototype._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.insertText(parent, token.chars);
}
};
Parser.prototype._adoptNodes = function (donor, recipient) {
while (true) {
var child = this.treeAdapter.getFirstChild(donor);
...insertTextBefore = function (parentNode, text, referenceNode) {
var prevNode = parentNode.children[parentNode.children.indexOf(referenceNode) - 1];
if (prevNode && prevNode.type === 'text')
prevNode.data += text;
else
insertBefore(parentNode, createTextNode(text), referenceNode);
}...
this.treeAdapter.appendChild(location.parent, element);
};
Parser.prototype._fosterParentText = function (chars) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertTextBefore(location.parent, chars, location.beforeElement);
else
this.treeAdapter.insertText(location.parent, chars);
};
//Special elements
Parser.prototype._isSpecialElement = function (element) {
var tn = this.treeAdapter.getTagName(element),
...isCommentNode = function (node) {
return node.type === 'comment';
}...
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
else if (this.treeAdapter.isDocumentTypeNode(currentNode))
this._serializeDocumentTypeNode(currentNode);
}
}
};
...isDocumentTypeNode = function (node) {
return node.type === 'directive' && node.name === '!doctype';
}...
var documentChildren = this.treeAdapter.getChildNodes(this.document),
cnLength = documentChildren.length;
for (var i = 0; i < cnLength; i++) {
var node = documentChildren[i];
if (this.treeAdapter.isDocumentTypeNode(node)) {
node.__location = token.location;
break;
}
}
};
...isElementNode = function (node) {
return !!node.attribs;
}...
Serializer.prototype._serializeChildNodes = function (parentNode) {
var childNodes = this.treeAdapter.getChildNodes(parentNode);
if (childNodes) {
for (var i = 0, cnLength = childNodes.length; i < cnLength; i++) {
var currentNode = childNodes[i];
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
...isTextNode = function (node) {
return node.type === 'text';
}...
if (childNodes) {
for (var i = 0, cnLength = childNodes.length; i < cnLength; i++) {
var currentNode = childNodes[i];
if (this.treeAdapter.isElementNode(currentNode))
this._serializeElement(currentNode);
else if (this.treeAdapter.isTextNode(currentNode))
this._serializeTextNode(currentNode);
else if (this.treeAdapter.isCommentNode(currentNode))
this._serializeCommentNode(currentNode);
else if (this.treeAdapter.isDocumentTypeNode(currentNode))
this._serializeDocumentTypeNode(currentNode);
...setDocumentMode = function (document, mode) {
document['x-mode'] = mode;
}...
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = BEFORE_HTML_MODE;
}
function tokenInInitialMode(p, token) {
p.treeAdapter.setDocumentMode(p.document, HTML.DOCUMENT_MODE.QUIRKS);
p.insertionMode = BEFORE_HTML_MODE;
...setDocumentType = function (document, name, publicId, systemId) {
var data = doctype.serializeContent(name, publicId, systemId),
doctypeNode = null;
for (var i = 0; i < document.children.length; i++) {
if (document.children[i].type === 'directive' && document.children[i].name === '!doctype') {
doctypeNode = document.children[i];
break;
}
}
if (doctypeNode) {
doctypeNode.data = data;
doctypeNode['x-name'] = name;
doctypeNode['x-publicId'] = publicId;
doctypeNode['x-systemId'] = systemId;
}
else {
appendChild(document, new Node({
type: 'directive',
name: '!doctype',
data: data,
'x-name': name,
'x-publicId': publicId,
'x-systemId': systemId
}));
}
}...
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
}
};
//Tree mutation
Parser.prototype._setDocumentType = function (token) {
this.treeAdapter.setDocumentType(this.document, token.name, token.publicId, token.systemId
);
};
Parser.prototype._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
...setTemplateContent = function (templateElement, contentElement) {
appendChild(templateElement, contentElement);
}...
this.openElements.push(element);
};
Parser.prototype._insertTemplate = function (token) {
var tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs),
content = this.treeAdapter.createDocumentFragment();
this.treeAdapter.setTemplateContent(tmpl, content);
this._attachElementToTree(tmpl);
this.openElements.push(tmpl);
};
Parser.prototype._insertFakeRootElement = function () {
var element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
...index = function (options) {
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
if (this.options.locationInfo)
locationInfoMixin.assign(this);
}n/a
_adoptNodes = function (donor, recipient) {
while (true) {
var child = this.treeAdapter.getFirstChild(donor);
if (!child)
break;
this.treeAdapter.detachNode(child);
this.treeAdapter.appendChild(recipient, child);
}
}...
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
};
//Bootstrap parser
Parser.prototype._bootstrap = function (document, fragmentContext) {
this.tokenizer = new Tokenizer(this.options);
..._appendCommentNode = function (token, parent) {
var commentNode = this.treeAdapter.createCommentNode(token.data);
this.treeAdapter.appendChild(parent, commentNode);
}...
//Generic token handlers
//------------------------------------------------------------------
function ignoreToken() {
//NOTE: do nothing =)
}
function appendComment(p, token) {
p._appendCommentNode(token, p.openElements.currentTmplContent || p.openElements.current
);
}
function appendCommentToRootHtmlElement(p, token) {
p._appendCommentNode(token, p.openElements.items[0]);
}
function appendCommentToDocument(p, token) {
..._appendElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
}...
function startTagInHead(p, token) {
var tn = token.tagName;
if (tn === $.HTML)
startTagInBody(p, token);
else if (tn === $.BASE || tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META)
p._appendElement(token, NS.HTML);
else if (tn === $.TITLE)
p._switchToTextParsing(token, Tokenizer.MODE.RCDATA);
//NOTE: here we assume that we always act as an interactive user agent with enabled scripting, so we parse
//<noscript> as a rawtext.
else if (tn === $.NOSCRIPT || tn === $.NOFRAMES || tn === $.STYLE)
..._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.appendChild(parent, element);
}
}...
this.treeAdapter.appendChild(parent, element);
}
};
Parser.prototype._appendElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
};
Parser.prototype._insertElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
this.openElements.push(element);
..._bootstrap = function (document, fragmentContext) {
this.tokenizer = new Tokenizer(this.options);
this.stopped = false;
this.insertionMode = INITIAL_MODE;
this.originalInsertionMode = '';
this.document = document;
this.fragmentContext = fragmentContext;
this.headElement = null;
this.formElement = null;
this.openElements = new OpenElementStack(this.document, this.treeAdapter);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
this.tmplInsertionModeStack = [];
this.tmplInsertionModeStackTop = -1;
this.currentTmplInsertionMode = null;
this.pendingCharacterTokens = [];
this.hasNonWhitespacePendingCharacterToken = false;
this.framesetOk = true;
this.skipNextNewLine = false;
this.fosterParentingEnabled = false;
}...
locationInfoMixin.assign(this);
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
..._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
}...
p._insertElement(token, NS.HTML);
p.insertionMode = IN_FRAMESET_MODE;
}
}
function addressStartTagInBody(p, token) {
if (p.openElements.hasInButtonScope($.P))
p._closePElement();
p._insertElement(token, NS.HTML);
}
function numberedHeaderStartTagInBody(p, token) {
if (p.openElements.hasInButtonScope($.P))
p._closePElement();
..._closeTableCell = function () {
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
}...
function startTagInCell(p, token) {
var tn = token.tagName;
if (tn === $.CAPTION || tn === $.COL || tn === $.COLGROUP || tn === $.TBODY ||
tn === $.TD || tn === $.TFOOT || tn === $.TH || tn === $.THEAD || tn === $.TR) {
if (p.openElements.hasInTableScope($.TD) || p.openElements.hasInTableScope($.TH)) {
p._closeTableCell();
p._processToken(token);
}
}
else
startTagInBody(p, token);
}
..._findFormInFragmentContext = function () {
var node = this.fragmentContext;
do {
if (this.treeAdapter.getTagName(node) === $.FORM) {
this.formElement = node;
break;
}
node = this.treeAdapter.getParentNode(node);
} while (node);
}...
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
..._findFosterParentingLocation = function () {
var location = {
parent: null,
beforeElement: null
};
for (var i = this.openElements.stackTop; i >= 0; i--) {
var openElement = this.openElements.items[i],
tn = this.treeAdapter.getTagName(openElement),
ns = this.treeAdapter.getNamespaceURI(openElement);
if (tn === $.TEMPLATE && ns === NS.HTML) {
location.parent = this.treeAdapter.getTemplateContent(openElement);
break;
}
else if (tn === $.TABLE) {
location.parent = this.treeAdapter.getParentNode(openElement);
if (location.parent)
location.beforeElement = openElement;
else
location.parent = this.openElements.items[i - 1];
break;
}
}
if (!location.parent)
location.parent = this.openElements.items[0];
return location;
}...
if (!location.parent)
location.parent = this.openElements.items[0];
return location;
};
Parser.prototype._fosterParentElement = function (element) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
else
this.treeAdapter.appendChild(location.parent, element);
};
..._fosterParentElement = function (element) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
else
this.treeAdapter.appendChild(location.parent, element);
}...
//Tree mutation
Parser.prototype._setDocumentType = function (token) {
this.treeAdapter.setDocumentType(this.document, token.name, token.publicId, token.systemId);
};
Parser.prototype._attachElementToTree = function (element) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentElement(element);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.appendChild(parent, element);
}
};
..._fosterParentText = function (chars) {
var location = this._findFosterParentingLocation();
if (location.beforeElement)
this.treeAdapter.insertTextBefore(location.parent, chars, location.beforeElement);
else
this.treeAdapter.insertText(location.parent, chars);
}...
var commentNode = this.treeAdapter.createCommentNode(token.data);
this.treeAdapter.appendChild(parent, commentNode);
};
Parser.prototype._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.insertText(parent, token.chars);
}
};
..._getAdjustedCurrentElement = function () {
return this.openElements.stackTop === 0 && this.fragmentContext ?
this.fragmentContext :
this.openElements.current;
}...
if (writeCallback)
writeCallback();
};
//Text parsing
Parser.prototype._setupTokenizerCDATAMode = function () {
var current = this._getAdjustedCurrentElement();
this.tokenizer.allowCDATA = current && current !== this.document &&
this.treeAdapter.getNamespaceURI(current) !== NS.HTML && !this._isIntegrationPoint(current
);
};
Parser.prototype._switchToTextParsing = function (currentToken, nextTokenizerState) {
this._insertElement(currentToken, NS.HTML);
..._initTokenizerForFragmentParsing = function () {
if (this.treeAdapter.getNamespaceURI(this.fragmentContext) === NS.HTML) {
var tn = this.treeAdapter.getTagName(this.fragmentContext);
if (tn === $.TITLE || tn === $.TEXTAREA)
this.tokenizer.state = Tokenizer.MODE.RCDATA;
else if (tn === $.STYLE || tn === $.XMP || tn === $.IFRAME ||
tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT)
this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
else if (tn === $.SCRIPT)
this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
}
}...
var documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
this._bootstrap(documentMock, fragmentContext);
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
..._insertCharacters = function (token) {
if (this._shouldFosterParentOnInsertion())
this._fosterParentText(token.chars);
else {
var parent = this.openElements.currentTmplContent || this.openElements.current;
this.treeAdapter.insertText(parent, token.chars);
}
}...
}
function appendCommentToDocument(p, token) {
p._appendCommentNode(token, p.document);
}
function insertCharacters(p, token) {
p._insertCharacters(token);
}
function stopParsing(p) {
p.stopped = true;
}
//12.2.5.4.1 The "initial" insertion mode
..._insertElement = function (token, namespaceURI) {
var element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element);
this.openElements.push(element);
}...
var current = this._getAdjustedCurrentElement();
this.tokenizer.allowCDATA = current && current !== this.document &&
this.treeAdapter.getNamespaceURI(current) !== NS.HTML && !this._isIntegrationPoint(current
);
};
Parser.prototype._switchToTextParsing = function (currentToken, nextTokenizerState) {
this._insertElement(currentToken, NS.HTML);
this.tokenizer.state = nextTokenizerState;
this.originalInsertionMode = this.insertionMode;
this.insertionMode = TEXT_MODE;
};
Parser.prototype.switchToPlaintextParsing = function () {
this.insertionMode = TEXT_MODE;
..._insertFakeElement = function (tagName) {
var element = this.treeAdapter.createElement(tagName, NS.HTML, []);
this._attachElementToTree(element);
this.openElements.push(element);
}...
var tn = token.tagName;
if (tn === $.HEAD || tn === $.BODY || tn === $.HTML || tn === $.BR)
tokenBeforeHead(p, token);
}
function tokenBeforeHead(p, token) {
p._insertFakeElement($.HEAD);
p.headElement = p.openElements.current;
p.insertionMode = IN_HEAD_MODE;
p._processToken(token);
}
//12.2.5.4.4 The "in head" insertion mode
..._insertFakeRootElement = function () {
var element = this.treeAdapter.createElement($.HTML, NS.HTML, []);
this.treeAdapter.appendChild(this.openElements.current, element);
this.openElements.push(element);
}...
this._bootstrap(documentMock, fragmentContext);
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
..._insertTemplate = function (token) {
var tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs),
content = this.treeAdapter.createDocumentFragment();
this.treeAdapter.setTemplateContent(tmpl, content);
this._attachElementToTree(tmpl);
this.openElements.push(tmpl);
}...
else if (tn === $.NOSCRIPT || tn === $.NOFRAMES || tn === $.STYLE)
p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
else if (tn === $.SCRIPT)
p._switchToTextParsing(token, Tokenizer.MODE.SCRIPT_DATA);
else if (tn === $.TEMPLATE) {
p._insertTemplate(token, NS.HTML);
p.activeFormattingElements.insertMarker();
p.framesetOk = false;
p.insertionMode = IN_TEMPLATE_MODE;
p._pushTmplInsertionMode(IN_TEMPLATE_MODE);
}
else if (tn !== $.HEAD)
..._isElementCausesFosterParenting = function (element) {
var tn = this.treeAdapter.getTagName(element);
return tn === $.TABLE || tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD || tn === $.TR;
}...
Parser.prototype._isElementCausesFosterParenting = function (element) {
var tn = this.treeAdapter.getTagName(element);
return tn === $.TABLE || tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD || tn === $.TR;
};
Parser.prototype._shouldFosterParentOnInsertion = function () {
return this.fosterParentingEnabled && this._isElementCausesFosterParenting(
this.openElements.current);
};
Parser.prototype._findFosterParentingLocation = function () {
var location = {
parent: null,
beforeElement: null
};
..._isIntegrationPoint = function (element, foreignNS) {
var tn = this.treeAdapter.getTagName(element),
ns = this.treeAdapter.getNamespaceURI(element),
attrs = this.treeAdapter.getAttrList(element);
return foreignContent.isIntegrationPoint(tn, ns, attrs, foreignNS);
}...
};
//Text parsing
Parser.prototype._setupTokenizerCDATAMode = function () {
var current = this._getAdjustedCurrentElement();
this.tokenizer.allowCDATA = current && current !== this.document &&
this.treeAdapter.getNamespaceURI(current) !== NS.HTML && !this._isIntegrationPoint(current);
};
Parser.prototype._switchToTextParsing = function (currentToken, nextTokenizerState) {
this._insertElement(currentToken, NS.HTML);
this.tokenizer.state = nextTokenizerState;
this.originalInsertionMode = this.insertionMode;
this.insertionMode = TEXT_MODE;
..._isSpecialElement = function (element) {
var tn = this.treeAdapter.getTagName(element),
ns = this.treeAdapter.getNamespaceURI(element);
return HTML.SPECIAL_ELEMENTS[ns][tn];
}...
for (var i = p.openElements.stackTop; i >= 0; i--) {
var element = p.openElements.items[i];
if (element === formattingElementEntry.element)
break;
if (p._isSpecialElement(element))
furthestBlock = element;
}
if (!furthestBlock) {
p.openElements.popUntilElementPopped(formattingElementEntry.element);
p.activeFormattingElements.removeEntry(formattingElementEntry);
}
..._popTmplInsertionMode = function () {
this.tmplInsertionModeStack.pop();
this.tmplInsertionModeStackTop--;
this.currentTmplInsertionMode = this.tmplInsertionModeStack[this.tmplInsertionModeStackTop];
}...
else if (tn === $.BODY || tn === $.BR || tn === $.HTML)
tokenInHead(p, token);
else if (tn === $.TEMPLATE && p.openElements.tmplCount > 0) {
p.openElements.generateImpliedEndTags();
p.openElements.popUntilTagNamePopped($.TEMPLATE);
p.activeFormattingElements.clearToLastMarker();
p._popTmplInsertionMode();
p._resetInsertionMode();
}
}
function tokenInHead(p, token) {
p.openElements.pop();
p.insertionMode = AFTER_HEAD_MODE;
..._processInputToken = function (token) {
if (this._shouldProcessTokenInForeignContent(token))
this._processTokenInForeignContent(token);
else
this._processToken(token);
}...
if (token.chars.length === 1)
continue;
token.chars = token.chars.substr(1);
}
}
this._processInputToken(token);
if (scriptHandler && this.pendingScript)
break;
}
};
Parser.prototype.runParsingLoopForCurrentChunk = function (writeCallback, scriptHandler) {
..._processToken = function (token) {
_[this.insertionMode][token.type](this, token);
}...
};
Parser.prototype._processInputToken = function (token) {
if (this._shouldProcessTokenInForeignContent(token))
this._processTokenInForeignContent(token);
else
this._processToken(token);
};
//Integration points
Parser.prototype._isIntegrationPoint = function (element, foreignNS) {
var tn = this.treeAdapter.getTagName(element),
ns = this.treeAdapter.getNamespaceURI(element),
attrs = this.treeAdapter.getAttrList(element);
..._processTokenInBodyMode = function (token) {
_[IN_BODY_MODE][token.type](this, token);
}...
tokenInTable(p, token);
}
function tokenInTable(p, token) {
var savedFosterParentingState = p.fosterParentingEnabled;
p.fosterParentingEnabled = true;
p._processTokenInBodyMode(token);
p.fosterParentingEnabled = savedFosterParentingState;
}
//12.2.5.4.10 The "in table text" insertion mode
//------------------------------------------------------------------
function whitespaceCharacterInTableText(p, token) {
..._processTokenInForeignContent = function (token) {
if (token.type === Tokenizer.CHARACTER_TOKEN)
characterInForeignContent(this, token);
else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN)
nullCharacterInForeignContent(this, token);
else if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN)
insertCharacters(this, token);
else if (token.type === Tokenizer.COMMENT_TOKEN)
appendComment(this, token);
else if (token.type === Tokenizer.START_TAG_TOKEN)
startTagInForeignContent(this, token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
endTagInForeignContent(this, token);
}...
else if (token.type === Tokenizer.END_TAG_TOKEN)
endTagInForeignContent(this, token);
};
Parser.prototype._processInputToken = function (token) {
if (this._shouldProcessTokenInForeignContent(token))
this._processTokenInForeignContent(token);
else
this._processToken(token);
};
//Integration points
Parser.prototype._isIntegrationPoint = function (element, foreignNS) {
..._pushTmplInsertionMode = function (mode) {
this.tmplInsertionModeStack.push(mode);
this.tmplInsertionModeStackTop++;
this.currentTmplInsertionMode = mode;
}...
//This is important for jsdom there 'document' can't be recreated, therefore
//fragment parsing causes messing of the main `document`.
var documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
this._bootstrap(documentMock, fragmentContext);
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
..._reconstructActiveFormattingElements = function () {
var listLength = this.activeFormattingElements.length;
if (listLength) {
var unopenIdx = listLength,
entry = null;
do {
unopenIdx--;
entry = this.activeFormattingElements.entries[unopenIdx];
if (entry.type === FormattingElementList.MARKER_ENTRY || this.openElements.contains(entry.element)) {
unopenIdx++;
break;
}
} while (unopenIdx > 0);
for (var i = unopenIdx; i < listLength; i++) {
entry = this.activeFormattingElements.entries[i];
this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
entry.element = this.openElements.current;
}
}
}...
p._processToken(token);
}
//12.2.5.4.7 The "in body" insertion mode
//------------------------------------------------------------------
function whitespaceCharacterInBody(p, token) {
p._reconstructActiveFormattingElements();
p._insertCharacters(token);
}
function characterInBody(p, token) {
p._reconstructActiveFormattingElements();
p._insertCharacters(token);
p.framesetOk = false;
..._resetInsertionMode = function () {
for (var i = this.openElements.stackTop, last = false; i >= 0; i--) {
var element = this.openElements.items[i];
if (i === 0) {
last = true;
if (this.fragmentContext)
element = this.fragmentContext;
}
var tn = this.treeAdapter.getTagName(element),
newInsertionMode = INSERTION_MODE_RESET_MAP[tn];
if (newInsertionMode) {
this.insertionMode = newInsertionMode;
break;
}
else if (!last && (tn === $.TD || tn === $.TH)) {
this.insertionMode = IN_CELL_MODE;
break;
}
else if (!last && tn === $.HEAD) {
this.insertionMode = IN_HEAD_MODE;
break;
}
else if (tn === $.SELECT) {
this._resetInsertionModeForSelect(i);
break;
}
else if (tn === $.TEMPLATE) {
this.insertionMode = this.currentTmplInsertionMode;
break;
}
else if (tn === $.HTML) {
this.insertionMode = this.headElement ? AFTER_HEAD_MODE : BEFORE_HEAD_MODE;
break;
}
else if (last) {
this.insertionMode = IN_BODY_MODE;
break;
}
}
}...
this._bootstrap(documentMock, fragmentContext);
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
..._resetInsertionModeForSelect = function (selectIdx) {
if (selectIdx > 0) {
for (var i = selectIdx - 1; i > 0; i--) {
var ancestor = this.openElements.items[i],
tn = this.treeAdapter.getTagName(ancestor);
if (tn === $.TEMPLATE)
break;
else if (tn === $.TABLE) {
this.insertionMode = IN_SELECT_IN_TABLE_MODE;
return;
}
}
}
this.insertionMode = IN_SELECT_MODE;
}...
else if (!last && tn === $.HEAD) {
this.insertionMode = IN_HEAD_MODE;
break;
}
else if (tn === $.SELECT) {
this._resetInsertionModeForSelect(i);
break;
}
else if (tn === $.TEMPLATE) {
this.insertionMode = this.currentTmplInsertionMode;
break;
}
..._runParsingLoop = function (scriptHandler) {
while (!this.stopped) {
this._setupTokenizerCDATAMode();
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.HIBERNATION_TOKEN)
break;
if (this.skipNextNewLine) {
this.skipNextNewLine = false;
if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
if (token.chars.length === 1)
continue;
token.chars = token.chars.substr(1);
}
}
this._processInputToken(token);
if (scriptHandler && this.pendingScript)
break;
}
}...
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
..._setDocumentType = function (token) {
this.treeAdapter.setDocumentType(this.document, token.name, token.publicId, token.systemId);
}...
function stopParsing(p) {
p.stopped = true;
}
//12.2.5.4.1 The "initial" insertion mode
//------------------------------------------------------------------
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
var mode = token.forceQuirks ?
HTML.DOCUMENT_MODE.QUIRKS :
doctype.getDocumentMode(token.name, token.publicId, token.systemId);
p.treeAdapter.setDocumentMode(p.document, mode);
..._setupTokenizerCDATAMode = function () {
var current = this._getAdjustedCurrentElement();
this.tokenizer.allowCDATA = current && current !== this.document &&
this.treeAdapter.getNamespaceURI(current) !== NS.HTML && !this._isIntegrationPoint(current);
}...
this.skipNextNewLine = false;
this.fosterParentingEnabled = false;
};
//Parsing loop
Parser.prototype._runParsingLoop = function (scriptHandler) {
while (!this.stopped) {
this._setupTokenizerCDATAMode();
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.HIBERNATION_TOKEN)
break;
if (this.skipNextNewLine) {
..._shouldFosterParentOnInsertion = function () {
return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.current);
}...
lastFosterParentingLocation = parserProto._findFosterParentingLocation.call(this);
return lastFosterParentingLocation;
};
parser._insertCharacters = function (token) {
parserProto._insertCharacters.call(this, token);
var hasFosterParent = this._shouldFosterParentOnInsertion(),
parent = hasFosterParent && lastFosterParentingLocation.parent ||
this.openElements.currentTmplContent ||
this.openElements.current,
siblings = this.treeAdapter.getChildNodes(parent),
textNodeIdx = hasFosterParent && lastFosterParentingLocation.beforeElement ?
siblings.indexOf(lastFosterParentingLocation.beforeElement) - 1 :
siblings.length - 1,
..._shouldProcessTokenInForeignContent = function (token) {
var current = this._getAdjustedCurrentElement();
if (!current || current === this.document)
return false;
var ns = this.treeAdapter.getNamespaceURI(current);
if (ns === NS.HTML)
return false;
if (this.treeAdapter.getTagName(current) === $.ANNOTATION_XML && ns === NS.MATHML &&
token.type === Tokenizer.START_TAG_TOKEN && token.tagName === $.SVG)
return false;
var isCharacterToken = token.type === Tokenizer.CHARACTER_TOKEN ||
token.type === Tokenizer.NULL_CHARACTER_TOKEN ||
token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN,
isMathMLTextStartTag = token.type === Tokenizer.START_TAG_TOKEN &&
token.tagName !== $.MGLYPH &&
token.tagName !== $.MALIGNMARK;
if ((isMathMLTextStartTag || isCharacterToken) && this._isIntegrationPoint(current, NS.MATHML))
return false;
if ((token.type === Tokenizer.START_TAG_TOKEN || isCharacterToken) && this._isIntegrationPoint(current, NS.HTML))
return false;
return token.type !== Tokenizer.EOF_TOKEN;
}...
startTagInForeignContent(this, token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
endTagInForeignContent(this, token);
};
Parser.prototype._processInputToken = function (token) {
if (this._shouldProcessTokenInForeignContent(token))
this._processTokenInForeignContent(token);
else
this._processToken(token);
};
//Integration points
..._switchToTextParsing = function (currentToken, nextTokenizerState) {
this._insertElement(currentToken, NS.HTML);
this.tokenizer.state = nextTokenizerState;
this.originalInsertionMode = this.insertionMode;
this.insertionMode = TEXT_MODE;
}...
if (tn === $.HTML)
startTagInBody(p, token);
else if (tn === $.BASE || tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META)
p._appendElement(token, NS.HTML);
else if (tn === $.TITLE)
p._switchToTextParsing(token, Tokenizer.MODE.RCDATA);
//NOTE: here we assume that we always act as an interactive user agent with enabled scripting, so we parse
//<noscript> as a rawtext.
else if (tn === $.NOSCRIPT || tn === $.NOFRAMES || tn === $.STYLE)
p._switchToTextParsing(token, Tokenizer.MODE.RAWTEXT);
else if (tn === $.SCRIPT)
...parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
}...
*
* @example
*```js
*
* const parse5 = require('parse5');
*
* // Uses the default tree adapter for parsing.
* const document = parse5.parse('<div></div>', {
* treeAdapter: parse5.treeAdapters.default
* });
*
* // Uses the htmlparser2 tree adapter with the SerializerStream.
* const serializer = new parse5.SerializerStream(node, {
* treeAdapter: parse5.treeAdapters.htmlparser2
* });
...parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
if (!fragmentContext)
fragmentContext = this.treeAdapter.createElement($.TEMPLATE, NS.HTML, []);
//NOTE: create fake element which will be used as 'document' for fragment parsing.
//This is important for jsdom there 'document' can't be recreated, therefore
//fragment parsing causes messing of the main `document`.
var documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
this._bootstrap(documentMock, fragmentContext);
if (this.treeAdapter.getTagName(fragmentContext) === $.TEMPLATE)
this._pushTmplInsertionMode(IN_TEMPLATE_MODE);
this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
var rootElement = this.treeAdapter.getFirstChild(documentMock),
fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
}...
* @param options - Parsing options.
*
* @example
* ```js
*
* const parse5 = require('parse5');
*
* const documentFragment = parse5.parseFragment('<table></table
x3e;');
*
* console.log(documentFragment.childNodes[0].tagName); //> 'table'
*
* // Parses the html fragment in the context of the parsed <table> element.
* const trFragment = parser.parseFragment(documentFragment.childNodes[0], '<tr><td>Shake it, baby</
td></tr>');
*
* console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
...runParsingLoopForCurrentChunk = function (writeCallback, scriptHandler) {
this._runParsingLoop(scriptHandler);
if (scriptHandler && this.pendingScript) {
var script = this.pendingScript;
this.pendingScript = null;
scriptHandler(script);
return;
}
if (writeCallback)
writeCallback();
}...
ParserStream.prototype.end = function (chunk, encoding, callback) {
this.lastChunkWritten = true;
WritableStream.prototype.end.call(this, chunk, encoding, callback);
};
//Scriptable parser implementation
ParserStream.prototype._runParsingLoop = function () {
this.parser.runParsingLoopForCurrentChunk(this.writeCallback, this._scriptHandler);
};
ParserStream.prototype._resume = function () {
if (!this.pausedByScript)
throw new Error('Parser was already resumed');
while (this.pendingHtmlInsertions.length) {
...switchToPlaintextParsing = function () {
this.insertionMode = TEXT_MODE;
this.originalInsertionMode = IN_BODY_MODE;
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
}...
// NOTE: see https://html.spec.whatwg.org/#read-text
this.parser._insertFakeElement($.HTML);
this.parser._insertFakeElement($.HEAD);
this.parser.openElements.pop();
this.parser._insertFakeElement($.BODY);
this.parser._insertFakeElement($.PRE);
this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
this.parser.switchToPlaintextParsing();
};
inherits(PlainTextConversionStream, ParserStream);
...open_element_stack = function (document, treeAdapter) {
this.stackTop = -1;
this.items = [];
this.current = document;
this.currentTagName = null;
this.currentTmplContent = null;
this.tmplCount = 0;
this.treeAdapter = treeAdapter;
}n/a
_indexOf = function (element) {
var idx = -1;
for (var i = this.stackTop; i >= 0; i--) {
if (this.items[i] === element) {
idx = i;
break;
}
}
return idx;
}...
if (this.tmplCount > 0 && this._isInTemplate())
this.tmplCount--;
this._updateCurrentElement();
};
OpenElementStack.prototype.replace = function (oldElement, newElement) {
var idx = this._indexOf(oldElement);
this.items[idx] = newElement;
if (idx === this.stackTop)
this._updateCurrentElement();
};
..._isInTemplate = function () {
return this.currentTagName === $.TEMPLATE && this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
}...
return this.currentTagName === $.TEMPLATE && this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
};
OpenElementStack.prototype._updateCurrentElement = function () {
this.current = this.items[this.stackTop];
this.currentTagName = this.current && this.treeAdapter.getTagName(this.current);
this.currentTmplContent = this._isInTemplate() ? this.treeAdapter.getTemplateContent(
this.current) : null;
};
//Mutations
OpenElementStack.prototype.push = function (element) {
this.items[++this.stackTop] = element;
this._updateCurrentElement();
..._updateCurrentElement = function () {
this.current = this.items[this.stackTop];
this.currentTagName = this.current && this.treeAdapter.getTagName(this.current);
this.currentTmplContent = this._isInTemplate() ? this.treeAdapter.getTemplateContent(this.current) : null;
}...
this.currentTmplContent = this._isInTemplate() ? this.treeAdapter.getTemplateContent(this.current) : null;
};
//Mutations
OpenElementStack.prototype.push = function (element) {
this.items[++this.stackTop] = element;
this._updateCurrentElement();
if (this._isInTemplate())
this.tmplCount++;
};
OpenElementStack.prototype.pop = function () {
...clearBackToTableBodyContext = function () {
while (this.currentTagName !== $.TBODY &&
this.currentTagName !== $.TFOOT &&
this.currentTagName !== $.THEAD &&
this.currentTagName !== $.TEMPLATE &&
this.currentTagName !== $.HTML ||
this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML)
this.pop();
}...
//12.2.5.4.13 The "in table body" insertion mode
//------------------------------------------------------------------
function startTagInTableBody(p, token) {
var tn = token.tagName;
if (tn === $.TR) {
p.openElements.clearBackToTableBodyContext();
p._insertElement(token, NS.HTML);
p.insertionMode = IN_ROW_MODE;
}
else if (tn === $.TH || tn === $.TD) {
p.openElements.clearBackToTableBodyContext();
p._insertFakeElement($.TR);
...clearBackToTableContext = function () {
while (this.currentTagName !== $.TABLE &&
this.currentTagName !== $.TEMPLATE &&
this.currentTagName !== $.HTML ||
this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML)
this.pop();
}...
}
else
tokenInTable(p, token);
}
function captionStartTagInTable(p, token) {
p.openElements.clearBackToTableContext();
p.activeFormattingElements.insertMarker();
p._insertElement(token, NS.HTML);
p.insertionMode = IN_CAPTION_MODE;
}
function colgroupStartTagInTable(p, token) {
p.openElements.clearBackToTableContext();
...clearBackToTableRowContext = function () {
while (this.currentTagName !== $.TR &&
this.currentTagName !== $.TEMPLATE &&
this.currentTagName !== $.HTML ||
this.treeAdapter.getNamespaceURI(this.current) !== NS.HTML)
this.pop();
}...
//12.2.5.4.14 The "in row" insertion mode
//------------------------------------------------------------------
function startTagInRow(p, token) {
var tn = token.tagName;
if (tn === $.TH || tn === $.TD) {
p.openElements.clearBackToTableRowContext();
p._insertElement(token, NS.HTML);
p.insertionMode = IN_CELL_MODE;
p.activeFormattingElements.insertMarker();
}
else if (tn === $.CAPTION || tn === $.COL || tn === $.COLGROUP || tn === $.TBODY ||
tn === $.TFOOT || tn === $.THEAD || tn === $.TR) {
...contains = function (element) {
return this._indexOf(element) > -1;
}...
var unopenIdx = listLength,
entry = null;
do {
unopenIdx--;
entry = this.activeFormattingElements.entries[unopenIdx];
if (entry.type === FormattingElementList.MARKER_ENTRY || this.openElements.contains(entry.element)) {
unopenIdx++;
break;
}
} while (unopenIdx > 0);
for (var i = unopenIdx; i < listLength; i++) {
entry = this.activeFormattingElements.entries[i];
...generateImpliedEndTags = function () {
while (isImpliedEndTagRequired(this.currentTagName))
this.pop();
}...
entry.element = this.openElements.current;
}
}
};
//Close elements
Parser.prototype._closeTableCell = function () {
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
};
Parser.prototype._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
...generateImpliedEndTagsWithExclusion = function (exclusionTagName) {
while (isImpliedEndTagRequired(this.currentTagName) && this.currentTagName !== exclusionTagName)
this.pop();
}...
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
};
Parser.prototype._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
};
//Insertion modes
Parser.prototype._resetInsertionMode = function () {
for (var i = this.openElements.stackTop, last = false; i >= 0; i--) {
var element = this.openElements.items[i];
...getCommonAncestor = function (element) {
var elementIdx = this._indexOf(element);
return --elementIdx >= 0 ? this.items[elementIdx] : null;
}...
return furthestBlock;
}
//Step 13 of the algorithm
function aaInnerLoop(p, furthestBlock, formattingElement) {
var lastElement = furthestBlock,
nextElement = p.openElements.getCommonAncestor(furthestBlock);
for (var i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
//NOTE: store next element for the next loop iteration (it may be deleted from the stack by step 9.5)
nextElement = p.openElements.getCommonAncestor(element);
var elementEntry = p.activeFormattingElements.getElementEntry(element),
counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER,
...hasInButtonScope = function (tagName) {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (tn === tagName && ns === NS.HTML)
return true;
if (tn === $.BUTTON && ns === NS.HTML || isScopingElement(tn, ns))
return false;
}
return true;
}...
p.openElements.popAllUpToHtmlElement();
p._insertElement(token, NS.HTML);
p.insertionMode = IN_FRAMESET_MODE;
}
}
function addressStartTagInBody(p, token) {
if (p.openElements.hasInButtonScope($.P))
p._closePElement();
p._insertElement(token, NS.HTML);
}
function numberedHeaderStartTagInBody(p, token) {
if (p.openElements.hasInButtonScope($.P))
...hasInListItemScope = function (tagName) {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (tn === tagName && ns === NS.HTML)
return true;
if ((tn === $.UL || tn === $.OL) && ns === NS.HTML || isScopingElement(tn, ns))
return false;
}
return true;
}...
if (!p.openElements.hasInButtonScope($.P))
p._insertFakeElement($.P);
p._closePElement();
}
function liEndTagInBody(p) {
if (p.openElements.hasInListItemScope($.LI)) {
p.openElements.generateImpliedEndTagsWithExclusion($.LI);
p.openElements.popUntilTagNamePopped($.LI);
}
}
function ddEndTagInBody(p, token) {
var tn = token.tagName;
...hasInScope = function (tagName) {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (tn === tagName && ns === NS.HTML)
return true;
if (isScopingElement(tn, ns))
return false;
}
return true;
}...
currentToken = token;
parserProto._processToken.call(this, token);
//NOTE: <body> and <html> are never popped from the stack, so we need to updated
//their end location explicitly.
if (token.type === Tokenizer.END_TAG_TOKEN &&
(token.tagName === $.HTML ||
token.tagName === $.BODY && this.openElements.hasInScope($.BODY
))) {
for (var i = this.openElements.stackTop; i >= 0; i--) {
var element = this.openElements.items[i];
if (this.treeAdapter.getTagName(element) === token.tagName) {
setEndLocation(element, token);
break;
}
...hasInSelectScope = function (tagName) {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (ns !== NS.HTML)
continue;
if (tn === tagName)
return true;
if (tn !== $.OPTION && tn !== $.OPTGROUP)
return false;
}
return true;
}...
if (p.openElements.currentTagName === $.OPTGROUP)
p.openElements.pop();
p._insertElement(token, NS.HTML);
}
else if (tn === $.INPUT || tn === $.KEYGEN || tn === $.TEXTAREA || tn === $.SELECT) {
if (p.openElements.hasInSelectScope($.SELECT)) {
p.openElements.popUntilTagNamePopped($.SELECT);
p._resetInsertionMode();
if (tn !== $.SELECT)
p._processToken(token);
}
}
...hasInTableScope = function (tagName) {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (ns !== NS.HTML)
continue;
if (tn === tagName)
return true;
if (tn === $.TABLE || tn === $.TEMPLATE || tn === $.HTML)
return false;
}
return true;
}...
p.openElements.clearBackToTableContext();
p._insertFakeElement($.TBODY);
p.insertionMode = IN_TABLE_BODY_MODE;
p._processToken(token);
}
function tableStartTagInTable(p, token) {
if (p.openElements.hasInTableScope($.TABLE)) {
p.openElements.popUntilTagNamePopped($.TABLE);
p._resetInsertionMode();
p._processToken(token);
}
}
function inputStartTagInTable(p, token) {
...hasNumberedHeaderInScope = function () {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if ((tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6) && ns === NS.HTML)
return true;
if (isScopingElement(tn, ns))
return false;
}
return true;
}...
if (p.openElements.hasInScope(tn)) {
p.openElements.generateImpliedEndTagsWithExclusion(tn);
p.openElements.popUntilTagNamePopped(tn);
}
}
function numberedHeaderEndTagInBody(p) {
if (p.openElements.hasNumberedHeaderInScope()) {
p.openElements.generateImpliedEndTags();
p.openElements.popUntilNumberedHeaderPopped();
}
}
function appletEndTagInBody(p, token) {
var tn = token.tagName;
...hasTableBodyContextInTableScope = function () {
for (var i = this.stackTop; i >= 0; i--) {
var tn = this.treeAdapter.getTagName(this.items[i]),
ns = this.treeAdapter.getNamespaceURI(this.items[i]);
if (ns !== NS.HTML)
continue;
if (tn === $.TBODY || tn === $.THEAD || tn === $.TFOOT)
return true;
if (tn === $.TABLE || tn === $.HTML)
return false;
}
return true;
}...
p.insertionMode = IN_ROW_MODE;
p._processToken(token);
}
else if (tn === $.CAPTION || tn === $.COL || tn === $.COLGROUP ||
tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD) {
if (p.openElements.hasTableBodyContextInTableScope()) {
p.openElements.clearBackToTableBodyContext();
p.openElements.pop();
p.insertionMode = IN_TABLE_MODE;
p._processToken(token);
}
}
...insertAfter = function (referenceElement, newElement) {
var insertionIdx = this._indexOf(referenceElement) + 1;
this.items.splice(insertionIdx, 0, newElement);
if (insertionIdx === ++this.stackTop)
this._updateCurrentElement();
}...
p._adoptNodes(furthestBlock, newElement);
p.treeAdapter.appendChild(furthestBlock, newElement);
p.activeFormattingElements.insertElementAfterBookmark(newElement, formattingElementEntry.token);
p.activeFormattingElements.removeEntry(formattingElementEntry);
p.openElements.remove(formattingElementEntry.element);
p.openElements.insertAfter(furthestBlock, newElement);
}
//Algorithm entry point
function callAdoptionAgency(p, token) {
var formattingElementEntry;
for (var i = 0; i < AA_OUTER_LOOP_ITER; i++) {
...isRootHtmlElementCurrent = function () {
return this.stackTop === 0 && this.currentTagName === $.HTML;
}...
p._appendElement(token, NS.HTML);
else if (tn === $.NOFRAMES)
startTagInHead(p, token);
}
function endTagInFrameset(p, token) {
if (token.tagName === $.FRAMESET && !p.openElements.isRootHtmlElementCurrent
span>()) {
p.openElements.pop();
if (!p.fragmentContext && p.openElements.currentTagName !== $.FRAMESET)
p.insertionMode = AFTER_FRAMESET_MODE;
}
}
...pop = function () {
this.stackTop--;
if (this.tmplCount > 0 && this._isInTemplate())
this.tmplCount--;
this._updateCurrentElement();
}...
break;
}
}
};
FormattingElementList.prototype.clearToLastMarker = function () {
while (this.length) {
var entry = this.entries.pop();
this.length--;
if (entry.type === FormattingElementList.MARKER_ENTRY)
break;
}
};
...popAllUpToHtmlElement = function () {
//NOTE: here we assume that root <html> element is always first in the open element stack, so
//we perform this fast stack clean up.
this.stackTop = 0;
this._updateCurrentElement();
}...
}
function framesetStartTagInBody(p, token) {
var bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
if (p.framesetOk && bodyElement) {
p.treeAdapter.detachNode(bodyElement);
p.openElements.popAllUpToHtmlElement();
p._insertElement(token, NS.HTML);
p.insertionMode = IN_FRAMESET_MODE;
}
}
function addressStartTagInBody(p, token) {
if (p.openElements.hasInButtonScope($.P))
...popUntilElementPopped = function (element) {
while (this.stackTop > -1) {
var poppedElement = this.current;
this.pop();
if (poppedElement === element)
break;
}
}...
break;
if (p._isSpecialElement(element))
furthestBlock = element;
}
if (!furthestBlock) {
p.openElements.popUntilElementPopped(formattingElementEntry.element);
p.activeFormattingElements.removeEntry(formattingElementEntry);
}
return furthestBlock;
}
//Step 13 of the algorithm
...popUntilNumberedHeaderPopped = function () {
while (this.stackTop > -1) {
var tn = this.currentTagName,
ns = this.treeAdapter.getNamespaceURI(this.current);
this.pop();
if (tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6 && ns === NS.HTML)
break;
}
}...
p.openElements.popUntilTagNamePopped(tn);
}
}
function numberedHeaderEndTagInBody(p) {
if (p.openElements.hasNumberedHeaderInScope()) {
p.openElements.generateImpliedEndTags();
p.openElements.popUntilNumberedHeaderPopped();
}
}
function appletEndTagInBody(p, token) {
var tn = token.tagName;
if (p.openElements.hasInScope(tn)) {
...popUntilTableCellPopped = function () {
while (this.stackTop > -1) {
var tn = this.currentTagName,
ns = this.treeAdapter.getNamespaceURI(this.current);
this.pop();
if (tn === $.TD || tn === $.TH && ns === NS.HTML)
break;
}
}...
}
}
};
//Close elements
Parser.prototype._closeTableCell = function () {
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
};
Parser.prototype._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
...popUntilTagNamePopped = function (tagName) {
while (this.stackTop > -1) {
var tn = this.currentTagName,
ns = this.treeAdapter.getNamespaceURI(this.current);
this.pop();
if (tn === tagName && ns === NS.HTML)
break;
}
}...
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = IN_ROW_MODE;
};
Parser.prototype._closePElement = function () {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
};
//Insertion modes
Parser.prototype._resetInsertionMode = function () {
for (var i = this.openElements.stackTop, last = false; i >= 0; i--) {
var element = this.openElements.items[i];
...push = function (element) {
this.items[++this.stackTop] = element;
this._updateCurrentElement();
if (this._isInTemplate())
this.tmplCount++;
}...
var element = entry.element,
elementAttrs = this.treeAdapter.getAttrList(element),
isCandidate = this.treeAdapter.getTagName(element) === neTagName &&
this.treeAdapter.getNamespaceURI(element) === neNamespaceURI &&
elementAttrs.length === neAttrsLength;
if (isCandidate)
candidates.push({idx: i, attrs: elementAttrs});
}
}
return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
};
FormattingElementList.prototype._ensureNoahArkCondition = function (newElement) {
...remove = function (element) {
for (var i = this.stackTop; i >= 0; i--) {
if (this.items[i] === element) {
this.items.splice(i, 1);
this.stackTop--;
this._updateCurrentElement();
break;
}
}
}...
counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER,
shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
if (shouldRemoveFromOpenElements) {
if (counterOverflow)
p.activeFormattingElements.removeEntry(elementEntry);
p.openElements.remove(element);
}
else {
element = aaRecreateElementFromEntry(p, elementEntry);
if (lastElement === furthestBlock)
p.activeFormattingElements.bookmark = elementEntry;
...replace = function (oldElement, newElement) {
var idx = this._indexOf(oldElement);
this.items[idx] = newElement;
if (idx === this.stackTop)
this._updateCurrentElement();
}...
}
//Step 13.7 of the algorithm
function aaRecreateElementFromEntry(p, elementEntry) {
var ns = p.treeAdapter.getNamespaceURI(elementEntry.element),
newElement = p.treeAdapter.createElement(elementEntry.token.tagName, ns, elementEntry.token.attrs);
p.openElements.replace(elementEntry.element, newElement);
elementEntry.element = newElement;
return newElement;
}
//Step 14 of the algorithm
function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
...tryPeekProperlyNestedBodyElement = function () {
//Properly nested <body> element (should be second element in stack).
var element = this.items[1];
return element && this.treeAdapter.getTagName(element) === $.BODY ? element : null;
}...
function htmlStartTagInBody(p, token) {
if (p.openElements.tmplCount === 0)
p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
}
function bodyStartTagInBody(p, token) {
var bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
if (bodyElement && p.openElements.tmplCount === 0) {
p.framesetOk = false;
p.treeAdapter.adoptAttributes(bodyElement, token.attrs);
}
}
...parser_feedback_simulator = function (tokenizer) {
this.tokenizer = tokenizer;
this.namespaceStack = [];
this.namespaceStackTop = -1;
this._enterNamespace(NS.HTML);
}n/a
_ensureTokenizerMode = function (tn) {
if (tn === $.TEXTAREA || tn === $.TITLE)
this.tokenizer.state = Tokenizer.MODE.RCDATA;
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
else if (tn === $.SCRIPT)
this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP ||
tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT)
this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
}...
else {
if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING)
this.skipNextNewLine = true;
else if (tn === $.IMAGE)
token.tagName = $.IMG;
this._ensureTokenizerMode(tn);
}
};
ParserFeedbackSimulator.prototype._handleEndTagToken = function (token) {
var tn = token.tagName;
if (!this.inForeignContent) {
..._enterNamespace = function (namespace) {
this.namespaceStackTop++;
this.namespaceStack.push(namespace);
this.inForeignContent = namespace !== NS.HTML;
this.currentNamespace = namespace;
this.tokenizer.allowCDATA = this.inForeignContent;
}...
//ParserFeedbackSimulator
//Simulates adjustment of the Tokenizer which performed by standard parser during tree construction.
var ParserFeedbackSimulator = module.exports = function (tokenizer) {
this.tokenizer = tokenizer;
this.namespaceStack = [];
this.namespaceStackTop = -1;
this._enterNamespace(NS.HTML);
};
ParserFeedbackSimulator.prototype.getNextToken = function () {
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.START_TAG_TOKEN)
this._handleStartTagToken(token);
..._handleEndTagToken = function (token) {
var tn = token.tagName;
if (!this.inForeignContent) {
var previousNs = this.namespaceStack[this.namespaceStackTop - 1];
if (previousNs === NS.SVG && foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn])
tn = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn];
//NOTE: check for exit from integration point
if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs))
this._leaveCurrentNamespace();
}
else if (tn === $.SVG && this.currentNamespace === NS.SVG ||
tn === $.MATH && this.currentNamespace === NS.MATHML)
this._leaveCurrentNamespace();
// NOTE: adjust end tag name as well for consistency
if (this.currentNamespace === NS.SVG)
foreignContent.adjustTokenSVGTagName(token);
}...
ParserFeedbackSimulator.prototype.getNextToken = function () {
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.START_TAG_TOKEN)
this._handleStartTagToken(token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this._handleEndTagToken(token);
else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
token.type = Tokenizer.CHARACTER_TOKEN;
token.chars = UNICODE.REPLACEMENT_CHARACTER;
}
else if (this.skipNextNewLine) {
..._handleStartTagToken = function (token) {
var tn = token.tagName;
if (tn === $.SVG)
this._enterNamespace(NS.SVG);
else if (tn === $.MATH)
this._enterNamespace(NS.MATHML);
if (this.inForeignContent) {
if (foreignContent.causesExit(token)) {
this._leaveCurrentNamespace();
return;
}
var currentNs = this.currentNamespace;
if (currentNs === NS.MATHML)
foreignContent.adjustTokenMathMLAttrs(token);
else if (currentNs === NS.SVG) {
foreignContent.adjustTokenSVGTagName(token);
foreignContent.adjustTokenSVGAttrs(token);
}
foreignContent.adjustTokenXMLAttrs(token);
tn = token.tagName;
if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs))
this._enterNamespace(NS.HTML);
}
else {
if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING)
this.skipNextNewLine = true;
else if (tn === $.IMAGE)
token.tagName = $.IMG;
this._ensureTokenizerMode(tn);
}
}...
this._enterNamespace(NS.HTML);
};
ParserFeedbackSimulator.prototype.getNextToken = function () {
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.START_TAG_TOKEN)
this._handleStartTagToken(token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this._handleEndTagToken(token);
else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
token.type = Tokenizer.CHARACTER_TOKEN;
token.chars = UNICODE.REPLACEMENT_CHARACTER;
..._leaveCurrentNamespace = function () {
this.namespaceStackTop--;
this.namespaceStack.pop();
this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
this.inForeignContent = this.currentNamespace !== NS.HTML;
this.tokenizer.allowCDATA = this.inForeignContent;
}...
this._enterNamespace(NS.SVG);
else if (tn === $.MATH)
this._enterNamespace(NS.MATHML);
if (this.inForeignContent) {
if (foreignContent.causesExit(token)) {
this._leaveCurrentNamespace();
return;
}
var currentNs = this.currentNamespace;
if (currentNs === NS.MATHML)
foreignContent.adjustTokenMathMLAttrs(token);
...getNextToken = function () {
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.START_TAG_TOKEN)
this._handleStartTagToken(token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this._handleEndTagToken(token);
else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
token.type = Tokenizer.CHARACTER_TOKEN;
token.chars = UNICODE.REPLACEMENT_CHARACTER;
}
else if (this.skipNextNewLine) {
if (token.type !== Tokenizer.HIBERNATION_TOKEN)
this.skipNextNewLine = false;
if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
if (token.chars.length === 1)
return this.getNextToken();
token.chars = token.chars.substr(1);
}
}
return token;
}...
};
//Parsing loop
Parser.prototype._runParsingLoop = function (scriptHandler) {
while (!this.stopped) {
this._setupTokenizerCDATAMode();
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.HIBERNATION_TOKEN)
break;
if (this.skipNextNewLine) {
this.skipNextNewLine = false;
...assign = function (parser) {
//NOTE: obtain Parser proto this way to avoid module circular references
var parserProto = Object.getPrototypeOf(parser),
treeAdapter = parser.treeAdapter,
attachableElementLocation = null,
lastFosterParentingLocation = null,
currentToken = null;
function setEndLocation(element, closingToken) {
var loc = element.__location;
if (!loc)
return;
if (!loc.startTag) {
loc.startTag = {
line: loc.line,
col: loc.col,
startOffset: loc.startOffset,
endOffset: loc.endOffset
};
if (loc.attrs)
loc.startTag.attrs = loc.attrs;
}
if (closingToken.location) {
var ctLocation = closingToken.location,
tn = treeAdapter.getTagName(element),
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing tag and
// for cases like <td> <p> </td> - 'p' closes without a closing tag
isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN &&
tn === closingToken.tagName;
if (isClosingEndTag) {
loc.endTag = {
line: ctLocation.line,
col: ctLocation.col,
startOffset: ctLocation.startOffset,
endOffset: ctLocation.endOffset
};
}
if (isClosingEndTag)
loc.endOffset = ctLocation.endOffset;
else
loc.endOffset = ctLocation.startOffset;
}
else if (closingToken.type === Tokenizer.EOF_TOKEN)
loc.endOffset = parser.tokenizer.preprocessor.sourcePos;
}
//NOTE: patch _bootstrap method
parser._bootstrap = function (document, fragmentContext) {
parserProto._bootstrap.call(this, document, fragmentContext);
attachableElementLocation = null;
lastFosterParentingLocation = null;
currentToken = null;
//OpenElementStack
parser.openElements.pop = function () {
setEndLocation(this.current, currentToken);
OpenElementStack.prototype.pop.call(this);
};
parser.openElements.popAllUpToHtmlElement = function () {
for (var i = this.stackTop; i > 0; i--)
setEndLocation(this.items[i], currentToken);
OpenElementStack.prototype.popAllUpToHtmlElement.call(this);
};
parser.openElements.remove = function (element) {
setEndLocation(element, currentToken);
OpenElementStack.prototype.remove.call(this, element);
};
};
parser._runParsingLoop = function (scriptHandler) {
parserProto._runParsingLoop.call(this, scriptHandler);
// NOTE: generate location info for elements
// that remains on open element stack
for (var i = parser.openElements.stackTop; i >= 0; i--)
setEndLocation(parser.openElements.items[i], currentToken);
};
//Token processing
parser._processTokenInForeignContent = function (token) {
currentToken = token;
parserProto._processTokenInForeignContent.call(this, token);
};
parser._processToken = function (token) {
currentToken = token;
parserProto._processToken.call(this, token);
//NOTE: <body> and <html> are never popped from the stack, so we need to updated
//their end location explicitly.
if (token.type === Tokenizer.END_TAG_TOKEN &&
(token.tagName === $.HTML ||
token.tagName === $.BODY && this.openElements.hasInScope($.BODY))) {
for (var i = this.openElements.stackTop; i >= 0; i--) {
var element = this.openElements.items[i];
if (this.treeAdapter.getTagName(element) === token.tagName) {
setEndLocation(element, token);
break;
}
}
}
};
//Doctyp ......
var Parser = module.exports = function (options) {
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
if (this.options.locationInfo)
locationInfoMixin.assign(this);
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
...preprocessor = function () {
this.html = null;
this.pos = -1;
this.lastGapPos = -1;
this.lastCharPos = -1;
this.droppedBufferSize = 0;
this.gapStack = [];
this.skipNextNewLine = false;
this.lastChunkWritten = false;
this.endOfChunkHit = false;
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
}n/a
_addGap = function () {
this.gapStack.push(this.lastGapPos);
this.lastGapPos = this.pos;
}...
if (isSurrogatePair(cp, nextCp)) {
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
this.pos++;
cp = getSurrogatePairCodePoint(cp, nextCp);
//NOTE: add gap that should be avoided during retreat
this._addGap();
}
}
// NOTE: we've hit the end of chunk, stop processing at this point
else if (!this.lastChunkWritten) {
this.endOfChunkHit = true;
return $.EOF;
..._processHighRangeCodePoint = function (cp) {
//NOTE: try to peek a surrogate pair
if (this.pos !== this.lastCharPos) {
var nextCp = this.html.charCodeAt(this.pos + 1);
if (isSurrogatePair(cp, nextCp)) {
//NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
this.pos++;
cp = getSurrogatePairCodePoint(cp, nextCp);
//NOTE: add gap that should be avoided during retreat
this._addGap();
}
}
// NOTE: we've hit the end of chunk, stop processing at this point
else if (!this.lastChunkWritten) {
this.endOfChunkHit = true;
return $.EOF;
}
return cp;
}...
return $.LINE_FEED;
}
this.skipNextNewLine = false;
//OPTIMIZATION: first perform check if the code point in the allowed range that covers most common
//HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points.
return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp;
};
Preprocessor.prototype.retreat = function () {
if (this.pos === this.lastGapPos) {
this.lastGapPos = this.gapStack.pop();
this.pos--;
}
...advance = function () {
this.pos++;
if (this.pos > this.lastCharPos) {
if (!this.lastChunkWritten)
this.endOfChunkHit = true;
return $.EOF;
}
var cp = this.html.charCodeAt(this.pos);
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
//must be ignored.
if (this.skipNextNewLine && cp === $.LINE_FEED) {
this.skipNextNewLine = false;
this._addGap();
return this.advance();
}
//NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
if (cp === $.CARRIAGE_RETURN) {
this.skipNextNewLine = true;
return $.LINE_FEED;
}
this.skipNextNewLine = false;
//OPTIMIZATION: first perform check if the code point in the allowed range that covers most common
//HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points.
return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp;
}...
return false;
};
//Consumption
Tokenizer.prototype._consume = function () {
this.consumedAfterSnapshot++;
return this.preprocessor.advance();
};
Tokenizer.prototype._unconsume = function () {
this.consumedAfterSnapshot--;
this.preprocessor.retreat();
};
...dropParsedChunk = function () {
if (this.pos > this.bufferWaterline) {
this.lastCharPos -= this.pos;
this.droppedBufferSize += this.pos;
this.html = this.html.substring(this.pos);
this.pos = 0;
this.lastGapPos = -1;
this.gapStack = [];
}
}...
//State machine
var _ = Tokenizer.prototype;
//12.2.4.1 Data state
//------------------------------------------------------------------
_[DATA_STATE] = function dataState(cp) {
this.preprocessor.dropParsedChunk();
if (cp === $.AMPERSAND)
this.state = CHARACTER_REFERENCE_IN_DATA_STATE;
else if (cp === $.LESS_THAN_SIGN)
this.state = TAG_OPEN_STATE;
...insertHtmlAtCurrentPos = function (chunk) {
this.html = this.html.substring(0, this.pos + 1) +
chunk +
this.html.substring(this.pos + 1, this.html.length);
this.lastCharPos = this.html.length - 1;
this.endOfChunkHit = false;
}...
ParserStream.prototype._resume = function () {
if (!this.pausedByScript)
throw new Error('Parser was already resumed');
while (this.pendingHtmlInsertions.length) {
var html = this.pendingHtmlInsertions.pop();
this.parser.tokenizer.insertHtmlAtCurrentPos(html);
}
this.pausedByScript = false;
//NOTE: keep parsing if we don't wait for the next input chunk
if (this.parser.tokenizer.active)
this._runParsingLoop();
...retreat = function () {
if (this.pos === this.lastGapPos) {
this.lastGapPos = this.gapStack.pop();
this.pos--;
}
this.pos--;
}...
Tokenizer.prototype._hibernationSnapshot = function () {
this.consumedAfterSnapshot = 0;
};
Tokenizer.prototype._ensureHibernation = function () {
if (this.preprocessor.endOfChunkHit) {
for (; this.consumedAfterSnapshot > 0; this.consumedAfterSnapshot--)
this.preprocessor.retreat();
this.active = false;
this.tokenQueue.push({type: Tokenizer.HIBERNATION_TOKEN});
return true;
}
...write = function (chunk, isLastChunk) {
if (this.html)
this.html += chunk;
else
this.html = chunk;
this.lastCharPos = this.html.length - 1;
this.endOfChunkHit = false;
this.lastChunkWritten = isLastChunk;
}...
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
return document;
};
Parser.prototype.parseFragment = function (html, fragmentContext) {
//NOTE: use <template> element as a fragment context if context element was not provided,
...assign = function (tokenizer) {
//NOTE: obtain Tokenizer proto this way to avoid module circular references
var tokenizerProto = Object.getPrototypeOf(tokenizer),
tokenStartOffset = -1,
tokenCol = -1,
tokenLine = 1,
isEol = false,
lineStartPos = 0,
col = -1,
line = 1;
function attachLocationInfo(token) {
token.location = {
line: tokenLine,
col: tokenCol,
startOffset: tokenStartOffset,
endOffset: -1
};
}
//NOTE: patch consumption method to track line/col information
tokenizer._consume = function () {
var cp = tokenizerProto._consume.call(this);
//NOTE: LF should be in the last column of the line
if (isEol) {
isEol = false;
line++;
lineStartPos = this.preprocessor.sourcePos;
}
if (cp === $.LINE_FEED)
isEol = true;
col = this.preprocessor.sourcePos - lineStartPos + 1;
return cp;
};
tokenizer._unconsume = function () {
tokenizerProto._unconsume.call(this);
isEol = false;
col = this.preprocessor.sourcePos - lineStartPos + 1;
};
//NOTE: patch token creation methods and attach location objects
tokenizer._createStartTagToken = function () {
tokenizerProto._createStartTagToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createEndTagToken = function () {
tokenizerProto._createEndTagToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createCommentToken = function () {
tokenizerProto._createCommentToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createDoctypeToken = function (initialName) {
tokenizerProto._createDoctypeToken.call(this, initialName);
attachLocationInfo(this.currentToken);
};
tokenizer._createCharacterToken = function (type, ch) {
tokenizerProto._createCharacterToken.call(this, type, ch);
attachLocationInfo(this.currentCharacterToken);
};
tokenizer._createAttr = function (attrNameFirstCh) {
tokenizerProto._createAttr.call(this, attrNameFirstCh);
this.currentAttrLocation = {
line: line,
col: col,
startOffset: this.preprocessor.sourcePos,
endOffset: -1
};
};
tokenizer._leaveAttrName = function (toState) {
tokenizerProto._leaveAttrName.call(this, toState);
this._attachCurrentAttrLocationInfo();
};
tokenizer._leaveAttrValue = function (toState) {
tokenizerProto._leaveAttrValue.call(this, toState);
this._attachCurrentAttrLocationInfo();
};
tokenizer._attachCurrentAttrLocationInfo = function () {
this.currentAttrLocation.endOffset = this.preprocessor.sourcePos;
if (!this.currentToken.location.attrs)
this.currentToken.location.attrs = Object.create(null);
this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation;
};
//NOTE: patch token emission methods to determine end location
tokenizer._emitCurrentToken = function () {
//NOTE: if we have pending character token make it's end location equal to the
//current token's start location.
if (this.currentCharacterToken)
this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset;
this.currentToken.location.endOffset = this.preprocessor.sourcePos + 1;
tokenizerProto._emitCurrentToken.call(this);
};
tokenizer._emitCurrentCharacterToken = function () {
//NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(),
//then set it's location at the current preprocessor position.
//We don't need to increment preprocessor position, since character token
//emission is always forced by the start of the next character token here.
//So, we already have advanced posi ......
var Parser = module.exports = function (options) {
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.treeAdapter = this.options.treeAdapter;
this.pendingScript = null;
if (this.options.locationInfo)
locationInfoMixin.assign(this);
};
// API
Parser.prototype.parse = function (html) {
var document = this.treeAdapter.createDocument();
this._bootstrap(document, null);
...