/**
 * Decodes numeric and named character references found in `html`.
 *
 * Matching is driven by `regexDecode` (defined elsewhere in this file); its
 * capture groups are consumed positionally as: decimal digits, decimal
 * semicolon, hexadecimal digits, hexadecimal semicolon, a semicolon-terminated
 * name, a legacy (unterminated) name, and the symbol following a legacy name.
 *
 * @param {string} html - Text that may contain character references.
 * @param {Object} [options] - Merged over `decode.options` via `merge`. This
 *   function reads `strict` and `isAttributeValue`.
 * @returns {string} `html` with each match replaced by its decoded form, or
 *   left untouched where the reference is ambiguous.
 */
decode = function (html, options) {
  options = merge(options, decode.options);
  var strict = options.strict;
  if (strict && regexInvalidEntity.test(html)) {
    parseError('malformed character reference');
  }

  // Shared handling for decimal (`&#119558;`) and hexadecimal (`&#x1D306;`)
  // references: complain about a missing `;` in strict mode, then convert.
  var decodeNumeric = function (digits, radix, semicolon) {
    if (strict && !semicolon) {
      parseError('character reference was not terminated by a semicolon');
    }
    return codePointToSymbol(parseInt(digits, radix), strict);
  };

  return html.replace(regexDecode, function (
    match,
    decDigits,
    decSemicolon,
    hexDigits,
    hexSemicolon,
    namedReference,
    legacyReference,
    next
  ) {
    if (decDigits) {
      return decodeNumeric(decDigits, 10, decSemicolon);
    }
    if (hexDigits) {
      return decodeNumeric(hexDigits, 16, hexSemicolon);
    }
    if (namedReference) {
      // Named reference written with a trailing `;`, e.g. `&copy;`.
      if (has(decodeMap, namedReference)) {
        return decodeMap[namedReference];
      }
      // Unknown name: an ambiguous ampersand. Leave the text as it was.
      if (strict) {
        parseError(
          'named character reference was not terminated by a semicolon'
        );
      }
      return match;
    }
    // Only the legacy alternative is left: a named reference written without
    // a trailing `;`, e.g. `&amp`, optionally followed by one more symbol.
    if (next && options.isAttributeValue) {
      // In an attribute value a legacy reference followed by another symbol
      // is not decoded; only a following `=` is reported in strict mode.
      if (strict && next === '=') {
        parseError('`&` did not start a character reference');
      }
      return match;
    }
    if (strict) {
      parseError(
        'named character reference was not terminated by a semicolon'
      );
    }
    // The legacy alternative only matches known names, so the lookup needs no
    // `has(decodeMapLegacy, …)` guard.
    return decodeMapLegacy[legacyReference] + (next || '');
  });
};
...
he.encode.options.useNamedReferences = true;
// Using the global default setting, which is now `true`:
he.encode('foo © bar ≠ baz 𝌆 qux');
// → 'foo &copy; bar &ne; baz &#x1D306; qux'
```
### `he.decode(html, options)`
This function takes a string of HTML and decodes any named and numerical character references in it using [the algorithm described
in section 12.2.4.69 of the HTML spec](https://html.spec.whatwg.org/multipage/syntax.html#tokenizing-character-references).
```js
he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// → 'foo © bar ≠ baz 𝌆 qux'
```
...
/**
 * Encodes symbols in `string` as HTML character references.
 *
 * Depending on the options, the unsafe symbols `<>"'&` and backtick, all
 * ASCII symbols, and symbols that have a named reference are replaced first.
 * Astral symbols and any remaining symbols matched by `regexBmpWhitelist` are
 * then always replaced by numeric escapes.
 *
 * @param {string} string - The text to encode.
 * @param {Object} [options] - Merged over `encode.options` via `merge`.
 * @param {boolean} [options.strict] - Report a parse error when `string`
 *   matches `regexInvalidRawCodePoint`.
 * @param {boolean} [options.encodeEverything] - Encode every symbol matched by
 *   `regexAsciiWhitelist` as well.
 * @param {boolean} [options.useNamedReferences] - Prefer named references
 *   (taken from `encodeMap`) over numeric escapes where one exists.
 * @param {boolean} [options.allowUnsafeSymbols] - Leave the symbols matched by
 *   `regexEscape` untouched (ignored when `encodeEverything` is set).
 * @param {boolean} [options.decimal] - Use `decEscape` instead of `hexEscape`
 *   for numeric escapes.
 * @returns {string} The encoded text.
 */
encode = function (string, options) {
  options = merge(options, encode.options);
  var strict = options.strict;
  if (strict && regexInvalidRawCodePoint.test(string)) {
    parseError('forbidden code point');
  }
  var encodeEverything = options.encodeEverything;
  var useNamedReferences = options.useNamedReferences;
  var allowUnsafeSymbols = options.allowUnsafeSymbols;
  var escapeCodePoint = options.decimal ? decEscape : hexEscape;

  // Numeric escape for a single UTF-16 code unit.
  var escapeBmpSymbol = function (symbol) {
    return escapeCodePoint(symbol.charCodeAt(0));
  };
  // Named reference for a symbol known to be a key of `encodeMap`; callers
  // only pass matches of regexes that are expected to be built from that map.
  var toNamedReference = function (symbol) {
    return '&' + encodeMap[symbol] + ';';
  };

  if (encodeEverything) {
    // Encode ASCII symbols, using named references if requested & possible.
    string = string.replace(regexAsciiWhitelist, function (symbol) {
      if (useNamedReferences && has(encodeMap, symbol)) {
        return toNamedReference(symbol);
      }
      return escapeBmpSymbol(symbol);
    });
    if (useNamedReferences) {
      string = string
        // Shorten a few escapes that represent two symbols, of which at least
        // one is within the ASCII range. The ASCII half has already been
        // escaped above, so the patterns must match the *escaped* text.
        .replace(/&gt;\u20D2/g, '&nvgt;')
        .replace(/&lt;\u20D2/g, '&nvlt;')
        // NOTE(review): only matches hexadecimal escapes of `f` and `j`, and
        // assumes `hexEscape` emits upper-case hex digits — confirm.
        .replace(/&#x66;&#x6A;/g, '&fjlig;')
        // Encode non-ASCII symbols that can be replaced with a named reference.
        .replace(regexEncodeNonAscii, toNamedReference);
    }
    // Note: any remaining non-ASCII symbols are handled outside of the `if`.
  } else if (useNamedReferences) {
    // Encode `<>"'&` using named character references.
    if (!allowUnsafeSymbols) {
      string = string.replace(regexEscape, toNamedReference);
    }
    string = string
      // Shorten escapes that represent two symbols, of which at least one is
      // `<>"'&`. These only apply when `<` / `>` were escaped just above.
      .replace(/&gt;\u20D2/g, '&nvgt;')
      .replace(/&lt;\u20D2/g, '&nvlt;')
      // Encode non-ASCII symbols that can be replaced with a named reference.
      .replace(regexEncodeNonAscii, toNamedReference);
  } else if (!allowUnsafeSymbols) {
    // Encode `<>"'&` using numeric escapes, now that they’re not handled
    // using named character references.
    string = string.replace(regexEscape, escapeBmpSymbol);
  }

  return string
    // Encode astral symbols.
    .replace(regexAstralSymbols, function (pair) {
      // https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
      var high = pair.charCodeAt(0);
      var low = pair.charCodeAt(1);
      var codePoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000;
      return escapeCodePoint(codePoint);
    })
    // Encode any remaining BMP symbols that are not printable ASCII symbols
    // using a numeric escape.
    .replace(regexBmpWhitelist, escapeBmpSymbol);
};
...
## API
### `he.version`
A string representing the semantic version number.
### `he.encode(text, options)`
This function takes a string of text and encodes (by default) any symbols that aren’t printable ASCII symbols and `&`, `<`,
`>`, `"`, `'`, and `` ` ``, replacing them with character references.
```js
he.encode('foo © bar ≠ baz 𝌆 qux');
// → 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'
```
...
/**
 * Replaces every match of `regexEscape` in `string` with the corresponding
 * entry of `escapeMap` (both defined elsewhere in this file).
 *
 * @param {string} string - The text to escape.
 * @returns {string} The escaped text.
 */
escape = function (string) {
  // The lookup is unguarded: every symbol `regexEscape` can match is expected
  // to be a key of `escapeMap`, so no `has(escapeMap, symbol)` check is made.
  var lookupEscape = function (symbol) {
    return escapeMap[symbol];
  };
  return string.replace(regexEscape, lookupEscape);
};
...
he.decode.options.isAttributeValue = true;
// Using the global default setting, which is now `true`:
he.decode('foo&ampbar');
// → 'foo&ampbar'
```
### `he.escape(text)`
This function takes a string of text and escapes it for use in text contexts in XML or HTML documents. Only the following characters
are escaped: `&`, `<`, `>`, `"`, `'`, and `` ` ``.
```js
he.escape('<img src=\'x\' onerror="prompt(1)">');
// → '&lt;img src=&#x27;x&#x27; onerror=&quot;prompt(1)&quot;&gt;'
```
...
/**
 * Alias for `decode`: decodes named and numerical character references in
 * `html`.
 *
 * Delegates to `decode` rather than carrying a second copy of its body, so
 * the two entry points cannot drift apart. Options are merged over
 * `decode.options` by `decode` itself.
 *
 * @param {string} html - Text that may contain character references.
 * @param {Object} [options] - Passed through to `decode` unchanged.
 * @returns {string} Whatever `decode(html, options)` returns.
 */
unescape = function (html, options) {
  return decode(html, options);
};
...
This function takes a string of text and escapes it for use in text contexts in XML or HTML documents. Only the following characters
are escaped: `&`, `<`, `>`, `"`, `'`, and `` ` ``.
```js
he.escape('<img src=\'x\' onerror="prompt(1)">');
// → '&lt;img src=&#x27;x&#x27; onerror=&quot;prompt(1)&quot;&gt;'
```
### `he.unescape(html, options)`
`he.unescape` is an alias for `he.decode`. It takes a string of HTML and decodes any named and numerical character references in
it.
### Using the `he` binary
To use the `he` binary in your shell, simply install _he_ globally using npm:
...