/**
 * Reads an HTML file as UTF-8 and hands the converted text to a callback.
 *
 * @param {string} file - Path passed straight to fs.readFile.
 * @param {Object|Function} options - Conversion options, or the callback
 *   itself when the two-argument form is used.
 * @param {Function} [callback] - Called as callback(err) when reading fails,
 *   otherwise as callback(null, text).
 */
fromFile = function (file, options, callback) {
  var done = callback;
  var settings = options;

  // Two-argument form: the second argument is the callback.
  if (!done) {
    done = options;
    settings = {};
  }

  fs.readFile(file, 'utf8', function (readError, contents) {
    if (readError) {
      return done(readError);
    }
    return done(null, htmlToText(contents, settings));
  });
};
## Usage
You can read from a file via:
```javascript
var htmlToText = require('html-to-text');
htmlToText.fromFile(path.join(__dirname, 'test.html'), {
tables: ['#invoice', '.address']
}, (err, text) => {
if (err) return console.error(err);
console.log(text);
});
```
/**
 * Converts an HTML string by delegating to htmlToText.
 *
 * @param {string} str - HTML markup to convert.
 * @param {Object} [options] - Conversion options; an empty object is used
 *   when this argument is missing or falsy.
 * @returns {*} Whatever htmlToText returns for the given input.
 */
fromString = function (str, options) {
  var settings = options || {};
  return htmlToText(str, settings);
};
```
or directly from a string:
```javascript
var htmlToText = require('html-to-text');
var text = htmlToText.fromString('<h1>Hello World</h1>', {
wordwrap: 130
});
console.log(text);
```
### Options:
/**
 * Renders an anchor element: the text of its children, optionally followed
 * by the link target, passed through formatText.
 *
 * @param {Object} elem - Anchor node; reads `children`, `attribs.href` and
 *   `trimLeadingSpace`.
 * @param {Function} fn - Called as fn(children, options) to render the
 *   anchor's children.
 * @param {Object} options - Reads `ignoreHref`, `linkHrefBaseUrl`,
 *   `hideLinkHrefIfSameAsText`, `noLinkBrackets`; `lineCharCount` is saved
 *   before rendering the children and written back afterwards.
 * @returns {*} The result of formatText for the assembled anchor text.
 */
function formatAnchor(elem, fn, options) {
  // Snapshot the line position so it can be restored once the children
  // have been rendered (presumably fn may move it - confirm against walk).
  var savedLineCharCount = options.lineCharCount;

  // The anchor text is always rendered, even when the href is ignored.
  var innerText = fn(elem.children || [], options) || '';
  var output = elem.trimLeadingSpace ? _s.lstrip(innerText) : innerText;
  var href = '';

  if (!options.ignoreHref) {
    if (elem.attribs && elem.attribs.href) {
      // Show bare addresses instead of "mailto:" links.
      href = elem.attribs.href.replace(/^mailto\:/, '');
    }

    if (href) {
      // Root-relative links get the configured base URL prepended.
      if (options.linkHrefBaseUrl && href.indexOf('/') === 0) {
        href = options.linkHrefBaseUrl + href;
      }

      // The href is appended unless hiding was requested and it merely
      // repeats the (newline-stripped) link text.
      var showHref = !options.hideLinkHrefIfSameAsText ||
        href !== _s.replaceAll(output, '\n', '');
      if (showHref) {
        output += options.noLinkBrackets ? ' ' + href : ' [' + href + ']';
      }
    }
  }

  options.lineCharCount = savedLineCharCount;

  return formatText({
    data: output || href,
    trimLeadingSpace: elem.trimLeadingSpace
  }, options);
}
case 'img':
result += format.image(elem, options);
break;
case 'a':
// Inline element needs its leading space to be trimmed if `result`
// currently ends with whitespace
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
result += format.anchor(elem, walk, options);
break;
case 'p':
result += format.paragraph(elem, walk, options);
break;
case 'h1':
case 'h2':
case 'h3':
/**
 * Renders a heading element as a single line of text.
 *
 * @param {Object} elem - Heading node whose `children` are rendered.
 * @param {Function} fn - Called as fn(children, options) to render them.
 * @param {Object} options - When `uppercaseHeadings` is truthy the text is
 *   upper-cased.
 * @returns {string} The heading text followed by one newline.
 */
function formatHeading(elem, fn, options) {
  var text = fn(elem.children, options);
  return (options.uppercaseHeadings ? text.toUpperCase() : text) + '\n';
}
break;
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
result += format.heading(elem, walk, options);
break;
case 'br':
result += format.lineBreak(elem, walk, options);
break;
case 'hr':
result += format.horizontalLine(elem, walk, options);
break;
/**
 * Renders a horizontal rule as a line of dashes.
 *
 * The rule is `options.wordwrap` characters wide. When word wrapping is
 * disabled (`wordwrap` is null, false, 0 or otherwise not a positive number)
 * a fixed fallback width is used, so the rule does not collapse to an empty
 * line.
 *
 * @param {Object} elem - The hr node (not read).
 * @param {Function} fn - Child renderer (not used; kept for the common
 *   formatter signature).
 * @param {Object} options - Reads `wordwrap`.
 * @returns {string} A newline, the dashes, then two newlines.
 */
function formatHorizontalLine(elem, fn, options) {
  // Width used when there is no wrap column to size the rule against.
  var FALLBACK_WIDTH = 40;
  var width = options.wordwrap > 0 ? options.wordwrap : FALLBACK_WIDTH;
  return '\n' + _s.repeat('-', width) + '\n\n';
}
case 'h6':
result += format.heading(elem, walk, options);
break;
case 'br':
result += format.lineBreak(elem, walk, options);
break;
case 'hr':
result += format.horizontalLine(elem, walk, options);
break;
case 'ul':
result += format.unorderedList(elem, walk, options);
break;
case 'ol':
result += format.orderedList(elem, walk, options);
break;
/**
 * Renders an image element as "<alt> [<src>]".
 *
 * @param {Object} elem - Image node; reads `attribs.alt` and `attribs.src`.
 * @param {Object} options - `ignoreImage` suppresses all output;
 *   `decodeOptions` is forwarded to he.decode for the alt text.
 * @returns {string} The decoded alt text and/or the bracketed source,
 *   separated by one space when both are present; '' when neither is.
 */
function formatImage(elem, options) {
  if (options.ignoreImage) {
    return '';
  }

  var attribs = elem.attribs || {};
  var altText = attribs.alt ? he.decode(attribs.alt, options.decodeOptions) : '';
  // A separator is only needed when both parts are present.
  var separator = attribs.alt && attribs.src ? ' ' : '';
  var srcText = attribs.src ? '[' + attribs.src + ']' : '';

  return altText + separator + srcText;
}
}
var whiteSpaceRegex = /\s$/;
_.each(dom, function(elem) {
switch(elem.type) {
case 'tag':
switch(elem.name.toLowerCase()) {
case 'img':
result += format.image(elem, options);
break;
case 'a':
// Inline element needs its leading space to be trimmed if `result`
// currently ends with whitespace
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
result += format.anchor(elem, walk, options);
break;
/**
 * Renders a line break: a newline followed by whatever the element's
 * children render to.
 *
 * @param {Object} elem - The br node; its `children` are passed to fn.
 * @param {Function} fn - Called as fn(children, options).
 * @param {Object} options - Forwarded to fn unchanged.
 * @returns {string} '\n' followed by the rendered children.
 */
function formatLineBreak(elem, fn, options) {
  var trailing = fn(elem.children, options);
  return '\n' + trailing;
}
case 'h3':
case 'h4':
case 'h5':
case 'h6':
result += format.heading(elem, walk, options);
break;
case 'br':
result += format.lineBreak(elem, walk, options);
break;
case 'hr':
result += format.horizontalLine(elem, walk, options);
break;
case 'ul':
result += format.unorderedList(elem, walk, options);
break;
/**
 * Renders one list item with the given prefix and a hanging indent.
 *
 * @param {string} prefix - Bullet or number text placed before the item.
 * @param {Object} elem - List item node whose `children` are rendered.
 * @param {Function} fn - Called as fn(children, options).
 * @param {Object} options - Cloned before use; a truthy `wordwrap` on the
 *   clone is reduced by the prefix length so nested text wraps earlier.
 * @returns {string} Prefix, the item text with every newline followed by
 *   prefix-width spaces, and a trailing newline.
 */
function formatListItem(prefix, elem, fn, options) {
  // Work on a copy so the caller's wordwrap setting is left untouched.
  var itemOptions = _.clone(options);
  var indentWidth = prefix.length;

  if (itemOptions.wordwrap) {
    itemOptions.wordwrap -= indentWidth;
  }

  var rendered = fn(elem.children, itemOptions);

  // Continuation lines are aligned under the text, not under the prefix.
  var continuation = '\n' + _s.repeat(' ', indentWidth);
  var indented = rendered.split('\n').join(continuation);

  return prefix + indented + '\n';
}
/**
 * Renders an ordered list, one formatListItem call per non-whitespace child.
 *
 * Supported numbering styles are decimal (`type="1"`, the default), lower
 * case letters (`type="a"`) and upper case letters (`type="A"`). Any other
 * `type` value (for example the roman styles "i"/"I") falls back to decimal
 * numbering instead of throwing. A missing `attribs` object or a
 * non-numeric `start` attribute is treated as "start at 1".
 *
 * @param {Object} elem - The ol node; reads `children`, `attribs.type` and
 *   `attribs.start`.
 * @param {Function} fn - Child renderer, forwarded to formatListItem.
 * @param {Object} options - Forwarded to formatListItem.
 * @returns {string} The rendered items followed by one extra newline.
 */
function formatOrderedList(elem, fn, options) {
  var result = '';
  var attribs = elem.attribs || {};
  var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) {
    return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
  });
  // Label generators keyed by OL type; `start` is the zero-based offset.
  var typeFunctions = {
    1: function(start, i) { return i + 1 + start; },
    a: function(start, i) { return String.fromCharCode(i + start + 97); },
    A: function(start, i) { return String.fromCharCode(i + start + 65); }
  };
  // Determine type; unsupported values use decimal numbering. The own-property
  // check keeps inherited keys such as "constructor" from being picked up.
  var olType = attribs.type || '1';
  if (!Object.prototype.hasOwnProperty.call(typeFunctions, olType)) {
    olType = '1';
  }
  // Make sure there are list items present
  if (nonWhiteSpaceChildren.length) {
    // Calculate initial start from ol attribute
    var start = Number(attribs.start || '1') - 1;
    if (!isFinite(start)) {
      start = 0;
    }
    // Calculate the maximum length to i.
    var maxLength = (nonWhiteSpaceChildren.length + start).toString().length;
    _.each(nonWhiteSpaceChildren, function(child, i) {
      // Use different function depending on type
      var index = typeFunctions[olType](start, i);
      // Calculate the needed spacing for nice indentation.
      var spacing = maxLength - index.toString().length;
      var prefix = (olType === '1')
        ? ' ' + index + '. ' + _s.repeat(' ', spacing)
        : index + '. ';
      result += formatListItem(prefix, child, fn, options);
    });
  }
  return result + '\n';
}
case 'hr':
result += format.horizontalLine(elem, walk, options);
break;
case 'ul':
result += format.unorderedList(elem, walk, options);
break;
case 'ol':
result += format.orderedList(elem, walk, options);
break;
case 'pre':
var newOptions = _(options).clone();
newOptions.isInPre = true;
result += format.paragraph(elem, walk, newOptions);
break;
case 'table':
/**
 * Renders a paragraph followed by its separating newline(s).
 *
 * @param {Object} elem - Paragraph node whose `children` are rendered.
 * @param {Function} fn - Called as fn(children, options).
 * @param {Object} options - A truthy `singleNewLineParagraphs` ends the
 *   paragraph with one newline instead of two.
 * @returns {string} The rendered children plus '\n' or '\n\n'.
 */
function formatParagraph(elem, fn, options) {
  var body = fn(elem.children, options);
  var terminator = options.singleNewLineParagraphs ? '\n' : '\n\n';
  return body + terminator;
}
case 'a':
// Inline element needs its leading space to be trimmed if `result`
// currently ends with whitespace
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
result += format.anchor(elem, walk, options);
break;
case 'p':
result += format.paragraph(elem, walk, options);
break;
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
/**
 * Renders a table element as text.
 *
 * Walks the element's children, collects the text lines of every cell into
 * `table` (an array of rows, each row an array of column strings) and
 * returns tableToString(table).
 *
 * @param {Object} elem - The table node whose `children` are scanned.
 * @param {Function} fn - Called as fn(children, options) to render the
 *   contents of each td cell.
 * @param {Object} options - Forwarded to fn and formatHeading.
 * @returns {*} The result of tableToString for the collected rows.
 */
...function formatTable(elem, fn, options) {
var table = [];
_.each(elem.children, tryParseRows);
return tableToString(table);
// Declared after the return statement; it is usable above because function
// declarations are hoisted.
function tryParseRows(elem) {
// Only tag nodes can contain rows or cells.
if (elem.type !== 'tag') {
return;
}
switch (elem.name.toLowerCase()) {
// Wrapper elements: look for rows one level further down.
case "thead":
case "tbody":
case "tfoot":
case "center":
_.each(elem.children, tryParseRows);
return;
case 'tr':
// Despite its name, `rows` first holds one entry per cell; each entry is
// the array of text lines rendered for that cell.
var rows = [];
_.each(elem.children, function(elem) {
var tokens, times;
if (elem.type === 'tag') {
switch (elem.name.toLowerCase()) {
case 'th':
// Header cells are rendered through formatHeading; falsy (empty) lines
// are dropped by _.compact.
// NOTE(review): unlike td below, colspan is not handled for th.
tokens = formatHeading(elem, fn, options).split('\n');
rows.push(_.compact(tokens));
break;
case 'td':
tokens = fn(elem.children, options).split('\n');
rows.push(_.compact(tokens));
// Fill colspans with empty values
if (elem.attribs && elem.attribs.colspan) {
// A colspan of N adds N - 1 empty cells; `|| 0` covers a non-numeric
// value, for which the subtraction yields NaN.
times = elem.attribs.colspan - 1 || 0;
_.times(times, function() {
rows.push(['']);
});
}
break;
}
}
});
// Zip the per-cell line arrays so that each resulting row holds the n-th
// text line of every cell.
rows = helper.arrayZip(rows);
_.each(rows, function(row) {
// Cells with fewer lines than the tallest cell leave falsy gaps after the
// zip; replace them with empty strings.
row = _.map(row, function(col) {
return col || '';
});
table.push(row);
});
break;
}
}
}...
case 'pre':
var newOptions = _(options).clone();
newOptions.isInPre = true;
result += format.paragraph(elem, walk, newOptions);
break;
case 'table':
result = containsTable(elem.attribs, options.tables)
? result + format.table(elem, walk, options)
: walk(elem.children || [], options, result);
break;
default:
result = walk(elem.children || [], options, result);
}
break;
case 'text':
/**
 * Renders a text node: decodes HTML entities, then word-wraps the result
 * unless the node sits inside preformatted content.
 *
 * @param {Object} elem - Text node; reads `data` and `trimLeadingSpace`.
 * @param {Object} options - `decodeOptions` is forwarded to he.decode;
 *   `isInPre` returns the decoded text untouched; otherwise the options are
 *   forwarded to helper.wordwrap.
 * @returns {*} The decoded text, or the result of helper.wordwrap.
 */
function formatText(elem, options) {
  var decoded = he.decode(elem.data || "", options.decodeOptions);

  // Preformatted text keeps its whitespace and is never wrapped.
  if (options.isInPre) {
    return decoded;
  }

  var input = elem.trimLeadingSpace ? _s.lstrip(decoded) : decoded;
  return helper.wordwrap(input, options);
}
}
break;
case 'text':
if (elem.data !== '\r\n') {
// Text needs its leading space to be trimmed if `result`
// currently ends with whitespace
elem.trimLeadingSpace = whiteSpaceRegex.test(result);
result += format.text(elem, options);
}
break;
default:
if (!_.include(SKIP_TYPES, elem.type)) {
result = walk(elem.children || [], options, result);
}
}
/**
 * Renders an unordered list, one ' * ' bullet per non-whitespace child.
 *
 * @param {Object} elem - The ul node; a missing `children` is treated as
 *   an empty list.
 * @param {Function} fn - Child renderer, forwarded to formatListItem.
 * @param {Object} options - Forwarded to formatListItem.
 * @returns {string} The rendered items followed by one extra newline.
 */
function formatUnorderedList(elem, fn, options) {
  var children = elem.children || [];

  // Drop text nodes that match whiteSpaceRegex; every other node is an item.
  var items = children.filter(function (node) {
    return !(node.type === 'text' && whiteSpaceRegex.test(node.data));
  });

  var output = '';
  _.each(items, function (item) {
    output += formatListItem(' * ', item, fn, options);
  });

  return output + '\n';
}
case 'br':
result += format.lineBreak(elem, walk, options);
break;
case 'hr':
result += format.horizontalLine(elem, walk, options);
break;
case 'ul':
result += format.unorderedList(elem, walk, options);
break;
case 'ol':
result += format.orderedList(elem, walk, options);
break;
case 'pre':
var newOptions = _(options).clone();
newOptions.isInPre = true;
/**
 * Zips the inner arrays of `array` together by passing each of them as a
 * separate argument to _.zip.
 *
 * @param {Array<Array>} array - The arrays to zip.
 * @returns {*} Whatever _.zip returns for those arguments.
 */
function arrayZip(array) {
  var zip = _.zip;
  return zip.apply(_, array);
}
// Convert all rows to lengths
var widths = _.map(table, function(row) {
return _.map(row, function(col) {
return col.length;
});
});
// Transpose rows and columns
widths = helper.arrayZip(widths);
// Determine the max values for each column
widths = _.map(widths, function(col) {
return _.max(col);
});
// Build the table
var text = '';
/**
 * Splits a simple CSS-like selector such as "div.a.b#c" into its parts.
 *
 * @param {string} tagString - Selector made of an optional leading element
 *   name followed by any number of ".class" and "#id" parts.
 * @returns {{element: string, classes: string[], ids: string[]}} The leading
 *   word characters ('' when there are none), every name following a '.',
 *   and every name following a '#'.
 */
function splitCssSearchTag(tagString) {
  // Collects the first capture group of every match of `pattern`.
  function collect(pattern) {
    var matches = [];
    var match = pattern.exec(tagString);
    while (match !== null) {
      matches.push(match[1]);
      match = pattern.exec(tagString);
    }
    return matches;
  }

  return {
    element: /(^\w*)/g.exec(tagString)[1],
    classes: collect(/\.([\d\w-]*)/g),
    ids: collect(/#([\d\w-]*)/g)
  };
}
}
return _s.strip(result);
}
function filterBody(dom, options, baseElement) {
var result = null;
var splitTag = helper.splitCssSearchTag(baseElement);
function walk(dom) {
if (result) return;
_.each(dom, function(elem) {
if (result) return;
if (elem.name === splitTag.element) {
var documentClasses = elem.attribs && elem.attribs.class ? elem.attribs.class.split(" ") : [];
/**
 * Wraps `text` word by word so that lines do not exceed options.wordwrap.
 *
 * Reads from options: `wordwrap` (maximum line length; wrapping only
 * happens when it is truthy or exactly 0), `preserveNewlines`,
 * `lineCharCount` (number of characters already on the current line) and
 * `longWordSplit` (enables splitLongWord for words longer than the
 * maximum). This function does not assign to `options` itself.
 *
 * @param {string} text - The text to wrap.
 * @param {Object} options - See above.
 * @returns {string} The wrapped text. A single leading space is kept when
 *   `text` starts with one; trailing whitespace is trimmed unless `text`
 *   ends with a space, in which case the result ends with a space too.
 */
...function wordwrap(text, options) {
var max = options.wordwrap;
var preserveNewlines = options.preserveNewlines;
// Start counting from the characters already present on the current line.
var length = options.lineCharCount;
// Preserve leading space
var result = _s.startsWith(text, ' ') ? ' ' : '';
length += result.length;
// Words collected for the line currently being built.
var buffer = [];
// Split the text into words, decide to preserve new lines or not.
// When preserving, each newline gets a space appended so that it stays
// attached to the end of the preceding word after splitting on spaces.
var words = preserveNewlines
? text.replace(/\n/g, '\n ').split(/\ +/)
: _s.words(text);
// Determine where to end line word by word.
_.each(words, function(word) {
// Add buffer to result if we can't fit any more words in the buffer.
// The second comparison measures the word only up to its first newline;
// for a word without a newline indexOf returns -1.
if ((max || max === 0) && length > 0 && ((length + word.length > max) || (length + word.indexOf('\n') > max))) {
// Concat buffer and add it to the result
result += buffer.join(' ') + '\n';
// Reset buffer and length
// (setting the array's length to 0 empties it in place).
buffer.length = length = 0;
}
// Check if the current word is long enough to be wrapped
if ((max || max === 0) && (options.longWordSplit) && (word.length > max)) {
word = splitLongWord(word, options);
}
buffer.push(word);
// If the word contains a newline then restart the count and add the buffer to the result
if (word.indexOf('\n') !== -1) {
result += buffer.join(' ');
// Reset the buffer, let the length include any characters after the last newline
buffer.length = 0;
length = word.length - (word.lastIndexOf('\n') + 1);
// If there are characters after the newline, add a space and increase the length by 1
if (length) {
result += ' ';
length++;
}
} else {
// Add word length + one whitespace
length += word.length + 1;
}
});
// Add the rest to the result.
result += buffer.join(' ');
// Preserve trailing space
if (!_s.endsWith(text, ' ')) {
result = _s.rtrim(result);
} else if (!_s.endsWith(result, ' ')) {
result = result + ' ';
}
return result;
}...
/**
 * Renders a text node: decodes HTML entities, then word-wraps the result
 * unless the node sits inside preformatted content.
 *
 * @param {Object} elem - Text node; reads `data` and `trimLeadingSpace`.
 * @param {Object} options - `decodeOptions` is forwarded to he.decode;
 *   `isInPre` returns the decoded text untouched; otherwise the options are
 *   forwarded to helper.wordwrap.
 * @returns {*} The decoded text, or the result of helper.wordwrap.
 */
function formatText(elem, options) {
  var decoded = he.decode(elem.data || "", options.decodeOptions);

  // Preformatted text keeps its whitespace and is never wrapped.
  if (options.isInPre) {
    return decoded;
  }

  var input = elem.trimLeadingSpace ? _s.lstrip(decoded) : decoded;
  return helper.wordwrap(input, options);
}
function formatImage(elem, options) {
if (options.ignoreImage) {
return '';
}
/**
 * Reads an HTML file as UTF-8 and hands the converted text to a callback.
 *
 * @param {string} file - Path passed straight to fs.readFile.
 * @param {Object|Function} options - Conversion options, or the callback
 *   itself when the two-argument form is used.
 * @param {Function} [callback] - Called as callback(err) when reading fails,
 *   otherwise as callback(null, text).
 */
fromFile = function (file, options, callback) {
  var done = callback;
  var settings = options;

  // Two-argument form: the second argument is the callback.
  if (!done) {
    done = options;
    settings = {};
  }

  fs.readFile(file, 'utf8', function (readError, contents) {
    if (readError) {
      return done(readError);
    }
    return done(null, htmlToText(contents, settings));
  });
};
## Usage
You can read from a file via:
```javascript
var htmlToText = require('html-to-text');
htmlToText.fromFile(path.join(__dirname, 'test.html'), {
tables: ['#invoice', '.address']
}, (err, text) => {
if (err) return console.error(err);
console.log(text);
});
```
/**
 * Converts an HTML string by delegating to htmlToText.
 *
 * @param {string} str - HTML markup to convert.
 * @param {Object} [options] - Conversion options; an empty object is used
 *   when this argument is missing or falsy.
 * @returns {*} Whatever htmlToText returns for the given input.
 */
fromString = function (str, options) {
  var settings = options || {};
  return htmlToText(str, settings);
};
```
or directly from a string:
```javascript
var htmlToText = require('html-to-text');
var text = htmlToText.fromString('<h1>Hello World</h1>', {
wordwrap: 130
});
console.log(text);
```
### Options:
...