module html-to-text
1. function html-to-text.fromFile (file, options, callback)
2. function html-to-text.fromString (str, options)
3. object html-to-text.formatter
4. object html-to-text.helper
5. object html-to-text.html_to_text
module html-to-text.formatter
module html-to-text.helper
module html-to-text.html_to_text
1. function html-to-text.html_to_text.fromFile (file, options, callback)
2. function html-to-text.html_to_text.fromString (str, options)

module html-to-text

function html-to-text.fromFile (file, options, callback)

description and source-code

/**
 * Reads `file` as UTF-8 and passes the result of `htmlToText` to `callback`.
 *
 * @param {string} file - Path handed straight to `fs.readFile`.
 * @param {Object} [options] - Forwarded to `htmlToText`. May be omitted, in
 *   which case the second argument is taken to be the callback.
 * @param {Function} callback - Invoked as `callback(err)` when the read fails,
 *   otherwise as `callback(null, text)`.
 */
fromFile = function (file, options, callback) {
  if (!callback) {
    callback = options;
    options = {};
  }
  // Same defaulting as fromString: an explicit null/undefined `options`
  // passed together with a callback must still reach htmlToText as an object.
  options = options || {};
  fs.readFile(file, 'utf8', function (err, str) {
    if (err) return callback(err);
    return callback(null, htmlToText(str, options));
  });
}

example usage

...

## Usage
You can read from a file via:

```javascript
var htmlToText = require('html-to-text');

htmlToText.fromFile(path.join(__dirname, 'test.html'), {
	tables: ['#invoice', '.address']
}, (err, text) => {
	if (err) return console.error(err);
	console.log(text);
});
```
...

function html-to-text.fromString (str, options)

description and source-code

/**
 * Converts an HTML string by delegating to `htmlToText`.
 *
 * @param {string} str - Markup to convert.
 * @param {Object} [options] - Forwarded to `htmlToText`; any falsy value is
 *   replaced by a fresh empty object.
 * @returns {*} Whatever `htmlToText` returns.
 */
fromString = function (str, options) {
  var settings = options ? options : {};
  return htmlToText(str, settings);
}

example usage

...
```

or directly from a string:

```javascript
var htmlToText = require('html-to-text');

var text = htmlToText.fromString('<h1>Hello World</h1>', {
	wordwrap: 130
});
console.log(text);
```

### Options:
...

module html-to-text.formatter

function html-to-text.formatter.anchor (elem, fn, options)

description and source-code

/**
 * Renders an anchor element as its text followed by its link target.
 *
 * @param {Object} elem - Anchor node; reads `children`, `attribs.href` and
 *   `trimLeadingSpace`.
 * @param {Function} fn - Called as `fn(children, options)` to render the
 *   anchor's children to text.
 * @param {Object} options - Reads `ignoreHref`, `linkHrefBaseUrl`,
 *   `hideLinkHrefIfSameAsText` and `noLinkBrackets`. `lineCharCount` is put
 *   back to its entry value before the final `formatText` call.
 * @returns {*} Result of `formatText` for the assembled string.
 */
function formatAnchor(elem, fn, options) {
  // Remember the counter so anything `fn` does to it can be undone below.
  var savedLineCharCount = options.lineCharCount;

  // The anchor text is always rendered, even when the href is ignored.
  var label = fn(elem.children || [], options) || '';
  if (elem.trimLeadingSpace) {
    label = _s.lstrip(label);
  }

  var target = '';
  if (!options.ignoreHref && elem.attribs && elem.attribs.href) {
    target = elem.attribs.href.replace(/^mailto\:/, '');
  }

  if (target) {
    // Hrefs starting with '/' get the configured base URL prepended.
    if (options.linkHrefBaseUrl && target.indexOf('/') === 0) {
      target = options.linkHrefBaseUrl + target;
    }
    var duplicatesLabel = options.hideLinkHrefIfSameAsText &&
      target === _s.replaceAll(label, '\n', '');
    if (!duplicatesLabel) {
      label += options.noLinkBrackets ? ' ' + target : ' [' + target + ']';
    }
  }

  options.lineCharCount = savedLineCharCount;

  return formatText({ data: label || target, trimLeadingSpace: elem.trimLeadingSpace }, options);
}

example usage

...
case 'img':
  result += format.image(elem, options);
  break;
case 'a':
  // Inline element needs its leading space to be trimmed if `result`
  // currently ends with whitespace
  elem.trimLeadingSpace = whiteSpaceRegex.test(result);
  result += format.anchor(elem, walk, options);
  break;
case 'p':
  result += format.paragraph(elem, walk, options);
  break;
case 'h1':
case 'h2':
case 'h3':
...

function html-to-text.formatter.heading (elem, fn, options)

description and source-code

/**
 * Renders a heading element followed by a single newline.
 *
 * @param {Object} elem - Heading node whose `children` are rendered.
 * @param {Function} fn - Called as `fn(children, options)`; must return a string.
 * @param {Object} options - When `uppercaseHeadings` is truthy the text is
 *   upper-cased.
 * @returns {string} Heading text terminated by '\n'.
 */
function formatHeading(elem, fn, options) {
  var text = fn(elem.children, options);
  var rendered = options.uppercaseHeadings ? text.toUpperCase() : text;
  return rendered + '\n';
}

example usage

...
  break;
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
  result += format.heading(elem, walk, options);
  break;
case 'br':
  result += format.lineBreak(elem, walk, options);
  break;
case 'hr':
  result += format.horizontalLine(elem, walk, options);
  break;
...

function html-to-text.formatter.horizontalLine (elem, fn, options)

description and source-code

/**
 * Renders a horizontal rule as a line of dashes surrounded by newlines.
 *
 * @param {Object} elem - Unused.
 * @param {Function} fn - Unused.
 * @param {Object} options - `wordwrap` gives the width of the rule.
 * @returns {string} '\n' + dashes + '\n\n'.
 */
function formatHorizontalLine(elem, fn, options) {
  // A falsy `wordwrap` would repeat the dash zero times and the rule would
  // disappear from the output, so fall back to a fixed width in that case.
  var DEFAULT_RULE_WIDTH = 40;
  var width = options.wordwrap || DEFAULT_RULE_WIDTH;
  return '\n' + _s.repeat('-', width) + '\n\n';
}

example usage

...
case 'h6':
  result += format.heading(elem, walk, options);
  break;
case 'br':
  result += format.lineBreak(elem, walk, options);
  break;
case 'hr':
  result += format.horizontalLine(elem, walk, options);
  break;
case 'ul':
  result += format.unorderedList(elem, walk, options);
  break;
case 'ol':
  result += format.orderedList(elem, walk, options);
  break;
...

function html-to-text.formatter.image (elem, options)

description and source-code

/**
 * Renders an image element as "<alt> [<src>]".
 *
 * @param {Object} elem - Image node; reads `attribs.alt` and `attribs.src`.
 * @param {Object} options - `ignoreImage` suppresses all output;
 *   `decodeOptions` is forwarded to `he.decode` for the alt text.
 * @returns {string} Decoded alt text and/or bracketed src, or '' when neither
 *   is present or images are ignored.
 */
function formatImage(elem, options) {
  if (options.ignoreImage) {
    return '';
  }

  var attrs = elem.attribs || {};
  var pieces = [];
  if (attrs.alt) {
    pieces.push(he.decode(attrs.alt, options.decodeOptions));
  }
  if (attrs.src) {
    pieces.push('[' + attrs.src + ']');
  }
  // A single space separates the alt text from the source when both exist.
  return pieces.join(' ');
}

example usage

...
}
var whiteSpaceRegex = /\s$/;
_.each(dom, function(elem) {
  switch(elem.type) {
    case 'tag':
      switch(elem.name.toLowerCase()) {
        case 'img':
          result += format.image(elem, options);
          break;
        case 'a':
          // Inline element needs its leading space to be trimmed if `result`
          // currently ends with whitespace
          elem.trimLeadingSpace = whiteSpaceRegex.test(result);
          result += format.anchor(elem, walk, options);
          break;
...

function html-to-text.formatter.lineBreak (elem, fn, options)

description and source-code

/**
 * Renders a line break: a newline followed by whatever the element's
 * children render to.
 *
 * @param {Object} elem - Node whose `children` are rendered.
 * @param {Function} fn - Called as `fn(children, options)`.
 * @param {Object} options - Passed through to `fn` untouched.
 * @returns {string} '\n' followed by the rendered children.
 */
function formatLineBreak(elem, fn, options) {
  var trailing = fn(elem.children, options);
  return '\n' + trailing;
}

example usage

...
case 'h3':
case 'h4':
case 'h5':
case 'h6':
  result += format.heading(elem, walk, options);
  break;
case 'br':
  result += format.lineBreak(elem, walk, options);
  break;
case 'hr':
  result += format.horizontalLine(elem, walk, options);
  break;
case 'ul':
  result += format.unorderedList(elem, walk, options);
  break;
...

function html-to-text.formatter.listItem (prefix, elem, fn, options)

description and source-code

/**
 * Renders one list item: the prefix, the item text with continuation lines
 * indented to the prefix width, and a trailing newline.
 *
 * @param {string} prefix - Marker placed before the first line (e.g. ' * ').
 * @param {Object} elem - Item node whose `children` are rendered.
 * @param {Function} fn - Called as `fn(children, options)`; must return a string.
 * @param {Object} options - Cloned before use, so the caller's object is not
 *   modified. A truthy `wordwrap` is reduced by the prefix length in the clone.
 * @returns {string} The formatted item.
 */
function formatListItem(prefix, elem, fn, options) {
  var itemOptions = _.clone(options);
  // Nested content has less room because the prefix occupies part of the line.
  if (itemOptions.wordwrap) {
    itemOptions.wordwrap = itemOptions.wordwrap - prefix.length;
  }

  var lines = fn(elem.children, itemOptions).split('\n');
  var indent = _s.repeat(' ', prefix.length);

  // Every line after the first starts underneath the text, not the marker.
  return prefix + lines.join('\n' + indent) + '\n';
}

example usage
```
n/a
```

function html-to-text.formatter.orderedList (elem, fn, options)

description and source-code

/**
 * Renders an ordered list, one `formatListItem` call per non-blank child.
 *
 * Supported `type` attributes are '1' (numbers), 'a' and 'A' (letters). A
 * missing or unsupported type falls back to '1' instead of throwing.
 *
 * @param {Object} elem - List node; reads `children`, `attribs.type` and
 *   `attribs.start`. `attribs` may be absent.
 * @param {Function} fn - Child renderer, forwarded to `formatListItem`.
 * @param {Object} options - Forwarded to `formatListItem`.
 * @returns {string} The rendered items followed by an extra newline.
 */
function formatOrderedList(elem, fn, options) {
  var result = '';
  // Tolerate nodes without an attribute map, as the anchor and image formatters do.
  var attribs = elem.attribs || {};
  // Text children matching `whiteSpaceRegex` (defined elsewhere in this file) are skipped.
  var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) {
    return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
  });
  // Marker generators keyed by the OL `type` attribute.
  var typeFunctions = {
    1: function(start, i) { return i + 1 + start; },
    a: function(start, i) { return String.fromCharCode(i + start + 97); },
    A: function(start, i) { return String.fromCharCode(i + start + 65); }
  };
  // Own-property check so unknown values (e.g. 'i') and inherited names such
  // as 'toString' never select a marker function.
  var olType = Object.prototype.hasOwnProperty.call(typeFunctions, attribs.type)
    ? attribs.type
    : '1';
  // Make sure there are list items present
  if (nonWhiteSpaceChildren.length) {
    // Zero-based offset derived from the `start` attribute (default 1).
    var start = Number(attribs.start || '1') - 1;
    if (isNaN(start)) {
      // A non-numeric `start` would otherwise produce "NaN." markers.
      start = 0;
    }
    // Width of the largest number, used to align numeric markers.
    var maxLength = (nonWhiteSpaceChildren.length + start).toString().length;
    _.each(nonWhiteSpaceChildren, function(elem, i) {
      var index = typeFunctions[olType](start, i);
      // Pad shorter numbers so the item text lines up.
      var spacing = maxLength - index.toString().length;
      var prefix = (olType === '1') ? ' ' + index + '. ' + _s.repeat(' ', spacing) : index + '. ';
      result += formatListItem(prefix, elem, fn, options);
    });
  }
  return result + '\n';
}

example usage

...
case 'hr':
  result += format.horizontalLine(elem, walk, options);
  break;
case 'ul':
  result += format.unorderedList(elem, walk, options);
  break;
case 'ol':
  result += format.orderedList(elem, walk, options);
  break;
case 'pre':
  var newOptions = _(options).clone();
  newOptions.isInPre = true;
  result += format.paragraph(elem, walk, newOptions);
  break;
case 'table':
...

function html-to-text.formatter.paragraph (elem, fn, options)

description and source-code

/**
 * Renders a paragraph followed by its separator.
 *
 * @param {Object} elem - Paragraph node whose `children` are rendered.
 * @param {Function} fn - Called as `fn(children, options)`.
 * @param {Object} options - `singleNewLineParagraphs` selects a one-newline
 *   separator instead of two.
 * @returns {string} Paragraph text plus '\n' or '\n\n'.
 */
function formatParagraph(elem, fn, options) {
  var body = fn(elem.children, options);
  var separator = options.singleNewLineParagraphs ? '\n' : '\n\n';
  return body + separator;
}

example usage

...
case 'a':
  // Inline element needs its leading space to be trimmed if `result`
  // currently ends with whitespace
  elem.trimLeadingSpace = whiteSpaceRegex.test(result);
  result += format.anchor(elem, walk, options);
  break;
case 'p':
  result += format.paragraph(elem, walk, options);
  break;
case 'h1':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
...

function html-to-text.formatter.table (elem, fn, options)

description and source-code

/**
 * Collects the cells of a table element into a two-dimensional array of
 * strings and renders it with `tableToString` (defined elsewhere in this file).
 *
 * @param {Object} elem - Table node whose `children` are scanned for rows.
 * @param {Function} fn - Called as `fn(children, options)` to render a cell.
 * @param {Object} options - Passed through to `fn` and `formatHeading`.
 * @returns {*} Whatever `tableToString` returns for the collected rows.
 */
function formatTable(elem, fn, options) {
  // Filled by tryParseRows below; one entry per output text line.
  var table = [];
  _.each(elem.children, tryParseRows);
  return tableToString(table);

  // Function declaration, hoisted, so it is usable by the calls above.
  function tryParseRows(elem) {
    // Only element nodes can contain rows.
    if (elem.type !== 'tag') {
      return;
    }
    switch (elem.name.toLowerCase()) {
      // Wrapper elements: descend and look for rows inside them.
      case "thead":
      case "tbody":
      case "tfoot":
      case "center":
        _.each(elem.children, tryParseRows);
        return;

      case 'tr':
        // Despite the name, `rows` first holds one entry per cell; each
        // entry is the list of non-empty text lines of that cell.
        var rows = [];
        _.each(elem.children, function(elem) {
          var tokens, times;
          if (elem.type === 'tag') {
            switch (elem.name.toLowerCase()) {
              case 'th':
                // Header cells go through formatHeading.
                tokens = formatHeading(elem, fn, options).split('\n');
                rows.push(_.compact(tokens));
                break;

              case 'td':
                tokens = fn(elem.children, options).split('\n');
                rows.push(_.compact(tokens));
                // Fill colspans with empty values
                if (elem.attribs && elem.attribs.colspan) {
                  // The subtraction coerces the attribute to a number;
                  // NaN or 0 falls back to 0 extra cells.
                  times = elem.attribs.colspan - 1 || 0;
                  _.times(times, function() {
                    rows.push(['']);
                  });
                }
                break;
            }
          }
        });
        // Zip the per-cell line lists so each entry becomes one text line
        // across all cells; a multi-line cell thus yields several table rows.
        rows = helper.arrayZip(rows);
        _.each(rows, function(row) {
          // Cells with fewer lines leave falsy gaps after zipping.
          row = _.map(row, function(col) {
            return col || '';
          });
          table.push(row);
        });
        break;
    }
  }
}

example usage

...
    case 'pre':
      var newOptions = _(options).clone();
      newOptions.isInPre = true;
      result += format.paragraph(elem, walk, newOptions);
      break;
    case 'table':
      result = containsTable(elem.attribs, options.tables)
        ? result + format.table(elem, walk, options)
        : walk(elem.children || [], options, result);
      break;
    default:
      result = walk(elem.children || [], options, result);
  }
  break;
case 'text':
...

function html-to-text.formatter.text (elem, options)

description and source-code

/**
 * Renders a text node: decodes entities, then word-wraps unless the text is
 * inside a preformatted block.
 *
 * @param {Object} elem - Text node; reads `data` and `trimLeadingSpace`.
 * @param {Object} options - `decodeOptions` is forwarded to `he.decode`;
 *   `isInPre` returns the decoded text as is; the whole object is forwarded
 *   to `helper.wordwrap` otherwise.
 * @returns {*} Decoded text, or the result of `helper.wordwrap`.
 */
function formatText(elem, options) {
  var decoded = he.decode(elem.data || "", options.decodeOptions);

  // Preformatted text keeps its whitespace and is never wrapped.
  if (options.isInPre) {
    return decoded;
  }

  var wrappable = elem.trimLeadingSpace ? _s.lstrip(decoded) : decoded;
  return helper.wordwrap(wrappable, options);
}

example usage

...
    }
    break;
  case 'text':
    if (elem.data !== '\r\n') {
      // Text needs its leading space to be trimmed if `result`
      // currently ends with whitespace
      elem.trimLeadingSpace = whiteSpaceRegex.test(result);
      result += format.text(elem, options);
    }
    break;
  default:
    if (!_.include(SKIP_TYPES, elem.type)) {
      result = walk(elem.children || [], options, result);
    }
}
...

function html-to-text.formatter.unorderedList (elem, fn, options)

description and source-code

/**
 * Renders an unordered list, one ' * ' item per non-blank child.
 *
 * @param {Object} elem - List node whose `children` are the items.
 * @param {Function} fn - Child renderer, forwarded to `formatListItem`.
 * @param {Object} options - Forwarded to `formatListItem`.
 * @returns {string} The rendered items followed by an extra newline.
 */
function formatUnorderedList(elem, fn, options) {
  // Text children matching `whiteSpaceRegex` (defined elsewhere in this file) are skipped.
  var items = (elem.children || []).filter(function(node) {
    return !(node.type === 'text' && whiteSpaceRegex.test(node.data));
  });
  var body = items.reduce(function(acc, item) {
    return acc + formatListItem(' * ', item, fn, options);
  }, '');
  return body + '\n';
}

example usage

...
case 'br':
  result += format.lineBreak(elem, walk, options);
  break;
case 'hr':
  result += format.horizontalLine(elem, walk, options);
  break;
case 'ul':
  result += format.unorderedList(elem, walk, options);
  break;
case 'ol':
  result += format.orderedList(elem, walk, options);
  break;
case 'pre':
  var newOptions = _(options).clone();
  newOptions.isInPre = true;
...

module html-to-text.helper

function html-to-text.helper.arrayZip (array)

description and source-code

/**
 * Applies `_.zip` to the entries of `array`, i.e. `_.zip(array[0], array[1], ...)`.
 *
 * @param {Array} array - List of arrays to be passed as separate arguments.
 * @returns {*} Whatever `_.zip` returns for those arguments.
 */
function arrayZip(array) {
  var zip = _.zip;
  // Keep `_` as the receiver, exactly like a direct `_.zip(...)` call.
  return zip.apply(_, array);
}

example usage

...
// Convert all rows to lengths
var widths = _.map(table, function(row) {
  return _.map(row, function(col) {
    return col.length;
  });
});
// Invert rows with columns
widths = helper.arrayZip(widths);
// Determine the max values for each column
widths = _.map(widths, function(col) {
  return _.max(col);
});

// Build the table
var text = '';
...

function html-to-text.helper.splitCssSearchTag (tagString)

description and source-code

/**
 * Splits a simple selector such as "div#main.content" into its parts.
 *
 * @param {string} tagString - Selector made of an optional leading element
 *   name followed by any number of ".class" and "#id" segments.
 * @returns {{element: string, classes: string[], ids: string[]}} The leading
 *   word characters (possibly ''), every capture following a '.', and every
 *   capture following a '#'.
 */
function splitCssSearchTag(tagString) {
  // Gathers the first capture group of every match of a global pattern.
  function collect(pattern) {
    var matches = [];
    var hit = pattern.exec(tagString);
    while (hit !== null) {
      matches.push(hit[1]);
      hit = pattern.exec(tagString);
    }
    return matches;
  }

  return {
    element: /(^\w*)/g.exec(tagString)[1],
    classes: collect(/\.([\d\w-]*)/g),
    ids: collect(/#([\d\w-]*)/g)
  };
}

example usage

...
}
return _s.strip(result);
}

function filterBody(dom, options, baseElement) {
var result = null;

var splitTag = helper.splitCssSearchTag(baseElement);

function walk(dom) {
  if (result) return;
  _.each(dom, function(elem) {
    if (result) return;
    if (elem.name === splitTag.element) {
      var documentClasses = elem.attribs && elem.attribs.class ? elem.attribs.class.split(" ") : [];
...

function html-to-text.helper.wordwrap (text, options)

description and source-code

/**
 * Re-flows `text` word by word, inserting a newline whenever the next word
 * would push the current line past `options.wordwrap`.
 *
 * @param {string} text - Text to wrap.
 * @param {Object} options - Reads `wordwrap` (maximum line length; wrapping
 *   is skipped when it is falsy and not 0), `preserveNewlines`,
 *   `lineCharCount` (starting character count for the first line) and
 *   `longWordSplit`; the whole object is also handed to `splitLongWord`.
 * @returns {string} The wrapped text. A leading space is kept; trailing
 *   whitespace is kept only when `text` itself ends with a space.
 */
function wordwrap(text, options) {
  var max = options.wordwrap;
  var preserveNewlines = options.preserveNewlines;
  // Running character count of the line currently being built.
  var length = options.lineCharCount;

  // Preserve leading space
  var result = _s.startsWith(text, ' ') ? ' ' : '';
  length += result.length;
  // Words collected for the current line, not yet appended to `result`.
  var buffer = [];
  // Split the text into words, decide to preserve new lines or not.
  // With preserveNewlines, a space is inserted after every '\n' and the text
  // is split on runs of spaces, so each newline stays attached to the end of
  // the word before it. Otherwise splitting is delegated to `_s.words`.
  var words = preserveNewlines
    ? text.replace(/\n/g, '\n ').split(/\ +/)
    : _s.words(text);

  // Determine where to end line word by word.
  _.each(words, function(word) {
    // Add buffer to result if we can't fit any more words in the buffer.
    // `length > 0` prevents breaking before the first word of a line.
    if ((max || max === 0) && length > 0 && ((length + word.length > max) || (length + word.indexOf('\n') > max))) {
      // Concat buffer and add it to the result
      result += buffer.join(' ') + '\n';
      // Reset buffer and length
      buffer.length = length = 0;
    }

    // Check if the current word is long enough to be wrapped
    // (`splitLongWord` is defined elsewhere in this file).
    if ((max || max === 0) && (options.longWordSplit) && (word.length > max)) {
      word = splitLongWord(word, options);
    }

    buffer.push(word);

    // If the word contains a newline then restart the count and add the buffer to the result
    if (word.indexOf('\n') !== -1) {
      result += buffer.join(' ');

      // Reset the buffer, let the length include any characters after the last newline
      buffer.length = 0;
      length = word.length - (word.lastIndexOf('\n') + 1);
      // If there are characters after the newline, add a space and increase the length by 1
      if (length) {
        result += ' ';
        length++;
      }
    } else {
      // Add word length + one whitespace
      length += word.length + 1;
    }
  });
  // Add the rest to the result.
  result += buffer.join(' ');

  // Preserve trailing space
  // Input without a trailing space gets its output right-trimmed; input with
  // one is guaranteed to end with a space.
  if (!_s.endsWith(text, ' ')) {
    result = _s.rtrim(result);
  } else if (!_s.endsWith(result, ' ')) {
    result = result + ' ';
  }

  return result;
}

example usage

...
// Decodes entities in a text node, then word-wraps it unless inside <pre>.
function formatText(elem, options) {
  var decoded = he.decode(elem.data || "", options.decodeOptions);

  // Preformatted text is returned unwrapped.
  if (options.isInPre) {
    return decoded;
  }

  var wrappable = elem.trimLeadingSpace ? _s.lstrip(decoded) : decoded;
  return helper.wordwrap(wrappable, options);
}

function formatImage(elem, options) {
if (options.ignoreImage) {
  return '';
}
...

module html-to-text.html_to_text

function html-to-text.html_to_text.fromFile (file, options, callback)

description and source-code

/**
 * Reads `file` as UTF-8 and passes the result of `htmlToText` to `callback`.
 *
 * @param {string} file - Path handed straight to `fs.readFile`.
 * @param {Object} [options] - Forwarded to `htmlToText`. May be omitted, in
 *   which case the second argument is taken to be the callback.
 * @param {Function} callback - Invoked as `callback(err)` when the read fails,
 *   otherwise as `callback(null, text)`.
 */
fromFile = function (file, options, callback) {
  if (!callback) {
    callback = options;
    options = {};
  }
  // Same defaulting as fromString: an explicit null/undefined `options`
  // passed together with a callback must still reach htmlToText as an object.
  options = options || {};
  fs.readFile(file, 'utf8', function (err, str) {
    if (err) return callback(err);
    return callback(null, htmlToText(str, options));
  });
}

example usage

...

## Usage
You can read from a file via:

```javascript
var htmlToText = require('html-to-text');

htmlToText.fromFile(path.join(__dirname, 'test.html'), {
	tables: ['#invoice', '.address']
}, (err, text) => {
	if (err) return console.error(err);
	console.log(text);
});
```
...

function html-to-text.html_to_text.fromString (str, options)

description and source-code

/**
 * Converts an HTML string by delegating to `htmlToText`.
 *
 * @param {string} str - Markup to convert.
 * @param {Object} [options] - Forwarded to `htmlToText`; any falsy value is
 *   replaced by a fresh empty object.
 * @returns {*} Whatever `htmlToText` returns.
 */
fromString = function (str, options) {
  var settings = options ? options : {};
  return htmlToText(str, settings);
}

example usage

...
```

or directly from a string:

```javascript
var htmlToText = require('html-to-text');

var text = htmlToText.fromString('<h1>Hello World</h1>', {
	wordwrap: 130
});
console.log(text);
```

### Options:
...

api documentation for html-to-text (v3.2.0)

Advanced html to plain text converter

table of contents

module html-to-text

function html-to-text.fromFile (file, options, callback)

function html-to-text.fromString (str, options)

module html-to-text.formatter

function html-to-text.formatter.anchor (elem, fn, options)

function html-to-text.formatter.heading (elem, fn, options)

function html-to-text.formatter.horizontalLine (elem, fn, options)

function html-to-text.formatter.image (elem, options)

function html-to-text.formatter.lineBreak (elem, fn, options)

function html-to-text.formatter.listItem (prefix, elem, fn, options)

function html-to-text.formatter.orderedList (elem, fn, options)

function html-to-text.formatter.paragraph (elem, fn, options)

function html-to-text.formatter.table (elem, fn, options)

function html-to-text.formatter.text (elem, options)

function html-to-text.formatter.unorderedList (elem, fn, options)

module html-to-text.helper

function html-to-text.helper.arrayZip (array)

function html-to-text.helper.splitCssSearchTag (tagString)

function html-to-text.helper.wordwrap (text, options)

module html-to-text.html_to_text

function html-to-text.html_to_text.fromFile (file, options, callback)

function html-to-text.html_to_text.fromString (str, options)