module textract
1. function textract.fromBufferWithMime ( type, bufferContent, options, cb, withPath )
2. function textract.fromBufferWithName ( filePath, bufferContent, options, cb )
3. function textract.fromFileWithMimeAndPath ( type, filePath, options, cb )
4. function textract.fromFileWithPath ( filePath, options, cb )
5. function textract.fromUrl ( url, options, cb )
6. object textract.docx
7. object textract.dxf
8. object textract.html
9. object textract.images
10. object textract.md
11. object textract.odt
12. object textract.pdf
13. object textract.ppt
14. object textract.pptx
15. object textract.rtf
16. object textract.text
17. object textract.util
18. object textract.xls
module textract.docx
1. function textract.docx.extract ( filePath, options, cb )
2. object textract.docx.types
module textract.dxf
1. function textract.dxf.extract ( filePath, options, cb )
2. function textract.dxf.test ( options, cb )
3. object textract.dxf.types
module textract.html
1. function textract.html.extract ( filePath, options, cb )
2. function textract.html.extractFromText ( data, cb )
3. object textract.html.types
module textract.images
1. function textract.images.extract ( filePath, options, cb )
2. function textract.images.test ( options, cb )
3. object textract.images.types
module textract.md
1. function textract.md.extract ( filePath, options, cb )
2. object textract.md.types
module textract.odt
1. function textract.odt.extract ( filePath, options, cb )
2. object textract.odt.types
module textract.pdf
1. function textract.pdf.extract ( filePath, options, cb )
2. function textract.pdf.test ( options, cb )
3. object textract.pdf.types
module textract.ppt
1. function textract.ppt.extract ( filePath, options, cb )
2. object textract.ppt.types
module textract.pptx
1. function textract.pptx.extract ( filePath, options, cb )
2. object textract.pptx.types
module textract.rtf
1. function textract.rtf.extract ( filePath, options, cb )
2. function textract.rtf.test ( options, cb )
3. object textract.rtf.types
module textract.text
1. function textract.text.extract ( filePath, options, cb )
2. object textract.text.types
module textract.util
module textract.xls
1. function textract.xls.extract ( filePath, options, cb )
2. object textract.xls.types

module textract

function textract.fromBufferWithMime ( type, bufferContent, options, cb, withPath )

description and source-code

/**
 * Extract text from an in-memory buffer whose mime type is already known.
 *
 * The buffer is handed to `_writeBufferToDisk`; the path it reports back is
 * forwarded to `fromFileWithMimeAndPath` together with the untouched
 * `options`/`cb` pair. Anything that is not a string type, a Buffer and at
 * least one function among `options`/`cb` is reported via `_returnArgsError`.
 *
 * @param {string} type - mime type of the buffer contents
 * @param {Buffer} bufferContent - data to extract text from
 * @param {Object|Function} options - extraction options, or the callback
 * @param {Function} [cb] - callback when `options` is an options object
 * @param {*} [withPath] - accepted but never read by this function
 */
function fromBufferWithMime( type, bufferContent, options, cb, withPath ) {
  var hasCallback = typeof options === 'function' || typeof cb === 'function';
  var hasBuffer = Boolean( bufferContent ) && bufferContent instanceof Buffer;

  if ( typeof type !== 'string' || !hasBuffer || !hasCallback ) {
    _returnArgsError( arguments );
    return;
  }

  _writeBufferToDisk( bufferContent, function( writtenPath ) {
    fromFileWithMimeAndPath( type, writtenPath, options, cb );
  });
}

example usage

...
```javascript
textract.fromFileWithMimeAndPath(type, filePath, config, function( error, text ) {})
```

##### Buffer + mime type

```javascript
textract.fromBufferWithMime(type, buffer, function( error, text ) {})
```

```javascript
textract.fromBufferWithMime(type, buffer, config, function( error, text ) {})
```

##### Buffer + file name/path
...

function textract.fromBufferWithName ( filePath, bufferContent, options, cb )

description and source-code

/**
 * Extract text from a buffer, deriving the mime type from a file name/path.
 *
 * The type comes from `mime.lookup( filePath )`; the work is then delegated
 * to `fromBufferWithMime` with its fifth argument set to `true`. A
 * non-string `filePath` is reported through `_returnArgsError`.
 *
 * @param {string} filePath - name or path used only for the mime lookup
 * @param {Buffer} bufferContent - data to extract text from
 * @param {Object|Function} options - extraction options, or the callback
 * @param {Function} [cb] - callback when `options` is an options object
 */
function fromBufferWithName( filePath, bufferContent, options, cb ) {
  if ( typeof filePath !== 'string' ) {
    _returnArgsError( arguments );
    return;
  }

  fromBufferWithMime( mime.lookup( filePath ), bufferContent, options, cb, true );
}

example usage

...
```javascript
textract.fromBufferWithMime(type, buffer, config, function( error, text ) {})
```

##### Buffer + file name/path

```javascript
textract.fromBufferWithName(name, buffer, function( error, text ) {})
```

```javascript
textract.fromBufferWithName(name, buffer, config, function( error, text ) {})
```

##### URL
...

function textract.fromFileWithMimeAndPath ( type, filePath, options, cb )

description and source-code

/**
 * Extract text from a file on disk using an explicitly supplied mime type.
 *
 * Two call shapes are accepted, both requiring string `type` and `filePath`:
 *   - (type, filePath, options, cb) where `typeof options === 'object'`
 *   - (type, filePath, cb), in which case an empty options object is used
 * Every other combination is reported through `_returnArgsError`.
 *
 * @param {string} type - mime type to extract with
 * @param {string} filePath - path of the file to read
 * @param {Object|Function} options - extraction options, or the callback
 * @param {Function} [cb] - callback when `options` is an options object
 */
function fromFileWithMimeAndPath( type, filePath, options, cb ) {
  var stringsOk = typeof type === 'string' && typeof filePath === 'string';

  // (mimeType, filePath, options, callback)
  if ( stringsOk && typeof cb === 'function' && typeof options === 'object' ) {
    _extractWithType( type, filePath, options, cb );
    return;
  }

  // (mimeType, filePath, callback)
  if ( stringsOk && typeof options === 'function' && cb === undefined ) {
    _extractWithType( type, filePath, {}, options );
    return;
  }

  _returnArgsError( arguments );
}

example usage

...

```javascript
textract.fromFileWithPath(filePath, config, function( error, text ) {})
```
##### File + mime type

```javascript
textract.fromFileWithMimeAndPath(type, filePath, function( error, text ) {})
```

```javascript
textract.fromFileWithMimeAndPath(type, filePath, config, function( error, text ) {})
```

##### Buffer + mime type
...

function textract.fromFileWithPath ( filePath, options, cb )

description and source-code

/**
 * Extract text from a file on disk, working out the mime type automatically.
 *
 * The type is `options.typeOverride` when that is set, otherwise the result
 * of `mime.lookup( filePath )`. The call is then forwarded to
 * `fromFileWithMimeAndPath`. A non-string path, or the absence of any
 * function among `options`/`cb`, is reported through `_returnArgsError`.
 *
 * @param {string} filePath - path of the file to read
 * @param {Object|Function} options - extraction options, or the callback
 * @param {Function} [cb] - callback when `options` is an options object
 */
function fromFileWithPath( filePath, options, cb ) {
  var hasCallback = typeof options === 'function' || typeof cb === 'function';

  if ( typeof filePath !== 'string' || !hasCallback ) {
    _returnArgsError( arguments );
    return;
  }

  fromFileWithMimeAndPath(
    ( options && options.typeOverride ) || mime.lookup( filePath ),
    filePath,
    options,
    cb
  );
}

example usage

...
There are several ways to extract text.  For all methods, the extracted text and an error object are passed to a callback.

`error` will contain informative text about why the extraction failed. If textract does not currently extract files of the type
provided, a `typeNotFound` flag will be set on the error object.

##### File

```javascript
textract.fromFileWithPath(filePath, function( error, text ) {})
```

```javascript
textract.fromFileWithPath(filePath, config, function( error, text ) {})
```
##### File + mime type
...

function textract.fromUrl ( url, options, cb )

description and source-code

/**
 * Download a URL into a temporary file and extract text from it.
 *
 * The response body is piped into a randomly named file under `tmpDir`
 * (keeping the URL's extension, query string excluded). When the write
 * stream finishes, extraction is delegated to `fromFileWithPath`. Unless the
 * caller already set `options.typeOverride`, it is filled in from the
 * response's Content-Type header (parameters such as `; charset=` removed).
 * If the server sends no Content-Type, the override is left unset so that
 * `fromFileWithPath` falls back to its own lookup.
 *
 * Download errors are passed to the callback and suppress the later
 * extraction attempt. A missing `url`, or one without an `href`, is reported
 * through `_returnArgsError`.
 *
 * @param {string|Object} url - URL string, or an object exposing `href`
 *   (e.g. a Node URL object)
 * @param {Object|Function} options - extraction options, or the callback
 * @param {Function} [cb] - callback when `options` is an options object
 */
function fromUrl( url, options, cb ) {
  var urlNoQueryParams, extname, filePath, fullFilePath, file, href, callbackCalled;

  // allow url to be either a string or to be a
  // Node URL Object: https://nodejs.org/api/url.html
  // (`url &&` keeps null/undefined on the argument-error path below instead
  // of throwing while reading `.href`)
  href = ( typeof url === 'string' ) ? url : ( url && url.href );

  if ( href ) {
    options = options || {};
    urlNoQueryParams = href.split( '?' )[0];
    extname = path.extname( urlNoQueryParams );
    filePath = _genRandom() + extname;
    fullFilePath = path.join( tmpDir, filePath );
    file = fs.createWriteStream( fullFilePath );
    file.on( 'finish', function() {
      if ( !callbackCalled ) {
        fromFileWithPath( fullFilePath, options, cb );
      }
    });

    got.stream( url )
      .on( 'response', function( response ) {
        var contentType = response.headers['content-type'];

        // allows for overriding by the developer or automatically
        // populating based on server response; servers are not obliged to
        // send a Content-Type, so only use it when it is present.
        if ( !options.typeOverride && typeof contentType === 'string' ) {
          options.typeOverride = contentType.split( /;/ )[0];
        }
      })
      .on( 'error', function( error ) {
        var _cb = ( typeof options === 'function' ) ? options : cb;
        callbackCalled = true;
        _cb( error );
      })
      .pipe( file );
  } else {
    _returnArgsError( arguments );
  }
}

example usage

...
```

##### URL

When passing a URL, the URL can either be a string, or a [node.js URL object](https://nodejs.org/api/url.html). Using the URL object
 allows fine-grained control over the URL being used.

```javascript
textract.fromUrl(url, function( error, text ) {})
```

```javascript
textract.fromUrl(url, config, function( error, text ) {})
```

## Testing Notes
...

module textract.docx

function textract.docx.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a .docx file by reading the matching XML entries of the
 * zip archive.
 *
 * Every archive entry whose name matches `includeRegex` and not
 * `excludeRegex` is read with `util.getTextFromZipFile` and appended to the
 * result, one entry per line. Once all `zipfile.entryCount` entries have
 * been accounted for, the accumulated XML is passed through
 * `_calculateExtractedText` and handed to the callback.
 *
 * The callback is invoked at most once. A failure while reading an entry is
 * reported as an error instead of being folded into the output.
 *
 * @param {string} filePath - path of the .docx file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  var result = '';

  yauzl.open( filePath, function( err, zipfile ) {
    var processEnd
      , finish
      , reportResult
      , processedEntries = 0
      , done = false
      ;

    if ( err ) {
      util.yauzlError( err, cb );
      return;
    }

    // funnel every outcome through one place so cb fires only once
    finish = function( error, text ) {
      if ( done ) {
        return;
      }
      done = true;
      cb( error, text );
    };

    reportResult = function() {
      if ( result.length ) {
        finish( null, _calculateExtractedText( result ) );
      } else {
        finish( new Error(
          'Extraction could not find content in file, are you' +
          ' sure it is the mime type it says it is?' ),
          null );
      }
    };

    processEnd = function() {
      if ( zipfile.entryCount === ++processedEntries ) {
        reportResult();
      }
    };

    zipfile.on( 'entry', function( entry ) {
      if ( includeRegex.test( entry.fileName ) && !excludeRegex.test( entry.fileName ) ) {
        util.getTextFromZipFile( zipfile, entry, function( err2, text ) {
          if ( err2 ) {
            // without this guard a failed read appended "null" to the result
            finish( err2, null );
            return;
          }
          result += text + '\n';
          processEnd();
        });
      } else {
        processEnd();
      }
    });

    // an archive without entries emits no 'entry' events, so the counter
    // above would never trigger; report the empty result here instead
    zipfile.on( 'end', function() {
      if ( zipfile.entryCount === 0 ) {
        reportResult();
      }
    });

    zipfile.on( 'error', function( err3 ) {
      finish( err3 );
    });
  });
}

example usage
```
n/a
```

module textract.dxf

function textract.dxf.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a DXF drawing by shelling out to `drawingtotext`.
 *
 * Whitespace in the path is backslash-escaped before it is appended to the
 * command line. Any output on stderr is treated as failure; otherwise stdout
 * is returned as the extracted text.
 *
 * @param {string} filePath - path of the DXF file
 * @param {Object} options - extraction options; exec settings are resolved
 *   with `util.createExecOptions( 'dxf', options )`
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  var execOptions = util.createExecOptions( 'dxf', options );
  var command = 'drawingtotext ' + filePath.replace( /\s/g, '\\ ' );

  exec( command, execOptions, function( error, stdout, stderr ) {
    if ( stderr === '' ) {
      cb( null, stdout );
      return;
    }

    cb(
      new Error( 'error extracting DXF text ' +
        path.basename( filePath ) + ': ' + stderr ),
      null
    );
  });
}

example usage
```
n/a
```

function textract.dxf.test ( options, cb )

description and source-code

/**
 * Probe whether the `drawingtotext` binary is usable.
 *
 * Runs it against a deliberately bogus file name and looks for the tool's
 * own complaint on stderr; seeing that message means the binary ran.
 *
 * @param {Object} options - not read by this probe
 * @param {Function} cb - called with `(true)` when the binary responded, or
 *   `(false, message)` when it did not
 */
function testForBinary( options, cb ) {
  var expectedComplaint = /I couldn't make sense of your input/;

  exec( 'drawingtotext notalegalfile', function( error, stdout, stderr ) {
    if ( stderr && expectedComplaint.test( stderr ) ) {
      cb( true );
      return;
    }

    cb( false, 'INFO: \'drawingtotext\' does not appear to be installed, ' +
      'so textract will be unable to extract DXFs.' );
  });
}

example usage

...
  extractor.types.forEach( function( type ) {
    failedExtractorTypes[type.toLowerCase()] = failedMessage;
  });
}
}

function testExtractor( extractor, options ) {
extractor.test( options, function( passedTest, failedMessage ) {
  satisfiedExtractors++;
  if ( passedTest ) {
    registerExtractor( extractor );
  } else {
    registerFailedExtractor( extractor, failedMessage );
  }
});
...

module textract.html

function textract.html.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from an HTML file: read it from disk and pass the raw
 * contents to `extractFromText`.
 *
 * @param {string} filePath - path of the HTML file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  fs.readFile( filePath, function( readError, contents ) {
    if ( !readError ) {
      extractFromText( contents, cb );
      return;
    }
    cb( readError, null );
  });
}

example usage
```
n/a
```

function textract.html.extractFromText ( data, cb )

description and source-code

/**
 * Convert HTML markup into plain text.
 *
 * Before parsing, a `|||||` marker is inserted around block-level tags and
 * line breaks, and a space is added around links, table cells and options so
 * adjacent text does not run together. The markup is then wrapped in a
 * `<textractwrapper>` element and loaded with cheerio; `script`, `style` and
 * `noscript` elements are removed, and the wrapper's text is read back with
 * the markers turned into newlines and runs of whitespace-adjacent line
 * breaks collapsed.
 *
 * @param {Buffer|string} data - HTML source; `toString()` is called on it
 * @param {Function} cb - called with `(error, text)`
 */
function extractFromText( data, cb ) {
  var $, text;

  text = data.toString()
    .replace( /< *(br|p|div|section|aside|button|header|footer|li|article|blockquote|cite|code|h1|h2|h3|h4|h5|h6|legend|nav)((.*?)>)/g, '<$1$2|||||' )
    .replace( /< *\/(td|a|option) *>/g, ' </$1>' ) // spacing some things out so text doesn't get smashed together
    .replace( /< *(a|td|option)/g, ' <$1' ) // spacing out links
    .replace( /< *(br|hr) +\/>/g, '|||||<$1\\>' )
    .replace( /<\/ +?(p|div|section|aside|button|header|footer|li|article|blockquote|cite|code|h1|h2|h3|h4|h5|h6|legend|nav)>/g, '|||||</$1>' );

  // wrap in a properly closed element so there is a single root to read from
  text = '<textractwrapper>' + text + '</textractwrapper>';

  try {
    $ = cheerio.load( text );
    $( 'script' ).remove();
    $( 'style' ).remove();
    $( 'noscript' ).remove();

    text = $( 'textractwrapper' ).text().replace( /\|\|\|\|\|/g, '\n' )
      .replace( /(\n\u00A0|\u00A0\n|\n | \n)+/g, '\n' )
      .replace( /(\r\u00A0|\u00A0\r|\r | \r)+/g, '\n' )
      .replace( /(\v\u00A0|\u00A0\v|\v | \v)+/g, '\n' )
      .replace( /(\t\u00A0|\u00A0\t|\t | \t)+/g, '\n' )
      .replace( /[\n\r\t\v]+/g, '\n' )
      ;
  } catch ( err ) {
    cb( err, null );
    return;
  }

  cb( null, text );
}

example usage

...
    return;
  }

  marked( data.toString(), function( err, content ) {
    if ( err ) {
      cb( err, null );
    } else {
      htmlExtract.extractFromText( content, cb );
    }
  });
});
}

module.exports = {
types: ['text/x-markdown'],
...

module textract.images

function textract.images.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from an image by delegating to `util.runExecIntoFile` with
 * the 'tesseract' label and `tesseractExtractionCommand` as the command
 * builder.
 *
 * @param {string} filePath - path of the image file
 * @param {Object} options - extraction options; exec settings are resolved
 *   with `util.createExecOptions( 'images', options )`
 * @param {Function} cb - forwarded unchanged to `util.runExecIntoFile`
 */
function extractText( filePath, options, cb ) {
  var imageExecOptions = util.createExecOptions( 'images', options );

  util.runExecIntoFile(
    'tesseract',
    filePath,
    options,
    imageExecOptions,
    tesseractExtractionCommand,
    cb
  );
}

example usage
```
n/a
```

function textract.images.test ( options, cb )

description and source-code

/**
 * Probe whether the `tesseract` binary is usable.
 *
 * Runs it without arguments and accepts the probe when the text 'Usage:'
 * shows up in the exec error, stderr or stdout.
 *
 * @param {Object} options - not read by this probe
 * @param {Function} cb - called with `(true)` when the help text was seen,
 *   or `(false, message)` when it was not
 */
function testForBinary( options, cb ) {
  // true when a (possibly empty) exec result mentions the help banner
  function mentionsUsage( output ) {
    return Boolean( output ) && output.toString().indexOf( 'Usage:' ) > -1;
  }

  exec( 'tesseract', function( error, stdout, stderr ) {
    if ( mentionsUsage( error ) || mentionsUsage( stderr ) || mentionsUsage( stdout ) ) {
      cb( true );
      return;
    }

    cb( false, 'INFO: \'tesseract\' does not appear to be installed, ' +
     'so textract will be unable to extract images.' );
  });
}

example usage

...
  extractor.types.forEach( function( type ) {
    failedExtractorTypes[type.toLowerCase()] = failedMessage;
  });
}
}

function testExtractor( extractor, options ) {
extractor.test( options, function( passedTest, failedMessage ) {
  satisfiedExtractors++;
  if ( passedTest ) {
    registerExtractor( extractor );
  } else {
    registerFailedExtractor( extractor, failedMessage );
  }
});
...

module textract.md

function textract.md.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a Markdown file: read it, render it with `marked`, then
 * pass the rendered output to `htmlExtract.extractFromText`.
 *
 * @param {string} filePath - path of the Markdown file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  function onRendered( renderError, html ) {
    if ( renderError ) {
      cb( renderError, null );
      return;
    }
    htmlExtract.extractFromText( html, cb );
  }

  fs.readFile( filePath, function( readError, contents ) {
    if ( readError ) {
      cb( readError, null );
      return;
    }
    marked( contents.toString(), onRendered );
  });
}

example usage
```
n/a
```

module textract.odt

function textract.odt.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from an .odt file by reading `content.xml` from the archive.
 *
 * The XML's `text:p` and `text:h` elements are renamed to
 * `textractTextNode`, the document is loaded with cheerio, and the text of
 * each such node is joined with newlines. If the archive ends without a
 * `content.xml` entry having been seen, an error is reported. A failure
 * while reading `content.xml` is passed to the callback.
 *
 * @param {string} filePath - path of the .odt file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  yauzl.open( filePath, function( err, zipfile ) {
    var textOnTheWay = false;

    if ( err ) {
      util.yauzlError( err, cb );
      return;
    }

    zipfile.on( 'end', function() {
      if ( !textOnTheWay ) {
        cb(
          new Error( 'Extraction could not find content.xml in file, ' +
            'are you sure it is the mime type it says it is?' ),
          null );
      }
    });

    zipfile.on( 'entry', function( entry ) {
      if ( entry.fileName === 'content.xml' ) {
        textOnTheWay = true;
        util.getTextFromZipFile( zipfile, entry, function( err2, text ) {
          var output, $, nodes, nodeTexts, i;

          // on a failed read `text` is null; report the error rather than
          // crashing on the string operations below
          if ( err2 ) {
            cb( err2, null );
            return;
          }

          output = text
            .replace( 'inflating: content.xml', '' )
            .replace( /^(.Archive).*/, '' )
            .replace( /text:p/g, 'textractTextNode' )
            .replace( /text:h/g, 'textractTextNode' )
            .replace( /<textractTextNode\/>/g, '' )
            .trim();
          $ = cheerio.load( '<body>' + output + '</body>' );
          nodes = $( 'textractTextNode' );
          nodeTexts = [];

          for ( i = 0; i < nodes.length; i++ ) {
            nodeTexts.push( $( nodes[i] ).text() );
          }

          cb( null, nodeTexts.join( '\n' ) );
        });
      }
    });

    zipfile.on( 'error', function( err3 ) {
      cb( err3 );
    });
  });
}

example usage
```
n/a
```

module textract.pdf

function textract.pdf.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a PDF through the `extract` helper.
 *
 * `options.pdftotextOptions` is forwarded when set; otherwise
 * `{ layout: 'raw' }` is used (see
 * https://github.com/dbashford/textract/issues/75 for the background).
 * The returned pages are joined with single spaces and trimmed.
 *
 * @param {string} filePath - path of the PDF file
 * @param {Object} options - extraction options
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  var pdftotextOptions = options.pdftotextOptions;

  if ( !pdftotextOptions ) {
    pdftotextOptions = { layout: 'raw' };
  }

  extract( filePath, pdftotextOptions, function( error, pages ) {
    if ( !error ) {
      cb( null, pages.join( ' ' ).trim() );
      return;
    }

    cb(
      new Error( 'Error extracting PDF text for file at [[ ' +
        path.basename( filePath ) + ' ]], error: ' + error.message ),
      null
    );
  });
}

example usage
```
n/a
```

function textract.pdf.test ( options, cb )

description and source-code

/**
 * Probe whether the `pdftotext` binary is usable by running `pdftotext -v`
 * and looking for its version banner on stderr.
 *
 * @param {Object} options - not read by this probe
 * @param {Function} cb - called with `(true)` when the banner was seen, or
 *   `(false, message)` when it was not
 */
function testForBinary( options, cb ) {
  exec( 'pdftotext -v', function( error, stdout, stderr ) {
    var sawBanner = stderr && stderr.indexOf( 'pdftotext version' ) > -1;

    if ( sawBanner ) {
      cb( true );
      return;
    }

    cb( false, 'INFO: \'pdftotext\' does not appear to be installed, ' +
     'so textract will be unable to extract PDFs.' );
  });
}

example usage

...
  extractor.types.forEach( function( type ) {
    failedExtractorTypes[type.toLowerCase()] = failedMessage;
  });
}
}

function testExtractor( extractor, options ) {
extractor.test( options, function( passedTest, failedMessage ) {
  satisfiedExtractors++;
  if ( passedTest ) {
    registerExtractor( extractor );
  } else {
    registerFailedExtractor( extractor, failedMessage );
  }
});
...

module textract.ppt

function textract.ppt.extract ( filePath, options, cb )

description and source-code

// Placeholder extractor for .ppt files. The entire body is commented out, so
// calling this function does nothing and `cb` is never invoked.
// NOTE(review): `extract` is assigned here without a visible declaration —
// confirm it is declared elsewhere in the module.
// NOTE(review): the commented-out draft below references `ppt`, `error` and
// `data`, none of which are defined in this function — it looks like an
// abandoned experiment; confirm before reviving it.
extract = function ( filePath, options, cb ) {
  /*
  var captured = ppt.readFile(filePath);
  console.log('CAPTURED!!!!')
  console.log(captured)
  console.log('CAPTURED!!!!')
  cb( null, null );
    if ( error ) {
      cb( error, null );
      return;
    }
    cb( null, data.toString() );
  */
}

example usage
```
n/a
```

module textract.pptx

function textract.pptx.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a .pptx file by reading every slide XML in the archive.
 *
 * Entries whose name matches `slideMatch` are read with
 * `util.getTextFromZipFile`. Once the archive has reported its last entry
 * and every started read has completed, the slides are ordered with
 * `_compareSlides`, their XML is joined with newlines and passed through
 * `_calculateExtractedText`.
 *
 * The callback is invoked at most once. A failed slide read is reported as
 * an error.
 *
 * @param {string} filePath - path of the .pptx file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  var slides = [];

  yauzl.open( filePath, function( err, zipfile ) {
    var pendingReads = 0
      , entriesEnded = false
      , done = false
      , finish
      , completeWhenReady
      ;

    if ( err ) {
      util.yauzlError( err, cb );
      return;
    }

    // funnel every outcome through one place so cb fires only once
    finish = function( error, text ) {
      if ( done ) {
        return;
      }
      done = true;
      cb( error, text );
    };

    // Slide contents arrive asynchronously, so 'end' alone does not mean all
    // slides have been collected; wait for outstanding reads as well.
    completeWhenReady = function() {
      var slidesText;

      if ( !entriesEnded || pendingReads > 0 ) {
        return;
      }

      if ( slides.length ) {
        slides.sort( _compareSlides );
        slidesText = slides.map( function( slide ) {
          return slide.text;
        }).join( '\n' );
        finish( null, _calculateExtractedText( slidesText ) );
      } else {
        finish(
          new Error( 'Extraction could not find slides in file, are you' +
            ' sure it is the mime type it says it is?' ),
          null );
      }
    };

    zipfile.on( 'end', function() {
      entriesEnded = true;
      completeWhenReady();
    });

    zipfile.on( 'entry', function( entry ) {
      if ( slideMatch.test( entry.fileName ) ) {
        pendingReads++;
        util.getTextFromZipFile( zipfile, entry, function( err2, text ) {
          var slide;

          if ( err2 ) {
            finish( err2, null );
            return;
          }

          // slide number taken from the entry name, used for ordering
          slide = +entry.fileName.replace( 'ppt/slides/slide', '' ).replace( '.xml', '' );
          slides.push({ slide: slide, text: text });
          pendingReads--;
          completeWhenReady();
        });
      }
    });

    zipfile.on( 'error', function( err3 ) {
      finish( err3 );
    });
  });
}

example usage
```
n/a
```

module textract.rtf

function textract.rtf.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from an RTF file by converting it to HTML with `unrtf` and
 * running the HTML through `htmlExtract.extractFromText`.
 *
 * @param {string} filePath - path of the RTF file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  // HTML is requested from unrtf on purpose: its HTML output is good, while
  // its plain-text output drops sections, strips apostrophes and leaves odd
  // quote characters where bullets were. textract already knows how to turn
  // HTML into text, so the conversion is done in two steps.
  //
  // A side benefit is that there are no comments to strip, because HTML
  // comments are not included in the output. The unrtf --quiet option
  // doesn't work, so it is not used.
  var command = 'unrtf --html --nopict ' + filePath.replace( /\s/g, '\\ ' );

  exec( command, function( error, stdout /* , stderr */ ) {
    if ( !error ) {
      htmlExtract.extractFromText( stdout.trim(), cb );
      return;
    }

    cb(
      new Error( 'unrtf read of file named [[ ' +
        path.basename( filePath ) + ' ]] failed: ' + error ),
      null
    );
  });
}

example usage
```
n/a
```

function textract.rtf.test ( options, cb )

description and source-code

/**
 * Probe whether the `unrtf` binary is usable.
 *
 * On macOS (`os.platform() === 'darwin'`) the probe is skipped and reported
 * as passing. Elsewhere `unrtf` is run against this source file and the
 * probe only fails when the exec error message contains 'not found'.
 *
 * @param {Object} options - not read by this probe
 * @param {Function} cb - called with `(true)` on success, or
 *   `(false, message)` when the binary appears to be missing
 */
function testForBinary( options, cb ) {
  // just non-osx extractor
  if ( os.platform() === 'darwin' ) {
    cb( true );
    return;
  }

  exec( 'unrtf ' + __filename, function( error /* , stdout, stderr */ ) {
    var binaryMissing = error !== null && error.message &&
      error.message.indexOf( 'not found' ) !== -1;

    if ( !binaryMissing ) {
      cb( true );
      return;
    }

    cb( false, 'INFO: \'unrtf\' does not appear to be installed, ' +
      'so textract will be unable to extract RTFs.' );
  });
}

example usage

...
  extractor.types.forEach( function( type ) {
    failedExtractorTypes[type.toLowerCase()] = failedMessage;
  });
}
}

function testExtractor( extractor, options ) {
extractor.test( options, function( passedTest, failedMessage ) {
  satisfiedExtractors++;
  if ( passedTest ) {
    registerExtractor( extractor );
  } else {
    registerFailedExtractor( extractor, failedMessage );
  }
});
...

module textract.text

function textract.text.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a plain-text file: read the raw bytes, detect their
 * encoding with `jschardet`, and decode them with `iconv`.
 *
 * Failures during detection or decoding are caught and handed to the
 * callback as its only argument.
 *
 * @param {string} filePath - path of the text file
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  fs.readFile( filePath, function( readError, raw ) {
    var text;

    if ( readError ) {
      cb( readError, null );
      return;
    }

    try {
      text = iconv.decode( raw, jschardet.detect( raw ).encoding.toLowerCase() );
    } catch ( decodeError ) {
      cb( decodeError );
      return;
    }

    cb( null, text );
  });
}

example usage
```
n/a
```

module textract.util

function textract.util.createExecOptions ( type, options )

description and source-code

/**
 * Resolve the `child_process.exec` options to use for one extractor type.
 *
 * Precedence: `options[type].exec`, then the global `options.exec`, then an
 * empty object. A missing `options` argument is treated as "nothing
 * configured" and also yields an empty object.
 *
 * @param {string} type - extractor key, e.g. 'dxf' or 'images'
 * @param {Object} [options] - extraction options
 * @returns {Object} the exec options to pass along
 */
function createExecOptions( type, options ) {
  var typeOptions;

  if ( !options ) {
    return {};
  }

  typeOptions = options[type];
  if ( typeOptions && typeOptions.exec ) {
    return typeOptions.exec;
  }

  return options.exec || {};
}

example usage

...

var exec = require( 'child_process' ).exec
, path = require( 'path' )
, util = require( '../util' )
;

function extractText( filePath, options, cb ) {
var execOptions = util.createExecOptions( 'dxf', options )
  , escapedPath = filePath.replace( /\s/g, '\\ ' )
  ;

exec( 'drawingtotext ' + escapedPath,
  execOptions,
  function( error, stdout, stderr ) {
    if ( stderr !== '' ) {
...

function textract.util.getTextFromZipFile ( zipfile, entry, cb )

description and source-code

/**
 * Read one zip entry completely and hand its contents back as a string.
 *
 * Chunks are collected as Buffers and decoded in a single step once the
 * stream ends, so a multi-byte character that straddles two chunks is
 * decoded correctly. The callback is invoked exactly once: with the text on
 * 'end', or with the error as soon as the stream (or opening it) fails.
 *
 * @param {Object} zipfile - object exposing `openReadStream( entry, cb )`
 * @param {Object} entry - the archive entry to read
 * @param {Function} cb - called with `(error, text)`
 */
function getTextFromZipFile( zipfile, entry, cb ) {
  zipfile.openReadStream( entry, function( err, readStream ) {
    var chunks = []
      , finished = false
      ;

    if ( err ) {
      cb( err, null );
      return;
    }

    readStream.on( 'data', function( chunk ) {
      // tolerate streams that emit strings instead of Buffers
      chunks.push( Buffer.isBuffer( chunk ) ? chunk : Buffer.from( String( chunk ) ) );
    });
    readStream.on( 'end', function() {
      if ( finished ) {
        return;
      }
      finished = true;
      cb( null, Buffer.concat( chunks ).toString() );
    });
    readStream.on( 'error', function( _err ) {
      // a failed stream may never emit 'end', so report right away
      if ( finished ) {
        return;
      }
      finished = true;
      cb( _err, null );
    });
  });
}

example usage

...
        null );
    }
  }
};

zipfile.on( 'entry', function( entry ) {
  if ( includeRegex.test( entry.fileName ) && !excludeRegex.test( entry.fileName ) ) {
    util.getTextFromZipFile( zipfile, entry, function( err2, text ) {
      result += text + '\n';
      processEnd();
    });
  } else {
    processEnd();
  }
});
...

function textract.util.replaceBadCharacters ( text )

description and source-code

/**
 * Apply the first `rLen` entries of the module's `replacements` table to a
 * string, in order. Each entry is a `[pattern, substitute]` pair passed
 * straight to `String.prototype.replace`.
 *
 * @param {string} text - text to clean up
 * @returns {string} the text after all replacements
 */
function replaceBadCharacters( text ) {
  var cleaned = text
    , index = 0
    , pair
    ;

  while ( index < rLen ) {
    pair = replacements[index];
    cleaned = cleaned.replace( pair[0], pair[1] );
    index += 1;
  }

  return cleaned;
}

example usage

...
}

// global, all file type, content cleansing
function cleanseText( options, cb ) {
  return function( error, text ) {
    if ( !error ) {
// clean up text
text = util.replaceBadCharacters( text );

if ( options.preserveLineBreaks ) {
  text = text.replace( WHITELIST_PRESERVE_LINEBREAKS, ' ' );
} else {
  text = text.replace( WHITELIST_STRIP_LINEBREAKS, ' ' );
}
...

function textract.util.runExecIntoFile ( label, filePath, options, execOptions, genCommand, cb )

description and source-code

/**
 * Run an external command that writes its result to a `.txt` file, then read
 * that file back and delete it.
 *
 * The output location is `outDir` joined with the input's base name (without
 * extension); the tool is expected to append `.txt` itself. Whitespace in
 * both paths is backslash-escaped before they are handed to `genCommand`,
 * which builds the command line.
 *
 * @param {string} label - tool name used in error messages
 * @param {string} filePath - path of the input file
 * @param {Object} options - extraction options, passed to `genCommand`
 * @param {Object} execOptions - options for `exec`
 * @param {Function} genCommand - called with
 *   `(options, escapedInputPath, escapedOutputPath)`; returns the command
 * @param {Function} cb - called with `(error, text)`
 */
function runExecIntoFile( label, filePath, options, execOptions, genCommand, cb ) {
  // escape the file paths
  var fileTempOutPath = path.join( outDir, path.basename( filePath, path.extname( filePath ) ) )
    , outputFile = fileTempOutPath + '.txt'
    , escapedFilePath = filePath.replace( /\s/g, '\\ ' )
    , escapedFileTempOutPath = fileTempOutPath.replace( /\s/g, '\\ ' )
    , cmd = genCommand( options, escapedFilePath, escapedFileTempOutPath )
    ;

  exec( cmd, execOptions,
    function( error /* , stdout, stderr */ ) {
      if ( error !== null ) {
        error = new Error( 'Error extracting [[ ' +
          path.basename( filePath ) + ' ]], exec error: ' + error.message );
        cb( error, null );
        return;
      }

      fs.exists( outputFile, function( exists ) {
        if ( !exists ) {
          cb(
            new Error( 'Error reading ' + label +
              ' output at [[ ' + fileTempOutPath + ' ]], file does not exist' ),
            null );
          return;
        }

        fs.readFile( outputFile, 'utf8', function( error2, text ) {
          if ( error2 ) {
            // report the read failure itself; the exec `error` is null here
            cb(
              new Error( 'Error reading ' + label +
                ' output at [[ ' + fileTempOutPath + ' ]], error: ' + error2.message ),
              null );
            return;
          }

          fs.unlink( outputFile, function( error3 ) {
            if ( error3 ) {
              // likewise, use the unlink failure's own message
              cb(
                new Error( 'Error, ' + label +
                  ' , cleaning up temp file [[ ' + fileTempOutPath +
                  ' ]], error: ' + error3.message ),
                null );
              return;
            }

            cb( null, text.toString() );
          });
        });
      });
    }
  );
}

example usage

...
}
cmd += ' quiet';
return cmd;
}

function extractText( filePath, options, cb ) {
var execOptions = util.createExecOptions( 'images', options );
util.runExecIntoFile( 'tesseract', filePath, options,
  execOptions, tesseractExtractionCommand, cb );
}

function testForBinary( options, cb ) {
exec( 'tesseract',
  function( error, stdout, stderr ) {
    var msg;
...

function textract.util.unzipCheck ( type, cb )

description and source-code

/**
 * Probe for the `unzip` binary by running it without arguments.
 *
 * When exec reports an error, a notice naming the affected file `type` is
 * written with `console.error`.
 *
 * @param {string} type - file type mentioned in the notice
 * @param {Function} cb - called with `true` when exec's error was `null`,
 *   `false` otherwise
 */
function unzipCheck( type, cb ) {
  exec( 'unzip', function( error /* , stdout, stderr */ ) {
    var available = error === null;

    if ( error ) {
      // eslint-disable-next-line no-console
      console.error( 'textract: \'unzip\' does not appear to be installed, ' +
        'so textract will be unable to extract ' + type + '.' );
    }

    cb( available );
  });
}

example usage
```
n/a
```

function textract.util.yauzlError ( err, cb )

description and source-code

/**
 * Report a failure from opening a zip archive through the callback.
 *
 * The error is re-wrapped in a new `Error`. The specific message
 * 'end of central directory record signature not found' gets a friendlier
 * prefix; every other message is passed through unchanged.
 *
 * @param {Error} err - the error received when opening the archive
 * @param {Function} cb - called with `(error, null)`
 */
function yauzlError( err, cb ) {
  var NOT_A_ZIP = 'end of central directory record signature not found';
  var message = err.message;
  var reported = ( message === NOT_A_ZIP )
    ? 'File not correctly recognized as zip file, ' + message
    : message;

  cb( new Error( reported ), null );
}

example usage

...

  yauzl.open( filePath, function( err, zipfile ) {
var processEnd
  , processedEntries = 0
  ;

if ( err ) {
  util.yauzlError( err, cb );
  return;
}

processEnd = function() {
  var text;
  if ( zipfile.entryCount === ++processedEntries ) {
    if ( result.length ) {
...

module textract.xls

function textract.xls.extract ( filePath, options, cb )

description and source-code

/**
 * Extract text from a spreadsheet: load the workbook with `J.readFile`,
 * convert it with `J.utils.to_csv`, and concatenate the value stored under
 * every key of the result, in key order.
 *
 * Anything thrown while reading or converting is reported as a
 * 'Could not extract <file name>' error.
 *
 * @param {string} filePath - path of the spreadsheet
 * @param {Object} options - extraction options (not read here)
 * @param {Function} cb - called with `(error, text)`
 */
function extractText( filePath, options, cb ) {
  var sheets, combined;

  try {
    sheets = J.utils.to_csv( J.readFile( filePath ) );
  } catch ( err ) {
    cb(
      new Error( 'Could not extract ' + path.basename( filePath ) + ', ' + err ),
      null
    );
    return;
  }

  combined = Object.keys( sheets ).reduce( function( soFar, sheetName ) {
    return soFar + sheets[sheetName];
  }, '' );

  cb( null, combined );
}

example usage
```
n/a
```

api documentation for textract (v2.1.2)

Extracting text from files of various types including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various OpenOffice formats.

table of contents