/**
 * Convert every <table> found in an HTML string into an array of row objects.
 *
 * Each table becomes an array of rows. Each row is an object whose keys are
 * the text of the matching <th> cell for that column position, or the
 * zero-based cell index when no (non-empty) heading exists for that position.
 * Rows that yield no <td> values and tables that yield no rows are left out.
 *
 * @param {string} html - Markup handed to cheerio.load().
 * @returns {Array<Array<Object>>} One array of row objects per non-empty
 *   table, in the order the tables were matched.
 */
function convert(html) {
  var $ = cheerio.load(html);
  var tables = [];

  $('table').each(function (tableIndex, tableEl) {
    var rows = $(tableEl).find('tr');

    // Collect column headings from every <th>, keyed by position in its row.
    // A later row's <th> at the same position overwrites an earlier one.
    // @fixme Doesn't support vertical column headings.
    // @todo Try to support badly formatted tables.
    var headings = [];
    rows.each(function (rowIndex, rowEl) {
      $(rowEl).find('th').each(function (col, headingCell) {
        headings[col] = $(headingCell).text().trim();
      });
    });

    // Turn each row's <td> cells into one record.
    var records = [];
    rows.each(function (rowIndex, rowEl) {
      var record = {};
      $(rowEl).find('td').each(function (col, dataCell) {
        // Fall back to the numeric position when the heading is missing or empty.
        var key = headings[col] ? headings[col] : col;
        record[key] = $(dataCell).text().trim();
      });

      // Skip rows that produced no values (e.g. heading-only rows).
      if (Object.keys(record).length > 0) {
        records.push(record);
      }
    });

    // Only tables with at least one data row are reported.
    if (records.length > 0) {
      tables.push(records);
    }
  });

  return tables;
}
...
You might want to use it with a module like 'cheerio' if you want to parse specific tables identified by id or class
(i.e. select them with cheerio and pass their HTML as a string).
## Example usage
``` javascript
// Convert an HTML blob into an array of all the tables on the page
var tabletojson = require('tabletojson');
var tablesAsJson = tabletojson.convert(html);
var firstTableAsJson = tablesAsJson[0];
var secondTableAsJson = tablesAsJson[1];
...
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using a callback
...
/**
 * Fetch a URL and convert the tables in the fetched HTML to JSON.
 *
 * Works in both callback and promise style. The returned promise is the same
 * in either mode, so a failure in fetchUrl(), convert() or the callback itself
 * can be observed by the caller instead of being lost on a detached promise.
 *
 * @param {string} url - Address passed to fetchUrl(); its resolved value is
 *   passed to convert() as HTML.
 * @param {function(Array)} [callback] - If a function, invoked with the
 *   converted tables once they are available.
 * @returns {Promise<Array>} Resolves with the value returned by convert().
 */
convertUrl = function (url, callback) {
  return fetchUrl(url).then(function (html) {
    var tablesAsJson = convert(html);

    // Use the callback only if one was actually passed.
    if (typeof callback === "function") {
      callback.call(this, tablesAsJson);
    }

    return tablesAsJson;
  });
};
...
...
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using a callback
var tabletojson = require('tabletojson');
var url = 'https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes';
tabletojson.convertUrl(url, function(tablesAsJson) {
var listofSovereignStates = tablesAsJson[0];
});
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using promises
var tabletojson = require('tabletojson');
...