/**
 * Convert every <table> found in an HTML document into an array of row objects.
 *
 * For each table, the trimmed text of its <th> cells supplies the column
 * headings (indexed by the cell's position among the <th> cells of its row;
 * when several rows contain <th> cells, later rows override earlier ones at
 * the same position). Each row's <td> cells are then stored under the heading
 * at the same position, or under the numeric position itself when there is no
 * (or an empty) heading for it.
 *
 * Repeated headings are made unique by appending "_2", "_3", ... so that
 * columns sharing a heading no longer overwrite each other in the row object.
 *
 * Relies on `cheerio` being in scope in the enclosing module.
 *
 * @param {string} html - Markup handed to cheerio.load().
 * @returns {Array.<Array.<Object>>} One array per table that produced at
 *   least one non-empty row; each entry is an object mapping heading (or
 *   column index) to the trimmed cell text.
 */
function convert(html) {
  var jsonResponse = [];
  var $ = cheerio.load(html);

  $('table').each(function(i, table) {
    var tableAsJson = [];

    // Get column headings
    // @fixme Doesn't support vertical column headings.
    // @todo Try to support badly formatted tables.
    var columnHeadings = [];
    $(table).find('tr').each(function(i, row) {
      $(row).find('th').each(function(j, cell) {
        columnHeadings[j] = $(cell).text().trim();
      });
    });

    // Disambiguate repeated headings. Without this, two columns with the same
    // heading map to the same key and the earlier cell's value is lost.
    // A prototype-less object keeps headings such as "constructor" from
    // matching inherited properties.
    var usedHeadings = Object.create(null);
    columnHeadings.forEach(function(heading, j) {
      if (!heading) {
        return; // empty headings fall back to the column index below
      }
      var uniqueHeading = heading;
      var suffix = 1;
      while (usedHeadings[uniqueHeading]) {
        suffix += 1;
        uniqueHeading = heading + '_' + suffix;
      }
      usedHeadings[uniqueHeading] = true;
      columnHeadings[j] = uniqueHeading;
    });

    // Fetch each row
    $(table).find('tr').each(function(i, row) {
      var rowAsJson = {};
      $(row).find('td').each(function(j, cell) {
        // Use the heading when there is a non-empty one, else the index.
        var key = columnHeadings[j] || j;
        rowAsJson[key] = $(cell).text().trim();
      });

      // Skip blank rows (e.g. heading-only rows, which contain no <td>)
      if (Object.keys(rowAsJson).length !== 0) {
        tableAsJson.push(rowAsJson);
      }
    });

    // Add the table to the response, unless it yielded no rows at all
    if (tableAsJson.length !== 0) {
      jsonResponse.push(tableAsJson);
    }
  });

  return jsonResponse;
}
You might want to use it with a module like 'cheerio' if you want to parse specific tables identified by id or class
(i.e. select them with cheerio and pass their HTML as a string).
## Example usage
``` javascript
// Convert an HTML blob into an array of all the tables on the page
var tabletojson = require('tabletojson');
var tablesAsJson = tabletojson.convert(html);
var firstTableAsJson = tablesAsJson[0];
var secondTableAsJson = tablesAsJson[1];
...
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using a callback
...convertUrl = function (url, callback) {
if (typeof(callback) === "function") {
// Use a callback (if passed)
fetchUrl(url)
.then(function(html) {
callback.call( this, convert(html) );
});
} else {
// If no callback, return a promise
return fetchUrl(url)
.then(function(html) {
return convert(html);
});
}
}...
...
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using a callback
var tabletojson = require('tabletojson');
var url = 'https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes';
tabletojson.convertUrl(url, function(tablesAsJson) {
var firstTableAsJson = tablesAsJson[0];
});
```
``` javascript
// Fetch a URL and parse all its tables into JSON, using promises
var tabletojson = require('tabletojson');
...