/**
 * Factory shortcut: wraps `s` in a new `models.Adjective`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Adjective` instance
 */
adjective = function (s) {
  const Model = models.Adjective;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Adverb`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Adverb` instance
 */
adverb = function (s) {
  const Model = models.Adverb;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Date`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Date` instance
 */
date = function (s) {
  const Model = models.Date;
  return new Model(s);
};
...
}(Noun);
_Date.fn = _Date.prototype;
module.exports = _Date;
// let d = new _Date('June 4th 1993');
// console.log(d.date());
},{"../noun.js":80,"./parse_date.js":77}],75:[function(_dereq_,module,exports){
'use strict';
var months = _dereq_('../../../data/dates').months.concat(['march', 'may']); //(march and may are
ambiguous grammatically)
var month = '(' + months.join('|') + ')';
var day = '([0-9]{1,2})';
...
/**
 * Copy caller-supplied entries into the lexicon module and return it.
 * The object returned by `_dereq_('./lexicon.js')` is mutated in place.
 * @param {Object} [obj] - key/value pairs to write; a falsy value adds nothing
 * @returns {Object} the lexicon object, after the entries were written
 */
lexicon = function (obj) {
  const additions = obj || {};
  const lex = _dereq_('./lexicon.js');
  const words = Object.keys(additions);
  for (let i = 0; i < words.length; i += 1) {
    lex[words[i]] = additions[words[i]];
  }
  return lex;
};
...
Uses semver, with occasional releases to npm and bower.
Here, 'Major' is considered an API change, while 'Minor' is considered a performance change.
### v.6
* 6.5.0 - builds now using browserify + derequire()
* 6.4.0 - re-written term-lumper logic
* 6.3.0 - new nlp.lexicon({word:'POS'}) flow
* 6.0.0 - be consistent with `text.normal()`, `term.all_forms()`, `text.word_count()`. `text.normal()` includes sentence-terminators, like periods etc.
### v.5
* 5.2.0 - airport codes support, helper methods for specific POS
* 5.1.0 - newlines split sentences
* 5.0.0 - Text methods now return this, instead of array of sentences
...
/**
 * Factory shortcut: wraps `s` in a new `models.Noun`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Noun` instance
 */
noun = function (s) {
  const Model = models.Noun;
  return new Model(s);
};
...
> `npm install nlp_compromise`
> `<script src="https://unpkg.com/nlp_compromise@latest/builds/nlp_compromise.min.js"></script>`
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
...
/**
 * Factory shortcut: wraps `s` in a new `models.Organization`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Organization` instance
 */
organization = function (s) {
  const Model = models.Organization;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Person`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Person` instance
 */
person = function (s) {
  const Model = models.Person;
  return new Model(s);
};
...
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
...
/**
 * Factory shortcut: wraps `s` in a new `models.Place`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Place` instance
 */
place = function (s) {
  const Model = models.Place;
  return new Model(s);
};
...
### v.2
* v2.0.0 - Nov 2015 **(Breaking)**
* es6 classes, babel building
* better test coverage
* ngram uses term tokenization, so that 'Tony Hawk' is one term, and not two
* more organized pos rules
* Pos tagging is done implicitly now once nlp.Text is run
* Entity spotting is split into .people(), .place(), .organisations()
* unicode normalisation is killed
* opaque two-letter tags are gone
* plugin support
* passive tense detection
* lexicon can be augmented third-party
* date parsing results are different
...
/**
 * Install plugin methods onto model prototypes.
 *
 * `obj` is either a map shaped like `{ ModelName: { methodName: fn } }`, or a
 * function that is called with `this` (as both receiver and first argument)
 * and returns such a map. Every `fn` is assigned to
 * `models[ModelName].prototype[methodName]`.
 *
 * @param {Object|Function} [obj] - plugin map, or a function producing one
 * @returns {undefined}
 */
plugin = function (obj) {
  obj = obj || {};
  // if obj is a function, pass it an instance of this nlp library
  if (fns.isFunction(obj)) {
    // run it in this current context; a plugin function that returns
    // nothing simply contributes no methods instead of crashing Object.keys
    obj = obj.call(this, this) || {};
  }
  //apply each plugin to the correct prototypes
  Object.keys(obj).forEach(function (k) {
    Object.keys(obj[k]).forEach(function (method) {
      models[k].prototype[method] = obj[k][method];
    });
  });
};
...
// { count: 1411, text: 'rachel' },
// ....
```
# Plugin/Mixins
We've also got a modest, though ambitious, [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
...
/**
 * Factory shortcut: wraps `s` in a new `models.Question`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Question` instance
 */
question = function (s) {
  const Model = models.Question;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: builds a new `models.Sentence` from `s` and `options`.
 * @param {*} s - value forwarded unchanged as the first constructor argument
 * @param {*} [options] - forwarded unchanged as the second constructor argument
 * @returns {Object} the new `models.Sentence` instance
 */
sentence = function (s, options) {
  const Model = models.Sentence;
  return new Model(s, options);
};
...
[](https://www.codacy.com/app/spencerkelly86/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
**nlp_compromise** does NLP in the browser.
```javascript
nlp.sentence('She sells seashells').to_past().text()
// 'She sold seashells'
```
### Yup,
* **<150k** js file
* **86%** on the [Penn treebank](http://www.cis.upenn.edu/~treebank/)
* keypress speed, constant-time.
* caniuse, uhuh. **IE9+**
...
/**
 * Factory shortcut: wraps `s` in a new `models.Statement`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Statement` instance
 */
statement = function (s) {
  const Model = models.Statement;
  return new Model(s);
};
...
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// pluperfect: 'had spoken',
// future_perfect: 'will have spoken'
// }
nlp.statement('She sells seashells').negate().text()
// 'She doesn't sell seashells'
nlp.sentence('I fed the dog').replace('the [Noun]', 'the cat').text()
// 'I fed the cat'
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
...
/**
 * Factory shortcut: wraps `s` in a new `models.Term`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Term` instance
 */
term = function (s) {
  const Model = models.Term;
  return new Model(s);
};
...
// ....
```
# Plugin/Mixins
We've also got a modest, though ambitious, [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
//[ [ 'hous', 'ton' ], [ 'tex', 'as' ] ]
...
/**
 * Factory shortcut: builds a new `models.Text` from `s` and `options`.
 * @param {*} s - value forwarded unchanged as the first constructor argument
 * @param {*} [options] - forwarded unchanged as the second constructor argument
 * @returns {Object} the new `models.Text` instance
 */
text = function (s, options) {
  const Model = models.Text;
  return new Model(s, options);
};
...
### v.4
* 4.12.0 - more-sensible responses for invalid, non-string inputs
* 4.11.0 - 14 PRs, with fixes for currencies, pluralization, conjugation
* 4.10.0 - Value.to_text() new method, fix "Posessive" POS typo
* 4.9.0 - return of the text.spot() method (Re:#107)
* 4.8.0 - more aggressive lumping of dates, like 'last week of february'
* 4.7.0 - whitespace reproduction in .text() methods
* 4.6.0 - move negate from sentence to verb & statement
* 4.2.0 - rename 'implicit' to 'expansion' for smarter contractions
* 4.1.3 - added readable-compression to adj, verbs (121kb -> 117kb)
* 4.1.0 - hyphenated words are normalized into spaces
* 4.0.0 - grammar-aware match & replace functions
### v.3 **(Breaking)**
...
/**
 * Factory shortcut: wraps `s` in a new `models.Value`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Value` instance
 */
value = function (s) {
  const Model = models.Value;
  return new Model(s);
};
...
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// { count: 1922, text: 'joey' },
// { count: 1876, text: 'god' },
// { count: 1411, text: 'rachel' },
...
/**
 * Factory shortcut: wraps `s` in a new `models.Verb`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Verb` instance
 */
verb = function (s) {
  const Model = models.Verb;
  return new Model(s);
};
...
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
...
/**
 * Test whether `str` ends with `suffix`.
 *
 * @param {string} str - text to inspect; null/undefined/empty yields false
 *   unless `suffix` is a RegExp that matches the empty string
 * @param {string|RegExp} suffix - a literal ending, or a pattern that is
 *   matched against `str` as-is (the caller supplies any `$` anchor)
 * @returns {boolean} true when the suffix/pattern matches, false otherwise
 */
endsWith = function (str, suffix) {
  if (str === null || str === undefined) {
    return false;
  }
  //if suffix is regex
  if (suffix instanceof RegExp) {
    // decide here: a non-matching pattern must not fall through to the
    // string comparison, where it would be compared by its source text
    return str.match(suffix) !== null;
  }
  //if suffix is a string
  if (!str || !suffix || typeof suffix !== 'string') {
    return false;
  }
  return str.indexOf(suffix, str.length - suffix.length) !== -1;
};
...
//leading ^ flag
if (fns.startsWith(term, '^')) {
term = term.substr(1, term.length);
signals.leading = true;
}
//trailing $ flag means ending
if (fns.endsWith(term, '$')) {
term = term.replace(/\$$/, '');
signals.trailing = true;
}
//optional flag
if (fns.endsWith(term, '?')) {
term = term.replace(/\?$/, '');
signals.optional = true;
...
/**
 * Expand a prefix-compressed word map and append the words to `list`.
 * Each key of `obj` is a prefix; its value is a comma-separated string of
 * endings. `prefix + ending` is pushed for every ending.
 * @param {Array} list - array that receives the expanded words (mutated)
 * @param {Object} obj - map of prefix -> comma-separated endings
 * @returns {Array} the same `list` instance
 */
expand_prefixes = function (list, obj) {
  Object.keys(obj).forEach(function (prefix) {
    obj[prefix].split(',').forEach(function (ending) {
      list.push(prefix + ending);
    });
  });
  return list;
};
...
var prefix_compressed = {
mar: 'go,isol,itza,sha',
tam: 'i,ika,my',
be: 'atriz,cky,tty,ttye',
pe: 'arl,ggy,nny',
pa: 'ige,m,tty'
};
arr = fns.expand_prefixes(arr, prefix_compressed);
module.exports = arr;
},{"../../fns":23}],15:[function(_dereq_,module,exports){
'use strict';
var fns = _dereq_('../../fns');
...
/**
 * Expand a suffix-compressed word map and append the words to `list`.
 * Each key of `obj` is a suffix; its value is a comma-separated string of
 * stems. `stem + suffix` is pushed for every stem.
 * @param {Array} list - array that receives the expanded words (mutated)
 * @param {Object} obj - map of suffix -> comma-separated stems
 * @returns {Array} the same `list` instance
 */
expand_suffixes = function (list, obj) {
  Object.keys(obj).forEach(function (suffix) {
    obj[suffix].split(',').forEach(function (stem) {
      list.push(stem + suffix);
    });
  });
  return list;
};
...
ul: 'fo,gainf,helpf,painf'
};
// Adjectives listed in full (not produced by suffix expansion).
// The quotes here had been corrupted by broken `&#x27;` entity residue split
// across lines, which left the array literal unparseable; they are restored.
var arr = [
  'ablaze', 'above', 'adult', 'ahead', 'aloof', 'arab', 'asleep',
  'average', 'awake', 'backwards', 'bad', 'blank', 'bogus', 'bottom',
  'brisk', 'cagey', 'chief', 'civil', 'common', 'complex', 'cozy',
  'crisp', 'deaf', 'devout', 'difficult', 'downtown', 'due', 'dumb',
  'eerie', 'evil', 'excess', 'extra', 'fake', 'far', 'faux',
  // NOTE(review): the trailing space in 'fierce ' is kept as found - confirm whether it is intended
  'fierce ', 'fit', 'foreign', 'fun', 'good', 'goofy', 'gratis', 'grey',
  'groovy', 'gross', 'half', 'huge', 'humdrum', 'inside', 'kaput',
  // 'lax', -> airports
  'left', 'less', 'level', 'lewd', 'magenta', 'makeshift', 'mammoth',
  'medium', 'moot', 'naive', 'nearby', 'next', 'nonstop', 'north',
  'offbeat', 'ok', 'outside', 'overwrought', 'premium', 'pricey', 'pro',
  'quaint', 'random', 'rear', 'rebel', 'ritzy', 'rough', 'savvy', 'sexy',
  'shut', 'shy', 'sleek', 'smug', 'solemn', 'south', 'stark', 'superb',
  'taboo', 'teenage', 'top', 'tranquil', 'ultra', 'understood', 'unfair',
  'unknown', 'upbeat', 'upstairs', 'vanilla', 'various', 'widespread',
  'woozy', 'wrong', 'final', 'true', 'modern', 'notable'
];
module.exports = fns.expand_suffixes(arr, compressed);
},{"../fns":23}],3:[function(_dereq_,module,exports){
'use strict';
//these are adjectives that can become comparative + superlative with out "most/more"
//its a whitelist for conjugation
//this data is shared between comparative/superlative methods
...
/**
 * Shallow-copy every own enumerable key of `b` onto `a`.
 * @param {Object} a - target object (mutated)
 * @param {Object} b - source object
 * @returns {Object} the same `a` instance
 */
extend = function (a, b) {
  Object.keys(b).forEach(function (key) {
    a[key] = b[key];
  });
  return a;
};
...
'eid al-fitr': 'july 25',
'diwali': 'october 21',
'ramadan': 'may 27'
}
};
//select current year
var thisYear = new Date().getFullYear();
var holidays = fns.extend(annual, astronomical[thisYear] || {});
module.exports = holidays;
},{"../fns":23}],9:[function(_dereq_,module,exports){
'use strict';
//these are common person titles used in the lexicon and sentence segmentation methods
...
/**
 * Flatten an array by one level: each element is concatenated onto the
 * result, so nested arrays contribute their items and other values are
 * appended as-is.
 * @param {Array} arr - array whose elements may themselves be arrays
 * @returns {Array} a new, one-level-flattened array
 */
flatten = function (arr) {
  return arr.reduce(function (merged, piece) {
    return merged.concat(piece);
  }, []);
};
...
//break text into rough chunks: newline runs first, then sentence punctuation.
//both splits use capture groups, so the separators/matches stay in the output
const naiive_split = function(text) {
  //first, split by newline
  const by_line = text.split(/(\n+)/);
  //split by period, question-mark, and exclamation-mark
  const pieces = by_line.map((line) => line.split(/(\S.+?[.!?])(?=\s+|$)/g));
  return fns.flatten(pieces);
};
const sentence_parser = function(text) {
const sentences = [];
//first do a greedy-split..
let chunks = [];
//ensure it 'smells like' a sentence
...
/**
 * Report whether `obj` is tagged `[object Function]` by
 * `Object.prototype.toString`.
 * @param {*} obj - value to test
 * @returns {boolean} true only for that exact tag
 */
isFunction = function (obj) {
  const tag = Object.prototype.toString.call(obj);
  return tag === '[object Function]';
};
...
};
function NLP() {
this.plugin = function (obj) {
obj = obj || {};
// if obj is a function, pass it an instance of this nlp library
if (fns.isFunction(obj)) {
// run it in this current context
obj = obj.call(this, this);
}
//apply each plugin to the correct prototypes
Object.keys(obj).forEach(function (k) {
Object.keys(obj[k]).forEach(function (method) {
models[k].prototype[method] = obj[k][method];
...
/**
 * Collect the `str` property (or index) of every element of `arr`.
 * @param {Array} [arr] - elements to read from; a falsy value is treated as []
 * @param {string|number} str - property name or index to read from each element
 * @returns {Array} a new array of the plucked values
 */
pluck = function (arr, str) {
  const items = arr || [];
  return items.map((item) => item[str]);
};
...
//add all names
addArr(Object.keys(firstNames.all), 'Person');
//overwrite to MalePerson, FemalePerson
addArr(firstNames.male, 'MalePerson');
addArr(firstNames.female, 'FemalePerson');
//add irregular nouns
var irregNouns = _dereq_('./data/irregular_nouns.js');
addArr(fns.pluck(irregNouns, 0), 'Noun');
addArr(fns.pluck(irregNouns, 1), 'Plural');
addObj(_dereq_('./data/misc.js'));
addObj(_dereq_('./data/multiples.js'));
addObj(_dereq_('./data/phrasal_verbs.js'));
//add named holidays, like 'easter'
Object.keys(_dereq_('./data/holidays.js')).forEach(function (k) {
...
/**
 * Build a new object with keys and values swapped.
 * When several keys share a value, the key visited last by `Object.keys`
 * wins.
 * @param {Object} obj - source map
 * @returns {Object} new object mapping each value back to its key
 */
reverseObj = function (obj) {
  const flipped = {};
  Object.keys(obj).forEach(function (key) {
    flipped[obj[key]] = key;
  });
  return flipped;
};
...
'everybody': 'nobody',
'someone': 'no one',
'somebody': 'nobody',
// everything:"nothing",
'always': 'never'
};
//create corrollary
var logical_affirm = fns.reverseObj(logical_negate);
//these are not symmetic
logical_affirm['nobody'] = 'somebody';
var negate = function negate(s) {
var _loop = function _loop(i) {
var t = s.terms[i];
//these verbs are red-herrings
...
/**
 * Test whether `str` begins with `prefix`.
 *
 * Earlier this compared only the first character of `str` against `prefix`,
 * so any prefix longer than one character could never match. Prefixes of any
 * length are now supported; single-character results are unchanged.
 *
 * @param {string} str - text to inspect
 * @param {string} prefix - literal prefix to look for
 * @returns {boolean} true when `str` starts with `prefix`; false for empty
 *   or non-string arguments
 */
startsWith = function (str, prefix) {
  if (typeof str !== 'string' || typeof prefix !== 'string') {
    return false;
  }
  if (str.length === 0 || prefix.length === 0) {
    return false;
  }
  return str.slice(0, prefix.length) === prefix;
};
...
var parse_term = function parse_term(term, i) {
term = term || '';
term = term.trim();
var signals = {};
//order matters!
//leading ^ flag
if (fns.startsWith(term, '^')) {
term = term.substr(1, term.length);
signals.leading = true;
}
//trailing $ flag means ending
if (fns.endsWith(term, '$')) {
term = term.replace(/\$$/, '');
signals.trailing = true;
...
/**
 * Lower-case `str`, then upper-case its first character.
 * @param {string} str - text to convert; any falsy value yields ''
 * @returns {string} the converted text
 */
titlecase = function (str) {
  if (!str) {
    return '';
  }
  const lowered = str.toLowerCase();
  const head = lowered.charAt(0).toUpperCase();
  return head + lowered.slice(1);
};
...
return true;
}
}
return false;
}
//support [Pos] syntax
if (signals.pos) {
var pos = fns.titlecase(reg.term);
if (term.pos[pos]) {
return true;
}
return false;
}
//support ~alias~ syntax
if (signals.alias) {
...
/**
 * Turn an array into a lookup object: every element becomes a key whose
 * value is `true`.
 * @param {Array} arr - values to use as keys
 * @returns {Object} new object of `{ element: true }` entries
 */
toObj = function (arr) {
  const lookup = {};
  arr.forEach(function (item) {
    lookup[item] = true;
  });
  return lookup;
};
...
/**
 * Inheritance helper: makes `subClass` extend `superClass`.
 *
 * Gives `subClass` a fresh prototype that inherits from
 * `superClass.prototype` (or from nothing when `superClass` is null), with a
 * non-enumerable `constructor` pointing back at `subClass`, and links the
 * constructors themselves so static members are inherited.
 *
 * @param {Function} subClass - constructor receiving the new prototype
 * @param {Function|null} superClass - constructor to inherit from, or null
 * @throws {TypeError} when `superClass` is neither a function nor null
 */
function _inherits(subClass, superClass) {
  if (typeof superClass !== "function" && superClass !== null) {
    // `throw` and its operand must share a line: a newline directly after
    // `throw` is a syntax error
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }
  subClass.prototype = Object.create(superClass && superClass.prototype, {
    constructor: { value: subClass, enumerable: false, writable: true, configurable: true }
  });
  if (superClass) {
    if (Object.setPrototypeOf) {
      Object.setPrototypeOf(subClass, superClass);
    } else {
      subClass.__proto__ = superClass;
    }
  }
}
var Noun = _dereq_('../noun.js');
var places = _dereq_('../../../data/places.js');
var fns = _dereq_('../../../fns.js');
//make cities/countries easy to lookup
var countries = fns.toObj(places.countries);
var cities = fns.toObj(places.cities);
var Place = function (_Noun) {
_inherits(Place, _Noun);
function Place(str, tag) {
_classCallCheck(this, Place);
...
/**
 * Factory shortcut: wraps `s` in a new `models.Adjective`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Adjective` instance
 */
adjective = function (s) {
  const Model = models.Adjective;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Adverb`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Adverb` instance
 */
adverb = function (s) {
  const Model = models.Adverb;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Date`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Date` instance
 */
date = function (s) {
  const Model = models.Date;
  return new Model(s);
};
...
}(Noun);
_Date.fn = _Date.prototype;
module.exports = _Date;
// let d = new _Date('June 4th 1993');
// console.log(d.date());
},{"../noun.js":80,"./parse_date.js":77}],75:[function(_dereq_,module,exports){
'use strict';
var months = _dereq_('../../../data/dates').months.concat(['march', 'may']); //(march and may are
ambiguous grammatically)
var month = '(' + months.join('|') + ')';
var day = '([0-9]{1,2})';
...
/**
 * Copy caller-supplied entries into the lexicon module and return it.
 * The object returned by `require('./lexicon.js')` is mutated in place.
 * @param {Object} [obj] - key/value pairs to write; a falsy value adds nothing
 * @returns {Object} the lexicon object, after the entries were written
 */
lexicon = function (obj) {
  const additions = obj || {};
  const lex = require('./lexicon.js');
  const words = Object.keys(additions);
  for (let i = 0; i < words.length; i += 1) {
    lex[words[i]] = additions[words[i]];
  }
  return lex;
};
...
Uses semver, with occasional releases to npm and bower.
Here, 'Major' is considered an API change, while 'Minor' is considered a performance change.
### v.6
* 6.5.0 - builds now using browserify + derequire()
* 6.4.0 - re-written term-lumper logic
* 6.3.0 - new nlp.lexicon({word:'POS'}) flow
* 6.0.0 - be consistent with `text.normal()`, `term.all_forms()`, `text.word_count()`. `text.normal()` includes sentence-terminators, like periods etc.
### v.5
* 5.2.0 - airport codes support, helper methods for specific POS
* 5.1.0 - newlines split sentences
* 5.0.0 - Text methods now return this, instead of array of sentences
...
/**
 * Factory shortcut: wraps `s` in a new `models.Noun`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Noun` instance
 */
noun = function (s) {
  const Model = models.Noun;
  return new Model(s);
};
...
> `npm install nlp_compromise`
> `<script src="https://unpkg.com/nlp_compromise@latest/builds/nlp_compromise.min.js"></script>`
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
...
/**
 * Factory shortcut: wraps `s` in a new `models.Organization`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Organization` instance
 */
organization = function (s) {
  const Model = models.Organization;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: wraps `s` in a new `models.Person`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Person` instance
 */
person = function (s) {
  const Model = models.Person;
  return new Model(s);
};
...
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
...
/**
 * Factory shortcut: wraps `s` in a new `models.Place`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Place` instance
 */
place = function (s) {
  const Model = models.Place;
  return new Model(s);
};
...
### v.2
* v2.0.0 - Nov 2015 **(Breaking)**
* es6 classes, babel building
* better test coverage
* ngram uses term tokenization, so that 'Tony Hawk' is one term, and not two
* more organized pos rules
* Pos tagging is done implicitly now once nlp.Text is run
* Entity spotting is split into .people(), .place(), .organisations()
* unicode normalisation is killed
* opaque two-letter tags are gone
* plugin support
* passive tense detection
* lexicon can be augmented third-party
* date parsing results are different
...
/**
 * Install plugin methods onto model prototypes.
 *
 * `obj` is either a map shaped like `{ ModelName: { methodName: fn } }`, or a
 * function that is called with `this` (as both receiver and first argument)
 * and returns such a map. Every `fn` is assigned to
 * `models[ModelName].prototype[methodName]`.
 *
 * @param {Object|Function} [obj] - plugin map, or a function producing one
 * @returns {undefined}
 */
plugin = function (obj) {
  obj = obj || {};
  // if obj is a function, pass it an instance of this nlp library
  if (fns.isFunction(obj)) {
    // run it in this current context; a plugin function that returns
    // nothing simply contributes no methods instead of crashing Object.keys
    obj = obj.call(this, this) || {};
  }
  //apply each plugin to the correct prototypes
  Object.keys(obj).forEach(function(k) {
    Object.keys(obj[k]).forEach(function(method) {
      models[k].prototype[method] = obj[k][method];
    });
  });
};
...
// { count: 1411, text: 'rachel' },
// ....
```
# Plugin/Mixins
We've also got a modest, though ambitious, [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
...
/**
 * Factory shortcut: wraps `s` in a new `models.Question`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Question` instance
 */
question = function (s) {
  const Model = models.Question;
  return new Model(s);
};
n/a
/**
 * Factory shortcut: builds a new `models.Sentence` from `s` and `options`.
 * @param {*} s - value forwarded unchanged as the first constructor argument
 * @param {*} [options] - forwarded unchanged as the second constructor argument
 * @returns {Object} the new `models.Sentence` instance
 */
sentence = function (s, options) {
  const Model = models.Sentence;
  return new Model(s, options);
};
...
[](https://www.codacy.com/app/spencerkelly86/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
**nlp_compromise** does NLP in the browser.
```javascript
nlp.sentence('She sells seashells').to_past().text()
// 'She sold seashells'
```
### Yup,
* **<150k** js file
* **86%** on the [Penn treebank](http://www.cis.upenn.edu/~treebank/)
* keypress speed, constant-time.
* caniuse, uhuh. **IE9+**
...
/**
 * Factory shortcut: wraps `s` in a new `models.Statement`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Statement` instance
 */
statement = function (s) {
  const Model = models.Statement;
  return new Model(s);
};
...
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// pluperfect: 'had spoken',
// future_perfect: 'will have spoken'
// }
nlp.statement('She sells seashells').negate().text()
// 'She doesn't sell seashells'
nlp.sentence('I fed the dog').replace('the [Noun]', 'the cat').text()
// 'I fed the cat'
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
...
/**
 * Factory shortcut: wraps `s` in a new `models.Term`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Term` instance
 */
term = function (s) {
  const Model = models.Term;
  return new Model(s);
};
...
// ....
```
# Plugin/Mixins
We've also got a modest, though ambitious, [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
//[ [ 'hous', 'ton' ], [ 'tex', 'as' ] ]
...
/**
 * Factory shortcut: builds a new `models.Text` from `s` and `options`.
 * @param {*} s - value forwarded unchanged as the first constructor argument
 * @param {*} [options] - forwarded unchanged as the second constructor argument
 * @returns {Object} the new `models.Text` instance
 */
text = function (s, options) {
  const Model = models.Text;
  return new Model(s, options);
};
...
### v.4
* 4.12.0 - more-sensible responses for invalid, non-string inputs
* 4.11.0 - 14 PRs, with fixes for currencies, pluralization, conjugation
* 4.10.0 - Value.to_text() new method, fix "Posessive" POS typo
* 4.9.0 - return of the text.spot() method (Re:#107)
* 4.8.0 - more aggressive lumping of dates, like 'last week of february'
* 4.7.0 - whitespace reproduction in .text() methods
* 4.6.0 - move negate from sentence to verb & statement
* 4.2.0 - rename 'implicit' to 'expansion' for smarter contractions
* 4.1.3 - added readable-compression to adj, verbs (121kb -> 117kb)
* 4.1.0 - hyphenated words are normalized into spaces
* 4.0.0 - grammar-aware match & replace functions
### v.3 **(Breaking)**
...
/**
 * Factory shortcut: wraps `s` in a new `models.Value`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Value` instance
 */
value = function (s) {
  const Model = models.Value;
  return new Model(s);
};
...
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// { count: 1922, text: 'joey' },
// { count: 1876, text: 'god' },
// { count: 1411, text: 'rachel' },
...
/**
 * Factory shortcut: wraps `s` in a new `models.Verb`.
 * @param {*} s - value forwarded unchanged to the constructor
 * @returns {Object} the new `models.Verb` instance
 */
verb = function (s) {
  const Model = models.Verb;
  return new Model(s);
};
...
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
...
/**
 * Find every place in `terms` where the match pattern `regs` applies.
 *
 * `regs` is passed through `syntax_parse` first. When the first parsed token
 * carries the leading (`^`) signal, only a match starting at index 0 is
 * attempted; otherwise a match is attempted from every start index.
 *
 * @param {Array} terms - the terms to search
 * @param {*} regs - match tokens handed to `syntax_parse` ('' when falsy)
 * @param {*} [options] - forwarded unchanged to `tryFromHere`
 * @returns {Array|null} one `Result` per successful match, or null when
 *   nothing matched
 */
findAll = function (terms, regs, options) {
  const result = [];
  regs = syntax_parse(regs || '');
  // one-off lookup for ^
  // '^' token is 'must start at 0'
  if (regs[0].signals.leading) {
    // no `|| []` fallback here: it made every miss look like a hit, so the
    // null branch was unreachable and an empty Result was returned instead
    const match = tryFromHere(terms, regs, options);
    if (match) {
      return [new Result(match)];
    }
    return null;
  }
  //repeating version starting from each term
  const len = terms.length; // - regs.length + 1;
  for (let i = 0; i < len; i++) {
    const termSlice = terms.slice(i, terms.length);
    const match = tryFromHere(termSlice, regs, options);
    if (match) {
      result.push(new Result(match));
    }
  }
  //if we have no results, return null
  if (result.length === 0) {
    return null;
  }
  return result;
};
...
// a regex-like lookup for a list of terms.
// returns [] of matches in a 'Terms' class
}, {
key: 'match',
value: function match(match_str, options) {
var regs = tokenize_match(match_str);
return _match.findAll(this.terms, regs, options);
}
//returns a transformed sentence
}, {
key: 'replace',
value: function replace(match_str, replacement, options) {
var regs = tokenize_match(match_str);
...
/**
 * Apply `replacement` to every match that `findAll` reports.
 * Does nothing when `findAll` returns a falsy value.
 * @param {Array} terms - forwarded to `findAll`
 * @param {*} regs - forwarded to `findAll`
 * @param {*} replacement - passed to each match's `replace` method
 * @param {*} [options] - forwarded to `findAll` and to each `replace` call
 * @returns {undefined}
 */
replaceAll = function (terms, regs, replacement, options) {
  const list = findAll(terms, regs, options);
  if (!list) {
    return;
  }
  for (const found of list) {
    found.replace(replacement, options);
  }
};
...
//returns a transformed sentence
}, {
key: 'replace',
value: function replace(match_str, replacement, options) {
var regs = tokenize_match(match_str);
replacement = tokenize_match(replacement);
_match.replaceAll(this.terms, regs, replacement, options);
return this;
}
//the ending punctuation
}, {
key: 'terminator',
...
/**
 * Term subclass that is always flagged `Adjective` in `pos` and exposes the
 * adjective conversions, each computed from `this.normal`.
 */
class Adjective extends Term {
  /**
   * @param {*} str - forwarded to the Term constructor
   * @param {string} [tag] - stored on `this.tag`; also flagged in `pos` when truthy
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    if (tag) {
      this.pos[tag] = true;
    }
    this.pos.Adjective = true;
  }

  to_comparative() {
    return to_comparative(this.normal);
  }

  to_superlative() {
    return to_superlative(this.normal);
  }

  to_noun() {
    return adj_to_noun(this.normal);
  }

  to_adverb() {
    return adj_to_adv(this.normal);
  }

  /** @returns {Object} the comparative, superlative, adverb and noun forms */
  conjugate() {
    const comparative = to_comparative(this.normal);
    const superlative = to_superlative(this.normal);
    const adverb = adj_to_adv(this.normal);
    const noun = adj_to_noun(this.normal);
    return { comparative, superlative, adverb, noun };
  }

  /** @returns {Object} everything from conjugate() plus a `normal` entry */
  all_forms() {
    const forms = this.conjugate();
    forms.normal = this.normal;
    return forms;
  }
}
...
this.noun = function (s) {
return new models.Noun(s);
};
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
this.adverb = function (s) {
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
...
/**
 * Term subclass that is always flagged `Adverb` in `pos`.
 * The tag is stored on `this.tag` but is not added to `pos`.
 */
class Adverb extends Term {
  /**
   * @param {*} str - forwarded to the Term constructor
   * @param {string} [tag] - stored on `this.tag`
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos.Adverb = true;
  }

  to_adjective() {
    return to_adjective(this.normal);
  }

  /** @returns {Object} the adjective form alongside the normal form */
  all_forms() {
    const adjective = this.to_adjective();
    return { adjective, normal: this.normal };
  }
}
...
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
this.adverb = function (s) {
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
...
/**
 * Noun subclass for date expressions. `this.data` holds whatever
 * `parse_date(this.text)` returned, or `{}` when it returned nothing.
 */
class _Date extends Noun {
  /**
   * @param {*} str - forwarded to the Noun constructor
   * @param {string} [tag] - stored on `this.tag`
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Date'] = true;
    this.data = parse_date(this.text) || {};
  }

  //can we make it a js Date object?
  /**
   * @returns {boolean} true only when month, day and year are all present.
   *   Missing (undefined) fields count as absent, just like null ones, so an
   *   unparsed `{}` is not reported as a date.
   */
  is_date() {
    const o = this.data;
    if (o.month == null || o.day == null || o.year == null) {
      return false;
    }
    return true;
  }

  /**
   * @returns {Date|null} a Date carrying the parsed year/month/day and the
   *   current time of day, or null when is_date() is false
   */
  date() {
    if (this.is_date() === false) {
      return null;
    }
    const d = new Date();
    let fullYear = d.getFullYear();
    if (this.data.year) {
      const year = Number(this.data.year);
      // keep the mapping the legacy setYear() call applied: 0-99 means 19xx
      fullYear = (year >= 0 && year <= 99) ? 1900 + year : year;
    }
    // set all three fields in one call: setting month and day separately
    // overflows into the next month when today is the 29th-31st
    d.setFullYear(fullYear, this.data.month, this.data.day);
    return d;
  }
}
...
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
this.text = function (s, options) {
return new models.Text(s, options);
...
/**
 * Term subclass that is always flagged `Noun` in `pos`, and `Plural` when
 * is_plural() says so at construction time. Most methods delegate to the
 * same-named helper functions from the enclosing scope.
 */
class Noun extends Term {
  /**
   * @param {*} str - forwarded to the Term constructor
   * @param {string} [tag] - stored on `this.tag`; also flagged in `pos` when truthy
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Noun'] = true;
    if (tag) {
      this.pos[tag] = true;
    }
    if (this.is_plural()) {
      this.pos['Plural'] = true;
    }
  }

  //noun methods
  article() {
    //if it's a person, it's he/she, not a/an
    if (this.pos['Person']) {
      return this.pronoun();
    }
    //groups of people are 'they'
    if (this.pos['Organization']) {
      return 'they';
    }
    return article(this.text);
  }

  root() {
    return this.singularize();
  }

  pronoun() {
    if (this.is_organization() || this.is_place() || this.is_value()) {
      return 'it';
    }
    return pronoun(this.normal);
  }

  is_plural() {
    if (this.pos['Date'] || this.pos['Possessive']) {
      return false;
    } else if (this.has_abbreviation()) {
      //contractions & possessives are not plural
      return false;
    } else {
      return is_plural(this.normal);
    }
  }

  is_uncountable() {
    return is_uncountable(this.strip_apostrophe());
  }

  pluralize() {
    return pluralize(this.strip_apostrophe());
  }

  singularize() {
    return singularize(this.strip_apostrophe());
  }

  //sub-classes
  is_person() {
    //don't overwrite dates, etc
    if (this.pos['Date']) {
      return false;
    }
    return is_person(this.strip_apostrophe());
  }

  is_organization() {
    //don't overwrite urls
    if (this.pos['Url']) {
      return false;
    }
    return is_organization(this.strip_apostrophe(), this.text);
  }

  is_date() {
    return is_date(this.strip_apostrophe());
  }

  is_value() {
    //don't overwrite dates, etc
    if (this.pos['Date'] || this.pos['HashTag']) {
      return false;
    }
    return is_value(this.strip_apostrophe());
  }

  is_place() {
    return is_place(this.strip_apostrophe());
  }

  /** @returns {Object} the singular, plural and normal forms */
  all_forms() {
    return {
      'singular': this.singularize(),
      'plural': this.pluralize(),
      'normal': this.normal
    };
  }
}
...
return lex;
};
this.term = function (s) {
return new models.Term(s);
};
this.noun = function (s) {
return new models.Noun(s);
};
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
...
/**
 * Noun subclass that is always flagged `Organization` in `pos`.
 * The tag is stored on `this.tag` (the Noun constructor receives only `str`).
 */
class Organization extends Noun {
  /**
   * @param {*} str - forwarded to the Noun constructor
   * @param {string} [tag] - stored on `this.tag`
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos.Organization = true;
  }
}
...
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
this.text = function (s, options) {
return new models.Text(s, options);
};
this.sentence = function (s, options) {
return new models.Sentence(s, options);
...
/**
 * Noun subclass for people. Splits the name into honourific / first /
 * middle / last parts at construction time via `parse_name`.
 */
class Person extends Noun {
  /**
   * @param {*} str - forwarded to the Noun constructor
   * @param {string} [tag] - stored on `this.tag`; also flagged in `pos` when truthy
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Person'] = true;
    this.honourific = null;
    this.firstName = null;
    this.middleName = null;
    this.lastName = null;
    this.parse();
    if (this.isPronoun()) {
      this.pos['Pronoun'] = true;
    }
    if (tag) {
      this.pos[tag] = true;
    }
  }

  /**
   * @returns {boolean} true when `this.normal` is one of he/she/i/you.
   *   Only the whitelist's own keys count, so words such as 'constructor'
   *   are no longer matched through Object.prototype.
   */
  isPronoun() {
    const whitelist = {
      'he': true,
      'she': true,
      'i': true,
      'you': true,
    };
    return Object.prototype.hasOwnProperty.call(whitelist, this.normal);
  }

  //proper normalised name without the cruft
  root() {
    if (this.isPronoun()) {
      return this.normal;
    }
    let str = '';
    if (this.firstName) {
      str = this.firstName.toLowerCase();
    }
    if (this.middleName) {
      str += ' ' + this.middleName.toLowerCase();
    }
    if (this.lastName) {
      str += ' ' + this.lastName.toLowerCase();
    }
    return str.trim() || this.normal;
  }

  //turn a multi-word string into [first, middle, last, honourific]
  parse() {
    const o = parse_name(this.normal, this.text.trim());
    this.honourific = o.honourific;
    this.firstName = title_case(o.firstName);
    this.middleName = title_case(o.middleName);
    this.lastName = lastname_case(o.lastName);
  }

  /** @returns {*} 'Female' or 'Male' from the pos flags, else whatever guess_gender returns */
  gender() {
    //if we already know it, from the lexicon
    if (this.pos.FemalePerson) {
      return 'Female';
    }
    if (this.pos.MalePerson) {
      return 'Male';
    }
    return guess_gender(this.normal);
  }

  pronoun() {
    const pronouns = {
      Male: 'he',
      Female: 'she',
    };
    const gender = this.gender();
    //return 'singular they' if no gender is found
    return pronouns[gender] || 'they';
  }
}
...
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
...
/**
 * Noun subclass for places. Splits the text into title / city / region /
 * country at construction time using the `cities` and `countries` lookups
 * from the enclosing scope.
 */
class Place extends Noun {
  /**
   * @param {*} str - forwarded to the Noun constructor
   * @param {string} [tag] - stored on `this.tag`; also flagged in `pos` when truthy
   */
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Place'] = true;
    // only record a real tag - an absent one used to be stored as the key 'undefined'
    if (tag) {
      this.pos[tag] = true;
    }
    this.title = null;
    this.city = null;
    this.region = null; //'2nd-tier' (state/province/county/whatever)
    this.country = null;
    this.parse();
  }

  root() {
    return this.title || this.normal;
  }

  parse() {
    //parse a comma-described place like "toronto, ontario"
    const terms = this.strip_apostrophe().split(' ');
    // the first word always starts the title; later words are classified
    this.title = terms[0];
    for (let i = 1; i < terms.length; i++) {
      const t = terms[i];
      if (cities[t]) {
        this.city = fns.titlecase(t);
      } else if (countries[t]) {
        this.country = fns.titlecase(t);
      } else if (this.city !== null) {
        //if we already got the city..
        this.region = fns.titlecase(t);
      } else {
        //it's part of the title
        this.title += ' ' + t;
      }
    }
  }
}
...
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
...
/**
 * A single word (or lumped phrase) with its original text, a normalised
 * working copy, whitespace bookkeeping and part-of-speech flags.
 */
class Term {
  /**
   * @param {string|number} str - the raw text; numbers are stringified and
   *   any other non-string becomes ''
   * @param {string} [tag] - part-of-speech tag; defaults to '?' and is
   *   flagged in `pos` when supplied
   * @param {Object} [whitespace] - object whose `preceding` / `trailing`
   *   strings default to ''; it is stored (and filled in) as-is
   */
  constructor(str, tag, whitespace) {
    //don't pass non-strings through here any further..
    if (typeof str === 'number') {
      str = '' + str;
    } else if (typeof str !== 'string') {
      str = '';
    }
    //trailing & preceding whitespace
    this.whitespace = whitespace || {};
    this.whitespace.preceding = this.whitespace.preceding || '';
    this.whitespace.trailing = this.whitespace.trailing || '';
    //set .text
    this.text = str;
    //the normalised working-version of the word
    this.normal = '';
    //if it's a contraction or slang, the implication, or 'hidden word'
    this.expansion = '';
    //set .normal
    this.rebuild();
    //the reasoning behind it's part-of-speech
    this.reasoning = [];
    //these are orphaned POS that have no methods
    this.pos = {};
    this.tag = tag || '?';
    if (tag) {
      this.pos[tag] = true;
    }
  }

  //when the text changes, rebuild derivative fields
  rebuild() {
    this.text = this.text || '';
    this.text = this.text.trim();
    this.normal = '';
    this.normalize();
    this.expansion = implied(this.normal);
  }

  changeTo(str) {
    this.text = str;
    this.rebuild();
  }

  //a regex-like string search
  match(match_str, options) {
    const reg = syntax_parse([match_str]);
    return match_term(this, reg[0], options);
  }

  //the 'root' singular/infinitive/whatever.
  // method is overloaded by each pos type
  root() {
    return this.strip_apostrophe();
  }

  //strip apostrophe s
  strip_apostrophe() {
    if (this.normal.match(/[a-z]'[a-z][a-z]?$/)) {
      const split = this.normal.split(/'/);
      if (split[1] === 's') {
        return split[0];
      }
    }
    return this.normal;
  }

  has_comma() {
    if (this.text.match(/,$/)) {
      return true;
    }
    return false;
  }

  has_abbreviation() {
    // "spencer's"
    if (this.text.match(/[a-z]'[a-z][a-z]?$/)) {
      return true;
    }
    // "flanders' house"
    if (this.text.match(/[a-z]s'$/)) {
      return true;
    }
    return false;
  }

  is_capital() {
    if (this.text.match(/[A-Z][a-z]/)) {
      return true;
    }
    return false;
  }

  //utility method to avoid lumping words with non-word stuff
  is_word() {
    if (this.text.match(/^\[.*?\]\??$/)) {
      return false;
    }
    // needs at least one letter or digit (the stray '|' that used to sit in
    // this class had no effect: text containing '|' is rejected just below)
    if (!this.text.match(/[a-z0-9]/i)) {
      return false;
    }
    if (this.text.match(/[\|#\<\>]/i)) {
      return false;
    }
    return true;
  }

  //FBI or F.B.I.
  is_acronym() {
    return is_acronym(this.text);
  }

  //working word
  /**
   * Derive `this.normal` from `this.text`.
   * @returns {string} the normalised text, or '' (leaving `this.normal`
   *   untouched) when nothing alphanumeric remains
   */
  normalize() {
    let str = this.text || '';
    str = str.toLowerCase();
    //strip grammatical punctuation
    str = str.replace(/[,\.!:;\?\(\)^$]/g, '');
    //hashtags, atmentions
    str = str.replace(/^[#@]/, '');
    //convert hyphenations to a multiple-word term
    // the letter after the hyphen is only looked at, not consumed, so chains
    // of single letters ('t-r-e-x') are fully split
    str = str.replace(/([a-z])\-(?=[a-z])/g, '$1 ');
    // coerce single curly quotes
    str = str.replace(/[\u2018\u2019\u201A\u201B\u2032\u2035]+/g, '\'');
    // coerce double curly quotes
    str = str.replace(/[\u201C\u201D\u201E\u201F\u2033\u2036]+/g, '');
    //remove quotations + scare-quotes
    str = str.replace(/^'/g, '');
    str = str.replace(/'$/g, '');
    str = str.replace(/"/g, '');
    if (!str.match(/[a-z0-9]/i)) {
      return '';
    }
    this.normal = str;
    return this.normal;
  }

  all_forms() {
    return {};
  }
}
...
lex[k] = obj[k];
});
return lex;
};
// Factory helpers attached to `this`: each one wraps its argument `s` in the
// matching class from `models` (defined outside this excerpt).

// returns a new models.Term built from s
this.term = function (s) {
return new models.Term(s);
};
// returns a new models.Noun built from s
this.noun = function (s) {
return new models.Noun(s);
};
// returns a new models.Verb built from s
this.verb = function (s) {
return new models.Verb(s);
};
...
class Value extends Noun { constructor(str, tag) { super(str); this.tag = tag; this.pos['Value'] = true; this.number = null; this.unit = null; this.unit_name = null; this.measurement = null; this.of_what = ''; // this.text = str; // this.normal = str; if (this.is_ordinal()) { this.pos['Ordinal'] = true; } this.parse(); } //test for nearly-numbers, like phonenumbers, or whatever is_number(s) { //phone numbers, etc if (s.match(/[:@]/)) { return false; } //if there's a number, then something, then a number if (s.match(/[0-9][^(0-9|\/),\.][0-9]/)) { if (s.match(/((?:[0-9]|\.)+) ((?:[0-9]|\.)+)\/((?:[0-9]|\.)+)/)) { // I'm sure there is a better regexpxs return true; } return false; } return true; }; is_number_word(w) { let number_words = { minus: true, negative: true, point: true, half: true, quarter: true, }; if (w.match(/[0-9]/) || number_words[w]) { return true; } else if (nums.ones[w] || nums.teens[w] || nums.tens[w] || nums.multiples[w]) { return true; } else if (nums.ordinal_ones[w] || nums.ordinal_teens[w] || nums.ordinal_tens[w] || nums.ordinal_multiples[w]) { return true; } return false; }; is_ordinal() { //1st if (this.normal.match(/^[0-9]+(rd|st|nd|th)$/)) { return true; } //first, second... for(let i = 0; i < ordinals.length; i++) { if (fns.endsWith(this.normal, ordinals[i])) { return true; } } return false; } //turn an integer like 22 into '22nd' to_ordinal() { let num = this.number; //fail fast if (!num && num !== 0) { return ''; } //teens are all 'th' if (num >= 10 && num <= 20) { return '' + num + 'th'; } //treat it as a string.. num = '' + num; //fail safely if (!num.match(/[0-9]$/)) { return num; } if (fns.endsWith(num, '1')) { return num + 'st'; } if (fns.endsWith(num, '2')) { return num + 'nd'; } if (fns.endsWith(num, '3')) { return num + 'rd'; } return num + 'th'; } //overwrite term.normal? 
// normal() { // let str = '' + (this.number || ''); // if (this.is_ordinal()) { // str = this.to_ordinal(str); // } // if (this.unit) { // str += ' ' + this.unit; // } // return str; // } root() { let str = this.number; if (this.unit) { str += ' ' + this.unit; } return str; } is_unit() { //if it's a known unit if (units[this.unit]) { return true; } //currencies are derived-through POS if (this.pos['Currency']) { return true; } let s = this.unit.toLowerCase(); if (nums.prefixes[s]) { return true; } //try singular version s = this.unit.replace(/s$/, ''); if (units[s]) { this.unit = this.unit.replace(/s$/, ''); return true; } s = this.unit.replace(/es$/, ''); if (units[s]) { this.unit = this.unit.replace(/es$/, ''); return true; } return false; } parse() { if (!this.is_number(this.text)) { return; } let words = this.text.toLowerCase().split(/[ ]/); //split at '-' only for numbers like twenty-two, sixty-seven, etc. //so that 'twelve six-gram pieces' returns 12 for number, not null //however, still returns null for 'three sevel-eleven stores' for (let i = 0; i < words.length; i++) { let w = words[i]; if ((w.indexOf('-') === w.lastIndexOf('-')) && w.indexOf('-') > -1) { let halves = w.split(/[-]/); if (this.is_number_word(halves[0]) && this.is_number_word(halves[1])) { words[i] = halves[0]; words.splice(i + 1, 0, halves[1]); } } } let numbers = ''; let raw_units = ''; //seperate number-words from unit-words for (let i = 0; i < words.length; i++) { let w = words[i]; if (this.is_number_word(w)) { numbers += ' ' + w; ...
...
return new models.Adjective(s);
};
// Factory helpers attached to `this`: each one wraps its argument `s` in the
// matching class from `models` (defined outside this excerpt).

// returns a new models.Adverb built from s
this.adverb = function (s) {
return new models.Adverb(s);
};
// returns a new models.Value built from s
this.value = function (s) {
return new models.Value(s);
};
// returns a new models.Person built from s
this.person = function (s) {
return new models.Person(s);
};
// returns a new models.Place built from s
this.place = function (s) {
return new models.Place(s);
};
...
//a Term that is a verb - adds conjugation, tense and negation helpers
class Verb extends Term {
  //str: the raw text of the verb
  //tag: optional, more specific part-of-speech tag to record alongside 'Verb'
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos.Verb = true;
    //if we've been told which
    if (tag) {
      this.pos[tag] = true;
    }
  }

  //'root' for a verb means infinitive
  root() {
    const forms = this.conjugate();
    return forms.infinitive;
  }

  //which verbTags entry describes the form this verb is currently in
  conjugation() {
    //look for our own text among the conjugated forms
    const forms = this.conjugate();
    const hit = Object.keys(forms).find((form) => forms[form] === this.normal);
    if (hit !== undefined) {
      return verbTags[hit];
    }
    //try to guess
    return verbTags[predict_form(this.normal)];
  }

  //map conjugation onto past/present/future, falling back to 'present'
  //NOTE(review): conjugation() hands back a verbTags value - this lookup only hits
  //when those values are the same strings as the keys below. TODO confirm
  tense() {
    const by_form = {
      infinitive: 'present',
      gerund: 'present',
      actor: 'present',
      present: 'present',
      past: 'past',
      future: 'future',
      perfect: 'past',
      pluperfect: 'past',
      future_perfect: 'future'
    };
    const found = by_form[this.conjugation()];
    return found || 'present';
  }

  //every conjugated form of this verb, as produced by the conjugate helper
  conjugate() {
    return conjugate(this.normal);
  }

  //rewrite this verb into its past form, and return the new text
  to_past() {
    const past = this.conjugate(this.normal).past;
    this.tag = verbTags.past;
    this.changeTo(past);
    return past;
  }

  //rewrite this verb into its present form, and return the new text
  to_present() {
    const present = this.conjugate(this.normal).present;
    this.tag = verbTags.present;
    this.changeTo(present);
    return present;
  }

  //rewrite this verb into its future form, and return the new text
  to_future() {
    const future = this.conjugate(this.normal).future;
    this.tag = verbTags.future;
    this.changeTo(future);
    return future;
  }

  //pass this verb's infinitive through the to_adjective helper
  to_adjective() {
    const forms = this.conjugate();
    return to_adjective(forms.infinitive);
  }

  //is this verb negative already?
  isNegative() {
    //yep, pretty simple
    return this.normal.match(/(n't|\bnot\b)/) !== null;
  }

  //turn 'walked' to "didn't walk"
  negate() {
    const negated = negate(this);
    this.changeTo(negated);
    return this;
  }

  //the conjugated forms, plus the 'negated' and 'normal' versions
  all_forms() {
    const forms = this.conjugate();
    return Object.assign(forms, {
      negated: negate(this),
      normal: this.normal
    });
  }
}
...
// Factory helpers attached to `this`: each one wraps its argument `s` in the
// matching class from `models` (defined outside this excerpt).

// returns a new models.Term built from s
this.term = function (s) {
return new models.Term(s);
};
// returns a new models.Noun built from s
this.noun = function (s) {
return new models.Noun(s);
};
// returns a new models.Verb built from s
this.verb = function (s) {
return new models.Verb(s);
};
// returns a new models.Adjective built from s
this.adjective = function (s) {
return new models.Adjective(s);
};
// returns a new models.Adverb built from s
this.adverb = function (s) {
return new models.Adverb(s);
};
...