// builds a models.Adjective from `s`; the argument is forwarded to the constructor untouched
adjective = function (s) {
  return new models.Adjective(s);
};
// builds a models.Adverb from `s`; the argument is forwarded to the constructor untouched
adverb = function (s) {
  return new models.Adverb(s);
};
// builds a models.Date from `s`; the argument is forwarded to the constructor untouched
date = function (s) {
  return new models.Date(s);
};
}(Noun);
_Date.fn = _Date.prototype;
module.exports = _Date;
// let d = new _Date('June 4th 1993');
// console.log(d.date());
},{"../noun.js":80,"./parse_date.js":77}],75:[function(_dereq_,module,exports){
'use strict';
var months = _dereq_('../../../data/dates').months.concat(['march', 'may']); //(march and may are ambiguous grammatically)
var month = '(' + months.join('|') + ')';
var day = '([0-9]{1,2})';
// Adds or overrides lexicon entries and hands back the lexicon.
// `obj` maps a word to its part-of-speech (e.g. {word: 'POS'}); omit it to just read the lexicon.
// The entries are written directly onto the object returned by _dereq_('./lexicon.js'),
// so the change is visible to everything else holding that same object.
lexicon = function (obj) {
  obj = obj || {};
  var lex = _dereq_('./lexicon.js');
  Object.keys(obj).forEach(function (k) {
    lex[k] = obj[k];
  });
  return lex;
};
Uses semver, with occasional releases to npm and bower.
here, 'Major' is considered an api change, while 'Minor' is considered a performance change.
### v.6
* 6.5.0 - builds now using browserify + derequire()
* 6.4.0 - re-written term-lumper logic
* 6.3.0 - new nlp.lexicon({word:'POS'}) flow
* 6.0.0 - be consistent with `text.normal()`, `term.all_forms()`, `text.word_count()`. `text.normal()` includes sentence-terminators, like periods etc.
### v.5
* 5.2.0 - airport codes support, helper methods for specific POS
* 5.1.0 - newlines split sentences
* 5.0.0 - Text methods now return this, instead of array of sentences
// builds a models.Noun from `s`; the argument is forwarded to the constructor untouched
noun = function (s) {
  return new models.Noun(s);
};
> `npm install nlp_compromise`
> `<script src="https://unpkg.com/nlp_compromise@latest/builds/nlp_compromise.min.js"></script>`
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// builds a models.Organization from `s`; the argument is forwarded to the constructor untouched
organization = function (s) {
  return new models.Organization(s);
};
// builds a models.Person from `s`; the argument is forwarded to the constructor untouched
person = function (s) {
  return new models.Person(s);
};
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// builds a models.Place from `s`; the argument is forwarded to the constructor untouched
place = function (s) {
  return new models.Place(s);
};
### v.2
* v2.0.0 - Nov 2015 **(Breaking)**
* es6 classes, babel building
* better test coverage
* ngram uses term tokenization, so that 'Tony Hawk' is one term, and not two
* more organized pos rules
* Pos tagging is done implicitly now once nlp.Text is run
* Entity spotting is split into .people(), .place(), .organisations()
* unicode normalisation is killed
* opaque two-letter tags are gone
* plugin support
* passive tense detection
* lexicon can be augmented third-party
* date parsing results are different
// Installs plugin methods onto the model prototypes.
// `obj` is either a map shaped like {ModelName: {methodName: fn}}, or a function that is
// invoked with this library instance (as both `this` and first argument) and returns such a map.
// Every listed method is assigned onto models[ModelName].prototype, replacing any
// existing method of the same name. Returns nothing.
plugin = function (obj) {
  obj = obj || {};
  // if obj is a function, pass it an instance of this nlp library
  if (fns.isFunction(obj)) {
    // run it in this current context; a plugin function that returns nothing installs
    // nothing, the same way a missing `obj` does
    obj = obj.call(this, this) || {};
  }
  //apply each plugin to the correct prototypes
  Object.keys(obj).forEach(function (k) {
    Object.keys(obj[k]).forEach(function (method) {
      models[k].prototype[method] = obj[k][method];
    });
  });
};
// { count: 1411, text: 'rachel' },
// ....
```
#Plugin/Mixins
we've also got a modest, though ambitious [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
// builds a models.Question from `s`; the argument is forwarded to the constructor untouched
question = function (s) {
  return new models.Question(s);
};
// builds a models.Sentence from `s`; both `s` and `options` are forwarded to the constructor untouched
sentence = function (s, options) {
  return new models.Sentence(s, options);
};
[](https://www.codacy.com/app/spencerkelly86/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
**nlp_compromise** does NLP in the browser.
```javascript
nlp.sentence('She sells seashells').to_past().text()
// 'She sold seashells'
```
### Yup,
* **<150k** js file
* **86%** on the [Penn treebank](http://www.cis.upenn.edu/~treebank/)
* keypress speed, constant-time.
* caniuse, uhuh. **IE9+**
// builds a models.Statement from `s`; the argument is forwarded to the constructor untouched
statement = function (s) {
  return new models.Statement(s);
};
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// pluperfect: 'had spoken',
// future_perfect: 'will have spoken'
// }
nlp.statement('She sells seashells').negate().text()
// 'She doesn't sell seashells'
nlp.sentence('I fed the dog').replace('the [Noun]', 'the cat').text()
// 'I fed the cat'
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
// builds a models.Term from `s`; the argument is forwarded to the constructor untouched
term = function (s) {
  return new models.Term(s);
};
// ....
```
#Plugin/Mixins
we've also got a modest, though ambitious [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
//[ [ 'hous', 'ton' ], [ 'tex', 'as' ] ]
// builds a models.Text from `s`; both `s` and `options` are forwarded to the constructor untouched
text = function (s, options) {
  return new models.Text(s, options);
};
### v.4
* 4.12.0 - more-sensible responses for invalid, non-string inputs
* 4.11.0 - 14 PRs, with fixes for currencies, pluralization, conjugation
* 4.10.0 - Value.to_text() new method, fix "Posessive" POS typo
* 4.9.0 - return of the text.spot() method (Re:#107)
* 4.8.0 - more aggressive lumping of dates, like 'last week of february'
* 4.7.0 - whitespace reproduction in .text() methods
* 4.6.0 - move negate from sentence to verb & statement
* 4.2.0 - rename 'implicit' to 'expansion' for smarter contractions
* 4.1.3 - added readable-compression to adj, verbs (121kb -> 117kb)
* 4.1.0 - hyphenated words are normalized into spaces
* 4.0.0 - grammar-aware match & replace functions
### v.3 **(Breaking)**
// builds a models.Value from `s`; the argument is forwarded to the constructor untouched
value = function (s) {
  return new models.Value(s);
};
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// { count: 1922, text: 'joey' },
// { count: 1876, text: 'god' },
// { count: 1411, text: 'rachel' },
// builds a models.Verb from `s`; the argument is forwarded to the constructor untouched
verb = function (s) {
  return new models.Verb(s);
};
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// Does `str` end with `suffix`?
//  - RegExp suffix: true when the pattern matches `str` (anchor it with `$` to pin it to the end)
//  - any other suffix: compared literally against the tail of `str`
// A missing/empty suffix, or a `str` that is not a string, is never a match.
endsWith = function (str, suffix) {
  if (typeof str !== 'string' || !suffix) {
    return false;
  }
  //if suffix is regex, its verdict is final - it must not fall through to the literal
  //test below, which would go looking for the pattern's source text inside str
  if (suffix instanceof RegExp) {
    //search() ignores the 'g' flag and lastIndex, so a reused regex stays stateless
    return str.search(suffix) !== -1;
  }
  //if suffix is a string
  const tail = String(suffix);
  if (tail.length > str.length) {
    return false;
  }
  //starting the scan at the last possible offset means only a true suffix can be found
  return str.indexOf(tail, str.length - tail.length) !== -1;
};
//leading ^ flag
if (fns.startsWith(term, '^')) {
term = term.substr(1, term.length);
signals.leading = true;
}
//trailing $ flag means ending
if (fns.endsWith(term, '$')) {
term = term.replace(/\$$/, '');
signals.trailing = true;
}
//optional flag
if (fns.endsWith(term, '?')) {
term = term.replace(/\?$/, '');
signals.optional = true;
// Expands a prefix-compressed table into whole words and appends them to `list`.
//   {mar: 'go,isol'}  ->  pushes 'margo', 'marisol'
// Each key of `obj` is a shared prefix; its value is a comma-separated list of endings.
// `list` is modified in place and is also the return value.
expand_prefixes = function (list, obj) {
  Object.keys(obj).forEach(function (prefix) {
    obj[prefix].split(',').forEach(function (ending) {
      list.push(prefix + ending);
    });
  });
  return list;
};
var prefix_compressed = {
mar: 'go,isol,itza,sha',
tam: 'i,ika,my',
be: 'atriz,cky,tty,ttye',
pe: 'arl,ggy,nny',
pa: 'ige,m,tty'
};
arr = fns.expand_prefixes(arr, prefix_compressed);
module.exports = arr;
},{"../../fns":23}],15:[function(_dereq_,module,exports){
'use strict';
var fns = _dereq_('../../fns');
// Expands a suffix-compressed table into whole words and appends them to `list`.
//   {ul: 'fo,painf'}  ->  pushes 'foul', 'painful'
// Each key of `obj` is a shared ending; its value is a comma-separated list of word starts.
// `list` is modified in place and is also the return value.
expand_suffixes = function (list, obj) {
  Object.keys(obj).forEach(function (suffix) {
    obj[suffix].split(',').forEach(function (start) {
      list.push(start + suffix);
    });
  });
  return list;
};
ul: 'fo,gainf,helpf,painf'
};
var arr = ['ablaze', 'above', 'adult', 'ahead', 'aloof', 'arab', 'asleep', 'average', 'awake', 'backwards', 'bad', 'blank', 'bogus',
'bottom', 'brisk', 'cagey', 'chief', 'civil', 'common', 'complex', 'cozy', 'crisp', 'deaf', 'devout', 'difficult', 'downtown',
'due', 'dumb', 'eerie', 'evil', 'excess', 'extra', 'fake', 'far', 'faux', 'fierce ', 'fit', 'foreign', 'fun', 'good',
'goofy', 'gratis', 'grey', 'groovy', 'gross', 'half', 'huge', 'humdrum', 'inside', 'kaput',
// 'lax', -> airports
'left', 'less', 'level', 'lewd', 'magenta', 'makeshift', 'mammoth', 'medium', 'moot', 'naive', 'nearby', 'next', 'nonstop',
'north', 'offbeat', 'ok', 'outside', 'overwrought', 'premium', 'pricey', 'pro', 'quaint', 'random', 'rear', 'rebel', 'ritzy', 'rough',
'savvy', 'sexy', 'shut', 'shy', 'sleek', 'smug', 'solemn', 'south', 'stark', 'superb', 'taboo', 'teenage', 'top',
'tranquil', 'ultra', 'understood', 'unfair', 'unknown', 'upbeat', 'upstairs', 'vanilla', 'various', 'widespread', 'woozy', 'wrong', 'final',
'true', 'modern', 'notable'];
module.exports = fns.expand_suffixes(arr, compressed);
},{"../fns":23}],3:[function(_dereq_,module,exports){
'use strict';
//these are adjectives that can become comparative + superlative without "most/more"
//it's a whitelist for conjugation
//this data is shared between comparative/superlative methods
// Copies every own enumerable property of `b` onto `a` (shallow; `b` wins on clashes).
// `a` is modified in place and returned. A missing `b` leaves `a` untouched.
extend = function (a, b) {
  const keys = Object.keys(b || {});
  for (let i = 0; i < keys.length; i++) {
    a[keys[i]] = b[keys[i]];
  }
  return a;
};
'eid al-fitr': 'july 25',
'diwali': 'october 21',
'ramadan': 'may 27'
}
};
//select current year
var thisYear = new Date().getFullYear();
var holidays = fns.extend(annual, astronomical[thisYear] || {});
module.exports = holidays;
},{"../fns":23}],9:[function(_dereq_,module,exports){
'use strict';
//these are common person titles used in the lexicon and sentence segmentation methods
// Flattens `arr` by exactly one level: members that are arrays contribute their items,
// anything else is kept as a single item. Returns a new array; `arr` is not modified.
flatten = function (arr) {
  const all = [];
  arr.forEach(function (a) {
    //push item by item rather than re-copying the whole accumulator on every pass
    if (Array.isArray(a)) {
      for (let i = 0; i < a.length; i++) {
        all.push(a[i]);
      }
    } else {
      all.push(a);
    }
  });
  return all;
};
// First-pass sentence chunker: cuts `text` at newlines and after sentence punctuation.
// The result is one flat list of chunks; the separators (newline runs, whitespace between
// sentences) and empty strings produced by the splits are kept in it.
const naiive_split = function(text) {
  //newlines are hard boundaries; the capture group keeps each newline run as its own chunk
  const lines = text.split(/(\n+)/);
  //inside a line, a chunk ends at '.', '!' or '?' when whitespace or end-of-input follows
  const pieces = lines.map((line) => line.split(/(\S.+?[.!?])(?=\s+|$)/g));
  return fns.flatten(pieces);
};
const sentence_parser = function(text) {
const sentences = [];
//first do a greedy-split..
let chunks = [];
//ensure it 'smells like' a sentence
// True for anything callable - plain, arrow, async and generator functions, and classes.
// (The '[object Function]' tag comparison only recognised plain functions and classes.)
isFunction = function (obj) {
  return typeof obj === 'function';
};
};
function NLP() {
this.plugin = function (obj) {
obj = obj || {};
// if obj is a function, pass it an instance of this nlp library
if (fns.isFunction(obj)) {
// run it in this current context
obj = obj.call(this, this);
}
//apply each plugin to the correct prototypes
Object.keys(obj).forEach(function (k) {
Object.keys(obj[k]).forEach(function (method) {
models[k].prototype[method] = obj[k][method];
// Collects property `str` (a key or an index) from every member of `arr`.
// A missing `arr` gives []; a null/undefined member contributes `undefined`.
pluck = function (arr, str) {
  return (arr || []).map(function (o) {
    return o == null ? undefined : o[str];
  });
};
//add all names
addArr(Object.keys(firstNames.all), 'Person');
//overwrite to MalePerson, FemalePerson
addArr(firstNames.male, 'MalePerson');
addArr(firstNames.female, 'FemalePerson');
//add irregular nouns
var irregNouns = _dereq_('./data/irregular_nouns.js');
addArr(fns.pluck(irregNouns, 0), 'Noun');
addArr(fns.pluck(irregNouns, 1), 'Plural');
addObj(_dereq_('./data/misc.js'));
addObj(_dereq_('./data/multiples.js'));
addObj(_dereq_('./data/phrasal_verbs.js'));
//add named holidays, like 'easter'
Object.keys(_dereq_('./data/holidays.js')).forEach(function (k) {
// Returns a new object with keys and values swapped: {a: 'x'} -> {x: 'a'}.
// When several keys share a value, the key that comes last in Object.keys() order wins.
reverseObj = function (obj) {
  const flipped = {};
  Object.keys(obj).forEach(function (k) {
    flipped[obj[k]] = k;
  });
  return flipped;
};
'everybody': 'nobody',
'someone': 'no one',
'somebody': 'nobody',
// everything:"nothing",
'always': 'never'
};
//create corollary
var logical_affirm = fns.reverseObj(logical_negate);
//these are not symmetric
logical_affirm['nobody'] = 'somebody';
var negate = function negate(s) {
var _loop = function _loop(i) {
var t = s.terms[i];
//these verbs are red-herrings
// Does `str` begin with `prefix`? Works for prefixes of any length.
// Non-string arguments and an empty prefix are never a match.
startsWith = function (str, prefix) {
  if (typeof str !== 'string' || typeof prefix !== 'string' || prefix === '') {
    return false;
  }
  //lastIndexOf(x, 0) can only ever find x at offset 0, so the rest of str is not scanned
  return str.lastIndexOf(prefix, 0) === 0;
};
var parse_term = function parse_term(term, i) {
term = term || '';
term = term.trim();
var signals = {};
//order matters!
//leading ^ flag
if (fns.startsWith(term, '^')) {
term = term.substr(1, term.length);
signals.leading = true;
}
//trailing $ flag means ending
if (fns.endsWith(term, '$')) {
term = term.replace(/\$$/, '');
signals.trailing = true;
// Lower-cases the whole string, then capitalises its first character: 'toRONTO' -> 'Toronto'.
// Falsy input gives ''; other non-strings are converted with String() first.
titlecase = function (str) {
  if (!str) {
    return '';
  }
  const lower = String(str).toLowerCase();
  return lower.charAt(0).toUpperCase() + lower.slice(1);
};
return true;
}
}
return false;
}
//support [Pos] syntax
if (signals.pos) {
var pos = fns.titlecase(reg.term);
if (term.pos[pos]) {
return true;
}
return false;
}
//support ~alias~ syntax
if (signals.alias) {
// Turns a list into a lookup table: ['a', 'b'] -> {a: true, b: true}.
// The table has no prototype, so `table[word]` is truthy only for words that were in the
// list - inherited names such as 'constructor' are not reported as members.
// A missing `arr` gives an empty table.
toObj = function (arr) {
  return (arr || []).reduce(function (h, a) {
    h[a] = true;
    return h;
  }, Object.create(null));
};
// Babel's class-inheritance helper: makes `subClass` extend `superClass`.
//  - instances: subClass.prototype becomes a fresh object whose prototype is
//    superClass.prototype, with a non-enumerable `constructor` pointing back at subClass
//  - statics: subClass itself is re-parented onto superClass
// `superClass` may be null (a class with no parent); anything else that is not a function throws.
function _inherits(subClass, superClass) {
  if (typeof superClass !== "function" && superClass !== null) {
    //`throw` and its expression must share a line - a newline between them is a syntax error
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }
  subClass.prototype = Object.create(superClass && superClass.prototype, {
    constructor: { value: subClass, enumerable: false, writable: true, configurable: true }
  });
  if (superClass) {
    if (Object.setPrototypeOf) {
      Object.setPrototypeOf(subClass, superClass);
    } else {
      subClass.__proto__ = superClass;
    }
  }
}
var Noun = _dereq_('../noun.js');
var places = _dereq_('../../../data/places.js');
var fns = _dereq_('../../../fns.js');
//make cities/countries easy to lookup
var countries = fns.toObj(places.countries);
var cities = fns.toObj(places.cities);
var Place = function (_Noun) {
_inherits(Place, _Noun);
function Place(str, tag) {
_classCallCheck(this, Place);
// builds a models.Adjective from `s`; the argument is forwarded to the constructor untouched
adjective = function (s) {
  return new models.Adjective(s);
};
// builds a models.Adverb from `s`; the argument is forwarded to the constructor untouched
adverb = function (s) {
  return new models.Adverb(s);
};
// builds a models.Date from `s`; the argument is forwarded to the constructor untouched
date = function (s) {
  return new models.Date(s);
};
}(Noun);
_Date.fn = _Date.prototype;
module.exports = _Date;
// let d = new _Date('June 4th 1993');
// console.log(d.date());
},{"../noun.js":80,"./parse_date.js":77}],75:[function(_dereq_,module,exports){
'use strict';
var months = _dereq_('../../../data/dates').months.concat(['march', 'may']); //(march and may are ambiguous grammatically)
var month = '(' + months.join('|') + ')';
var day = '([0-9]{1,2})';
// Adds or overrides lexicon entries and hands back the lexicon.
// `obj` maps a word to its part-of-speech (e.g. {word: 'POS'}); omit it to just read the lexicon.
// The entries are written directly onto the object returned by require('./lexicon.js'),
// so the change is visible to everything else holding that same object.
lexicon = function (obj) {
  obj = obj || {};
  let lex = require('./lexicon.js');
  Object.keys(obj).forEach(function (k) {
    lex[k] = obj[k];
  });
  return lex;
};
Uses semver, with occasional releases to npm and bower.
here, 'Major' is considered an api change, while 'Minor' is considered a performance change.
### v.6
* 6.5.0 - builds now using browserify + derequire()
* 6.4.0 - re-written term-lumper logic
* 6.3.0 - new nlp.lexicon({word:'POS'}) flow
* 6.0.0 - be consistent with `text.normal()`, `term.all_forms()`, `text.word_count()`. `text.normal()` includes sentence-terminators, like periods etc.
### v.5
* 5.2.0 - airport codes support, helper methods for specific POS
* 5.1.0 - newlines split sentences
* 5.0.0 - Text methods now return this, instead of array of sentences
// builds a models.Noun from `s`; the argument is forwarded to the constructor untouched
noun = function (s) {
  return new models.Noun(s);
};
> `npm install nlp_compromise`
> `<script src="https://unpkg.com/nlp_compromise@latest/builds/nlp_compromise.min.js"></script>`
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// builds a models.Organization from `s`; the argument is forwarded to the constructor untouched
organization = function (s) {
  return new models.Organization(s);
};
// builds a models.Person from `s`; the argument is forwarded to the constructor untouched
person = function (s) {
  return new models.Person(s);
};
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// builds a models.Place from `s`; the argument is forwarded to the constructor untouched
place = function (s) {
  return new models.Place(s);
};
### v.2
* v2.0.0 - Nov 2015 **(Breaking)**
* es6 classes, babel building
* better test coverage
* ngram uses term tokenization, so that 'Tony Hawk' is one term, and not two
* more organized pos rules
* Pos tagging is done implicitly now once nlp.Text is run
* Entity spotting is split into .people(), .place(), .organisations()
* unicode normalisation is killed
* opaque two-letter tags are gone
* plugin support
* passive tense detection
* lexicon can be augmented third-party
* date parsing results are different
// Installs plugin methods onto the model prototypes.
// `obj` is either a map shaped like {ModelName: {methodName: fn}}, or a function that is
// invoked with this library instance (as both `this` and first argument) and returns such a map.
// Every listed method is assigned onto models[ModelName].prototype, replacing any
// existing method of the same name. Returns nothing.
plugin = function (obj) {
  obj = obj || {};
  // if obj is a function, pass it an instance of this nlp library
  if (fns.isFunction(obj)) {
    // run it in this current context; a plugin function that returns nothing installs
    // nothing, the same way a missing `obj` does
    obj = obj.call(this, this) || {};
  }
  //apply each plugin to the correct prototypes
  Object.keys(obj).forEach(function(k) {
    Object.keys(obj[k]).forEach(function(method) {
      models[k].prototype[method] = obj[k][method];
    });
  });
};
// { count: 1411, text: 'rachel' },
// ....
```
#Plugin/Mixins
we've also got a modest, though ambitious [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
// builds a models.Question from `s`; the argument is forwarded to the constructor untouched
question = function (s) {
  return new models.Question(s);
};
// builds a models.Sentence from `s`; both `s` and `options` are forwarded to the constructor untouched
sentence = function (s, options) {
  return new models.Sentence(s, options);
};
[](https://www.codacy.com/app/spencerkelly86/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
[](https://www.npmjs.com/package/nlp_compromise)
**nlp_compromise** does NLP in the browser.
```javascript
nlp.sentence('She sells seashells').to_past().text()
// 'She sold seashells'
```
### Yup,
* **<150k** js file
* **86%** on the [Penn treebank](http://www.cis.upenn.edu/~treebank/)
* keypress speed, constant-time.
* caniuse, uhuh. **IE9+**
// builds a models.Statement from `s`; the argument is forwarded to the constructor untouched
statement = function (s) {
  return new models.Statement(s);
};
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// pluperfect: 'had spoken',
// future_perfect: 'will have spoken'
// }
nlp.statement('She sells seashells').negate().text()
// 'She doesn't sell seashells'
nlp.sentence('I fed the dog').replace('the [Noun]', 'the cat').text()
// 'I fed the cat'
nlp.text('Tony Hawk did a kickflip').people();
// [ Person { text: 'Tony Hawk' ..} ]
// builds a models.Term from `s`; the argument is forwarded to the constructor untouched
term = function (s) {
  return new models.Term(s);
};
// ....
```
#Plugin/Mixins
we've also got a modest, though ambitious [plugin ecosystem](https://github.com/nlp-compromise/nlp_compromise/wiki/Plugins):
```javascript
//US-UK localization
nlp.plugin(require('nlp-locale'))
nlp.term('favourite').toAmerican()
// 'favorite'
//syllable hyphenization
nlp.plugin(require('nlp-syllables'));
var t2 = nlp.term('houston texas');
t2.syllables()
//[ [ 'hous', 'ton' ], [ 'tex', 'as' ] ]
// builds a models.Text from `s`; both `s` and `options` are forwarded to the constructor untouched
text = function (s, options) {
  return new models.Text(s, options);
};
### v.4
* 4.12.0 - more-sensible responses for invalid, non-string inputs
* 4.11.0 - 14 PRs, with fixes for currencies, pluralization, conjugation
* 4.10.0 - Value.to_text() new method, fix "Posessive" POS typo
* 4.9.0 - return of the text.spot() method (Re:#107)
* 4.8.0 - more aggressive lumping of dates, like 'last week of february'
* 4.7.0 - whitespace reproduction in .text() methods
* 4.6.0 - move negate from sentence to verb & statement
* 4.2.0 - rename 'implicit' to 'expansion' for smarter contractions
* 4.1.3 - added readable-compression to adj, verbs (121kb -> 117kb)
* 4.1.0 - hyphenated words are normalized into spaces
* 4.0.0 - grammar-aware match & replace functions
### v.3 **(Breaking)**
// builds a models.Value from `s`; the argument is forwarded to the constructor untouched
value = function (s) {
  return new models.Value(s);
};
nlp.noun('vacuum').article();
// 'a'
nlp.person('Tony Hawk').pronoun();
// 'he'
nlp.value('five hundred and sixty').number;
// 560
nlp.text(require('nlp-corpus').text.friends()).topics()//11 seasons of friends
// [ { count: 2523, text: 'ross' },
// { count: 1922, text: 'joey' },
// { count: 1876, text: 'god' },
// { count: 1411, text: 'rachel' },
// builds a models.Verb from `s`; the argument is forwarded to the constructor untouched
verb = function (s) {
  return new models.Verb(s);
};
```javascript
let nlp = require('nlp_compromise'); // or nlp = window.nlp_compromise
nlp.noun('dinosaur').pluralize();
// 'dinosaurs'
nlp.verb('speak').conjugate();
// { past: 'spoke',
// infinitive: 'speak',
// gerund: 'speaking',
// actor: 'speaker',
// present: 'speaks',
// future: 'will speak',
// perfect: 'have spoken',
// Finds every position in `terms` where the match pattern `regs` applies.
// `regs` is run through syntax_parse() first; `options` is handed on to tryFromHere() as-is.
// Returns an array of Result objects (one per match), or null when nothing matched.
findAll = function (terms, regs, options) {
  const result = [];
  regs = syntax_parse(regs || '');
  // one-off lookup for ^
  // '^' token is 'must start at 0'
  if (regs[0].signals.leading) {
    const anchored = tryFromHere(terms, regs, options);
    //a failed anchored match means 'no results' - null, exactly like the scan below
    return anchored ? [new Result(anchored)] : null;
  }
  //repeating version starting from each term
  const len = terms.length;
  for (let i = 0; i < len; i++) {
    const match = tryFromHere(terms.slice(i), regs, options);
    if (match) {
      result.push(new Result(match));
    }
  }
  //if we have no results, return null
  if (result.length === 0) {
    return null;
  }
  return result;
};
// a regex-like lookup for a list of terms.
// returns [] of matches in a 'Terms' class
}, {
key: 'match',
value: function match(match_str, options) {
var regs = tokenize_match(match_str);
return _match.findAll(this.terms, regs, options);
}
//returns a transformed sentence
}, {
key: 'replace',
value: function replace(match_str, replacement, options) {
var regs = tokenize_match(match_str);
// Runs findAll(terms, regs, options) and calls .replace(replacement, options) on every
// result it returns. Does nothing when findAll reports no matches. Returns nothing.
replaceAll = function (terms, regs, replacement, options) {
  const list = findAll(terms, regs, options);
  if (!list) {
    return;
  }
  list.forEach((t) => {
    t.replace(replacement, options);
  });
};
//returns a transformed sentence
}, {
key: 'replace',
value: function replace(match_str, replacement, options) {
var regs = tokenize_match(match_str);
replacement = tokenize_match(replacement);
_match.replaceAll(this.terms, regs, replacement, options);
return this;
}
//the ending punctuation
}, {
key: 'terminator',
// A Term flagged as an adjective, with conversions to its related word forms.
// All conversions work from this.normal and are delegated to the helper functions
// to_comparative / to_superlative / adj_to_noun / adj_to_adv.
class Adjective extends Term {
  // str: the word itself; tag: optional extra part-of-speech label, also recorded in this.pos
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    if (tag) {
      this.pos[tag] = true;
    }
    this.pos['Adjective'] = true;
  }
  to_comparative() {
    return to_comparative(this.normal);
  }
  to_superlative() {
    return to_superlative(this.normal);
  }
  to_noun() {
    return adj_to_noun(this.normal);
  }
  to_adverb() {
    return adj_to_adv(this.normal);
  }
  // every derived form at once, keyed by form name
  conjugate() {
    return {
      comparative: to_comparative(this.normal),
      superlative: to_superlative(this.normal),
      adverb: adj_to_adv(this.normal),
      noun: adj_to_noun(this.normal)
    };
  }
  // conjugate() plus the normalised word itself under 'normal'
  all_forms() {
    const forms = this.conjugate();
    forms['normal'] = this.normal;
    return forms;
  }
}
this.noun = function (s) {
return new models.Noun(s);
};
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
this.adverb = function (s) {
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
// A Term flagged as an adverb; can be converted back to its adjective.
class Adverb extends Term {
  // str: the word itself; tag: stored on this.tag (it is not added to this.pos here)
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Adverb'] = true;
  }
  // adjective form of this.normal, as computed by the to_adjective helper
  to_adjective() {
    return to_adjective(this.normal);
  }
  all_forms() {
    return {
      adjective: this.to_adjective(),
      normal: this.normal
    };
  }
}
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
this.adverb = function (s) {
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
// A Noun flagged as a date. this.data holds whatever parse_date() extracted from the
// text ({year, month, day} as read below), or {} when parse_date() returned nothing.
class _Date extends Noun {
  // str: the date text; tag: stored on this.tag
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Date'] = true;
    this.data = parse_date(this.text) || {};
  }
  //can we make it a js Date object?
  // only when year, month and day are all present; `== null` also rejects fields that
  // are missing altogether (e.g. the {} fallback), not just explicit nulls
  is_date() {
    const o = this.data;
    if (o.month == null || o.day == null || o.year == null) {
      return false;
    }
    return true;
  }
  // A js Date for the parsed day, or null when is_date() is false.
  // The time-of-day is whatever the clock reads when this is called.
  date() {
    if (this.is_date() === false) {
      return null;
    }
    const d = new Date();
    //park on the 1st first: changing the month while 'today' is the 29th-31st would
    //otherwise roll over into the following month
    d.setDate(1);
    //setYear is kept on purpose - unlike setFullYear it maps 0-99 onto 1900-1999
    if (this.data.year) {
      d.setYear(this.data.year);
    }
    d.setMonth(this.data.month);
    d.setDate(this.data.day);
    return d;
  }
}
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
this.text = function (s, options) {
return new models.Text(s, options);
// A Term flagged as a noun. Adds article/pronoun lookup, plural handling and the
// is_person / is_place / ... classifiers; the actual rules live in the same-named
// helper functions that each method delegates to.
class Noun extends Term {
  // str: the word itself; tag: optional extra part-of-speech label, also recorded in this.pos
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Noun'] = true;
    if (tag) {
      this.pos[tag] = true;
    }
    //runs after the tag is recorded, because is_plural() consults this.pos
    if (this.is_plural()) {
      this.pos['Plural'] = true;
    }
  }
  //noun methods
  article() {
    //if it's a person, it's he/she, not a/an
    if (this.pos['Person']) {
      return this.pronoun();
    }
    //groups of people are 'they'
    if (this.pos['Organization']) {
      return 'they';
    }
    return article(this.text);
  }
  root() {
    return this.singularize();
  }
  pronoun() {
    if (this.is_organization() || this.is_place() || this.is_value()) {
      return 'it';
    }
    return pronoun(this.normal);
  }
  is_plural() {
    if (this.pos['Date'] || this.pos['Possessive']) {
      return false;
    } else if (this.has_abbreviation()) { //contractions & possessives are not plural
      return false;
    } else {
      return is_plural(this.normal);
    }
  }
  is_uncountable() {
    return is_uncountable(this.strip_apostrophe());
  }
  pluralize() {
    return pluralize(this.strip_apostrophe());
  }
  singularize() {
    return singularize(this.strip_apostrophe());
  }
  //sub-classes
  is_person() {
    //don't overwrite dates, etc
    if (this.pos['Date']) {
      return false;
    }
    return is_person(this.strip_apostrophe());
  }
  is_organization() {
    //don't overwrite urls
    if (this.pos['Url']) {
      return false;
    }
    return is_organization(this.strip_apostrophe(), this.text);
  }
  is_date() {
    return is_date(this.strip_apostrophe());
  }
  is_value() {
    //don't overwrite dates, etc
    if (this.pos['Date'] || this.pos['HashTag']) {
      return false;
    }
    return is_value(this.strip_apostrophe());
  }
  is_place() {
    return is_place(this.strip_apostrophe());
  }
  all_forms() {
    return {
      'singular': this.singularize(),
      'plural': this.pluralize(),
      'normal': this.normal
    };
  }
}
return lex;
};
this.term = function (s) {
return new models.Term(s);
};
this.noun = function (s) {
return new models.Noun(s);
};
this.verb = function (s) {
return new models.Verb(s);
};
this.adjective = function (s) {
return new models.Adjective(s);
};
// A Noun flagged as an organization.
class Organization extends Noun {
  // str: the name; tag: stored on this.tag (it is not added to this.pos here)
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Organization'] = true;
  }
}
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
this.text = function (s, options) {
return new models.Text(s, options);
};
this.sentence = function (s, options) {
return new models.Sentence(s, options);
// A Noun flagged as a person. On construction the name is split into
// honourific / firstName / middleName / lastName (via parse_name), and the term is
// additionally flagged as a Pronoun when it is one of he/she/i/you.
class Person extends Noun {
  // str: the name; tag: optional extra part-of-speech label, also recorded in this.pos
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Person'] = true;
    this.honourific = null;
    this.firstName = null;
    this.middleName = null;
    this.lastName = null;
    this.parse();
    if (this.isPronoun()) {
      this.pos['Pronoun'] = true;
    }
    if (tag) {
      this.pos[tag] = true;
    }
  }
  // true only for the four personal pronouns below. Own-property check, so words that
  // happen to name something on Object.prototype ('constructor') are not mistaken for one.
  isPronoun() {
    const whitelist = {
      'he': true,
      'she': true,
      'i': true,
      'you': true,
    };
    return Object.prototype.hasOwnProperty.call(whitelist, this.normal);
  }
  //proper normalised name without the cruft
  root() {
    if (this.isPronoun()) {
      return this.normal;
    }
    let str = '';
    if (this.firstName) {
      str = this.firstName.toLowerCase();
    }
    if (this.middleName) {
      str += ' ' + this.middleName.toLowerCase();
    }
    if (this.lastName) {
      str += ' ' + this.lastName.toLowerCase();
    }
    //fall back to the whole normalised text when no name parts were found
    return str.trim() || this.normal;
  }
  //turn a multi-word string into [first, middle, last, honourific]
  parse() {
    const o = parse_name(this.normal, this.text.trim());
    this.honourific = o.honourific;
    this.firstName = title_case(o.firstName);
    this.middleName = title_case(o.middleName);
    this.lastName = lastname_case(o.lastName);
  }
  gender() {
    //if we already know it, from the lexicon
    if (this.pos.FemalePerson) {
      return 'Female';
    }
    if (this.pos.MalePerson) {
      return 'Male';
    }
    return guess_gender(this.normal);
  }
  pronoun() {
    const pronouns = {
      Male: 'he',
      Female: 'she',
    };
    const gender = this.gender();
    //return 'singular they' if no gender is found
    return pronouns[gender] || 'they';
  }
}
return new models.Adverb(s);
};
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
// A Noun flagged as a place. On construction the words are sorted into
// title / city / region / country using the `cities` and `countries` lookup tables.
class Place extends Noun {
  // str: the place text; tag: optional extra part-of-speech label, recorded in this.pos
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Place'] = true;
    //only record a tag that was actually given (otherwise this.pos gains an 'undefined' key)
    if (tag) {
      this.pos[tag] = true;
    }
    this.title = null;
    this.city = null;
    this.region = null; //'2nd-tier' (state/province/county/whatever)
    this.country = null;
    this.parse();
  }
  root() {
    return this.title || this.normal;
  }
  parse() {
    //parse a comma-described place like "toronto, ontario"
    const terms = this.strip_apostrophe().split(' ');
    //the first word always starts the title; only later words are looked up
    this.title = terms[0];
    for (let i = 1; i < terms.length; i++) {
      const t = terms[i];
      if (cities[t]) {
        this.city = fns.titlecase(t);
      } else if (countries[t]) {
        this.country = fns.titlecase(t);
      } else if (this.city !== null) { //if we already got the city..
        this.region = fns.titlecase(t);
      } else { //it's part of the title
        this.title += ' ' + t;
      }
    }
  }
}
this.value = function (s) {
return new models.Value(s);
};
this.person = function (s) {
return new models.Person(s);
};
this.place = function (s) {
return new models.Place(s);
};
this.date = function (s) {
return new models.Date(s);
};
this.organization = function (s) {
return new models.Organization(s);
};
// One word (or lumped multi-word unit) of text - the base class of every part-of-speech model.
//   text       the term as given, trimmed
//   normal     lower-cased, punctuation-stripped working copy (see normalize)
//   expansion  whatever implied(normal) returns for contractions/slang
//   pos        part-of-speech flags, e.g. {Noun: true}
//   tag        the tag passed in, or '?' when none was given
//   whitespace {preceding, trailing} strings
class Term {
  constructor(str, tag, whitespace) {
    //don't pass non-strings through here any further..
    //numbers become their text, anything else that isn't a string becomes ''
    if (typeof str === 'number') {
      str = '' + str;
    } else if (typeof str !== 'string') {
      str = '';
    }
    //trailing & preceding whitespace
    //NOTE: a whitespace object passed in is used as-is and gets its defaults filled in
    this.whitespace = whitespace || {};
    this.whitespace.preceding = this.whitespace.preceding || '';
    this.whitespace.trailing = this.whitespace.trailing || '';
    //set .text
    this.text = str;
    //the normalised working-version of the word
    this.normal = '';
    //if it's a contraction or slang, the implication, or 'hidden word'
    this.expansion = '';
    //set .normal
    this.rebuild();
    //the reasoning behind it's part-of-speech
    this.reasoning = [];
    //these are orphaned POS that have no methods
    this.pos = {};
    this.tag = tag || '?';
    if (tag) {
      this.pos[tag] = true;
    }
  }
  //when the text changes, rebuild derivative fields
  rebuild() {
    this.text = this.text || '';
    this.text = this.text.trim();
    this.normal = '';
    this.normalize();
    this.expansion = implied(this.normal);
  }
  changeTo(str) {
    this.text = str;
    this.rebuild();
  }
  //a regex-like string search
  match(match_str, options) {
    const reg = syntax_parse([match_str]);
    return match_term(this, reg[0], options);
  }
  //the 'root' singular/infinitive/whatever.
  // method is overloaded by each pos type
  root() {
    return this.strip_apostrophe();
  }
  //strip apostrophe s
  //only a trailing 's is removed; other endings such as 'll or 're are left in place
  strip_apostrophe() {
    if (this.normal.match(/[a-z]'[a-z][a-z]?$/)) {
      const split = this.normal.split(/'/);
      if (split[1] === 's') {
        return split[0];
      }
    }
    return this.normal;
  }
  has_comma() {
    if (this.text.match(/,$/)) {
      return true;
    }
    return false;
  }
  has_abbreviation() {
    // "spencer's"
    if (this.text.match(/[a-z]'[a-z][a-z]?$/)) {
      return true;
    }
    // "flanders' house"
    if (this.text.match(/[a-z]s'$/)) {
      return true;
    }
    return false;
  }
  //true when an upper-case letter is directly followed by a lower-case one, anywhere in the text
  is_capital() {
    if (this.text.match(/[A-Z][a-z]/)) {
      return true;
    }
    return false;
  }
  //utility method to avoid lumping words with non-word stuff
  is_word() {
    //match-syntax tokens like [Noun] or [Noun]?
    if (this.text.match(/^\[.*?\]\??$/)) {
      return false;
    }
    //needs at least one letter or digit
    if (!this.text.match(/[a-z0-9]/i)) {
      return false;
    }
    if (this.text.match(/[\|#\<\>]/i)) {
      return false;
    }
    return true;
  }
  //FBI or F.B.I.
  is_acronym() {
    return is_acronym(this.text);
  }
  //working word
  //recomputes this.normal from this.text and returns it
  normalize() {
    let str = this.text || '';
    str = str.toLowerCase();
    //strip grammatical punctuation
    str = str.replace(/[,\.!:;\?\(\)^$]/g, '');
    //hashtags, atmentions
    str = str.replace(/^[#@]/, '');
    //convert hyphenations to a multiple-word term
    str = str.replace(/([a-z])\-([a-z])/g, '$1 $2');
    // coerce single curly quotes
    str = str.replace(/[\u2018\u2019\u201A\u201B\u2032\u2035]+/g, '\'');
    // coerce double curly quotes
    str = str.replace(/[\u201C\u201D\u201E\u201F\u2033\u2036]+/g, '');
    //remove quotations + scare-quotes
    str = str.replace(/^'/g, '');
    str = str.replace(/'$/g, '');
    str = str.replace(/"/g, '');
    //nothing word-like left: clear .normal too, so it never keeps a value from older text
    if (!str.match(/[a-z0-9]/i)) {
      this.normal = '';
      return '';
    }
    this.normal = str;
    return this.normal;
  }
  all_forms() {
    return {};
  }
}
lex[k] = obj[k];
});
return lex;
};
//factory: wrap the given input in a new models.Term
this.term = function (s) {
  const term = new models.Term(s);
  return term;
};
//factory: wrap the given input in a new models.Noun
this.noun = function (s) {
  const noun = new models.Noun(s);
  return noun;
};
//factory: wrap the given input in a new models.Verb
this.verb = function (s) {
  const verb = new models.Verb(s);
  return verb;
};
...class Value extends Noun {
constructor(str, tag) {
super(str);
this.tag = tag;
this.pos['Value'] = true;
this.number = null;
this.unit = null;
this.unit_name = null;
this.measurement = null;
this.of_what = '';
// this.text = str;
// this.normal = str;
if (this.is_ordinal()) {
this.pos['Ordinal'] = true;
}
this.parse();
}
//test for nearly-numbers, like phonenumbers, or whatever
is_number(s) {
//phone numbers, etc
if (s.match(/[:@]/)) {
return false;
}
//if there's a number, then something, then a number
if (s.match(/[0-9][^(0-9|\/),\.][0-9]/)) {
if (s.match(/((?:[0-9]|\.)+) ((?:[0-9]|\.)+)\/((?:[0-9]|\.)+)/)) { // I'm sure there is a better regexpxs
return true;
}
return false;
}
return true;
};
is_number_word(w) {
let number_words = {
minus: true,
negative: true,
point: true,
half: true,
quarter: true,
};
if (w.match(/[0-9]/) || number_words[w]) {
return true;
} else if (nums.ones[w] || nums.teens[w] || nums.tens[w] || nums.multiples[w]) {
return true;
} else if (nums.ordinal_ones[w] || nums.ordinal_teens[w] || nums.ordinal_tens[w] || nums.ordinal_multiples[w]) {
return true;
}
return false;
};
is_ordinal() {
//1st
if (this.normal.match(/^[0-9]+(rd|st|nd|th)$/)) {
return true;
}
//first, second...
for(let i = 0; i < ordinals.length; i++) {
if (fns.endsWith(this.normal, ordinals[i])) {
return true;
}
}
return false;
}
//turn an integer like 22 into '22nd'
to_ordinal() {
let num = this.number;
//fail fast
if (!num && num !== 0) {
return '';
}
//teens are all 'th'
if (num >= 10 && num <= 20) {
return '' + num + 'th';
}
//treat it as a string..
num = '' + num;
//fail safely
if (!num.match(/[0-9]$/)) {
return num;
}
if (fns.endsWith(num, '1')) {
return num + 'st';
}
if (fns.endsWith(num, '2')) {
return num + 'nd';
}
if (fns.endsWith(num, '3')) {
return num + 'rd';
}
return num + 'th';
}
//overwrite term.normal?
// normal() {
// let str = '' + (this.number || '');
// if (this.is_ordinal()) {
// str = this.to_ordinal(str);
// }
// if (this.unit) {
// str += ' ' + this.unit;
// }
// return str;
// }
root() {
let str = this.number;
if (this.unit) {
str += ' ' + this.unit;
}
return str;
}
is_unit() {
//if it's a known unit
if (units[this.unit]) {
return true;
}
//currencies are derived-through POS
if (this.pos['Currency']) {
return true;
}
let s = this.unit.toLowerCase();
if (nums.prefixes[s]) {
return true;
}
//try singular version
s = this.unit.replace(/s$/, '');
if (units[s]) {
this.unit = this.unit.replace(/s$/, '');
return true;
}
s = this.unit.replace(/es$/, '');
if (units[s]) {
this.unit = this.unit.replace(/es$/, '');
return true;
}
return false;
}
parse() {
if (!this.is_number(this.text)) {
return;
}
let words = this.text.toLowerCase().split(/[ ]/);
//split at '-' only for numbers like twenty-two, sixty-seven, etc.
//so that 'twelve six-gram pieces' returns 12 for number, not null
//however, still returns null for 'three seven-eleven stores'
for (let i = 0; i < words.length; i++) {
let w = words[i];
if ((w.indexOf('-') === w.lastIndexOf('-')) && w.indexOf('-') > -1) {
let halves = w.split(/[-]/);
if (this.is_number_word(halves[0]) && this.is_number_word(halves[1])) {
words[i] = halves[0];
words.splice(i + 1, 0, halves[1]);
}
}
}
let numbers = '';
let raw_units = '';
//separate number-words from unit-words
for (let i = 0; i < words.length; i++) {
let w = words[i];
if (this.is_number_word(w)) {
numbers += ' ' + w; ......
return new models.Adjective(s);
};
//factory: wrap the given input in a new models.Adverb
this.adverb = function (s) {
  const adverb = new models.Adverb(s);
  return adverb;
};
//factory: wrap the given input in a new models.Value
this.value = function (s) {
  const value = new models.Value(s);
  return value;
};
//factory: wrap the given input in a new models.Person
this.person = function (s) {
  const person = new models.Person(s);
  return person;
};
//factory: wrap the given input in a new models.Place
this.place = function (s) {
  const place = new models.Place(s);
  return place;
};
//A Term known to be a verb. Adds conjugation, tense-changing and negation
//on top of the base Term behaviour.
class Verb extends Term {
  //str - the verb text
  //tag - optional, more specific part-of-speech tag; also recorded in .pos
  constructor(str, tag) {
    super(str);
    this.tag = tag;
    this.pos['Verb'] = true;
    //if we've been told which
    if (tag) {
      this.pos[tag] = true;
    }
  }

  //'root' for a verb means infinitive
  root() {
    return this.conjugate().infinitive;
  }

  //retrieve a specific form - the verbTags entry for whichever conjugation
  //equals the current normal text
  conjugation() {
    //look for an exact match among the conjugations
    let conjugations = this.conjugate();
    let keys = Object.keys(conjugations);
    for (let i = 0; i < keys.length; i++) {
      if (conjugations[keys[i]] === this.normal) {
        return verbTags[keys[i]];
      }
    }
    //try to guess
    return verbTags[predict_form(this.normal)];
  }

  //'past', 'present' or 'future' - anything unrecognised counts as 'present'
  tense() {
    //map conjugation onto past/present/future
    let tenses = {
      infinitive: 'present',
      gerund: 'present',
      actor: 'present',
      present: 'present',
      past: 'past',
      future: 'future',
      perfect: 'past',
      pluperfect: 'past',
      future_perfect: 'future'
    };
    //NOTE(review): conjugation() hands back a verbTags *value* - confirm those
    //values use the same names as the keys above, otherwise this lookup always
    //falls through to 'present'
    let c = this.conjugation();
    return tenses[c] || 'present';
  }

  //every conjugated form of the current normal text
  conjugate() {
    return conjugate(this.normal);
  }

  //shared by to_past/to_present/to_future: re-tag this term, swap its text
  //for the requested conjugation, and return that new text
  _change_tense(tense) {
    let conjugations = this.conjugate();
    this.tag = verbTags[tense];
    this.changeTo(conjugations[tense]);
    return conjugations[tense];
  }

  to_past() {
    return this._change_tense('past');
  }

  to_present() {
    return this._change_tense('present');
  }

  to_future() {
    return this._change_tense('future');
  }

  //adjective form, derived from the infinitive
  to_adjective() {
    return to_adjective(this.conjugate().infinitive);
  }

  //is this verb negative already?
  isNegative() {
    const str = this.normal;
    //yep, pretty simple
    if (str.match(/(n't|\bnot\b)/)) {
      return true;
    }
    return false;
  }

  //turn 'walked' to "didn't walk"
  negate() {
    this.changeTo(negate(this));
    return this;
  }

  //all conjugations, plus the 'negated' and 'normal' forms
  all_forms() {
    //copy first - writing 'negated'/'normal' straight onto the object from
    //conjugate() would leak into any other holder of that object, and a stray
    //'normal' key would make conjugation() match on it
    let conjugations = this.conjugate();
    let forms = {};
    Object.keys(conjugations).forEach(function (k) {
      forms[k] = conjugations[k];
    });
    forms['negated'] = negate(this);
    forms['normal'] = this.normal;
    return forms;
  }
}
//factory: wrap the given input in a new models.Term
this.term = function (s) {
  const term = new models.Term(s);
  return term;
};
//factory: wrap the given input in a new models.Noun
this.noun = function (s) {
  const noun = new models.Noun(s);
  return noun;
};
//factory: wrap the given input in a new models.Verb
this.verb = function (s) {
  const verb = new models.Verb(s);
  return verb;
};
//factory: wrap the given input in a new models.Adjective
this.adjective = function (s) {
  const adjective = new models.Adjective(s);
  return adjective;
};
//factory: wrap the given input in a new models.Adverb
this.adverb = function (s) {
  const adverb = new models.Adverb(s);
  return adverb;
};
...