Given a stream of words (or tokens), computes a stream of n-grams. By default it produces bigrams (pairs of consecutive words). This can be changed by setting the n option.
// Example: default behaviour, which emits bigrams (n = 2).
import { of } from 'rxjs';
import { take } from 'rxjs/operators';
import { ngram } from '@bottlenose/rxnlp';

const words = ['yo', 'ho', 'yo', 'ho', 'a', 'pirate\'s', 'life'];
const word$ = of(...words);
const bigram$ = word$.pipe(
  ngram(), // no options given, so n defaults to 2
  take(3), // keep only the first three bigrams
);
bigram$.subscribe(console.log);
// ['yo', 'ho']
// ['ho', 'yo']
// ['yo', 'ho']
// Example: emit trigrams by setting the n option to 3.
import { of } from 'rxjs';
import { take } from 'rxjs/operators';
import { ngram } from '@bottlenose/rxnlp';

const words = ['yo', 'ho', 'yo', 'ho', 'a', 'pirate\'s', 'life'];
const word$ = of(...words);
const trigram$ = word$.pipe(
  ngram({ n: 3 }), // n sets how many tokens go into each emitted n-gram
  take(3), // keep only the first three trigrams
);
trigram$.subscribe(console.log);
// ['yo', 'ho', 'yo']
// ['ho', 'yo', 'ho']
// ['yo', 'ho', 'a']
// Example: emit each bigram only once.
import { of } from 'rxjs';
import { distinct } from 'rxjs/operators';
import { ngram } from '@bottlenose/rxnlp';

const words = ['yo', 'ho', 'yo', 'ho', 'a', 'pirate\'s', 'life'];
const word$ = of(...words);
const bigram$ = word$.pipe(
  ngram(),
  // distinct() without a key selector compares emitted values by identity,
  // so two separate arrays with the same contents would both pass through.
  // Keying on the serialized contents de-duplicates bigrams by value.
  // NOTE(review): assumes ngram() emits a new array per bigram - confirm.
  // Note: distinct() consumes memory, since it must cache every key it has
  // seen in order to check uniqueness.
  distinct((bigram) => JSON.stringify(bigram)),
);
bigram$.subscribe(console.log);
// ['yo', 'ho']
// ['ho', 'yo']
// ['ho', 'a']
// ['a', 'pirate\'s']
// ['pirate\'s', 'life']
ngram(options = {
[n=2]
})