-
Notifications
You must be signed in to change notification settings - Fork 33
/
generator.js
136 lines (127 loc) · 3.05 KB
/
generator.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// LICENSE_CODE ZON
'use strict'; /*jslint node:true*/
const random_js = require('random-js');
const _ = require('underscore');
// CHARS is the size of the symbol alphabet shared by the models: index 0
// stands for "no character" (end of word), 1-26 for 'a'-'z' and 27 for the
// apostrophe (see c2i/i2c).
// TC_SIZE is the number of distinct words generate() puts in one result.
// MAX_DEPTH is the highest Markov order init() builds a model for.
const CHARS = 28, TC_SIZE = 100, MAX_DEPTH = 8;
// Public API of this module.
module.exports = {
init,
generate,
sequence,
TC_SIZE,
MAX_DEPTH,
};
// Module state assigned by init(): the list of word models and the
// lowercased, sorted word list that generate() reads.
let models, dictionary;
// Convert a character to its symbol index.
// A missing/empty character maps to 0 (end of word), the apostrophe maps
// to 27, and anything else maps to its first char code minus 96, so that
// 'a' is 1 and 'z' is 26.
function c2i(c)
{
    const END = 0, APOSTROPHE = 27, BASE = 96;
    return !c ? END : c=="'" ? APOSTROPHE : c.charCodeAt(0)-BASE;
}
// Convert a symbol index back to its character (inverse of c2i).
// A falsy index (0 / undefined) yields the empty string, 27 yields the
// apostrophe, and any other value yields the character with code i+96,
// so that 1 is 'a' and 26 is 'z'.
function i2c(i)
{
    const APOSTROPHE = 27, BASE = 96;
    return !i ? '' : i==APOSTROPHE ? "'" : String.fromCharCode(i+BASE);
}
// Character-level Markov model of a fixed order.
// `data` maps a context string (up to `order` preceding characters) to a
// Uint32Array of CHARS+1 counters: slot 0 is the number of observations of
// that context, slot s+1 is how often symbol index s followed it (symbol 0
// being "end of word").
class MarkovModel {
    constructor(order){
        this.order = order;
        this.data = new Map();
    }
    // Record every (context -> next symbol) transition of `word`, including
    // the final transition to the end-of-word symbol.
    learn(word){
        for (let end = 0; end<=word.length; end++)
        {
            const start = Math.max(end-this.order, 0);
            const context = word.slice(start, end);
            let counts = this.data.get(context);
            if (!counts)
            {
                counts = new Uint32Array(CHARS+1);
                this.data.set(context, counts);
            }
            counts[0] += 1;
            // word[end] is undefined one past the last character, which
            // c2i maps to symbol 0
            counts[c2i(word[end])+1] += 1;
        }
    }
    // Draw the next character after `prefix`, weighted by the learned
    // counts for the prefix's trailing context. Returns '' for end of word.
    // `random` must provide integer(min, max).
    produce(random, prefix){
        const counts = this.data.get(prefix.slice(-this.order));
        let remaining = random.integer(0, counts[0]-1);
        for (let symbol = 0; symbol<CHARS; symbol++)
        {
            remaining -= counts[symbol+1];
            if (remaining<0)
                return i2c(symbol);
        }
    }
    // Build a whole word by drawing characters until produce() yields a
    // falsy value.
    generate(random){
        let word = '';
        for (let next = this.produce(random, word); next;
            next = this.produce(random, word))
        {
            word += next;
        }
        return word;
    }
}
// Untrained model: emits uniformly drawn symbols, with the number of draws
// capped by a random limit between 10 and 30.
class LengthModel {
    // Nothing to learn; present so the model can be trained like the others.
    learn(){}
    // Draw one symbol index in [0, CHARS-1] and return its character
    // ('' for index 0). `prefix` is ignored.
    produce(random, prefix){
        const symbol = random.integer(0, CHARS-1);
        return i2c(symbol);
    }
    // Build a word from at most `limit` draws. An empty draw ends the word
    // once it is non-empty; empty draws before that still use up a draw.
    generate(random){
        const limit = random.integer(10, 30);
        let word = '';
        let draws = 0;
        while (draws<limit)
        {
            const ch = this.produce(random, word);
            if (word && !ch)
                break;
            word += ch;
            draws++;
        }
        return word;
    }
}
// (Re)initialise the module state from a word list.
// Builds one LengthModel plus a MarkovModel for every order from 1 to
// MAX_DEPTH, stores the lowercased and sorted copy of `words` as the
// dictionary, then feeds every dictionary word to every model.
// `words` itself is not modified.
function init(words){
    models = [new LengthModel()];
    let order = 0;
    while (order<MAX_DEPTH)
        models.push(new MarkovModel(++order));
    const lowered = words.map(w=>w.toLowerCase());
    lowered.sort();
    dictionary = lowered;
    for (const word of dictionary)
        models.forEach(model=>model.learn(word));
}
// Build one test case: an object with TC_SIZE distinct words as keys.
// Each value is {real, model}: `real` tells whether the word was picked
// from the dictionary; for generated words `model` is the index in
// `models` of the model that produced it (undefined for real words).
// The result is fully determined by `seed` and the state set up by init().
//
// Generated (non-real) words are re-drawn until they are not found in the
// dictionary; the lookup is a binary search, so it relies on the dictionary
// being sorted as init() leaves it.
function generate(seed)
{
    const random = new random_js(random_js.engines.mt19937().seed(seed));
    const res = {};
    const has_own = Object.prototype.hasOwnProperty;
    for (let i = 0; i<TC_SIZE; i++)
    {
        let word, model;
        const real = random.bool();
        if (real)
            word = random.pick(dictionary);
        else
        {
            do {
                model = random.integer(0, models.length-1);
                word = models[model].generate(random);
            } while (_.indexOf(dictionary, word, true)>=0);
        }
        // Own-property check only: `word in res` would also match names
        // inherited from Object.prototype (e.g. "constructor"), so such
        // words were treated as duplicates and could never be emitted.
        if (has_own.call(res, word))
        {
            // duplicate word: redo this slot
            i--;
            continue;
        }
        // defineProperty rather than plain assignment so that a word such
        // as "__proto__" becomes an ordinary own key instead of replacing
        // the prototype of the result object.
        Object.defineProperty(res, word, {
            value: {real, model},
            enumerable: true,
            writable: true,
            configurable: true,
        });
    }
    return res;
}
// Endless generator of integers in [0, 0x7fffffff], seeded from a string:
// the UTF-16 code units of `seed_text` are passed as the seed array of a
// mt19937 engine, and every next() yields one more integer drawn from it.
function*sequence(seed_text)
{
    const codes = Array.from({length: seed_text.length},
        (unused, idx)=>seed_text.charCodeAt(idx));
    const engine = random_js.engines.mt19937().seedWithArray(codes);
    const random = new random_js(engine);
    for (;;)
        yield random.integer(0, 0x7fffffff);
}