-
Notifications
You must be signed in to change notification settings - Fork 0
/
lex.ts
71 lines (66 loc) · 1.96 KB
/
lex.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Token kind identifiers. Each one ends up in the `token` field of a `Token`.

// Operators: the kind is the operator character itself.
export const T_STAR = '*';
export const T_PLUS = '+';
export const T_MINUS = '-';
export const T_EQUALS = '=';

// Punctuation.
export const T_SEMICOLON = ';';

// Kinds registered with `tWithValue`, i.e. tokens that also carry the matched text.
export const T_TYPE = 'TYPE';
export const T_IDENTIFIER = 'IDENTIFIER';
export const T_NUMBER = 'NUMBER';
/** A single lexical token, as returned by `lex`. */
export interface Token {
/** Token kind, e.g. one of the `T_*` constants. */
token: string;
/** Matched source text; set by creators built with `tWithValue`, absent for those built with `t`. */
value?: string;
}
/** Builds a `Token` from the piece of source text that a declaration matched. */
type tokenCreator = (matchingString: string) => Token;
/**
 * One lexer rule, `[pattern, creator]`.
 * The pattern is either a literal string or a RegExp that `lex` matches against
 * the remaining input. A `null` creator means the matched text is consumed
 * without emitting a token.
 */
type tokenDeclaration = [string | RegExp, null | tokenCreator]
/**
 * Creates a token factory for kinds that carry no value.
 *
 * @param token - Token kind stamped on every token the factory produces.
 * @returns A zero-argument factory that returns a fresh `{ token }` object on each call.
 */
const t = (token: string) => () => ({ token });
/**
 * Creates a token factory for kinds that keep the text they matched.
 *
 * @param token - Token kind stamped on every token the factory produces.
 * @returns A factory that takes the matched text and returns a fresh
 *   `{ token, value }` object with `value` set to that text.
 */
const tWithValue = (token: string) => (lexeme: string) => ({ token, value: lexeme });
/**
 * Token table consulted by `lex`.
 *
 * `lex` tries the entries from top to bottom at each input position and uses
 * the first one that matches, so the order of the entries is significant.
 */
const language: tokenDeclaration[] = [
  // Spaces are consumed without producing a token.
  [' ', null],
  [';', t(T_SEMICOLON)],
  ['=', t(T_EQUALS)],
  ['*', t(T_STAR)],
  // Listed before '-' so that a leading minus sign is folded into the number.
  // NOTE(review): this also turns `a-1` into IDENTIFIER, NUMBER(-1) with no
  // T_MINUS token; confirm that the consumer of the tokens expects this.
  [/^-?\d+\.?\d*/, tWithValue(T_NUMBER)],
  ['+', t(T_PLUS)],
  ['-', t(T_MINUS)],
  // `\b` stops identifiers that merely start with a type name (`integer`,
  // `boolean`, `int_count`) from being split into TYPE + IDENTIFIER.
  [/^(int|char|float|bool)\b/, tWithValue(T_TYPE)],
  [/^\w+/, tWithValue(T_IDENTIFIER)],
];
/**
 * Splits `str` into tokens using a table of token declarations.
 *
 * At every position the declarations are tried in order and the first one
 * that matches wins:
 * - a string pattern matches when the input at the current position starts
 *   with exactly that string;
 * - a RegExp pattern is matched against the remaining input, so it is
 *   expected to be anchored with `^`.
 *
 * The creator of the winning declaration receives the matched text. A `null`
 * creator consumes the text without emitting a token. Zero-length matches are
 * ignored so that the scan always advances. Input that no declaration
 * recognises is skipped one character at a time without reporting an error.
 *
 * @param str - Source text to tokenize.
 * @param declarations - Token table to use; defaults to the module-level `language`.
 * @returns The tokens in the order they appear in `str`.
 */
export function lex(
  str: string,
  declarations: ReadonlyArray<[string | RegExp, null | ((matchingString: string) => Token)]> = language
): Token[] {
  const result: Token[] = [];
  let i = 0;
  while (i < str.length) {
    // Sliced once per position instead of once per RegExp declaration.
    const rest = str.slice(i);
    let consumed = 0;
    for (const [pattern, createToken] of declarations) {
      let lexeme: string | null = null;
      if (typeof pattern === 'string') {
        if (str.slice(i, i + pattern.length) === pattern) {
          lexeme = pattern;
        }
      } else {
        const match = rest.match(pattern);
        if (match !== null) {
          lexeme = match[0];
        }
      }
      // An empty match would never advance `i`; treat it as "no match".
      if (lexeme === null || lexeme.length === 0) {
        continue;
      }
      if (createToken !== null) {
        // Pass the text that was matched, not the character that follows it.
        result.push(createToken(lexeme));
      }
      consumed = lexeme.length;
      break;
    }
    // TODO: unrecognised input is still dropped silently; consider reporting it.
    i += consumed > 0 ? consumed : 1;
  }
  return result;
}