-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathlexer.js
86 lines (72 loc) · 2.16 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/*!
* jsPOS
*
* Copyright 2010, Percy Wegmann
* Licensed under the GNU LGPLv3 license
* http://www.opensource.org/licenses/lgpl-3.0.html
*/
// Public interface of this module: the Lexer constructor. `Lexer` is a
// function declaration later in this file, so it is hoisted and already
// defined at this point.
module.exports = Lexer;
// Regular expressions used to tokenize text. Every pattern except `unblank`
// carries the `g` flag so that String#match returns all occurrences when
// LexerNode splits a string with it. None of the patterns uses a capturing
// group, so String#split never injects captured text into its result.
var re = {
// URL matcher taken from:
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
url: /\b(?:(?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/ig,
// A decimal number with an optional integer part (".5", "5.60"), or a plain
// run of digits ("42").
number: /[0-9]*\.[0-9]+|[0-9]+/ig,
// One or more consecutive whitespace characters.
space: /\s+/ig,
// Any single non-whitespace character; used with .test() to decide whether a
// piece of text is worth emitting as a token. It has no `g` flag, so .test()
// keeps no `lastIndex` state between calls.
unblank: /\S/,
// A single punctuation character: one of / . , ? !
punctuation: /[\/\.\,\?\!]/ig
}
/**
 * One level of the tokenization tree.
 *
 * The node splits `string` with `regex`, remembering both the text that
 * matched (`this.matches`) and the pieces left between the matches
 * (`this.children`). Each piece is then split again with the next regex in
 * `regexs`, and so on until no regexes remain, at which point the pieces are
 * stored as plain strings.
 *
 * For the non-capturing, non-empty-matching patterns this file uses, a
 * successful split yields exactly one more piece than there are matches, so
 * the original text order is: children[0], matches[0], children[1], ...
 *
 * @param {string} string  Text to split. A falsy value (e.g. "" or undefined)
 *                         is never matched and becomes the node's only piece.
 * @param {RegExp} regex   Global regular expression applied at this level.
 * @param {RegExp[]} regexs Regular expressions for the deeper levels, in the
 *                         order they should be applied; may be empty.
 */
function LexerNode(string, regex, regexs){
  var pieces;
  var nextRegex;
  var nextRegexes;
  var i;

  this.string = string;
  this.children = [];
  // String#match returns null when nothing matches; falsy input is not
  // matched at all.
  this.matches = string ? string.match(regex) : null;

  if (this.matches) {
    pieces = string.split(regex);
  } else {
    // Nothing to split on: the whole input is the single piece.
    this.matches = [];
    pieces = [string];
  }

  if (!regexs.length) {
    // no more regular expressions, we're done
    this.children = pieces;
    return;
  }

  // descend recursively
  nextRegex = regexs[0];
  nextRegexes = regexs.slice(1);
  // Index loop rather than for...in: for...in would also visit any enumerable
  // property that happens to live on Array.prototype.
  for (i = 0; i < pieces.length; i++) {
    this.children.push(new LexerNode(pieces[i], nextRegex, nextRegexes));
  }
}

/**
 * Appends this subtree's tokens to `array` in their original text order.
 *
 * Children and matches are interleaved (child, match, child, ...). Anything
 * that contains no non-whitespace character is dropped, which is how the
 * whitespace separators and empty pieces disappear from the output.
 *
 * @param {Array} array Destination array; mutated in place.
 */
LexerNode.prototype.fillArray = function(array){
  var i;
  var child;
  var match;

  for (i = 0; i < this.children.length; i++) {
    child = this.children[i];
    // A leaf built from undefined/null input holds that value as its only
    // child; skip it instead of dereferencing it.
    if (child != null) {
      if (child.fillArray) {
        child.fillArray(array);
      } else if (re.unblank.test(child)) {
        array.push(child);
      }
    }
    if (i < this.matches.length) {
      match = this.matches[i];
      if (re.unblank.test(match)) {
        array.push(match);
      }
    }
  }
};

/**
 * Returns the node's tokens as a comma-separated string
 * (the result of Array#toString on the token list).
 *
 * @returns {string}
 */
LexerNode.prototype.toString = function(){
  var array = [];
  this.fillArray(array);
  return array.toString();
};
/**
 * Tokenizer that breaks text into an array of tokens.
 *
 * The regular expressions are applied in a fixed order, one per level of the
 * LexerNode tree: urls first, then numbers, then whitespace, then punctuation.
 */
function Lexer(){
  this.regexs = [
    re.url,
    re.number,
    re.space,
    re.punctuation
  ];
}

/**
 * Tokenizes `string`.
 *
 * Builds a LexerNode tree rooted at the first regex, with the remaining
 * regexes handed down to the deeper levels, and flattens it into a new array.
 *
 * @param {string} string Text to tokenize.
 * @returns {Array} The tokens collected by LexerNode#fillArray.
 */
Lexer.prototype.lex = function(string){
  var tokens = [];
  var firstRegex = this.regexs[0];
  var remainingRegexes = this.regexs.slice(1);
  var root = new LexerNode(string, firstRegex, remainingRegexes);

  root.fillArray(tokens);
  return tokens;
};
//var lexer = new Lexer();
//print(lexer.lex("I made $5.60 today in 1 hour of work. The E.M.T.'s were on time, but only barely.").toString());