-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtokenizer.h
117 lines (100 loc) · 2.05 KB
/
tokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#pragma once
#include <iostream>
#include <list>
#include <memory>
#include <sstream>
#include <string>
#include "char_pattern.h"
/// Base class for the tokens handed out by the tokenizer.
///
/// A token stores the text it was built from and a list of child tokens
/// ("values"). The list holds raw pointers; nothing in this class deletes
/// them, so their lifetime is managed elsewhere.
class token
{
public:
    token() = default;
    token(const token&) = default;
    token(token&&) = default;
    token& operator=(const token&) = default;
    token& operator=(token&&) = default;

    /// Virtual so that a derived token can be destroyed through a `token*`
    /// (tokens are created as derived types and passed around as `token*`).
    virtual ~token() = default;

    /// Takes over the text of `value` and appends its child list to this
    /// token's child list. `value` must not be null and must not be `this`.
    virtual void init(token* value)
    {
        _text = value->text();
        std::list<token*>& source = value->values();
        _values.insert(std::end(_values), std::begin(source), std::end(source));
    }

    /// Appends `value` as a child when it reports may_be_value() or
    /// is_sticky(); any other token is ignored. `value` must not be null.
    virtual void add(token* value)
    {
        if (value->may_be_value() || value->is_sticky())
            _values.push_back(value);
    }

    /// Returns the mutable list of child tokens.
    virtual std::list<token*>& values()
    {
        return _values;
    }

    /// Writes the token text to std::cout (no trailing newline).
    virtual void print()
    {
        std::cout << _text;
    }

    /// Replaces the token text.
    void set_text(const std::string& text) { _text = text; }

    /// Returns the token text.
    const std::string& text() const { return _text; }

    // Classification hooks: the base token answers "no" to all of them and
    // derived tokens override the ones that apply.
    virtual bool may_be_name() { return false; }
    virtual bool may_be_value() { return false; }
    virtual bool is_text() { return false; }
    virtual bool is_sticky() { return false; }

protected:
    std::string _text;          ///< Text this token was built from.
    std::list<token*> _values;  ///< Child tokens (non-owning pointers).
};
// One tokenizer rule: a character pattern plus the factory used to build a
// token from the text that pattern produced.
// Both members are raw pointers; this struct has no destructor and frees
// nothing itself.
struct tokenizer_pattern
{
    // Pattern object tried against the input.
    char_pattern* pattern;

    // Factory called with the matched text; returns the newly built token.
    token* (*func)(std::string& matched_text);
};
class tokenizer
{
public:
tokenizer(std::string& text)
{
_text = text;
_it = std::begin(_text);
_use_backslashes = true;
}
template <typename T>
void add(const char* pattern)
{
auto item = new tokenizer_pattern();
item->pattern = new char_pattern(pattern);
item->pattern->use_backslashes(_use_backslashes);
item->func = [](std::string& text)
{
token* res = new T();
res->set_text(text);
return res;
};
_patterns.push_back(item);
}
void use_backslashes(bool value)
{
_use_backslashes = value;
}
token* next()
{
std::string text;
size_t advance;
for (; _it != std::end(_text); ++_it)
{
auto ch = *_it;
if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n')
break;
}
if (_it == std::end(_text))
return NULL;
for (auto p : _patterns)
{
text.clear();
advance = 0;
if (p->pattern->check(_it, std::end(_text), text, advance))
{
auto token = p->func(text);
_it += advance;
return token;
}
}
return NULL;
}
private:
std::list<tokenizer_pattern*> _patterns;
bool _use_backslashes;
std::string _text;
std::string::iterator _it;
};