-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathatom.h
129 lines (108 loc) · 3.02 KB
/
atom.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/* atom.h
*
* One atom matches one token zero or more times. A single atom
* can match against a tree, or it could hold a nested core to
* search a group.
*/
/* Include guard. The macro name deliberately avoids leading or double
 * underscores, which C reserves for the implementation. */
#ifndef REGEX_ATOM_H
#define REGEX_ATOM_H
/* Forward declaration of the atom type; the layout of struct _reg_atom
 * is not defined in this header. The typedef sits ahead of the
 * #includes below -- presumably so that those headers can already
 * refer to atom_t (TODO confirm). */
typedef struct _reg_atom atom_t;
#include <stdbool.h>
#include "class.h"
#include "bts.h"
#include "core.h"
// Maximum number of repetitions. The value (10^9) fits in a 32-bit int.
// NOTE(review): presumably the cap applied when atom_set_range is given
// -1 ("no maximum") -- confirm in the implementation.
#define MAXREPS 1000000000
// Character class used by the matching logic to implement word anchors.
// Only declared here (extern); the object itself is defined elsewhere.
extern class_t* word_characters;
/** atom_match
 *
 * Do a match for a single atom of the regular expression, possibly
 * including repetitions.
 *
 * Returns nothing; the outcome is presumably reported through the
 * bts_t and/or range_t arguments (TODO confirm in the implementation).
 * The roles of the range_t* and char* parameters are not documented
 * in this header.
 */
void atom_match(atom_t*, bts_t*, range_t*, char*);
/** atom_set_class
 *
 * "Gives" a character class to the atom, in the sense that the atom
 * now owns the class and is responsible for freeing it. The class
 * denotes the set of characters accepted by the atom, so that the
 * atom can test whether it accepts a particular character in constant
 * time.
 *
 * (The file header comment refers to this structure as a "tree"; the
 * parameter type is class_t.)
 */
void atom_set_class(atom_t*, class_t*);
/** atom_set_string
 *
 * Give a string to an atom, creating an atom that matches that string
 * of literals once.
 *
 * NOTE(review): whether the atom copies the string or keeps (and later
 * frees) the caller's pointer is not stated here -- confirm in the
 * implementation.
 */
void atom_set_string(atom_t*, char*);
/** atom_set_core
 *
 * Give the atom possession of a nested core. This construct is used to
 * match groups within a regular expression. The int argument sets the
 * atom's internal flag:
 *   0 -> group
 *   1 -> atomic
 *   2 -> lookahead
 *   3 -> lookbehind
 *   4 -> subroutine
 */
void atom_set_core(atom_t*, core_t*, int);
/** atom_set_anchor
 *
 * Set the atom to be an anchor. The int argument tells the engine
 * what sort of anchor to create:
 *   1 -> word anchor
 *   2 -> beginning or end anchor
 * Other values are not documented in this header.
 */
void atom_set_anchor(atom_t*, int);
/** atom_set_invert
 *
 * Normally an atom accepts characters that are in its class; this
 * function can be used to change an atom's behavior to accepting
 * any character NOT in its class. The same field is also used in
 * the lookahead case to tell whether it is a normal or a negative
 * lookahead.
 */
void atom_set_invert(atom_t*, bool);
/** atom_set_range
 *
 * Set an atom to accept repetitions; for example, the regex construct
 * a{2,4} accepts 'aa', 'aaa', and 'aaaa'. The third argument is the
 * maximum number of repetitions; if it is -1, the atom has no maximum.
 * The second argument is presumably the minimum (2 in the example
 * above) -- TODO confirm.
 */
void atom_set_range(atom_t*, int, int);
/** atom_set_reference
 *
 * Set an atom to search for a match to a previously captured group;
 * e.g. '(suki)\1' matches 'sukisuki'. The int argument is presumably
 * the number of the referenced group (1 in the example) -- TODO
 * confirm.
 */
void atom_set_reference(atom_t*, int);
/** atom_set_greedy
 *
 * Set the greediness attribute of the atom from the bool argument.
 * NOTE(review): presumably true selects greedy repetition and false
 * lazy repetition -- confirm in the implementation.
 */
void atom_set_greedy(atom_t*, bool);
/** atom_has_group
 *
 * Returns true if the atom contains a group which keeps track
 * of group captures, and false otherwise.
 */
bool atom_has_group(atom_t*);
/** atom_highest_index
 *
 * Call the _core_groups function on the atom's nested core.
 * NOTE(review): the int result is presumably what that call returns
 * (the function name suggests the highest capture-group index), and
 * the behavior for an atom without a nested core is not documented
 * here -- confirm in the implementation.
 */
int atom_highest_index(atom_t*);
/** atom_find_core
 *
 * Helper function for core_find_core. Returns a core_t pointer.
 * NOTE(review): the meaning of the int argument and the value returned
 * when nothing is found are not documented in this header -- see
 * core_find_core.
 */
core_t* atom_find_core(atom_t*, int);
/** atom_new
 *
 * Create a new atom and return a pointer to it. The atom is presumably
 * meant to be released with atom_free.
 * NOTE(review): the meaning of the int argument is not documented in
 * this header -- confirm in the implementation.
 */
atom_t* atom_new(int);
/** atom_free
 *
 * Deallocate the atom and anything it points to. Per the ownership
 * rule documented for atom_set_class, this covers objects the atom
 * was given possession of.
 */
void atom_free(atom_t*);
#endif