Skip to content

Commit e507d34

Browse files
committed
Merge github.com:orangeduck/mpc
2 parents 41aecb4 + d00739f commit e507d34

File tree

11 files changed

+830
-758
lines changed

11 files changed

+830
-758
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ examples/maths
88
examples/smallc
99
examples/foobar
1010
examples/tree_traversal
11+
build/*

Makefile

+60-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
2-
CC = gcc
3-
STND=-ansi
4-
CFLAGS = $(STND) -pedantic -O3 -g -Wall -Werror -Wextra -Wformat=2 -Wshadow \
1+
PROJ = mpc
2+
CC ?= gcc
3+
STD ?= -ansi
4+
DIST = build
5+
MKDIR ?= mkdir -p
6+
PREFIX ?= /usr/local
7+
CFLAGS ?= $(STD) -pedantic -O3 -g -Wall -Werror -Wextra -Wformat=2 -Wshadow \
58
-Wno-long-long -Wno-overlength-strings -Wno-format-nonliteral -Wcast-align \
69
-Wwrite-strings -Wstrict-prototypes -Wold-style-definition -Wredundant-decls \
710
-Wnested-externs -Wmissing-include-dirs -Wswitch-default
@@ -10,15 +13,59 @@ TESTS = $(wildcard tests/*.c)
1013
EXAMPLES = $(wildcard examples/*.c)
1114
EXAMPLESEXE = $(EXAMPLES:.c=)
1215

13-
all: $(EXAMPLESEXE) check
16+
.PHONY: all check clean libs $(DIST)/$(PROJ).pc
17+
18+
all: $(EXAMPLESEXE) check
19+
20+
$(DIST):
21+
$(MKDIR) $(DIST)/examples
22+
23+
check: $(DIST)/test-file $(DIST)/test-static $(DIST)/test-dynamic
24+
./$(DIST)/test-file
25+
./$(DIST)/test-static
26+
LD_LIBRARY_PATH=$(DIST) ./$(DIST)/test-dynamic
27+
28+
$(DIST)/test-file: $(TESTS) $(PROJ).c $(PROJ).h tests/ptest.h
29+
$(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) $(PROJ).c -lm -o $(DIST)/test-file
30+
31+
$(DIST)/test-dynamic: $(TESTS) $(DIST)/lib$(PROJ).so $(PROJ).h tests/ptest.h
32+
$(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -o $(DIST)/test-dynamic
33+
34+
$(DIST)/test-static: $(TESTS) $(DIST)/lib$(PROJ).a $(PROJ).h tests/ptest.h
35+
$(CC) $(filter-out -Werror, $(CFLAGS)) $(TESTS) -lm -L$(DIST) -l$(PROJ) -static -o $(DIST)/test-static
1436

15-
check: $(TESTS) mpc.c
16-
$(CC) $(filter-out -Werror, $(CFLAGS)) $^ -lm -o test
17-
./test
37+
examples/%: $(DIST) examples/%.c $(PROJ).c $(PROJ).h
38+
$(CC) $(CFLAGS) $(filter-out $(DIST) $(PROJ).h, $^) -lm -o $(DIST)/$@
39+
40+
$(DIST)/lib$(PROJ).so: $(PROJ).c $(PROJ).h
41+
ifneq ($(OS),Windows_NT)
42+
$(CC) $(CFLAGS) -fPIC -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so
43+
else
44+
$(CC) $(CFLAGS) -shared $(PROJ).c -o $(DIST)/lib$(PROJ).so
45+
endif
46+
47+
$(DIST)/lib$(PROJ).a: $(PROJ).c $(PROJ).h
48+
$(CC) $(CFLAGS) -c $(PROJ).c -o $(DIST)/$(PROJ).o
49+
$(AR) rcs $(DIST)/lib$(PROJ).a $(DIST)/$(PROJ).o
50+
51+
libs: $(DIST)/lib$(PROJ).so $(DIST)/lib$(PROJ).a
52+
53+
$(DIST)/$(PROJ).pc: $(DIST) $(PROJ).pc
54+
cp $(PROJ).pc $(DIST)/$(PROJ).pc
55+
sed -i '1i\prefix=$(PREFIX)/' $(DIST)/$(PROJ).pc
1856

19-
examples/%: examples/%.c mpc.c
20-
$(CC) $(CFLAGS) $^ -lm -o $@
21-
2257
clean:
23-
rm -rf test examples/doge examples/lispy examples/maths examples/smallc \
24-
examples/foobar examples/tree_traversal
58+
rm -rf -- $(DIST)
59+
60+
install: all
61+
install -d -m644 $(DESTDIR)$(PREFIX)/{include,lib/pkgconfig,share/$(PROJ)}
62+
install -m755 -t $(DESTDIR)$(PREFIX)/lib $(DIST)/lib*
63+
install -m644 -t $(DESTDIR)$(PREFIX)/share/$(PROJ) $(PROJ).{c,h}
64+
install -m644 $(PROJ).h $(DESTDIR)$(PREFIX)/include/$(PROJ).h
65+
install -m644 $(DIST)/$(PROJ).pc $(DESTDIR)$(PREFIX)/lib/pkgconfig/$(PROJ).pc
66+
67+
uninstall:
68+
rm -rf -- \
69+
$(DESTDIR)$(PREFIX)/include/$(PROJ).h \
70+
$(DESTDIR)$(PREFIX)/share/$(PROJ)/$(PROJ).{c,h} \
71+
$(DESTDIR)$(PREFIX)/lib/lib$(PROJ).{so,a}

README.md

+40-40
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Micro Parser Combinators
22
========================
33

4-
Version 0.8.8
4+
Version 0.9.0
55

66

77
About
@@ -116,15 +116,15 @@ Basic Parsers
116116
117117
All the following functions construct new basic parsers of the type `mpc_parser_t *`. All of those parsers return a newly allocated `char *` with the character(s) they manage to match. If unsuccessful they will return an error. They have the following functionality.
118118
119-
* * *
119+
* * *
120120
121121
```c
122122
mpc_parser_t *mpc_any(void);
123123
```
124124

125125
Matches any individual character
126126

127-
* * *
127+
* * *
128128

129129
```c
130130
mpc_parser_t *mpc_char(char c);
@@ -283,7 +283,7 @@ Run a parser on the contents of some file.
283283
Combinators
284284
-----------
285285

286-
Combinators are functions that take one or more parsers and return a new parser of some given functionality.
286+
Combinators are functions that take one or more parsers and return a new parser of some given functionality.
287287

288288
These combinators work independently of exactly what data type the parser(s) supplied as input return. In languages such as Haskell, ensuring you don't input one type of data into a parser requiring a different type is done by the compiler. But in C we don't have that luxury. So it is at the discretion of the programmer to ensure that he or she deals correctly with the outputs of different parser types.
289289

@@ -312,13 +312,13 @@ Returns a parser that applies function `f` (optionally taking extra input `x`)
312312
* * *
313313

314314
```c
315-
mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_check_t f, const char *e);
316-
mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_check_with_t f, void *x, const char *e);
317-
mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_check_t f, const char *fmt, ...);
318-
mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_check_with_t f, void *x, const char *fmt, ...);
315+
mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e);
316+
mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e);
317+
mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...);
318+
mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...);
319319
```
320320
321-
Returns a parser that applies function `f` (optionally taking extra input `x`) to the result of parser `a`. If `f` returns non-zero, then the parser succeeds and returns the value of `a` (possibly modified by `f`). If `f` returns zero, then the parser fails with message `e`.
321+
Returns a parser that applies function `f` (optionally taking extra input `x`) to the result of parser `a`. If `f` returns non-zero, then the parser succeeds and returns the value of `a` (possibly modified by `f`). If `f` returns zero, then the parser fails with message `e`, and the result of `a` is destroyed with the destructor `da`.
322322
323323
* * *
324324
@@ -556,9 +556,9 @@ To ease the task of undefining and then deleting parsers `mpc_cleanup` can be us
556556
mpc_parser_t *mpc_copy(mpc_parser_t *a);
557557
```
558558
559-
This function makes a copy of a parser `a`. This can be useful when you want to
560-
use a parser as input for some other parsers multiple times without retaining
561-
it.
559+
This function makes a copy of a parser `a`. This can be useful when you want to
560+
use a parser as input for some other parsers multiple times without retaining
561+
it.
562562
563563
* * *
564564
@@ -567,11 +567,11 @@ mpc_parser_t *mpc_re(const char *re);
567567
mpc_parser_t *mpc_re_mode(const char *re, int mode);
568568
```
569569

570-
This function takes as input the regular expression `re` and builds a parser
571-
for it. With the `mpc_re_mode` function optional mode flags can also be given.
572-
Available flags are `MPC_RE_MULTILINE` / `MPC_RE_M` where the start of input
573-
character `^` also matches the beginning of new lines and the end of input `$`
574-
character also matches new lines, and `MPC_RE_DOTALL` / `MPC_RE_S` where the
570+
This function takes as input the regular expression `re` and builds a parser
571+
for it. With the `mpc_re_mode` function optional mode flags can also be given.
572+
Available flags are `MPC_RE_MULTILINE` / `MPC_RE_M` where the start of input
573+
character `^` also matches the beginning of new lines and the end of input `$`
574+
character also matches new lines, and `MPC_RE_DOTALL` / `MPC_RE_S` where the
575575
any character token `.` also matches newlines (by default it doesn't).
576576

577577

@@ -626,7 +626,7 @@ Useful Parsers
626626

627627
<tr><td><code>mpc_startswith(mpc_parser_t *a);</code></td><td>Matches the start of input followed by <code>a</code></td></tr>
628628
<tr><td><code>mpc_endswith(mpc_parser_t *a, mpc_dtor_t da);</code></td><td>Matches <code>a</code> followed by the end of input</td></tr>
629-
<tr><td><code>mpc_whole(mpc_parser_t *a, mpc_dtor_t da);</code></td><td>Matches the start of input, <code>a</code>, and the end of input</td></tr>
629+
<tr><td><code>mpc_whole(mpc_parser_t *a, mpc_dtor_t da);</code></td><td>Matches the start of input, <code>a</code>, and the end of input</td></tr>
630630
<tr><td><code>mpc_stripl(mpc_parser_t *a);</code></td><td>Matches <code>a</code> first consuming any whitespace to the left</td></tr>
631631
<tr><td><code>mpc_stripr(mpc_parser_t *a);</code></td><td>Matches <code>a</code> then consumes any whitespace to the right</td></tr>
632632
<tr><td><code>mpc_strip(mpc_parser_t *a);</code></td><td>Matches <code>a</code> consuming any surrounding whitespace</td></tr>
@@ -707,17 +707,17 @@ We start with a fold function that will fold two `int *` into a new `int *` base
707707

708708
```c
709709
mpc_val_t *fold_maths(int n, mpc_val_t **xs) {
710-
710+
711711
int **vs = (int**)xs;
712-
712+
713713
if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; }
714714
if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; }
715715
if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; }
716716
if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; }
717717
if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; }
718-
718+
719719
free(xs[1]); free(xs[2]);
720-
720+
721721
return xs[0];
722722
}
723723
```
@@ -730,14 +730,14 @@ mpc_parser_t *Factor = mpc_new("factor");
730730
mpc_parser_t *Term = mpc_new("term");
731731
mpc_parser_t *Maths = mpc_new("maths");
732732
733-
mpc_define(Expr, mpc_or(2,
733+
mpc_define(Expr, mpc_or(2,
734734
mpc_and(3, fold_maths,
735735
Factor, mpc_oneof("+-"), Factor,
736736
free, free),
737737
Factor
738738
));
739739
740-
mpc_define(Factor, mpc_or(2,
740+
mpc_define(Factor, mpc_or(2,
741741
mpc_and(3, fold_maths,
742742
Term, mpc_oneof("*/"), Term,
743743
free, free),
@@ -781,6 +781,8 @@ The syntax for this is defined as follows.
781781
<tr><td><code>'a' | 'b'</code></td><td>Either <code>'a'</code> is required, or <code>'b'</code> is required.</td></tr>
782782
<tr><td><code>'a'*</code></td><td>Zero or more <code>'a'</code> are required.</td></tr>
783783
<tr><td><code>'a'+</code></td><td>One or more <code>'a'</code> are required.</td></tr>
784+
<tr><td><code>'a'?</code></td><td>Zero or one <code>'a'</code> is required.</td></tr>
785+
<tr><td><code>'a'{x}</code></td><td>Exactly <code>x</code> (integer) copies of <code>'a'</code> are required.</td></tr>
784786
<tr><td><code>&lt;abba&gt;</code></td><td>The rule called <code>abba</code> is required.</td></tr>
785787
</table>
786788

@@ -825,17 +827,17 @@ This opens and reads in the contents of the file given by `filename` and passes
825827
Case Study - Tokenizer
826828
======================
827829

828-
Another common task we might be interested in doing is tokenizing some block of
830+
Another common task we might be interested in doing is tokenizing some block of
829831
text (splitting the text into individual elements) and performing some function
830832
on each one of these elements as it is read. We can do this with `mpc` too.
831833

832-
First, we can build a regular expression which parses an individual token. For
833-
example if our tokens are identifiers, integers, commas, periods and colons we
834-
could build something like this `mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")`.
835-
Next we can strip any whitespace, and add a callback function using `mpc_apply`
836-
which gets called every time this regex is parsed successfully
837-
`mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)`.
838-
Finally we can surround all of this in `mpc_many` to parse it zero or more
834+
First, we can build a regular expression which parses an individual token. For
835+
example if our tokens are identifiers, integers, commas, periods and colons we
836+
could build something like this `mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")`.
837+
Next we can strip any whitespace, and add a callback function using `mpc_apply`
838+
which gets called every time this regex is parsed successfully
839+
`mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token)`.
840+
Finally we can surround all of this in `mpc_many` to parse it zero or more
839841
times. The final code might look something like this:
840842

841843
```c
@@ -847,16 +849,16 @@ static mpc_val_t *print_token(mpc_val_t *x) {
847849
int main(int argc, char **argv) {
848850

849851
const char *input = " hello 4352 , \n foo.bar \n\n test:ing ";
850-
852+
851853
mpc_parser_t* Tokens = mpc_many(
852-
mpcf_all_free,
854+
mpcf_all_free,
853855
mpc_apply(mpc_strip(mpc_re("\\s*([a-zA-Z_]+|[0-9]+|,|\\.|:)")), print_token));
854-
856+
855857
mpc_result_t r;
856858
mpc_parse("input", input, Tokens, &r);
857-
859+
858860
mpc_delete(Tokens);
859-
861+
860862
return 0;
861863
}
862864
```
@@ -875,7 +877,7 @@ Token: ':'
875877
Token: 'ing'
876878
```
877879
878-
By extending the regex we can easily extend this to parse many more types of
880+
By extending the regex we can easily extend this to parse many more types of
879881
tokens and quickly and easily build a tokenizer for whatever language we are
880882
interested in.
881883
@@ -991,5 +993,3 @@ When parsing from a grammar, the abstract syntax tree is tagged with different t
991993
If you have a rule in your grammar called `string`, `char` or `regex`, you may encounter some confusion. This is because nodes will be tagged with (for example) `string` _either_ if they are a string primitive, _or_ if they were parsed via your `string` rule. If you are detecting node type using something like `strstr`, in this situation it might break. One solution to this is to always check that `string` is the innermost tag to test for string primitives, or to rename your rule called `string` to something that doesn't conflict.
992994

993995
Yes, it is annoying, but it's probably not going to change!
994-
995-

examples/line_reader.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,4 @@ int main(int argc, char **argv) {
3131
mpc_delete(Line);
3232

3333
return 0;
34-
}
34+
}

0 commit comments

Comments
 (0)