Skip to content

Commit 889fc3e

Browse files
committed
Add support for parsing functions in RDF (#3412)
The following forms are now possible (useful in the upsert API): * uid(v) <predicate> "object" . * <0x01> <predicate> uid(foo) . For now, only the uid function is supported; support for more functions can be added in the future.
1 parent 398318e commit 889fc3e

File tree

3 files changed

+146
-44
lines changed

3 files changed

+146
-44
lines changed

chunker/rdf/parse.go

+37-9
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,17 @@ L:
7777
case itemSubject:
7878
rnq.Subject = strings.Trim(item.Val, " ")
7979

80-
case itemVarKeyword:
81-
it.Next()
82-
if item = it.Item(); item.Typ != itemLeftRound {
83-
return rnq, errors.Errorf("Expected '(', found: %s", item.Val)
84-
}
85-
it.Next()
86-
if item = it.Item(); item.Typ != itemVarName {
87-
return rnq, errors.Errorf("Expected variable name, found: %s", item.Val)
80+
case itemSubjectFunc:
81+
var err error
82+
if rnq.Subject, err = parseFunction(it); err != nil {
83+
return rnq, err
8884
}
8985

90-
it.Next() // parse ')'
86+
case itemObjectFunc:
87+
var err error
88+
if rnq.ObjectId, err = parseFunction(it); err != nil {
89+
return rnq, err
90+
}
9191

9292
case itemPredicate:
9393
// Here we split predicate and lang directive (ex: "name@en"), if needed.
@@ -202,6 +202,34 @@ L:
202202
return rnq, nil
203203
}
204204

205+
// parseFunction parses a function call of the form uid(<var name>) from the
206+
// item stream and returns it in the canonical form uid(<var name>).
// The result is rebuilt from the token values alone, so whitespace between
// the tokens (e.g. `uid ( v )`) does not appear in the returned string.
//
// On entry the iterator's current item must be the function-name item; its
// Val is used as the function name. The following items must be, in order,
// a '(' item, a variable-name item and a ')' item. On success the iterator
// is left positioned on the ')' item. An error is returned if any of these
// items has an unexpected type or if the variable name is empty.
207+
func parseFunction(it *lex.ItemIterator) (string, error) {
208+
item := it.Item()
209+
// Start the result with the function name taken from the current item.
s := item.Val
210+
211+
it.Next()
212+
if item = it.Item(); item.Typ != itemLeftRound {
213+
return "", errors.Errorf("Expected '(', found: %s", item.Val)
214+
}
215+
216+
it.Next()
217+
if item = it.Item(); item.Typ != itemVarName {
218+
return "", errors.Errorf("Expected variable name, found: %s", item.Val)
219+
}
220+
// Reject calls such as `uid()` where the variable name item is blank.
if strings.TrimSpace(item.Val) == "" {
221+
return "", errors.Errorf("Empty variable name in function call")
222+
}
223+
s += "(" + item.Val + ")"
224+
225+
it.Next()
226+
// The closing parenthesis is validated but not consumed past.
if item = it.Item(); item.Typ != itemRightRound {
227+
return "", errors.Errorf("Expected ')', found: %s", item.Val)
228+
}
229+
230+
return s, nil
231+
}
232+
205233
func parseFacets(it *lex.ItemIterator, rnq *api.NQuad) error {
206234
if !it.Next() {
207235
return errors.Errorf("Unexpected end of facets.")

chunker/rdf/parse_test.go

+73
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,79 @@ var testNQuads = []struct {
895895
input: `<alice> <age> "13"^^<xs:double> (salary=NaN) .`,
896896
expectedErr: true,
897897
},
898+
{
899+
input: `uid(v) <lives> "\x02 wonderland" .`,
900+
nq: api.NQuad{
901+
Subject: "uid(v)",
902+
Predicate: "lives",
903+
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "\x02 wonderland"}},
904+
},
905+
expectedErr: false,
906+
},
907+
{
908+
input: `uid ( v ) <lives> "vrinadavan" .`,
909+
nq: api.NQuad{
910+
Subject: "uid(v)",
911+
Predicate: "lives",
912+
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "vrinadavan"}},
913+
},
914+
expectedErr: false,
915+
},
916+
{
917+
input: `uid ( val ) <lives> "vrinadavan" .`,
918+
nq: api.NQuad{
919+
Subject: "uid(val)",
920+
Predicate: "lives",
921+
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "vrinadavan"}},
922+
},
923+
expectedErr: false,
924+
},
925+
{
926+
input: `uid ( val ) <lives> uid(g) .`,
927+
nq: api.NQuad{
928+
Subject: "uid(val)",
929+
Predicate: "lives",
930+
ObjectId: "uid(g)",
931+
},
932+
expectedErr: false,
933+
},
934+
{
935+
input: `uid ( val ) <lives> uid ( g ) .`,
936+
nq: api.NQuad{
937+
Subject: "uid(val)",
938+
Predicate: "lives",
939+
ObjectId: "uid(g)",
940+
},
941+
expectedErr: false,
942+
},
943+
{
944+
input: `uid ( val <lives> uid ( g ) .`,
945+
expectedErr: true,
946+
},
947+
{
948+
input: `uid val ) <lives> uid ( g ) .`,
949+
expectedErr: true,
950+
},
951+
{
952+
input: `ui(uid) <lives> uid ( g ) .`,
953+
expectedErr: true,
954+
},
955+
{
956+
input: `uid()) <lives> uid ( g ) .`,
957+
expectedErr: true,
958+
},
959+
{
960+
input: `uid() <lives> uid ( g ) .`,
961+
expectedErr: true,
962+
},
963+
{
964+
input: `uid(a) <lives> uid ( ) .`,
965+
expectedErr: true,
966+
},
967+
{
968+
input: `uid(a) lives> uid ( ) .`,
969+
expectedErr: true,
970+
},
898971
}
899972

900973
func TestLex(t *testing.T) {

chunker/rdf/state.go

+36-35
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,24 @@ import (
2626

2727
// The constants represent different types of lexed Items possible for an rdf N-Quad.
2828
const (
29-
itemText lex.ItemType = 5 + iota // plain text
30-
itemSubject // subject, 6
31-
itemPredicate // predicate, 7
32-
itemObject // object, 8
33-
itemLabel // label, 9
34-
itemLiteral // literal, 10
35-
itemLanguage // language, 11
36-
itemObjectType // object type, 12
37-
itemValidEnd // end with dot, 13
38-
itemComment // comment, 14
39-
itemComma // comma, 15
40-
itemEqual // equal, 16
41-
itemLeftRound // '(', 17
42-
itemRightRound // ')', 18
43-
itemStar // *, 19
44-
itemVarKeyword // var, 20
45-
itemVarName // 21
29+
itemText lex.ItemType = 5 + iota // plain text
30+
itemSubject // subject, 6
31+
itemPredicate // predicate, 7
32+
itemObject // object, 8
33+
itemLabel // label, 9
34+
itemLiteral // literal, 10
35+
itemLanguage // language, 11
36+
itemObjectType // object type, 12
37+
itemValidEnd // end with dot, 13
38+
itemComment // comment, 14
39+
itemComma // comma, 15
40+
itemEqual // equal, 16
41+
itemLeftRound // '(', 17
42+
itemRightRound // ')', 18
43+
itemStar // *, 19
44+
itemSubjectFunc // uid, 20
45+
itemObjectFunc // uid, 21
46+
itemVarName // 22
4647
)
4748

4849
// These constants keep a track of the depth while parsing an rdf N-Quad.
@@ -137,6 +138,7 @@ func lexText(l *lex.Lexer) lex.StateFn {
137138
l.Depth = atSubject
138139
}
139140

141+
// TODO(Aman): add support for more functions here.
140142
case r == 'u':
141143
if l.Depth != atSubject && l.Depth != atObject {
142144
return l.Errorf("Unexpected char 'u'")
@@ -422,40 +424,39 @@ func lexComment(l *lex.Lexer) lex.StateFn {
422424
func lexVariable(l *lex.Lexer) lex.StateFn {
423425
var r rune
424426

427+
// TODO(Aman): add support for more functions here.
425428
for _, c := range "uid" {
426429
if r = l.Next(); r != c {
427-
return l.Errorf("Unexpected char '%c' when parsing var keyword", r)
430+
return l.Errorf("Unexpected char '%c' when parsing uid keyword", r)
428431
}
429432
}
430-
l.Emit(itemVarKeyword)
433+
if l.Depth == atObject {
434+
l.Emit(itemObjectFunc)
435+
} else if l.Depth == atSubject {
436+
l.Emit(itemSubjectFunc)
437+
}
431438
l.IgnoreRun(isSpace)
432439

433440
if r = l.Next(); r != '(' {
434-
return l.Errorf("Expected '(' after var keyword, found: '%c'", r)
441+
return l.Errorf("Expected '(' after uid keyword, found: '%c'", r)
435442
}
436443
l.Emit(itemLeftRound)
437-
438444
l.IgnoreRun(isSpace)
439445

440-
for {
441-
r := l.Next()
442-
if r == lex.EOF {
443-
return l.Errorf("Unexpected end of input while reading variable name.")
444-
}
445-
if r == ')' {
446-
l.Backup()
447-
break
448-
}
449-
if isSpace(r) {
450-
break
451-
}
446+
// TODO(Aman): we support all characters in variable names except space and
447+
// right parenthesis. We should support only a limited set of characters in variable names.
448+
// For now, this is fine because variable names must be used once in a query
449+
// block before they can be used here. Also, we throw an error if the number of
450+
// used variables is different from the number of defined variables.
451+
acceptVar := func(r rune) bool { return !(isSpace(r) || r == ')') }
452+
if _, valid := l.AcceptRun(acceptVar); !valid {
453+
return l.Errorf("Unexpected end of input while reading variable name")
452454
}
453455
l.Emit(itemVarName)
454-
455456
l.IgnoreRun(isSpace)
456457

457458
if r = l.Next(); r != ')' {
458-
return l.Errorf("Expected ')' while reading var, found: '%c'", r)
459+
return l.Errorf("Expected ')' while reading uid func, found: '%c'", r)
459460
}
460461
l.Emit(itemRightRound)
461462
l.Depth++

0 commit comments

Comments
 (0)