From 1af33dd33a0408472d2c9be4453e4b89a51e9304 Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Fri, 4 Aug 2017 11:15:51 -0700 Subject: [PATCH 1/6] Don't store schema inside client There's no need to do that --- redisearch/client.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/redisearch/client.go b/redisearch/client.go index fb14d52..160201f 100644 --- a/redisearch/client.go +++ b/redisearch/client.go @@ -40,9 +40,7 @@ var DefaultOptions = Options{ // Cleint is an interface to redisearch's redis commands type Client struct { pool *redis.Pool - - schema *Schema - name string + name string } var maxConns = 500 @@ -55,8 +53,7 @@ func NewClient(addr, name string) *Client { // TODO: Add timeouts. and 2 separate pools for indexing and querying, with different timeouts return redis.Dial("tcp", addr) }, maxConns), - schema: nil, - name: name, + name: name, } ret.pool.TestOnBorrow = func(c redis.Conn, t time.Time) (err error) { @@ -73,7 +70,6 @@ func NewClient(addr, name string) *Client { // CreateIndex configues the index and creates it on redis func (i *Client) CreateIndex(s *Schema) error { - i.schema = s args := redis.Args{i.name} // Set flags based on options if s.Options.NoFieldFlags { @@ -93,7 +89,7 @@ func (i *Client) CreateIndex(s *Schema) error { } args = append(args, "SCHEMA") - for _, f := range i.schema.Fields { + for _, f := range s.Fields { switch f.Type { case TextField: From 951c69b2e5fc796dfac7eb57a629c6305887f2d6 Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Fri, 4 Aug 2017 11:17:11 -0700 Subject: [PATCH 2/6] Cleanup tests: - Allow to change hostname from environment - Change to current API --- redisearch/redisearch_test.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/redisearch/redisearch_test.go b/redisearch/redisearch_test.go index 1478bcc..a59d18c 100644 --- a/redisearch/redisearch_test.go +++ b/redisearch/redisearch_test.go @@ -3,15 +3,25 @@ package redisearch_test import ( "fmt" "log" + "os" "testing" "time" "github.com/RedisLabs/redisearch-go/redisearch" ) +func createClient(indexName string) *redisearch.Client { + value, exists := os.LookupEnv("REDISEARCH_TEST_HOST") + host := "localhost:6379" + if exists && value != "" { + host = value + } + return redisearch.NewClient(host, indexName) +} + func TestClient(t *testing.T) { - c := redisearch.NewClient("localhost:6379", "testung") + c := createClient("testung") sc := redisearch.NewSchema(redisearch.DefaultOptions). AddField(redisearch.NewTextField("foo")) @@ -25,7 +35,7 @@ func TestClient(t *testing.T) { docs[i] = redisearch.NewDocument(fmt.Sprintf("doc%d", i), float32(i)/float32(100)).Set("foo", "hello world") } - if err := c.Index(docs, redisearch.DefaultIndexingOptions); err != nil { + if err := c.IndexOptions(redisearch.DefaultIndexingOptions, docs...); err != nil { t.Fatal(err) } @@ -37,7 +47,7 @@ func ExampleClient() { // Create a client. By default a client is schemaless // unless a schema is provided when creating the index - c := redisearch.NewClient("localhost:6379", "myIndex") + c := createClient("myIndex") // Create a schema sc := redisearch.NewSchema(redisearch.DefaultOptions). @@ -60,8 +70,7 @@ func ExampleClient() { Set("date", time.Now().Unix()) // Index the document. The API accepts multiple documents at a time - if err := c.Index([]redisearch.Document{doc}, - redisearch.DefaultIndexingOptions); err != nil { + if err := c.IndexOptions(redisearch.DefaultIndexingOptions, doc); err != nil { log.Fatal(err) } From 3c03a3f9d9b0250796764004d49b631c7e095dd2 Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Fri, 4 Aug 2017 11:17:55 -0700 Subject: [PATCH 3/6] Return a map of errors This way we can know which documents errored and how many errors took place --- redisearch/client.go | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/redisearch/client.go b/redisearch/client.go index 160201f..5888b81 100644 --- a/redisearch/client.go +++ b/redisearch/client.go @@ -152,15 +152,16 @@ var DefaultIndexingOptions = IndexingOptions{ } // Index indexes multiple documents on the index, with optional Options passed to options -func (i *Client) IndexOptions(opts IndexingOptions, docs ...Document) error { +func (i *Client) IndexOptions(opts IndexingOptions, docs ...Document) (errors map[int]error) { conn := i.pool.Get() defer conn.Close() n := 0 + errors = make(map[int]error) - for _, doc := range docs { - args := make(redis.Args, 0, len(i.schema.Fields)*2+6) + for ii, doc := range docs { + args := make(redis.Args, 0, 6+len(doc.Properties)) args = append(args, i.name, doc.Id, doc.Score) // apply options if opts.NoSave { @@ -184,23 +185,29 @@ func (i *Client) IndexOptions(opts IndexingOptions, docs ...Document) error { } if err := conn.Send("FT.ADD", args...); err != nil { - return err + errors[ii] = err + return } n++ } if err := conn.Flush(); err != nil { - return err + errors[-1] = err + return } for n > 0 { if _, err := conn.Receive(); err != nil { - return err + errors[n-1] = err } n-- } - return nil + if len(errors) == 0 { + return nil + } + + return } // convert the result from a redis query to a proper Document object @@ -239,7 +246,7 @@ func loadDocument(arr []interface{}, idIdx, scoreIdx, payloadIdx, fieldsIdx int) return doc, nil } -func (i *Client) Index(docs ...Document) error { +func (i *Client) Index(docs ...Document) map[int]error { return i.IndexOptions(DefaultIndexingOptions, docs...) } From 9230ce1c51c8731d74cb9b5c45370696d4c8a22d Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Fri, 4 Aug 2017 11:18:23 -0700 Subject: [PATCH 4/6] Provide Info() function This function returns information about the index, but also allows us to check if the index exists. This depends on another change in redigo - I might make a PR for that --- redisearch/client.go | 152 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/redisearch/client.go b/redisearch/client.go index 5888b81..c7f041a 100644 --- a/redisearch/client.go +++ b/redisearch/client.go @@ -3,7 +3,9 @@ package redisearch import ( "errors" "fmt" + "reflect" "strconv" + "strings" "time" @@ -310,3 +312,153 @@ func (i *Client) Drop() error { return err } + +// IndexInfo - Structure showing information about an existing index +type IndexInfo struct { + Schema Schema + Name string `redis:"index_name"` + DocCount uint64 `redis:"num_docs"` + RecordCount uint64 `redis:"num_records"` + TermCount uint64 `redis:"num_terms"` + MaxDocID uint64 `redis:"max_doc_id"` + InvertedIndexSizeMB float64 `redis:"inverted_sz_mb"` + OffsetVectorSizeMB float64 `redis:"offset_vector_sz_mb"` + DocTableSizeMB float64 `redis:"doc_table_size_mb"` + KeyTableSizeMB float64 `redis:"key_table_size_mb"` + RecordsPerDocAvg float64 `redis:"records_per_doc_avg"` + BytesPerRecordAvg float64 `redis:"bytes_per_record_avg"` + OffsetsPerTermAvg float64 `redis:"offsets_per_term_avg"` + OffsetBitsPerTermAvg float64 `redis:"offset_bits_per_record_avg"` +} + +func (info *IndexInfo) setTarget(key string, value interface{}) error { + v := reflect.ValueOf(info).Elem() + for i := 0; i < v.NumField(); i++ { + tag := v.Type().Field(i).Tag.Get("redis") + if tag == key { + targetInfo := v.Field(i) + switch targetInfo.Kind() { + case reflect.String: + s, _ := redis.String(value, nil) + targetInfo.SetString(s) + case reflect.Uint64: + u, _ := redis.Uint64(value, nil) + targetInfo.SetUint(u) + case reflect.Float64: + f, _ := redis.Float64(value, nil) + targetInfo.SetFloat(f) + default: + panic("Tag set without handler") + } + return nil + } + } + return errors.New("setTarget: No handler defined for :" + key) +} + +func sliceIndex(haystack []string, needle string) int { + for pos, elem := range haystack { + if elem == needle { + return pos + } + } + return -1 +} + +func (info *IndexInfo) loadSchema(values []interface{}, options []string) { + // Values are a list of fields + scOptions := Options{} + for _, opt := range options { + switch strings.ToUpper(opt) { + case "NOFIELDS": + scOptions.NoFieldFlags = true + case "NOFREQS": + scOptions.NoFrequencies = true + case "NOOFFSETS": + scOptions.NoOffsetVectors = true + } + } + sc := NewSchema(scOptions) + for _, specTmp := range values { + // spec, isArr := specTmp.([]string) + // if !isArr { + // panic("Value is not an array of strings!") + // } + spec, err := redis.Strings(specTmp, nil) + if err != nil { + panic(err) + } + // Name, Type, + if len(spec) < 3 { + panic("Invalid spec") + } + var options []string + if len(spec) > 3 { + options = spec[3:] + } else { + options = []string{} + } + + f := Field{Name: spec[0]} + switch strings.ToUpper(spec[2]) { + case "NUMERIC": + f.Type = NumericField + nfOptions := NumericFieldOptions{} + f.Options = nfOptions + if sliceIndex(options, "SORTABLE") != -1 { + nfOptions.Sortable = true + } + case "TEXT": + f.Type = TextField + tfOptions := TextFieldOptions{} + f.Options = tfOptions + if sliceIndex(options, "SORTABLE") != -1 { + tfOptions.Sortable = true + } + if wIdx := sliceIndex(options, "WEIGHT"); wIdx != -1 && wIdx+1 != len(spec) { + weightString := options[wIdx+1] + weight64, _ := strconv.ParseFloat(weightString, 32) + tfOptions.Weight = float32(weight64) + } + } + sc = sc.AddField(f) + } + info.Schema = *sc +} + +// Info - Get information about the index. This can also be used to check if the +// index exists +func (i *Client) Info() (*IndexInfo, error) { + conn := i.pool.Get() + defer conn.Close() + + res, err := redis.Values(conn.Do("FT.INFO", i.name)) + if err != nil { + return nil, err + } + + ret := IndexInfo{} + var schemaFields []interface{} + var indexOptions []string + + // Iterate over the values + for ii := 0; ii < len(res); ii += 2 { + key, _ := redis.String(res[ii], nil) + if err := ret.setTarget(key, res[ii+1]); err == nil { + continue + } + + switch key { + case "index_options": + indexOptions, _ = redis.Strings(res[ii+1], nil) + case "fields": + schemaFields, _ = redis.Values(res[ii+1], nil) + } + } + + if schemaFields != nil { + ret.loadSchema(schemaFields, indexOptions) + } + + return &ret, nil +} From 06bab17a546f1f811c0b4073bbc7b027a930ea38 Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Mon, 14 Aug 2017 08:26:29 -0700 Subject: [PATCH 5/6] Add support for NOSTEM option --- redisearch/client.go | 4 +++- redisearch/schema.go | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/redisearch/client.go b/redisearch/client.go index c7f041a..ae2be7e 100644 --- a/redisearch/client.go +++ b/redisearch/client.go @@ -108,7 +108,9 @@ func (i *Client) CreateIndex(s *Schema) error { if opts.Sortable { args = append(args, "SORTABLE") } - + if opts.NoStem { + args = append(args, "NOSTEM") + } } case NumericField: diff --git a/redisearch/schema.go b/redisearch/schema.go index b1d422b..3b115c4 100644 --- a/redisearch/schema.go +++ b/redisearch/schema.go @@ -32,6 +32,7 @@ type Field struct { type TextFieldOptions struct { Weight float32 Sortable bool + NoStem bool } // NumericFieldOptions Options for numeric fields From b4d3b1aef7aa2ef4086f6b5adc015182f9fa78a8 Mon Sep 17 00:00:00 2001 From: Mark Nunberg Date: Tue, 22 Aug 2017 07:06:52 -0700 Subject: [PATCH 6/6] Change IndexOptions return type to error proper --- redisearch/client.go | 12 +++++++++++- redisearch/redisearch_test.go | 7 +++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/redisearch/client.go b/redisearch/client.go index ae2be7e..555ec7f 100644 --- a/redisearch/client.go +++ b/redisearch/client.go @@ -148,6 +148,16 @@ type IndexingOptions struct { Replace bool } +// MultiError Represents one or more errors +type MultiError map[int]error + +func (e MultiError) Error() string { + for _, err := range e { + return err.Error() + } + return "" +} + // DefaultIndexingOptions are the default options for document indexing var DefaultIndexingOptions = IndexingOptions{ Language: "", @@ -156,7 +166,7 @@ var DefaultIndexingOptions = IndexingOptions{ } // Index indexes multiple documents on the index, with optional Options passed to options -func (i *Client) IndexOptions(opts IndexingOptions, docs ...Document) (errors map[int]error) { +func (i *Client) IndexOptions(opts IndexingOptions, docs ...Document) (errors MultiError) { conn := i.pool.Get() defer conn.Close() diff --git a/redisearch/redisearch_test.go b/redisearch/redisearch_test.go index a59d18c..c0750c4 100644 --- a/redisearch/redisearch_test.go +++ b/redisearch/redisearch_test.go @@ -39,6 +39,13 @@ func TestClient(t *testing.T) { t.Fatal(err) } + // Test it again + if err := c.IndexOptions(redisearch.DefaultIndexingOptions, docs...); err == nil { + t.Fatal("Expected error for duplicate document") + } else if len(err) != 100 { + t.Fatal("Not enough errors received") + } + docs, total, err := c.Search(redisearch.NewQuery("hello world")) fmt.Println(docs, total, err) }