Skip to content

Commit 3e659c9

Browse files
committed
changing vector name to record
1 parent 702f013 commit 3e659c9

File tree

7 files changed

+174
-152
lines changed

7 files changed

+174
-152
lines changed

.idea/.gitignore

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

main.go

+73-73
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,73 @@
1-
package main
2-
3-
import (
4-
"encoding/csv"
5-
"fmt"
6-
"io"
7-
"log"
8-
"os"
9-
"strconv"
10-
)
11-
12-
func main() {
13-
dataset := readCsvIntoSlice("dataset.csv")
14-
questions := readCsvIntoSlice("questions.csv")
15-
16-
rankings := make([][]float64, len(questions))
17-
for i, question := range questions {
18-
temp := make([]float64, len(dataset))
19-
for j, sentence := range dataset {
20-
score := question.cosine_similarity(sentence)
21-
temp[j] = score
22-
}
23-
rankings[i] = temp
24-
}
25-
fmt.Println(rankings)
26-
}
27-
28-
func readCsvIntoSlice(name string) []Vector {
29-
file, err := os.Open(name)
30-
if err != nil {
31-
log.Fatal("Cannot read the file", err)
32-
}
33-
34-
r := csv.NewReader(file)
35-
embeddings := []Vector{}
36-
37-
for {
38-
record, err := r.Read()
39-
if err == io.EOF {
40-
break
41-
}
42-
43-
tempEmbedding := convertStringListToFloats(record)
44-
45-
if err != nil {
46-
log.Fatal("Error reading file", err)
47-
}
48-
49-
embeddings = append(embeddings, tempEmbedding)
50-
}
51-
52-
if err := file.Close(); err != nil {
53-
log.Fatal("Not able to close the file")
54-
}
55-
56-
return embeddings
57-
}
58-
59-
func convertStringListToFloats(record []string) Vector {
60-
tempEmbedding := Vector{
61-
data: make([]float64, len(record)),
62-
}
63-
64-
for i, value := range record {
65-
point, err := strconv.ParseFloat(value, 64)
66-
if err != nil {
67-
log.Fatal(err)
68-
}
69-
70-
tempEmbedding.data[i] = point
71-
}
72-
return tempEmbedding
73-
}
1+
package main
2+
3+
import (
4+
"encoding/csv"
5+
"fmt"
6+
"io"
7+
"log"
8+
"os"
9+
"strconv"
10+
)
11+
12+
func main() {
13+
dataset := readCsvIntoSlice("dataset.csv")
14+
questions := readCsvIntoSlice("questions.csv")
15+
16+
rankings := make([][]float64, len(questions))
17+
for i, question := range questions {
18+
temp := make([]float64, len(dataset))
19+
for j, sentence := range dataset {
20+
score := question.cosine_similarity(sentence)
21+
temp[j] = score
22+
}
23+
rankings[i] = temp
24+
}
25+
fmt.Println(rankings)
26+
}
27+
28+
func readCsvIntoSlice(name string) []Record {
29+
file, err := os.Open(name)
30+
if err != nil {
31+
log.Fatal("Cannot read the file", err)
32+
}
33+
34+
r := csv.NewReader(file)
35+
embeddings := []Record{}
36+
37+
for {
38+
record, err := r.Read()
39+
if err == io.EOF {
40+
break
41+
}
42+
43+
tempEmbedding := convertStringListToFloats(record)
44+
45+
if err != nil {
46+
log.Fatal("Error reading file", err)
47+
}
48+
49+
embeddings = append(embeddings, tempEmbedding)
50+
}
51+
52+
if err := file.Close(); err != nil {
53+
log.Fatal("Not able to close the file")
54+
}
55+
56+
return embeddings
57+
}
58+
59+
func convertStringListToFloats(record []string) Record {
60+
tempEmbedding := Record{
61+
index: make([]float64, len(record)),
62+
}
63+
64+
for i, value := range record {
65+
point, err := strconv.ParseFloat(value, 64)
66+
if err != nil {
67+
log.Fatal(err)
68+
}
69+
70+
tempEmbedding.index[i] = point
71+
}
72+
return tempEmbedding
73+
}

readme.md

+20-6
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,26 @@
22

33
Database for storing and querying embeddings and the metadata alongside them.
44

5-
65
## Development Log (26 Aug 2023)
76

87
**Requirements**
9-
- Querying
10-
- Search on vector embeddings
11-
- Cosine similarity
12-
- Storing
13-
- Representing embeddings
8+
9+
Indexing
10+
11+
- Flat Index
12+
- No modification to vectors; store them directly as a series of floats
13+
- How to build an index for embeddings
14+
- Random Projection
15+
- Using a matrix of M×N size to convert an N-size vector to an M-size vector
16+
- HNSW (Hierarchical Navigable Small World)
17+
18+
Querying
19+
20+
- Cosine similarity
21+
- Formula: dot(v, w) / (v.norm() \* w.norm())
22+
- dot v w: sum(vi \* wi)
23+
- norm v: sqrt(sum(vi \* vi))
24+
25+
Storing
26+
27+
- How to store vector format

record.go

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package main
2+
3+
import "math"
4+
5+
// Record is a vector of float64 components together with the operations
// needed to compare two records by cosine similarity.
type Record struct {
	// index holds the vector components read by dot and norm.
	index []float64
	// normal is not read or written by any method here.
	// NOTE(review): presumably reserved for caching the result of norm()
	// (see the TODO in norm) — confirm before relying on it.
	normal float64
}

// cosine_similarity returns dot(r, w) / (norm(r) * norm(w)).
//
// When the product of the norms is zero (an empty or all-zero record on
// either side) the ratio is undefined; 0 is returned instead of NaN so that
// callers ranking scores never have to deal with NaN.
func (r Record) cosine_similarity(w Record) float64 {
	denom := r.norm() * w.norm()
	if denom == 0 {
		return 0
	}
	return r.dot(w) / denom
}

// dot returns the sum of the pairwise products of the components of r and w.
//
// If the records differ in length only the components both of them have are
// used, so the result is the same whichever record is the receiver and a
// shorter argument can no longer cause an index-out-of-range panic.
func (r Record) dot(w Record) float64 {
	n := len(r.index)
	if len(w.index) < n {
		n = len(w.index)
	}

	sum := 0.0
	for i := 0; i < n; i++ {
		sum += r.index[i] * w.index[i]
	}
	return sum
}

// norm returns the Euclidean length of r: the square root of the sum of the
// squared components. An empty record has norm 0.
func (r Record) norm() float64 {
	sum := 0.0
	for _, elem := range r.index {
		sum += elem * elem
	}

	// TODO: Need to check how much perf gain is to return cached version
	return math.Sqrt(sum)
}

record_test.go

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package main
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestDotProductFor2Vectors(t *testing.T) {
8+
v := Record{
9+
index: []float64{1, 2, 3},
10+
}
11+
w := Record{
12+
index: []float64{4, 5, 6},
13+
}
14+
15+
dotProduct := v.dot(w)
16+
17+
if dotProduct != 32 {
18+
t.Fatal("Dot product of", v.index, "and", w.index, "should be 3")
19+
}
20+
}
21+
22+
func TestEmptyVectors(t *testing.T) {
23+
v := Record{}
24+
w := Record{}
25+
26+
dotProduct := v.dot(w)
27+
28+
if dotProduct != 0 {
29+
t.Fatal("Dot product of", v.index, "and", w.index, "should be 3")
30+
}
31+
}
32+
33+
func TestNormAVector(t *testing.T) {
34+
v := Record{
35+
index: []float64{1, 2, 3},
36+
}
37+
38+
normal := v.norm()
39+
40+
if normal != 3.7416573867739413 {
41+
t.Fatal("Norm of vector", v.index, "should be", 3.7416573867739413, "but it is", normal)
42+
}
43+
}

vector.go

-30
This file was deleted.

vector_test.go

-43
This file was deleted.

0 commit comments

Comments
 (0)