Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Oscar Franzen committed Mar 15, 2017
0 parents commit 6f66125
Show file tree
Hide file tree
Showing 12 changed files with 1,156 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sift
siftsmall
src/
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# go-hnsw

go-hnsw is a Go implementation of the HNSW approximate nearest-neighbour search algorithm. The original C++ implementation lives in https://github.com/searchivarius/nmslib, and the algorithm is described in https://arxiv.org/abs/1603.09320.
45 changes: 45 additions & 0 deletions bitsetpool/bitsetpool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package bitsetpool

import (
"sync"

"github.com/willf/bitset"
)

// poolItem is one slot of a BitsetPool: a bitset plus an in-use flag.
type poolItem struct {
// b is the bitset stored in this slot; Get returns a pointer to it.
b bitset.BitSet
// busy marks the slot as in use: Get only reuses slots whose flag is false,
// and Free resets the flag to false.
busy bool
}

// BitsetPool is a growable collection of bitsets addressed by integer id.
// Get hands out a slot and Free marks it available again.
type BitsetPool struct {
// The embedded mutex is locked by Get and Free while they update pool.
sync.RWMutex
// pool holds the slots; a slot's index is the id returned by Get.
pool []poolItem
}

// New returns a pointer to a fresh BitsetPool whose slot list is empty but
// already allocated.
func New() *BitsetPool {
	return &BitsetPool{pool: make([]poolItem, 0)}
}

// Free marks slot i as available so that a later Get may hand it out again.
// i is expected to be an id previously returned by Get; an out-of-range id
// panics.
func (bp *BitsetPool) Free(i int) {
	bp.Lock()
	// Deferred so the lock is released even when indexing panics on a bad id;
	// otherwise every later Get/Free would block forever.
	defer bp.Unlock()
	bp.pool[i].busy = false
}

// Get hands out a bitset together with the id that must later be passed to
// Free. A free slot is reused (after ClearAll) when one exists; otherwise the
// pool grows by one slot holding a zero-value bitset.
func (bp *BitsetPool) Get() (int, *bitset.BitSet) {
	bp.Lock()
	defer bp.Unlock()

	for i := range bp.pool {
		if !bp.pool[i].busy {
			bp.pool[i].busy = true
			bp.pool[i].b.ClearAll()
			return i, &bp.pool[i].b
		}
	}

	// No free slot: append one that is already marked busy, so the next Get
	// cannot hand the same bitset to a second caller.
	id := len(bp.pool)
	bp.pool = append(bp.pool, poolItem{busy: true})
	// The address is taken while the lock is still held, because a concurrent
	// append may replace the backing array.
	return id, &bp.pool[id].b
}
56 changes: 56 additions & 0 deletions bitsetpool/bitsetpool_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package bitsetpool

import (
"math/rand"
"testing"
"time"

"github.com/willf/bitset"
)

// TestBitset is a rough timing comparison of three ways to track a set of
// integers: a map, a fresh bitset per round, and a bitset taken from the pool.
// It only logs timings; it makes no assertions.
func TestBitset(t *testing.T) {
	const (
		rounds   = 100000
		ops      = 100
		universe = 1000000
	)

	startMap := time.Now()
	for j := 0; j < rounds; j++ {
		seen := make(map[uint32]bool)
		for i := 0; i < ops; i++ {
			n := rand.Intn(universe)
			seen[uint32(n)] = true
			m := rand.Intn(universe)
			_ = seen[uint32(m)] // lookup only; the result is irrelevant for timing
		}
	}
	mapTime := time.Since(startMap)
	t.Logf("map done in %v", mapTime.Seconds())

	startBitset := time.Now()
	for j := 0; j < rounds; j++ {
		var b bitset.BitSet
		for i := 0; i < ops; i++ {
			n := rand.Intn(universe)
			b.Set(uint(n))
			m := rand.Intn(universe)
			b.Test(uint(m))
		}
	}
	bitsetTime := time.Since(startBitset)
	t.Logf("bitset done in %v", bitsetTime.Seconds())

	startPool := time.Now()
	// The package constructor is New; NewBitsetPool does not exist.
	pool := New()
	for j := 0; j < rounds; j++ {
		id, b := pool.Get()
		for i := 0; i < ops; i++ {
			n := rand.Intn(universe)
			b.Set(uint(n))
			m := rand.Intn(universe)
			b.Test(uint(m))
		}
		pool.Free(id)
	}
	poolTime := time.Since(startPool)
	t.Logf("bitset pool done in %v", poolTime.Seconds())

	t.Logf("Performance boost %.2f%%", 100*(1-poolTime.Seconds()/mapTime.Seconds()))
}
207 changes: 207 additions & 0 deletions distqueue/distqueue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
package distqueue

// Item is a queue entry pairing an ID with the value D used for heap ordering.
type Item struct {
// ID is the identifier the caller passed to Push.
ID uint32
// D is the key compared by swim and sink.
// NOTE(review): presumably a distance to the query point; confirm with callers.
D float32
}

// DistQueueClosestFirst is a binary heap of Items that keeps the smallest D at
// the head. The heap is stored 1-based in items; index 0 is a nil placeholder.
type DistQueueClosestFirst struct {
// initiated records that Init has run; Push and PushItem call Init when it is false.
initiated bool
// items is the heap storage, with the head at index 1.
items []*Item
// Size is the number of items Init allocates capacity for.
Size int
}

// Init (re)allocates the heap storage with room for Size items, discarding
// anything queued before, and returns the receiver for chaining.
func (pq *DistQueueClosestFirst) Init() *DistQueueClosestFirst {
	// Slot 0 stays nil so that the heap proper starts at index 1.
	storage := make([]*Item, 1, pq.Size+1)
	pq.items, pq.initiated = storage, true
	return pq
}

// Reset empties the queue while keeping the allocated storage for reuse.
// It is a no-op on a queue that holds no items, including the zero value,
// which previously panicked because its storage slice is nil.
func (pq *DistQueueClosestFirst) Reset() {
	if len(pq.items) > 1 {
		// Keep only the nil placeholder at index 0.
		pq.items = pq.items[:1]
	}
}
// Items returns the queued items in heap order (not sorted). The returned
// slice aliases the queue's own storage, so it is only valid until the queue
// is modified. A queue that was never initialised yields nil instead of
// panicking.
func (pq *DistQueueClosestFirst) Items() []*Item {
	if len(pq.items) == 0 {
		return nil
	}
	// Skip the nil placeholder at index 0.
	return pq.items[1:]
}
// Reserve makes sure the queue can hold at least n items without
// reallocating. Items already queued are kept. Calling it on a queue that was
// never initialised initialises it first, so the reservation is not thrown
// away by the lazy Init in Push.
func (pq *DistQueueClosestFirst) Reserve(n int) {
	if !pq.initiated {
		pq.Init()
	}
	// +1 accounts for the nil placeholder at index 0.
	if n+1 <= cap(pq.items) {
		return
	}
	grown := make([]*Item, len(pq.items), n+1)
	// copy takes (dst, src); the original had the arguments swapped, which
	// overwrote the queued items with nil.
	copy(grown, pq.items)
	pq.items = grown
}

// Push wraps id and d in a newly allocated Item, inserts it into the heap and
// returns it. The queue is initialised on first use.
func (pq *DistQueueClosestFirst) Push(id uint32, d float32) *Item {
	entry := &Item{ID: id, D: d}
	// PushItem performs the lazy Init, the append and the sift-up.
	pq.PushItem(entry)
	return entry
}

// PushItem inserts an existing Item into the heap without allocating a new
// one. The queue is initialised on first use.
func (pq *DistQueueClosestFirst) PushItem(item *Item) {
	if !pq.initiated {
		pq.Init()
	}
	slot := len(pq.items) // index the new item occupies after the append
	pq.items = append(pq.items, item)
	pq.swim(slot)
}

// Pop removes and returns the head of the heap (the item with the smallest
// D), or nil when the queue holds no items.
func (pq *DistQueueClosestFirst) Pop() *Item {
	last := len(pq.items) - 1
	if last < 1 {
		return nil
	}
	head := pq.items[1]
	// Move the last item to the root, park the old head in the tail slot,
	// shrink by one and restore heap order from the root.
	pq.items[1], pq.items[last] = pq.items[last], head
	pq.items = pq.items[:last]
	pq.sink(1)
	return head
}

// Top reports the ID and D of the head item without removing it. An empty
// queue yields (0, 0), which cannot be told apart from a real item with those
// values; check Len or Empty first when that matters.
func (pq *DistQueueClosestFirst) Top() (uint32, float32) {
	if len(pq.items) > 1 {
		head := pq.items[1]
		return head.ID, head.D
	}
	return 0, 0
}

// Head is an alias for Top: it reports the ID and D of the head item without
// removing it, or (0, 0) when the queue holds no items.
func (pq *DistQueueClosestFirst) Head() (uint32, float32) {
	return pq.Top()
}

// Len returns the number of queued items. A queue that was never initialised
// reports 0 rather than -1.
func (pq *DistQueueClosestFirst) Len() int {
	if len(pq.items) == 0 {
		return 0
	}
	// Index 0 is the nil placeholder and does not count.
	return len(pq.items) - 1
}

// Empty reports whether the queue holds no items. A queue that was never
// initialised counts as empty, consistent with Pop and Top.
func (pq *DistQueueClosestFirst) Empty() bool {
	// <= rather than == so a nil storage slice (length 0) is empty too.
	return len(pq.items) <= 1
}

// swim moves the item at index k towards the root for as long as its parent
// has a larger D, restoring heap order after an append.
func (pq *DistQueueClosestFirst) swim(k int) {
	for k > 1 {
		parent := k / 2
		// Written as a negated ">" so the loop stops on exactly the same
		// inputs (NaN included) as a "parent > child" loop condition.
		if !(pq.items[parent].D > pq.items[k].D) {
			return
		}
		pq.items[parent], pq.items[k] = pq.items[k], pq.items[parent]
		k = parent
	}
}

// sink moves the item at index k away from the root, swapping it with its
// smaller child for as long as that child has a smaller D.
func (pq *DistQueueClosestFirst) sink(k int) {
	last := len(pq.items) - 1
	for child := 2 * k; child <= last; child = 2 * k {
		// Prefer the right child when it exists and is the smaller one.
		if right := child + 1; right <= last && pq.items[child].D > pq.items[right].D {
			child = right
		}
		if !(pq.items[k].D > pq.items[child].D) {
			return
		}
		pq.items[k], pq.items[child] = pq.items[child], pq.items[k]
		k = child
	}
}

// DistQueueClosestLast is a binary heap of Items that keeps the largest D at
// the head. The heap is stored 1-based in items; index 0 is a nil placeholder.
type DistQueueClosestLast struct {
// initiated records that Init has run; Push and PushItem call Init when it is false.
initiated bool
// items is the heap storage, with the head at index 1.
items []*Item
// Size is the number of items Init allocates capacity for.
Size int
}

// Init (re)allocates the heap storage with room for Size items, discarding
// anything queued before, and returns the receiver for chaining.
func (pq *DistQueueClosestLast) Init() *DistQueueClosestLast {
	// Slot 0 stays nil so that the heap proper starts at index 1.
	storage := make([]*Item, 1, pq.Size+1)
	pq.items, pq.initiated = storage, true
	return pq
}

// Items returns the queued items in heap order (not sorted). The returned
// slice aliases the queue's own storage, so it is only valid until the queue
// is modified. A queue that was never initialised yields nil instead of
// panicking.
func (pq *DistQueueClosestLast) Items() []*Item {
	if len(pq.items) == 0 {
		return nil
	}
	// Skip the nil placeholder at index 0.
	return pq.items[1:]
}
// Reserve makes sure the queue can hold at least n items without
// reallocating. Items already queued are kept. Calling it on a queue that was
// never initialised initialises it first, so the reservation is not thrown
// away by the lazy Init in Push.
func (pq *DistQueueClosestLast) Reserve(n int) {
	if !pq.initiated {
		pq.Init()
	}
	// +1 accounts for the nil placeholder at index 0.
	if n+1 <= cap(pq.items) {
		return
	}
	grown := make([]*Item, len(pq.items), n+1)
	// copy takes (dst, src); the original had the arguments swapped, which
	// overwrote the queued items with nil.
	copy(grown, pq.items)
	pq.items = grown
}

// Push wraps id and d in a newly allocated Item, inserts it into the heap and
// returns it. The queue is initialised on first use.
func (pq *DistQueueClosestLast) Push(id uint32, d float32) *Item {
	entry := &Item{ID: id, D: d}
	// PushItem performs the lazy Init, the append and the sift-up.
	pq.PushItem(entry)
	return entry
}

// PushItem inserts an existing Item into the heap without allocating a new
// one. The queue is initialised on first use.
func (pq *DistQueueClosestLast) PushItem(item *Item) {
	if !pq.initiated {
		pq.Init()
	}
	slot := len(pq.items) // index the new item occupies after the append
	pq.items = append(pq.items, item)
	pq.swim(slot)
}

// Pop removes and returns the head of the heap (the item with the largest D),
// or nil when the queue holds no items.
func (pq *DistQueueClosestLast) Pop() *Item {
	last := len(pq.items) - 1
	if last < 1 {
		return nil
	}
	head := pq.items[1]
	// Move the last item to the root, park the old head in the tail slot,
	// shrink by one and restore heap order from the root.
	pq.items[1], pq.items[last] = pq.items[last], head
	pq.items = pq.items[:last]
	pq.sink(1)
	return head
}

// Top reports the ID and D of the head item without removing it. An empty
// queue yields (0, 0), which cannot be told apart from a real item with those
// values; check Len or Empty first when that matters.
func (pq *DistQueueClosestLast) Top() (uint32, float32) {
	if len(pq.items) > 1 {
		head := pq.items[1]
		return head.ID, head.D
	}
	return 0, 0
}

// Head is an alias for Top: it reports the ID and D of the head item without
// removing it, or (0, 0) when the queue holds no items.
func (pq *DistQueueClosestLast) Head() (uint32, float32) {
	return pq.Top()
}

// Len returns the number of queued items. A queue that was never initialised
// reports 0 rather than -1.
func (pq *DistQueueClosestLast) Len() int {
	if len(pq.items) == 0 {
		return 0
	}
	// Index 0 is the nil placeholder and does not count.
	return len(pq.items) - 1
}

// Empty reports whether the queue holds no items. A queue that was never
// initialised counts as empty, consistent with Pop and Top.
func (pq *DistQueueClosestLast) Empty() bool {
	// <= rather than == so a nil storage slice (length 0) is empty too.
	return len(pq.items) <= 1
}

// swim moves the item at index k towards the root for as long as its parent
// has a smaller D, restoring heap order after an append.
func (pq *DistQueueClosestLast) swim(k int) {
	for k > 1 {
		parent := k / 2
		// Written as a negated "<" so the loop stops on exactly the same
		// inputs (NaN included) as a "parent < child" loop condition.
		if !(pq.items[parent].D < pq.items[k].D) {
			return
		}
		pq.items[parent], pq.items[k] = pq.items[k], pq.items[parent]
		k = parent
	}
}

// sink moves the item at index k away from the root, swapping it with its
// larger child for as long as that child has a larger D.
func (pq *DistQueueClosestLast) sink(k int) {
	last := len(pq.items) - 1
	for child := 2 * k; child <= last; child = 2 * k {
		// Prefer the right child when it exists and is the larger one.
		if right := child + 1; right <= last && pq.items[child].D < pq.items[right].D {
			child = right
		}
		if !(pq.items[k].D < pq.items[child].D) {
			return
		}
		pq.items[k], pq.items[child] = pq.items[child], pq.items[k]
		k = child
	}
}
68 changes: 68 additions & 0 deletions distqueue/distqueue_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package distqueue

import (
"math/rand"
"testing"
)

// TestQueue fills each queue flavour with random items and checks that Pop
// yields them in the expected order: non-decreasing D for the closest-first
// queue, non-increasing D for the closest-last queue.
func TestQueue(t *testing.T) {
	const samples = 10

	nearFirst := new(DistQueueClosestFirst)
	for n := 0; n < samples; n++ {
		nearFirst.Push(rand.Uint32(), float32(rand.Float64()))
	}

	t.Log("Closest first, pop")
	topID, topD := nearFirst.Top()
	t.Logf("TOP before first top: %v %v", topID, topD)
	prev := float32(0)
	for nearFirst.Len() > 0 {
		popped := nearFirst.Pop()
		if popped.D < prev {
			t.Error("Incorrect order")
		}
		prev = popped.D
		t.Logf("%+v", popped)
	}

	nearLast := new(DistQueueClosestLast)
	prev = 1
	nearLast.Init()
	nearLast.Reserve(200) // try reserve
	for n := 0; n < samples; n++ {
		nearLast.Push(rand.Uint32(), float32(rand.Float64()))
	}

	t.Log("Closest last, pop")
	for !nearLast.Empty() {
		popped := nearLast.Pop()
		if popped.D > prev {
			t.Error("Incorrect order")
		}
		prev = popped.D
		t.Logf("%+v", popped)
	}
}

func TestKBest(t *testing.T) {

pq := &DistQueueClosestFirst{}
pq.Reserve(5) // reserve less than needed
for i := 0; i < 20; i++ {
pq.Push(rand.Uint32(), rand.Float32())
}

// return K best matches, ordered as best first
t.Log("closest last, still return K best")
K := 10
for pq.Len() > K {
pq.Pop()
}
res := make([]*Item, K)
for i := K - 1; i >= 0; i-- {
res[i] = pq.Pop()
}
for i := 0; i < len(res); i++ {
t.Logf("%+v", res[i])
}
}
4 changes: 4 additions & 0 deletions env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Use the current working directory as the Go workspace and install binaries
# into its bin/ subdirectory. Source this file from the repository root.
GOPATH="$(pwd)"
GOBIN="${GOPATH}/bin"
export GOPATH GOBIN
5 changes: 5 additions & 0 deletions f32/f32_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//+build !noasm,!appengine

package f32

// L2Squared is declared here without a Go body, so its implementation must be
// supplied outside this file for builds that satisfy the constraint above.
// NOTE(review): presumably amd64 assembly returning the squared Euclidean
// distance between x and y, with both slices of equal length; confirm against
// the accompanying assembly file.
func L2Squared(x, y []float32) float32
Loading

0 comments on commit 6f66125

Please sign in to comment.