Skip to content

Commit

Permalink
Address PR review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
animesh2049 committed Jun 18, 2019
1 parent b1ea9d4 commit 3100387
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 326 deletions.
29 changes: 0 additions & 29 deletions algo/algo_test.go

This file was deleted.

44 changes: 22 additions & 22 deletions codec/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,15 @@ func (e *Encoder) packBlock() {
if len(e.uids) == 0 {
return
}
block := &pb.UidBlock{Base: e.uids[0], UidNum: uint32(len(e.uids))}
block := &pb.UidBlock{Base: e.uids[0], NumUids: uint32(len(e.uids))}
last := e.uids[0]
e.uids = e.uids[1:]

var out bytes.Buffer
buf := make([]byte, 17)
tmpUids := make([]uint32, 4)
for {
i := 0
for ; i < 4; i++ {
for i := 0; i < 4; i++ {
if i >= len(e.uids) {
// Padding with '0' because Encode4 encodes only in batches of 4.
tmpUids[i] = 0
Expand Down Expand Up @@ -126,28 +125,29 @@ func (d *Decoder) unpackBlock() []uint64 {
last := block.Base
d.uids = append(d.uids, last)

var tmpUids [4]uint32
deltas := block.Deltas

// Decoding always expects the encoded byte array to be of
// length >= 4. Padding doesn't affect the decoded values.
deltas = append(deltas, 0, 0, 0)

// Read back the encoded varints.
// Due to padding of 3 '0's, it might be the case that we don't
// completely consume the byte array. We are encoding uids in groups
// of 4, which requires at least 5 bytes. So if we are left with deltas
// of length < 5, those are probably the padded 0s.
for len(deltas) >= 5 {
groupvarint.Decode4(tmpUids[:], deltas)
deltas = deltas[groupvarint.BytesUsed[deltas[0]]:]
tmpUids := make([]uint32, 4)
var sum uint64
encData := block.Deltas

for uint32(len(d.uids)) < block.NumUids {

if len(encData) < 17 {
// Decode4 decodes 4 uids from encData. It moves the slice (encData) forward while
// decoding and expects it to be of length >= 4 at all stages. Padding
// with zeros to make sure the length is always >= 4.
encData = append(encData, 0, 0, 0)
}

groupvarint.Decode4(tmpUids[:], encData)
encData = encData[groupvarint.BytesUsed[encData[0]]:]
for i := 0; i < 4; i++ {
d.uids = append(d.uids, uint64(tmpUids[i])+last)
last += uint64(tmpUids[i])
sum = last + uint64(tmpUids[i])
d.uids = append(d.uids, sum)
last = sum
}
}

d.uids = d.uids[:block.UidNum]
d.uids = d.uids[:block.NumUids]
return d.uids
}

Expand Down Expand Up @@ -297,7 +297,7 @@ func ExactLen(pack *pb.UidPack) int {
}
num := 0
for _, b := range pack.Blocks {
num += int(b.UidNum) // UidNum includes the base UID.
num += int(b.NumUids) // NumUids includes the base UID.
}
return num
}
Expand Down
46 changes: 10 additions & 36 deletions codec/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"bytes"
"compress/gzip"
"encoding/binary"
"fmt"
"math"
"math/rand"
"sort"
Expand Down Expand Up @@ -109,21 +108,10 @@ func TestSeek(t *testing.T) {
}

dec.blockIdx = 0
var found bool
for i := 200; i < 10000; i += 100 {
found = false
for i := 100; i < 10000; i += 100 {
uids := dec.LinearSeek(uint64(i))

for _, uid := range uids {
if uid == uint64(i) {
found = true
break
}
}

if !found {
t.Errorf("Couldn't find %d using LinearSeek\n", i)
}
require.Contains(t, uids, uint64(i))
}
}

Expand Down Expand Up @@ -267,16 +255,6 @@ func benchmarkUidPackDecode(b *testing.B, blockSize int) {
}
}

type UInts []uint64

func (u UInts) Len() int { return len(u) }

func (u UInts) Less(i, j int) bool { return u[i] < u[j] }

func (u UInts) Swap(i, j int) {
u[i], u[j] = u[j], u[i]
}

func TestEncoding(t *testing.T) {
bigInts := make([]uint64, 5)
bigInts[0] = 0xf000000000000000
Expand All @@ -286,28 +264,24 @@ func TestEncoding(t *testing.T) {
bigInts[4] = 0x0f0f0f0f00000000

rand.Seed(time.Now().UnixNano())
var testNums = []int{0, 1, 2, 3, 5, 13, 18, 100, 99, 98}
var lengths = []int{0, 1, 2, 3, 5, 13, 18, 100, 99, 98}

for tc := 0; tc < len(testNums); tc++ {
ints := make([]uint64, testNums[tc])
for i := 0; i < 50 && i < testNums[tc]; i++ {
for tc := 0; tc < len(lengths); tc++ {
ints := make([]uint64, lengths[tc])

for i := 0; i < 50 && i < lengths[tc]; i++ {
ints[i] = uint64(rand.Uint32())
}

for i := 50; i < testNums[tc]; i++ {
for i := 50; i < lengths[tc]; i++ {
ints[i] = uint64(rand.Uint32()) + bigInts[rand.Intn(5)]
}

sort.Sort(UInts(ints))
sort.Slice(ints, func(i, j int) bool { return ints[i] < ints[j] })

encodedInts := Encode(ints, 256)
decodedInts := Decode(encodedInts, 0)

for i := 0; i < testNums[tc]; i++ {
if ints[i] != decodedInts[i] {
fmt.Println(ints[i], decodedInts[i])
t.Errorf("Expected: %d Actual: %d", ints[i], decodedInts[i])
}
}
require.Equal(t, ints, decodedInts)
}
}
6 changes: 4 additions & 2 deletions codec/decoderversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

package codec

import "fmt"
import (
"github.com/golang/glog"
)

func init() {
fmt.Println("[Decoder]: Using assembly version of decoder")
glog.Infof("[Decoder]: Using assembly version of decoder")
}
4 changes: 2 additions & 2 deletions protos/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,11 @@ message UidBlock {
// because when the PB is brought to memory, Go would always use 8-bytes per integer. Instead,
// storing it as a byte slice is a lot cheaper in memory.
bytes deltas = 2;
// uidNum is the number of UIDs in the block. We are including this because we want to
// num_uids is the number of UIDs in the block. We are including this because we want to
// switch encoding to groupvarint encoding. The currently available open-source version implements
// encoding and decoding for uint32. We want to wrap it around our logic to use it here.
// Default Blocksize is 256 so uint32 would be sufficient.
uint32 uidNum = 3;
uint32 num_uids = 3;
}

message UidPack {
Expand Down
Loading

0 comments on commit 3100387

Please sign in to comment.