Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate some allocations in ReadFrom/WriteTo #124

Merged
merged 1 commit into from
Apr 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 17 additions & 21 deletions bitset.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ which provides a (less set-theoretical) view of bitsets.
package bitset

import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
Expand All @@ -51,6 +50,9 @@ import (
const (
	// wordSize is the number of bits held by one word of a bit set.
	wordSize uint = 64

	// wordBytes is the number of bytes occupied by one word of a bit set.
	wordBytes = wordSize / 8

	// log2WordSize is lg(wordSize).
	log2WordSize uint = 6
)

Expand Down Expand Up @@ -903,38 +905,33 @@ func (b *BitSet) DumpAsBits() string {

// BinaryStorageSize returns the binary storage requirements (see WriteTo) in bytes.
//
// The result is one word for the length header plus one word for each of the
// b.wordCount() words of the set. It is computed arithmetically from
// wordBytes rather than through binary.Size, so a single return remains.
func (b *BitSet) BinaryStorageSize() int {
	return int(wordBytes + wordBytes*uint(b.wordCount()))
}

// WriteTo writes a BitSet to a stream. The format is:
// 1. uint64 length
// 2. []uint64 set
// Upon success, the number of bytes written is returned.
func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
buf := make([]byte, wordBytes)
length := uint64(b.length)

// Write length
err := binary.Write(stream, binaryOrder, length)
binaryOrder.PutUint64(buf, length)
n, err := stream.Write(buf)
if err != nil {
return 0, err
return int64(n), err
}

// Write set
// current implementation of bufio.Writer is more memory efficient than
// binary.Write for large set
writer := bufio.NewWriter(stream)
var item = make([]byte, binary.Size(uint64(0))) // for serializing one uint64
nWords := b.wordCount()
for i := range b.set[:nWords] {
binaryOrder.PutUint64(item, b.set[i])
if nn, err := writer.Write(item); err != nil {
return int64(i*binary.Size(uint64(0)) + nn), err
binaryOrder.PutUint64(buf, b.set[i])
if nn, err := stream.Write(buf); err != nil {
return int64(i*int(wordBytes) + nn + n), err
}
}

err = writer.Flush()
return int64(b.BinaryStorageSize()), err
return int64(b.BinaryStorageSize()), nil
}

// ReadFrom reads a BitSet from a stream written using WriteTo
Expand All @@ -947,10 +944,11 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
// left unchanged or made empty if the error occurs too late
// to preserve the content.
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
var length uint64
buf := make([]byte, wordBytes)

// Read length first
err := binary.Read(stream, binaryOrder, &length)
_, err := io.ReadFull(stream, buf[:])
length := binaryOrder.Uint64(buf)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
Expand All @@ -971,10 +969,8 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {

b.length = newlength

var item [8]byte
reader := bufio.NewReader(io.LimitReader(stream, 8*int64(nWords)))
for i := 0; i < nWords; i++ {
if _, err := io.ReadFull(reader, item[:]); err != nil {
if _, err := io.ReadFull(stream, buf); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
Expand All @@ -984,7 +980,7 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
b.length = 0
return 0, err
}
b.set[i] = binaryOrder.Uint64(item[:])
b.set[i] = binaryOrder.Uint64(buf)
}

return int64(b.BinaryStorageSize()), nil
Expand Down
16 changes: 16 additions & 0 deletions bitset_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package bitset

import (
"bytes"
"math/rand"
"testing"
)
Expand Down Expand Up @@ -445,3 +446,18 @@ func BenchmarkFlorianUekermannMidStrongDensityIterateManyComp(b *testing.B) {
return
}
}

// BenchmarkBitsetReadWrite measures one WriteTo/ReadFrom round trip per
// iteration through an in-memory buffer. The source set is created with
// New(100000) and has every 100th bit set; the destination set is allocated
// once, outside the timed region, and reused.
//
// Allocation statistics are always reported, and a failing WriteTo or
// ReadFrom aborts the benchmark instead of being silently ignored.
func BenchmarkBitsetReadWrite(b *testing.B) {
	s := New(100000)
	for i := 0; i < 100000; i += 100 {
		s.Set(uint(i))
	}
	buffer := bytes.Buffer{}
	temp := New(100000)
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := s.WriteTo(&buffer); err != nil {
			b.Fatal(err)
		}
		if _, err := temp.ReadFrom(&buffer); err != nil {
			b.Fatal(err)
		}
		// Reset the buffer so every iteration starts from an empty stream.
		buffer.Reset()
	}
}