Skip to content

Commit

Permalink
Merge pull request #254 from taniabogatsch/caching
Browse files Browse the repository at this point in the history
Cache the validity mask, data pointers, and the chunk size
  • Loading branch information
taniabogatsch authored Jul 25, 2024
2 parents 0be6217 + dcade5f commit 2da1412
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 28 deletions.
2 changes: 1 addition & 1 deletion appender.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func (a *Appender) AppendRow(args ...driver.Value) error {

func (a *Appender) addDataChunk() error {
var chunk DataChunk
if err := chunk.initFromTypes(a.ptr, a.types); err != nil {
if err := chunk.initFromTypes(a.ptr, a.types, true); err != nil {
return err
}
a.chunks = append(a.chunks, chunk)
Expand Down
21 changes: 12 additions & 9 deletions data_chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ type DataChunk struct {
columns []vector
// columnNames holds the column names, if known.
columnNames []string
// size caches the size after initialization.
size int
}

// GetDataChunkCapacity returns the capacity of a data chunk.
Expand All @@ -27,7 +29,8 @@ func GetDataChunkCapacity() int {

// GetSize returns the internal size of the data chunk.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// Going by the hunk's addition/deletion counts, the first return statement is
// the pre-change body and the two statements after it are the post-change body;
// the two versions are not meant to coexist in one function.
func (chunk *DataChunk) GetSize() int {
// Pre-change body: ask DuckDB for the size and return it directly.
return int(C.duckdb_data_chunk_get_size(chunk.data))
// Post-change body: ask DuckDB for the size, store it in chunk.size, and
// return the stored value.
chunk.size = int(C.duckdb_data_chunk_get_size(chunk.data))
return chunk.size
}

// SetSize sets the internal size of the data chunk. Cannot exceed GetCapacity().
Expand Down Expand Up @@ -71,7 +74,7 @@ func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error {
return nil
}

func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type) error {
func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type, writable bool) error {
// NOTE: initFromTypes does not initialize the column names.
columnCount := len(types)

Expand All @@ -93,14 +96,13 @@ func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logic

// Initialize the vectors and their child vectors.
for i := 0; i < columnCount; i++ {
duckdbVector := C.duckdb_data_chunk_get_vector(chunk.data, C.idx_t(i))
chunk.columns[i].duckdbVector = duckdbVector
chunk.columns[i].getChildVectors(duckdbVector)
v := C.duckdb_data_chunk_get_vector(chunk.data, C.idx_t(i))
chunk.columns[i].initVectors(v, writable)
}
return nil
}

func (chunk *DataChunk) initFromDuckDataChunk(data C.duckdb_data_chunk) error {
func (chunk *DataChunk) initFromDuckDataChunk(data C.duckdb_data_chunk, writable bool) error {
columnCount := int(C.duckdb_data_chunk_get_column_count(data))
chunk.columns = make([]vector, columnCount)
chunk.data = data
Expand All @@ -117,10 +119,11 @@ func (chunk *DataChunk) initFromDuckDataChunk(data C.duckdb_data_chunk) error {
break
}

// Initialize the vectors and their child vectors.
chunk.columns[i].duckdbVector = duckdbVector
chunk.columns[i].getChildVectors(duckdbVector)
// Initialize the vector and its child vectors.
chunk.columns[i].initVectors(duckdbVector, writable)
}

chunk.GetSize()
return err
}

Expand Down
1 change: 1 addition & 0 deletions errors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ type wrappedDuckDBError struct {
// Error reports the message of the wrapped error, making
// wrappedDuckDBError usable as an error value.
func (w *wrappedDuckDBError) Error() string {
	msg := w.e.Error()
	return msg
}

// Unwrap hands back the error stored inside the wrapper.
func (w *wrappedDuckDBError) Unwrap() error {
	inner := w.e
	return inner
}
Expand Down
4 changes: 2 additions & 2 deletions rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ func (r *rows) Columns() []string {
}

func (r *rows) Next(dst []driver.Value) error {
for r.rowCount == r.chunk.GetSize() {
for r.rowCount == r.chunk.size {
r.chunk.close()
if r.chunkIdx == r.chunkCount {
return io.EOF
}
data := C.duckdb_result_get_chunk(r.res, r.chunkIdx)
if err := r.chunk.initFromDuckDataChunk(data); err != nil {
if err := r.chunk.initFromDuckDataChunk(data, false); err != nil {
return getError(err, nil)
}

Expand Down
26 changes: 19 additions & 7 deletions vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ import (
type vector struct {
// The underlying DuckDB vector.
duckdbVector C.duckdb_vector
// The underlying data ptr.
ptr unsafe.Pointer
// The vector's validity mask.
mask *C.uint64_t
// A callback function to get a value from this vector.
getFn fnGetVectorValue
// A callback function to write to this vector.
Expand Down Expand Up @@ -311,19 +315,27 @@ func (vec *vector) init(logicalType C.duckdb_logical_type, colIdx int) error {
return nil
}

func (vec *vector) getChildVectors(vector C.duckdb_vector) {
// initVectors binds vec to the DuckDB vector v, caches the vector's data
// pointer and validity mask on vec, and then initializes any child vectors
// through getChildVectors with the same writable flag.
// When writable is true, the validity mask is made writable before it is cached.
func (vec *vector) initVectors(v C.duckdb_vector, writable bool) {
vec.duckdbVector = v
// Cache the data pointer once; getPrimitive and setPrimitive read vec.ptr.
vec.ptr = C.duckdb_vector_get_data(v)
if writable {
// Must run before the mask is fetched below so that the cached pointer
// refers to the writable mask.
// NOTE(review): presumably this guarantees a non-NULL mask afterwards —
// confirm against the DuckDB C API documentation.
C.duckdb_vector_ensure_validity_writable(v)
}
// The cached mask may be nil; getNull checks for that and reports "not NULL".
vec.mask = C.duckdb_vector_get_validity(v)
vec.getChildVectors(v, writable)
}

// getChildVectors fetches the child vectors of v for nested types and
// initializes the matching entries of vec.childVectors. LIST and MAP vectors
// have a single child at index 0; STRUCT vectors have one child per entry in
// vec.childVectors. Other types fall through the switch untouched.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// In each case the first three statements are the pre-change code (they still
// use the old parameter name `vector` and recurse via getChildVectors), and the
// two statements after them are the post-change code (they use `v` and recurse
// via initVectors, which also caches the child's data pointer and mask).
func (vec *vector) getChildVectors(v C.duckdb_vector, writable bool) {
switch vec.duckdbType {

case C.DUCKDB_TYPE_LIST, C.DUCKDB_TYPE_MAP:
// Pre-change lines.
child := C.duckdb_list_vector_get_child(vector)
vec.childVectors[0].duckdbVector = child
vec.childVectors[0].getChildVectors(child)
// Post-change lines.
child := C.duckdb_list_vector_get_child(v)
vec.childVectors[0].initVectors(child, writable)

case C.DUCKDB_TYPE_STRUCT:
for i := 0; i < len(vec.childVectors); i++ {
// Pre-change lines.
child := C.duckdb_struct_vector_get_child(vector, C.idx_t(i))
vec.childVectors[i].duckdbVector = child
vec.childVectors[i].getChildVectors(child)
// Post-change lines.
child := C.duckdb_struct_vector_get_child(v, C.idx_t(i))
vec.childVectors[i].initVectors(child, writable)
}
}
}
Expand Down
15 changes: 11 additions & 4 deletions vector_getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,20 @@ import (
type fnGetVectorValue func(vec *vector, rowIdx C.idx_t) any

// getNull reports whether the row at rowIdx is NULL, i.e. whether its bit in
// the validity mask is unset.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// The first two statements are the pre-change body (fetch the mask from DuckDB
// on every call); everything after them is the post-change body (read the mask
// cached in vec.mask and test the bit directly in Go).
func (vec *vector) getNull(rowIdx C.idx_t) bool {
// Pre-change body.
mask := C.duckdb_vector_get_validity(vec.duckdbVector)
return !bool(C.duckdb_validity_row_is_valid(mask, rowIdx))
// Post-change body. A nil mask is reported as "not NULL".
mask := unsafe.Pointer(vec.mask)
if mask == nil {
return false
}

// Each uint64 entry covers 64 rows: pick the entry, then the bit inside it.
entryIdx := rowIdx / 64
idxInEntry := rowIdx % 64
// Pointer conversion only, so the C memory can be indexed like an array.
maskPtr := (*[1 << 31]C.uint64_t)(mask)
isValid := maskPtr[entryIdx] & (C.uint64_t(1) << idxInEntry)
// A cleared bit means the row is NULL.
return uint64(isValid) == 0
}

// getPrimitive returns the value of type T stored at rowIdx in the vector's
// data buffer, which is indexed as an array of T.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// The first two statements are the pre-change code (fetch the data pointer from
// DuckDB on every call); the second `xs :=` is the post-change replacement that
// uses the pointer cached in vec.ptr.
// NOTE(review): no bounds check against the chunk size is visible here;
// presumably callers guarantee rowIdx is in range — confirm.
func getPrimitive[T any](vec *vector, rowIdx C.idx_t) T {
// Pre-change lines.
ptr := C.duckdb_vector_get_data(vec.duckdbVector)
xs := (*[1 << 31]T)(ptr)
// Post-change line: pointer conversion of the cached data pointer.
xs := (*[1 << 31]T)(vec.ptr)
return xs[rowIdx]
}

Expand Down
7 changes: 2 additions & 5 deletions vector_setters.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ const secondsPerDay = 24 * 60 * 60
type fnSetVectorValue func(vec *vector, rowIdx C.idx_t, val any)

func (vec *vector) setNull(rowIdx C.idx_t) {
C.duckdb_vector_ensure_validity_writable(vec.duckdbVector)
mask := C.duckdb_vector_get_validity(vec.duckdbVector)
C.duckdb_validity_set_row_invalid(mask, rowIdx)
C.duckdb_validity_set_row_invalid(vec.mask, rowIdx)

if vec.duckdbType == C.DUCKDB_TYPE_STRUCT {
for i := 0; i < len(vec.childVectors); i++ {
Expand All @@ -31,8 +29,7 @@ func (vec *vector) setNull(rowIdx C.idx_t) {
}

// setPrimitive writes v at rowIdx into the vector's data buffer, which is
// indexed as an array of T.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// The first two statements are the pre-change code (fetch the data pointer from
// DuckDB on every call); the second `xs :=` is the post-change replacement that
// uses the pointer cached in vec.ptr.
// NOTE(review): no bounds check against the chunk capacity is visible here;
// presumably callers guarantee rowIdx is in range — confirm.
func setPrimitive[T any](vec *vector, rowIdx C.idx_t, v T) {
// Pre-change lines.
ptr := C.duckdb_vector_get_data(vec.duckdbVector)
xs := (*[1 << 31]T)(ptr)
// Post-change line: pointer conversion of the cached data pointer.
xs := (*[1 << 31]T)(vec.ptr)
xs[rowIdx] = v
}

Expand Down

0 comments on commit 2da1412

Please sign in to comment.