Skip to content

Commit

Permalink
Merge pull request #407 from Maxxen/dev
Browse files Browse the repository at this point in the history
Serialize r-tree index scan and plan
  • Loading branch information
Maxxen authored Sep 20, 2024
2 parents bbbbb81 + 4b85d75 commit bb9c829
Show file tree
Hide file tree
Showing 14 changed files with 144 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once
#include "duckdb/planner/operator/logical_extension_operator.hpp"
#include "spatial/common.hpp"
#include "duckdb/parser/parsed_data/create_index_info.hpp"

#include "spatial/common.hpp"
namespace spatial {

namespace core {
Expand All @@ -22,16 +23,61 @@ class LogicalCreateRTreeIndex final : public LogicalExtensionOperator {
TableCatalogEntry &table_p);
void ResolveTypes() override;
void ResolveColumnBindings(ColumnBindingResolver &res, vector<ColumnBinding> &bindings) override;
string GetExtensionName() const override;

// Actually create and plan the index creation
unique_ptr<PhysicalOperator> CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) override;

// Serializes this operator.
//
// The base class writes its own fields first. After that we write:
//   300 "operator_type"        - tag identifying which spatial extension operator this is
//   400 "info"                 - the CreateIndexInfo describing the index to create
//   401 "unbound_expressions"  - the (unbound) index expressions
//
// LogicalCreateRTreeIndexOperatorExtension::Deserialize reads the same field ids and names,
// so the two must be kept in sync.
void Serialize(Serializer &writer) const override {
	LogicalExtensionOperator::Serialize(writer);
	writer.WritePropertyWithDefault(300, "operator_type", string("logical_rtree_create_index"));
	writer.WritePropertyWithDefault<unique_ptr<CreateIndexInfo>>(400, "info", info);
	writer.WritePropertyWithDefault<vector<unique_ptr<Expression>>>(401, "unbound_expressions", unbound_expressions);
}

// Returns the extension name reported for this operator.
// This is the same string that LogicalCreateRTreeIndexOperatorExtension::GetName() returns.
// NOTE(review): presumably DuckDB matches these two names to find the operator extension
// that deserializes this operator - confirm against LogicalExtensionOperator.
string GetExtensionName() const override {
return "duckdb_spatial";
}
};

// Operator extension that lets LogicalCreateRTreeIndex be reconstructed from a serialized plan.
// It is registered under the name "duckdb_spatial", the same name that
// LogicalCreateRTreeIndex::GetExtensionName() reports.
class LogicalCreateRTreeIndexOperatorExtension final : public OperatorExtension {
public:
	LogicalCreateRTreeIndexOperatorExtension() {
		// Operator extensions are required to provide a Bind callback. It is of no use to us,
		// because this operator is constructed by the optimizer rather than bound from a
		// statement, so we always return an empty result (no plan).
		Bind = [](ClientContext &, Binder &, OperatorExtensionInfo *, SQLStatement &) -> BoundStatement {
			BoundStatement result;
			result.plan = nullptr;
			return result;
		};
	}

	// Name of this operator extension.
	std::string GetName() override {
		return "duckdb_spatial";
	}

	// Rebuilds a LogicalCreateRTreeIndex from the fields written by LogicalCreateRTreeIndex::Serialize
	// (300 "operator_type", 400 "info", 401 "unbound_expressions").
	//
	// Throws SerializationException if the operator type is not one this version knows about, or
	// if the index creation info is missing. The catalog lookup of the target table may throw too.
	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &reader) override {
		const auto operator_type = reader.ReadPropertyWithDefault<string>(300, "operator_type");
		// We only have one extension operator type right now
		if (operator_type != "logical_rtree_create_index") {
			throw SerializationException("This version of the spatial extension does not support operator type '%s'!",
			                             operator_type);
		}
		auto create_info = reader.ReadPropertyWithDefault<unique_ptr<CreateInfo>>(400, "info");
		auto unbound_expressions =
		    reader.ReadPropertyWithDefault<vector<unique_ptr<Expression>>>(401, "unbound_expressions");

		// "info" is read with a default, so it can come back empty. Fail cleanly instead of
		// dereferencing a null pointer below.
		if (!create_info) {
			throw SerializationException("Missing index creation info for operator type '%s'!", operator_type);
		}

		// The info is read back through its CreateInfo base and narrowed to the index-specific type.
		auto info = unique_ptr_cast<CreateInfo, CreateIndexInfo>(std::move(create_info));

		// The table is not part of the serialized data, so we need to look it up in the catalog again
		auto &context = reader.Get<ClientContext &>();
		const auto &catalog = info->catalog;
		const auto &schema = info->schema;
		const auto &table_name = info->table;
		auto &table_entry = Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table_name);

		// Return the new operator
		return make_uniq<LogicalCreateRTreeIndex>(std::move(info), std::move(unbound_expressions), table_entry);
	}
};


} // namespace core

} // namespace spatial
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ void LogicalCreateRTreeIndex::ResolveColumnBindings(ColumnBindingResolver &res,
[&](unique_ptr<Expression> *child) { res.VisitExpression(child); });
}

// Returns the extension name reported for this operator.
// It has to be the name the spatial operator extension is registered under ("duckdb_spatial",
// see LogicalCreateRTreeIndexOperatorExtension::GetName()); with a different name a serialized
// operator could not be matched back to the extension that knows how to deserialize it.
string LogicalCreateRTreeIndex::GetExtensionName() const {
	return "duckdb_spatial";
}

static unique_ptr<PhysicalOperator> CreateNullFilter(const LogicalCreateRTreeIndex &op,
const vector<LogicalType> &types, ClientContext &context) {
vector<unique_ptr<Expression>> filter_select_list;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class RTreeIndexInsertionRewriter : public OptimizerExtension {
// Registers the R-tree index creation hooks on the database configuration:
// the optimizer extension (RTreeIndexInsertionRewriter) and the operator extension
// (LogicalCreateRTreeIndexOperatorExtension).
void RTreeModule::RegisterIndexPlanCreate(DatabaseInstance &db) {
	auto &config = db.config;

	// Optimizer extension
	config.optimizer_extensions.push_back(RTreeIndexInsertionRewriter());

	// Operator extension
	config.operator_extensions.push_back(make_uniq<LogicalCreateRTreeIndexOperatorExtension>());
}

} // namespace core
Expand Down
64 changes: 64 additions & 0 deletions spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,68 @@ static string RTreeIndexScanToString(const FunctionData *bind_data_p) {
return bind_data.table.name + " (RTREE INDEX SCAN : " + bind_data.index.GetIndexName() + ")";
}

//-------------------------------------------------------------------------
// De/Serialize
//-------------------------------------------------------------------------
// Writes the bind data of an R-tree index scan.
//
// Written fields:
//   100 "catalog", 101 "schema", 102 "table" - names locating the scanned table
//   103 "index_name"                         - name of the index being scanned
//   104 "bbox"                               - nested object with the bounds as floats
//                                              (10 "min_x", 11 "min_y", 20 "max_x", 21 "max_y")
//
// The `function` argument is not used.
static void RTreeScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
                               const TableFunction &function) {
	auto &scan_data = bind_data_p->Cast<RTreeIndexScanBindData>();
	auto &table_entry = scan_data.table;

	// Where to find the table
	serializer.WriteProperty(100, "catalog", table_entry.schema.catalog.GetName());
	serializer.WriteProperty(101, "schema", table_entry.schema.name);
	serializer.WriteProperty(102, "table", table_entry.name);

	// Which index on that table
	serializer.WriteProperty(103, "index_name", scan_data.index.GetIndexName());

	// The bounds stored in the bind data
	const auto &bounds = scan_data.bbox;
	serializer.WriteObject(104, "bbox", [&](Serializer &box_writer) {
		box_writer.WriteProperty<float>(10, "min_x", bounds.min.x);
		box_writer.WriteProperty<float>(11, "min_y", bounds.min.y);
		box_writer.WriteProperty<float>(20, "max_x", bounds.max.x);
		box_writer.WriteProperty<float>(21, "max_y", bounds.max.y);
	});
}

// Rebuilds the bind data of an R-tree index scan from the fields written by RTreeScanSerialize.
//
// The table is looked up in the catalog by (catalog, schema, table), and the index is then
// located by name among the RTreeIndex entries of that table.
//
// Throws SerializationException if the catalog entry is not a table, or if no RTreeIndex with
// the stored name exists on the table. The `function` argument is not used.
static unique_ptr<FunctionData> RTreeScanDeserialize(Deserializer &deserializer, TableFunction &function) {
	auto &context = deserializer.Get<ClientContext &>();

	// Locate the table first
	const auto catalog = deserializer.ReadProperty<string>(100, "catalog");
	const auto schema = deserializer.ReadProperty<string>(101, "schema");
	const auto table = deserializer.ReadProperty<string>(102, "table");
	auto &table_entry = Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table);
	if (table_entry.type != CatalogType::TABLE_ENTRY) {
		throw SerializationException("Cant find table for %s.%s", schema, table);
	}

	// Then read the index name and the bounds
	const auto index_name = deserializer.ReadProperty<string>(103, "index_name");
	RTreeBounds bounds;
	deserializer.ReadObject(104, "bbox", [&](Deserializer &box_reader) {
		bounds.min.x = box_reader.ReadProperty<float>(10, "min_x");
		bounds.min.y = box_reader.ReadProperty<float>(11, "min_y");
		bounds.max.x = box_reader.ReadProperty<float>(20, "max_x");
		bounds.max.y = box_reader.ReadProperty<float>(21, "max_y");
	});

	auto &duck_table = table_entry.Cast<DuckTableEntry>();
	auto &table_info = *table_entry.GetStorage().GetDataTableInfo();

	// Walk the R-tree indexes of the table; the callback returns true once the index with the
	// stored name has been found, and false for every other index.
	unique_ptr<RTreeIndexScanBindData> bind_data = nullptr;
	table_info.GetIndexes().BindAndScan<RTreeIndex>(context, table_info, [&](RTreeIndex &candidate) {
		if (candidate.GetIndexName() != index_name) {
			return false;
		}
		bind_data = make_uniq<RTreeIndexScanBindData>(duck_table, candidate, bounds);
		return true;
	});

	if (bind_data) {
		return std::move(bind_data);
	}
	throw SerializationException("Could not find index %s on table %s.%s", index_name, schema, table);
}



//-------------------------------------------------------------------------
// Get Function
//-------------------------------------------------------------------------
Expand All @@ -150,6 +212,8 @@ TableFunction RTreeIndexScanFunction::GetFunction() {
func.projection_pushdown = true;
func.filter_pushdown = false;
func.get_bind_info = RTreeIndexScanBindInfo;
func.serialize = RTreeScanSerialize;
func.deserialize = RTreeScanDeserialize;

return func;
}
Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_basic.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (geom GEOMETRY);

Expand Down
File renamed without changes.
9 changes: 6 additions & 3 deletions test/sql/index/rtree_basic_points.test
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 1_000_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 100_00, 1337);

query I
SELECT count(*) FROM t1 WHERE ST_Within(geom, ST_MakeEnvelope(450, 450, 650, 650));
----
352
390

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom);
Expand All @@ -20,4 +23,4 @@ physical_plan <REGEX>:.*RTREE_INDEX_SCAN.*
query I
SELECT count(*) FROM t1 WHERE ST_Within(geom, ST_MakeEnvelope(450, 450, 650, 650));
----
352
390
3 changes: 3 additions & 0 deletions test/sql/index/rtree_block_reclaim.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ require spatial

load __TEST_DIR__/rtree_reclaim_space.db

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE tbl AS SELECT row_number() over () as i, geom::GEOMETRY as geom FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337) as pts(geom);

Expand Down
6 changes: 3 additions & 3 deletions test/sql/index/rtree_crud.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ require spatial

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 10000, 1337);

statement ok
INSERT INTO t1 (geom) VALUES ('POINT(1 1)');
Expand All @@ -23,12 +23,12 @@ statement ok
DROP INDEX my_idx;

statement ok
INSERT INTO t1 (geom) SELECT * FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 1000, 1337);
INSERT INTO t1 (geom) SELECT * FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 1000, 1337);

query I
SELECT count(*) FROM t1;
----
101000
11000

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom);
Expand Down
4 changes: 2 additions & 2 deletions test/sql/index/rtree_crud_noreinsert.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ require spatial

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 10000, 1337);

query I
SELECT count(*) FROM t1;
----
100000
10000

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom) WITH (min_node_capacity = 0);
Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_empty.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1(i INT, g GEOMETRY);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_projection.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (id int, geom GEOMETRY);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_pushdown.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (geom GEOMETRY, id INT);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_single.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1(i INT, g GEOMETRY);

Expand Down

0 comments on commit bb9c829

Please sign in to comment.