diff --git a/pom.xml b/pom.xml index 880d2928cc193..3cf2559fea004 100644 --- a/pom.xml +++ b/pom.xml @@ -229,12 +229,14 @@ presto-hana presto-openapi presto-native-sidecar-plugin + presto-common-arrow presto-base-arrow-flight presto-function-server presto-router-example-plugin-scheduler presto-plan-checker-router-plugin presto-sql-helpers/presto-sql-invoked-functions-plugin presto-sql-helpers/presto-native-sql-invoked-functions-plugin + presto-lance @@ -1010,6 +1012,12 @@ provided + + com.facebook.presto + presto-common-arrow + ${project.version} + + com.facebook.presto presto-base-arrow-flight diff --git a/presto-base-arrow-flight/pom.xml b/presto-base-arrow-flight/pom.xml index 004f690c12ffb..2bec314c35953 100644 --- a/presto-base-arrow-flight/pom.xml +++ b/presto-base-arrow-flight/pom.xml @@ -29,6 +29,11 @@ + + com.facebook.presto + presto-common-arrow + + org.apache.arrow arrow-memory-core @@ -250,6 +255,7 @@ com.fasterxml.jackson.core:jackson-databind com.facebook.airlift:log-manager javax.inject:javax.inject + io.airlift:slice diff --git a/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java b/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java index f42941a96387c..b72e2100339c1 100644 --- a/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java +++ b/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java @@ -34,7 +34,7 @@ public TestingArrowBlockBuilder(TypeManager typeManager) } @Override - protected Type getPrestoTypeFromArrowField(Field field) + public Type getPrestoTypeFromArrowField(Field field) { String columnLength = field.getMetadata().get("columnLength"); int length = columnLength != null ? 
Integer.parseInt(columnLength) : 0; diff --git a/presto-common-arrow/pom.xml b/presto-common-arrow/pom.xml new file mode 100644 index 0000000000000..c2507c9f136d4 --- /dev/null +++ b/presto-common-arrow/pom.xml @@ -0,0 +1,87 @@ + + + 4.0.0 + + + com.facebook.presto + presto-root + 0.297-SNAPSHOT + + + presto-common-arrow + presto-common-arrow + Presto - Common Arrow Utilities + + + ${project.parent.basedir} + + + + + org.apache.arrow + arrow-vector + + + org.slf4j + slf4j-api + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + + + + com.facebook.presto + presto-spi + + + + com.facebook.presto + presto-common + + + + io.airlift + slice + + + + com.google.guava + guava + + + + jakarta.inject + jakarta.inject-api + + + + + + + org.basepom.maven + duplicate-finder-maven-plugin + 1.2.1 + + + module-info + META-INF.versions.9.module-info + + + arrow-git.properties + about.html + + + + + + check + + + + + + + diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java similarity index 92% rename from presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java index ed703f1b2444f..a6180a18fe8fb 100644 --- a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java +++ b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java @@ -54,6 +54,7 @@ import org.apache.arrow.vector.TimeMicroVector; import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.TimeSecVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.arrow.vector.TimeStampMilliTZVector; import org.apache.arrow.vector.TimeStampMilliVector; @@ -62,6 +63,7 @@ import org.apache.arrow.vector.ValueVector; 
import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; @@ -112,7 +114,7 @@ public Block buildBlockFromFieldVector(FieldVector vector, Type type, Dictionary return builder.build(); } - protected Type getPrestoTypeFromArrowField(Field field) + public Type getPrestoTypeFromArrowField(Field field) { switch (field.getType().getTypeID()) { case Int: @@ -139,7 +141,8 @@ protected Type getPrestoTypeFromArrowField(Field field) return BooleanType.BOOLEAN; case Time: return TimeType.TIME; - case List: { + case List: + case FixedSizeList: { List children = field.getChildren(); checkArgument(children.size() == 1, "Arrow List expected to have 1 child Field, got: " + children.size()); return new ArrayType(getPrestoTypeFromArrowField(field.getChildren().get(0))); @@ -292,6 +295,9 @@ else if (vector instanceof TimeStampSecVector) { else if (vector instanceof TimeMicroVector) { assignBlockFromTimeMicroVector((TimeMicroVector) vector, type, builder, startIndex, endIndex); } + else if (vector instanceof TimeStampMicroTZVector) { + assignBlockFromTimeStampMicroTZVector((TimeStampMicroTZVector) vector, type, builder, startIndex, endIndex); + } else if (vector instanceof TimeStampMilliTZVector) { assignBlockFromTimeMilliTZVector((TimeStampMilliTZVector) vector, type, builder, startIndex, endIndex); } @@ -299,6 +305,9 @@ else if (vector instanceof MapVector) { // NOTE: MapVector is also instanceof ListVector, so check for Map first assignBlockFromMapVector((MapVector) vector, type, builder, startIndex, endIndex); } + else if (vector instanceof FixedSizeListVector) { + assignBlockFromFixedSizeListVector((FixedSizeListVector) vector, type, builder, startIndex, endIndex); + } else if (vector instanceof ListVector) { 
assignBlockFromListVector((ListVector) vector, type, builder, startIndex, endIndex); } @@ -666,6 +675,49 @@ public void assignBlockFromListVector(ListVector vector, Type type, BlockBuilder } } + public void assignBlockFromFixedSizeListVector(FixedSizeListVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex) + { + if (!(type instanceof ArrayType)) { + throw new IllegalArgumentException("Type must be an ArrayType for FixedSizeListVector"); + } + + ArrayType arrayType = (ArrayType) type; + Type elementType = arrayType.getElementType(); + int listSize = vector.getListSize(); + + for (int i = startIndex; i < endIndex; i++) { + if (vector.isNull(i)) { + builder.appendNull(); + } + else { + BlockBuilder elementBuilder = builder.beginBlockEntry(); + int elementStart = i * listSize; + int elementEnd = elementStart + listSize; + assignBlockFromValueVector( + vector.getDataVector(), elementType, elementBuilder, elementStart, elementEnd); + builder.closeEntry(); + } + } + } + + public void assignBlockFromTimeStampMicroTZVector(TimeStampMicroTZVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex) + { + if (!(type instanceof TimestampType)) { + throw new IllegalArgumentException("Expected TimestampType but got " + type.getClass().getName()); + } + + for (int i = startIndex; i < endIndex; i++) { + if (vector.isNull(i)) { + builder.appendNull(); + } + else { + long micros = vector.get(i); + long millis = TimeUnit.MICROSECONDS.toMillis(micros); + type.writeLong(builder, millis); + } + } + } + public void assignBlockFromMapVector(MapVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex) { if (!(type instanceof MapType)) { diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java similarity index 100% rename from 
presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowException.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowException.java similarity index 100% rename from presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowException.java rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowException.java diff --git a/presto-docs/src/main/sphinx/connector.rst b/presto-docs/src/main/sphinx/connector.rst index d337fe4ed12d1..00221c91e86a7 100644 --- a/presto-docs/src/main/sphinx/connector.rst +++ b/presto-docs/src/main/sphinx/connector.rst @@ -27,6 +27,7 @@ from different data sources. connector/kafka connector/kafka-tutorial connector/kudu + connector/lance connector/larksheets connector/localfile connector/memory diff --git a/presto-docs/src/main/sphinx/connector/lance.rst b/presto-docs/src/main/sphinx/connector/lance.rst new file mode 100644 index 0000000000000..266a26a7a3997 --- /dev/null +++ b/presto-docs/src/main/sphinx/connector/lance.rst @@ -0,0 +1,236 @@ +=============== +Lance Connector +=============== + +Overview +-------- + +The Lance connector allows querying and writing data stored in +`Lance `_ format from Presto. Lance is a modern columnar +data format optimized for machine learning workloads and fast random access. + +The connector uses the Lance Java SDK to read and write Lance datasets. +Each Lance dataset is organized into **fragments**, and the connector maps each fragment to a +Presto split for parallel processing across workers. + +Configuration +------------- + +To configure the Lance connector, create a catalog properties file +``etc/catalog/lance.properties`` with the following contents, +replacing the properties as appropriate: + +.. 
code-block:: none + + connector.name=lance + lance.root-url=/path/to/lance/data + +Configuration Properties +------------------------ + +The following configuration properties are available: + +=============================== ============================================================= =============== +Property Name Description Default +=============================== ============================================================= =============== +``lance.impl`` Namespace implementation: ``dir`` ``dir`` +``lance.root-url`` Root storage path for Lance datasets. ``""`` +``lance.single-level-ns`` When ``true``, uses a single-level namespace with a ``true`` + virtual ``default`` schema. +``lance.read-batch-size`` Number of rows per Arrow batch during reads. ``8192`` +``lance.max-rows-per-file`` Maximum number of rows per Lance data file. ``1000000`` +``lance.max-rows-per-group`` Maximum number of rows per row group. ``100000`` +``lance.write-batch-size`` Number of rows to batch before writing to Arrow. ``10000`` +=============================== ============================================================= =============== + +``lance.impl`` +^^^^^^^^^^^^^^ + +Namespace implementation to use. The default ``dir`` uses a directory-based +table store where each table is a ``.lance`` directory under the root. + +``lance.root-url`` +^^^^^^^^^^^^^^^^^^ + +Root storage path for Lance datasets. All tables are stored as subdirectories +named ``<table_name>.lance`` under this path. For example, if ``lance.root-url`` +is set to ``/data/lance``, a table named ``my_table`` is stored at +``/data/lance/my_table.lance``. + +``lance.single-level-ns`` +^^^^^^^^^^^^^^^^^^^^^^^^^ + +When set to ``true`` (the default), the connector exposes a single ``default`` +schema that maps directly to the root directory. All tables are accessed as +``lance.default.<table_name>``. + +``lance.read-batch-size`` +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Controls the number of rows read per Arrow batch from Lance. 
Larger values may +improve read throughput at the cost of higher memory usage. The default is +``8192``. + +``lance.max-rows-per-file`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Maximum number of rows per Lance data file. The default is ``1000000``. + +.. note:: + + This property is reserved for future use and is not yet wired into the + write path. + +``lance.max-rows-per-group`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Maximum number of rows per row group within a Lance data file. The default is +``100000``. + +.. note:: + + This property is reserved for future use and is not yet wired into the + write path. + +``lance.write-batch-size`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Number of rows to batch before converting to Arrow format during writes. The +default is ``10000``. + +.. note:: + + This property is reserved for future use and is not yet wired into the + write path. + +Data Types +---------- + +The following table lists the supported data type mappings between Lance +(Arrow) types and Presto types: + +================= =============== ====================================== +Lance (Arrow) Presto Notes +================= =============== ====================================== +``Bool`` ``BOOLEAN`` +``Int(8)`` ``TINYINT`` +``Int(16)`` ``SMALLINT`` +``Int(32)`` ``INTEGER`` +``Int(64)`` ``BIGINT`` +``Float(SINGLE)`` ``REAL`` +``Float(DOUBLE)`` ``DOUBLE`` +``Utf8`` ``VARCHAR`` +``LargeUtf8`` ``VARCHAR`` +``Binary`` ``VARBINARY`` +``LargeBinary`` ``VARBINARY`` +``Date(DAY)`` ``DATE`` +``Timestamp`` ``TIMESTAMP`` Microsecond precision; reads support + both with and without timezone +``List`` ``ARRAY`` Read only; element type mapped + recursively +``FixedSizeList`` ``ARRAY`` Read only; element type mapped + recursively +================= =============== ====================================== + +.. note:: + + Arrow types not listed above are unsupported and will cause an error. + +SQL Support +----------- + +The Lance connector supports the following SQL operations. 
+ +CREATE TABLE +^^^^^^^^^^^^ + +Create a new Lance table: + +.. code-block:: sql + + CREATE TABLE lance.default.my_table ( + id BIGINT, + name VARCHAR, + score DOUBLE + ); + +CREATE TABLE AS +^^^^^^^^^^^^^^^ + +Create a Lance table from a query: + +.. code-block:: sql + + CREATE TABLE lance.default.my_table AS + SELECT * FROM tpch.tiny.nation; + +INSERT INTO +^^^^^^^^^^^ + +Append data to an existing Lance table: + +.. code-block:: sql + + INSERT INTO lance.default.my_table + SELECT * FROM tpch.tiny.nation; + +SELECT +^^^^^^ + +Query data from a Lance table: + +.. code-block:: sql + + SELECT * FROM lance.default.my_table; + +Column projection is pushed down to Lance, so queries that select a subset +of columns only read those columns from disk: + +.. code-block:: sql + + SELECT id, name FROM lance.default.my_table; + +DROP TABLE +^^^^^^^^^^ + +Drop a Lance table and delete all its data: + +.. code-block:: sql + + DROP TABLE lance.default.my_table; + +SHOW TABLES +^^^^^^^^^^^ + +List all tables in the catalog: + +.. code-block:: sql + + SHOW TABLES FROM lance.default; + +DESCRIBE +^^^^^^^^ + +Show the columns and types of a Lance table: + +.. code-block:: sql + + DESCRIBE lance.default.my_table; + +Limitations +----------- + +* Only a single schema (``default``) is supported when ``lance.single-level-ns`` + is ``true``. +* The following SQL statements are not supported: + + * :doc:`/sql/alter-table` + * :doc:`/sql/delete` + * :doc:`/sql/update` + +* Predicate pushdown is not supported. Only column projection is pushed down + to the Lance reader. +* ``ARRAY`` types are supported for reads but cannot be written. +* Only local filesystem paths are supported in the current ``dir`` implementation. +* Data written by one Presto cluster is not visible to another cluster until the + write transaction commits. 
diff --git a/presto-lance/pom.xml b/presto-lance/pom.xml new file mode 100644 index 0000000000000..33918c2d04f38 --- /dev/null +++ b/presto-lance/pom.xml @@ -0,0 +1,216 @@ + + + 4.0.0 + + com.facebook.presto + presto-root + 0.297-SNAPSHOT + + + presto-lance + Presto - LanceDB Connector + presto-plugin + + + ${project.parent.basedir} + 18.3.0 + + + + + com.facebook.airlift + bootstrap + + + + com.facebook.airlift + configuration + + + + com.facebook.airlift + json + + + + com.facebook.airlift + log + + + + com.google.guava + guava + + + + com.google.inject + guice + + + + + com.facebook.presto + presto-spi + provided + + + + com.facebook.presto + presto-common + provided + + + + io.airlift + slice + provided + + + + com.facebook.airlift + units + provided + + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + + com.fasterxml.jackson.core + jackson-core + + + + com.fasterxml.jackson.core + jackson-databind + + + + javax.inject + javax.inject + + + + javax.validation + validation-api + 2.0.1.Final + + + + com.facebook.presto + presto-common-arrow + + + + org.lance + lance-core + 2.0.1 + + + org.slf4j + slf4j-api + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + org.lance + lance-namespace-apache-client + + + + + + org.apache.arrow + arrow-memory-core + ${arrow.version} + + + org.slf4j + slf4j-api + + + + + + org.apache.arrow + arrow-memory-unsafe + ${arrow.version} + + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + org.slf4j + slf4j-api + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + + + + + com.facebook.presto + presto-testng-services + test + + + org.testng + testng + test + + + com.facebook.presto + presto-tests + test + + + com.facebook.presto + presto-main + test + + + com.facebook.presto + presto-main-base + test + + + + + + + org.basepom.maven + duplicate-finder-maven-plugin + + + LICENSE-EDL-1.0.txt + LICENSE-EPL-1.0.txt + arrow-git.properties + about.html + + + + + 
org.apache.maven.plugins + maven-dependency-plugin + + + org.apache.arrow:arrow-memory-unsafe + com.fasterxml.jackson.core:jackson-core + + + + + + + diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java new file mode 100644 index 0000000000000..22d25a3adedc6 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceArrowToPageScanner.java @@ -0,0 +1,120 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.facebook.presto.common.Page; +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.spi.PrestoException; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.lance.ipc.LanceScanner; + +import java.io.IOException; +import java.util.List; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; + +public class LanceArrowToPageScanner + implements AutoCloseable +{ + private final ScannerFactory scannerFactory; + private final ArrowReader arrowReader; + private final List columns; + private final ArrowBlockBuilder arrowBlockBuilder; + private long lastBatchBytes; + + public LanceArrowToPageScanner( + BufferAllocator allocator, + List columns, + ScannerFactory scannerFactory, + ArrowBlockBuilder arrowBlockBuilder) + { + this.columns = requireNonNull(columns, "columns is null"); + this.scannerFactory = requireNonNull(scannerFactory, "scannerFactory is null"); + this.arrowBlockBuilder = requireNonNull(arrowBlockBuilder, "arrowBlockBuilder is null"); + List columnNames = columns.stream() + .map(LanceColumnHandle::getColumnName) + .collect(toImmutableList()); + LanceScanner scanner = scannerFactory.open(allocator, columnNames); + this.arrowReader = scanner.scanBatches(); + } + + public boolean read() + { + try { + boolean hasNext = arrowReader.loadNextBatch(); + if (hasNext) { + VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); + lastBatchBytes = 0; + for (FieldVector vector : root.getFieldVectors()) { + for (ArrowBuf buf : vector.getFieldBuffers()) { + if (buf != null) { + lastBatchBytes += buf.capacity(); + } + } + } + } + return 
hasNext; + } + catch (IOException e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, "Failed to read Arrow batch", e); + } + } + + public long getLastBatchBytes() + { + return lastBatchBytes; + } + + public Page convert() + { + VectorSchemaRoot root; + try { + root = arrowReader.getVectorSchemaRoot(); + } + catch (IOException e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, "Failed to get VectorSchemaRoot", e); + } + + int rowCount = root.getRowCount(); + Block[] blocks = new Block[columns.size()]; + + for (int col = 0; col < columns.size(); col++) { + LanceColumnHandle column = columns.get(col); + FieldVector vector = root.getVector(column.getColumnName()); + Type type = column.getColumnType(); + blocks[col] = arrowBlockBuilder.buildBlockFromFieldVector(vector, type, null); + } + + return new Page(rowCount, blocks); + } + + @Override + public void close() + { + try { + arrowReader.close(); + } + catch (IOException e) { + // ignore + } + scannerFactory.close(); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceBasePageSource.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceBasePageSource.java new file mode 100644 index 0000000000000..35f729e26c69c --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceBasePageSource.java @@ -0,0 +1,121 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.facebook.presto.common.Page; +import com.facebook.presto.spi.ConnectorPageSource; +import org.apache.arrow.memory.BufferAllocator; + +import java.io.IOException; +import java.util.List; + +import static java.util.Objects.requireNonNull; + +public abstract class LanceBasePageSource + implements ConnectorPageSource +{ + protected final LanceTableHandle tableHandle; + protected final LanceArrowToPageScanner arrowToPageScanner; + protected final BufferAllocator bufferAllocator; + protected long readBytes; + protected boolean finished; + + public LanceBasePageSource( + LanceTableHandle tableHandle, + List columns, + ScannerFactory scannerFactory, + ArrowBlockBuilder arrowBlockBuilder, + BufferAllocator parentAllocator) + { + this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); + requireNonNull(columns, "columns is null"); + requireNonNull(scannerFactory, "scannerFactory is null"); + requireNonNull(arrowBlockBuilder, "arrowBlockBuilder is null"); + requireNonNull(parentAllocator, "parentAllocator is null"); + + this.bufferAllocator = parentAllocator + .newChildAllocator(tableHandle.getTableName(), 0, Long.MAX_VALUE); + + try { + this.arrowToPageScanner = new LanceArrowToPageScanner( + bufferAllocator, + columns, + scannerFactory, + arrowBlockBuilder); + } + catch (RuntimeException e) { + bufferAllocator.close(); + throw e; + } + + this.finished = false; + } + + @Override + public long getCompletedBytes() + { + return readBytes; + } + + @Override + public long getCompletedPositions() + { + return 0; + } + + @Override + public long getReadTimeNanos() + { + return 0; + } + + @Override + public boolean isFinished() + { + return finished; + } + + @Override + public Page getNextPage() + { + if (finished) { + return null; + } + if (!arrowToPageScanner.read()) { + finished = true; + return null; + } + readBytes += arrowToPageScanner.getLastBatchBytes(); + 
return arrowToPageScanner.convert(); + } + + @Override + public long getSystemMemoryUsage() + { + return 0; + } + + @Override + public void close() + throws IOException + { + try { + arrowToPageScanner.close(); + } + finally { + bufferAllocator.close(); + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java new file mode 100644 index 0000000000000..9ae1281e52048 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceColumnHandle.java @@ -0,0 +1,218 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.presto.common.type.ArrayType; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DateType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.IntegerType; +import com.facebook.presto.common.type.RealType; +import com.facebook.presto.common.type.RowType; +import com.facebook.presto.common.type.SmallintType; +import com.facebook.presto.common.type.TimestampType; +import com.facebook.presto.common.type.TinyintType; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.VarbinaryType; +import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ColumnMetadata; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; + +import java.util.Objects; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class LanceColumnHandle + implements ColumnHandle +{ + private final String columnName; + private final Type columnType; + private final boolean nullable; + + @JsonCreator + public LanceColumnHandle( + @JsonProperty("columnName") String columnName, + @JsonProperty("columnType") Type columnType, + @JsonProperty("nullable") boolean nullable) + { + this.columnName = requireNonNull(columnName, "columnName is null"); + this.columnType = requireNonNull(columnType, "columnType is null"); + this.nullable = nullable; + } + + public LanceColumnHandle(String columnName, Type columnType) + { + this(columnName, 
columnType, true); + } + + @JsonProperty + public String getColumnName() + { + return columnName; + } + + @JsonProperty + public Type getColumnType() + { + return columnType; + } + + @JsonProperty + public boolean isNullable() + { + return nullable; + } + + public ColumnMetadata getColumnMetadata() + { + return ColumnMetadata.builder() + .setName(columnName) + .setType(columnType) + .setNullable(nullable) + .build(); + } + + public static Type toPrestoType(Field field) + { + ArrowType type = field.getType(); + + if (type instanceof ArrowType.FixedSizeList || type instanceof ArrowType.List) { + Type elementType = RealType.REAL; + if (field.getChildren() != null && !field.getChildren().isEmpty()) { + elementType = toPrestoType(field.getChildren().get(0)); + } + return new ArrayType(elementType); + } + + if (type instanceof ArrowType.Bool) { + return BooleanType.BOOLEAN; + } + else if (type instanceof ArrowType.Int) { + ArrowType.Int intType = (ArrowType.Int) type; + switch (intType.getBitWidth()) { + case 8: + return TinyintType.TINYINT; + case 16: + return SmallintType.SMALLINT; + case 32: + return IntegerType.INTEGER; + case 64: + return BigintType.BIGINT; + } + } + else if (type instanceof ArrowType.FloatingPoint) { + ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type; + if (fpType.getPrecision() == FloatingPointPrecision.SINGLE) { + return RealType.REAL; + } + return DoubleType.DOUBLE; + } + else if (type instanceof ArrowType.Utf8 || type instanceof ArrowType.LargeUtf8) { + return VarcharType.VARCHAR; + } + else if (type instanceof ArrowType.Binary || type instanceof ArrowType.LargeBinary) { + return VarbinaryType.VARBINARY; + } + else if (type instanceof ArrowType.Date) { + return DateType.DATE; + } + else if (type instanceof ArrowType.Timestamp) { + return TimestampType.TIMESTAMP; + } + throw new UnsupportedOperationException("Unsupported Arrow type: " + type); + } + + public static ArrowType toArrowType(Type prestoType) + { + if 
(prestoType.equals(BooleanType.BOOLEAN)) { + return ArrowType.Bool.INSTANCE; + } + else if (prestoType.equals(TinyintType.TINYINT)) { + return new ArrowType.Int(8, true); + } + else if (prestoType.equals(SmallintType.SMALLINT)) { + return new ArrowType.Int(16, true); + } + else if (prestoType.equals(IntegerType.INTEGER)) { + return new ArrowType.Int(32, true); + } + else if (prestoType.equals(BigintType.BIGINT)) { + return new ArrowType.Int(64, true); + } + else if (prestoType.equals(RealType.REAL)) { + return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); + } + else if (prestoType.equals(DoubleType.DOUBLE)) { + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + } + else if (prestoType instanceof VarcharType) { + return ArrowType.Utf8.INSTANCE; + } + else if (prestoType instanceof VarbinaryType) { + return ArrowType.Binary.INSTANCE; + } + else if (prestoType instanceof DateType) { + return new ArrowType.Date(DateUnit.DAY); + } + else if (prestoType instanceof TimestampType) { + return new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); + } + else if (prestoType instanceof ArrayType) { + return ArrowType.List.INSTANCE; + } + else if (prestoType instanceof RowType) { + return ArrowType.Struct.INSTANCE; + } + throw new UnsupportedOperationException("Unsupported Presto type: " + prestoType); + } + + @Override + public int hashCode() + { + return Objects.hash(columnName, columnType); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if ((obj == null) || (getClass() != obj.getClass())) { + return false; + } + LanceColumnHandle other = (LanceColumnHandle) obj; + return Objects.equals(this.columnName, other.columnName) && + Objects.equals(this.columnType, other.columnType); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("columnName", columnName) + .add("columnType", columnType) + .add("nullable", nullable) + .toString(); + } +} diff --git 
a/presto-lance/src/main/java/com/facebook/presto/lance/LanceCommitTaskData.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceCommitTaskData.java new file mode 100644 index 0000000000000..2c44e1b5534a9 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceCommitTaskData.java @@ -0,0 +1,55 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import static java.util.Objects.requireNonNull; + +public class LanceCommitTaskData +{ + private final String fragmentsJson; + private final long writtenBytes; + private final long rowCount; + + @JsonCreator + public LanceCommitTaskData( + @JsonProperty("fragmentsJson") String fragmentsJson, + @JsonProperty("writtenBytes") long writtenBytes, + @JsonProperty("rowCount") long rowCount) + { + this.fragmentsJson = requireNonNull(fragmentsJson, "fragmentsJson is null"); + this.writtenBytes = writtenBytes; + this.rowCount = rowCount; + } + + @JsonProperty + public String getFragmentsJson() + { + return fragmentsJson; + } + + @JsonProperty + public long getWrittenBytes() + { + return writtenBytes; + } + + @JsonProperty + public long getRowCount() + { + return rowCount; + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceConfig.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConfig.java new file 
mode 100644 index 0000000000000..33ba378ed6fe0 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConfig.java @@ -0,0 +1,128 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.configuration.Config; +import com.facebook.airlift.configuration.ConfigDescription; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; + +public class LanceConfig +{ + private String impl = "dir"; + private String rootUrl = ""; + private boolean singleLevelNs = true; + private int readBatchSize = 8192; + private int maxRowsPerFile = 1_000_000; + private int maxRowsPerGroup = 100_000; + private int writeBatchSize = 10_000; + + @NotNull + public String getImpl() + { + return impl; + } + + @Config("lance.impl") + @ConfigDescription("Namespace implementation: 'dir' or full class name") + public LanceConfig setImpl(String impl) + { + this.impl = impl; + return this; + } + + @NotNull + public String getRootUrl() + { + return rootUrl; + } + + @Config("lance.root-url") + @ConfigDescription("Lance root storage path") + public LanceConfig setRootUrl(String rootUrl) + { + this.rootUrl = rootUrl; + return this; + } + + public boolean isSingleLevelNs() + { + return singleLevelNs; + } + + @Config("lance.single-level-ns") + @ConfigDescription("Access 1st level namespace with virtual 'default' schema") + public LanceConfig setSingleLevelNs(boolean singleLevelNs) + 
{ + this.singleLevelNs = singleLevelNs; + return this; + } + + @Min(1) + public int getReadBatchSize() + { + return readBatchSize; + } + + @Config("lance.read-batch-size") + @ConfigDescription("Number of rows per batch during reads") + public LanceConfig setReadBatchSize(int readBatchSize) + { + this.readBatchSize = readBatchSize; + return this; + } + + @Min(1) + public int getMaxRowsPerFile() + { + return maxRowsPerFile; + } + + @Config("lance.max-rows-per-file") + @ConfigDescription("Maximum number of rows per Lance file") + public LanceConfig setMaxRowsPerFile(int maxRowsPerFile) + { + this.maxRowsPerFile = maxRowsPerFile; + return this; + } + + @Min(1) + public int getMaxRowsPerGroup() + { + return maxRowsPerGroup; + } + + @Config("lance.max-rows-per-group") + @ConfigDescription("Maximum number of rows per row group") + public LanceConfig setMaxRowsPerGroup(int maxRowsPerGroup) + { + this.maxRowsPerGroup = maxRowsPerGroup; + return this; + } + + @Min(1) + public int getWriteBatchSize() + { + return writeBatchSize; + } + + @Config("lance.write-batch-size") + @ConfigDescription("Number of rows to batch before writing to Arrow") + public LanceConfig setWriteBatchSize(int writeBatchSize) + { + this.writeBatchSize = writeBatchSize; + return this; + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnector.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnector.java new file mode 100644 index 0000000000000..241819422fc13 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnector.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.bootstrap.LifeCycleManager; +import com.facebook.airlift.log.Logger; +import com.facebook.presto.spi.connector.Connector; +import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; +import com.facebook.presto.spi.connector.ConnectorPageSourceProvider; +import com.facebook.presto.spi.connector.ConnectorSplitManager; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.facebook.presto.spi.transaction.IsolationLevel; + +import javax.inject.Inject; + +import static java.util.Objects.requireNonNull; + +public class LanceConnector + implements Connector +{ + private static final Logger log = Logger.get(LanceConnector.class); + + private final LifeCycleManager lifeCycleManager; + private final LanceMetadata metadata; + private final LanceNamespaceHolder namespaceHolder; + private final ConnectorSplitManager splitManager; + private final ConnectorPageSourceProvider pageSourceProvider; + private final ConnectorPageSinkProvider pageSinkProvider; + + @Inject + public LanceConnector( + LifeCycleManager lifeCycleManager, + LanceMetadata metadata, + LanceNamespaceHolder namespaceHolder, + ConnectorSplitManager splitManager, + ConnectorPageSourceProvider pageSourceProvider, + ConnectorPageSinkProvider pageSinkProvider) + { + this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); + this.metadata = requireNonNull(metadata, "metadata is null"); + this.namespaceHolder 
= requireNonNull(namespaceHolder, "namespaceHolder is null"); + this.splitManager = requireNonNull(splitManager, "splitManager is null"); + this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null"); + this.pageSinkProvider = requireNonNull(pageSinkProvider, "pageSinkProvider is null"); + } + + @Override + public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly) + { + return LanceTransactionHandle.INSTANCE; + } + + @Override + public ConnectorMetadata getMetadata(ConnectorTransactionHandle transactionHandle) + { + return metadata; + } + + @Override + public ConnectorSplitManager getSplitManager() + { + return splitManager; + } + + @Override + public ConnectorPageSourceProvider getPageSourceProvider() + { + return pageSourceProvider; + } + + @Override + public ConnectorPageSinkProvider getPageSinkProvider() + { + return pageSinkProvider; + } + + @Override + public void shutdown() + { + try { + lifeCycleManager.stop(); + } + catch (Exception e) { + log.error(e, "Error shutting down connector"); + } + finally { + namespaceHolder.shutdown(); + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnectorFactory.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnectorFactory.java new file mode 100644 index 0000000000000..baaaacdda6836 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceConnectorFactory.java @@ -0,0 +1,61 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.bootstrap.Bootstrap; +import com.facebook.airlift.json.JsonModule; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.spi.ConnectorHandleResolver; +import com.facebook.presto.spi.classloader.ThreadContextClassLoader; +import com.facebook.presto.spi.connector.Connector; +import com.facebook.presto.spi.connector.ConnectorContext; +import com.facebook.presto.spi.connector.ConnectorFactory; +import com.google.inject.Injector; + +import java.util.Map; + +public class LanceConnectorFactory + implements ConnectorFactory +{ + @Override + public String getName() + { + return "lance"; + } + + @Override + public ConnectorHandleResolver getHandleResolver() + { + return new LanceHandleResolver(); + } + + @Override + public Connector create(String catalogName, Map config, ConnectorContext context) + { + ClassLoader classLoader = LanceConnectorFactory.class.getClassLoader(); + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + Bootstrap app = new Bootstrap( + new JsonModule(), + new LanceModule(), + binder -> binder.bind(TypeManager.class).toInstance(context.getTypeManager())); + + Injector injector = app + .doNotInitializeLogging() + .setRequiredConfigurationProperties(config) + .initialize(); + + return injector.getInstance(LanceConnector.class); + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceErrorCode.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceErrorCode.java new file mode 100644 index 0000000000000..1dcac762bdc46 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceErrorCode.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.presto.common.ErrorCode; +import com.facebook.presto.common.ErrorType; +import com.facebook.presto.spi.ErrorCodeSupplier; + +import static com.facebook.presto.common.ErrorType.EXTERNAL; + +public enum LanceErrorCode + implements ErrorCodeSupplier +{ + LANCE_ERROR(0, EXTERNAL), + LANCE_TABLE_NOT_FOUND(1, EXTERNAL), + LANCE_FILESYSTEM_ERROR(2, EXTERNAL), + LANCE_TYPE_NOT_SUPPORTED(3, EXTERNAL); + + private final ErrorCode errorCode; + + LanceErrorCode(int code, ErrorType type) + { + errorCode = new ErrorCode(code + 0x0520_0000, name(), type); + } + + @Override + public ErrorCode toErrorCode() + { + return errorCode; + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentData.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentData.java new file mode 100644 index 0000000000000..0f9854976e9f9 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentData.java @@ -0,0 +1,316 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.PrestoException; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.lance.FragmentMetadata; +import org.lance.fragment.DataFile; +import org.lance.fragment.DeletionFile; +import org.lance.fragment.DeletionFileType; +import org.lance.fragment.RowIdMeta; + +import java.util.List; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; + +/** + * JSON-serializable representation of Lance FragmentMetadata. + * Replaces Java ObjectOutputStream serialization for cross-node commit data. + */ +public class LanceFragmentData +{ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private final int id; + private final List files; + private final long physicalRows; + private final LanceDeletionFile deletionFile; + private final String rowIdMetadata; + + @JsonCreator + public LanceFragmentData( + @JsonProperty("id") int id, + @JsonProperty("files") List files, + @JsonProperty("physicalRows") long physicalRows, + @JsonProperty("deletionFile") LanceDeletionFile deletionFile, + @JsonProperty("rowIdMetadata") String rowIdMetadata) + { + this.id = id; + this.files = requireNonNull(files, "files is null"); + this.physicalRows = physicalRows; + this.deletionFile = deletionFile; + this.rowIdMetadata = rowIdMetadata; + } + + public static LanceFragmentData fromFragmentMetadata(FragmentMetadata fragment) + { + List files = fragment.getFiles().stream() + .map(LanceDataFile::fromDataFile) + .collect(toImmutableList()); + + LanceDeletionFile deletionFile = null; + if (fragment.getDeletionFile() 
!= null) { + deletionFile = LanceDeletionFile.fromDeletionFile(fragment.getDeletionFile()); + } + + String rowIdMetadata = null; + if (fragment.getRowIdMeta() != null) { + rowIdMetadata = fragment.getRowIdMeta().getMetadata(); + } + + return new LanceFragmentData( + fragment.getId(), + files, + fragment.getPhysicalRows(), + deletionFile, + rowIdMetadata); + } + + public FragmentMetadata toFragmentMetadata() + { + List dataFiles = files.stream() + .map(LanceDataFile::toDataFile) + .collect(toImmutableList()); + + DeletionFile delFile = deletionFile != null ? deletionFile.toDeletionFile() : null; + RowIdMeta rowIdMeta = rowIdMetadata != null ? new RowIdMeta(rowIdMetadata) : null; + + return new FragmentMetadata(id, dataFiles, physicalRows, delFile, rowIdMeta); + } + + @JsonProperty + public int getId() + { + return id; + } + + @JsonProperty + public List getFiles() + { + return files; + } + + @JsonProperty + public long getPhysicalRows() + { + return physicalRows; + } + + @JsonProperty + public LanceDeletionFile getDeletionFile() + { + return deletionFile; + } + + @JsonProperty + public String getRowIdMetadata() + { + return rowIdMetadata; + } + + public static String serializeFragments(List fragments) + { + try { + List data = fragments.stream() + .map(LanceFragmentData::fromFragmentMetadata) + .collect(toImmutableList()); + return MAPPER.writeValueAsString(data); + } + catch (JsonProcessingException e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, "Failed to serialize fragment metadata", e); + } + } + + public static List deserializeFragments(String json) + { + try { + List data = MAPPER.readValue(json, new TypeReference>() {}); + return data.stream() + .map(LanceFragmentData::toFragmentMetadata) + .collect(toImmutableList()); + } + catch (JsonProcessingException e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, "Failed to deserialize fragment metadata", e); + } + } + + public static class LanceDataFile + { + private final String path; + 
private final int[] fields; + private final int[] columnIndices; + private final int fileMajorVersion; + private final int fileMinorVersion; + private final Long fileSizeBytes; + private final Integer baseId; + + @JsonCreator + public LanceDataFile( + @JsonProperty("path") String path, + @JsonProperty("fields") int[] fields, + @JsonProperty("columnIndices") int[] columnIndices, + @JsonProperty("fileMajorVersion") int fileMajorVersion, + @JsonProperty("fileMinorVersion") int fileMinorVersion, + @JsonProperty("fileSizeBytes") Long fileSizeBytes, + @JsonProperty("baseId") Integer baseId) + { + this.path = requireNonNull(path, "path is null"); + this.fields = requireNonNull(fields, "fields is null"); + this.columnIndices = requireNonNull(columnIndices, "columnIndices is null"); + this.fileMajorVersion = fileMajorVersion; + this.fileMinorVersion = fileMinorVersion; + this.fileSizeBytes = fileSizeBytes; + this.baseId = baseId; + } + + public static LanceDataFile fromDataFile(DataFile dataFile) + { + return new LanceDataFile( + dataFile.getPath(), + dataFile.getFields(), + dataFile.getColumnIndices(), + dataFile.getFileMajorVersion(), + dataFile.getFileMinorVersion(), + dataFile.getFileSizeBytes(), + dataFile.getBaseId().orElse(null)); + } + + public DataFile toDataFile() + { + return new DataFile(path, fields, columnIndices, fileMajorVersion, fileMinorVersion, fileSizeBytes, baseId); + } + + @JsonProperty + public String getPath() + { + return path; + } + + @JsonProperty + public int[] getFields() + { + return fields; + } + + @JsonProperty + public int[] getColumnIndices() + { + return columnIndices; + } + + @JsonProperty + public int getFileMajorVersion() + { + return fileMajorVersion; + } + + @JsonProperty + public int getFileMinorVersion() + { + return fileMinorVersion; + } + + @JsonProperty + public Long getFileSizeBytes() + { + return fileSizeBytes; + } + + @JsonProperty + public Integer getBaseId() + { + return baseId; + } + } + + public static class 
LanceDeletionFile + { + private final long id; + private final long readVersion; + private final Long numDeletedRows; + private final String fileType; + private final Integer baseId; + + @JsonCreator + public LanceDeletionFile( + @JsonProperty("id") long id, + @JsonProperty("readVersion") long readVersion, + @JsonProperty("numDeletedRows") Long numDeletedRows, + @JsonProperty("fileType") String fileType, + @JsonProperty("baseId") Integer baseId) + { + this.id = id; + this.readVersion = readVersion; + this.numDeletedRows = numDeletedRows; + this.fileType = fileType; + this.baseId = baseId; + } + + public static LanceDeletionFile fromDeletionFile(DeletionFile deletionFile) + { + return new LanceDeletionFile( + deletionFile.getId(), + deletionFile.getReadVersion(), + deletionFile.getNumDeletedRows(), + deletionFile.getFileType() != null ? deletionFile.getFileType().name() : null, + deletionFile.getBaseId().orElse(null)); + } + + public DeletionFile toDeletionFile() + { + return new DeletionFile( + id, + readVersion, + numDeletedRows, + fileType != null ? DeletionFileType.valueOf(fileType) : null, + baseId); + } + + @JsonProperty + public long getId() + { + return id; + } + + @JsonProperty + public long getReadVersion() + { + return readVersion; + } + + @JsonProperty + public Long getNumDeletedRows() + { + return numDeletedRows; + } + + @JsonProperty + public String getFileType() + { + return fileType; + } + + @JsonProperty + public Integer getBaseId() + { + return baseId; + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentPageSource.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentPageSource.java new file mode 100644 index 0000000000000..7cf7a594eb543 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceFragmentPageSource.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.log.Logger; +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.google.common.collect.ImmutableList; +import org.apache.arrow.memory.BufferAllocator; +import org.lance.Dataset; +import org.lance.ReadOptions; +import org.lance.ipc.LanceScanner; +import org.lance.ipc.ScanOptions; + +import java.util.List; + +public class LanceFragmentPageSource + extends LanceBasePageSource +{ + private static final Logger log = Logger.get(LanceFragmentPageSource.class); + + public LanceFragmentPageSource( + LanceTableHandle tableHandle, + List columns, + List fragments, + String tablePath, + int readBatchSize, + ArrowBlockBuilder arrowBlockBuilder, + BufferAllocator parentAllocator) + { + super(tableHandle, columns, new FragmentScannerFactory(fragments, tablePath, readBatchSize), arrowBlockBuilder, parentAllocator); + } + + private static class FragmentScannerFactory + implements ScannerFactory + { + private final List fragmentIds; + private final String tablePath; + private final int readBatchSize; + private Dataset dataset; + private LanceScanner scanner; + + FragmentScannerFactory(List fragmentIds, String tablePath, int readBatchSize) + { + this.fragmentIds = ImmutableList.copyOf(fragmentIds); + this.tablePath = tablePath; + this.readBatchSize = readBatchSize; + } + + @Override + public LanceScanner open(BufferAllocator allocator, List columns) + { + ScanOptions.Builder optionsBuilder = new ScanOptions.Builder(); + if (!columns.isEmpty()) { + optionsBuilder.columns(columns); + } 
+ optionsBuilder.batchSize(readBatchSize); + optionsBuilder.fragmentIds(fragmentIds); + + this.dataset = Dataset.open(tablePath, new ReadOptions.Builder().build()); + this.scanner = dataset.newScan(optionsBuilder.build()); + return scanner; + } + + @Override + public void close() + { + try { + if (scanner != null) { + scanner.close(); + } + } + catch (Exception e) { + log.warn(e, "Error closing lance scanner"); + } + try { + if (dataset != null) { + dataset.close(); + } + } + catch (Exception e) { + log.warn(e, "Error closing lance dataset"); + } + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceHandleResolver.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceHandleResolver.java new file mode 100644 index 0000000000000..ea303aed93100 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceHandleResolver.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorHandleResolver; +import com.facebook.presto.spi.ConnectorInsertTableHandle; +import com.facebook.presto.spi.ConnectorOutputTableHandle; +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +public class LanceHandleResolver + implements ConnectorHandleResolver +{ + @Override + public Class getTableHandleClass() + { + return LanceTableHandle.class; + } + + @Override + public Class getTableLayoutHandleClass() + { + return LanceTableLayoutHandle.class; + } + + @Override + public Class getColumnHandleClass() + { + return LanceColumnHandle.class; + } + + @Override + public Class getSplitClass() + { + return LanceSplit.class; + } + + @Override + public Class getOutputTableHandleClass() + { + return LanceWritableTableHandle.class; + } + + @Override + public Class getInsertTableHandleClass() + { + return LanceWritableTableHandle.class; + } + + @Override + public Class getTransactionHandleClass() + { + return LanceTransactionHandle.class; + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceMetadata.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceMetadata.java new file mode 100644 index 0000000000000..c18a0bc85f128 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceMetadata.java @@ -0,0 +1,280 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.ConnectorInsertTableHandle; +import com.facebook.presto.spi.ConnectorNewTableLayout; +import com.facebook.presto.spi.ConnectorOutputTableHandle; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.ConnectorTableLayout; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.ConnectorTableLayoutResult; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.Constraint; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.SchemaTablePrefix; +import com.facebook.presto.spi.connector.ConnectorMetadata; +import com.facebook.presto.spi.connector.ConnectorOutputMetadata; +import com.facebook.presto.spi.statistics.ComputedStatistics; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.slice.Slice; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; + +import javax.inject.Inject; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Collections.singletonList; +import static 
java.util.Objects.requireNonNull; + +public class LanceMetadata + implements ConnectorMetadata +{ + public static final String LANCE_DEFAULT_SCHEMA = "default"; + + private final LanceNamespaceHolder namespaceHolder; + private final JsonCodec commitTaskDataCodec; + + @Inject + public LanceMetadata( + LanceNamespaceHolder namespaceHolder, + JsonCodec commitTaskDataCodec) + { + this.namespaceHolder = requireNonNull(namespaceHolder, "namespaceHolder is null"); + this.commitTaskDataCodec = requireNonNull(commitTaskDataCodec, "commitTaskDataCodec is null"); + } + + @Override + public boolean schemaExists(ConnectorSession session, String schemaName) + { + return LANCE_DEFAULT_SCHEMA.equals(schemaName); + } + + @Override + public List listSchemaNames(ConnectorSession session) + { + return ImmutableList.of(LANCE_DEFAULT_SCHEMA); + } + + @Override + public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) + { + if (!schemaExists(session, tableName.getSchemaName())) { + return null; + } + if (!namespaceHolder.tableExists(tableName.getTableName())) { + return null; + } + return new LanceTableHandle(tableName.getSchemaName(), tableName.getTableName()); + } + + @Override + public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle handle) + { + return new ConnectorTableLayout(handle); + } + + @Override + public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) + { + LanceTableHandle lanceTable = (LanceTableHandle) table; + if (!namespaceHolder.tableExists(lanceTable.getTableName())) { + return null; + } + Schema arrowSchema = namespaceHolder.describeTable(lanceTable.getTableName()); + SchemaTableName schemaTableName = new SchemaTableName(lanceTable.getSchemaName(), lanceTable.getTableName()); + + ImmutableList.Builder columnsMetadata = ImmutableList.builder(); + for (Field field : arrowSchema.getFields()) { + columnsMetadata.add(ColumnMetadata.builder() + 
.setName(field.getName()) + .setType(LanceColumnHandle.toPrestoType(field)) + .setNullable(field.isNullable()) + .build()); + } + + return new ConnectorTableMetadata(schemaTableName, columnsMetadata.build()); + } + + @Override + public List listTables(ConnectorSession session, Optional schemaName) + { + String schema = schemaName.orElse(LANCE_DEFAULT_SCHEMA); + return namespaceHolder.listTables().stream() + .map(tableName -> new SchemaTableName(schema, tableName)) + .collect(toImmutableList()); + } + + @Override + public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) + { + LanceTableHandle lanceTable = (LanceTableHandle) tableHandle; + if (!namespaceHolder.tableExists(lanceTable.getTableName())) { + return ImmutableMap.of(); + } + Schema arrowSchema = namespaceHolder.describeTable(lanceTable.getTableName()); + + ImmutableMap.Builder columnHandles = ImmutableMap.builder(); + for (Field field : arrowSchema.getFields()) { + LanceColumnHandle columnHandle = new LanceColumnHandle( + field.getName(), + LanceColumnHandle.toPrestoType(field), + field.isNullable()); + columnHandles.put(field.getName(), columnHandle); + } + return columnHandles.build(); + } + + @Override + public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) + { + return ((LanceColumnHandle) columnHandle).getColumnMetadata(); + } + + @Override + public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) + { + List tables = prefix.getTableName() != null + ? 
singletonList(prefix.toSchemaTableName()) + : listTables(session, Optional.ofNullable(prefix.getSchemaName())); + + ImmutableMap.Builder> columns = ImmutableMap.builder(); + for (SchemaTableName tableName : tables) { + ConnectorTableHandle tableHandle = getTableHandle(session, tableName); + if (tableHandle != null) { + ConnectorTableMetadata tableMetadata = getTableMetadata(session, tableHandle); + if (tableMetadata != null) { + columns.put(tableName, tableMetadata.getColumns()); + } + } + } + return columns.build(); + } + + @Override + public ConnectorTableLayoutResult getTableLayoutForConstraint( + ConnectorSession session, + ConnectorTableHandle table, + Constraint constraint, + Optional> desiredColumns) + { + LanceTableHandle lanceTable = (LanceTableHandle) table; + ConnectorTableLayout layout = new ConnectorTableLayout( + new LanceTableLayoutHandle(lanceTable, constraint.getSummary())); + return new ConnectorTableLayoutResult(layout, constraint.getSummary()); + } + + @Override + public ConnectorOutputTableHandle beginCreateTable( + ConnectorSession session, + ConnectorTableMetadata tableMetadata, + Optional layout) + { + Schema arrowSchema = LancePageToArrowConverter.toArrowSchema(tableMetadata.getColumns()); + + namespaceHolder.createTable( + tableMetadata.getTable().getTableName(), + arrowSchema); + + List columns = tableMetadata.getColumns().stream() + .map(col -> new LanceColumnHandle(col.getName(), col.getType(), col.isNullable())) + .collect(toImmutableList()); + + return new LanceWritableTableHandle( + tableMetadata.getTable().getSchemaName(), + tableMetadata.getTable().getTableName(), + arrowSchema.toJson(), + columns); + } + + @Override + public Optional finishCreateTable( + ConnectorSession session, + ConnectorOutputTableHandle tableHandle, + Collection fragments, + Collection computedStatistics) + { + LanceWritableTableHandle handle = (LanceWritableTableHandle) tableHandle; + + if (!fragments.isEmpty()) { + List allFragments = 
collectFragments(fragments); + namespaceHolder.commitAppend(handle.getTableName(), allFragments); + } + return Optional.empty(); + } + + @Override + public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle) + { + LanceTableHandle lanceTable = (LanceTableHandle) tableHandle; + Schema arrowSchema = namespaceHolder.describeTable(lanceTable.getTableName()); + + List columns = arrowSchema.getFields().stream() + .map(field -> new LanceColumnHandle( + field.getName(), + LanceColumnHandle.toPrestoType(field), + field.isNullable())) + .collect(toImmutableList()); + + return new LanceWritableTableHandle( + lanceTable.getSchemaName(), + lanceTable.getTableName(), + arrowSchema.toJson(), + columns); + } + + @Override + public Optional finishInsert( + ConnectorSession session, + ConnectorInsertTableHandle tableHandle, + Collection fragments, + Collection computedStatistics) + { + LanceWritableTableHandle handle = (LanceWritableTableHandle) tableHandle; + + if (!fragments.isEmpty()) { + List allFragments = collectFragments(fragments); + namespaceHolder.commitAppend(handle.getTableName(), allFragments); + } + return Optional.empty(); + } + + @Override + public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle) + { + LanceTableHandle lanceTable = (LanceTableHandle) tableHandle; + namespaceHolder.dropTable(lanceTable.getTableName()); + } + + private List collectFragments(Collection fragments) + { + ImmutableList.Builder allFragments = ImmutableList.builder(); + for (Slice slice : fragments) { + LanceCommitTaskData commitData = commitTaskDataCodec.fromJson(slice.getBytes()); + allFragments.addAll(LanceFragmentData.deserializeFragments(commitData.getFragmentsJson())); + } + return allFragments.build(); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceModule.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceModule.java new file mode 100644 index 
0000000000000..2dcb16cab944c --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceModule.java @@ -0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; +import com.facebook.presto.spi.connector.ConnectorPageSourceProvider; +import com.facebook.presto.spi.connector.ConnectorSplitManager; +import com.google.inject.Binder; +import com.google.inject.Module; +import com.google.inject.Scopes; + +import static com.facebook.airlift.configuration.ConfigBinder.configBinder; +import static com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder; + +public class LanceModule + implements Module +{ + @Override + public void configure(Binder binder) + { + configBinder(binder).bindConfig(LanceConfig.class); + binder.bind(LanceNamespaceHolder.class).in(Scopes.SINGLETON); + binder.bind(LanceConnector.class).in(Scopes.SINGLETON); + binder.bind(LanceMetadata.class).in(Scopes.SINGLETON); + binder.bind(ArrowBlockBuilder.class).in(Scopes.SINGLETON); + binder.bind(ConnectorSplitManager.class).to(LanceSplitManager.class).in(Scopes.SINGLETON); + binder.bind(ConnectorPageSourceProvider.class).to(LancePageSourceProvider.class).in(Scopes.SINGLETON); + binder.bind(ConnectorPageSinkProvider.class).to(LancePageSinkProvider.class).in(Scopes.SINGLETON); + 
jsonCodecBinder(binder).bindJsonCodec(LanceCommitTaskData.class); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceNamespaceHolder.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceNamespaceHolder.java new file mode 100644 index 0000000000000..d686b897f0a1a --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceNamespaceHolder.java @@ -0,0 +1,197 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.log.Logger; +import com.google.common.io.MoreFiles; +import com.google.common.io.RecursiveDeleteOption; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.Schema; +import org.lance.Dataset; +import org.lance.Fragment; +import org.lance.FragmentMetadata; +import org.lance.FragmentOperation; +import org.lance.ReadOptions; +import org.lance.WriteParams; + +import javax.inject.Inject; + +import java.io.IOException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +/** + * Holds the Lance namespace configuration and provides table management operations. 
+ * For the "dir" implementation, directly manages a directory-based table store. + * All tables live under a single "default" schema mapped to the root directory. + */ +public class LanceNamespaceHolder +{ + private static final Logger log = Logger.get(LanceNamespaceHolder.class); + public static final String DEFAULT_SCHEMA = "default"; + public static final String TABLE_PATH_SUFFIX = ".lance"; + + private final BufferAllocator allocator; + private final String root; + private final boolean singleLevelNs; + + @Inject + public LanceNamespaceHolder(LanceConfig config) + { + this.root = requireNonNull(config.getRootUrl(), "root is null"); + this.singleLevelNs = config.isSingleLevelNs(); + this.allocator = new RootAllocator(Long.MAX_VALUE); + log.debug("LanceNamespaceHolder initialized: root=%s, singleLevelNs=%s", root, singleLevelNs); + } + + public void shutdown() + { + try { + allocator.close(); + } + catch (Exception e) { + log.warn(e, "Error closing Arrow allocator"); + } + } + + public BufferAllocator getAllocator() + { + return allocator; + } + + public String getRoot() + { + return root; + } + + public boolean isSingleLevelNs() + { + return singleLevelNs; + } + + /** + * Get the filesystem path for a table. + */ + public String getTablePath(String tableName) + { + return Paths.get(root, tableName + TABLE_PATH_SUFFIX).toUri().toString(); + } + + /** + * Check if a table exists on the filesystem. + */ + public boolean tableExists(String tableName) + { + try { + Path path = Paths.get(root, tableName + TABLE_PATH_SUFFIX); + return Files.isDirectory(path); + } + catch (Exception e) { + return false; + } + } + + /** + * Get the Arrow schema for a table. + */ + public Schema describeTable(String tableName) + { + String tablePath = getTablePath(tableName); + try (Dataset dataset = Dataset.open(tablePath, new ReadOptions.Builder().build())) { + return dataset.getSchema(); + } + } + + /** + * List all tables in a schema. 
+ */ + public List listTables() + { + Path rootPath = Paths.get(root); + if (!Files.isDirectory(rootPath)) { + return Collections.emptyList(); + } + List tables = new ArrayList<>(); + try (DirectoryStream stream = Files.newDirectoryStream(rootPath, "*" + TABLE_PATH_SUFFIX)) { + for (Path entry : stream) { + if (Files.isDirectory(entry)) { + String fileName = entry.getFileName().toString(); + tables.add(fileName.substring(0, fileName.length() - TABLE_PATH_SUFFIX.length())); + } + } + } + catch (IOException e) { + log.warn(e, "Failed to list tables in %s", root); + } + return tables; + } + + /** + * Create an empty table with the given schema. + */ + public void createTable(String tableName, Schema arrowSchema) + { + String tablePath = getTablePath(tableName); + WriteParams params = new WriteParams.Builder().build(); + Dataset.create(allocator, tablePath, arrowSchema, params).close(); + } + + /** + * Drop a table. + */ + public void dropTable(String tableName) + { + Path tablePath = Paths.get(root, tableName + TABLE_PATH_SUFFIX); + if (Files.exists(tablePath)) { + try { + MoreFiles.deleteRecursively(tablePath, RecursiveDeleteOption.ALLOW_INSECURE); + } + catch (IOException e) { + throw new RuntimeException("Failed to delete table " + tableName, e); + } + } + } + + /** + * Commit fragments to a table (append operation). + */ + public void commitAppend(String tableName, List fragments) + { + String tablePath = getTablePath(tableName); + try (Dataset dataset = Dataset.open(tablePath, new ReadOptions.Builder().build())) { + FragmentOperation.Append appendOp = new FragmentOperation.Append(fragments); + Dataset.commit(allocator, tablePath, appendOp, Optional.of(dataset.version()), Collections.emptyMap()).close(); + } + } + + /** + * Get fragments for a table. 
+ */ + public List getFragments(String tableName) + { + String tablePath = getTablePath(tableName); + try (Dataset dataset = Dataset.open(tablePath, new ReadOptions.Builder().build())) { + return dataset.getFragments(); + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSink.java b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSink.java new file mode 100644 index 0000000000000..3b3388d784120 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSink.java @@ -0,0 +1,170 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.Page; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.spi.ConnectorPageSink; +import com.facebook.presto.spi.PrestoException; +import com.google.common.collect.ImmutableList; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.lance.Fragment; +import org.lance.FragmentMetadata; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.CompletableFuture.completedFuture; + +public class LancePageSink + implements ConnectorPageSink +{ + private static final Logger log = Logger.get(LancePageSink.class); + + private final String datasetUri; + private final Schema arrowSchema; + private final List columnTypes; + private final JsonCodec jsonCodec; + private final BufferAllocator allocator; + + private final List bufferedPages = new ArrayList<>(); + private long writtenBytes; + private long rowCount; + private boolean finished; + + public LancePageSink( + String datasetUri, + Schema arrowSchema, + List columns, + JsonCodec jsonCodec, + BufferAllocator parentAllocator) + { + this.datasetUri = requireNonNull(datasetUri, "datasetUri is null"); + this.arrowSchema = requireNonNull(arrowSchema, "arrowSchema is null"); + this.columnTypes = columns.stream() + .map(LanceColumnHandle::getColumnType) + .collect(toImmutableList()); + this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null"); + this.allocator = requireNonNull(parentAllocator, "parentAllocator is null") + 
.newChildAllocator("page-sink", 0, Long.MAX_VALUE); + } + + @Override + public CompletableFuture appendPage(Page page) + { + bufferedPages.add(page); + rowCount += page.getPositionCount(); + writtenBytes += page.getSizeInBytes(); + return NOT_BLOCKED; + } + + @Override + public CompletableFuture> finish() + { + if (finished) { + throw new IllegalStateException("PageSink already finished"); + } + finished = true; + + try { + String fragmentsJson; + if (bufferedPages.isEmpty()) { + fragmentsJson = "[]"; + } + else { + fragmentsJson = writeFragments(); + } + + LanceCommitTaskData commitData = new LanceCommitTaskData( + fragmentsJson, writtenBytes, rowCount); + + Slice slice = Slices.wrappedBuffer(jsonCodec.toJsonBytes(commitData)); + return completedFuture(ImmutableList.of(slice)); + } + catch (PrestoException e) { + throw e; + } + catch (Exception e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, + "Failed to write Lance fragments: " + e.getMessage(), e); + } + finally { + cleanup(); + } + } + + private String writeFragments() + { + try (VectorSchemaRoot root = VectorSchemaRoot.create(arrowSchema, allocator)) { + long totalRowsLong = bufferedPages.stream() + .mapToLong(Page::getPositionCount) + .sum(); + if (totalRowsLong > Integer.MAX_VALUE) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, + "Total row count exceeds maximum: " + totalRowsLong); + } + int totalRows = (int) totalRowsLong; + + root.allocateNew(); + + int currentOffset = 0; + for (Page page : bufferedPages) { + int pageRows = page.getPositionCount(); + for (int channel = 0; channel < page.getChannelCount(); channel++) { + LancePageToArrowConverter.writeBlockToVectorAtOffset( + page.getBlock(channel), + root.getVector(channel), + columnTypes.get(channel), + pageRows, + currentOffset); + } + currentOffset += pageRows; + } + root.setRowCount(totalRows); + + List fragments = Fragment.create( + datasetUri, allocator, root, + new org.lance.WriteParams.Builder().build()); + + return 
LanceFragmentData.serializeFragments(fragments); + } + } + + @Override + public void abort() + { + cleanup(); + } + + private void cleanup() + { + bufferedPages.clear(); + try { + allocator.close(); + } + catch (Exception e) { + log.warn(e, "Failed to close allocator"); + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSinkProvider.java b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSinkProvider.java new file mode 100644 index 0000000000000..c836e2cfea629 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSinkProvider.java @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.spi.ConnectorInsertTableHandle; +import com.facebook.presto.spi.ConnectorOutputTableHandle; +import com.facebook.presto.spi.ConnectorPageSink; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.PageSinkContext; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import org.apache.arrow.vector.types.pojo.Schema; + +import javax.inject.Inject; + +import java.io.IOException; + +import static java.util.Objects.requireNonNull; + +public class LancePageSinkProvider + implements ConnectorPageSinkProvider +{ + private final LanceNamespaceHolder namespaceHolder; + private final JsonCodec jsonCodec; + + @Inject + public LancePageSinkProvider( + LanceNamespaceHolder namespaceHolder, + JsonCodec jsonCodec) + { + this.namespaceHolder = requireNonNull(namespaceHolder, "namespaceHolder is null"); + this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null"); + } + + @Override + public ConnectorPageSink createPageSink( + ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorOutputTableHandle outputTableHandle, + PageSinkContext pageSinkContext) + { + LanceWritableTableHandle handle = (LanceWritableTableHandle) outputTableHandle; + return createPageSink(handle); + } + + @Override + public ConnectorPageSink createPageSink( + ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorInsertTableHandle insertTableHandle, + PageSinkContext pageSinkContext) + { + LanceWritableTableHandle handle = (LanceWritableTableHandle) insertTableHandle; + return createPageSink(handle); + } + + private ConnectorPageSink createPageSink(LanceWritableTableHandle handle) + { + Schema arrowSchema; + try { + arrowSchema = Schema.fromJSON(handle.getSchemaJson()); + } 
+ catch (IOException e) { + throw new PrestoException(LanceErrorCode.LANCE_ERROR, + "Failed to parse Arrow schema", e); + } + + String tablePath = namespaceHolder.getTablePath(handle.getTableName()); + + return new LancePageSink( + tablePath, + arrowSchema, + handle.getInputColumns(), + jsonCodec, + namespaceHolder.getAllocator()); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSourceProvider.java b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSourceProvider.java new file mode 100644 index 0000000000000..edf4409a0a876 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageSourceProvider.java @@ -0,0 +1,78 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.plugin.arrow.ArrowBlockBuilder; +import com.facebook.presto.common.RuntimeStats; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorPageSource; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.SplitContext; +import com.facebook.presto.spi.connector.ConnectorPageSourceProvider; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; + +import javax.inject.Inject; + +import java.util.List; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; + +public class LancePageSourceProvider + implements ConnectorPageSourceProvider +{ + private final LanceNamespaceHolder namespaceHolder; + private final LanceConfig config; + private final ArrowBlockBuilder arrowBlockBuilder; + + @Inject + public LancePageSourceProvider(LanceNamespaceHolder namespaceHolder, LanceConfig config, ArrowBlockBuilder arrowBlockBuilder) + { + this.namespaceHolder = requireNonNull(namespaceHolder, "namespaceHolder is null"); + this.config = requireNonNull(config, "config is null"); + this.arrowBlockBuilder = requireNonNull(arrowBlockBuilder, "arrowBlockBuilder is null"); + } + + @Override + public ConnectorPageSource createPageSource( + ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorSplit split, + ConnectorTableLayoutHandle layout, + List columns, + SplitContext splitContext, + RuntimeStats runtimeStats) + { + LanceSplit lanceSplit = (LanceSplit) split; + LanceTableLayoutHandle layoutHandle = (LanceTableLayoutHandle) layout; + LanceTableHandle tableHandle = layoutHandle.getTable(); + + List lanceColumns = columns.stream() + .map(LanceColumnHandle.class::cast) + .collect(toImmutableList()); + + String tablePath = 
namespaceHolder.getTablePath(tableHandle.getTableName()); + + return new LanceFragmentPageSource( + tableHandle, + lanceColumns, + lanceSplit.getFragments(), + tablePath, + config.getReadBatchSize(), + arrowBlockBuilder, + namespaceHolder.getAllocator()); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LancePageToArrowConverter.java b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageToArrowConverter.java new file mode 100644 index 0000000000000..17d2dfd05bf52 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LancePageToArrowConverter.java @@ -0,0 +1,121 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.type.BigintType; +import com.facebook.presto.common.type.BooleanType; +import com.facebook.presto.common.type.DateType; +import com.facebook.presto.common.type.DoubleType; +import com.facebook.presto.common.type.IntegerType; +import com.facebook.presto.common.type.RealType; +import com.facebook.presto.common.type.SmallintType; +import com.facebook.presto.common.type.TimestampType; +import com.facebook.presto.common.type.TinyintType; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.VarbinaryType; +import com.facebook.presto.common.type.VarcharType; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.PrestoException; +import com.google.common.collect.ImmutableList; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + +import java.util.List; + +import static java.lang.Float.intBitsToFloat; + +public final class LancePageToArrowConverter +{ + private LancePageToArrowConverter() {} + + public static Schema toArrowSchema(List columns) + { + ImmutableList.Builder fields = ImmutableList.builder(); + for (ColumnMetadata column : columns) { + ArrowType arrowType = 
LanceColumnHandle.toArrowType(column.getType()); + fields.add(new Field(column.getName(), new FieldType(column.isNullable(), arrowType, null), null)); + } + return new Schema(fields.build()); + } + + public static void writeBlockToVector(Block block, FieldVector vector, Type type, int rowCount) + { + writeBlockToVectorAtOffset(block, vector, type, rowCount, 0); + } + + public static void writeBlockToVectorAtOffset(Block block, FieldVector vector, Type type, int rowCount, int offset) + { + for (int i = 0; i < rowCount; i++) { + if (block.isNull(i)) { + // Arrow vectors handle nulls automatically with null bitmap + continue; + } + int targetIndex = offset + i; + if (type instanceof BooleanType) { + ((BitVector) vector).setSafe(targetIndex, type.getBoolean(block, i) ? 1 : 0); + } + else if (type instanceof TinyintType) { + ((TinyIntVector) vector).setSafe(targetIndex, (byte) type.getLong(block, i)); + } + else if (type instanceof SmallintType) { + ((SmallIntVector) vector).setSafe(targetIndex, (short) type.getLong(block, i)); + } + else if (type instanceof IntegerType) { + ((IntVector) vector).setSafe(targetIndex, (int) type.getLong(block, i)); + } + else if (type instanceof BigintType) { + ((BigIntVector) vector).setSafe(targetIndex, type.getLong(block, i)); + } + else if (type instanceof RealType) { + ((Float4Vector) vector).setSafe(targetIndex, intBitsToFloat((int) type.getLong(block, i))); + } + else if (type instanceof DoubleType) { + ((Float8Vector) vector).setSafe(targetIndex, type.getDouble(block, i)); + } + else if (type instanceof VarcharType) { + byte[] bytes = type.getSlice(block, i).getBytes(); + ((VarCharVector) vector).setSafe(targetIndex, bytes); + } + else if (type instanceof VarbinaryType) { + byte[] bytes = type.getSlice(block, i).getBytes(); + ((VarBinaryVector) vector).setSafe(targetIndex, bytes); + } + else if (type instanceof DateType) { + ((DateDayVector) vector).setSafe(targetIndex, (int) type.getLong(block, i)); + } + else if (type instanceof 
TimestampType) { + ((TimeStampMicroVector) vector).setSafe(targetIndex, type.getLong(block, i)); + } + else { + throw new PrestoException(LanceErrorCode.LANCE_TYPE_NOT_SUPPORTED, + "Unsupported type for Arrow conversion: " + type); + } + } + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LancePlugin.java b/presto-lance/src/main/java/com/facebook/presto/lance/LancePlugin.java new file mode 100644 index 0000000000000..882c3d88f271d --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LancePlugin.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.Plugin; +import com.facebook.presto.spi.connector.ConnectorFactory; +import com.google.common.collect.ImmutableList; + +public class LancePlugin + implements Plugin +{ + @Override + public Iterable getConnectorFactories() + { + return ImmutableList.of(new LanceConnectorFactory()); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplit.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplit.java new file mode 100644 index 0000000000000..fa6472d215e67 --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplit.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.HostAddress; +import com.facebook.presto.spi.NodeProvider; +import com.facebook.presto.spi.schedule.NodeSelectionStrategy; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import java.util.List; +import java.util.Objects; + +import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE; +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class LanceSplit + implements ConnectorSplit +{ + private final List fragments; + + @JsonCreator + public LanceSplit( + @JsonProperty("fragments") List fragments) + { + this.fragments = ImmutableList.copyOf(requireNonNull(fragments, "fragments is null")); + } + + @JsonProperty + public List getFragments() + { + return fragments; + } + + @Override + public NodeSelectionStrategy getNodeSelectionStrategy() + { + return NO_PREFERENCE; + } + + @Override + public List getPreferredNodes(NodeProvider nodeProvider) + { + return ImmutableList.of(); + } + + @Override + public Object getInfo() + { + return ImmutableMap.builder() + .put("fragments", fragments) + .build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LanceSplit that = 
(LanceSplit) o; + return Objects.equals(fragments, that.fragments); + } + + @Override + public int hashCode() + { + return Objects.hash(fragments); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("fragments", fragments) + .toString(); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplitManager.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplitManager.java new file mode 100644 index 0000000000000..47f9d6e018f4d --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceSplitManager.java @@ -0,0 +1,64 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.ConnectorSplit; +import com.facebook.presto.spi.ConnectorSplitSource; +import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.FixedSplitSource; +import com.facebook.presto.spi.connector.ConnectorSplitManager; +import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.google.common.collect.ImmutableList; +import org.lance.Fragment; + +import javax.inject.Inject; + +import java.util.List; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.util.Objects.requireNonNull; + +public class LanceSplitManager + implements ConnectorSplitManager +{ + private final LanceNamespaceHolder namespaceHolder; + + @Inject + public LanceSplitManager(LanceNamespaceHolder namespaceHolder) + { + this.namespaceHolder = requireNonNull(namespaceHolder, "namespaceHolder is null"); + } + + @Override + public ConnectorSplitSource getSplits( + ConnectorTransactionHandle transactionHandle, + ConnectorSession session, + ConnectorTableLayoutHandle layout, + SplitSchedulingContext splitSchedulingContext) + { + LanceTableLayoutHandle layoutHandle = (LanceTableLayoutHandle) layout; + LanceTableHandle tableHandle = layoutHandle.getTable(); + + List fragments = namespaceHolder.getFragments( + tableHandle.getTableName()); + + List splits = fragments.stream() + .map(fragment -> (ConnectorSplit) new LanceSplit( + ImmutableList.of(fragment.getId()))) + .collect(toImmutableList()); + + return new FixedSplitSource(splits); + } +} diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableHandle.java new file mode 100644 index 0000000000000..8806ba8ee387d --- /dev/null +++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableHandle.java @@ -0,0 +1,80 @@ +/* + * Licensed under the 
Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.presto.spi.ConnectorTableHandle;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Objects;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * JSON-serializable table handle identifying a Lance table by schema name and table name.
+ * Both names are mandatory; equality and hashing use both.
+ */
+public class LanceTableHandle
+        implements ConnectorTableHandle
+{
+    private final String schemaName;
+    private final String tableName;
+
+    @JsonCreator
+    public LanceTableHandle(
+            @JsonProperty("schemaName") String schemaName,
+            @JsonProperty("tableName") String tableName)
+    {
+        this.schemaName = requireNonNull(schemaName, "schemaName is null");
+        this.tableName = requireNonNull(tableName, "tableName is null");
+    }
+
+    @JsonProperty
+    public String getSchemaName()
+    {
+        return schemaName;
+    }
+
+    @JsonProperty
+    public String getTableName()
+    {
+        return tableName;
+    }
+
+    @Override
+    public int hashCode()
+    {
+        return Objects.hash(schemaName, tableName);
+    }
+
+    @Override
+    public boolean equals(Object obj)
+    {
+        if (this == obj) {
+            return true;
+        }
+        if ((obj == null) || (getClass() != obj.getClass())) {
+            return false;
+        }
+        LanceTableHandle other = (LanceTableHandle) obj;
+        return Objects.equals(this.schemaName, other.schemaName) &&
+                Objects.equals(this.tableName, other.tableName);
+    }
+
+    @Override
+    public String toString()
+    {
+        return toStringHelper(this)
+                .add("schemaName", schemaName)
+                .add("tableName", tableName)
+                .toString();
+    }
+}
diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableLayoutHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableLayoutHandle.java
new file mode 100644
index 0000000000000..9f5968711380b
--- /dev/null
+++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTableLayoutHandle.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.presto.common.predicate.TupleDomain;
+import com.facebook.presto.spi.ColumnHandle;
+import com.facebook.presto.spi.ConnectorTableLayoutHandle;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Objects;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * JSON-serializable layout handle pairing a {@link LanceTableHandle} with the
+ * tuple domain (column constraints) attached to the layout.
+ */
+public class LanceTableLayoutHandle
+        implements ConnectorTableLayoutHandle
+{
+    private final LanceTableHandle table;
+    // Keyed by ColumnHandle; a raw TupleDomain would leave the ColumnHandle import unused.
+    private final TupleDomain<ColumnHandle> tupleDomain;
+
+    @JsonCreator
+    public LanceTableLayoutHandle(
+            @JsonProperty("table") LanceTableHandle table,
+            @JsonProperty("tupleDomain") TupleDomain<ColumnHandle> domain)
+    {
+        this.table = requireNonNull(table, "table is null");
+        this.tupleDomain = requireNonNull(domain, "tupleDomain is null");
+    }
+
+    @JsonProperty
+    public LanceTableHandle getTable()
+    {
+        return table;
+    }
+
+    @JsonProperty
+    public TupleDomain<ColumnHandle> getTupleDomain()
+    {
+        return tupleDomain;
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        LanceTableLayoutHandle that = (LanceTableLayoutHandle) o;
+        return Objects.equals(table, that.table) &&
+                Objects.equals(tupleDomain, that.tupleDomain);
+    }
+
+    @Override
+    public int hashCode()
+    {
+        return Objects.hash(table, tupleDomain);
+    }
+
+    // Only the table is printed; the tuple domain is not part of the string form.
+    @Override
+    public String toString()
+    {
+        return table.toString();
+    }
+}
diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceTransactionHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTransactionHandle.java
new file mode 100644
index 0000000000000..1dee7096afe80
--- /dev/null
+++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceTransactionHandle.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
+
+/**
+ * Transaction handle of the Lance connector, modelled as an enum with a single constant.
+ */
+public enum LanceTransactionHandle
+        implements ConnectorTransactionHandle
+{
+    // The only value of this type.
+    INSTANCE
+}
diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/LanceWritableTableHandle.java b/presto-lance/src/main/java/com/facebook/presto/lance/LanceWritableTableHandle.java
new file mode 100644
index 0000000000000..28e8c26505759
--- /dev/null
+++ b/presto-lance/src/main/java/com/facebook/presto/lance/LanceWritableTableHandle.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.presto.spi.ConnectorInsertTableHandle;
+import com.facebook.presto.spi.ConnectorOutputTableHandle;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+import java.util.Objects;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * JSON-serializable handle for a Lance table being written. It implements both
+ * {@link ConnectorInsertTableHandle} and {@link ConnectorOutputTableHandle} and
+ * carries the table identity, a schema serialized as JSON, and the input columns.
+ */
+public class LanceWritableTableHandle
+        implements ConnectorInsertTableHandle, ConnectorOutputTableHandle
+{
+    private final String schemaName;
+    private final String tableName;
+    private final String schemaJson;
+    // NOTE(review): raw List - the element type is not visible from this file; confirm and parameterize.
+    private final List inputColumns;
+
+    /**
+     * All arguments are mandatory; {@code inputColumns} is defensively copied into an immutable list.
+     */
+    @JsonCreator
+    public LanceWritableTableHandle(
+            @JsonProperty("schemaName") String schemaName,
+            @JsonProperty("tableName") String tableName,
+            @JsonProperty("schemaJson") String schemaJson,
+            @JsonProperty("inputColumns") List inputColumns)
+    {
+        this.schemaName = requireNonNull(schemaName, "schemaName is null");
+        this.tableName = requireNonNull(tableName, "tableName is null");
+        this.schemaJson = requireNonNull(schemaJson, "schemaJson is null");
+        this.inputColumns = ImmutableList.copyOf(requireNonNull(inputColumns, "inputColumns is null"));
+    }
+
+    @JsonProperty
+    public String getSchemaName()
+    {
+        return schemaName;
+    }
+
+    @JsonProperty
+    public String getTableName()
+    {
+        return tableName;
+    }
+
+    @JsonProperty
+    public String getSchemaJson()
+    {
+        return schemaJson;
+    }
+
+    @JsonProperty
+    public List getInputColumns()
+    {
+        return inputColumns;
+    }
+
+    @Override
+    public int hashCode()
+    {
+        return Objects.hash(schemaName, tableName, schemaJson, inputColumns);
+    }
+
+    @Override
+    public boolean equals(Object obj)
+    {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        LanceWritableTableHandle that = (LanceWritableTableHandle) obj;
+        boolean sameTable = Objects.equals(schemaName, that.schemaName) &&
+                Objects.equals(tableName, that.tableName);
+        return sameTable &&
+                Objects.equals(schemaJson, that.schemaJson) &&
+                Objects.equals(inputColumns, that.inputColumns);
+    }
+
+    // The string form names the table only; schemaJson and inputColumns are left out.
+    @Override
+    public String toString()
+    {
+        return toStringHelper(this)
+                .add("schemaName", schemaName)
+                .add("tableName", tableName)
+                .toString();
+    }
+}
diff --git a/presto-lance/src/main/java/com/facebook/presto/lance/ScannerFactory.java b/presto-lance/src/main/java/com/facebook/presto/lance/ScannerFactory.java
new file mode 100644
index 0000000000000..6861b1c15752c
--- /dev/null
+++ b/presto-lance/src/main/java/com/facebook/presto/lance/ScannerFactory.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.lance.ipc.LanceScanner;
+
+import java.util.List;
+
+/**
+ * Factory that opens {@link LanceScanner} instances for a given allocator and column list.
+ */
+public interface ScannerFactory
+{
+    /**
+     * Opens a scanner.
+     *
+     * @param allocator Arrow buffer allocator handed to the scanner
+     * @param columns columns to scan - NOTE(review): raw List; presumably column names, confirm against implementations
+     * @return the opened scanner
+     */
+    LanceScanner open(BufferAllocator allocator, List columns);
+
+    /**
+     * Closes the factory. NOTE(review): presumably releases whatever {@link #open} acquired - confirm against implementations.
+     */
+    void close();
+}
diff --git a/presto-lance/src/main/resources/META-INF/services/com.facebook.presto.spi.Plugin b/presto-lance/src/main/resources/META-INF/services/com.facebook.presto.spi.Plugin
new file mode 100644
index 0000000000000..05805c911d148
--- /dev/null
+++ b/presto-lance/src/main/resources/META-INF/services/com.facebook.presto.spi.Plugin
@@ -0,0 +1 @@
+com.facebook.presto.lance.LancePlugin
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/LanceQueryRunner.java b/presto-lance/src/test/java/com/facebook/presto/lance/LanceQueryRunner.java
new file mode 100644
index 0000000000000..47cb45fb5989a
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/LanceQueryRunner.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.Session;
+import com.facebook.presto.tests.DistributedQueryRunner;
+import com.google.common.collect.ImmutableMap;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
+import static java.lang.String.format;
+
+/**
+ * Test utility that starts a {@link DistributedQueryRunner} with the Lance plugin
+ * installed and a catalog named {@code lance} created from the given connector properties.
+ */
+public class LanceQueryRunner
+{
+    private static final Logger log = Logger.get(LanceQueryRunner.class);
+    private static final String DEFAULT_SOURCE = "test";
+    private static final String DEFAULT_CATALOG = "lance";
+    private static final String DEFAULT_SCHEMA = "default";
+    private static final String ROOT_URL_PROPERTY = "lance.root-url";
+
+    private LanceQueryRunner()
+    {
+    }
+
+    /**
+     * Starts a query runner without forcing any server property. In particular no fixed
+     * HTTP port is requested, so several runners can be started on the same host.
+     *
+     * @param connectorProperties properties for the {@code lance} catalog
+     */
+    public static DistributedQueryRunner createLanceQueryRunner(Map<String, String> connectorProperties)
+            throws Exception
+    {
+        return createLanceQueryRunner(connectorProperties, ImmutableMap.of());
+    }
+
+    /**
+     * Starts a query runner with explicit server properties.
+     *
+     * @param connectorProperties properties for the {@code lance} catalog; when {@code lance.root-url}
+     *         is missing, a fresh temporary directory is created and used as the root
+     * @param extraProperties server properties passed to the runner builder (for example a fixed HTTP port)
+     * @return the started runner; it is closed before the exception is rethrown if setup fails
+     */
+    public static DistributedQueryRunner createLanceQueryRunner(
+            Map<String, String> connectorProperties,
+            Map<String, String> extraProperties)
+            throws Exception
+    {
+        DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(createSession())
+                .setExtraProperties(extraProperties)
+                .build();
+        try {
+            queryRunner.installPlugin(new LancePlugin());
+
+            // Work on a private mutable copy; the caller's map may be immutable.
+            Map<String, String> catalogProperties = new HashMap<>(connectorProperties);
+            if (!catalogProperties.containsKey(ROOT_URL_PROPERTY)) {
+                // Create the scratch directory only when the caller did not choose a root,
+                // so no unused directory is left behind otherwise.
+                Path tempDir = Files.createTempDirectory("lance-test");
+                catalogProperties.put(ROOT_URL_PROPERTY, tempDir.toString());
+            }
+
+            queryRunner.createCatalog(DEFAULT_CATALOG, "lance", catalogProperties);
+            return queryRunner;
+        }
+        catch (Exception e) {
+            queryRunner.close();
+            throw e;
+        }
+    }
+
+    /**
+     * Builds the default test session (source {@code test}, catalog {@code lance}, schema {@code default}).
+     */
+    public static Session createSession()
+    {
+        return testSessionBuilder()
+                .setSource(DEFAULT_SOURCE)
+                .setCatalog(DEFAULT_CATALOG)
+                .setSchema(DEFAULT_SCHEMA)
+                .build();
+    }
+
+    /**
+     * Starts a standalone runner for manual use and logs the coordinator URL.
+     */
+    public static void main(String[] args)
+            throws Exception
+    {
+        // Only the interactive entry point asks for the fixed port 8080.
+        DistributedQueryRunner queryRunner = createLanceQueryRunner(
+                ImmutableMap.of(),
+                ImmutableMap.of("http-server.http.port", "8080"));
+        log.info(format("Presto server started: %s", queryRunner.getCoordinator().getBaseUrl()));
+    }
+}
diff --git
a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java
new file mode 100644
index 0000000000000..072d1f2f9b94d
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceColumnHandle.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.airlift.json.JsonCodec;
+import com.facebook.airlift.json.JsonCodecFactory;
+import com.facebook.airlift.json.JsonObjectMapperProvider;
+import com.facebook.airlift.json.ObjectMapperProvider;
+import com.facebook.presto.common.type.Type;
+import com.facebook.presto.type.TypeDeserializer;
+import com.google.common.collect.ImmutableMap;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.testng.annotations.Test;
+
+import java.util.Collections;
+
+import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
+import static com.facebook.presto.common.type.DoubleType.DOUBLE;
+import static com.facebook.presto.common.type.IntegerType.INTEGER;
+import static com.facebook.presto.common.type.RealType.REAL;
+import static com.facebook.presto.common.type.VarcharType.VARCHAR;
+import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;
+import static org.testng.Assert.assertEquals;
+
+/** Tests the JSON round trip of {@link LanceColumnHandle} and its Arrow/Presto type mapping in both directions. */
+public class TestLanceColumnHandle
+{
+    @Test
+    public void testJsonRoundTrip()
+    {
+        LanceColumnHandle handle = new LanceColumnHandle("col1", BIGINT, true);
+        JsonCodec<LanceColumnHandle> codec = getJsonCodec();
+        String json = codec.toJson(handle);
+        LanceColumnHandle copy = codec.fromJson(json);
+        assertEquals(copy, handle);
+        assertEquals(copy.getColumnName(), "col1");
+        assertEquals(copy.getColumnType(), BIGINT);
+        assertEquals(copy.isNullable(), true);
+    }
+
+    @Test
+    public void testArrowToPrestoType()
+    {
+        assertEquals(LanceColumnHandle.toPrestoType(field("a", ArrowType.Bool.INSTANCE)), BOOLEAN);
+        assertEquals(LanceColumnHandle.toPrestoType(field("b", new ArrowType.Int(32, true))), INTEGER);
+        assertEquals(LanceColumnHandle.toPrestoType(field("c", new ArrowType.Int(64, true))), BIGINT);
+        assertEquals(LanceColumnHandle.toPrestoType(field("d", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))), REAL);
+        assertEquals(LanceColumnHandle.toPrestoType(field("e", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))), DOUBLE);
+        assertEquals(LanceColumnHandle.toPrestoType(field("f", ArrowType.Utf8.INSTANCE)), VARCHAR);
+    }
+
+    @Test
+    public void testPrestoToArrowType()
+    {
+        assertEquals(LanceColumnHandle.toArrowType(BOOLEAN), ArrowType.Bool.INSTANCE);
+        assertEquals(LanceColumnHandle.toArrowType(INTEGER), new ArrowType.Int(32, true));
+        assertEquals(LanceColumnHandle.toArrowType(BIGINT), new ArrowType.Int(64, true));
+        assertEquals(LanceColumnHandle.toArrowType(REAL), new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
+        assertEquals(LanceColumnHandle.toArrowType(DOUBLE), new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
+        assertEquals(LanceColumnHandle.toArrowType(VARCHAR), ArrowType.Utf8.INSTANCE);
+    }
+
+    private static Field field(String name, ArrowType type)
+    {
+        return new Field(name, new FieldType(true, type, null), Collections.emptyList());
+    }
+
+    private JsonCodec<LanceColumnHandle> getJsonCodec()
+    {
+        ObjectMapperProvider objectMapperProvider = new JsonObjectMapperProvider();
+        // Register a deserializer for Type so the handle's column type can be read back from JSON.
+        objectMapperProvider.setJsonDeserializers(ImmutableMap.of(Type.class, new TypeDeserializer(createTestFunctionAndTypeManager())));
+        return new JsonCodecFactory(objectMapperProvider).jsonCodec(LanceColumnHandle.class);
+    }
+}
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceCommitTaskData.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceCommitTaskData.java
new file mode 100644
index 0000000000000..632ef473eb997
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceCommitTaskData.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.airlift.json.JsonCodec;
+import org.testng.annotations.Test;
+
+import static com.facebook.airlift.json.JsonCodec.jsonCodec;
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Verifies that {@link LanceCommitTaskData} survives a JSON round trip with all three fields intact.
+ */
+public class TestLanceCommitTaskData
+{
+    @Test
+    public void testJsonRoundTrip()
+    {
+        String fragmentsJson = "[{\"id\":1}]";
+        LanceCommitTaskData data = new LanceCommitTaskData(fragmentsJson, 1024L, 100L);
+
+        // The codec must be parameterized: a raw JsonCodec returns Object from fromJson().
+        JsonCodec<LanceCommitTaskData> codec = jsonCodec(LanceCommitTaskData.class);
+        String json = codec.toJson(data);
+        LanceCommitTaskData copy = codec.fromJson(json);
+
+        assertEquals(copy.getFragmentsJson(), fragmentsJson);
+        assertEquals(copy.getWrittenBytes(), 1024L);
+        assertEquals(copy.getRowCount(), 100L);
+    }
+}
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceConfig.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceConfig.java
new file mode 100644
index 0000000000000..b3ab811739138
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceConfig.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.google.common.collect.ImmutableMap;
+import org.testng.annotations.Test;
+
+import java.util.Map;
+
+import static com.facebook.airlift.configuration.testing.ConfigAssertions.assertFullMapping;
+import static com.facebook.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults;
+import static com.facebook.airlift.configuration.testing.ConfigAssertions.recordDefaults;
+
+/**
+ * Checks the default values of {@link LanceConfig} and the mapping of every
+ * {@code lance.*} property onto its setter.
+ */
+public class TestLanceConfig
+{
+    @Test
+    public void testDefaults()
+    {
+        assertRecordedDefaults(recordDefaults(LanceConfig.class)
+                .setImpl("dir")
+                .setRootUrl("")
+                .setSingleLevelNs(true)
+                .setReadBatchSize(8192)
+                .setMaxRowsPerFile(1_000_000)
+                .setMaxRowsPerGroup(100_000)
+                .setWriteBatchSize(10_000));
+    }
+
+    @Test
+    public void testExplicitPropertyMappings()
+    {
+        // Every value below differs from the default asserted in testDefaults().
+        Map<String, String> properties = ImmutableMap.<String, String>builder()
+                .put("lance.impl", "rest")
+                .put("lance.root-url", "/data/lance")
+                .put("lance.single-level-ns", "false")
+                .put("lance.read-batch-size", "4096")
+                .put("lance.max-rows-per-file", "500000")
+                .put("lance.max-rows-per-group", "50000")
+                .put("lance.write-batch-size", "5000")
+                .build();
+
+        LanceConfig expected = new LanceConfig()
+                .setImpl("rest")
+                .setRootUrl("/data/lance")
+                .setSingleLevelNs(false)
+                .setReadBatchSize(4096)
+                .setMaxRowsPerFile(500_000)
+                .setMaxRowsPerGroup(50_000)
+                .setWriteBatchSize(5_000);
+
+        assertFullMapping(properties, expected);
+    }
+}
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceFragmentPageSource.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceFragmentPageSource.java
new file mode 100644
index 0000000000000..bcf46cdfc46e6
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceFragmentPageSource.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.lance;
+
+import com.facebook.plugin.arrow.ArrowBlockBuilder;
+import com.facebook.presto.common.Page;
+import com.facebook.presto.common.block.Block;
+import com.facebook.presto.spi.ColumnHandle;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Resources;
+import org.lance.Fragment;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+
+import static com.facebook.airlift.json.JsonCodec.jsonCodec;
+import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * Reads the first fragment of {@code test_table1} from the bundled {@code example_db}
+ * resource through {@link LanceFragmentPageSource}, with and without column projection.
+ */
+@Test(singleThreaded = true)
+public class TestLanceFragmentPageSource
+{
+    private static final int READ_BATCH_SIZE = 8192;
+
+    private LanceNamespaceHolder namespaceHolder;
+    private LanceTableHandle tableHandle;
+    private String tablePath;
+    private List<Fragment> fragments;
+    private ArrowBlockBuilder arrowBlockBuilder;
+
+    @BeforeMethod
+    public void setUp()
+            throws Exception
+    {
+        URL dbUrl = Resources.getResource(TestLanceFragmentPageSource.class, "/example_db");
+        assertNotNull(dbUrl, "example_db resource not found");
+        String rootPath = Paths.get(dbUrl.toURI()).toString();
+        LanceConfig config = new LanceConfig()
+                .setRootUrl(rootPath)
+                .setSingleLevelNs(true);
+        namespaceHolder = new LanceNamespaceHolder(config);
+        arrowBlockBuilder = new ArrowBlockBuilder(createTestFunctionAndTypeManager());
+        tableHandle = new LanceTableHandle("default", "test_table1");
+        tablePath = namespaceHolder.getTablePath("test_table1");
+        fragments = namespaceHolder.getFragments("test_table1");
+    }
+
+    @Test
+    public void testFragmentScan()
+            throws Exception
+    {
+        try (LanceFragmentPageSource pageSource = openFirstFragment(getColumns())) {
+            Page page = pageSource.getNextPage();
+            assertNotNull(page);
+            assertEquals(page.getChannelCount(), 4);
+            assertEquals(page.getPositionCount(), 2);
+
+            // Verify first column (x) has expected values
+            Block xBlock = page.getBlock(0);
+            assertEquals(BIGINT.getLong(xBlock, 0), 0L);
+
+            // The fragment fits in a single page, so the source must be exhausted afterwards.
+            Page nextPage = pageSource.getNextPage();
+            assertNull(nextPage);
+            assertTrue(pageSource.isFinished());
+        }
+    }
+
+    @Test
+    public void testColumnProjection()
+            throws Exception
+    {
+        Map<String, ColumnHandle> columnHandleMap = getColumnHandles();
+        LanceColumnHandle colB = (LanceColumnHandle) columnHandleMap.get("b");
+        LanceColumnHandle colX = (LanceColumnHandle) columnHandleMap.get("x");
+
+        try (LanceFragmentPageSource pageSource = openFirstFragment(ImmutableList.of(colB, colX))) {
+            Page page = pageSource.getNextPage();
+            assertNotNull(page);
+            assertEquals(page.getChannelCount(), 2);
+            assertEquals(page.getPositionCount(), 2);
+
+            // Channels follow the projection order: b first, then x.
+            Block bBlock = page.getBlock(0);
+            assertEquals(BIGINT.getLong(bBlock, 0), 0L);
+            assertEquals(BIGINT.getLong(bBlock, 1), 3L);
+
+            Block xBlock = page.getBlock(1);
+            assertEquals(BIGINT.getLong(xBlock, 0), 0L);
+            assertEquals(BIGINT.getLong(xBlock, 1), 1L);
+        }
+    }
+
+    @Test
+    public void testPartialColumnProjection()
+            throws Exception
+    {
+        Map<String, ColumnHandle> columnHandleMap = getColumnHandles();
+        LanceColumnHandle colC = (LanceColumnHandle) columnHandleMap.get("c");
+        LanceColumnHandle colX = (LanceColumnHandle) columnHandleMap.get("x");
+
+        try (LanceFragmentPageSource pageSource = openFirstFragment(ImmutableList.of(colC, colX))) {
+            Page page = pageSource.getNextPage();
+            assertNotNull(page);
+            assertEquals(page.getChannelCount(), 2);
+            assertEquals(page.getPositionCount(), 2);
+
+            // Channels follow the projection order: c first, then x.
+            Block cBlock = page.getBlock(0);
+            assertEquals(BIGINT.getLong(cBlock, 0), 0L);
+            assertEquals(BIGINT.getLong(cBlock, 1), -1L);
+
+            Block xBlock = page.getBlock(1);
+            assertEquals(BIGINT.getLong(xBlock, 0), 0L);
+            assertEquals(BIGINT.getLong(xBlock, 1), 1L);
+        }
+    }
+
+    /**
+     * Opens a page source over the first fragment of the test table for the given columns.
+     * Callers own the returned source and must close it.
+     */
+    private LanceFragmentPageSource openFirstFragment(List<LanceColumnHandle> columns)
+            throws Exception
+    {
+        return new LanceFragmentPageSource(
+                tableHandle,
+                columns,
+                ImmutableList.of(fragments.get(0).getId()),
+                tablePath,
+                READ_BATCH_SIZE,
+                arrowBlockBuilder,
+                namespaceHolder.getAllocator());
+    }
+
+    /** Returns every column handle of the test table, in the iteration order of the metadata map. */
+    private List<LanceColumnHandle> getColumns()
+    {
+        return getColumnHandles().values().stream()
+                .map(LanceColumnHandle.class::cast)
+                .collect(toImmutableList());
+    }
+
+    /** Resolves the column handles of the test table through a fresh {@link LanceMetadata}. */
+    private Map<String, ColumnHandle> getColumnHandles()
+    {
+        LanceMetadata metadata = new LanceMetadata(namespaceHolder, jsonCodec(LanceCommitTaskData.class));
+        return metadata.getColumnHandles(null, tableHandle);
+    }
+}
diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java
new file mode 100644
index 0000000000000..cbb93d7d18d7a
--- /dev/null
+++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceMetadata.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.ConnectorTableHandle; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableSet; +import com.google.common.io.Resources; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URL; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestLanceMetadata +{ + private LanceMetadata metadata; + + @BeforeMethod + public void setUp() + throws Exception + { + URL dbUrl = Resources.getResource(TestLanceMetadata.class, "/example_db"); + assertNotNull(dbUrl, "example_db resource not found"); + String rootPath = Paths.get(dbUrl.toURI()).toString(); + LanceConfig config = new LanceConfig() + .setRootUrl(rootPath) + .setSingleLevelNs(true); + LanceNamespaceHolder namespaceHolder = new LanceNamespaceHolder(config); + JsonCodec commitTaskDataCodec = jsonCodec(LanceCommitTaskData.class); + metadata = new LanceMetadata(namespaceHolder, commitTaskDataCodec); + } + + 
@Test + public void testListSchemaNames() + { + List schemas = metadata.listSchemaNames(null); + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), "default"); + } + + @Test + public void testGetTableHandle() + { + ConnectorTableHandle handle = metadata.getTableHandle(null, new SchemaTableName("default", "test_table1")); + assertNotNull(handle); + assertEquals(handle, new LanceTableHandle("default", "test_table1")); + + ConnectorTableHandle handle2 = metadata.getTableHandle(null, new SchemaTableName("default", "test_table2")); + assertNotNull(handle2); + assertEquals(handle2, new LanceTableHandle("default", "test_table2")); + + // non-existent schema + assertNull(metadata.getTableHandle(null, new SchemaTableName("other_schema", "test_table1"))); + + // non-existent table + assertNull(metadata.getTableHandle(null, new SchemaTableName("default", "nonexistent"))); + } + + @Test + public void testGetColumnHandles() + { + LanceTableHandle tableHandle = new LanceTableHandle("default", "test_table1"); + Map columns = metadata.getColumnHandles(null, tableHandle); + assertNotNull(columns); + assertEquals(columns.size(), 4); + assertTrue(columns.containsKey("x")); + assertTrue(columns.containsKey("y")); + assertTrue(columns.containsKey("b")); + assertTrue(columns.containsKey("c")); + } + + @Test + public void testGetTableMetadata() + { + LanceTableHandle tableHandle = new LanceTableHandle("default", "test_table1"); + ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(null, tableHandle); + assertNotNull(tableMetadata); + assertEquals(tableMetadata.getTable(), new SchemaTableName("default", "test_table1")); + assertEquals(tableMetadata.getColumns().size(), 4); + + // Verify column names + Set columnNames = tableMetadata.getColumns().stream() + .map(col -> col.getName()) + .collect(Collectors.toSet()); + assertEquals(columnNames, ImmutableSet.of("x", "y", "b", "c")); + } + + @Test + public void testListTables() + { + // all tables in default schema + 
List tables = metadata.listTables(null, Optional.of("default")); + Set tableSet = ImmutableSet.copyOf(tables); + assertEquals(tableSet, ImmutableSet.of( + new SchemaTableName("default", "test_table1"), + new SchemaTableName("default", "test_table2"), + new SchemaTableName("default", "test_table3"), + new SchemaTableName("default", "test_table4"))); + + // no schema filter + List allTables = metadata.listTables(null, Optional.empty()); + assertEquals(ImmutableSet.copyOf(allTables), tableSet); + } +} diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLancePlugin.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLancePlugin.java new file mode 100644 index 0000000000000..047f7355fc16e --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLancePlugin.java @@ -0,0 +1,66 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.presto.spi.connector.ConnectorFactory; +import com.facebook.presto.testing.TestingConnectorContext; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.MoreCollectors; +import org.testng.annotations.Test; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +public class TestLancePlugin +{ + @Test + public void testCreateConnector() + throws Exception + { + ConnectorFactory factory = StreamSupport + .stream(new LancePlugin().getConnectorFactories().spliterator(), false) + .collect(MoreCollectors.onlyElement()); + assertNotNull(factory); + assertEquals(factory.getName(), "lance"); + Path tempDir = Files.createTempDirectory("lance-test"); + try { + factory.create( + "test", + ImmutableMap.of("lance.root-url", tempDir.toString()), + new TestingConnectorContext()) + .shutdown(); + } + finally { + deleteRecursively(tempDir); + } + } + + private static void deleteRecursively(Path path) + throws Exception + { + if (Files.isDirectory(path)) { + try (Stream entries = Files.list(path)) { + for (Path entry : (Iterable) entries::iterator) { + deleteRecursively(entry); + } + } + } + Files.deleteIfExists(path); + } +} diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceSplit.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceSplit.java new file mode 100644 index 0000000000000..b83314e18b2fd --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceSplit.java @@ -0,0 +1,34 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.Test; + +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static org.testng.Assert.assertEquals; + +public class TestLanceSplit +{ + @Test + public void testJsonRoundTrip() + { + LanceSplit split = new LanceSplit(ImmutableList.of(0, 1, 2)); + JsonCodec codec = jsonCodec(LanceSplit.class); + String json = codec.toJson(split); + LanceSplit copy = codec.fromJson(json); + assertEquals(copy.getFragments(), ImmutableList.of(0, 1, 2)); + } +} diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceTableHandle.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceTableHandle.java new file mode 100644 index 0000000000000..ea59aff551009 --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceTableHandle.java @@ -0,0 +1,34 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.facebook.airlift.json.JsonCodec; +import org.testng.annotations.Test; + +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static org.testng.Assert.assertEquals; + +public class TestLanceTableHandle +{ + private final LanceTableHandle tableHandle = new LanceTableHandle("default", "test_table"); + + @Test + public void testJsonRoundTrip() + { + JsonCodec codec = jsonCodec(LanceTableHandle.class); + String json = codec.toJson(tableHandle); + LanceTableHandle copy = codec.fromJson(json); + assertEquals(copy, tableHandle); + } +} diff --git a/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceWritableTableHandle.java b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceWritableTableHandle.java new file mode 100644 index 0000000000000..32996db8acb64 --- /dev/null +++ b/presto-lance/src/test/java/com/facebook/presto/lance/TestLanceWritableTableHandle.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.lance; + +import com.google.common.collect.ImmutableList; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static org.testng.Assert.assertEquals; + +public class TestLanceWritableTableHandle +{ + @Test + public void testProperties() + { + List columns = ImmutableList.of( + new LanceColumnHandle("id", INTEGER, false), + new LanceColumnHandle("name", VARCHAR, true)); + LanceWritableTableHandle handle = new LanceWritableTableHandle( + "default", "test_table", "{}", columns); + + assertEquals(handle.getSchemaName(), "default"); + assertEquals(handle.getTableName(), "test_table"); + assertEquals(handle.getSchemaJson(), "{}"); + assertEquals(handle.getInputColumns().size(), 2); + assertEquals(handle.getInputColumns().get(0).getColumnName(), "id"); + } +} diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_latest.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_latest.manifest new file mode 100644 index 0000000000000..ad3bfea3fa163 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_latest.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/0-4daea2b4-b38b-4542-af0c-5a839ceab54a.txn b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/0-4daea2b4-b38b-4542-af0c-5a839ceab54a.txn new file mode 100644 index 0000000000000..a4de7d66c40bd Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/0-4daea2b4-b38b-4542-af0c-5a839ceab54a.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/1-99519b7f-c80f-4961-bacc-d556df5ae798.txn 
b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/1-99519b7f-c80f-4961-bacc-d556df5ae798.txn new file mode 100644 index 0000000000000..e5d1c6c78de54 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/1-99519b7f-c80f-4961-bacc-d556df5ae798.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/2-b9f7655d-01e1-4fa7-8ca2-ddc646564fb8.txn b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/2-b9f7655d-01e1-4fa7-8ca2-ddc646564fb8.txn new file mode 100644 index 0000000000000..0eeae4812848a Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/2-b9f7655d-01e1-4fa7-8ca2-ddc646564fb8.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/3-90bc5dd5-204d-42ba-b39a-65f2abce1602.txn b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/3-90bc5dd5-204d-42ba-b39a-65f2abce1602.txn new file mode 100644 index 0000000000000..26e81b4bf6228 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/3-90bc5dd5-204d-42ba-b39a-65f2abce1602.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/4-dffa23f0-c357-4935-a9c1-e286099b5533.txn b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/4-dffa23f0-c357-4935-a9c1-e286099b5533.txn new file mode 100644 index 0000000000000..9f4d38bed499a --- /dev/null +++ b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/4-dffa23f0-c357-4935-a9c1-e286099b5533.txn @@ -0,0 +1,3 @@ +$dffa23f0-c357-4935-a9c1-e286099b5533ê8 +x ÿÿÿÿÿÿÿÿÿ*int6408 +y ÿÿÿÿÿÿÿÿÿ*int6408 \ No newline at end of file diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/5-8bfb238d-4a29-4582-ab7d-8c53e2253e47.txn 
b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/5-8bfb238d-4a29-4582-ab7d-8c53e2253e47.txn new file mode 100644 index 0000000000000..c5ebfccd035d4 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_transactions/5-8bfb238d-4a29-4582-ab7d-8c53e2253e47.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/1.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/1.manifest new file mode 100644 index 0000000000000..9afc401b67a58 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/1.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/2.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/2.manifest new file mode 100644 index 0000000000000..02964ac016274 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/2.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/3.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/3.manifest new file mode 100644 index 0000000000000..d58ef994212c6 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/3.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/4.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/4.manifest new file mode 100644 index 0000000000000..d93ec5f20c80c Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/4.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/5.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/5.manifest new file mode 100644 index 0000000000000..2e829ca96b715 Binary files /dev/null 
and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/5.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/6.manifest b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/6.manifest new file mode 100644 index 0000000000000..ad3bfea3fa163 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/_versions/6.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/083d1c7c-b0d2-4ff3-b7ff-4237ea586491.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/083d1c7c-b0d2-4ff3-b7ff-4237ea586491.lance new file mode 100644 index 0000000000000..d20bd1d1d80d5 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/083d1c7c-b0d2-4ff3-b7ff-4237ea586491.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/25c37abd-c753-419b-b420-4847ce2de5a1.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/25c37abd-c753-419b-b420-4847ce2de5a1.lance new file mode 100644 index 0000000000000..8c320c2d27578 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/25c37abd-c753-419b-b420-4847ce2de5a1.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/2c8a0da6-1ace-4b1c-baf0-ed48b04996dc.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/2c8a0da6-1ace-4b1c-baf0-ed48b04996dc.lance new file mode 100644 index 0000000000000..089439d826b46 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/2c8a0da6-1ace-4b1c-baf0-ed48b04996dc.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/ac0bf34e-0e0d-4e3b-ae7e-ab247cae5f77.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/ac0bf34e-0e0d-4e3b-ae7e-ab247cae5f77.lance new file mode 
100644 index 0000000000000..123baf9574aac Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/ac0bf34e-0e0d-4e3b-ae7e-ab247cae5f77.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/c888f970-b7b3-4efb-9293-d7c6dc4996d2.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/c888f970-b7b3-4efb-9293-d7c6dc4996d2.lance new file mode 100644 index 0000000000000..92b5c5bfab42b Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/c888f970-b7b3-4efb-9293-d7c6dc4996d2.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table1.lance/data/cbe16da7-b812-43a1-87f1-521470dfed32.lance b/presto-lance/src/test/resources/example_db/test_table1.lance/data/cbe16da7-b812-43a1-87f1-521470dfed32.lance new file mode 100644 index 0000000000000..6aa336b8e0f58 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table1.lance/data/cbe16da7-b812-43a1-87f1-521470dfed32.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_deletions/0-1-8958018423523767581.arrow b/presto-lance/src/test/resources/example_db/test_table2.lance/_deletions/0-1-8958018423523767581.arrow new file mode 100644 index 0000000000000..0ff9e8401fbd2 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_deletions/0-1-8958018423523767581.arrow differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_latest.manifest b/presto-lance/src/test/resources/example_db/test_table2.lance/_latest.manifest new file mode 100644 index 0000000000000..c278c42a37dab Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_latest.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn 
b/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn new file mode 100644 index 0000000000000..39f2c93c022d2 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn b/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn new file mode 100644 index 0000000000000..b7b085fcf5b10 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/1.manifest b/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/1.manifest new file mode 100644 index 0000000000000..8fb093d607e9c Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/1.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/2.manifest b/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/2.manifest new file mode 100644 index 0000000000000..c278c42a37dab Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/_versions/2.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table2.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance b/presto-lance/src/test/resources/example_db/test_table2.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance new file mode 100644 index 0000000000000..ae5ace0c3b39e Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table2.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance differ diff --git 
a/presto-lance/src/test/resources/example_db/test_table3.lance/_deletions/0-1-8958018423523767581.arrow b/presto-lance/src/test/resources/example_db/test_table3.lance/_deletions/0-1-8958018423523767581.arrow new file mode 100644 index 0000000000000..0ff9e8401fbd2 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_deletions/0-1-8958018423523767581.arrow differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_latest.manifest b/presto-lance/src/test/resources/example_db/test_table3.lance/_latest.manifest new file mode 100644 index 0000000000000..e7e6573b28441 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_latest.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn new file mode 100644 index 0000000000000..39f2c93c022d2 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/0-304ab2ef-f7bc-47b8-aeb6-9110ec67bf98.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn new file mode 100644 index 0000000000000..b7b085fcf5b10 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/1-1baf3405-66ab-4668-9578-5c333acd0440.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/2-8e340735-2a60-438b-9cf0-ec662fb25f1a.txn b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/2-8e340735-2a60-438b-9cf0-ec662fb25f1a.txn new file mode 100644 index 0000000000000..5021d6474bf5a Binary files /dev/null and 
b/presto-lance/src/test/resources/example_db/test_table3.lance/_transactions/2-8e340735-2a60-438b-9cf0-ec662fb25f1a.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/1.manifest b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/1.manifest new file mode 100644 index 0000000000000..8fb093d607e9c Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/1.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/2.manifest b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/2.manifest new file mode 100644 index 0000000000000..c278c42a37dab Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/2.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/3.manifest b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/3.manifest new file mode 100644 index 0000000000000..e7e6573b28441 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/_versions/3.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance b/presto-lance/src/test/resources/example_db/test_table3.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance new file mode 100644 index 0000000000000..ae5ace0c3b39e Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table3.lance/data/016c15dc-2c94-4382-b7a4-2c7def9c3897.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table3.lance/data/e6574672-b3cb-4bc7-92a8-db8754dac368.lance b/presto-lance/src/test/resources/example_db/test_table3.lance/data/e6574672-b3cb-4bc7-92a8-db8754dac368.lance new file mode 100644 index 0000000000000..004a874b0b838 Binary files /dev/null and 
b/presto-lance/src/test/resources/example_db/test_table3.lance/data/e6574672-b3cb-4bc7-92a8-db8754dac368.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/d32dac97-985b-4628-b1b4-e4b64947e115/index.idx b/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/d32dac97-985b-4628-b1b4-e4b64947e115/index.idx new file mode 100644 index 0000000000000..9ba041c6d31da Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/d32dac97-985b-4628-b1b4-e4b64947e115/index.idx differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/f358a219-95e8-4956-be35-0835f2bed10f/index.idx b/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/f358a219-95e8-4956-be35-0835f2bed10f/index.idx new file mode 100644 index 0000000000000..9c86bf0a03277 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_indices/f358a219-95e8-4956-be35-0835f2bed10f/index.idx differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_latest.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_latest.manifest new file mode 100644 index 0000000000000..93b8060625d5a Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_latest.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/0-c4ece134-3d52-41a8-b2ec-0fb9fff76c35.txn b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/0-c4ece134-3d52-41a8-b2ec-0fb9fff76c35.txn new file mode 100644 index 0000000000000..15c6233536522 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/0-c4ece134-3d52-41a8-b2ec-0fb9fff76c35.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/1-cac38053-d1b8-4ff5-b34c-0e47b41c1b56.txn 
b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/1-cac38053-d1b8-4ff5-b34c-0e47b41c1b56.txn new file mode 100644 index 0000000000000..603a2023ec001 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/1-cac38053-d1b8-4ff5-b34c-0e47b41c1b56.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/2-f3ac6254-2471-4c8a-8183-e529af6d2603.txn b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/2-f3ac6254-2471-4c8a-8183-e529af6d2603.txn new file mode 100644 index 0000000000000..4359abf588ba9 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/2-f3ac6254-2471-4c8a-8183-e529af6d2603.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/3-e08f185e-5734-4533-bee5-325567f2221a.txn b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/3-e08f185e-5734-4533-bee5-325567f2221a.txn new file mode 100644 index 0000000000000..849aad9471f5b Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/3-e08f185e-5734-4533-bee5-325567f2221a.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/4-2536db77-3757-414b-a525-f8f3288e9d80.txn b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/4-2536db77-3757-414b-a525-f8f3288e9d80.txn new file mode 100644 index 0000000000000..32d0c7d28eae9 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_transactions/4-2536db77-3757-414b-a525-f8f3288e9d80.txn differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/1.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/1.manifest new file mode 100644 index 0000000000000..bacf074f4c002 Binary files /dev/null and 
b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/1.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/2.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/2.manifest new file mode 100644 index 0000000000000..043802ce5067d Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/2.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/3.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/3.manifest new file mode 100644 index 0000000000000..4e3ebb1919966 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/3.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/4.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/4.manifest new file mode 100644 index 0000000000000..426f1ba6f4f65 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/4.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/5.manifest b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/5.manifest new file mode 100644 index 0000000000000..93b8060625d5a Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/_versions/5.manifest differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/03c1a82b-a745-4bfe-8413-9441e4ed216e.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/03c1a82b-a745-4bfe-8413-9441e4ed216e.lance new file mode 100644 index 0000000000000..6c7822d71f99d Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/03c1a82b-a745-4bfe-8413-9441e4ed216e.lance differ diff --git 
a/presto-lance/src/test/resources/example_db/test_table4.lance/data/2f786e97-1d4c-43e5-bc32-6f7a444396f1.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/2f786e97-1d4c-43e5-bc32-6f7a444396f1.lance new file mode 100644 index 0000000000000..9fb75907db078 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/2f786e97-1d4c-43e5-bc32-6f7a444396f1.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/34199dea-ca38-460b-af71-a816b0f093a1.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/34199dea-ca38-460b-af71-a816b0f093a1.lance new file mode 100644 index 0000000000000..15bd2f6095741 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/34199dea-ca38-460b-af71-a816b0f093a1.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/37ff0067-df64-4ba7-8c50-2086eb2b8127.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/37ff0067-df64-4ba7-8c50-2086eb2b8127.lance new file mode 100644 index 0000000000000..bd7732dd741b0 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/37ff0067-df64-4ba7-8c50-2086eb2b8127.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/4062824b-36bd-42e6-9283-22e9f29dc5ed.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/4062824b-36bd-42e6-9283-22e9f29dc5ed.lance new file mode 100644 index 0000000000000..b6753118f6064 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/4062824b-36bd-42e6-9283-22e9f29dc5ed.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/4d41cc61-800b-46b0-a548-893a35201cf1.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/4d41cc61-800b-46b0-a548-893a35201cf1.lance new file mode 100644 index 
0000000000000..c14245dff6966 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/4d41cc61-800b-46b0-a548-893a35201cf1.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/66c4453b-7e80-411d-8508-e7f6dfeb693e.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/66c4453b-7e80-411d-8508-e7f6dfeb693e.lance new file mode 100644 index 0000000000000..90007b84741d4 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/66c4453b-7e80-411d-8508-e7f6dfeb693e.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/7ac6d965-4d35-4e2b-825b-f4a4a8be9024.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/7ac6d965-4d35-4e2b-825b-f4a4a8be9024.lance new file mode 100644 index 0000000000000..71f59ba6c4705 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/7ac6d965-4d35-4e2b-825b-f4a4a8be9024.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/86d11ae4-4a8f-48bc-b1b8-3c850a67c871.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/86d11ae4-4a8f-48bc-b1b8-3c850a67c871.lance new file mode 100644 index 0000000000000..bb61fcdea1035 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/86d11ae4-4a8f-48bc-b1b8-3c850a67c871.lance differ diff --git a/presto-lance/src/test/resources/example_db/test_table4.lance/data/cd32d611-941e-4aa9-88c4-72193c618255.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/cd32d611-941e-4aa9-88c4-72193c618255.lance new file mode 100644 index 0000000000000..c2fd193ee243d Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/cd32d611-941e-4aa9-88c4-72193c618255.lance differ diff --git 
a/presto-lance/src/test/resources/example_db/test_table4.lance/data/ec05a2ea-2387-45a0-a146-1208997c4f12.lance b/presto-lance/src/test/resources/example_db/test_table4.lance/data/ec05a2ea-2387-45a0-a146-1208997c4f12.lance new file mode 100644 index 0000000000000..356f66f3c2305 Binary files /dev/null and b/presto-lance/src/test/resources/example_db/test_table4.lance/data/ec05a2ea-2387-45a0-a146-1208997c4f12.lance differ