@@ -0,0 +1,163 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.spark.extensions;

import java.util.List;
import java.util.Map;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.spark.sql.AnalysisException;
import org.junit.After;
import org.junit.Test;

public class TestAncestorsOfProcedure extends SparkExtensionsTestBase {

  public TestAncestorsOfProcedure(String catalogName, String implementation, Map<String, String> config) {
    super(catalogName, implementation, config);
  }

  @After
  public void removeTables() {
    sql("DROP TABLE IF EXISTS %s", tableName);
  }

  @Test
  public void testAncestorOfUsingEmptyArgs() {
    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName);
    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);
    sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName);

    Table table = validationCatalog.loadTable(tableIdent);
    Long currentSnapshotId = table.currentSnapshot().snapshotId();
    Long currentTimestamp = table.currentSnapshot().timestampMillis();
    Long preSnapshotId = table.currentSnapshot().parentId();
    Long preTimeStamp = table.snapshot(table.currentSnapshot().parentId()).timestampMillis();

    List<Object[]> output = sql("CALL %s.system.ancestors_of('%s')",
        catalogName, tableIdent);

    assertEquals(
        "Procedure output must match",
        ImmutableList.of(
            row(currentSnapshotId, currentTimestamp),
            row(preSnapshotId, preTimeStamp)),
        output);
  }

  @Test
  public void testAncestorOfUsingSnapshotId() {
    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName);
    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);
    sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName);

    Table table = validationCatalog.loadTable(tableIdent);
    Long currentSnapshotId = table.currentSnapshot().snapshotId();
    Long currentTimestamp = table.currentSnapshot().timestampMillis();
    Long preSnapshotId = table.currentSnapshot().parentId();
    Long preTimeStamp = table.snapshot(table.currentSnapshot().parentId()).timestampMillis();

    assertEquals(
        "Procedure output must match",
        ImmutableList.of(
            row(currentSnapshotId, currentTimestamp),
            row(preSnapshotId, preTimeStamp)),
        sql("CALL %s.system.ancestors_of('%s', %dL)", catalogName, tableIdent, currentSnapshotId));

    assertEquals(
        "Procedure output must match",
        ImmutableList.of(row(preSnapshotId, preTimeStamp)),
        sql("CALL %s.system.ancestors_of('%s', %dL)", catalogName, tableIdent, preSnapshotId));
  }

  @Test
  public void testAncestorOfWithRollBack() {
    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName);
    Table table = validationCatalog.loadTable(tableIdent);
    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);
    table.refresh();
    Long firstSnapshotId = table.currentSnapshot().snapshotId();
    Long firstTimestamp = table.currentSnapshot().timestampMillis();
    sql("INSERT INTO TABLE %s VALUES (2, 'b')", tableName);
    table.refresh();
    Long secondSnapshotId = table.currentSnapshot().snapshotId();
    Long secondTimestamp = table.currentSnapshot().timestampMillis();
    sql("INSERT INTO TABLE %s VALUES (3, 'c')", tableName);
    table.refresh();
    Long thirdSnapshotId = table.currentSnapshot().snapshotId();
    Long thirdTimestamp = table.currentSnapshot().timestampMillis();

    // roll back to the second snapshot; the third snapshot is no longer an
    // ancestor of the table's current state, but it is still reachable by id
    sql("CALL %s.system.rollback_to_snapshot('%s', %dL)",
        catalogName, tableIdent, secondSnapshotId);

    sql("INSERT INTO TABLE %s VALUES (4, 'd')", tableName);
    table.refresh();
    Long fourthSnapshotId = table.currentSnapshot().snapshotId();
    Long fourthTimestamp = table.currentSnapshot().timestampMillis();

    // the fourth snapshot's ancestry skips the rolled-back third snapshot
    assertEquals(
        "Procedure output must match",
        ImmutableList.of(
            row(fourthSnapshotId, fourthTimestamp),
            row(secondSnapshotId, secondTimestamp),
            row(firstSnapshotId, firstTimestamp)),
        sql("CALL %s.system.ancestors_of('%s', %dL)", catalogName, tableIdent, fourthSnapshotId));

    // the third snapshot's ancestry is still queryable and excludes the fourth
    assertEquals(
        "Procedure output must match",
        ImmutableList.of(
            row(thirdSnapshotId, thirdTimestamp),
            row(secondSnapshotId, secondTimestamp),
            row(firstSnapshotId, firstTimestamp)),
        sql("CALL %s.system.ancestors_of('%s', %dL)", catalogName, tableIdent, thirdSnapshotId));
  }

  @Test
  public void testAncestorOfUsingNamedArgs() {
    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING iceberg", tableName);
    sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName);

    Table table = validationCatalog.loadTable(tableIdent);
    Long firstSnapshotId = table.currentSnapshot().snapshotId();
    Long firstTimestamp = table.currentSnapshot().timestampMillis();

    assertEquals(
        "Procedure output must match",
        ImmutableList.of(row(firstSnapshotId, firstTimestamp)),
        sql("CALL %s.system.ancestors_of(snapshot_id => %dL, table => '%s')",
            catalogName, firstSnapshotId, tableIdent));
  }

  @Test
  public void testInvalidAncestorOfCases() {
    AssertHelpers.assertThrows("Should reject calls without all required args",
        AnalysisException.class, "Missing required parameters",
        () -> sql("CALL %s.system.ancestors_of()", catalogName));

    AssertHelpers.assertThrows("Should reject calls with empty table identifier",
        IllegalArgumentException.class, "Cannot handle an empty identifier for argument table",
        () -> sql("CALL %s.system.ancestors_of('')", catalogName));

    AssertHelpers.assertThrows("Should reject calls with invalid arg types",
        AnalysisException.class, "Wrong arg type for snapshot_id: cannot cast",
        () -> sql("CALL %s.system.ancestors_of('%s', 1.1)", catalogName, tableIdent));
  }
}
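
For reference, the tests above exercise the procedure's three call forms, each returning (snapshot_id, timestamp) rows for the ancestor chain, newest first. The catalog name, table name, and snapshot id below are illustrative placeholders, not values from this diff:

    CALL my_catalog.system.ancestors_of('db.tbl')
    CALL my_catalog.system.ancestors_of('db.tbl', 1234567890123456789L)
    CALL my_catalog.system.ancestors_of(snapshot_id => 1234567890123456789L, table => 'db.tbl')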
@@ -0,0 +1,105 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.spark.procedures;

import java.util.List;
import org.apache.iceberg.Table;
import org.apache.iceberg.spark.source.SparkTable;
import org.apache.iceberg.util.SnapshotUtil;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.TableCatalog;
import org.apache.spark.sql.connector.iceberg.catalog.ProcedureParameter;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class AncestorsOfProcedure extends BaseProcedure {

  private static final ProcedureParameter[] PARAMETERS = new ProcedureParameter[] {
      ProcedureParameter.required("table", DataTypes.StringType),
      ProcedureParameter.optional("snapshot_id", DataTypes.LongType),
  };

  private static final StructType OUTPUT_TYPE = new StructType(new StructField[] {
      new StructField("snapshot_id", DataTypes.LongType, true, Metadata.empty()),
      new StructField("timestamp", DataTypes.LongType, true, Metadata.empty())
  });

  private AncestorsOfProcedure(TableCatalog tableCatalog) {
    super(tableCatalog);
  }

  public static SparkProcedures.ProcedureBuilder builder() {
    return new Builder<AncestorsOfProcedure>() {
      @Override
      protected AncestorsOfProcedure doBuild() {
        return new AncestorsOfProcedure(tableCatalog());
      }
    };
  }

  @Override
  public ProcedureParameter[] parameters() {
    return PARAMETERS;
  }

  @Override
  public StructType outputType() {
    return OUTPUT_TYPE;
  }

  @Override
  public InternalRow[] call(InternalRow args) {
    Identifier tableIdent = toIdentifier(args.getString(0), PARAMETERS[0].name());
    Long toSnapshotId = args.isNullAt(1) ? null : args.getLong(1);

    SparkTable sparkTable = loadSparkTable(tableIdent);
    Table icebergTable = sparkTable.table();

    // default to the current snapshot; -1 matches no snapshot, so an empty table yields no rows
    if (toSnapshotId == null) {
      toSnapshotId = icebergTable.currentSnapshot() != null ? icebergTable.currentSnapshot().snapshotId() : -1;
    }

    // collect the full ancestor chain of toSnapshotId, newest first
    List<Long> snapshotIds = SnapshotUtil.snapshotIdsBetween(icebergTable, 0L, toSnapshotId);

    return toOutputRow(icebergTable, snapshotIds);
  }

  @Override
  public String description() {
    return "AncestorsOf";
  }

  private InternalRow[] toOutputRow(Table table, List<Long> snapshotIds) {
    if (snapshotIds.isEmpty()) {
      return new InternalRow[0];
    }

    InternalRow[] internalRows = new InternalRow[snapshotIds.size()];
    for (int i = 0; i < snapshotIds.size(); i++) {
      Long snapshotId = snapshotIds.get(i);
      internalRows[i] = newInternalRow(snapshotId, table.snapshot(snapshotId).timestampMillis());
    }

    return internalRows;
  }
}
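
The traversal itself is delegated to SnapshotUtil.snapshotIdsBetween(icebergTable, 0L, toSnapshotId), which with a lower bound of 0L yields the full ancestor chain of toSnapshotId, newest first. As a mental model only, the hypothetical helper below sketches the equivalent parent-pointer walk; it is not the actual implementation in org.apache.iceberg.util.SnapshotUtil:

import java.util.ArrayList;
import java.util.List;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;

// Illustrative sketch only; AncestorWalkSketch is a hypothetical class, not part of Iceberg.
class AncestorWalkSketch {
  static List<Long> ancestorIds(Table table, long toSnapshotId) {
    List<Long> ids = new ArrayList<>();
    // table.snapshot(-1) returns null for an empty table, so the walk is a no-op there
    Snapshot current = table.snapshot(toSnapshotId);
    while (current != null) {
      ids.add(current.snapshotId());
      Long parentId = current.parentId();
      // stop at the root snapshot, which has no parent
      current = parentId == null ? null : table.snapshot(parentId);
    }
    return ids; // newest first, matching the procedure's output order
  }
}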
@@ -51,6 +51,7 @@ private static Map<String, Supplier<ProcedureBuilder>> initProcedureBuilders() {
mapBuilder.put("migrate", MigrateTableProcedure::builder);
mapBuilder.put("snapshot", SnapshotTableProcedure::builder);
mapBuilder.put("add_files", AddFilesProcedure::builder);
mapBuilder.put("ancestors_of", AncestorsOfProcedure::builder);
return mapBuilder.build();
}
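
Once the builder is registered under "ancestors_of", the procedure resolves through the catalog's system namespace, which is what makes the SQL in the tests above work against any configured Iceberg catalog; for example (catalog name illustrative):

    CALL my_catalog.system.ancestors_of(table => 'db.tbl')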
