diff --git a/api/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteFiles.java b/api/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteFiles.java new file mode 100644 index 000000000000..9247f209364e --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteFiles.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.actions; + +import org.apache.iceberg.expressions.Expression; + +/** + * An action for converting the equality delete files to position delete files. + */ +public interface ConvertEqualityDeleteFiles + extends SnapshotUpdate { + + /** + * A filter for finding the equality deletes to convert. + *

+ * The filter will be converted to a partition filter with an inclusive projection. Any file that may contain rows + * matching this filter will be used by the action. The matching delete files will be converted to position delete + * files. + * + * @param expression An iceberg expression used to find deletes. + * @return this for method chaining + */ + ConvertEqualityDeleteFiles filter(Expression expression); + + /** + * The action result that contains a summary of the execution. + */ + interface Result { + /** + * Returns the count of the deletes that been converted. + */ + int convertedEqualityDeleteFilesCount(); + + /** + * Returns the count of the added position delete files. + */ + int addedPositionDeleteFilesCount(); + } +} diff --git a/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java b/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java new file mode 100644 index 000000000000..1cd79915c0f2 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.actions; + +import org.apache.iceberg.expressions.Expression; + +/** + * An action for rewriting position delete files. + *

+ * Generally used for optimizing the size and layout of position delete files within a table. + */ +public interface RewritePositionDeleteFiles + extends SnapshotUpdate { + + /** + * A filter for finding deletes to rewrite. + *

+ * The filter will be converted to a partition filter with an inclusive projection. Any file that may contain rows + * matching this filter will be used by the action. The matching delete files will be rewritten. + * + * @param expression An iceberg expression used to find deletes. + * @return this for method chaining + */ + RewritePositionDeleteFiles filter(Expression expression); + + /** + * The action result that contains a summary of the execution. + */ + interface Result { + /** + * Returns the count of the position deletes that been rewritten. + */ + int rewrittenDeleteFilesCount(); + + /** + * Returns the count of the added delete files. + */ + int addedDeleteFilesCount(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteStrategy.java b/core/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteStrategy.java new file mode 100644 index 000000000000..6bbba82e49b5 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/ConvertEqualityDeleteStrategy.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Table; + +/** + * A strategy for the action to convert equality delete to position deletes. + */ +public interface ConvertEqualityDeleteStrategy { + + /** + * Returns the name of this convert deletes strategy + */ + String name(); + + /** + * Returns the table being modified by this convert strategy + */ + Table table(); + + /** + * Returns a set of options which this convert strategy can use. This is an allowed-list and any options not + * specified here will be rejected at runtime. + */ + Set validOptions(); + + /** + * Sets options to be used with this strategy + */ + RewritePositionDeleteStrategy options(Map options); + + /** + * Select the delete files to convert. + * + * @param deleteFiles iterable of delete files in a group. + * @return iterable of original delete file to be converted. + */ + Iterable selectDeleteFiles(Iterable deleteFiles); + + /** + * Groups delete files into lists which will be processed in a single executable unit. Each group will end up being + * committed as an independent set of changes. This creates the jobs which will eventually be run as by the underlying + * Action. + * + * @param dataFiles iterable of data files that contain the DeleteFile to be converted + * @return iterable of lists of FileScanTasks which will be processed together + */ + Iterable> planDeleteFileGroups(Iterable dataFiles); + + /** + * Define how to convert the deletes. + * + * @param deleteFilesToConvert a group of files to be converted together + * @return iterable of delete files used to replace the original delete files. + */ + Iterable convertDeleteFiles(Iterable deleteFilesToConvert); +} diff --git a/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteStrategy.java b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteStrategy.java new file mode 100644 index 000000000000..0b2c48a8a3a1 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteStrategy.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.actions; + +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.Table; + +/** + * A strategy for an action to rewrite position delete files. + */ +public interface RewritePositionDeleteStrategy { + + /** + * Returns the name of this rewrite deletes strategy + */ + String name(); + + /** + * Returns the table being modified by this rewrite strategy + */ + Table table(); + + /** + * Returns a set of options which this rewrite strategy can use. This is an allowed-list and any options not + * specified here will be rejected at runtime. + */ + Set validOptions(); + + /** + * Sets options to be used with this strategy + */ + RewritePositionDeleteStrategy options(Map options); + + /** + * Select the delete files to rewrite. + * + * @param deleteFiles iterable of delete files in a group. + * @return iterable of original delete file to be replaced. + */ + Iterable selectDeleteFiles(Iterable deleteFiles); + + /** + * Groups into lists which will be processed in a single executable unit. Each group will end up being + * committed as an independent set of changes. This creates the jobs which will eventually be run as by the underlying + * Action. + * + * @param deleteFiles iterable of DeleteFile to be rewritten + * @return iterable of lists of FileScanTasks which will be processed together + */ + Iterable> planDeleteFileGroups(Iterable deleteFiles); + + /** + * Define how to rewrite the deletes. + * + * @param deleteFilesToRewrite a group of files to be rewritten together + * @return iterable of delete files used to replace the original delete files. + */ + Iterable rewriteDeleteFiles(Iterable deleteFilesToRewrite); +}