@@ -51,7 +51,6 @@
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
-import org.apache.iceberg.SortDirection;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.SortOrderParser;
import org.apache.iceberg.Table;
@@ -251,21 +250,16 @@ private void setSortOrder(org.apache.hadoop.hive.metastore.api.Table hmsTable, S
return;
}

-    try {
-      SortFields sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, SortFields.class);
-      if (sortFields != null && !sortFields.getSortFields().isEmpty()) {
-        SortOrder.Builder sortOrderBuilder = SortOrder.builderFor(schema);
-        sortFields.getSortFields().forEach(fieldDesc -> {
-          NullOrder nullOrder = fieldDesc.getNullOrdering() == NullOrdering.NULLS_FIRST ?
-              NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
-          SortDirection sortDirection = fieldDesc.getDirection() == SortFieldDesc.SortDirection.ASC ?
-              SortDirection.ASC : SortDirection.DESC;
-          sortOrderBuilder.sortBy(fieldDesc.getColumnName(), sortDirection, nullOrder);
-        });
-        properties.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(sortOrderBuilder.build()));
-      }
-    } catch (Exception e) {
-      LOG.warn("Can not read write order json: {}", sortOrderJSONString);
-    }
+    List<SortFieldDesc> sortFieldDescList = parseSortFieldsJSON(sortOrderJSONString);
+    if (!sortFieldDescList.isEmpty()) {
+      SortOrder.Builder sortOrderBuilder = SortOrder.builderFor(schema);
+      sortFieldDescList.forEach(fieldDesc ->
+          sortOrderBuilder.sortBy(
+              fieldDesc.getColumnName(),
+              convertSortDirection(fieldDesc.getDirection()),
+              convertNullOrder(fieldDesc.getNullOrdering()))
+      );
+      properties.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(sortOrderBuilder.build()));
+    }
}

@@ -304,6 +298,44 @@ private boolean isZOrderJSON(String jsonString) {
}
}

+  /**
+   * Parses Hive SortFields JSON and returns the list of sort field descriptors.
+   * Shared by both the CREATE TABLE and ALTER TABLE flows.
+   *
+   * @param sortOrderJSONString the JSON string containing Hive SortFields
+   * @return the list of SortFieldDesc, or an empty list if the JSON is empty or cannot be parsed
+   */
+  protected List<SortFieldDesc> parseSortFieldsJSON(String sortOrderJSONString) {
+    if (Strings.isNullOrEmpty(sortOrderJSONString)) {
+      return Collections.emptyList();
+    }
+
+    try {
+      SortFields sortFields = JSON_OBJECT_MAPPER.reader().readValue(sortOrderJSONString, SortFields.class);
+      if (sortFields != null && sortFields.getSortFields() != null) {
+        return sortFields.getSortFields();
+      }
+    } catch (Exception e) {
+      LOG.warn("Failed to parse sort order JSON: {}", sortOrderJSONString, e);
+    }
+    return Collections.emptyList();
+  }
+
+  /**
+   * Converts Hive NullOrdering to Iceberg NullOrder.
+   */
+  protected static NullOrder convertNullOrder(NullOrdering nullOrdering) {
+    return nullOrdering == NullOrdering.NULLS_FIRST ? NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST;
+  }
+
+  /**
+   * Converts Hive SortDirection to Iceberg SortDirection.
+   */
+  private static org.apache.iceberg.SortDirection convertSortDirection(SortFieldDesc.SortDirection direction) {
+    return direction == SortFieldDesc.SortDirection.ASC ?
+        org.apache.iceberg.SortDirection.ASC : org.apache.iceberg.SortDirection.DESC;
+  }
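
For orientation, the net effect of these helpers is to turn Hive's SortFields JSON into Iceberg's sort-order JSON. Below is a minimal, self-contained sketch of the Iceberg side of that conversion, using only public Iceberg APIs; the toy schema and column names are invented for illustration, not taken from this patch.

import org.apache.iceberg.NullOrder;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortDirection;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.SortOrderParser;
import org.apache.iceberg.types.Types;

public class SortOrderJsonDemo {
  public static void main(String[] args) {
    // Toy schema standing in for the converted Hive table schema (illustrative only).
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.IntegerType.get()),
        Types.NestedField.optional(2, "name", Types.StringType.get()));

    // Same shape the builder above produces for: id DESC NULLS FIRST, name ASC NULLS LAST.
    SortOrder sortOrder = SortOrder.builderFor(schema)
        .sortBy("id", SortDirection.DESC, NullOrder.NULLS_FIRST)
        .sortBy("name", SortDirection.ASC, NullOrder.NULLS_LAST)
        .build();

    // The JSON that setSortOrder() stores under TableProperties.DEFAULT_SORT_ORDER.
    System.out.println(SortOrderParser.toJson(sortOrder));
  }
}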

@Override
public void rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
// do nothing
@@ -29,6 +29,8 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -54,6 +56,7 @@
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFieldDesc;
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
@@ -85,13 +88,17 @@
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.MetadataTableUtils;
+import org.apache.iceberg.NullOrder;
import org.apache.iceberg.PartitionData;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.PartitionsTable;
+import org.apache.iceberg.ReplaceSortOrder;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
+import org.apache.iceberg.SortOrder;
+import org.apache.iceberg.SortOrderParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadataParser;
@@ -624,15 +631,65 @@ private void alterTableProperties(org.apache.hadoop.hive.metastore.api.Table hms
Map<String, String> hmsTableParameters = hmsTable.getParameters();
Splitter splitter = Splitter.on(PROPERTIES_SEPARATOR);
UpdateProperties icebergUpdateProperties = icebergTable.updateProperties();

if (contextProperties.containsKey(SET_PROPERTIES)) {
-      splitter.splitToList(contextProperties.get(SET_PROPERTIES))
-          .forEach(k -> icebergUpdateProperties.set(k, hmsTableParameters.get(k)));
+      List<String> propertiesToSet = splitter.splitToList(contextProperties.get(SET_PROPERTIES));
+
+      // Define handlers for properties that need special processing
+      Map<String, Consumer<String>> propertyHandlers = Maps.newHashMap();
+      propertyHandlers.put(TableProperties.DEFAULT_SORT_ORDER,
+          key -> handleDefaultSortOrder(hmsTable, hmsTableParameters));
+
+      // Process each property using handlers or default behavior
+      propertiesToSet.forEach(key ->
+          propertyHandlers.getOrDefault(key,
+              k -> icebergUpdateProperties.set(k, hmsTableParameters.get(k))
+          ).accept(key)
+      );
} else if (contextProperties.containsKey(UNSET_PROPERTIES)) {
splitter.splitToList(contextProperties.get(UNSET_PROPERTIES)).forEach(icebergUpdateProperties::remove);
}

icebergUpdateProperties.commit();
}
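
The handler map in alterTableProperties is a small dispatch-table pattern: keys with special semantics get a dedicated Consumer, and everything else falls through to a default action via getOrDefault. A self-contained sketch of the same idea follows; the property names and print statements are placeholders, not Hive constants.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

public class PropertyDispatchDemo {
  public static void main(String[] args) {
    Map<String, Consumer<String>> handlers = new HashMap<>();
    // Properties that need special processing get their own handler.
    handlers.put("sort.order", key -> System.out.println("special handling for " + key));

    // Everything else is applied the plain way.
    Consumer<String> defaultHandler = key -> System.out.println("plain set for " + key);

    List<String> propertiesToSet = List.of("sort.order", "owner", "comment");
    propertiesToSet.forEach(key -> handlers.getOrDefault(key, defaultHandler).accept(key));
  }
}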

+  /**
+   * Handles conversion of Hive SortFields JSON to Iceberg SortOrder.
+   * Uses Iceberg's replaceSortOrder() API to properly handle the reserved property.
+   */
+  private void handleDefaultSortOrder(org.apache.hadoop.hive.metastore.api.Table hmsTable,
+      Map<String, String> hmsTableParameters) {
+    String sortOrderJSONString = hmsTableParameters.get(TableProperties.DEFAULT_SORT_ORDER);
+
+    List<SortFieldDesc> sortFieldDescList = parseSortFieldsJSON(sortOrderJSONString);
+    if (!sortFieldDescList.isEmpty()) {
+      try {
+        ReplaceSortOrder replaceSortOrder = icebergTable.replaceSortOrder();
+
+        // Chain all the sort field additions
+        for (SortFieldDesc fieldDesc : sortFieldDescList) {
+          NullOrder nullOrder = convertNullOrder(fieldDesc.getNullOrdering());
+
+          BiConsumer<String, NullOrder> sortMethod =
+              fieldDesc.getDirection() == SortFieldDesc.SortDirection.ASC ?
+                  replaceSortOrder::asc : replaceSortOrder::desc;
+
+          sortMethod.accept(fieldDesc.getColumnName(), nullOrder);
+        }
+
+        replaceSortOrder.commit();
+
+        // Update HMS table parameters with the Iceberg SortOrder JSON
+        SortOrder newSortOrder = icebergTable.sortOrder();
+        hmsTableParameters.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(newSortOrder));
+
+        LOG.debug("Successfully set sort order for table {}: {}", hmsTable.getTableName(), newSortOrder);
+      } catch (Exception e) {
+        LOG.warn("Failed to apply sort order for table {}: {}", hmsTable.getTableName(), sortOrderJSONString, e);
+      }
+    }
+  }
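
For reference, here is the replaceSortOrder() flow above reduced to a minimal sketch against a plain Table handle. Catalog setup and table loading are elided; asc, desc, and commit are the public Iceberg SortOrderBuilder and PendingUpdate methods, while the method and column names are illustrative.

import org.apache.iceberg.NullOrder;
import org.apache.iceberg.ReplaceSortOrder;
import org.apache.iceberg.Table;

public class ReplaceSortOrderSketch {
  // Equivalent of: ALTER TABLE ... SET WRITE ORDERED BY id DESC NULLS FIRST, name ASC NULLS LAST
  static void applyWriteOrder(Table table) {
    ReplaceSortOrder update = table.replaceSortOrder();
    update.desc("id", NullOrder.NULLS_FIRST)
        .asc("name", NullOrder.NULLS_LAST);
    // Commits a new metadata version whose default sort order is the one built above.
    update.commit();
  }
}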

private void setupAlterOperationType(org.apache.hadoop.hive.metastore.api.Table hmsTable,
EnvironmentContext context) throws MetaException {
TableName tableName = new TableName(hmsTable.getCatName(), hmsTable.getDbName(), hmsTable.getTableName());
@@ -0,0 +1,34 @@
-- Mask neededVirtualColumns due to non-strict order
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
-- Mask the totalSize value as it can have slight variability, causing test flakiness
--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
-- Mask random uuid
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
-- Mask a random snapshot id
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/
-- Mask added file size
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask total file size
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask removed file size
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask current-snapshot-timestamp-ms
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask iceberg version
--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/
set hive.vectorized.execution.enabled=true;

-- Test ALTER TABLE SET WRITE [LOCALLY] ORDERED BY

create table ice_orc_order (id int, name string, age int, city string) stored by iceberg stored as orc;
describe formatted ice_orc_order;

alter table ice_orc_order set write ordered by id desc nulls first, name asc nulls last;
describe formatted ice_orc_order;

explain insert into ice_orc_order values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
insert into ice_orc_order values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
select * from ice_orc_order;

drop table ice_orc_order;