diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java index 596ee4f2967a2..93b13fd871c6a 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java @@ -18,6 +18,7 @@ import com.facebook.presto.raptor.metadata.ColumnInfo; import com.facebook.presto.raptor.metadata.Distribution; import com.facebook.presto.raptor.metadata.MetadataDao; +import com.facebook.presto.raptor.metadata.ShardDeleteDelta; import com.facebook.presto.raptor.metadata.ShardDelta; import com.facebook.presto.raptor.metadata.ShardInfo; import com.facebook.presto.raptor.metadata.ShardManager; @@ -60,6 +61,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Multimaps; import io.airlift.slice.Slice; +import javafx.util.Pair; import org.skife.jdbi.v2.IDBI; import javax.annotation.Nullable; @@ -96,6 +98,7 @@ import static com.facebook.presto.raptor.RaptorTableProperties.DISTRIBUTION_NAME_PROPERTY; import static com.facebook.presto.raptor.RaptorTableProperties.ORDERING_PROPERTY; import static com.facebook.presto.raptor.RaptorTableProperties.ORGANIZED_PROPERTY; +import static com.facebook.presto.raptor.RaptorTableProperties.TABLE_SUPPORTS_DELTA_DELETE; import static com.facebook.presto.raptor.RaptorTableProperties.TEMPORAL_COLUMN_PROPERTY; import static com.facebook.presto.raptor.RaptorTableProperties.getBucketColumns; import static com.facebook.presto.raptor.RaptorTableProperties.getBucketCount; @@ -103,6 +106,7 @@ import static com.facebook.presto.raptor.RaptorTableProperties.getSortColumns; import static com.facebook.presto.raptor.RaptorTableProperties.getTemporalColumn; import static com.facebook.presto.raptor.RaptorTableProperties.isOrganized; +import static com.facebook.presto.raptor.RaptorTableProperties.isTableSupportsDeltaDelete; import static com.facebook.presto.raptor.systemtables.ColumnRangesSystemTable.getSourceTable; import static com.facebook.presto.raptor.util.DatabaseUtil.daoTransaction; import static com.facebook.presto.raptor.util.DatabaseUtil.onDemandDao; @@ -118,10 +122,12 @@ import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.Iterables.getOnlyElement; import static java.lang.String.format; import static java.util.Collections.nCopies; import static java.util.Objects.requireNonNull; +import static java.util.function.Function.identity; import static java.util.stream.Collectors.toCollection; import static java.util.stream.Collectors.toList; @@ -132,6 +138,7 @@ public class RaptorMetadata private static final JsonCodec SHARD_INFO_CODEC = jsonCodec(ShardInfo.class); private static final JsonCodec SHARD_DELTA_CODEC = jsonCodec(ShardDelta.class); + private static final JsonCodec SHARD_DELTA_DELETE_CODEC = jsonCodec(ShardDeleteDelta.class); private final IDBI dbi; private final MetadataDao dao; @@ -195,7 +202,8 @@ private RaptorTableHandle getTableHandle(SchemaTableName tableName) table.isOrganized(), OptionalLong.empty(), Optional.empty(), - false); + false, + table.isTableSupportsDeltaDelete()); } @Override @@ -237,6 +245,7 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect 
handle.getBucketCount().ifPresent(bucketCount -> properties.put(BUCKET_COUNT_PROPERTY, bucketCount)); handle.getDistributionName().ifPresent(distributionName -> properties.put(DISTRIBUTION_NAME_PROPERTY, distributionName)); + properties.put(TABLE_SUPPORTS_DELTA_DELETE, handle.isTableSupportsDeltaDelete()); // Only display organization property if set if (handle.isOrganized()) { properties.put(ORGANIZED_PROPERTY, true); @@ -568,6 +577,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } boolean organized = isOrganized(tableMetadata.getProperties()); + boolean tableSupportsDeltaDelete = isTableSupportsDeltaDelete(tableMetadata.getProperties()); if (organized) { if (temporalColumnHandle.isPresent()) { throw new PrestoException(NOT_SUPPORTED, "Table with temporal columns cannot be organized"); @@ -597,6 +607,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con distribution.map(info -> OptionalLong.of(info.getDistributionId())).orElse(OptionalLong.empty()), distribution.map(info -> OptionalInt.of(info.getBucketCount())).orElse(OptionalInt.empty()), organized, + tableSupportsDeltaDelete, distribution.map(DistributionInfo::getBucketColumns).orElse(ImmutableList.of())); } @@ -659,7 +670,7 @@ public Optional finishCreateTable(ConnectorSession sess Long distributionId = table.getDistributionId().isPresent() ? table.getDistributionId().getAsLong() : null; // TODO: update default value of organization_enabled to true - long tableId = dao.insertTable(table.getSchemaName(), table.getTableName(), true, table.isOrganized(), distributionId, updateTime); + long tableId = dao.insertTable(table.getSchemaName(), table.getTableName(), true, table.isOrganized(), distributionId, updateTime, table.isTableSupportsDeltaDelete()); List sortColumnHandles = table.getSortColumnHandles(); List bucketColumnHandles = table.getBucketColumnHandles(); @@ -689,7 +700,7 @@ public Optional finishCreateTable(ConnectorSession sess .orElse(OptionalLong.empty()); // TODO: refactor this to avoid creating an empty table on failure - shardManager.createTable(newTableId, columns, table.getBucketCount().isPresent(), temporalColumnId); + shardManager.createTable(newTableId, table.isTableSupportsDeltaDelete(), columns, table.getBucketCount().isPresent(), temporalColumnId); shardManager.commitShards(transactionId, newTableId, columns, parseFragments(fragments), Optional.empty(), updateTime); clearRollback(); @@ -799,7 +810,8 @@ public ConnectorTableHandle beginDelete(ConnectorSession session, ConnectorTable handle.isOrganized(), OptionalLong.of(transactionId), Optional.of(columnTypes), - true); + true, + handle.isTableSupportsDeltaDelete()); } @Override @@ -813,22 +825,36 @@ public void finishDelete(ConnectorSession session, ConnectorTableHandle tableHan .map(RaptorColumnHandle.class::cast) .map(ColumnInfo::fromHandle).collect(toList()); - ImmutableSet.Builder oldShardUuidsBuilder = ImmutableSet.builder(); - ImmutableList.Builder newShardsBuilder = ImmutableList.builder(); + if (table.isTableSupportsDeltaDelete()) { + ImmutableMap.Builder, Optional>> shardMapBuilder = new ImmutableMap.Builder<>(); - fragments.stream() - .map(fragment -> SHARD_DELTA_CODEC.fromJson(fragment.getBytes())) - .forEach(delta -> { - oldShardUuidsBuilder.addAll(delta.getOldShardUuids()); - newShardsBuilder.addAll(delta.getNewShards()); - }); + fragments.stream() + .map(fragment -> SHARD_DELTA_DELETE_CODEC.fromJson(fragment.getBytes())) + .forEach(delta -> shardMapBuilder.put(delta.getOldShardUuid(), 
new Pair, Optional>(delta.getOldDeltaDeleteShard(), delta.getNewDeltaDeleteShard()))); + OptionalLong updateTime = OptionalLong.of(session.getStartTime()); - Set oldShardUuids = oldShardUuidsBuilder.build(); - List newShards = newShardsBuilder.build(); - OptionalLong updateTime = OptionalLong.of(session.getStartTime()); + log.info("Finishing delete for tableId %s (affected shardUuid: %s)", tableId, shardMapBuilder.build().size()); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMapBuilder.build(), updateTime); + } + else { + ImmutableSet.Builder oldShardUuidsBuilder = ImmutableSet.builder(); + ImmutableList.Builder newShardsBuilder = ImmutableList.builder(); + + fragments.stream() + .map(fragment -> SHARD_DELTA_CODEC.fromJson(fragment.getBytes())) + .forEach(delta -> { + oldShardUuidsBuilder.addAll(delta.getOldShardUuids()); + newShardsBuilder.addAll(delta.getNewShards()); + }); - log.info("Finishing delete for tableId %s (removed: %s, rewritten: %s)", tableId, oldShardUuids.size() - newShards.size(), newShards.size()); - shardManager.replaceShardUuids(transactionId, tableId, columns, oldShardUuids, newShards, updateTime); + Set oldShardUuids = oldShardUuidsBuilder.build(); + List newShards = newShardsBuilder.build(); + OptionalLong updateTime = OptionalLong.of(session.getStartTime()); + + log.info("Finishing delete for tableId %s (removed: %s, rewritten: %s)", tableId, oldShardUuids.size() - newShards.size(), newShards.size()); + Map> oldShardUuidsMap = oldShardUuids.stream().collect(toImmutableMap(identity(), uuid -> Optional.empty())); + shardManager.replaceShardUuids(transactionId, false, tableId, columns, oldShardUuidsMap, newShards, updateTime); + } clearRollback(); } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorModule.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorModule.java index 1c83a68764a04..cb1107c073c22 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorModule.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorModule.java @@ -16,6 +16,7 @@ import com.facebook.presto.raptor.metadata.Distribution; import com.facebook.presto.raptor.metadata.ForMetadata; import com.facebook.presto.raptor.metadata.TableColumn; +import com.facebook.presto.raptor.storage.StorageManagerConfig; import com.facebook.presto.raptor.systemtables.ShardMetadataSystemTable; import com.facebook.presto.raptor.systemtables.TableMetadataSystemTable; import com.facebook.presto.raptor.systemtables.TableStatsSystemTable; @@ -33,6 +34,7 @@ import javax.inject.Singleton; +import static com.facebook.airlift.configuration.ConfigBinder.configBinder; import static com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry; import static com.google.inject.multibindings.Multibinder.newSetBinder; import static java.util.Objects.requireNonNull; @@ -50,6 +52,8 @@ public RaptorModule(String connectorId) @Override public void configure(Binder binder) { + configBinder(binder).bindConfig(StorageManagerConfig.class); + binder.bind(RaptorConnectorId.class).toInstance(new RaptorConnectorId(connectorId)); binder.bind(RaptorConnector.class).in(Scopes.SINGLETON); binder.bind(RaptorMetadataFactory.class).in(Scopes.SINGLETON); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorOutputTableHandle.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorOutputTableHandle.java index 958454acc23f0..29997a43fb856 100644 --- 
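For illustration, not part of the diff: in the delta branch of finishDelete above, each fragment is a JSON-encoded ShardDeleteDelta rather than a ShardDelta. A minimal sketch of decoding one fragment, assuming the generic parameters UUID and ShardInfo that the surrounding usage implies (the type arguments are not visible in this rendering):

    // Sketch only; "fragment" is one Slice from the fragments collection.
    JsonCodec<ShardDeleteDelta> codec = JsonCodec.jsonCodec(ShardDeleteDelta.class);
    ShardDeleteDelta delta = codec.fromJson(fragment.getBytes());
    UUID oldShard = delta.getOldShardUuid();                       // base shard the delete touched
    Optional<UUID> oldDelta = delta.getOldDeltaDeleteShard();      // delta previously attached to it, if any
    Optional<ShardInfo> newDelta = delta.getNewDeltaDeleteShard(); // new delta file, if rows remain
    // newDelta present -> attach/replace the shard's delta; newDelta empty -> drop the base shard entirely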
a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorOutputTableHandle.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorOutputTableHandle.java @@ -45,6 +45,7 @@ public class RaptorOutputTableHandle private final OptionalInt bucketCount; private final List bucketColumnHandles; private final boolean organized; + private final boolean tableSupportsDeltaDelete; @JsonCreator public RaptorOutputTableHandle( @@ -60,6 +61,7 @@ public RaptorOutputTableHandle( @JsonProperty("distributionId") OptionalLong distributionId, @JsonProperty("bucketCount") OptionalInt bucketCount, @JsonProperty("organized") boolean organized, + @JsonProperty("tableSupportsDeltaDelete") boolean tableSupportsDeltaDelete, @JsonProperty("bucketColumnHandles") List bucketColumnHandles) { this.connectorId = requireNonNull(connectorId, "connectorId is null"); @@ -75,6 +77,7 @@ public RaptorOutputTableHandle( this.bucketCount = requireNonNull(bucketCount, "bucketCount is null"); this.bucketColumnHandles = ImmutableList.copyOf(requireNonNull(bucketColumnHandles, "bucketColumnHandles is null")); this.organized = organized; + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; } @JsonProperty @@ -155,6 +158,12 @@ public boolean isOrganized() return organized; } + @JsonProperty + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + @Override public String toString() { diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorPageSourceProvider.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorPageSourceProvider.java index bb0d1b66bc473..770ce9e472f55 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorPageSourceProvider.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorPageSourceProvider.java @@ -60,16 +60,22 @@ public ConnectorPageSource createPageSource(ConnectorTransactionHandle transacti ReaderAttributes attributes = ReaderAttributes.from(session); OptionalLong transactionId = raptorSplit.getTransactionId(); Optional> columnTypes = raptorSplit.getColumnTypes(); + boolean tableSupportsDeltaDelete = raptorSplit.isTableSupportsDeltaDelete(); FileSystemContext context = new FileSystemContext(session); + Map shardDeltaMap = raptorSplit.getShardDeltaMap(); if (raptorSplit.getShardUuids().size() == 1) { UUID shardUuid = raptorSplit.getShardUuids().iterator().next(); - return createPageSource(context, shardUuid, bucketNumber, columns, predicate, attributes, transactionId, columnTypes); + return createPageSource(context, shardUuid, + Optional.ofNullable(shardDeltaMap.get(shardUuid)), + tableSupportsDeltaDelete, bucketNumber, columns, predicate, attributes, transactionId, columnTypes); } Iterator iterator = raptorSplit.getShardUuids().stream() - .map(shardUuid -> createPageSource(context, shardUuid, bucketNumber, columns, predicate, attributes, transactionId, columnTypes)) + .map(shardUuid -> createPageSource(context, shardUuid, + Optional.ofNullable(shardDeltaMap.get(shardUuid)), + tableSupportsDeltaDelete, bucketNumber, columns, predicate, attributes, transactionId, columnTypes)) .iterator(); return new ConcatPageSource(iterator); @@ -78,6 +84,8 @@ public ConnectorPageSource createPageSource(ConnectorTransactionHandle transacti private ConnectorPageSource createPageSource( FileSystemContext context, UUID shardUuid, + Optional deltaShardUuid, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List columns, TupleDomain predicate, @@ -89,6 +97,6 @@ private ConnectorPageSource 
createPageSource( List columnIds = columnHandles.stream().map(RaptorColumnHandle::getColumnId).collect(toList()); List columnTypes = columnHandles.stream().map(RaptorColumnHandle::getColumnType).collect(toList()); - return storageManager.getPageSource(context, shardUuid, bucketNumber, columnIds, columnTypes, predicate, attributes, transactionId, allColumnTypes); + return storageManager.getPageSource(context, shardUuid, deltaShardUuid, tableSupportsDeltaDelete, bucketNumber, columnIds, columnTypes, predicate, attributes, transactionId, allColumnTypes); } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplit.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplit.java index 1a9433539f300..54016326a8273 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplit.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplit.java @@ -38,6 +38,8 @@ public class RaptorSplit { private final String connectorId; private final Set shardUuids; + private final Map shardDeltaMap; + private final boolean tableSupportsDeltaDelete; private final OptionalInt bucketNumber; private final List addresses; private final TupleDomain effectivePredicate; @@ -48,40 +50,48 @@ public class RaptorSplit public RaptorSplit( @JsonProperty("connectorId") String connectorId, @JsonProperty("shardUuids") Set shardUuids, + @JsonProperty("shardDeltaMap") Map shardDeltaMap, + @JsonProperty("tableSupportsDeltaDelete") boolean tableSupportsDeltaDelete, @JsonProperty("bucketNumber") OptionalInt bucketNumber, @JsonProperty("effectivePredicate") TupleDomain effectivePredicate, @JsonProperty("transactionId") OptionalLong transactionId, @JsonProperty("columnTypes") Optional> columnTypes) { - this(connectorId, shardUuids, bucketNumber, ImmutableList.of(), effectivePredicate, transactionId, columnTypes); + this(connectorId, shardUuids, shardDeltaMap, tableSupportsDeltaDelete, bucketNumber, ImmutableList.of(), effectivePredicate, transactionId, columnTypes); } public RaptorSplit( String connectorId, UUID shardUuid, + Map shardDeltaMap, + boolean tableSupportsDeltaDelete, List addresses, TupleDomain effectivePredicate, OptionalLong transactionId, Optional> columnTypes) { - this(connectorId, ImmutableSet.of(shardUuid), OptionalInt.empty(), addresses, effectivePredicate, transactionId, columnTypes); + this(connectorId, ImmutableSet.of(shardUuid), shardDeltaMap, tableSupportsDeltaDelete, OptionalInt.empty(), addresses, effectivePredicate, transactionId, columnTypes); } public RaptorSplit( String connectorId, Set shardUuids, + Map shardDeltaMap, + boolean tableSupportsDeltaDelete, int bucketNumber, HostAddress address, TupleDomain effectivePredicate, OptionalLong transactionId, Optional> columnTypes) { - this(connectorId, shardUuids, OptionalInt.of(bucketNumber), ImmutableList.of(address), effectivePredicate, transactionId, columnTypes); + this(connectorId, shardUuids, shardDeltaMap, tableSupportsDeltaDelete, OptionalInt.of(bucketNumber), ImmutableList.of(address), effectivePredicate, transactionId, columnTypes); } private RaptorSplit( String connectorId, Set shardUuids, + Map shardDeltaMap, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List addresses, TupleDomain effectivePredicate, @@ -90,6 +100,8 @@ private RaptorSplit( { this.connectorId = requireNonNull(connectorId, "connectorId is null"); this.shardUuids = ImmutableSet.copyOf(requireNonNull(shardUuids, "shardUuid is null")); + this.shardDeltaMap = requireNonNull(shardDeltaMap, "shardUuid is 
null"); + this.tableSupportsDeltaDelete = requireNonNull(tableSupportsDeltaDelete, "tableSupportsDeltaDelete is null"); this.bucketNumber = requireNonNull(bucketNumber, "bucketNumber is null"); this.addresses = ImmutableList.copyOf(requireNonNull(addresses, "addresses is null")); this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null"); @@ -121,6 +133,12 @@ public Set getShardUuids() return shardUuids; } + @JsonProperty + public Map getShardDeltaMap() + { + return shardDeltaMap; + } + @JsonProperty public OptionalInt getBucketNumber() { @@ -145,6 +163,12 @@ public Optional> getColumnTypes() return columnTypes; } + @JsonProperty + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + @Override public Object getInfo() { @@ -156,6 +180,8 @@ public String toString() { return toStringHelper(this) .add("shardUuids", shardUuids) + .add("shardDeltaMap", shardDeltaMap.toString()) + .add("tableSupportsDeltaDelete", tableSupportsDeltaDelete) .add("bucketNumber", bucketNumber.isPresent() ? bucketNumber.getAsInt() : null) .add("hosts", addresses) .omitNullValues() diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplitManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplitManager.java index 1d14fd599f8ed..897993384908d 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplitManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorSplitManager.java @@ -32,6 +32,7 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.Type; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.skife.jdbi.v2.ResultIterator; import javax.annotation.PreDestroy; @@ -111,7 +112,7 @@ public ConnectorSplitSource getSplits( OptionalLong transactionId = table.getTransactionId(); Optional> bucketToNode = handle.getPartitioning().map(RaptorPartitioningHandle::getBucketToNode); verify(bucketed == bucketToNode.isPresent(), "mismatched bucketCount and bucketToNode presence"); - return new RaptorSplitSource(tableId, merged, effectivePredicate, transactionId, table.getColumnTypes(), bucketToNode); + return new RaptorSplitSource(tableId, merged, effectivePredicate, transactionId, table.getColumnTypes(), bucketToNode, handle.getTable().isTableSupportsDeltaDelete()); } private static List getAddressesForNodes(Map nodeMap, Iterable nodeIdentifiers) @@ -148,6 +149,7 @@ private class RaptorSplitSource private final Optional> columnTypes; private final Optional> bucketToNode; private final ResultIterator iterator; + private final boolean tableSupportsDeltaDelete; @GuardedBy("this") private CompletableFuture future; @@ -158,20 +160,22 @@ public RaptorSplitSource( TupleDomain effectivePredicate, OptionalLong transactionId, Optional> columnTypes, - Optional> bucketToNode) + Optional> bucketToNode, + boolean tableSupportsDeltaDelete) { this.tableId = tableId; this.effectivePredicate = requireNonNull(effectivePredicate, "effectivePredicate is null"); this.transactionId = requireNonNull(transactionId, "transactionId is null"); this.columnTypes = requireNonNull(columnTypes, "columnTypesis null"); this.bucketToNode = requireNonNull(bucketToNode, "bucketToNode is null"); + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; ResultIterator iterator; if (bucketToNode.isPresent()) { - iterator = shardManager.getShardNodesBucketed(tableId, merged, bucketToNode.get(), effectivePredicate); + iterator 
= shardManager.getShardNodesBucketed(tableId, tableSupportsDeltaDelete, merged, bucketToNode.get(), effectivePredicate); } else { - iterator = shardManager.getShardNodes(tableId, effectivePredicate); + iterator = shardManager.getShardNodes(tableId, tableSupportsDeltaDelete, effectivePredicate); } this.iterator = new SynchronizedResultIterator<>(iterator); } @@ -226,13 +230,14 @@ private ConnectorSplit createSplit(BucketShards bucketShards) verify(bucketShards.getShards().size() == 1, "wrong shard count for non-bucketed table"); ShardNodes shard = getOnlyElement(bucketShards.getShards()); - UUID shardId = shard.getShardUuid(); + UUID shardUuid = shard.getShardUuid(); + Optional deltaShardUuid = shard.getDeltaShardUuid(); Set nodeIds = shard.getNodeIdentifiers(); List addresses = getAddressesForNodes(nodesById, nodeIds); if (addresses.isEmpty()) { if (!backupAvailable) { - throw new PrestoException(RAPTOR_NO_HOST_FOR_SHARD, format("No host for shard %s found: %s", shardId, nodeIds)); + throw new PrestoException(RAPTOR_NO_HOST_FOR_SHARD, format("No host for shard %s found: %s", shardUuid, nodeIds)); } // Pick a random node and optimistically assign the shard to it. @@ -242,11 +247,20 @@ private ConnectorSplit createSplit(BucketShards bucketShards) throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query"); } Node node = selectRandom(availableNodes); - shardManager.replaceShardAssignment(tableId, shardId, node.getNodeIdentifier(), true); + shardManager.replaceShardAssignment(tableId, shardUuid, + deltaShardUuid.isPresent() ? Optional.empty() : deltaShardUuid, node.getNodeIdentifier(), true); addresses = ImmutableList.of(node.getHostAndPort()); } - return new RaptorSplit(connectorId, shardId, addresses, effectivePredicate, transactionId, columnTypes); + return new RaptorSplit( + connectorId, + shardUuid, + deltaShardUuid.isPresent() ? 
ImmutableMap.of(shardUuid, deltaShardUuid.get()) : ImmutableMap.of(), + tableSupportsDeltaDelete, + addresses, + effectivePredicate, + transactionId, + columnTypes); } private ConnectorSplit createBucketSplit(int bucketNumber, Set shards) @@ -263,9 +277,25 @@ private ConnectorSplit createBucketSplit(int bucketNumber, Set shard Set shardUuids = shards.stream() .map(ShardNodes::getShardUuid) .collect(toSet()); + ImmutableMap.Builder shardMapBuilder = new ImmutableMap.Builder<>(); + shards.stream().forEach( + shard -> { + if (shard.getDeltaShardUuid().isPresent()) { + shardMapBuilder.put(shard.getShardUuid(), shard.getDeltaShardUuid().get()); + } + }); HostAddress address = node.getHostAndPort(); - return new RaptorSplit(connectorId, shardUuids, bucketNumber, address, effectivePredicate, transactionId, columnTypes); + return new RaptorSplit( + connectorId, + shardUuids, + shardMapBuilder.build(), + tableSupportsDeltaDelete, + bucketNumber, + address, + effectivePredicate, + transactionId, + columnTypes); } } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableHandle.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableHandle.java index 97a4bd49e6966..5e38729567868 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableHandle.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableHandle.java @@ -43,6 +43,7 @@ public final class RaptorTableHandle private final OptionalLong transactionId; private final Optional> columnTypes; private final boolean delete; + private final boolean tableSupportsDeltaDelete; @JsonCreator public RaptorTableHandle( @@ -56,7 +57,8 @@ public RaptorTableHandle( @JsonProperty("organized") boolean organized, @JsonProperty("transactionId") OptionalLong transactionId, @JsonProperty("columnTypes") Optional> columnTypes, - @JsonProperty("delete") boolean delete) + @JsonProperty("delete") boolean delete, + @JsonProperty("tableSupportsDeltaDelete") boolean tableSupportsDeltaDelete) { this.connectorId = requireNonNull(connectorId, "connectorId is null"); this.schemaName = checkSchemaName(schemaName); @@ -73,6 +75,7 @@ public RaptorTableHandle( this.columnTypes = requireNonNull(columnTypes, "columnTypes is null"); this.delete = delete; + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; } public boolean isBucketed() @@ -146,6 +149,12 @@ public boolean isDelete() return delete; } + @JsonProperty + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + @Override public String toString() { diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableProperties.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableProperties.java index 7fe53f13df955..dbe920ffb7b9c 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableProperties.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorTableProperties.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.raptor; +import com.facebook.presto.raptor.storage.StorageManagerConfig; import com.facebook.presto.spi.session.PropertyMetadata; import com.facebook.presto.spi.type.TypeManager; import com.facebook.presto.spi.type.TypeSignatureParameter; @@ -39,11 +40,12 @@ public class RaptorTableProperties public static final String BUCKETED_ON_PROPERTY = "bucketed_on"; public static final String DISTRIBUTION_NAME_PROPERTY = "distribution_name"; public static final String ORGANIZED_PROPERTY = "organized"; + public static final String 
TABLE_SUPPORTS_DELTA_DELETE = "table_supports_delta_delete"; private final List> tableProperties; @Inject - public RaptorTableProperties(TypeManager typeManager) + public RaptorTableProperties(TypeManager typeManager, StorageManagerConfig storageManagerConfig) { tableProperties = ImmutableList.>builder() .add(stringListSessionProperty( @@ -70,6 +72,11 @@ public RaptorTableProperties(TypeManager typeManager) "Keep the table organized using the sort order", null, false)) + .add(booleanProperty( + TABLE_SUPPORTS_DELTA_DELETE, + "Support delta delete on the table", + storageManagerConfig.isTableSupportsDeltaDelete(), + false)) .build(); } @@ -110,6 +117,11 @@ public static boolean isOrganized(Map tableProperties) return (value == null) ? false : value; } + public static boolean isTableSupportsDeltaDelete(Map tableProperties) + { + return (Boolean) tableProperties.get(TABLE_SUPPORTS_DELTA_DELETE); + } + public static PropertyMetadata lowerCaseStringSessionProperty(String name, String description) { return new PropertyMetadata<>( diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/DatabaseShardManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/DatabaseShardManager.java index d6a7f82ece072..26df94edd5451 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/DatabaseShardManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/DatabaseShardManager.java @@ -28,11 +28,13 @@ import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.util.concurrent.UncheckedExecutionException; import io.airlift.units.Duration; +import javafx.util.Pair; import org.h2.jdbc.JdbcConnection; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.IDBI; @@ -51,6 +53,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -82,11 +85,13 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Throwables.throwIfInstanceOf; import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Iterables.partition; import static java.lang.Boolean.TRUE; import static java.lang.Math.multiplyExact; import static java.lang.String.format; import static java.sql.Statement.RETURN_GENERATED_KEYS; +import static java.sql.Types.BINARY; import static java.util.Arrays.asList; import static java.util.Collections.nCopies; import static java.util.Objects.requireNonNull; @@ -151,7 +156,7 @@ public DatabaseShardManager( } @Override - public void createTable(long tableId, List columns, boolean bucketed, OptionalLong temporalColumnId) + public void createTable(long tableId, boolean tableSupportsDeltaDelete, List columns, boolean bucketed, OptionalLong temporalColumnId) { StringJoiner tableColumns = new StringJoiner(",\n ", " ", ",\n").setEmptyValue(""); @@ -169,43 +174,28 @@ public void createTable(long tableId, List columns, boolean bucketed temporalColumnId.ifPresent(id -> coveringIndexColumns.add(maxColumn(id))); temporalColumnId.ifPresent(id -> coveringIndexColumns.add(minColumn(id))); - 
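For illustration, not part of the diff: with the property registered above, delta deletes are enabled per table at creation time, with the cluster-wide default coming from StorageManagerConfig. A hypothetical statement (schema and column names invented):

    CREATE TABLE raptor.demo.orders_delta (orderkey BIGINT, totalprice DOUBLE)
    WITH (table_supports_delta_delete = true);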
String sql; if (bucketed) { - coveringIndexColumns - .add("bucket_number") - .add("shard_id") - .add("shard_uuid"); - - sql = "" + - "CREATE TABLE " + shardIndexTable(tableId) + " (\n" + - " shard_id BIGINT NOT NULL,\n" + - " shard_uuid BINARY(16) NOT NULL,\n" + - " bucket_number INT NOT NULL\n," + - tableColumns + - " PRIMARY KEY (bucket_number, shard_uuid),\n" + - " UNIQUE (shard_id),\n" + - " UNIQUE (shard_uuid),\n" + - " UNIQUE (" + coveringIndexColumns + ")\n" + - ")"; + coveringIndexColumns.add("bucket_number"); } else { - coveringIndexColumns - .add("node_ids") - .add("shard_id") - .add("shard_uuid"); - - sql = "" + - "CREATE TABLE " + shardIndexTable(tableId) + " (\n" + - " shard_id BIGINT NOT NULL,\n" + - " shard_uuid BINARY(16) NOT NULL,\n" + - " node_ids VARBINARY(128) NOT NULL,\n" + - tableColumns + - " PRIMARY KEY (node_ids, shard_uuid),\n" + - " UNIQUE (shard_id),\n" + - " UNIQUE (shard_uuid),\n" + - " UNIQUE (" + coveringIndexColumns + ")\n" + - ")"; + coveringIndexColumns.add("node_ids"); } + coveringIndexColumns + .add("shard_id") + .add("shard_uuid"); + String sql = "" + + "CREATE TABLE " + shardIndexTable(tableId) + " (\n" + + " shard_id BIGINT NOT NULL,\n" + + " shard_uuid BINARY(16) NOT NULL,\n" + + // For table with table_supports_delta_delete, add extra column + (tableSupportsDeltaDelete ? " delta_shard_uuid BINARY(16) DEFAULT NULL,\n" : "") + + (bucketed ? " bucket_number INT NOT NULL\n," : " node_ids VARBINARY(128) NOT NULL,\n") + + tableColumns + + (bucketed ? " PRIMARY KEY (bucket_number, shard_uuid),\n" : " PRIMARY KEY (node_ids, shard_uuid),\n") + + " UNIQUE (shard_id),\n" + + " UNIQUE (shard_uuid),\n" + + " UNIQUE (" + coveringIndexColumns + ")\n" + + ")"; try (Handle handle = dbi.open()) { handle.execute(sql); @@ -296,53 +286,147 @@ public void commitShards(long transactionId, long tableId, List colu runCommit(transactionId, (handle) -> { externalBatchId.ifPresent(shardDaoSupplier.attach(handle)::insertExternalBatch); lockTable(handle, tableId); - insertShardsAndIndex(tableId, columns, shards, nodeIds, handle); + // 1. Insert new shards + insertShardsAndIndex(tableId, columns, shards, nodeIds, handle, false); ShardStats stats = shardStats(shards); - MetadataDao metadata = handle.attach(MetadataDao.class); - metadata.updateTableStats(tableId, shards.size(), stats.getRowCount(), stats.getCompressedSize(), stats.getUncompressedSize()); - metadata.updateTableVersion(tableId, updateTime); + + // 2. Update statistics and table version + updateStatsAndVersion(handle, tableId, shards.size(), 0, stats.getRowCount(), stats.getCompressedSize(), stats.getUncompressedSize(), OptionalLong.of(updateTime)); }); } + /** + * two types of oldShardUuidsMap entry + * a. shard1 -> delete shard + * b. 
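For illustration, not part of the diff: for a bucketed table created with table_supports_delta_delete = true, the string built above expands to roughly the DDL below; the index-table name and the per-column min/max statistics columns are placeholders for values produced elsewhere in this class:

    CREATE TABLE <shard index table for tableId> (
      shard_id BIGINT NOT NULL,
      shard_uuid BINARY(16) NOT NULL,
      delta_shard_uuid BINARY(16) DEFAULT NULL,
      bucket_number INT NOT NULL,
      -- per-column min/max statistics columns
      PRIMARY KEY (bucket_number, shard_uuid),
      UNIQUE (shard_id),
      UNIQUE (shard_uuid),
      UNIQUE (<statistics columns>, bucket_number, shard_id, shard_uuid)
    )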
shard2 delta2 -> delete shard and delta + * + * see replaceDeltaUuids + * a is essentially equal to A + * b is essentially equal to B + * + * @param tableSupportsDeltaDelete table table_supports_delta_delete properties + */ @Override - public void replaceShardUuids(long transactionId, long tableId, List columns, Set oldShardUuids, Collection newShards, OptionalLong updateTime) + public void replaceShardUuids(long transactionId, boolean tableSupportsDeltaDelete, long tableId, List columns, Map> oldShardUuidsMap, Collection newShards, OptionalLong updateTime) { Map nodeIds = toNodeIdMap(newShards); runCommit(transactionId, (handle) -> { lockTable(handle, tableId); + // For compaction if (!updateTime.isPresent() && handle.attach(MetadataDao.class).isMaintenanceBlockedLocked(tableId)) { throw new PrestoException(TRANSACTION_CONFLICT, "Maintenance is blocked for table"); } + // 1. Insert new shards + insertShardsAndIndex(tableId, columns, newShards, nodeIds, handle, false); ShardStats newStats = shardStats(newShards); long rowCount = newStats.getRowCount(); long compressedSize = newStats.getCompressedSize(); long uncompressedSize = newStats.getUncompressedSize(); - for (List shards : partition(newShards, 1000)) { - insertShardsAndIndex(tableId, columns, shards, nodeIds, handle); + // 2. Delete old shards and old delta + Set oldDeltaUuidSet = oldShardUuidsMap.values().stream().filter(uuid -> uuid.isPresent()).map(uuid -> uuid.get()).collect(toImmutableSet()); + ShardStats stats = deleteShardsAndIndex(tableId, oldShardUuidsMap, oldDeltaUuidSet, handle, tableSupportsDeltaDelete); + rowCount -= stats.getRowCount(); + compressedSize -= stats.getCompressedSize(); + uncompressedSize -= stats.getUncompressedSize(); + + // 3. Update statistics and table version + long deltaCountChange = -oldDeltaUuidSet.size(); + long shardCountChange = newShards.size() - oldShardUuidsMap.size(); + if (!oldShardUuidsMap.isEmpty() || !newShards.isEmpty()) { + updateStatsAndVersion(handle, tableId, shardCountChange, deltaCountChange, rowCount, compressedSize, uncompressedSize, updateTime); } + }); + } - for (List uuids : partition(oldShardUuids, 1000)) { - ShardStats stats = deleteShardsAndIndex(tableId, ImmutableSet.copyOf(uuids), handle); - rowCount -= stats.getRowCount(); - compressedSize -= stats.getCompressedSize(); - uncompressedSize -= stats.getUncompressedSize(); + /** + * four types of shardMap + * A. shard1 delete shard + * B. shard2 old_delta2 delete shard and delta + * C. shard3 new_delta3 add delta + * D. 
shard4 old_delta4 new_delta4 replace delta + * + * Conflict resolution: + * (A, A) after deleting shard, verify deleted shard count + * (B, B) after deleting shard, verify deleted shard count / after deleting delta, verify deleted delta count + * (C, C) when updating shard's delta, check its old delta, after updating, verify updated shard count + * (D, D) after deleting delta, verify deleted delta count / verify updated shard count + * + * (A, B) won't happen at the same time + * (A, C) + * A first, C: after updating shard's delta, verify updated shard count + * C first, A: when deleting shard, check its delta, after deleting, verify deleted shard count + * (A, D) won't happen at the same time + * (B, C) won't happen at the same time + * (B, D) same way as (A, C) + * (C, D) won't happen at the same time + */ + public void replaceDeltaUuids(long transactionId, long tableId, List columns, Map, Optional>> shardMap, OptionalLong updateTime) + { + runCommit(transactionId, (handle) -> { + lockTable(handle, tableId); + + Set newDeltaShardInfoSet = new HashSet<>(); + Set oldDeltaUuidSet = new HashSet<>(); + Map> toDeleteShardMap = new HashMap<>(); + Map, UUID>> toUpdateShard = new HashMap<>(); + + // Initialize + for (Map.Entry, Optional>> entry : shardMap.entrySet()) { + UUID uuid = entry.getKey(); + Pair, Optional> deltaPair = entry.getValue(); + // Replace Shard's delta if new deltaShard isn't empty + if (deltaPair.getValue().isPresent()) { + newDeltaShardInfoSet.add(deltaPair.getValue().get()); + toUpdateShard.put(uuid, new Pair(deltaPair.getKey(), deltaPair.getValue().get().getShardUuid())); + } + // Delete Shard if deltaShard is empty + else { + toDeleteShardMap.put(uuid, deltaPair.getKey()); + } + + if (deltaPair.getKey().isPresent()) { + oldDeltaUuidSet.add(deltaPair.getKey().get()); + } } - long shardCount = newShards.size() - oldShardUuids.size(); + // 1. Insert new deltas + Map nodeIds = toNodeIdMap(newDeltaShardInfoSet); + insertShardsAndIndex(tableId, columns, newDeltaShardInfoSet, nodeIds, handle, true); + ShardStats newStats = shardStats(newDeltaShardInfoSet); + long rowCount = -newStats.getRowCount(); + long compressedSize = newStats.getCompressedSize(); + long uncompressedSize = newStats.getUncompressedSize(); + + // 2. Delete toDelete shard and old delta + ShardStats stats = deleteShardsAndIndex(tableId, toDeleteShardMap, oldDeltaUuidSet, handle, true); + rowCount -= stats.getRowCount(); + compressedSize -= stats.getCompressedSize(); + uncompressedSize -= stats.getUncompressedSize(); - if (!oldShardUuids.isEmpty() || !newShards.isEmpty()) { - MetadataDao metadata = handle.attach(MetadataDao.class); - metadata.updateTableStats(tableId, shardCount, rowCount, compressedSize, uncompressedSize); - updateTime.ifPresent(time -> metadata.updateTableVersion(tableId, time)); + // 3. Update shard and delta relationship + updateShardsAndIndex(tableId, toUpdateShard, handle); + + // 4.
Update statistics and table version + int shardCountChange = -toDeleteShardMap.size(); + int deltaCountChange = newDeltaShardInfoSet.size() - oldDeltaUuidSet.size(); + if (!newDeltaShardInfoSet.isEmpty() || !oldDeltaUuidSet.isEmpty() || !toUpdateShard.isEmpty() || !toDeleteShardMap.isEmpty()) { + updateStatsAndVersion(handle, tableId, shardCountChange, deltaCountChange, rowCount, compressedSize, uncompressedSize, updateTime); } }); } + private void updateStatsAndVersion(Handle handle, long tableId, long shardCountChange, long deltaCountChange, long rowCount, long compressedSize, long uncompressedSize, OptionalLong updateTime) + { + MetadataDao metadata = handle.attach(MetadataDao.class); + metadata.updateTableStats(tableId, shardCountChange, deltaCountChange, rowCount, compressedSize, uncompressedSize); + updateTime.ifPresent(time -> metadata.updateTableVersion(tableId, time)); + } + private void runCommit(long transactionId, HandleConsumer callback) { int maxAttempts = 5; @@ -391,84 +475,190 @@ private static boolean commitTransaction(ShardDao dao, long transactionId) return true; } - private ShardStats deleteShardsAndIndex(long tableId, Set shardUuids, Handle handle) + /** + * Delete oldShard and oldDelta + * oldShard and oldDelta are not necessarily related for replaceDeltaUuids + */ + private ShardStats deleteShardsAndIndex(long tableId, Map> oldShardUuidsMap, Set oldDeltaUuidSet, Handle handle, boolean tableSupportsDeltaDelete) + throws SQLException + { + if (tableSupportsDeltaDelete) { + ShardStats shardStats = deleteShardsAndIndexWithDelta(tableId, oldShardUuidsMap, handle); + long rowCount = shardStats.getRowCount(); + long compressedSize = shardStats.getCompressedSize(); + long uncompressedSize = shardStats.getUncompressedSize(); + + ShardStats deltaStats = deleteShardsAndIndexSimple(tableId, oldDeltaUuidSet, handle, true); + rowCount -= deltaStats.getRowCount(); // delta + compressedSize += deltaStats.getCompressedSize(); + uncompressedSize += deltaStats.getUncompressedSize(); + + return new ShardStats(rowCount, compressedSize, uncompressedSize); + } + else { + Set oldShardUuidSet = oldShardUuidsMap.keySet(); + ShardStats stats = deleteShardsAndIndexSimple(tableId, oldShardUuidSet, handle, false); + return stats; + } + } + + /** + * For shards and delta + * + * Select id from `shards` table for both shard and delta shards + * - Purpose: 1. check the count as pre-check to avoid conflict 2. get statistics 3.
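A worked example of the bookkeeping above, with invented numbers and not part of the diff: suppose a DELETE drops base shard A (500 rows) entirely and attaches a first delta of 100 deleted rows to shard B, which had no delta before. Then replaceDeltaUuids computes:

    toDeleteShardMap = {A -> empty}, newDeltaShardInfoSet = {B's delta}, oldDeltaUuidSet = {}
    shardCountChange = -1                   (A removed, B kept)
    deltaCountChange = 1 - 0 = +1           (one new delta, none dropped)
    rowCount change  = -100 - 500 = -600    (delta rows count against the table, plus A's rows)
    size change      = +(B delta size) - (A file size)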
use id to perform delete + * + * Insert into deleted_shards + * + * Delete from `shards_node` table (won't verify delete count: NONE-BUCKETED) for both shards and delta shards + * Delete from `shards` table verify delete count for both shards and delta shards + * Delete from index table verify delete count only for shards + */ + private ShardStats deleteShardsAndIndexSimple(long tableId, Set shardUuids, Handle handle, boolean isDelta) + throws SQLException + { + if (shardUuids.isEmpty()) { + return new ShardStats(0, 0, 0); + } + + long rowCount = 0; + long compressedSize = 0; + long uncompressedSize = 0; + + // for batch execution + for (List uuids : partition(shardUuids, 1000)) { + String args = Joiner.on(",").join(nCopies(uuids.size(), "?")); + ImmutableSet.Builder shardIdSet = ImmutableSet.builder(); + String selectShards = format("" + + "SELECT shard_id, row_count, compressed_size, uncompressed_size\n" + + "FROM shards\n" + + "WHERE shard_uuid IN (%s)", args); + try (PreparedStatement statement = handle.getConnection().prepareStatement(selectShards)) { + bindUuids(statement, uuids); + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + shardIdSet.add(rs.getLong("shard_id")); + rowCount += rs.getLong("row_count"); + compressedSize += rs.getLong("compressed_size"); + uncompressedSize += rs.getLong("uncompressed_size"); + } + } + } + Set shardIds = shardIdSet.build(); + if (shardIds.size() != uuids.size()) { + throw transactionConflict(); + } + + // For background cleaner + ShardDao dao = shardDaoSupplier.attach(handle); + dao.insertDeletedShards(uuids); + + String where = " WHERE shard_id IN (" + args + ")"; + String deleteFromShardNodes = "DELETE FROM shard_nodes " + where; + String deleteFromShards = "DELETE FROM shards " + where; + String deleteFromShardIndex = "DELETE FROM " + shardIndexTable(tableId) + where; + + try (PreparedStatement statement = handle.getConnection().prepareStatement(deleteFromShardNodes)) { + bindLongs(statement, shardIds); + statement.executeUpdate(); + } + + for (String sql : isDelta ? asList(deleteFromShards) : asList(deleteFromShards, deleteFromShardIndex)) { + try (PreparedStatement statement = handle.getConnection().prepareStatement(sql)) { + bindLongs(statement, shardIds); + if (statement.executeUpdate() != shardIds.size()) { + throw transactionConflict(); + } + } + } + } + + return new ShardStats(rowCount, compressedSize, uncompressedSize); + } + + /** + * ONLY for shards (NO delta) + * + * Select id from `shards` table + * - Purpose: 1. check the count as pre-check to avoid conflict 2. get statistics 3. 
use id to perform delete + * + * Insert into deleted_shards + * + * Delete from `shards_node` table (won't verify delete count: NONE-BUCKETED) + * Delete from `shards` table check delta verify delete count + * Delete from index table check delta verify delete count + */ + private ShardStats deleteShardsAndIndexWithDelta(long tableId, Map> oldShardUuidsMap, Handle handle) throws SQLException { - String args = Joiner.on(",").join(nCopies(shardUuids.size(), "?")); + if (oldShardUuidsMap.isEmpty()) { + return new ShardStats(0, 0, 0); + } + String args = Joiner.on(",").join(nCopies(oldShardUuidsMap.size(), "?")); - ImmutableSet.Builder shardIdSet = ImmutableSet.builder(); + ImmutableMap.Builder shardUuidToIdBuilder = ImmutableMap.builder(); long rowCount = 0; long compressedSize = 0; long uncompressedSize = 0; String selectShards = format("" + - "SELECT shard_id, row_count, compressed_size, uncompressed_size\n" + + "SELECT shard_id, shard_uuid, row_count, compressed_size, uncompressed_size\n" + "FROM shards\n" + "WHERE shard_uuid IN (%s)", args); - try (PreparedStatement statement = handle.getConnection().prepareStatement(selectShards)) { - bindUuids(statement, shardUuids); + bindUuids(statement, oldShardUuidsMap.keySet()); try (ResultSet rs = statement.executeQuery()) { while (rs.next()) { - shardIdSet.add(rs.getLong("shard_id")); + shardUuidToIdBuilder.put(uuidFromBytes(rs.getBytes("shard_uuid")), rs.getLong("shard_id")); rowCount += rs.getLong("row_count"); compressedSize += rs.getLong("compressed_size"); uncompressedSize += rs.getLong("uncompressed_size"); } } } - - Set shardIds = shardIdSet.build(); - if (shardIds.size() != shardUuids.size()) { + Map shardUuidToId = shardUuidToIdBuilder.build(); + if (shardUuidToId.size() != oldShardUuidsMap.size()) { throw transactionConflict(); } + // For background cleaner ShardDao dao = shardDaoSupplier.attach(handle); - dao.insertDeletedShards(shardUuids); + dao.insertDeletedShards(oldShardUuidsMap.keySet()); String where = " WHERE shard_id IN (" + args + ")"; String deleteFromShardNodes = "DELETE FROM shard_nodes " + where; - String deleteFromShards = "DELETE FROM shards " + where; - String deleteFromShardIndex = "DELETE FROM " + shardIndexTable(tableId) + where; - try (PreparedStatement statement = handle.getConnection().prepareStatement(deleteFromShardNodes)) { - bindLongs(statement, shardIds); + bindLongs(statement, shardUuidToId.values()); statement.executeUpdate(); } - for (String sql : asList(deleteFromShards, deleteFromShardIndex)) { - try (PreparedStatement statement = handle.getConnection().prepareStatement(sql)) { - bindLongs(statement, shardIds); - if (statement.executeUpdate() != shardIds.size()) { - throw transactionConflict(); + Connection connection = handle.getConnection(); + int updatedCount = 0; + try (ShardsAndIndexDeleter shardsAndIndexDeleter = new ShardsAndIndexDeleter(connection, tableId)) { + for (List batch : partition(oldShardUuidsMap.keySet(), batchSize(connection))) { + for (int i = 0; i < batch.size(); i++) { + UUID uuid = batch.get(i); + Optional deltaUuid = oldShardUuidsMap.get(uuid); + shardsAndIndexDeleter.delete(shardUuidToId.get(uuid), deltaUuid); } + updatedCount += shardsAndIndexDeleter.execute(); } } - - return new ShardStats(rowCount, compressedSize, uncompressedSize); - } - - private static void bindUuids(PreparedStatement statement, Iterable uuids) - throws SQLException - { - int i = 1; - for (UUID uuid : uuids) { - statement.setBytes(i, uuidToBytes(uuid)); - i++; + if (updatedCount != oldShardUuidsMap.size()) 
{ + throw transactionConflict(); } - } - private static void bindLongs(PreparedStatement statement, Iterable values) - throws SQLException - { - int i = 1; - for (long value : values) { - statement.setLong(i, value); - i++; - } + return new ShardStats(rowCount, compressedSize, uncompressedSize); } - private static void insertShardsAndIndex(long tableId, List columns, Collection shards, Map nodeIds, Handle handle) + /** + * For shards and delta + * + * Insert into `shards` for both shards and delta shards + * Insert into `shard_nodes` (non-bucketed) for both shards and delta shards + * Insert into index table only for shards + */ + private static void insertShardsAndIndex(long tableId, List columns, Collection shards, Map nodeIds, Handle handle, boolean isDelta) throws SQLException { if (shards.isEmpty()) { @@ -479,27 +669,102 @@ private static void insertShardsAndIndex(long tableId, List columns, Connection connection = handle.getConnection(); try (IndexInserter indexInserter = new IndexInserter(connection, tableId, columns)) { for (List batch : partition(shards, batchSize(connection))) { - List shardIds = insertShards(connection, tableId, batch); + List shardIds = insertShards(connection, tableId, batch, isDelta); if (!bucketed) { insertShardNodes(connection, nodeIds, shardIds, batch); } + if (!isDelta) { + for (int i = 0; i < batch.size(); i++) { + ShardInfo shard = batch.get(i); + Set shardNodes = shard.getNodeIdentifiers().stream() + .map(nodeIds::get) + .collect(toSet()); + indexInserter.insert( + shardIds.get(i), + shard.getShardUuid(), + shard.getBucketNumber(), + shardNodes, + shard.getColumnStats()); + } + indexInserter.execute(); + } + } + } + } + + /** + * For shards + * + * Select id from `shards` table + * - Purpose: 1. check the count as pre-check to avoid conflict 2. get statistics 3. 
use id to perform update + * + * Update `shards` table check delta verify delete count + * Update index table check delta verify delete count + */ + private void updateShardsAndIndex(long tableId, Map, UUID>> toUpdateShard, Handle handle) + throws SQLException + { + if (toUpdateShard.isEmpty()) { + return; + } + + String args = Joiner.on(",").join(nCopies(toUpdateShard.size(), "?")); + ImmutableMap.Builder shardMapBuilder = ImmutableMap.builder(); + String selectShards = format("" + + "SELECT shard_id, shard_uuid\n" + + "FROM shards\n" + + "WHERE shard_uuid IN (%s)", args); + try (PreparedStatement statement = handle.getConnection().prepareStatement(selectShards)) { + bindUuids(statement, toUpdateShard.keySet()); + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + shardMapBuilder.put(rs.getLong("shard_id"), uuidFromBytes(rs.getBytes("shard_uuid"))); + } + } + } + Map shardIdToUuid = shardMapBuilder.build(); + if (toUpdateShard.size() != shardIdToUuid.size()) { + throw transactionConflict(); + } + + int updatedCount = 0; + try (ShardsAndIndexUpdater shardsAndIndexUpdater = new ShardsAndIndexUpdater(handle.getConnection(), tableId)) { + for (List batch : partition(shardIdToUuid.keySet(), batchSize(handle.getConnection()))) { for (int i = 0; i < batch.size(); i++) { - ShardInfo shard = batch.get(i); - Set shardNodes = shard.getNodeIdentifiers().stream() - .map(nodeIds::get) - .collect(toSet()); - indexInserter.insert( - shardIds.get(i), - shard.getShardUuid(), - shard.getBucketNumber(), - shardNodes, - shard.getColumnStats()); + Long shardId = batch.get(i); + shardsAndIndexUpdater.update( + shardId, + toUpdateShard.get(shardIdToUuid.get(shardId)).getKey(), + toUpdateShard.get(shardIdToUuid.get(shardId)).getValue()); } - indexInserter.execute(); + updatedCount += shardsAndIndexUpdater.execute(); } } + if (updatedCount != shardIdToUuid.size()) { + throw transactionConflict(); + } + } + + private static void bindUuids(PreparedStatement statement, Iterable uuids) + throws SQLException + { + int i = 1; + for (UUID uuid : uuids) { + statement.setBytes(i, uuidToBytes(uuid)); + i++; + } + } + + private static void bindLongs(PreparedStatement statement, Iterable values) + throws SQLException + { + int i = 1; + for (long value : values) { + statement.setLong(i, value); + i++; + } } private static int batchSize(Connection connection) @@ -537,19 +802,19 @@ public Set getNodeShards(String nodeIdentifier, long tableId) } @Override - public ResultIterator getShardNodes(long tableId, TupleDomain effectivePredicate) + public ResultIterator getShardNodes(long tableId, boolean tableSupportsDeltaDelete, TupleDomain effectivePredicate) { - return new ShardIterator(tableId, false, Optional.empty(), effectivePredicate, dbi); + return new ShardIterator(tableId, tableSupportsDeltaDelete, false, Optional.empty(), effectivePredicate, dbi); } @Override - public ResultIterator getShardNodesBucketed(long tableId, boolean merged, List bucketToNode, TupleDomain effectivePredicate) + public ResultIterator getShardNodesBucketed(long tableId, boolean tableSupportsDeltaDelete, boolean merged, List bucketToNode, TupleDomain effectivePredicate) { - return new ShardIterator(tableId, merged, Optional.of(bucketToNode), effectivePredicate, dbi); + return new ShardIterator(tableId, tableSupportsDeltaDelete, merged, Optional.of(bucketToNode), effectivePredicate, dbi); } @Override - public void replaceShardAssignment(long tableId, UUID shardUuid, String nodeIdentifier, boolean gracePeriod) + public void 
replaceShardAssignment(long tableId, UUID shardUuid, Optional deltaUuid, String nodeIdentifier, boolean gracePeriod) { if (gracePeriod && (nanosSince(startTime).compareTo(startupGracePeriod) < 0)) { throw new PrestoException(SERVER_STARTING_UP, "Cannot reassign shards while server is starting"); @@ -561,10 +826,17 @@ public void replaceShardAssignment(long tableId, UUID shardUuid, String nodeIden ShardDao dao = shardDaoSupplier.attach(handle); Set oldAssignments = new HashSet<>(fetchLockedNodeIds(handle, tableId, shardUuid)); + + // 1. Update index table updateNodeIds(handle, tableId, shardUuid, ImmutableSet.of(nodeId)); + // 2. Update shards table dao.deleteShardNodes(shardUuid, oldAssignments); dao.insertShardNode(shardUuid, nodeId); + if (deltaUuid.isPresent()) { + dao.deleteShardNodes(deltaUuid.get(), oldAssignments); + dao.insertShardNode(deltaUuid.get(), nodeId); + } return null; }); } @@ -760,22 +1032,24 @@ private Duration nanosSince(long nanos) return new Duration(ticker.read() - nanos, NANOSECONDS); } - private static List insertShards(Connection connection, long tableId, List shards) + private static List insertShards(Connection connection, long tableId, List shards, boolean isDelta) throws SQLException { String sql = "" + - "INSERT INTO shards (shard_uuid, table_id, create_time, row_count, compressed_size, uncompressed_size, xxhash64, bucket_number)\n" + - "VALUES (?, ?, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?)"; + "INSERT INTO shards (shard_uuid, table_id, is_delta, delta_uuid, create_time, row_count, compressed_size, uncompressed_size, xxhash64, bucket_number)\n" + + "VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?)"; try (PreparedStatement statement = connection.prepareStatement(sql, RETURN_GENERATED_KEYS)) { for (ShardInfo shard : shards) { statement.setBytes(1, uuidToBytes(shard.getShardUuid())); statement.setLong(2, tableId); - statement.setLong(3, shard.getRowCount()); - statement.setLong(4, shard.getCompressedSize()); - statement.setLong(5, shard.getUncompressedSize()); - statement.setLong(6, shard.getXxhash64()); - bindOptionalInt(statement, 7, shard.getBucketNumber()); + statement.setBoolean(3, isDelta); + statement.setNull(4, BINARY); + statement.setLong(5, shard.getRowCount()); + statement.setLong(6, shard.getCompressedSize()); + statement.setLong(7, shard.getUncompressedSize()); + statement.setLong(8, shard.getXxhash64()); + bindOptionalInt(statement, 9, shard.getBucketNumber()); statement.addBatch(); } statement.executeBatch(); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/MetadataDao.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/MetadataDao.java index d5276410b8465..4209a10bd009b 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/MetadataDao.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/MetadataDao.java @@ -27,7 +27,7 @@ public interface MetadataDao { String TABLE_INFORMATION_SELECT = "" + - "SELECT t.table_id, t.distribution_id, d.distribution_name, d.bucket_count, t.temporal_column_id, t.organization_enabled\n" + + "SELECT t.table_id, t.distribution_id, d.distribution_name, d.bucket_count, t.temporal_column_id, t.organization_enabled, t.table_supports_delta_delete\n" + "FROM tables t\n" + "LEFT JOIN distributions d ON (t.distribution_id = d.distribution_id)\n"; @@ -115,11 +115,11 @@ List getViews( @SqlUpdate("INSERT INTO tables (\n" + " schema_name, table_name, compaction_enabled, organization_enabled, distribution_id,\n" + " create_time, update_time, 
table_version,\n" + - " shard_count, row_count, compressed_size, uncompressed_size)\n" + + " shard_count, delta_count, row_count, compressed_size, uncompressed_size, table_supports_delta_delete)\n" + "VALUES (\n" + " :schemaName, :tableName, :compactionEnabled, :organizationEnabled, :distributionId,\n" + " :createTime, :createTime, 0,\n" + - " 0, 0, 0, 0)\n") + " 0, 0, 0, 0, 0, :tableSupportsDeltaDelete)\n") @GetGeneratedKeys long insertTable( @Bind("schemaName") String schemaName, @@ -127,7 +127,8 @@ long insertTable( @Bind("compactionEnabled") boolean compactionEnabled, @Bind("organizationEnabled") boolean organizationEnabled, @Bind("distributionId") Long distributionId, - @Bind("createTime") long createTime); + @Bind("createTime") long createTime, + @Bind("tableSupportsDeltaDelete") boolean tableSupportsDeltaDelete); @SqlUpdate("UPDATE tables SET\n" + " update_time = :updateTime\n" + @@ -138,14 +139,16 @@ void updateTableVersion( @Bind("updateTime") long updateTime); @SqlUpdate("UPDATE tables SET\n" + - " shard_count = shard_count + :shardCount \n" + + " shard_count = shard_count + :shardCountChange \n" + + ", delta_count = delta_count + :deltaCountChange \n" + ", row_count = row_count + :rowCount\n" + ", compressed_size = compressed_size + :compressedSize\n" + ", uncompressed_size = uncompressed_size + :uncompressedSize\n" + "WHERE table_id = :tableId") void updateTableStats( @Bind("tableId") long tableId, - @Bind("shardCount") long shardCount, + @Bind("shardCountChange") long shardCountChange, + @Bind("deltaCountChange") long deltaCountChange, @Bind("rowCount") long rowCount, @Bind("compressedSize") long compressedSize, @Bind("uncompressedSize") long uncompressedSize); @@ -246,7 +249,7 @@ long insertDistribution( @Bind("columnTypes") String columnTypes, @Bind("bucketCount") int bucketCount); - @SqlQuery("SELECT table_id, schema_name, table_name, temporal_column_id, distribution_name, bucket_count, organization_enabled\n" + + @SqlQuery("SELECT table_id, schema_name, table_name, temporal_column_id, distribution_name, bucket_count, organization_enabled, table_supports_delta_delete\n" + "FROM tables\n" + "LEFT JOIN distributions\n" + "ON tables.distribution_id = distributions.distribution_id\n" + @@ -272,7 +275,7 @@ List getColumnMetadataRows( @Bind("tableName") String tableName); @SqlQuery("SELECT schema_name, table_name, create_time, update_time, table_version,\n" + - " shard_count, row_count, compressed_size, uncompressed_size\n" + + " shard_count, delta_count, row_count, compressed_size, uncompressed_size\n" + "FROM tables\n" + "WHERE (schema_name = :schemaName OR :schemaName IS NULL)\n" + " AND (table_name = :tableName OR :tableName IS NULL)\n" + diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/SchemaDao.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/SchemaDao.java index 952dbffcbef2f..b2fbf6638ce79 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/SchemaDao.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/SchemaDao.java @@ -38,10 +38,12 @@ public interface SchemaDao " update_time BIGINT NOT NULL,\n" + " table_version BIGINT NOT NULL,\n" + " shard_count BIGINT NOT NULL,\n" + + " delta_count BIGINT NOT NULL,\n" + " row_count BIGINT NOT NULL,\n" + " compressed_size BIGINT NOT NULL,\n" + " uncompressed_size BIGINT NOT NULL,\n" + " maintenance_blocked DATETIME,\n" + + " table_supports_delta_delete BOOLEAN NOT NULL DEFAULT false,\n" + " UNIQUE (schema_name, table_name),\n" + " UNIQUE 
(distribution_id, table_id),\n" + " UNIQUE (maintenance_blocked, table_id),\n" + @@ -91,6 +93,8 @@ public interface SchemaDao " compressed_size BIGINT NOT NULL,\n" + " uncompressed_size BIGINT NOT NULL,\n" + " xxhash64 BIGINT NOT NULL,\n" + + " is_delta BOOLEAN NOT NULL DEFAULT false,\n" + + " delta_uuid BINARY(16),\n" + " UNIQUE (shard_uuid),\n" + // include a covering index organized by table_id " UNIQUE (table_id, bucket_number, shard_id, shard_uuid, create_time, row_count, compressed_size, uncompressed_size, xxhash64),\n" + diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDao.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDao.java index 1ff4edeca4434..d51ece99a2d60 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDao.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDao.java @@ -36,7 +36,7 @@ public interface ShardDao int CLEANABLE_SHARDS_BATCH_SIZE = 1000; int CLEANUP_TRANSACTIONS_BATCH_SIZE = 10_000; - String SHARD_METADATA_COLUMNS = "table_id, shard_id, shard_uuid, bucket_number, row_count, compressed_size, uncompressed_size, xxhash64"; + String SHARD_METADATA_COLUMNS = "table_id, shard_id, shard_uuid, is_delta, delta_uuid, bucket_number, row_count, compressed_size, uncompressed_size, xxhash64"; @SqlUpdate("INSERT INTO nodes (node_identifier) VALUES (:nodeIdentifier)") @GetGeneratedKeys diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDeleteDelta.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDeleteDelta.java new file mode 100644 index 0000000000000..d81d9b471d393 --- /dev/null +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardDeleteDelta.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.raptor.metadata; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Optional; +import java.util.UUID; + +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + +public class ShardDeleteDelta +{ + private final UUID oldShardUuid; + private final Optional oldDeltaDeleteShard; + // Optional.empty() means delete original file + private final Optional newDeltaDeleteShard; + + @JsonCreator + public ShardDeleteDelta( + @JsonProperty("oldShardUuid") UUID oldShardUuid, + @JsonProperty("oldDeltaDeleteShard") Optional oldDeltaDeleteShard, + @JsonProperty("newDeltaDeleteShard") Optional newDeltaDeleteShard) + { + this.oldShardUuid = requireNonNull(oldShardUuid, "oldShardUuid is null"); + this.oldDeltaDeleteShard = requireNonNull(oldDeltaDeleteShard, "oldDeltaDeleteShard is null"); + this.newDeltaDeleteShard = requireNonNull(newDeltaDeleteShard, "newDeltaDeleteShard is null"); + } + + @JsonProperty + public UUID getOldShardUuid() + { + return oldShardUuid; + } + + @JsonProperty + public Optional getOldDeltaDeleteShard() + { + return oldDeltaDeleteShard; + } + + @JsonProperty + public Optional getNewDeltaDeleteShard() + { + return newDeltaDeleteShard; + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("oldShardUuid", oldShardUuid) + .add("oldDeltaDeleteShard", oldDeltaDeleteShard) + .add("newDeltaDeleteShard", newDeltaDeleteShard) + .toString(); + } +} diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardIterator.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardIterator.java index 005c0a2b36a7b..54b87b4a1f1df 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardIterator.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardIterator.java @@ -54,6 +54,7 @@ final class ShardIterator private final Map nodeMap = new HashMap<>(); private final boolean merged; + private final boolean tableSupportsDeltaDelete; private final List bucketToNode; private final ShardDao dao; private final Connection connection; @@ -63,22 +64,23 @@ final class ShardIterator public ShardIterator( long tableId, + boolean tableSupportsDeltaDelete, boolean merged, Optional> bucketToNode, TupleDomain effectivePredicate, IDBI dbi) { this.merged = merged; + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; this.bucketToNode = bucketToNode.orElse(null); ShardPredicate predicate = ShardPredicate.create(effectivePredicate); String sql; - if (bucketToNode.isPresent()) { - sql = "SELECT shard_uuid, bucket_number FROM %s WHERE %s ORDER BY bucket_number"; - } - else { - sql = "SELECT shard_uuid, node_ids FROM %s WHERE %s"; - } + sql = "SELECT shard_uuid, " + + (tableSupportsDeltaDelete ? "delta_shard_uuid, " : "") + + (bucketToNode.isPresent() ? "bucket_number " : "node_ids ") + + "FROM %s WHERE %s " + + (bucketToNode.isPresent() ? "ORDER BY bucket_number" : ""); sql = format(sql, shardIndexTable(tableId), predicate.getPredicate()); dao = onDemandDao(dbi, ShardDao.class); @@ -138,6 +140,8 @@ private BucketShards compute() } UUID shardUuid = uuidFromBytes(resultSet.getBytes("shard_uuid")); + Optional deltaShardUuid = (!tableSupportsDeltaDelete || resultSet.getBytes("delta_shard_uuid") == null) + ?
Optional.empty() : Optional.of(uuidFromBytes(resultSet.getBytes("delta_shard_uuid"))); Set nodeIdentifiers; OptionalInt bucketNumber = OptionalInt.empty(); @@ -151,7 +155,7 @@ private BucketShards compute() nodeIdentifiers = getNodeIdentifiers(nodeIds, shardUuid); } - ShardNodes shard = new ShardNodes(shardUuid, nodeIdentifiers); + ShardNodes shard = new ShardNodes(shardUuid, deltaShardUuid, nodeIdentifiers); return new BucketShards(bucketNumber, ImmutableSet.of(shard)); } @@ -176,10 +180,12 @@ private BucketShards computeMerged() do { UUID shardUuid = uuidFromBytes(resultSet.getBytes("shard_uuid")); + Optional deltaShardUuid = !tableSupportsDeltaDelete || resultSet.getBytes("delta_shard_uuid") == null ? + Optional.empty() : Optional.of(uuidFromBytes(resultSet.getBytes("delta_shard_uuid"))); int bucket = resultSet.getInt("bucket_number"); Set nodeIdentifiers = ImmutableSet.of(getBucketNode(bucket)); - shards.add(new ShardNodes(shardUuid, nodeIdentifiers)); + shards.add(new ShardNodes(shardUuid, deltaShardUuid, nodeIdentifiers)); } while (resultSet.next() && resultSet.getInt("bucket_number") == bucketNumber); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardManager.java index 2bdf8b779c5c4..db1a112f3cc16 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardManager.java @@ -15,6 +15,7 @@ import com.facebook.presto.raptor.RaptorColumnHandle; import com.facebook.presto.spi.predicate.TupleDomain; +import javafx.util.Pair; import org.skife.jdbi.v2.ResultIterator; import java.util.Collection; @@ -30,7 +31,7 @@ public interface ShardManager /** * Create a table. */ - void createTable(long tableId, List columns, boolean bucketed, OptionalLong temporalColumnId); + void createTable(long tableId, boolean tableSupportsDeltaDelete, List columns, boolean bucketed, OptionalLong temporalColumnId); /** * Drop a table. @@ -49,8 +50,22 @@ public interface ShardManager /** * Replace oldShardsUuids with newShards. + * Used by rewrite delete and compaction. + * With tableSupportsDeltaDelete: Delete oldShardsUuids with their delta shards + * Add newShards */ - void replaceShardUuids(long transactionId, long tableId, List columns, Set oldShardUuids, Collection newShards, OptionalLong updateTime); + void replaceShardUuids(long transactionId, boolean tableSupportsDeltaDelete, long tableId, List columns, Map> oldShardUuids, Collection newShards, OptionalLong updateTime); + + /** + * Replace oldDeltaDeleteShard with newDeltaDeleteShard. + * Used by delta delete. + * For shardMap: + * UUID is the target file. + * Optional in the Pair is the oldDeltaDeleteShard for the target file. + * Optional> in the Pair is the newDeltaDeleteShard for the target file. + * NOTE: Optional> being Optional.empty() means deleting the target file. + */ + void replaceDeltaUuids(long transactionId, long tableId, List columns, Map, Optional>> shardMap, OptionalLong updateTime); /** * Get shard metadata for a shard. @@ -70,17 +85,17 @@ public interface ShardManager /** * Return the shard nodes for a non-bucketed table. */ - ResultIterator getShardNodes(long tableId, TupleDomain effectivePredicate); + ResultIterator getShardNodes(long tableId, boolean tableSupportsDeltaDelete, TupleDomain effectivePredicate); /** * Return the shard nodes for a bucketed table. 
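Aside on the replaceDeltaUuids contract a few lines up: the Javadoc is dense, so here is a minimal sketch of the three states a single shardMap entry can express. Based on ShardDeleteDelta and the javafx.util.Pair import, the Pair is assumed to carry the old delta's UUID and the new delta's ShardInfo; shardMap, targetShard, oldDeltaUuid, and newDeltaInfo are illustrative names, not identifiers from this change.

// Target shard gets its first delta file: no old delta, a new delta is attached
shardMap.put(targetShard, new Pair<>(Optional.empty(), Optional.of(newDeltaInfo)));
// Target shard already had a delta; it is superseded by a freshly written one
shardMap.put(targetShard, new Pair<>(Optional.of(oldDeltaUuid), Optional.of(newDeltaInfo)));
// Every row of the target shard is now deleted: drop the shard (and its delta) entirely
shardMap.put(targetShard, new Pair<>(Optional.of(oldDeltaUuid), Optional.empty()));

In every case the key is the untouched target shard; only its delta pointer changes, which is what lets a delete avoid rewriting the base file.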
*/ - ResultIterator getShardNodesBucketed(long tableId, boolean merged, List bucketToNode, TupleDomain effectivePredicate); + ResultIterator getShardNodesBucketed(long tableId, boolean tableSupportsDeltaDelete, boolean merged, List bucketToNode, TupleDomain effectivePredicate); /** * Remove all old shard assignments and assign a shard to a node */ - void replaceShardAssignment(long tableId, UUID shardUuid, String nodeIdentifier, boolean gracePeriod); + void replaceShardAssignment(long tableId, UUID shardUuid, Optional deltaUuid, String nodeIdentifier, boolean gracePeriod); /** * Get the number of bytes used by assigned shards per node. diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardMetadata.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardMetadata.java index 86980f6e8186b..1700e357bc0db 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardMetadata.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardMetadata.java @@ -20,6 +20,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Objects; +import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; import java.util.UUID; @@ -38,6 +39,8 @@ public class ShardMetadata private final long tableId; private final long shardId; private final UUID shardUuid; + private final boolean isDelta; + private final Optional deltaUuid; private final OptionalInt bucketNumber; private final long rowCount; private final long compressedSize; @@ -50,6 +53,8 @@ public ShardMetadata( long tableId, long shardId, UUID shardUuid, + boolean isDelta, + Optional deltaUuid, OptionalInt bucketNumber, long rowCount, long compressedSize, @@ -67,6 +72,8 @@ public ShardMetadata( this.tableId = tableId; this.shardId = shardId; this.shardUuid = requireNonNull(shardUuid, "shardUuid is null"); + this.isDelta = isDelta; + this.deltaUuid = deltaUuid; this.bucketNumber = requireNonNull(bucketNumber, "bucketNumber is null"); this.rowCount = rowCount; this.compressedSize = compressedSize; @@ -91,6 +98,16 @@ public long getShardId() return shardId; } + public boolean isDelta() + { + return isDelta; + } + + public Optional getDeltaUuid() + { + return deltaUuid; + } + public OptionalInt getBucketNumber() { return bucketNumber; @@ -132,6 +149,8 @@ public ShardMetadata withTimeRange(long rangeStart, long rangeEnd) tableId, shardId, shardUuid, + isDelta, + deltaUuid, bucketNumber, rowCount, compressedSize, @@ -148,10 +167,14 @@ public String toString() .add("tableId", tableId) .add("shardId", shardId) .add("shardUuid", shardUuid) + .add("isDelta", isDelta) .add("rowCount", rowCount) .add("compressedSize", DataSize.succinctBytes(compressedSize)) .add("uncompressedSize", DataSize.succinctBytes(uncompressedSize)); + if (deltaUuid.isPresent()) { + stringHelper.add("deltaUuid", deltaUuid.get()); + } if (bucketNumber.isPresent()) { stringHelper.add("bucketNumber", bucketNumber.getAsInt()); } @@ -179,6 +202,8 @@ public boolean equals(Object o) ShardMetadata that = (ShardMetadata) o; return Objects.equals(tableId, that.tableId) && Objects.equals(shardId, that.shardId) && + Objects.equals(isDelta, that.isDelta) && + Objects.equals(deltaUuid, that.deltaUuid) && Objects.equals(bucketNumber, that.bucketNumber) && Objects.equals(rowCount, that.rowCount) && Objects.equals(compressedSize, that.compressedSize) && @@ -196,6 +221,8 @@ public int hashCode() tableId, shardId, shardUuid, + isDelta, + deltaUuid, bucketNumber, rowCount, 
compressedSize, @@ -216,6 +243,8 @@ public ShardMetadata map(int index, ResultSet r, StatementContext ctx) r.getLong("table_id"), r.getLong("shard_id"), uuidFromBytes(r.getBytes("shard_uuid")), + r.getBoolean("is_delta"), + r.getBytes("delta_uuid") == null ? Optional.empty() : Optional.of(uuidFromBytes(r.getBytes("delta_uuid"))), getOptionalInt(r, "bucket_number"), r.getLong("row_count"), r.getLong("compressed_size"), diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardNodes.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardNodes.java index 1acfde92bc9ef..b2a5bdb932187 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardNodes.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardNodes.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableSet; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.UUID; @@ -25,11 +26,13 @@ public class ShardNodes { private final UUID shardUuid; + private final Optional deltaShardUuid; private final Set nodeIdentifiers; - public ShardNodes(UUID shardUuid, Set nodeIdentifiers) + public ShardNodes(UUID shardUuid, Optional deltaShardUuid, Set nodeIdentifiers) { this.shardUuid = requireNonNull(shardUuid, "shardUuid is null"); + this.deltaShardUuid = requireNonNull(deltaShardUuid, "shardUuid is null"); this.nodeIdentifiers = ImmutableSet.copyOf(requireNonNull(nodeIdentifiers, "nodeIdentifiers is null")); } @@ -38,6 +41,11 @@ public UUID getShardUuid() return shardUuid; } + public Optional getDeltaShardUuid() + { + return deltaShardUuid; + } + public Set getNodeIdentifiers() { return nodeIdentifiers; @@ -54,6 +62,7 @@ public boolean equals(Object obj) } ShardNodes other = (ShardNodes) obj; return Objects.equals(this.shardUuid, other.shardUuid) && + Objects.equals(this.deltaShardUuid, other.deltaShardUuid) && Objects.equals(this.nodeIdentifiers, other.nodeIdentifiers); } @@ -68,6 +77,7 @@ public String toString() { return toStringHelper(this) .add("shardUuid", shardUuid) + .add("deltaShardUuid", deltaShardUuid) .add("nodeIdentifiers", nodeIdentifiers) .toString(); } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexDeleter.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexDeleter.java new file mode 100644 index 0000000000000..1c95b88e1887b --- /dev/null +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexDeleter.java @@ -0,0 +1,106 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.raptor.metadata; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Optional; +import java.util.UUID; + +import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; +import static com.facebook.presto.raptor.util.UuidUtil.uuidToBytes; + +public class ShardsAndIndexDeleter + implements AutoCloseable +{ + private final PreparedStatement deleteShardStatement; + private final PreparedStatement deleteShardEmptyDeltaStatement; + private final PreparedStatement deleteIndexStatement; + private final PreparedStatement deleteIndexEmptyDeltaStatement; + + public ShardsAndIndexDeleter(Connection connection, long tableId) + throws SQLException + { + // DELETE FROM table_name + // WHERE condition; + String deleteIndexSql = "" + + "DELETE FROM " + shardIndexTable(tableId) + " \n" + + " WHERE shard_id = ? AND delta_shard_uuid = ?"; + String deleteIndexSqlEmptyDelta = "" + + "DELETE FROM " + shardIndexTable(tableId) + " \n" + + " WHERE shard_id = ? AND delta_shard_uuid IS NULL"; + String deleteShardSql = "" + + "DELETE FROM shards \n" + + " WHERE shard_id = ? AND delta_uuid = ?"; + String deleteShardSqlEmptyDelta = "" + + "DELETE FROM shards \n" + + " WHERE shard_id = ? AND delta_uuid IS NULL"; + this.deleteIndexStatement = connection.prepareStatement(deleteIndexSql); + this.deleteIndexEmptyDeltaStatement = connection.prepareStatement(deleteIndexSqlEmptyDelta); + this.deleteShardStatement = connection.prepareStatement(deleteShardSql); + this.deleteShardEmptyDeltaStatement = connection.prepareStatement(deleteShardSqlEmptyDelta); + } + + public void delete(Long id, Optional deltaUuid) + throws SQLException + { + if (deltaUuid.isPresent()) { + deleteShardStatement.setLong(1, id); + deleteShardStatement.setBytes(2, uuidToBytes(deltaUuid.get())); + deleteShardStatement.addBatch(); + + deleteIndexStatement.setLong(1, id); + deleteIndexStatement.setBytes(2, uuidToBytes(deltaUuid.get())); + deleteIndexStatement.addBatch(); + } + else { + deleteShardEmptyDeltaStatement.setLong(1, id); + deleteShardEmptyDeltaStatement.addBatch(); + deleteIndexEmptyDeltaStatement.setLong(1, id); + deleteIndexEmptyDeltaStatement.addBatch(); + } + } + + public int execute() + throws SQLException + { + int shardsUpdatedCount = 0; + int indexUpdatedCount = 0; + shardsUpdatedCount += updatedCount(deleteShardStatement.executeBatch()); + shardsUpdatedCount += updatedCount(deleteShardEmptyDeltaStatement.executeBatch()); + indexUpdatedCount += updatedCount(deleteIndexStatement.executeBatch()); + indexUpdatedCount += updatedCount(deleteIndexEmptyDeltaStatement.executeBatch()); + return shardsUpdatedCount == indexUpdatedCount ? 
shardsUpdatedCount : -1; + } + + @Override + public void close() + throws SQLException + { + deleteShardStatement.close(); + deleteShardEmptyDeltaStatement.close(); + deleteIndexStatement.close(); + deleteIndexEmptyDeltaStatement.close(); + } + + static int updatedCount(int[] executeBatch) + { + int sum = 0; + for (int count : executeBatch) { + sum += count; + } + return sum; + } +} diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexUpdater.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexUpdater.java new file mode 100644 index 0000000000000..52dc880c3f20b --- /dev/null +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/ShardsAndIndexUpdater.java @@ -0,0 +1,108 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.raptor.metadata; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Optional; +import java.util.UUID; + +import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; +import static com.facebook.presto.raptor.metadata.ShardsAndIndexDeleter.updatedCount; +import static com.facebook.presto.raptor.util.UuidUtil.uuidToBytes; + +public class ShardsAndIndexUpdater + implements AutoCloseable +{ + private final PreparedStatement updateShardEmptyDeltaStatement; + private final PreparedStatement updateShardStatement; + private final PreparedStatement updateIndexEmptyDeltaStatement; + private final PreparedStatement updateIndexStatement; + + public ShardsAndIndexUpdater(Connection connection, long tableId) + throws SQLException + { + // UPDATE table_name + // SET column1 = value1, column2 = value2, ... + // WHERE condition; + String updateIndexSqlEmptyDelta = "" + + "UPDATE " + shardIndexTable(tableId) + " SET \n" + + " delta_shard_uuid = ?\n" + + " WHERE shard_id = ? AND delta_shard_uuid IS NULL"; + String updateIndexSql = "" + + "UPDATE " + shardIndexTable(tableId) + " SET \n" + + " delta_shard_uuid = ?\n" + + " WHERE shard_id = ? AND delta_shard_uuid = ?"; + String updateShardSqlEmptyDelta = "" + + "UPDATE shards SET \n" + + " delta_uuid = ?\n" + + " WHERE shard_id = ? AND delta_uuid IS NULL"; + String updateShardSql = "" + + "UPDATE shards SET \n" + + " delta_uuid = ?\n" + + " WHERE shard_id = ? 
AND delta_uuid = ?"; + this.updateShardEmptyDeltaStatement = connection.prepareStatement(updateShardSqlEmptyDelta); + this.updateIndexEmptyDeltaStatement = connection.prepareStatement(updateIndexSqlEmptyDelta); + this.updateShardStatement = connection.prepareStatement(updateShardSql); + this.updateIndexStatement = connection.prepareStatement(updateIndexSql); + } + + public void update(long oldId, Optional oldUuid, UUID newUuid) + throws SQLException + { + if (oldUuid.isPresent()) { + updateShardStatement.setBytes(1, uuidToBytes(newUuid)); + updateShardStatement.setLong(2, oldId); + updateShardStatement.setBytes(3, uuidToBytes(oldUuid.get())); + updateShardStatement.addBatch(); + + updateIndexStatement.setBytes(1, uuidToBytes(newUuid)); + updateIndexStatement.setLong(2, oldId); + updateIndexStatement.setBytes(3, uuidToBytes(oldUuid.get())); + updateIndexStatement.addBatch(); + } + else { + updateShardEmptyDeltaStatement.setBytes(1, uuidToBytes(newUuid)); + updateShardEmptyDeltaStatement.setLong(2, oldId); + updateShardEmptyDeltaStatement.addBatch(); + + updateIndexEmptyDeltaStatement.setBytes(1, uuidToBytes(newUuid)); + updateIndexEmptyDeltaStatement.setLong(2, oldId); + updateIndexEmptyDeltaStatement.addBatch(); + } + } + + public int execute() + throws SQLException + { + int shardsUpdatedCount = 0; + int indexUpdatedCount = 0; + shardsUpdatedCount += updatedCount(updateShardStatement.executeBatch()); + shardsUpdatedCount += updatedCount(updateShardEmptyDeltaStatement.executeBatch()); + indexUpdatedCount += updatedCount(updateIndexStatement.executeBatch()); + indexUpdatedCount += updatedCount(updateIndexEmptyDeltaStatement.executeBatch()); + return shardsUpdatedCount == indexUpdatedCount ? shardsUpdatedCount : -1; + } + + @Override + public void close() + throws SQLException + { + updateShardStatement.close(); + updateShardEmptyDeltaStatement.close(); + updateIndexStatement.close(); + updateIndexEmptyDeltaStatement.close(); + } +} diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/Table.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/Table.java index 8780df97643cd..53cd6203b610d 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/Table.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/Table.java @@ -35,6 +35,7 @@ public final class Table private final OptionalInt bucketCount; private final OptionalLong temporalColumnId; private final boolean organized; + private final boolean tableSupportsDeltaDelete; public Table( long tableId, @@ -42,7 +43,8 @@ public Table( Optional distributionName, OptionalInt bucketCount, OptionalLong temporalColumnId, - boolean organized) + boolean organized, + boolean tableSupportsDeltaDelete) { this.tableId = tableId; this.distributionId = requireNonNull(distributionId, "distributionId is null"); @@ -50,6 +52,7 @@ public Table( this.bucketCount = requireNonNull(bucketCount, "bucketCount is null"); this.temporalColumnId = requireNonNull(temporalColumnId, "temporalColumnId is null"); this.organized = organized; + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; } public long getTableId() @@ -82,6 +85,11 @@ public boolean isOrganized() return organized; } + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + @Override public String toString() { @@ -91,6 +99,7 @@ public String toString() .add("bucketCount", bucketCount.isPresent() ? bucketCount.getAsInt() : null) .add("temporalColumnId", temporalColumnId.isPresent() ? 
temporalColumnId.getAsLong() : null) .add("organized", organized) + .add("tableSupportsDeltaDelete", tableSupportsDeltaDelete) .omitNullValues() .toString(); } @@ -108,7 +117,8 @@ public Table map(int index, ResultSet r, StatementContext ctx) Optional.ofNullable(r.getString("distribution_name")), getOptionalInt(r, "bucket_count"), getOptionalLong(r, "temporal_column_id"), - r.getBoolean("organization_enabled")); + r.getBoolean("organization_enabled"), + r.getBoolean("table_supports_delta_delete")); } } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableMetadataRow.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableMetadataRow.java index 50f00161b6a36..6ef08bc887f20 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableMetadataRow.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableMetadataRow.java @@ -35,6 +35,7 @@ public class TableMetadataRow private final Optional distributionName; private final OptionalInt bucketCount; private final boolean organized; + private final boolean tableSupportsDeltaDelete; public TableMetadataRow( long tableId, @@ -43,7 +44,8 @@ public TableMetadataRow( OptionalLong temporalColumnId, Optional distributionName, OptionalInt bucketCount, - boolean organized) + boolean organized, + boolean tableSupportsDeltaDelete) { this.tableId = tableId; this.schemaName = requireNonNull(schemaName, "schemaName is null"); @@ -52,6 +54,7 @@ public TableMetadataRow( this.distributionName = requireNonNull(distributionName, "distributionName is null"); this.bucketCount = requireNonNull(bucketCount, "bucketCount is null"); this.organized = organized; + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; } public long getTableId() @@ -89,6 +92,11 @@ public boolean isOrganized() return organized; } + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + public static class Mapper implements ResultSetMapper { @@ -103,7 +111,8 @@ public TableMetadataRow map(int index, ResultSet rs, StatementContext context) getOptionalLong(rs, "temporal_column_id"), Optional.ofNullable(rs.getString("distribution_name")), getOptionalInt(rs, "bucket_count"), - rs.getBoolean("organization_enabled")); + rs.getBoolean("organization_enabled"), + rs.getBoolean("table_supports_delta_delete")); } } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableStatsRow.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableStatsRow.java index 92da694886b32..0a20f28e270be 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableStatsRow.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/metadata/TableStatsRow.java @@ -29,6 +29,7 @@ public class TableStatsRow private final long updateTime; private final long tableVersion; private final long shardCount; + private final long deltaCount; private final long rowCount; private final long compressedSize; private final long uncompressedSize; @@ -40,6 +41,7 @@ public TableStatsRow( long updateTime, long tableVersion, long shardCount, + long deltaCount, long rowCount, long compressedSize, long uncompressedSize) @@ -50,6 +52,7 @@ public TableStatsRow( this.updateTime = updateTime; this.tableVersion = tableVersion; this.shardCount = shardCount; + this.deltaCount = deltaCount; this.rowCount = rowCount; this.compressedSize = compressedSize; this.uncompressedSize = uncompressedSize; @@ -100,6 +103,11 @@ public long getUncompressedSize() return 
uncompressedSize; } + public long getDeltaCount() + { + return deltaCount; + } + public static class Mapper implements ResultSetMapper { @@ -114,6 +122,7 @@ public TableStatsRow map(int index, ResultSet rs, StatementContext context) rs.getLong("update_time"), rs.getLong("table_version"), rs.getLong("shard_count"), + rs.getLong("delta_count"), rs.getLong("row_count"), rs.getLong("compressed_size"), rs.getLong("uncompressed_size")); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/DeltaShardRewriter.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/DeltaShardRewriter.java new file mode 100644 index 0000000000000..e223da2cc7a56 --- /dev/null +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/DeltaShardRewriter.java @@ -0,0 +1,178 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.raptor.storage; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.orc.OrcDataSource; +import com.facebook.presto.orc.OrcFileTailSource; +import com.facebook.presto.orc.OrcReader; +import com.facebook.presto.raptor.filesystem.FileSystemContext; +import com.facebook.presto.raptor.metadata.ShardDeleteDelta; +import com.facebook.presto.raptor.metadata.ShardInfo; +import com.facebook.presto.spi.Page; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.block.LongArrayBlockBuilder; +import com.facebook.presto.spi.type.Type; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.apache.hadoop.fs.FileSystem; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.OptionalInt; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static com.facebook.presto.orc.OrcEncoding.ORC; +import static com.facebook.presto.raptor.RaptorErrorCode.RAPTOR_ERROR; +import static com.facebook.presto.raptor.storage.OrcStorageManager.HUGE_MAX_READ_BLOCK_SIZE; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static java.lang.Math.toIntExact; +import static java.util.concurrent.CompletableFuture.completedFuture; +import static java.util.concurrent.CompletableFuture.supplyAsync; + +public class DeltaShardRewriter + implements ShardRewriter +{ + private static final JsonCodec SHARD_DELETE_DELTA_CODEC = jsonCodec(ShardDeleteDelta.class); + private final OrcStorageManager orcStorageManager; + private final FileSystemContext fileSystemContext; + private final FileSystem 
fileSystem; + private final OrcFileTailSource orcFileTailSource; + private final long transactionId; + private final OptionalInt bucketNumber; + private final UUID oldShardUuid; + private final Optional oldDeltaDeleteShardUuid; + private final ExecutorService deletionExecutor; + private final ReaderAttributes defaultReaderAttributes; + + public DeltaShardRewriter( + FileSystemContext fileSystemContext, + FileSystem fileSystem, + OrcFileTailSource orcFileTailSource, + long transactionId, + OptionalInt bucketNumber, + UUID oldShardUuid, + Optional oldDeltaDeleteShardUuid, + OrcStorageManager orcStorageManager, + ExecutorService deletionExecutor, + ReaderAttributes defaultReaderAttributes) + { + this.orcStorageManager = orcStorageManager; + this.fileSystemContext = fileSystemContext; + this.fileSystem = fileSystem; + this.orcFileTailSource = orcFileTailSource; + this.transactionId = transactionId; + this.bucketNumber = bucketNumber; + this.oldShardUuid = oldShardUuid; + this.oldDeltaDeleteShardUuid = oldDeltaDeleteShardUuid; + this.deletionExecutor = deletionExecutor; + this.defaultReaderAttributes = defaultReaderAttributes; + } + + @Override + public CompletableFuture> rewrite(BitSet rowsToDelete) + { + if (rowsToDelete.isEmpty()) { + return completedFuture(ImmutableList.of()); + } + return supplyAsync(() -> writeDeltaDeleteFile(rowsToDelete), deletionExecutor); + } + + @VisibleForTesting + Collection writeDeltaDeleteFile(BitSet rowsToDelete) + { + if (rowsToDelete.isEmpty()) { + return ImmutableList.of(); + } + // blockToDelete is LongArrayBlock + List columnIds = new ArrayList<>(); + List columnTypes = new ArrayList<>(); + columnIds.add(0L); + columnTypes.add(BIGINT); + + // TODO: Under current implementation, one block can only hold INT_MAX many rows + // which theoretically may not be enough to hold all rows from an ORC file. + // At this point, rowsToDelete couldn't be empty + oldDeltaDeleteShardUuid.ifPresent(oldDeltaDeleteShardUuid -> mergeToRowsToDelete(rowsToDelete, oldDeltaDeleteShardUuid)); + + if (rowsToDelete.cardinality() == getRowCount(oldShardUuid)) { + // Delete original file + return shardDeleteDelta(oldShardUuid, oldDeltaDeleteShardUuid, Optional.empty()); + } + + BlockBuilder blockBuilder = new LongArrayBlockBuilder(null, rowsToDelete.size()); + for (int i = rowsToDelete.nextSetBit(0); i >= 0; i = rowsToDelete.nextSetBit(i + 1)) { + blockBuilder.writeLong(i); + } + Block blockToDelete = blockBuilder.build(); + // TODO: a async call made by deletionExecutor, it calls into OrcPageSink which uses a different thread pool (i.e. commitExecutor) + // Better use consistent thread pool management + StoragePageSink pageSink = orcStorageManager.createStoragePageSink(fileSystemContext, transactionId, bucketNumber, columnIds, columnTypes, true); + pageSink.appendPages(ImmutableList.of(new Page(blockToDelete))); + List shardInfos = getFutureValue(pageSink.commit()); + // Guaranteed that shardInfos only has one element since we only call commit one time + ShardInfo newDeltaDeleteShard = Iterables.getOnlyElement(shardInfos); + return shardDeleteDelta(oldShardUuid, oldDeltaDeleteShardUuid, Optional.of(newDeltaDeleteShard)); + } + + // Note: This function will change rowsToDelete. 
+ // Merges the BitSet read from oldDeltaDeleteShardUuid into rowsToDelete + // rowsToDelete and rowsDeleted must be mutually exclusive + private void mergeToRowsToDelete(BitSet rowsToDelete, UUID oldDeltaDeleteShardUuid) + { + Optional rowsDeleted = orcStorageManager.getRowsFromUuid(fileSystem, Optional.of(oldDeltaDeleteShardUuid)); + if (!rowsDeleted.isPresent()) { + return; + } + BitSet verify = new BitSet(); + verify.or(rowsToDelete); + verify.and(rowsDeleted.get()); + if (verify.cardinality() != 0) { + throw new PrestoException(RAPTOR_ERROR, "rowsToDelete and rowsDeleted are not mutually exclusive"); + } + rowsToDelete.or(rowsDeleted.get()); + } + + private int getRowCount(UUID oldShardUuid) + { + try (OrcDataSource dataSource = orcStorageManager.openShard(fileSystem, oldShardUuid, defaultReaderAttributes)) { + OrcReader reader = new OrcReader(dataSource, ORC, defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE, orcFileTailSource); + if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) { + throw new IOException("File has too many rows"); + } + return toIntExact(reader.getFooter().getNumberOfRows()); + } + catch (Exception e) { + throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + oldShardUuid, e); + } + } + + private Collection shardDeleteDelta(UUID oldShardUuid, Optional oldDeltaDeleteShard, Optional newDeltaDeleteShard) + { + return ImmutableList.of(Slices.wrappedBuffer(SHARD_DELETE_DELTA_CODEC.toJsonBytes( + new ShardDeleteDelta(oldShardUuid, oldDeltaDeleteShard, newDeltaDeleteShard)))); + } +} diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/InplaceShardRewriter.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/InplaceShardRewriter.java new file mode 100644 index 0000000000000..11430bc07c742 --- /dev/null +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/InplaceShardRewriter.java @@ -0,0 +1,136 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
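Aside on the delta layout DeltaShardRewriter relies on above: the delta shard written by writeDeltaDeleteFile is an ORC file with a single BIGINT column of deleted row positions, and OrcStorageManager.getRowsFromUuid (further down) turns that column back into a BitSet. A compressed sketch of the round trip, reusing the same SPI calls the change itself uses; the in-memory Page stands in for the ORC write and read, so this is illustrative only, not the PR's code path.

// Encode: set bits become one BIGINT column, as in writeDeltaDeleteFile
BitSet rowsToDelete = new BitSet();
rowsToDelete.set(3);
rowsToDelete.set(17);
BlockBuilder positions = new LongArrayBlockBuilder(null, rowsToDelete.cardinality());
for (int i = rowsToDelete.nextSetBit(0); i >= 0; i = rowsToDelete.nextSetBit(i + 1)) {
    positions.writeLong(i);
}
Page deltaPage = new Page(positions.build()); // this page is what the delta ORC file stores

// Decode: read the column back into a BitSet, as in getRowsFromUuid
BitSet rowsDeleted = new BitSet();
Block column = deltaPage.getBlock(0);
for (int position = 0; position < column.getPositionCount(); position++) {
    rowsDeleted.set(toIntExact(column.getLong(position)));
}
// rowsDeleted now holds {3, 17}; OrcPageSource masks exactly these positions when scanning the target shard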
+ */ +package com.facebook.presto.raptor.storage; + +import com.facebook.airlift.json.JsonCodec; +import com.facebook.presto.raptor.backup.BackupManager; +import com.facebook.presto.raptor.metadata.ShardDelta; +import com.facebook.presto.raptor.metadata.ShardInfo; +import com.facebook.presto.raptor.metadata.ShardRecorder; +import com.facebook.presto.spi.type.Type; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.util.BitSet; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalInt; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; +import static com.facebook.airlift.json.JsonCodec.jsonCodec; +import static java.util.concurrent.CompletableFuture.completedFuture; +import static java.util.concurrent.CompletableFuture.supplyAsync; + +public class InplaceShardRewriter + implements ShardRewriter +{ + private static final JsonCodec SHARD_DELTA_CODEC = jsonCodec(ShardDelta.class); + private final OrcStorageManager orcStorageManager; + private final StorageService storageService; + private final ShardRecorder shardRecorder; + private final BackupManager backupManager; + private final FileSystem fileSystem; + private final long transactionId; + private final OptionalInt bucketNumber; + private final UUID shardUuid; + private final Map columns; + private final ExecutorService deletionExecutor; + private final String nodeId; + + public InplaceShardRewriter( + FileSystem fileSystem, + long transactionId, + OptionalInt bucketNumber, + UUID shardUuid, + Map columns, + ExecutorService deletionExecutor, + String nodeId, + OrcStorageManager orcStorageManager, + StorageService storageService, + ShardRecorder shardRecorder, + BackupManager backupManager) + { + this.orcStorageManager = orcStorageManager; + this.storageService = storageService; + this.shardRecorder = shardRecorder; + this.backupManager = backupManager; + this.fileSystem = fileSystem; + this.transactionId = transactionId; + this.bucketNumber = bucketNumber; + this.shardUuid = shardUuid; + this.columns = columns; + this.deletionExecutor = deletionExecutor; + this.nodeId = nodeId; + } + + @Override + public CompletableFuture> rewrite(BitSet rowsToDelete) + { + if (rowsToDelete.isEmpty()) { + return completedFuture(ImmutableList.of()); + } + return supplyAsync(() -> rewriteShard(rowsToDelete), deletionExecutor); + } + + @VisibleForTesting + Collection rewriteShard(BitSet rowsToDelete) + { + if (rowsToDelete.isEmpty()) { + return ImmutableList.of(); + } + + UUID newShardUuid = UUID.randomUUID(); + Path input = storageService.getStorageFile(shardUuid); + Path output = storageService.getStagingFile(newShardUuid); + + OrcFileInfo info = orcStorageManager.rewriteFile(fileSystem, columns, input, output, rowsToDelete); + long rowCount = info.getRowCount(); + + if (rowCount == 0) { + return shardDelta(shardUuid, Optional.empty()); + } + + shardRecorder.recordCreatedShard(transactionId, newShardUuid); + + // submit for backup and wait until it finishes + getFutureValue(backupManager.submit(newShardUuid, output)); + + Set nodes = ImmutableSet.of(nodeId); + long 
uncompressedSize = info.getUncompressedSize(); + + ShardInfo shard = orcStorageManager.createShardInfo(fileSystem, newShardUuid, bucketNumber, output, nodes, rowCount, uncompressedSize); + + orcStorageManager.writeShard(newShardUuid); + + return shardDelta(shardUuid, Optional.of(shard)); + } + + private static Collection shardDelta(UUID oldShardUuid, Optional shardInfo) + { + List newShards = shardInfo.map(ImmutableList::of).orElse(ImmutableList.of()); + ShardDelta delta = new ShardDelta(ImmutableList.of(oldShardUuid), newShards); + return ImmutableList.of(Slices.wrappedBuffer(SHARD_DELTA_CODEC.toJsonBytes(delta))); + } +} diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcFileRewriter.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcFileRewriter.java index ce6201bbeaa45..c7ed489db9027 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcFileRewriter.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcFileRewriter.java @@ -227,7 +227,7 @@ private static OrcFileInfo rewrite( return new OrcFileInfo(rowCount, uncompressedSize); } - private static Page maskedPage(Block[] blocks, BitSet rowsToDelete, int start, int count) + public static Page maskedPage(Block[] blocks, BitSet rowsToDelete, int start, int count) { int[] ids = new int[count]; int size = 0; diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcPageSource.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcPageSource.java index 2825006054c08..470f5ab046690 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcPageSource.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcPageSource.java @@ -39,6 +39,7 @@ import static com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE; import static com.facebook.presto.raptor.RaptorErrorCode.RAPTOR_ERROR; +import static com.facebook.presto.raptor.storage.OrcFileRewriter.maskedPage; import static com.facebook.presto.spi.predicate.Utils.nativeValueToBlock; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.google.common.base.MoreObjects.toStringHelper; @@ -61,7 +62,9 @@ public class OrcPageSource private final OrcBatchRecordReader recordReader; private final OrcDataSource orcDataSource; + private final Optional rowsDeleted; private final BitSet rowsToDelete; + private final boolean tableSupportsDeltaDelete; private final List columnIds; private final List types; @@ -83,14 +86,18 @@ public OrcPageSource( List columnTypes, List columnIndexes, UUID shardUuid, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, - AggregatedMemoryContext systemMemoryContext) + AggregatedMemoryContext systemMemoryContext, + Optional rowsDeleted) { this.shardRewriter = requireNonNull(shardRewriter, "shardRewriter is null"); this.recordReader = requireNonNull(recordReader, "recordReader is null"); this.orcDataSource = requireNonNull(orcDataSource, "orcDataSource is null"); + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; this.rowsToDelete = new BitSet(toIntExact(recordReader.getFileRowCount())); + this.rowsDeleted = requireNonNull(rowsDeleted, "rowsDeleted is null"); checkArgument(columnIds.size() == columnTypes.size(), "ids and types mismatch"); checkArgument(columnIds.size() == columnIndexes.size(), "ids and indexes mismatch"); @@ -177,6 +184,10 @@ else if (columnIndexes[fieldId] == ROWID_COLUMN) { blocks[fieldId] = new LazyBlock(batchSize, new 
OrcBlockLoader(columnIndexes[fieldId], type)); } } + if (tableSupportsDeltaDelete && rowsDeleted.isPresent()) { + int row = toIntExact(filePosition); + return maskedPage(blocks, rowsDeleted.get(), row, batchSize); + } return new Page(batchSize, blocks); } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcStorageManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcStorageManager.java index 5ea4336c176c9..2db7c9c6cec80 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcStorageManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/OrcStorageManager.java @@ -33,7 +33,6 @@ import com.facebook.presto.raptor.filesystem.FileSystemContext; import com.facebook.presto.raptor.metadata.ColumnInfo; import com.facebook.presto.raptor.metadata.ColumnStats; -import com.facebook.presto.raptor.metadata.ShardDelta; import com.facebook.presto.raptor.metadata.ShardInfo; import com.facebook.presto.raptor.metadata.ShardRecorder; import com.facebook.presto.raptor.storage.StorageManagerConfig.OrcOptimizedWriterStage; @@ -41,6 +40,7 @@ import com.facebook.presto.spi.NodeManager; import com.facebook.presto.spi.Page; import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.DecimalType; import com.facebook.presto.spi.type.NamedTypeSignature; @@ -54,8 +54,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import io.airlift.slice.Slice; -import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.Duration; import org.apache.hadoop.fs.FileSystem; @@ -69,7 +67,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.BitSet; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Optional; @@ -85,7 +82,6 @@ import java.util.concurrent.TimeoutException; import static com.facebook.airlift.concurrent.MoreFutures.allAsList; -import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; import static com.facebook.airlift.concurrent.Threads.daemonThreadsNamed; import static com.facebook.airlift.json.JsonCodec.jsonCodec; import static com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; @@ -118,10 +114,9 @@ import static com.google.common.base.Throwables.throwIfInstanceOf; import static io.airlift.units.DataSize.Unit.PETABYTE; import static java.lang.Math.min; +import static java.lang.Math.toIntExact; import static java.lang.String.format; import static java.util.Objects.requireNonNull; -import static java.util.concurrent.CompletableFuture.completedFuture; -import static java.util.concurrent.CompletableFuture.supplyAsync; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.Executors.newFixedThreadPool; import static java.util.stream.Collectors.toList; @@ -138,7 +133,6 @@ public class OrcStorageManager public static final DateTimeZone DEFAULT_STORAGE_TIMEZONE = UTC; // TODO: do not limit the max size of blocks to read for now; enable the limit when the Hive connector is ready public static final DataSize HUGE_MAX_READ_BLOCK_SIZE = new DataSize(1, PETABYTE); - private static final JsonCodec SHARD_DELTA_CODEC = jsonCodec(ShardDelta.class); private static final long MAX_ROWS = 1_000_000_000; private static final JsonCodec 
METADATA_CODEC = jsonCodec(OrcFileMetadata.class); @@ -254,6 +248,8 @@ public void shutdown() public ConnectorPageSource getPageSource( FileSystemContext fileSystemContext, UUID shardUuid, + Optional deltaShardUuid, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List columnIds, List columnTypes, @@ -297,10 +293,10 @@ public ConnectorPageSource getPageSource( Optional shardRewriter = Optional.empty(); if (transactionId.isPresent()) { checkState(allColumnTypes.isPresent()); - shardRewriter = Optional.of(createShardRewriter(fileSystem, transactionId.getAsLong(), bucketNumber, shardUuid, allColumnTypes.get())); + shardRewriter = Optional.of(createShardRewriter(fileSystemContext, fileSystem, orcFileTailSource, transactionId.getAsLong(), bucketNumber, shardUuid, deltaShardUuid, tableSupportsDeltaDelete, allColumnTypes.get())); } - - return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage); + Optional rowsDeleted = getRowsFromUuid(fileSystem, deltaShardUuid); + return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, tableSupportsDeltaDelete, bucketNumber, systemMemoryUsage, rowsDeleted); } catch (IOException | RuntimeException e) { closeQuietly(dataSource); @@ -312,6 +308,48 @@ public ConnectorPageSource getPageSource( } } + Optional getRowsFromUuid(FileSystem fileSystem, Optional deltaShardUuid) + { + if (!deltaShardUuid.isPresent()) { + return Optional.empty(); + } + try (OrcDataSource dataSource = openShard(fileSystem, deltaShardUuid.get(), defaultReaderAttributes)) { + AggregatedMemoryContext systemMemoryUsage = newSimpleAggregatedMemoryContext(); + OrcReader reader = new OrcReader( + dataSource, + ORC, + defaultReaderAttributes.getMaxMergeDistance(), + defaultReaderAttributes.getTinyStripeThreshold(), + HUGE_MAX_READ_BLOCK_SIZE, + orcFileTailSource); + if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) { + throw new IOException("File has too many rows"); + } + ImmutableMap.Builder includedColumns = ImmutableMap.builder(); + includedColumns.put(0, BIGINT); + OrcBatchRecordReader recordReader = reader.createBatchRecordReader( + includedColumns.build(), + OrcPredicate.TRUE, + DEFAULT_STORAGE_TIMEZONE, + systemMemoryUsage, + INITIAL_BATCH_SIZE); + BitSet bitSet = new BitSet(); + while (recordReader.nextBatch() > 0) { + Block block = recordReader.readBlock(BIGINT, 0); + for (int i = 0; i < block.getPositionCount(); i++) { + bitSet.set(toIntExact(block.getLong(i))); + } + } + return Optional.of(bitSet); + } + catch (IOException | RuntimeException e) { + throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + deltaShardUuid, e); + } + catch (Throwable t) { + throw t; + } + } + private static int toSpecialIndex(long columnId) { if (isShardRowIdColumn(columnId)) { @@ -341,17 +379,17 @@ public StoragePageSink createStoragePageSink( return new OrcStoragePageSink(orcDataEnvironment.getFileSystem(fileSystemContext), transactionId, columnIds, columnTypes, bucketNumber); } - private ShardRewriter createShardRewriter(FileSystem fileSystem, long transactionId, OptionalInt bucketNumber, UUID shardUuid, Map columns) + ShardRewriter createShardRewriter(FileSystemContext fileSystemContext, FileSystem fileSystem, OrcFileTailSource orcFileTailSource, long transactionId, OptionalInt bucketNumber, UUID shardUuid, Optional deltaShardUuid, boolean tableSupportsDeltaDelete, Map columns) { - return rowsToDelete -> { - 
if (rowsToDelete.isEmpty()) { - return completedFuture(ImmutableList.of()); - } - return supplyAsync(() -> rewriteShard(fileSystem, transactionId, bucketNumber, shardUuid, columns, rowsToDelete), deletionExecutor); - }; + if (tableSupportsDeltaDelete) { + return new DeltaShardRewriter(fileSystemContext, fileSystem, orcFileTailSource, transactionId, bucketNumber, shardUuid, deltaShardUuid, this, deletionExecutor, defaultReaderAttributes); + } + else { + return new InplaceShardRewriter(fileSystem, transactionId, bucketNumber, shardUuid, columns, deletionExecutor, nodeId, this, storageService, shardRecorder, backupManager); + } } - private void writeShard(UUID shardUuid) + void writeShard(UUID shardUuid) { if (backupStore.isPresent() && !backupStore.get().shardExists(shardUuid)) { throw new PrestoException(RAPTOR_ERROR, "Backup does not exist after write"); @@ -401,7 +439,7 @@ OrcDataSource openShard(FileSystem fileSystem, UUID shardUuid, ReaderAttributes } } - private ShardInfo createShardInfo(FileSystem fileSystem, UUID shardUuid, OptionalInt bucketNumber, Path file, Set nodes, long rowCount, long uncompressedSize) + ShardInfo createShardInfo(FileSystem fileSystem, UUID shardUuid, OptionalInt bucketNumber, Path file, Set nodes, long rowCount, long uncompressedSize) { try { return new ShardInfo(shardUuid, bucketNumber, nodes, computeShardStats(fileSystem, file), rowCount, fileSystem.getFileStatus(file).getLen(), uncompressedSize, xxhash64(fileSystem, file)); @@ -432,47 +470,7 @@ private List computeShardStats(FileSystem fileSystem, Path file) } } - @VisibleForTesting - Collection rewriteShard(FileSystem fileSystem, long transactionId, OptionalInt bucketNumber, UUID shardUuid, Map columns, BitSet rowsToDelete) - { - if (rowsToDelete.isEmpty()) { - return ImmutableList.of(); - } - - UUID newShardUuid = UUID.randomUUID(); - Path input = storageService.getStorageFile(shardUuid); - Path output = storageService.getStagingFile(newShardUuid); - - OrcFileInfo info = rewriteFile(fileSystem, columns, input, output, rowsToDelete); - long rowCount = info.getRowCount(); - - if (rowCount == 0) { - return shardDelta(shardUuid, Optional.empty()); - } - - shardRecorder.recordCreatedShard(transactionId, newShardUuid); - - // submit for backup and wait until it finishes - getFutureValue(backupManager.submit(newShardUuid, output)); - - Set nodes = ImmutableSet.of(nodeId); - long uncompressedSize = info.getUncompressedSize(); - - ShardInfo shard = createShardInfo(fileSystem, newShardUuid, bucketNumber, output, nodes, rowCount, uncompressedSize); - - writeShard(newShardUuid); - - return shardDelta(shardUuid, Optional.of(shard)); - } - - private static Collection shardDelta(UUID oldShardUuid, Optional shardInfo) - { - List newShards = shardInfo.map(ImmutableList::of).orElse(ImmutableList.of()); - ShardDelta delta = new ShardDelta(ImmutableList.of(oldShardUuid), newShards); - return ImmutableList.of(Slices.wrappedBuffer(SHARD_DELTA_CODEC.toJsonBytes(delta))); - } - - private OrcFileInfo rewriteFile(FileSystem fileSystem, Map columns, Path input, Path output, BitSet rowsToDelete) + OrcFileInfo rewriteFile(FileSystem fileSystem, Map columns, Path input, Path output, BitSet rowsToDelete) { try { return fileRewriter.rewrite(fileSystem, columns, input, output, rowsToDelete); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/ShardEjector.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/ShardEjector.java index 3b2ec0c97d219..299992b1003e1 100644 --- 
a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/ShardEjector.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/ShardEjector.java @@ -227,6 +227,7 @@ void process() ShardMetadata shard = queue.remove(); long shardSize = shard.getCompressedSize(); UUID shardUuid = shard.getShardUuid(); + Optional deltaUuid = shard.getDeltaUuid(); // verify backup exists if (!backupStore.get().shardExists(shardUuid)) { @@ -249,8 +250,10 @@ void process() nodes.put(target, targetSize + shardSize); nodeSize -= shardSize; - // move assignment - shardManager.replaceShardAssignment(shard.getTableId(), shardUuid, target, false); + if (!shard.isDelta()) { + // move assignment + shardManager.replaceShardAssignment(shard.getTableId(), shardUuid, deltaUuid, target, false); + } // delete local file Path file = storageService.getStorageFile(shardUuid); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManager.java index f2147cd285a38..990b0660adfb5 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManager.java @@ -31,18 +31,22 @@ public interface StorageManager default ConnectorPageSource getPageSource( FileSystemContext fileSystemContext, UUID shardUuid, + Optional deltaShardUuid, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List columnIds, List columnTypes, TupleDomain effectivePredicate, ReaderAttributes readerAttributes) { - return getPageSource(fileSystemContext, shardUuid, bucketNumber, columnIds, columnTypes, effectivePredicate, readerAttributes, OptionalLong.empty(), Optional.empty()); + return getPageSource(fileSystemContext, shardUuid, deltaShardUuid, tableSupportsDeltaDelete, bucketNumber, columnIds, columnTypes, effectivePredicate, readerAttributes, OptionalLong.empty(), Optional.empty()); } ConnectorPageSource getPageSource( FileSystemContext fileSystemContext, UUID shardUuid, + Optional deltaShardUuid, + boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, List columnIds, List columnTypes, diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManagerConfig.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManagerConfig.java index 01fc6600f4b4e..7068708fa5815 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManagerConfig.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/StorageManagerConfig.java @@ -63,6 +63,7 @@ public class StorageManagerConfig private int recoveryThreads = 10; private int organizationThreads = 5; private boolean organizationEnabled = true; + private boolean tableSupportsDeltaDelete; private Duration organizationDiscoveryInterval = new Duration(6, TimeUnit.HOURS); private Duration organizationInterval = new Duration(7, TimeUnit.DAYS); @@ -73,6 +74,20 @@ public class StorageManagerConfig private String shardDayBoundaryTimeZone = TimeZoneKey.UTC_KEY.getId(); private int maxAllowedFilesPerWriter = Integer.MAX_VALUE; + @NotNull + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + + @Config("storage.table-supports-delta-delete-default") + @ConfigDescription("Tables's default tableSupportsDeltaDelete property when creating table") + public StorageManagerConfig setTableSupportsDeltaDelete(boolean tableSupportsDeltaDelete) + { + 
this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; + return this; + } + @NotNull public URI getDataDirectory() { diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/CompactionSetCreator.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/CompactionSetCreator.java index b0014114dff7e..d47846b67b984 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/CompactionSetCreator.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/CompactionSetCreator.java @@ -77,7 +77,7 @@ private Set buildCompactionSets(Table tableInfo, Set shardsToCompact = builder.build(); if (shardsToCompact.size() > 1) { - compactionSets.add(createOrganizationSet(tableId, shardsToCompact)); + compactionSets.add(createOrganizationSet(tableId, tableInfo.isTableSupportsDeltaDelete(), shardsToCompact)); } builder = ImmutableSet.builder(); @@ -92,7 +92,7 @@ private Set buildCompactionSets(Table tableInfo, Set shardsToCompact = builder.build(); if (shardsToCompact.size() > 1) { - compactionSets.add(createOrganizationSet(tableId, shardsToCompact)); + compactionSets.add(createOrganizationSet(tableId, tableInfo.isTableSupportsDeltaDelete(), shardsToCompact)); } return compactionSets.build(); } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationJob.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationJob.java index 3dfbc7449ec58..099e509a61277 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationJob.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationJob.java @@ -24,9 +24,10 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; -import java.util.Set; import java.util.UUID; import static com.facebook.presto.spi.block.SortOrder.ASC_NULLS_FIRST; @@ -56,19 +57,19 @@ public OrganizationJob(OrganizationSet organizationSet, MetadataDao metadataDao, public void run() { try { - runJob(organizationSet.getTableId(), organizationSet.getBucketNumber(), organizationSet.getShards()); + runJob(organizationSet.getTableId(), organizationSet.isTableSupportsDeltaDelete(), organizationSet.getBucketNumber(), organizationSet.getShardsMap()); } catch (IOException e) { throw new UncheckedIOException(e); } } - private void runJob(long tableId, OptionalInt bucketNumber, Set shardUuids) + private void runJob(long tableId, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, Map> shardUuidsMap) throws IOException { long transactionId = shardManager.beginTransaction(); try { - runJob(transactionId, bucketNumber, tableId, shardUuids); + runJob(transactionId, tableSupportsDeltaDelete, bucketNumber, tableId, shardUuidsMap); } catch (Throwable e) { shardManager.rollbackTransaction(transactionId); @@ -76,13 +77,13 @@ private void runJob(long tableId, OptionalInt bucketNumber, Set shardUuids } } - private void runJob(long transactionId, OptionalInt bucketNumber, long tableId, Set shardUuids) + private void runJob(long transactionId, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, long tableId, Map> shardUuidsMap) throws IOException { TableMetadata metadata = getTableMetadata(tableId); - List newShards = performCompaction(transactionId, bucketNumber, shardUuids, metadata); - 
log.info("Compacted shards %s into %s", shardUuids, newShards.stream().map(ShardInfo::getShardUuid).collect(toList())); - shardManager.replaceShardUuids(transactionId, tableId, metadata.getColumns(), shardUuids, newShards, OptionalLong.empty()); + List newShards = performCompaction(transactionId, bucketNumber, shardUuidsMap, metadata); + log.info("Compacted shards %s into %s", shardUuidsMap, newShards.stream().map(ShardInfo::getShardUuid).collect(toList())); + shardManager.replaceShardUuids(transactionId, tableSupportsDeltaDelete, tableId, metadata.getColumns(), shardUuidsMap, newShards, OptionalLong.empty()); } private TableMetadata getTableMetadata(long tableId) @@ -99,16 +100,16 @@ private TableMetadata getTableMetadata(long tableId) return new TableMetadata(tableId, columns, sortColumnIds); } - private List performCompaction(long transactionId, OptionalInt bucketNumber, Set shardUuids, TableMetadata tableMetadata) + private List performCompaction(long transactionId, OptionalInt bucketNumber, Map> shardUuidsMap, TableMetadata tableMetadata) throws IOException { if (tableMetadata.getSortColumnIds().isEmpty()) { - return compactor.compact(transactionId, bucketNumber, shardUuids, tableMetadata.getColumns()); + return compactor.compact(transactionId, bucketNumber, shardUuidsMap, tableMetadata.getColumns()); } return compactor.compactSorted( transactionId, bucketNumber, - shardUuids, + shardUuidsMap, tableMetadata.getColumns(), tableMetadata.getSortColumnIds(), nCopies(tableMetadata.getSortColumnIds().size(), ASC_NULLS_FIRST)); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationSet.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationSet.java index c85f2ba0a5de8..55e0f41c677f3 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationSet.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/OrganizationSet.java @@ -13,7 +13,9 @@ */ package com.facebook.presto.raptor.storage.organization; +import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.UUID; @@ -24,13 +26,15 @@ public class OrganizationSet { private final long tableId; - private final Set shards; + private final boolean tableSupportsDeltaDelete; + private final Map> shardsMap; private final OptionalInt bucketNumber; - public OrganizationSet(long tableId, Set shards, OptionalInt bucketNumber) + public OrganizationSet(long tableId, boolean tableSupportsDeltaDelete, Map> shardsMap, OptionalInt bucketNumber) { this.tableId = tableId; - this.shards = requireNonNull(shards, "shards is null"); + this.tableSupportsDeltaDelete = tableSupportsDeltaDelete; + this.shardsMap = requireNonNull(shardsMap, "shards is null"); this.bucketNumber = requireNonNull(bucketNumber, "bucketNumber is null"); } @@ -39,9 +43,19 @@ public long getTableId() return tableId; } + public boolean isTableSupportsDeltaDelete() + { + return tableSupportsDeltaDelete; + } + + public Map> getShardsMap() + { + return shardsMap; + } + public Set getShards() { - return shards; + return shardsMap.keySet(); } public OptionalInt getBucketNumber() @@ -60,14 +74,15 @@ public boolean equals(Object o) } OrganizationSet that = (OrganizationSet) o; return tableId == that.tableId && - Objects.equals(shards, that.shards) && + tableSupportsDeltaDelete == that.tableSupportsDeltaDelete && + Objects.equals(shardsMap, that.shardsMap) && 
Objects.equals(bucketNumber, that.bucketNumber); } @Override public int hashCode() { - return Objects.hash(tableId, shards, bucketNumber); + return Objects.hash(tableId, tableSupportsDeltaDelete, shardsMap, bucketNumber); } @Override @@ -75,7 +90,8 @@ public String toString() { return toStringHelper(this) .add("tableId", tableId) - .add("shards", shards) + .add("tableSupportsDeltaDelete", tableSupportsDeltaDelete) + .add("shards", shardsMap) .add("bucketNumber", bucketNumber.isPresent() ? bucketNumber.getAsInt() : null) .omitNullValues() .toString(); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactionManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactionManager.java index 651801c571a2f..2577e231e707e 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactionManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactionManager.java @@ -199,6 +199,7 @@ private Collection filterAndCreateCompactionSets(long tableId, } Set filteredShards = tableShards.stream() + .filter(shard -> !shard.isDelta()) .filter(this::needsCompaction) .filter(shard -> !organizer.inProgress(shard.getShardUuid())) .collect(toSet()); @@ -232,6 +233,10 @@ private boolean needsCompaction(ShardMetadata shard) if (shard.getRowCount() < (FILL_FACTOR * maxShardRows)) { return true; } + + if (shard.getDeltaUuid().isPresent()) { + return true; + } return false; } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactor.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactor.java index 0ceebd0c76668..81493f2875bdb 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactor.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardCompactor.java @@ -37,15 +37,17 @@ import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.NoSuchElementException; +import java.util.Optional; import java.util.OptionalInt; import java.util.PriorityQueue; import java.util.Queue; -import java.util.Set; import java.util.UUID; import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.airlift.units.Duration.nanosSince; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; @@ -69,7 +71,7 @@ public ShardCompactor(StorageManager storageManager, ReaderAttributes readerAttr this.readerAttributes = requireNonNull(readerAttributes, "readerAttributes is null"); } - public List compact(long transactionId, OptionalInt bucketNumber, Set uuids, List columns) + public List compact(long transactionId, OptionalInt bucketNumber, Map> uuidsMap, List columns) throws IOException { long start = System.nanoTime(); @@ -80,22 +82,26 @@ public List compact(long transactionId, OptionalInt bucketNumber, Set List shardInfos; try { - shardInfos = compact(storagePageSink, bucketNumber, uuids, columnIds, columnTypes); + shardInfos = compact(storagePageSink, bucketNumber, uuidsMap, columnIds, columnTypes); } catch (IOException | RuntimeException e) { storagePageSink.rollback(); throw e; } - updateStats(uuids.size(), shardInfos.size(), 
nanosSince(start).toMillis()); + // Will consider delta shard as part of inputShards + int deltaCount = uuidsMap.values().stream().filter(uuid -> uuid.isPresent()).collect(toImmutableSet()).size(); + updateStats(uuidsMap.size() + deltaCount, shardInfos.size(), nanosSince(start).toMillis()); return shardInfos; } - private List compact(StoragePageSink storagePageSink, OptionalInt bucketNumber, Set uuids, List columnIds, List columnTypes) + private List compact(StoragePageSink storagePageSink, OptionalInt bucketNumber, Map> uuidsMap, List columnIds, List columnTypes) throws IOException { - for (UUID uuid : uuids) { - try (ConnectorPageSource pageSource = storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes)) { + for (Map.Entry> entry : uuidsMap.entrySet()) { + UUID uuid = entry.getKey(); + Optional deltaUuid = entry.getValue(); + try (ConnectorPageSource pageSource = storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, deltaUuid, true, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes)) { while (!pageSource.isFinished()) { Page page = pageSource.getNextPage(); if (isNullOrEmptyPage(page)) { @@ -111,7 +117,7 @@ private List compact(StoragePageSink storagePageSink, OptionalInt buc return getFutureValue(storagePageSink.commit()); } - public List compactSorted(long transactionId, OptionalInt bucketNumber, Set uuids, List columns, List sortColumnIds, List sortOrders) + public List compactSorted(long transactionId, OptionalInt bucketNumber, Map> uuidsMap, List columns, List sortColumnIds, List sortOrders) throws IOException { checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size"); @@ -130,11 +136,12 @@ public List compactSorted(long transactionId, OptionalInt bucketNumbe Queue rowSources = new PriorityQueue<>(); StoragePageSink outputPageSink = storageManager.createStoragePageSink(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, transactionId, bucketNumber, columnIds, columnTypes, false); try { - for (UUID uuid : uuids) { - ConnectorPageSource pageSource = storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes); + uuidsMap.forEach((uuid, deltaUuid) -> { + ConnectorPageSource pageSource = storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, deltaUuid, false, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes); SortedPageSource rowSource = new SortedPageSource(pageSource, columnTypes, sortIndexes, sortOrders); rowSources.add(rowSource); - } + }); + while (!rowSources.isEmpty()) { SortedPageSource rowSource = rowSources.poll(); if (!rowSource.hasNext()) { @@ -154,7 +161,9 @@ public List compactSorted(long transactionId, OptionalInt bucketNumbe outputPageSink.flush(); List shardInfos = getFutureValue(outputPageSink.commit()); - updateStats(uuids.size(), shardInfos.size(), nanosSince(start).toMillis()); + // Will consider delta shard as part of inputShards + int deltaCount = uuidsMap.values().stream().filter(uuid -> uuid.isPresent()).collect(toImmutableSet()).size(); + updateStats(uuidsMap.size() + deltaCount, shardInfos.size(), nanosSince(start).toMillis()); return shardInfos; } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardIndexInfo.java 
b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardIndexInfo.java index a875f3009da61..8e235d020a36f 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardIndexInfo.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardIndexInfo.java @@ -26,6 +26,8 @@ public class ShardIndexInfo private final long tableId; private final OptionalInt bucketNumber; private final UUID shardUuid; + private final boolean isDelta; + private final Optional deltaUuid; private final long rowCount; private final long uncompressedSize; private final Optional sortRange; @@ -35,6 +37,8 @@ public ShardIndexInfo( long tableId, OptionalInt bucketNumber, UUID shardUuid, + boolean isDelta, + Optional deltaUuid, long rowCount, long uncompressedSize, Optional sortRange, @@ -43,6 +47,8 @@ public ShardIndexInfo( this.tableId = tableId; this.bucketNumber = requireNonNull(bucketNumber, "bucketNumber is null"); this.shardUuid = requireNonNull(shardUuid, "shardUuid is null"); + this.isDelta = isDelta; + this.deltaUuid = requireNonNull(deltaUuid, "deltaUuid is null"); this.rowCount = rowCount; this.uncompressedSize = uncompressedSize; this.sortRange = requireNonNull(sortRange, "sortRange is null"); @@ -64,6 +70,16 @@ public UUID getShardUuid() return shardUuid; } + public boolean isDelta() + { + return isDelta; + } + + public Optional getDeltaUuid() + { + return deltaUuid; + } + public long getRowCount() { return rowCount; @@ -97,6 +113,8 @@ public boolean equals(Object o) return tableId == that.tableId && rowCount == that.rowCount && uncompressedSize == that.uncompressedSize && + isDelta == that.isDelta && + Objects.equals(deltaUuid, that.deltaUuid) && Objects.equals(bucketNumber, that.bucketNumber) && Objects.equals(shardUuid, that.shardUuid) && Objects.equals(sortRange, that.sortRange) && @@ -106,7 +124,7 @@ public int hashCode() { - return Objects.hash(tableId, bucketNumber, shardUuid, rowCount, uncompressedSize, sortRange, temporalRange); + return Objects.hash(tableId, bucketNumber, shardUuid, rowCount, isDelta, deltaUuid, uncompressedSize, sortRange, temporalRange); } @Override @@ -116,6 +134,8 @@ public String toString() { return toStringHelper(this) .add("tableId", tableId) .add("bucketNumber", bucketNumber.isPresent() ? bucketNumber.getAsInt() : null) .add("shardUuid", shardUuid) + .add("isDelta", isDelta) + .add("deltaUuid", deltaUuid.isPresent() ?
deltaUuid.get() : null) .add("rowCount", rowCount) .add("uncompressedSize", uncompressedSize) .add("sortRange", sortRange.orElse(null)) diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizationManager.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizationManager.java index e9208d76e9f02..f6985517de7cb 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizationManager.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizationManager.java @@ -280,7 +280,7 @@ private static Set getOverlappingOrganizationSets(Table tableIn else { Set indexInfos = builder.build(); if (indexInfos.size() > 1) { - organizationSets.add(createOrganizationSet(tableInfo.getTableId(), indexInfos)); + organizationSets.add(createOrganizationSet(tableInfo.getTableId(), tableInfo.isTableSupportsDeltaDelete(), indexInfos)); } builder = ImmutableSet.builder(); previousRange = nextRange; @@ -291,7 +291,7 @@ private static Set getOverlappingOrganizationSets(Table tableIn Set indexInfos = builder.build(); if (indexInfos.size() > 1) { - organizationSets.add(createOrganizationSet(tableInfo.getTableId(), indexInfos)); + organizationSets.add(createOrganizationSet(tableInfo.getTableId(), tableInfo.isTableSupportsDeltaDelete(), indexInfos)); } return organizationSets; } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizer.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizer.java index e85fed5722119..b7084b76e8765 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizer.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizer.java @@ -23,16 +23,17 @@ import javax.annotation.PreDestroy; import javax.inject.Inject; -import java.util.Set; +import java.util.Map; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicBoolean; import static com.facebook.airlift.concurrent.Threads.daemonThreadsNamed; import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.Sets.newConcurrentHashSet; import static java.util.Objects.requireNonNull; import static java.util.concurrent.CompletableFuture.runAsync; import static java.util.concurrent.Executors.newFixedThreadPool; @@ -47,7 +48,7 @@ public class ShardOrganizer private final AtomicBoolean shutdown = new AtomicBoolean(); // Tracks shards that are scheduled for compaction so that we do not schedule them more than once - private final Set shardsInProgress = newConcurrentHashSet(); + private final Map> shardsInProgress = new ConcurrentHashMap<>(); private final JobFactory jobFactory; private final CounterStat successCount = new CounterStat(); private final CounterStat failureCount = new CounterStat(); @@ -76,10 +77,12 @@ public void shutdown() public CompletableFuture enqueue(OrganizationSet organizationSet) { - shardsInProgress.addAll(organizationSet.getShards()); + shardsInProgress.putAll(organizationSet.getShardsMap()); return runAsync(jobFactory.create(organizationSet), executorService) .whenComplete((none, throwable) -> { - 
shardsInProgress.removeAll(organizationSet.getShards()); + for (UUID uuid : organizationSet.getShardsMap().keySet()) { + shardsInProgress.remove(uuid); + } if (throwable == null) { successCount.update(1); } @@ -92,7 +95,7 @@ public CompletableFuture enqueue(OrganizationSet organizationSet) public boolean inProgress(UUID shardUuid) { - return shardsInProgress.contains(shardUuid); + return shardsInProgress.keySet().contains(shardUuid); } @Managed diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizerUtil.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizerUtil.java index 8826fee7d2eee..e5625ef458710 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizerUtil.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/storage/organization/ShardOrganizerUtil.java @@ -42,6 +42,7 @@ import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; import static com.facebook.presto.raptor.storage.ColumnIndexStatsUtils.jdbcType; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.Iterables.getOnlyElement; import static com.google.common.collect.Iterables.partition; import static com.google.common.collect.Maps.uniqueIndex; @@ -137,6 +138,8 @@ private static ShardIndexInfo toShardIndexInfo(ShardMetadata shardMetadata, Opti shardMetadata.getTableId(), shardMetadata.getBucketNumber(), shardMetadata.getShardUuid(), + shardMetadata.isDelta(), + shardMetadata.getDeltaUuid(), shardMetadata.getRowCount(), shardMetadata.getUncompressedSize(), sortRange, @@ -238,17 +241,16 @@ private static Object getValue(ResultSet resultSet, Type type, String columnName throw new IllegalArgumentException("Unhandled type: " + type); } - static OrganizationSet createOrganizationSet(long tableId, Set shardsToCompact) + static OrganizationSet createOrganizationSet(long tableId, boolean tableSupportsDeltaDelete, Set shardsToCompact) { - Set uuids = shardsToCompact.stream() - .map(ShardIndexInfo::getShardUuid) - .collect(toSet()); + Map> uuidsMap = shardsToCompact.stream() + .collect(toImmutableMap(ShardIndexInfo::getShardUuid, ShardIndexInfo::getDeltaUuid)); Set bucketNumber = shardsToCompact.stream() .map(ShardIndexInfo::getBucketNumber) .collect(toSet()); checkArgument(bucketNumber.size() == 1); - return new OrganizationSet(tableId, uuids, getOnlyElement(bucketNumber)); + return new OrganizationSet(tableId, tableSupportsDeltaDelete, uuidsMap, getOnlyElement(bucketNumber)); } } diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableMetadataSystemTable.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableMetadataSystemTable.java index d00390b97d3cf..ae46c726216d6 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableMetadataSystemTable.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableMetadataSystemTable.java @@ -89,7 +89,8 @@ public TableMetadataSystemTable(@ForMetadata IDBI dbi, TypeManager typeManager) new ColumnMetadata("distribution_name", VARCHAR), new ColumnMetadata("bucket_count", BIGINT), new ColumnMetadata("bucketing_columns", arrayOfVarchar), - new ColumnMetadata("organized", BOOLEAN))); + new ColumnMetadata("organized", BOOLEAN), + new ColumnMetadata("table_supports_delta_delete", BOOLEAN))); } @Override 
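// Illustrative sketch, not part of the patch: with the extra column declared above, every row returned by
// system.tables now ends with two BOOLEAN fields (organized, table_supports_delta_delete). A JDBC client
// could read the new flag like this; the `connection` variable is assumed to be an open connection to the catalog:
//   ResultSet rs = connection.createStatement().executeQuery(
//           "SELECT table_name, table_supports_delta_delete FROM system.tables");
//   while (rs.next()) {
//       boolean supportsDeltaDelete = rs.getBoolean("table_supports_delta_delete");
//   }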
@@ -190,6 +191,9 @@ private static List buildPages(MetadataDao dao, ConnectorTableMetadata tab // organized BOOLEAN.writeBoolean(pageBuilder.nextBlockBuilder(), tableRow.isOrganized()); + + // delta delete enabled + BOOLEAN.writeBoolean(pageBuilder.nextBlockBuilder(), tableRow.isTableSupportsDeltaDelete()); } return pageBuilder.build(); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableStatsSystemTable.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableStatsSystemTable.java index 8df1bb55311e9..dd4da4c8a3336 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableStatsSystemTable.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/systemtables/TableStatsSystemTable.java @@ -63,6 +63,7 @@ public class TableStatsSystemTable .add(new ColumnMetadata("update_time", TIMESTAMP)) .add(new ColumnMetadata("table_version", BIGINT)) .add(new ColumnMetadata("shard_count", BIGINT)) + .add(new ColumnMetadata("delta_count", BIGINT)) .add(new ColumnMetadata("row_count", BIGINT)) .add(new ColumnMetadata("compressed_size", BIGINT)) .add(new ColumnMetadata("uncompressed_size", BIGINT)) @@ -112,6 +113,7 @@ private static List buildPages(MetadataDao dao, TupleDomain tuple TIMESTAMP.writeLong(pageBuilder.nextBlockBuilder(), row.getUpdateTime()); BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getTableVersion()); BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getShardCount()); + BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getDeltaCount()); BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getRowCount()); BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getCompressedSize()); BIGINT.writeLong(pageBuilder.nextBlockBuilder(), row.getUncompressedSize()); diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/TestRaptorConnector.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/TestRaptorConnector.java index cd319c6c68170..fb1dc44a77d77 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/TestRaptorConnector.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/TestRaptorConnector.java @@ -58,6 +58,7 @@ import java.util.Collection; import java.util.Optional; +import static com.facebook.presto.raptor.RaptorTableProperties.TABLE_SUPPORTS_DELTA_DELETE; import static com.facebook.presto.raptor.RaptorTableProperties.TEMPORAL_COLUMN_PROPERTY; import static com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry; import static com.facebook.presto.raptor.metadata.TestDatabaseShardManager.createShardManager; @@ -115,7 +116,7 @@ public void setup() config), new RaptorNodePartitioningProvider(nodeSupplier), new RaptorSessionProperties(config), - new RaptorTableProperties(typeRegistry), + new RaptorTableProperties(typeRegistry, config), ImmutableSet.of(), new AllowAllAccessControl(), dbi, @@ -234,7 +235,7 @@ private void assertSplitShard(Type temporalType, String min, String max, String new ConnectorTableMetadata( new SchemaTableName("test", "test"), ImmutableList.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("time", temporalType)), - ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "time")), + ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "time", TABLE_SUPPORTS_DELTA_DELETE, false)), false); connector.commit(transaction); @@ -275,7 +276,8 @@ private long createTable(String name) SESSION, new ConnectorTableMetadata( new SchemaTableName("test", name), - ImmutableList.of(new ColumnMetadata("id", BIGINT))), + ImmutableList.of(new 
ColumnMetadata("id", BIGINT)), + ImmutableMap.of(TABLE_SUPPORTS_DELTA_DELETE, false)), false); connector.commit(transaction); diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/integration/TestRaptorIntegrationSmokeTest.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/integration/TestRaptorIntegrationSmokeTest.java index 6e191dbe56835..a269da1430571 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/integration/TestRaptorIntegrationSmokeTest.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/integration/TestRaptorIntegrationSmokeTest.java @@ -374,6 +374,8 @@ public void testTableProperties() { computeActual("CREATE TABLE test_table_properties_1 (foo BIGINT, bar BIGINT, ds DATE) WITH (ordering=array['foo','bar'], temporal_column='ds')"); computeActual("CREATE TABLE test_table_properties_2 (foo BIGINT, bar BIGINT, ds DATE) WITH (ORDERING=array['foo','bar'], TEMPORAL_COLUMN='ds')"); + computeActual("CREATE TABLE test_table_properties_3 (foo BIGINT, bar BIGINT, ds DATE) WITH (TABLE_SUPPORTS_DELTA_DELETE=false)"); + computeActual("CREATE TABLE test_table_properties_4 (foo BIGINT, bar BIGINT, ds DATE) WITH (table_supports_delta_delete=true)"); } @Test @@ -589,6 +591,7 @@ public void testShowCreateTable() " bucket_count = 32,\n" + " bucketed_on = ARRAY['c1','c6'],\n" + " ordering = ARRAY['c6','c1'],\n" + + " table_supports_delta_delete = false,\n" + " temporal_column = 'c7'\n" + ")", getSession().getCatalog().get(), getSession().getSchema().get(), "test_show_create_table"); @@ -618,7 +621,8 @@ public void testShowCreateTable() " bucket_count = 32,\n" + " bucketed_on = ARRAY['c1','c6'],\n" + " ordering = ARRAY['c6','c1'],\n" + - " organized = true\n" + + " organized = true,\n" + + " table_supports_delta_delete = true\n" + ")", getSession().getCatalog().get(), getSession().getSchema().get(), "test_show_create_table_organized"); assertUpdate(createTableSql); @@ -632,6 +636,8 @@ public void testShowCreateTable() actualResult = computeActual("SHOW CREATE TABLE " + getSession().getCatalog().get() + "." 
+ getSession().getSchema().get() + ".test_show_create_table_organized"); assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql); + // For table_supports_delta_delete, we don't have null value, thus can't distinguish between user set false and default false + // As a result, will always show table_supports_delta_delete properties when `SHOW CREATE TABLE` createTableSql = format("" + "CREATE TABLE %s.%s.%s (\n" + " \"c\"\"1\" bigint,\n" + @@ -640,10 +646,14 @@ public void testShowCreateTable() " \"c'4\" array(bigint),\n" + " c5 map(bigint, varchar)\n" + ")", - getSession().getCatalog().get(), getSession().getSchema().get(), "\"test_show_create_table\"\"2\""); + getSession().getCatalog().get(), getSession().getSchema().get(), "test_show_create_table_default"); assertUpdate(createTableSql); - actualResult = computeActual("SHOW CREATE TABLE \"test_show_create_table\"\"2\""); + actualResult = computeActual("SHOW CREATE TABLE \"test_show_create_table_default\""); + createTableSql += format("\n" + + "WITH (\n" + + " table_supports_delta_delete = false\n" + + ")"); assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql); } @@ -667,6 +677,9 @@ public void testTablesSystemTable() assertUpdate("" + "CREATE TABLE system_tables_test5 (c50 timestamp, c51 varchar, c52 double, c53 bigint, c54 bigint) " + "WITH (ordering = ARRAY['c51', 'c52'], distribution_name = 'test_distribution', bucket_count = 50, bucketed_on = ARRAY ['c53', 'c54'], organized = true)"); + assertUpdate("" + + "CREATE TABLE system_tables_test6 (c60 timestamp, c61 varchar, c62 double, c63 bigint, c64 bigint) " + + "WITH (ordering = ARRAY['c61', 'c62'], distribution_name = 'test_distribution', bucket_count = 50, bucketed_on = ARRAY ['c63', 'c64'], organized = true, table_supports_delta_delete = true)"); MaterializedResult actualResults = computeActual("SELECT * FROM system.tables"); assertEquals( @@ -680,35 +693,39 @@ public void testTablesSystemTable() .add(BIGINT) // bucket_count .add(new ArrayType(VARCHAR)) // bucket_columns .add(BOOLEAN) // organized + .add(BOOLEAN) // table_supports_delta_delete .build()); Map map = actualResults.getMaterializedRows().stream() .filter(row -> ((String) row.getField(1)).startsWith("system_tables_test")) .collect(toImmutableMap(row -> ((String) row.getField(1)), identity())); - assertEquals(map.size(), 6); + assertEquals(map.size(), 7); assertEquals( map.get("system_tables_test0").getFields(), - asList("tpch", "system_tables_test0", null, null, null, null, null, Boolean.FALSE)); + asList("tpch", "system_tables_test0", null, null, null, null, null, Boolean.FALSE, Boolean.FALSE)); assertEquals( map.get("system_tables_test1").getFields(), - asList("tpch", "system_tables_test1", "c10", null, null, null, null, Boolean.FALSE)); + asList("tpch", "system_tables_test1", "c10", null, null, null, null, Boolean.FALSE, Boolean.FALSE)); assertEquals( map.get("system_tables_test2").getFields(), - asList("tpch", "system_tables_test2", "c20", ImmutableList.of("c22", "c21"), null, null, null, Boolean.FALSE)); + asList("tpch", "system_tables_test2", "c20", ImmutableList.of("c22", "c21"), null, null, null, Boolean.FALSE, Boolean.FALSE)); assertEquals( map.get("system_tables_test3").getFields(), - asList("tpch", "system_tables_test3", "c30", null, null, 40L, ImmutableList.of("c34", "c33"), Boolean.FALSE)); + asList("tpch", "system_tables_test3", "c30", null, null, 40L, ImmutableList.of("c34", "c33"), Boolean.FALSE, Boolean.FALSE)); assertEquals( 
map.get("system_tables_test4").getFields(), - asList("tpch", "system_tables_test4", "c40", ImmutableList.of("c41", "c42"), "test_distribution", 50L, ImmutableList.of("c43", "c44"), Boolean.FALSE)); + asList("tpch", "system_tables_test4", "c40", ImmutableList.of("c41", "c42"), "test_distribution", 50L, ImmutableList.of("c43", "c44"), Boolean.FALSE, Boolean.FALSE)); assertEquals( map.get("system_tables_test5").getFields(), - asList("tpch", "system_tables_test5", null, ImmutableList.of("c51", "c52"), "test_distribution", 50L, ImmutableList.of("c53", "c54"), Boolean.TRUE)); + asList("tpch", "system_tables_test5", null, ImmutableList.of("c51", "c52"), "test_distribution", 50L, ImmutableList.of("c53", "c54"), Boolean.TRUE, Boolean.FALSE)); + assertEquals( + map.get("system_tables_test6").getFields(), + asList("tpch", "system_tables_test6", null, ImmutableList.of("c61", "c62"), "test_distribution", 50L, ImmutableList.of("c63", "c64"), Boolean.TRUE, Boolean.TRUE)); actualResults = computeActual("SELECT * FROM system.tables WHERE table_schema = 'tpch'"); long actualRowCount = actualResults.getMaterializedRows().stream() .filter(row -> ((String) row.getField(1)).startsWith("system_tables_test")) .count(); - assertEquals(actualRowCount, 6); + assertEquals(actualRowCount, 7); actualResults = computeActual("SELECT * FROM system.tables WHERE table_name = 'system_tables_test3'"); assertEquals(actualResults.getMaterializedRows().size(), 1); @@ -729,6 +746,7 @@ public void testTablesSystemTable() assertUpdate("DROP TABLE system_tables_test3"); assertUpdate("DROP TABLE system_tables_test4"); assertUpdate("DROP TABLE system_tables_test5"); + assertUpdate("DROP TABLE system_tables_test6"); assertEquals(computeActual("SELECT * FROM system.tables WHERE table_schema IN ('foo', 'bar')").getRowCount(), 0); } @@ -821,6 +839,76 @@ public void testTableStatsSystemTable() assertUpdate("DROP TABLE test_table_stats"); } + @SuppressWarnings("OverlyStrongTypeCast") + @Test + public void testTableStatsSystemTableWithDeltaDelete() + { + // create empty table + assertUpdate("CREATE TABLE test_table_stats (x bigint) WITH (table_supports_delta_delete = true)"); + + @Language("SQL") String sql = "" + + "SELECT create_time, update_time, table_version," + + " shard_count, row_count, uncompressed_size, delta_count\n" + + "FROM system.table_stats\n" + + "WHERE table_schema = 'tpch'\n" + + " AND table_name = 'test_table_stats'"; + MaterializedRow row = getOnlyElement(computeActual(sql).getMaterializedRows()); + + LocalDateTime createTime = (LocalDateTime) row.getField(0); + LocalDateTime updateTime1 = (LocalDateTime) row.getField(1); + assertEquals(createTime, updateTime1); + + assertEquals(row.getField(2), 1L); // table_version + assertEquals(row.getField(3), 0L); // shard_count + assertEquals(row.getField(4), 0L); // row_count + long size1 = (long) row.getField(5); // uncompressed_size + + // insert + assertUpdate("INSERT INTO test_table_stats VALUES (1), (2), (3), (4)", 4); + row = getOnlyElement(computeActual(sql).getMaterializedRows()); + + assertEquals(row.getField(0), createTime); + LocalDateTime updateTime2 = (LocalDateTime) row.getField(1); + assertLessThan(updateTime1, updateTime2); + + assertEquals(row.getField(2), 2L); // table_version + assertGreaterThanOrEqual((Long) row.getField(3), 1L); // shard_count + assertEquals(row.getField(4), 4L); // row_count + assertGreaterThanOrEqual((Long) row.getField(6), 0L); // delta_count + long size2 = (long) row.getField(5); // uncompressed_size + assertGreaterThan(size2, size1); + + 
// delete + assertUpdate("DELETE FROM test_table_stats WHERE x IN (2, 4)", 2); + row = getOnlyElement(computeActual(sql).getMaterializedRows()); + + assertEquals(row.getField(0), createTime); + LocalDateTime updateTime3 = (LocalDateTime) row.getField(1); + assertLessThan(updateTime2, updateTime3); + + assertEquals(row.getField(2), 3L); // table_version + assertGreaterThanOrEqual((Long) row.getField(3), 1L); // shard_count + assertEquals(row.getField(4), 2L); // row_count + assertGreaterThanOrEqual((Long) row.getField(6), 1L); // delta_count + long size3 = (long) row.getField(5); // uncompressed_Size + // without compaction, the size will grow with delta delete + assertGreaterThan(size3, size2); + + // add column + assertUpdate("ALTER TABLE test_table_stats ADD COLUMN y bigint"); + row = getOnlyElement(computeActual(sql).getMaterializedRows()); + + assertEquals(row.getField(0), createTime); + assertLessThan(updateTime3, (LocalDateTime) row.getField(1)); + + assertEquals(row.getField(2), 4L); // table_version + assertEquals(row.getField(4), 2L); // row_count + assertEquals(row.getField(5), size3); // uncompressed_size + + // cleanup + assertUpdate("DROP TABLE test_table_stats"); + } + @Test public void testAlterTable() { @@ -882,6 +970,33 @@ public void testDelete() assertUpdate("DROP TABLE test_delete_table"); } + @Test + public void testDeltaDelete() + { + assertUpdate("CREATE TABLE test_delta_delete_table (c1 bigint, c2 bigint) WITH (table_supports_delta_delete = true)"); + assertUpdate("INSERT INTO test_delta_delete_table VALUES (1, 1), (1, 2), (1, 3), (1, 4), (11, 1), (11, 2)", 6); + + assertUpdate("ALTER TABLE test_delta_delete_table ADD COLUMN c3 bigint"); + assertUpdate("INSERT INTO test_delta_delete_table VALUES (2, 1, 1), (2, 2, 2), (2, 3, 3), (2, 4, 4), (22, 1, 1), (22, 2, 2), (22, 4, 4)", 7); + + assertUpdate("DELETE FROM test_delta_delete_table WHERE c1 = 1", 4); + assertQuery("SELECT * FROM test_delta_delete_table", "VALUES (11, 1, NULL), (11, 2, NULL), (2, 1, 1), (2, 2, 2), (2, 3, 3), (2, 4, 4), (22, 1, 1), (22, 2, 2), (22, 4, 4)"); + + assertUpdate("DELETE FROM test_delta_delete_table WHERE c1 = 1", 0); + assertQuery("SELECT * FROM test_delta_delete_table", "VALUES (11, 1, NULL), (11, 2, NULL), (2, 1, 1), (2, 2, 2), (2, 3, 3), (2, 4, 4), (22, 1, 1), (22, 2, 2), (22, 4, 4)"); + + assertUpdate("ALTER TABLE test_delta_delete_table DROP COLUMN c2"); + assertUpdate("INSERT INTO test_delta_delete_table VALUES (3, 1), (3, 2), (3, 3), (3, 4)", 4); + + assertUpdate("DELETE FROM test_delta_delete_table WHERE c1 = 2", 4); + assertQuery("SELECT * FROM test_delta_delete_table", "VALUES (11, NULL), (11, NULL), (22, 1), (22, 2), (22, 4), (3, 1), (3, 2), (3, 3), (3, 4)"); + + assertUpdate("DELETE FROM test_delta_delete_table WHERE c1 % 11 = 0", 5); + assertQuery("SELECT * FROM test_delta_delete_table", "VALUES (3, 1), (3, 2), (3, 3), (3, 4)"); + + assertUpdate("DROP TABLE test_delta_delete_table"); + } + @Test public void testTriggerBucketBalancer() { diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestDatabaseShardManager.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestDatabaseShardManager.java index 3cea8fe02b7a0..c53610aeeddd9 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestDatabaseShardManager.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestDatabaseShardManager.java @@ -35,6 +35,7 @@ import com.google.common.io.Files; import io.airlift.slice.Slice; import 
io.airlift.units.Duration; +import javafx.util.Pair; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.IDBI; @@ -47,6 +48,7 @@ import java.io.IOException; import java.net.URI; import java.sql.ResultSet; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.time.LocalDate; @@ -61,6 +63,8 @@ import java.util.OptionalLong; import java.util.Set; import java.util.UUID; +import java.util.function.Function; +import java.util.stream.Collectors; import static com.facebook.presto.raptor.RaptorErrorCode.RAPTOR_EXTERNAL_BATCH_ALREADY_EXISTS; import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; @@ -81,6 +85,7 @@ import static com.facebook.presto.spi.type.VarcharType.createVarcharType; import static com.google.common.base.Strings.repeat; import static com.google.common.base.Ticker.systemTicker; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.Iterables.getOnlyElement; import static com.google.common.collect.Iterators.concat; import static com.google.common.collect.Iterators.transform; @@ -93,6 +98,7 @@ import static java.util.stream.Collectors.toSet; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.fail; @Test(singleThreaded = true) @@ -121,6 +127,42 @@ public void teardown() deleteRecursively(dataDir.toPath(), ALLOW_INSECURE); } + @Test + public void testCreateTable() + throws SQLException + { + long tableId = createTable("test"); + List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); + + Statement stmt = dummyHandle.getConnection().createStatement(); + ResultSet resultSet = stmt.executeQuery("select * from " + shardIndexTable(tableId)); + ResultSetMetaData metaData = resultSet.getMetaData(); + assertEquals(metaData.getColumnLabel(1), "SHARD_ID"); + assertEquals(metaData.getColumnLabel(2), "SHARD_UUID"); + assertNotEquals(metaData.getColumnLabel(3), "DELTA_SHARD_UUID"); + resultSet.close(); + stmt.close(); + } + + @Test + public void testCreateTableWithDeltaDelete() + throws SQLException + { + long tableId = createTable("test"); + List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); + shardManager.createTable(tableId, true, columns, false, OptionalLong.empty()); + + Statement stmt = dummyHandle.getConnection().createStatement(); + ResultSet resultSet = stmt.executeQuery("select * from " + shardIndexTable(tableId)); + ResultSetMetaData metaData = resultSet.getMetaData(); + assertEquals(metaData.getColumnLabel(1), "SHARD_ID"); + assertEquals(metaData.getColumnLabel(2), "SHARD_UUID"); + assertEquals(metaData.getColumnLabel(3), "DELTA_SHARD_UUID"); + resultSet.close(); + stmt.close(); + } + @Test public void testCommit() { @@ -134,7 +176,7 @@ public void testCommit() List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0); @@ -150,7 +192,7 @@ public void testRollback() List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); List shards = ImmutableList.of(shardInfo(UUID.randomUUID(), "node1")); - 
shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.rollbackTransaction(transactionId); @@ -172,16 +214,16 @@ public void testAssignShard() List shardNodes = ImmutableList.of(shardInfo(shard, "node1")); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shardNodes, Optional.empty(), 0); ShardNodes actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all())); - assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node1"))); + assertEquals(actual, new ShardNodes(shard, Optional.empty(), ImmutableSet.of("node1"))); try { - shardManager.replaceShardAssignment(tableId, shard, "node2", true); + shardManager.replaceShardAssignment(tableId, shard, Optional.empty(), "node2", true); fail("expected exception"); } catch (PrestoException e) { @@ -189,16 +231,16 @@ public void testAssignShard() } // replace shard assignment to another node - shardManager.replaceShardAssignment(tableId, shard, "node2", false); + shardManager.replaceShardAssignment(tableId, shard, Optional.empty(), "node2", false); actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all())); - assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node2"))); + assertEquals(actual, new ShardNodes(shard, Optional.empty(), ImmutableSet.of("node2"))); // replacing shard assignment should be idempotent - shardManager.replaceShardAssignment(tableId, shard, "node2", false); + shardManager.replaceShardAssignment(tableId, shard, Optional.empty(), "node2", false); actual = getOnlyElement(getShardNodes(tableId, TupleDomain.all())); - assertEquals(actual, new ShardNodes(shard, ImmutableSet.of("node2"))); + assertEquals(actual, new ShardNodes(shard, Optional.empty(), ImmutableSet.of("node2"))); } @Test @@ -214,22 +256,22 @@ public void testGetNodeBytes() new ShardInfo(shard2, bucketNumber, ImmutableSet.of("node1"), ImmutableList.of(), 5, 55, 555, 0)); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shardNodes, Optional.empty(), 0); assertEquals(getShardNodes(tableId, TupleDomain.all()), ImmutableSet.of( - new ShardNodes(shard1, ImmutableSet.of("node1")), - new ShardNodes(shard2, ImmutableSet.of("node1")))); + new ShardNodes(shard1, Optional.empty(), ImmutableSet.of("node1")), + new ShardNodes(shard2, Optional.empty(), ImmutableSet.of("node1")))); assertEquals(shardManager.getNodeBytes(), ImmutableMap.of("node1", 88L)); - shardManager.replaceShardAssignment(tableId, shard1, "node2", false); + shardManager.replaceShardAssignment(tableId, shard1, Optional.empty(), "node2", false); assertEquals(getShardNodes(tableId, TupleDomain.all()), ImmutableSet.of( - new ShardNodes(shard1, ImmutableSet.of("node2")), - new ShardNodes(shard2, ImmutableSet.of("node1")))); + new ShardNodes(shard1, Optional.empty(), ImmutableSet.of("node2")), + new ShardNodes(shard2, Optional.empty(), ImmutableSet.of("node1")))); 
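// Descriptive note (not part of the patch): ShardNodes now carries the shard's optional delta UUID as its
// second constructor argument; shards without a delete delta pass Optional.empty(), which is what the
// updated expectations in these tests assert.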
assertEquals(shardManager.getNodeBytes(), ImmutableMap.of("node1", 55L, "node2", 33L)); } @@ -249,7 +291,7 @@ public void testGetNodeTableShards() inputShards.add(shardInfo(uuid, node)); } - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, inputShards.build(), Optional.empty(), 0); @@ -271,7 +313,7 @@ public void testGetExistingShards() List shardNodes = ImmutableList.of(shardInfo(shard1, "node1"), shardInfo(shard2, "node1")); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shardNodes, Optional.empty(), 0); @@ -280,9 +322,83 @@ public void testGetExistingShards() assertEquals(actual, expected); } + @Test + public void testReplaceShardUuidsFunction() + throws SQLException + { + // node1 shard1 shard4 + // node2 shard2 + // node3 shard3 + + // goal: replace shard1 and shard4 with newUuid5 + + // Initial data + long tableId = createTable("test"); + List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + UUID uuid4 = UUID.randomUUID(); + ShardInfo shardInfo1 = new ShardInfo(uuid1, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 1, 1, 1, 1); + ShardInfo shardInfo2 = new ShardInfo(uuid2, OptionalInt.empty(), ImmutableSet.of("node2"), ImmutableList.of(), 2, 2, 2, 2); + ShardInfo shardInfo3 = new ShardInfo(uuid3, OptionalInt.empty(), ImmutableSet.of("node3"), ImmutableList.of(), 3, 3, 3, 3); + ShardInfo shardInfo4 = new ShardInfo(uuid4, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 4, 4, 4, 4); + + shardManager.createTable(tableId, true, columns, false, OptionalLong.empty()); + long transactionId = shardManager.beginTransaction(); + shardManager.commitShards(transactionId, tableId, columns, ImmutableList.of(shardInfo1, shardInfo2, shardInfo3, shardInfo4), Optional.empty(), 0); + + // New data + UUID newUuid5 = UUID.randomUUID(); + ShardInfo newShardInfo4 = new ShardInfo(newUuid5, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 5, 5, 5, 5); + + // toReplace + Set shardMetadata = shardManager.getNodeShards("node1"); + Set replacedUuids = shardMetadata.stream().map(ShardMetadata::getShardUuid).collect(toSet()); + Map> replaceUuidMap = replacedUuids.stream().collect(Collectors.toMap(uuid -> uuid, uuid -> Optional.empty())); + + transactionId = shardManager.beginTransaction(); + shardManager.replaceShardUuids(transactionId, true, tableId, columns, replaceUuidMap, ImmutableList.of(newShardInfo4), OptionalLong.of(0)); + + // check shards on this node1 are correct + shardMetadata = shardManager.getNodeShards("node1"); + assertEquals(shardMetadata.size(), 1); + for (ShardMetadata actual : shardMetadata) { + assertEquals(actual.getShardUuid(), newUuid5); + assertEquals(actual.getDeltaUuid(), Optional.empty()); + assertEquals(actual.getRowCount(), 5); + assertEquals(actual.getCompressedSize(), 5); + assertEquals(actual.getUncompressedSize(), 5); + } + + // check that shards are replaced in index table as well + Set shardNodes = 
ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); + Set actualAllUuids = shardNodes.stream() + .map(BucketShards::getShards) + .flatMap(Collection::stream) + .map(ShardNodes::getShardUuid) + .collect(toSet()); + Set expectedAllUuids = ImmutableSet.of(uuid2, uuid3, newUuid5); + assertEquals(actualAllUuids, expectedAllUuids); + + // Verify statistics + Statement stmt = dummyHandle.getConnection().createStatement(); + ResultSet resultSet = stmt.executeQuery("SELECT * FROM tables where table_id = " + tableId); + resultSet.next(); + assertEquals(resultSet.getLong("shard_count"), 3); + assertEquals(resultSet.getLong("delta_count"), 0); + assertEquals(resultSet.getLong("row_count"), 10); + assertEquals(resultSet.getLong("compressed_size"), 10); + assertEquals(resultSet.getLong("uncompressed_size"), 10); + resultSet.close(); + stmt.close(); + } + @Test public void testReplaceShardUuids() { + // node1 shard1 / node2 shard2 / node3 shard3 + // replace shard1 with two new shards long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); List nodes = ImmutableList.of("node1", "node2", "node3"); @@ -294,23 +410,27 @@ public void testReplaceShardUuids() .add(shardInfo(originalUuids.get(2), nodes.get(2))) .build(); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, true, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, oldShards, Optional.empty(), 0); + // newShards List expectedUuids = ImmutableList.of(UUID.randomUUID(), UUID.randomUUID()); List newShards = ImmutableList.builder() .add(shardInfo(expectedUuids.get(0), nodes.get(0))) .add(shardInfo(expectedUuids.get(1), nodes.get(0))) .build(); + // toReplace Set shardMetadata = shardManager.getNodeShards(nodes.get(0)); Set replacedUuids = shardMetadata.stream().map(ShardMetadata::getShardUuid).collect(toSet()); + Map> replaceUuidMap = replacedUuids.stream().collect(Collectors.toMap(uuid -> uuid, uuid -> Optional.empty())); transactionId = shardManager.beginTransaction(); - shardManager.replaceShardUuids(transactionId, tableId, columns, replacedUuids, newShards, OptionalLong.of(0)); + shardManager.replaceShardUuids(transactionId, true, tableId, columns, replaceUuidMap, newShards, OptionalLong.of(0)); + // check that shards are replaced in shards table for node1 shardMetadata = shardManager.getNodeShards(nodes.get(0)); Set actualUuids = shardMetadata.stream().map(ShardMetadata::getShardUuid).collect(toSet()); assertEquals(actualUuids, ImmutableSet.copyOf(expectedUuids)); @@ -321,7 +441,7 @@ public void testReplaceShardUuids() expectedAllUuids.addAll(expectedUuids); // check that shards are replaced in index table as well - Set shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, TupleDomain.all())); + Set shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); Set actualAllUuids = shardNodes.stream() .map(BucketShards::getShards) .flatMap(Collection::stream) @@ -329,11 +449,33 @@ .collect(toSet()); assertEquals(actualAllUuids, expectedAllUuids); - // verify that conflicting updates are handled - newShards = ImmutableList.of(shardInfo(UUID.randomUUID(), nodes.get(0))); + // Verify conflict is handled + // Try to replace shard1 with newShards again (shard1 is already deleted, so this tries to delete shards that are already gone) + try { + newShards =
ImmutableList.of(shardInfo(UUID.randomUUID(), nodes.get(0))); + transactionId = shardManager.beginTransaction(); + shardManager.replaceShardUuids(transactionId, true, tableId, columns, replaceUuidMap, newShards, OptionalLong.of(0)); + fail("expected exception"); + } + catch (PrestoException e) { + assertEquals(e.getErrorCode(), TRANSACTION_CONFLICT.toErrorCode()); + } + // Try to add new delta to shard1 (shard1 already deleted) + try { + transactionId = shardManager.beginTransaction(); + ShardInfo newDelta = shardInfo(UUID.randomUUID(), nodes.get(0)); + Map, Optional>> shardMap = ImmutableMap.of(originalUuids.get(0), new Pair(Optional.empty(), Optional.of(newDelta))); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0)); + fail("expected exception"); + } + catch (PrestoException e) { + assertEquals(e.getErrorCode(), TRANSACTION_CONFLICT.toErrorCode()); + } + // Try to delete shard1 (shard1 already deleted) try { transactionId = shardManager.beginTransaction(); - shardManager.replaceShardUuids(transactionId, tableId, columns, replacedUuids, newShards, OptionalLong.of(0)); + Map, Optional>> shardMap = ImmutableMap.of(originalUuids.get(0), new Pair(Optional.empty(), Optional.empty())); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0)); fail("expected exception"); } catch (PrestoException e) { @@ -341,6 +483,185 @@ public void testReplaceShardUuids() } } + @Test + public void testReplaceDeltaUuidsFunction() + throws SQLException + { + // node1 shard1 shard4 + // node2 shard2 + // node3 shard3 + + // goal: shard4 add delta1 + + // Initial data + long tableId = createTable("test"); + List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); + UUID uuid1 = UUID.randomUUID(); + UUID uuid2 = UUID.randomUUID(); + UUID uuid3 = UUID.randomUUID(); + UUID uuid4 = UUID.randomUUID(); + ShardInfo shardInfo1 = new ShardInfo(uuid1, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 1, 1, 1, 1); + ShardInfo shardInfo2 = new ShardInfo(uuid2, OptionalInt.empty(), ImmutableSet.of("node2"), ImmutableList.of(), 2, 2, 2, 2); + ShardInfo shardInfo3 = new ShardInfo(uuid3, OptionalInt.empty(), ImmutableSet.of("node3"), ImmutableList.of(), 3, 3, 3, 3); + ShardInfo shardInfo4 = new ShardInfo(uuid4, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 4, 4, 4, 4); + + shardManager.createTable(tableId, true, columns, false, OptionalLong.empty()); + long transactionId = shardManager.beginTransaction(); + shardManager.commitShards(transactionId, tableId, columns, ImmutableList.of(shardInfo1, shardInfo2, shardInfo3, shardInfo4), Optional.empty(), 0); + + // delta + UUID delta1 = UUID.randomUUID(); + ShardInfo deltaInfo1 = new ShardInfo(delta1, OptionalInt.empty(), ImmutableSet.of("node1"), ImmutableList.of(), 1, 1, 1, 1); + + // toReplace + Map, Optional>> shardMap = ImmutableMap.of(uuid4, new Pair<>(Optional.empty(), Optional.of(deltaInfo1))); + transactionId = shardManager.beginTransaction(); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0)); + + // check shards on this node1 are correct + Set shardMetadata = shardManager.getNodeShards("node1"); + assertEquals(shardMetadata.size(), 3); + + // check index table as well + Set shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); + Set actualAllUuids = shardNodes.stream() + .map(BucketShards::getShards) + .flatMap(Collection::stream) + .map(ShardNodes::getShardUuid) + 
.collect(toSet()); + Set expectedAllUuids = ImmutableSet.of(uuid1, uuid2, uuid3, uuid4); + assertEquals(actualAllUuids, expectedAllUuids); + + // Verify statistics + Statement stmt = dummyHandle.getConnection().createStatement(); + ResultSet resultSet = stmt.executeQuery("SELECT * FROM tables where table_id = " + tableId); + resultSet.next(); + assertEquals(resultSet.getLong("shard_count"), 4); + assertEquals(resultSet.getLong("delta_count"), 1); + assertEquals(resultSet.getLong("row_count"), 9); + assertEquals(resultSet.getLong("compressed_size"), 11); + assertEquals(resultSet.getLong("uncompressed_size"), 11); + resultSet.close(); + stmt.close(); + } + + @Test + public void testReplaceDeltaUuids() + { + // node1 shard1 / node2 shard2 / node3 shard3 + // Add delta to shard1 + // Delete shard2 + long tableId = createTable("test"); + List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); + List nodes = ImmutableList.of("node1", "node2", "node3"); + List originalUuids = ImmutableList.of(UUID.randomUUID(), UUID.randomUUID(), UUID.randomUUID()); + + List oldShards = ImmutableList.builder() + .add(shardInfo(originalUuids.get(0), nodes.get(0))) + .add(shardInfo(originalUuids.get(1), nodes.get(1))) + .add(shardInfo(originalUuids.get(2), nodes.get(2))) + .build(); + + shardManager.createTable(tableId, true, columns, false, OptionalLong.empty()); + + long transactionId = shardManager.beginTransaction(); + shardManager.commitShards(transactionId, tableId, columns, oldShards, Optional.empty(), 0); + + UUID newDeltaUuid1 = UUID.randomUUID(); + ShardInfo newDeltaShard1 = shardInfo(newDeltaUuid1, nodes.get(0)); + Map, Optional>> shardMap = new HashMap<>(); + shardMap.put(originalUuids.get(0), new Pair(Optional.empty(), Optional.of(newDeltaShard1))); + shardMap.put(originalUuids.get(1), new Pair(Optional.empty(), Optional.empty())); + + transactionId = shardManager.beginTransaction(); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0)); + + // check that delta shard are added in shards table for node1 + Set shardMetadata = shardManager.getNodeShards(nodes.get(0)); + Map> actualUuidsMap = shardMetadata.stream().collect(toImmutableMap(ShardMetadata::getShardUuid, ShardMetadata::getDeltaUuid)); + Map> expectedUuidsMap = ImmutableMap.of(originalUuids.get(0), Optional.of(newDeltaUuid1), newDeltaUuid1, Optional.empty()); + assertEquals(actualUuidsMap, expectedUuidsMap); + + // check that shard are deleted in shards table for node2 + shardMetadata = shardManager.getNodeShards(nodes.get(1)); + actualUuidsMap = shardMetadata.stream().collect(toImmutableMap(ShardMetadata::getShardUuid, ShardMetadata::getDeltaUuid)); + expectedUuidsMap = ImmutableMap.of(); + assertEquals(actualUuidsMap, expectedUuidsMap); + + // check index table, delta added and shard removed + Set shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); + Set expectedshardNodes = ImmutableSet.of( + new BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(originalUuids.get(0), Optional.of(newDeltaUuid1), ImmutableSet.of(nodes.get(0))))), + new BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(originalUuids.get(2), Optional.empty(), ImmutableSet.of(nodes.get(2)))))); + assertEquals(shardNodes, expectedshardNodes); + + // Verify conflict is handled + // Try to replace shard1 with newShards without knowing its new delta + // stimulate the other thread didn't catch the change (actually it's already committed up) + try { + transactionId = 
shardManager.beginTransaction(); + Map> replaceUuidMap = ImmutableMap.of(originalUuids.get(0), Optional.empty()); + Set newShards = ImmutableSet.of(shardInfo(UUID.randomUUID(), nodes.get(0))); + shardManager.replaceShardUuids(transactionId, true, tableId, columns, replaceUuidMap, newShards, OptionalLong.of(0)); + fail("expected exception"); + // todo check transaction id roll back + // todo shard change roll back + } + catch (PrestoException e) { + assertEquals(e.getErrorCode(), TRANSACTION_CONFLICT.toErrorCode()); + } + // Try to delete shard1 with newShards without knowing its new delta + try { + transactionId = shardManager.beginTransaction(); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, + ImmutableMap.of(originalUuids.get(0), new Pair(Optional.empty(), Optional.empty())), OptionalLong.of(0)); + fail("expected exception"); + } + catch (PrestoException e) { + assertEquals(e.getErrorCode(), TRANSACTION_CONFLICT.toErrorCode()); + } + + // node1 shard1 newDelta / node3 shard3 + // replace the newDelta with another new delta + transactionId = shardManager.beginTransaction(); + UUID anotherNewDeltaUuid1 = UUID.randomUUID(); + shardMap = ImmutableMap.of(originalUuids.get(0), new Pair(Optional.of(newDeltaUuid1), Optional.of(shardInfo(anotherNewDeltaUuid1, nodes.get(0))))); + shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0)); + + // check that delta shard are added in shards table for node1 + shardMetadata = shardManager.getNodeShards(nodes.get(0)); + actualUuidsMap = shardMetadata.stream().collect(toImmutableMap(ShardMetadata::getShardUuid, ShardMetadata::getDeltaUuid)); + expectedUuidsMap = ImmutableMap.of(originalUuids.get(0), Optional.of(anotherNewDeltaUuid1), anotherNewDeltaUuid1, Optional.empty()); + assertEquals(actualUuidsMap, expectedUuidsMap); + + // check index table, delta modified + shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); + expectedshardNodes = ImmutableSet.of( + new BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(originalUuids.get(0), Optional.of(anotherNewDeltaUuid1), ImmutableSet.of(nodes.get(0))))), + new BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(originalUuids.get(2), Optional.empty(), ImmutableSet.of(nodes.get(2)))))); + assertEquals(shardNodes, expectedshardNodes); + + // node1 shard1 anotherNewDelta / node3 shard3 + // rewrite shard1 to shard4 + transactionId = shardManager.beginTransaction(); + UUID uuid4 = UUID.randomUUID(); + Map> replaceUuidMap = ImmutableMap.of(originalUuids.get(0), Optional.of(anotherNewDeltaUuid1)); + shardManager.replaceShardUuids(transactionId, true, tableId, columns, replaceUuidMap, ImmutableSet.of(shardInfo(uuid4, nodes.get(0))), OptionalLong.of(0)); + + // check that new shard are added, old shard and delta are deleted in shards table for node1 + shardMetadata = shardManager.getNodeShards(nodes.get(0)); + actualUuidsMap = shardMetadata.stream().collect(toImmutableMap(ShardMetadata::getShardUuid, ShardMetadata::getDeltaUuid)); + expectedUuidsMap = ImmutableMap.of(uuid4, Optional.empty()); + assertEquals(actualUuidsMap, expectedUuidsMap); + + // check index table, old shard and delta deleted, new shard added + shardNodes = ImmutableSet.copyOf(shardManager.getShardNodes(tableId, true, TupleDomain.all())); + expectedshardNodes = ImmutableSet.of( + new BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(uuid4, Optional.empty(), ImmutableSet.of(nodes.get(0))))), + new 
BucketShards(OptionalInt.empty(), ImmutableSet.of(new ShardNodes(originalUuids.get(2), Optional.empty(), ImmutableSet.of(nodes.get(2)))))); + assertEquals(shardNodes, expectedshardNodes); + } + @Test public void testExternalBatches() { @@ -350,7 +671,7 @@ public void testExternalBatches() List shards = ImmutableList.of(shardInfo(UUID.randomUUID(), "node1")); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, externalBatchId, 0); @@ -417,9 +738,9 @@ public void testEmptyTable() { long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); - try (ResultIterator iterator = shardManager.getShardNodes(tableId, TupleDomain.all())) { + try (ResultIterator iterator = shardManager.getShardNodes(tableId, false, TupleDomain.all())) { assertFalse(iterator.hasNext()); } } @@ -429,9 +750,9 @@ public void testEmptyTableBucketed() { long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, true, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, true, OptionalLong.empty()); - try (ResultIterator iterator = shardManager.getShardNodesBucketed(tableId, true, ImmutableList.of(), TupleDomain.all())) { + try (ResultIterator iterator = shardManager.getShardNodesBucketed(tableId, false, true, ImmutableList.of(), TupleDomain.all())) { assertFalse(iterator.hasNext()); } } @@ -441,11 +762,11 @@ public void testTemporalColumnTableCreation() { long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, TIMESTAMP)); - shardManager.createTable(tableId, columns, false, OptionalLong.of(1)); + shardManager.createTable(tableId, false, columns, false, OptionalLong.of(1)); long tableId2 = createTable("test2"); List columns2 = ImmutableList.of(new ColumnInfo(1, TIMESTAMP)); - shardManager.createTable(tableId2, columns2, true, OptionalLong.of(1)); + shardManager.createTable(tableId2, false, columns2, true, OptionalLong.of(1)); } @Test @@ -511,7 +832,7 @@ public void testShardPruning() RaptorColumnHandle c6 = new RaptorColumnHandle("raptor", "c6", 6, BOOLEAN); long tableId = createTable("test"); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0); @@ -609,7 +930,7 @@ public void testShardPruningTruncatedValues() RaptorColumnHandle c1 = new RaptorColumnHandle("raptor", "c1", 1, createVarcharType(10)); long tableId = createTable("test"); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0); @@ -644,7 +965,7 @@ public void testShardPruningNoStats() List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); RaptorColumnHandle c1 = new 
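The testReplaceDeltaUuids scenarios above pin down the semantics of the new replaceDeltaUuids call: the map is keyed by base shard UUID, and each value pairs the delta the caller last observed with the change to apply. A minimal sketch, with generic type parameters inferred from this diff and the shard/delta variables assumed from the surrounding test:

    // (last observed delta, new delta shard): empty -> present attaches a first delta,
    // present -> present swaps the existing delta, and both empty deletes a base shard
    // that has no delta (as done for shard2 in the test above).
    Map<UUID, Pair<Optional<UUID>, Optional<ShardInfo>>> shardMap = ImmutableMap.of(
            shardUuid1, new Pair<>(Optional.empty(), Optional.of(firstDeltaShard)),
            shardUuid2, new Pair<>(Optional.of(existingDeltaUuid), Optional.of(replacementDeltaShard)),
            shardUuid3, new Pair<>(Optional.empty(), Optional.empty()));
    long transactionId = shardManager.beginTransaction();
    // A mismatch between the observed delta and the committed one fails with TRANSACTION_CONFLICT.
    shardManager.replaceDeltaUuids(transactionId, tableId, columns, shardMap, OptionalLong.of(0));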
RaptorColumnHandle("raptor", "c1", 1, BIGINT); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0); @@ -659,7 +980,7 @@ public void testAddNewColumn() { long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); int before = columnCount(tableId); ColumnInfo newColumn = new ColumnInfo(2, BIGINT); @@ -675,7 +996,7 @@ public void testAddDuplicateColumn() { long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); int before = columnCount(tableId); shardManager.addColumn(tableId, columns.get(0)); @@ -695,7 +1016,8 @@ public void testMaintenanceBlocked() long transactionId = shardManager.beginTransaction(); try { - shardManager.replaceShardUuids(transactionId, tableId, columns, oldShards, ImmutableSet.of(), OptionalLong.empty()); + Map> oldShardMap = oldShards.stream().collect(toImmutableMap(Function.identity(), uuid -> Optional.empty())); + shardManager.replaceShardUuids(transactionId, false, tableId, columns, oldShardMap, ImmutableSet.of(), OptionalLong.empty()); fail("expected exception"); } catch (PrestoException e) { @@ -706,14 +1028,14 @@ public void testMaintenanceBlocked() private Set getShardNodes(long tableId, TupleDomain predicate) { - try (ResultIterator iterator = shardManager.getShardNodes(tableId, predicate)) { + try (ResultIterator iterator = shardManager.getShardNodes(tableId, false, predicate)) { return ImmutableSet.copyOf(concat(transform(iterator, i -> i.getShards().iterator()))); } } private long createTable(String name) { - return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0); + return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0, false); } public static ShardInfo shardInfo(UUID shardUuid, String nodeIdentifier) @@ -729,7 +1051,7 @@ public static ShardInfo shardInfo(UUID shardUuid, String nodeId, List toShardNodes(List shards) { return shards.stream() - .map(shard -> new ShardNodes(shard.getShardUuid(), shard.getNodeIdentifiers())) + .map(shard -> new ShardNodes(shard.getShardUuid(), Optional.empty(), shard.getNodeIdentifiers())) .collect(toSet()); } diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestMetadataDao.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestMetadataDao.java index dcd7f4ca28209..b1ae2a89e5584 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestMetadataDao.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestMetadataDao.java @@ -54,7 +54,7 @@ public void tearDown() public void testTemporalColumn() { Long columnId = 1L; - long tableId = dao.insertTable("schema1", "table1", true, false, null, 0); + long tableId = dao.insertTable("schema1", "table1", true, false, null, 0, false); dao.insertColumn(tableId, columnId, "col1", 1, "bigint", null, null); Long temporalColumnId = dao.getTemporalColumnId(tableId); assertNull(temporalColumnId); @@ -64,7 +64,7 @@ 
public void testTemporalColumn() assertNotNull(temporalColumnId); assertEquals(temporalColumnId, columnId); - long tableId2 = dao.insertTable("schema1", "table2", true, false, null, 0); + long tableId2 = dao.insertTable("schema1", "table2", true, false, null, 0, false); Long columnId2 = dao.getTemporalColumnId(tableId2); assertNull(columnId2); } @@ -73,7 +73,7 @@ public void testTemporalColumn() public void testGetTableInformation() { Long columnId = 1L; - long tableId = dao.insertTable("schema1", "table1", true, false, null, 0); + long tableId = dao.insertTable("schema1", "table1", true, false, null, 0, false); dao.insertColumn(tableId, columnId, "col1", 1, "bigint", null, null); Table info = dao.getTableInformation(tableId); diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java index 19058aef41d01..1bbc5cd4721df 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java @@ -66,6 +66,7 @@ import static com.facebook.presto.raptor.RaptorTableProperties.DISTRIBUTION_NAME_PROPERTY; import static com.facebook.presto.raptor.RaptorTableProperties.ORDERING_PROPERTY; import static com.facebook.presto.raptor.RaptorTableProperties.ORGANIZED_PROPERTY; +import static com.facebook.presto.raptor.RaptorTableProperties.TABLE_SUPPORTS_DELTA_DELETE; import static com.facebook.presto.raptor.RaptorTableProperties.TEMPORAL_COLUMN_PROPERTY; import static com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry; import static com.facebook.presto.raptor.metadata.TestDatabaseShardManager.createShardManager; @@ -139,7 +140,7 @@ public void testRenameColumn() public void testAddColumn() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - metadata.createTable(SESSION, buildTable(ImmutableMap.of(), tableMetadataBuilder(DEFAULT_TEST_ORDERS) + metadata.createTable(SESSION, buildTable(ImmutableMap.of(TABLE_SUPPORTS_DELTA_DELETE, false), tableMetadataBuilder(DEFAULT_TEST_ORDERS) .column("orderkey", BIGINT) .column("price", BIGINT)), false); @@ -156,7 +157,7 @@ public void testAddColumn() public void testDropColumn() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - metadata.createTable(SESSION, buildTable(ImmutableMap.of(), tableMetadataBuilder(DEFAULT_TEST_ORDERS) + metadata.createTable(SESSION, buildTable(ImmutableMap.of(TABLE_SUPPORTS_DELTA_DELETE, false), tableMetadataBuilder(DEFAULT_TEST_ORDERS) .column("orderkey", BIGINT) .column("price", BIGINT)), false); @@ -174,7 +175,7 @@ public void testDropColumn() public void testAddColumnAfterDropColumn() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - metadata.createTable(SESSION, buildTable(ImmutableMap.of(), tableMetadataBuilder(DEFAULT_TEST_ORDERS) + metadata.createTable(SESSION, buildTable(ImmutableMap.of(TABLE_SUPPORTS_DELTA_DELETE, false), tableMetadataBuilder(DEFAULT_TEST_ORDERS) .column("orderkey", BIGINT) .column("price", BIGINT)), false); @@ -198,7 +199,8 @@ public void testDropColumnDisallowed() BUCKET_COUNT_PROPERTY, 16, BUCKETED_ON_PROPERTY, ImmutableList.of("orderkey"), ORDERING_PROPERTY, ImmutableList.of("totalprice"), - TEMPORAL_COLUMN_PROPERTY, "orderdate"); + TEMPORAL_COLUMN_PROPERTY, "orderdate", + TABLE_SUPPORTS_DELTA_DELETE, false); ConnectorTableMetadata ordersTable = buildTable(properties, 
tableMetadataBuilder(DEFAULT_TEST_ORDERS) .column("orderkey", BIGINT) .column("totalprice", DOUBLE) @@ -298,7 +300,8 @@ public void testTableProperties() ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of( ORDERING_PROPERTY, ImmutableList.of("orderdate", "custkey"), - TEMPORAL_COLUMN_PROPERTY, "orderdate")); + TEMPORAL_COLUMN_PROPERTY, "orderdate", + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, ordersTable, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -324,6 +327,29 @@ public void testTableProperties() metadata.dropTable(SESSION, tableHandle); } + @Test + public void testTablePropertiesDeltaDelete() + { + assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); + + ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of( + TABLE_SUPPORTS_DELTA_DELETE, true)); + metadata.createTable(SESSION, ordersTable, false); + + ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); + assertInstanceOf(tableHandle, RaptorTableHandle.class); + RaptorTableHandle raptorTableHandle = (RaptorTableHandle) tableHandle; + assertEquals(raptorTableHandle.getTableId(), 1); + + long tableId = raptorTableHandle.getTableId(); + MetadataDao metadataDao = dbi.onDemand(MetadataDao.class); + + // verify delta delete enabled property + assertTrue(metadataDao.getTableInformation(tableId).isTableSupportsDeltaDelete()); + + metadata.dropTable(SESSION, tableHandle); + } + @Test public void testTablePropertiesWithOrganization() { @@ -331,7 +357,8 @@ public void testTablePropertiesWithOrganization() ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of( ORDERING_PROPERTY, ImmutableList.of("orderdate", "custkey"), - ORGANIZED_PROPERTY, true)); + ORGANIZED_PROPERTY, true, + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, ordersTable, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -361,7 +388,8 @@ public void testCreateBucketedTable() ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of( BUCKET_COUNT_PROPERTY, 16, - BUCKETED_ON_PROPERTY, ImmutableList.of("custkey", "orderkey"))); + BUCKETED_ON_PROPERTY, ImmutableList.of("custkey", "orderkey"), + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, ordersTable, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -396,7 +424,8 @@ public void testCreateBucketedTableAsSelect() ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of( BUCKET_COUNT_PROPERTY, 32, - BUCKETED_ON_PROPERTY, ImmutableList.of("orderkey", "custkey"))); + BUCKETED_ON_PROPERTY, ImmutableList.of("orderkey", "custkey"), + TABLE_SUPPORTS_DELTA_DELETE, false)); ConnectorNewTableLayout layout = metadata.getNewTableLayout(SESSION, ordersTable).get(); assertEquals(layout.getPartitionColumns(), ImmutableList.of("orderkey", "custkey")); @@ -437,7 +466,8 @@ public void testCreateBucketedTableExistingDistribution() ConnectorTableMetadata table = getOrdersTable(ImmutableMap.of( BUCKET_COUNT_PROPERTY, 16, BUCKETED_ON_PROPERTY, ImmutableList.of("orderkey"), - DISTRIBUTION_NAME_PROPERTY, "orders")); + DISTRIBUTION_NAME_PROPERTY, "orders", + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, table, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -460,7 +490,8 @@ public void testCreateBucketedTableExistingDistribution() table = 
getLineItemsTable(ImmutableMap.of( BUCKET_COUNT_PROPERTY, 16, BUCKETED_ON_PROPERTY, ImmutableList.of("orderkey"), - DISTRIBUTION_NAME_PROPERTY, "orders")); + DISTRIBUTION_NAME_PROPERTY, "orders", + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, table, false); tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_LINEITEMS); @@ -511,7 +542,8 @@ public void testInvalidTemporalOrganization() assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); metadata.createTable(SESSION, getOrdersTable(ImmutableMap.of( TEMPORAL_COLUMN_PROPERTY, "orderdate", - ORGANIZED_PROPERTY, true)), + ORGANIZED_PROPERTY, true, + TABLE_SUPPORTS_DELTA_DELETE, false)), false); } @@ -519,7 +551,7 @@ public void testInvalidTemporalOrganization() public void testInvalidOrderingOrganization() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - metadata.createTable(SESSION, getOrdersTable(ImmutableMap.of(ORGANIZED_PROPERTY, true)), false); + metadata.createTable(SESSION, getOrdersTable(ImmutableMap.of(ORGANIZED_PROPERTY, true, TABLE_SUPPORTS_DELTA_DELETE, false)), false); } @Test @@ -527,7 +559,8 @@ public void testSortOrderProperty() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of(ORDERING_PROPERTY, ImmutableList.of("orderdate", "custkey"))); + ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of(ORDERING_PROPERTY, + ImmutableList.of("orderdate", "custkey"), TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, ordersTable, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -554,7 +587,8 @@ public void testTemporalColumn() { assertNull(metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS)); - ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "orderdate")); + ConnectorTableMetadata ordersTable = getOrdersTable(ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "orderdate", + TABLE_SUPPORTS_DELTA_DELETE, false)); metadata.createTable(SESSION, ordersTable, false); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); @@ -837,7 +871,7 @@ private Long getTableDistributionId(long tableId) private static ConnectorTableMetadata getOrdersTable() { - return getOrdersTable(ImmutableMap.of()); + return getOrdersTable(ImmutableMap.of(TABLE_SUPPORTS_DELTA_DELETE, false)); } private static ConnectorTableMetadata getOrdersTable(Map properties) diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorSplitManager.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorSplitManager.java index 4693f0e2cd924..c4464f7fb3ea9 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorSplitManager.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorSplitManager.java @@ -20,6 +20,7 @@ import com.facebook.presto.raptor.RaptorColumnHandle; import com.facebook.presto.raptor.RaptorConnectorId; import com.facebook.presto.raptor.RaptorMetadata; +import com.facebook.presto.raptor.RaptorSplit; import com.facebook.presto.raptor.RaptorSplitManager; import com.facebook.presto.raptor.RaptorTableHandle; import com.facebook.presto.raptor.RaptorTableLayoutHandle; @@ -56,6 +57,7 @@ import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; import static com.facebook.airlift.testing.Assertions.assertInstanceOf; +import static 
com.facebook.presto.raptor.RaptorTableProperties.TABLE_SUPPORTS_DELTA_DELETE; import static com.facebook.presto.raptor.metadata.DatabaseShardManager.shardIndexTable; import static com.facebook.presto.raptor.metadata.SchemaDaoUtil.createTablesWithRetry; import static com.facebook.presto.raptor.metadata.TestDatabaseShardManager.shardInfo; @@ -80,6 +82,7 @@ public class TestRaptorSplitManager .column("ds", createVarcharType(10)) .column("foo", createVarcharType(10)) .column("bar", BigintType.BIGINT) + .property(TABLE_SUPPORTS_DELTA_DELETE, false) .build(); private Handle dummyHandle; @@ -153,7 +156,11 @@ public void testSanity() ConnectorSplitSource splitSource = getSplits(raptorSplitManager, layout); int splitCount = 0; while (!splitSource.isFinished()) { - splitCount += getSplits(splitSource, 1000).size(); + List splits = getSplits(splitSource, 1000); + splitCount += splits.size(); + RaptorSplit split = (RaptorSplit) (splits.get(0)); + assertEquals(split.isTableSupportsDeltaDelete(), false); + assertEquals(split.getColumnTypes(), Optional.empty()); } assertEquals(splitCount, 4); } diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardCleaner.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardCleaner.java index ce3868114521d..d3e501c333f38 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardCleaner.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardCleaner.java @@ -197,7 +197,7 @@ public void testCleanLocalShardsImmediately() TestingShardDao shardDao = dbi.onDemand(TestingShardDao.class); MetadataDao metadataDao = dbi.onDemand(MetadataDao.class); - long tableId = metadataDao.insertTable("test", "test", false, false, null, 0); + long tableId = metadataDao.insertTable("test", "test", false, false, null, 0, false); UUID shard1 = randomUUID(); UUID shard2 = randomUUID(); @@ -243,7 +243,7 @@ public void testCleanLocalShards() TestingShardDao shardDao = dbi.onDemand(TestingShardDao.class); MetadataDao metadataDao = dbi.onDemand(MetadataDao.class); - long tableId = metadataDao.insertTable("test", "test", false, false, null, 0); + long tableId = metadataDao.insertTable("test", "test", false, false, null, 0, false); UUID shard1 = randomUUID(); UUID shard2 = randomUUID(); diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardDao.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardDao.java index cc48df9c40404..e7cc247d95ef3 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardDao.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestShardDao.java @@ -24,6 +24,7 @@ import org.testng.annotations.Test; import java.sql.SQLException; +import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; import java.util.Set; @@ -167,8 +168,8 @@ public void testNodeShards() dao.insertBuckets(distributionId, ImmutableList.of(i), ImmutableList.of(nodeId)); } - long plainTableId = metadataDao.insertTable("test", "plain", false, false, null, 0); - long bucketedTableId = metadataDao.insertTable("test", "bucketed", false, false, distributionId, 0); + long plainTableId = metadataDao.insertTable("test", "plain", false, false, null, 0, false); + long bucketedTableId = metadataDao.insertTable("test", "bucketed", false, false, distributionId, 0, false); long shardId1 = dao.insertShard(shardUuid1, plainTableId, null, 1, 11, 111, 888_111); long shardId2 = 
dao.insertShard(shardUuid2, plainTableId, null, 2, 22, 222, 888_222); @@ -178,11 +179,11 @@ public void testNodeShards() OptionalInt noBucket = OptionalInt.empty(); OptionalLong noRange = OptionalLong.empty(); - ShardMetadata shard1 = new ShardMetadata(plainTableId, shardId1, shardUuid1, noBucket, 1, 11, 111, OptionalLong.of(888_111), noRange, noRange); - ShardMetadata shard2 = new ShardMetadata(plainTableId, shardId2, shardUuid2, noBucket, 2, 22, 222, OptionalLong.of(888_222), noRange, noRange); - ShardMetadata shard3 = new ShardMetadata(bucketedTableId, shardId3, shardUuid3, OptionalInt.of(8), 3, 33, 333, OptionalLong.of(888_333), noRange, noRange); - ShardMetadata shard4 = new ShardMetadata(bucketedTableId, shardId4, shardUuid4, OptionalInt.of(9), 4, 44, 444, OptionalLong.of(888_444), noRange, noRange); - ShardMetadata shard5 = new ShardMetadata(bucketedTableId, shardId5, shardUuid5, OptionalInt.of(7), 5, 55, 555, OptionalLong.of(888_555), noRange, noRange); + ShardMetadata shard1 = new ShardMetadata(plainTableId, shardId1, shardUuid1, false, Optional.empty(), noBucket, 1, 11, 111, OptionalLong.of(888_111), noRange, noRange); + ShardMetadata shard2 = new ShardMetadata(plainTableId, shardId2, shardUuid2, false, Optional.empty(), noBucket, 2, 22, 222, OptionalLong.of(888_222), noRange, noRange); + ShardMetadata shard3 = new ShardMetadata(bucketedTableId, shardId3, shardUuid3, false, Optional.empty(), OptionalInt.of(8), 3, 33, 333, OptionalLong.of(888_333), noRange, noRange); + ShardMetadata shard4 = new ShardMetadata(bucketedTableId, shardId4, shardUuid4, false, Optional.empty(), OptionalInt.of(9), 4, 44, 444, OptionalLong.of(888_444), noRange, noRange); + ShardMetadata shard5 = new ShardMetadata(bucketedTableId, shardId5, shardUuid5, false, Optional.empty(), OptionalInt.of(7), 5, 55, 555, OptionalLong.of(888_555), noRange, noRange); assertEquals(dao.getShards(plainTableId), ImmutableSet.of(shardUuid1, shardUuid2)); assertEquals(dao.getShards(bucketedTableId), ImmutableSet.of(shardUuid3, shardUuid4, shardUuid5)); @@ -278,7 +279,7 @@ public void testShardSelection() private long createTable(String name) { - return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0); + return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0, false); } private static void assertContainsShardNode(Set nodes, String nodeName, UUID shardUuid) diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestBucketBalancer.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestBucketBalancer.java index 37a5da0a38d84..e08c95f283d10 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestBucketBalancer.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestBucketBalancer.java @@ -275,11 +275,11 @@ private long createBucketedTable(String tableName, long distributionId) private long createBucketedTable(String tableName, long distributionId, DataSize compressedSize) { MetadataDao dao = dbi.onDemand(MetadataDao.class); - long tableId = dao.insertTable("test", tableName, false, false, distributionId, 0); + long tableId = dao.insertTable("test", tableName, false, false, distributionId, 0, false); List columnsA = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columnsA, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columnsA, false, OptionalLong.empty()); - metadataDao.updateTableStats(tableId, 1024, 1024 * 1024 * 1024, 
compressedSize.toBytes(), compressedSize.toBytes() * 2); + metadataDao.updateTableStats(tableId, 1024, 0, 1024 * 1024 * 1024, compressedSize.toBytes(), compressedSize.toBytes() * 2); return tableId; } diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcFileRewriter.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcFileRewriter.java index 371f6ce66423a..e0257b9042434 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcFileRewriter.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcFileRewriter.java @@ -57,6 +57,7 @@ import java.util.BitSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.OptionalInt; import java.util.UUID; import java.util.stream.Collectors; @@ -532,6 +533,8 @@ public void testRewriterDropThenAddDifferentColumns() ConnectorPageSource source = storageManager.getPageSource( FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, + Optional.empty(), + false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), @@ -651,6 +654,8 @@ public void testRewriterDropThenAddSameColumns() ConnectorPageSource source = storageManager.getPageSource( FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, + Optional.empty(), + false, OptionalInt.empty(), ImmutableList.of(3L, 7L, 8L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcStorageManager.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcStorageManager.java index 1b9cc5966049e..500decb97bdd1 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcStorageManager.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestOrcStorageManager.java @@ -25,6 +25,7 @@ import com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment; import com.facebook.presto.raptor.filesystem.RaptorLocalFileSystem; import com.facebook.presto.raptor.metadata.ColumnStats; +import com.facebook.presto.raptor.metadata.ShardDeleteDelta; import com.facebook.presto.raptor.metadata.ShardDelta; import com.facebook.presto.raptor.metadata.ShardInfo; import com.facebook.presto.raptor.metadata.ShardManager; @@ -33,6 +34,7 @@ import com.facebook.presto.spi.ConnectorPageSource; import com.facebook.presto.spi.NodeManager; import com.facebook.presto.spi.Page; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.predicate.NullableValue; import com.facebook.presto.spi.predicate.TupleDomain; @@ -353,14 +355,17 @@ public void testRewriter() // delete one row BitSet rowsToDelete = new BitSet(); rowsToDelete.set(0); - Collection fragments = manager.rewriteShard( + InplaceShardRewriter shardRewriter = (InplaceShardRewriter) manager.createShardRewriter( + FileSystemContext.DEFAULT_RAPTOR_CONTEXT, fileSystem, + new StorageOrcFileTailSource(), transactionId, OptionalInt.empty(), shards.get(0).getShardUuid(), - IntStream.range(0, columnIds.size()).boxed().collect(Collectors.toMap(index -> String.valueOf(columnIds.get(index)), columnTypes::get)), - rowsToDelete); - + Optional.empty(), + false, + IntStream.range(0, columnIds.size()).boxed().collect(Collectors.toMap(index -> String.valueOf(columnIds.get(index)), columnTypes::get))); + Collection fragments = shardRewriter.rewriteShard(rowsToDelete); Slice shardDelta = 
Iterables.getOnlyElement(fragments); ShardDelta shardDeltas = jsonCodec(ShardDelta.class).fromJson(shardDelta.getBytes()); ShardInfo shardInfo = Iterables.getOnlyElement(shardDeltas.getNewShards()); @@ -380,6 +385,184 @@ public void testRewriter() assertEquals(recordedShards.get(1).getShardUuid(), shardInfo.getShardUuid()); } + @Test + public void testWriteDeltaDelete() + throws Exception + { + FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(FileSystemContext.DEFAULT_RAPTOR_CONTEXT); + + // delete one row + BitSet rowsToDelete = new BitSet(); + rowsToDelete.set(0); + Collection fragments = deltaDelete(rowsToDelete, false); + + Slice shardDelta = Iterables.getOnlyElement(fragments); + ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes()); + ShardInfo shardInfo = shardDeltas.getNewDeltaDeleteShard().get(); + + // Check that output file (new delta file) has one row + assertEquals(shardInfo.getRowCount(), 1); + assertTrue(checkContent(fileSystem, shardInfo.getShardUuid(), rowsToDelete)); + + // Check that storage file is same as backup file + File storageFile = new File(storageService.getStorageFile(shardInfo.getShardUuid()).toString()); + File backupFile = fileBackupStore.getBackupFile(shardInfo.getShardUuid()); + assertFileEquals(storageFile, backupFile); + + // Verify recorded shard + List recordedShards = shardRecorder.getShards(); + assertEquals(recordedShards.size(), 2); // original file + delta file + assertEquals(recordedShards.get(1).getTransactionId(), TRANSACTION_ID); + assertEquals(recordedShards.get(1).getShardUuid(), shardInfo.getShardUuid()); + } + + @Test + public void testWriteDeltaDeleteEmpty() + { + // delete zero row + BitSet rowsToDelete = new BitSet(); + Collection fragments = deltaDelete(rowsToDelete, false); + + assertEquals(ImmutableList.of(), fragments); + List recordedShards = shardRecorder.getShards(); + assertEquals(recordedShards.size(), 1); // no delta file + } + + @Test + public void testWriteDeltaDeleteAll() + { + // delete every row + BitSet rowsToDelete = new BitSet(); + rowsToDelete.set(0); + rowsToDelete.set(1); + rowsToDelete.set(2); + Collection fragments = deltaDelete(rowsToDelete, false); + + Slice shardDelta = Iterables.getOnlyElement(fragments); + ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes()); + assertEquals(shardDeltas.getNewDeltaDeleteShard(), Optional.empty()); + + // verify recorded shard + List recordedShards = shardRecorder.getShards(); + assertEquals(recordedShards.size(), 1); + } + + @Test + // rowsToDelete and rowsDeleted must be mutually exclusive + public void testWriteDeltaDeleteMerge() + throws Exception + { + FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(FileSystemContext.DEFAULT_RAPTOR_CONTEXT); + + BitSet rowsToDelete = new BitSet(); + rowsToDelete.set(0); + Collection fragments = deltaDelete(rowsToDelete, true); + + Slice shardDelta = Iterables.getOnlyElement(fragments); + ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes()); + ShardInfo shardInfo = shardDeltas.getNewDeltaDeleteShard().get(); + + // Check that output file (new delta file) has merged 2 rows + assertEquals(shardInfo.getRowCount(), 2); + assertTrue(checkContent(fileSystem, shardInfo.getShardUuid(), rowsToDelete)); + + // Check that storage file is same as backup file + File storageFile = new File(storageService.getStorageFile(shardInfo.getShardUuid()).toString()); + File backupFile = 
fileBackupStore.getBackupFile(shardInfo.getShardUuid()); + assertFileEquals(storageFile, backupFile); + + // Verify recorded shard + List recordedShards = shardRecorder.getShards(); + assertEquals(recordedShards.size(), 3); // original file + old delta + new delta + assertEquals(recordedShards.get(2).getTransactionId(), TRANSACTION_ID); + assertEquals(recordedShards.get(2).getShardUuid(), shardInfo.getShardUuid()); + } + + @Test + public void testWriteDeltaDeleteMergeAll() + { + // delete every row + BitSet rowsToDelete = new BitSet(); + rowsToDelete.set(0); + rowsToDelete.set(1); + Collection fragments = deltaDelete(rowsToDelete, true); + + Slice shardDelta = Iterables.getOnlyElement(fragments); + ShardDeleteDelta shardDeltas = jsonCodec(ShardDeleteDelta.class).fromJson(shardDelta.getBytes()); + assertEquals(shardDeltas.getNewDeltaDeleteShard(), Optional.empty()); + + // verify recorded shard + List recordedShards = shardRecorder.getShards(); + assertEquals(recordedShards.size(), 2); // original file + old delta + } + + @Test(expectedExceptions = PrestoException.class) + public void testWriteDeltaDeleteMergeConflict() + { + // delete same row + BitSet rowsToDelete = new BitSet(); + rowsToDelete.set(2); + Collection fragments = deltaDelete(rowsToDelete, true); + deltaDelete(rowsToDelete, true); + } + + private Collection deltaDelete(BitSet rowsToDelete, boolean oldDeltaDeleteExist) + { + OrcStorageManager manager = createOrcStorageManager(); + FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(FileSystemContext.DEFAULT_RAPTOR_CONTEXT); + + List columnIds = ImmutableList.of(3L, 7L); + List columnTypes = ImmutableList.of(BIGINT, createVarcharType(10)); + + // create file with 3 rows + StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes); + List pages = rowPagesBuilder(columnTypes) + .row(123L, "hello") + .row(456L, "bye") + .row(456L, "test") + .build(); + sink.appendPages(pages); + List shards = getFutureValue(sink.commit()); + assertEquals(shardRecorder.getShards().size(), 1); + + List oldDeltaDeleteShards = null; + if (oldDeltaDeleteExist) { + // create oldDeltaDeleteExist with 1 row + List deltaColumnIds = ImmutableList.of(0L); + List deltaColumnTypes = ImmutableList.of(BIGINT); + StoragePageSink deltaSink = createStoragePageSink(manager, deltaColumnIds, deltaColumnTypes); + List deltaPages = rowPagesBuilder(deltaColumnTypes) + .row(2L) + .build(); + deltaSink.appendPages(deltaPages); + oldDeltaDeleteShards = getFutureValue(deltaSink.commit()); + } + + // delta delete + DeltaShardRewriter shardRewriter = (DeltaShardRewriter) manager.createShardRewriter( + FileSystemContext.DEFAULT_RAPTOR_CONTEXT, + fileSystem, + new StorageOrcFileTailSource(), + TRANSACTION_ID, + OptionalInt.empty(), + shards.get(0).getShardUuid(), + oldDeltaDeleteExist ? 
Optional.of(oldDeltaDeleteShards.get(0).getShardUuid()) : Optional.empty(), + true, + null); + Collection fragments = shardRewriter.writeDeltaDeleteFile(rowsToDelete); + return fragments; + } + + private boolean checkContent(FileSystem fileSystem, UUID shardUuid, BitSet rowsToDelete) + { + OrcStorageManager manager = createOrcStorageManager(); + Optional rows = manager.getRowsFromUuid(fileSystem, Optional.of(shardUuid)); + if (rows.isPresent()) { + return rows.get().equals(rowsToDelete); + } + return false; + } + public void testWriterRollback() { // verify staging directory is empty @@ -575,7 +758,7 @@ private static ConnectorPageSource getPageSource( UUID uuid, TupleDomain tupleDomain) { - return manager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, OptionalInt.empty(), columnIds, columnTypes, tupleDomain, READER_ATTRIBUTES); + return manager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), columnIds, columnTypes, tupleDomain, READER_ATTRIBUTES); } private static StoragePageSink createStoragePageSink(StorageManager manager, List columnIds, List columnTypes) diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestShardEjector.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestShardEjector.java index 24d2b068a7921..43c5258c9c194 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestShardEjector.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestShardEjector.java @@ -130,7 +130,7 @@ public void testEjector() long tableId = createTable("test"); List columns = ImmutableList.of(new ColumnInfo(1, BIGINT)); - shardManager.createTable(tableId, columns, false, OptionalLong.empty()); + shardManager.createTable(tableId, false, columns, false, OptionalLong.empty()); long transactionId = shardManager.beginTransaction(); shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0); @@ -143,7 +143,7 @@ public void testEjector() ejector.process(); - shardManager.getShardNodes(tableId, TupleDomain.all()); + shardManager.getShardNodes(tableId, false, TupleDomain.all()); Set ejectedShards = shards.subList(0, 4).stream() .map(ShardInfo::getShardUuid) @@ -176,7 +176,7 @@ public void testEjector() private long createTable(String name) { - return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0); + return dbi.onDemand(MetadataDao.class).insertTable("test", name, false, false, null, 0, false); } private static Set uuids(Set metadata) diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestStorageManagerConfig.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestStorageManagerConfig.java index 97944fd8d92f0..bf5e2a872b20f 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestStorageManagerConfig.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/TestStorageManagerConfig.java @@ -65,6 +65,7 @@ public void testDefaults() .setOrganizationThreads(5) .setCompactionEnabled(true) .setOrganizationEnabled(true) + .setTableSupportsDeltaDelete(false) .setOrganizationInterval(new Duration(7, DAYS)) .setOrganizationDiscoveryInterval(new Duration(6, HOURS)) .setMaxShardRows(1_000_000) @@ -98,6 +99,7 @@ public void testExplicitPropertyMappings() .put("storage.organization-enabled", "false") .put("storage.organization-interval", "4h") .put("storage.organization-discovery-interval", "2h") + 
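At the storage layer, the deltaDelete helper above shows the path a delta-enabled delete takes: OrcStorageManager.createShardRewriter hands back a DeltaShardRewriter, and writeDeltaDeleteFile writes a new delta file that merges any existing delta for the shard. A minimal sketch of that call, assuming the same test scaffolding (manager, fileSystem, shard and delta UUIDs, TRANSACTION_ID) used above:

    BitSet rowsToDelete = new BitSet();
    rowsToDelete.set(0);   // bit index = row position within the base shard
    DeltaShardRewriter rewriter = (DeltaShardRewriter) manager.createShardRewriter(
            FileSystemContext.DEFAULT_RAPTOR_CONTEXT,
            fileSystem,
            new StorageOrcFileTailSource(),
            TRANSACTION_ID,
            OptionalInt.empty(),              // bucket number
            baseShardUuid,
            Optional.of(existingDeltaUuid),   // Optional.empty() when the shard has no delta yet
            true,                             // table supports delta delete
            null);                            // column map, passed as null by the helper above
    // Fragments are JSON-encoded ShardDeleteDelta slices: empty when no rows were deleted,
    // and carrying no new delta shard when every row of the base shard ends up deleted.
    Collection<Slice> fragments = rewriter.writeDeltaDeleteFile(rowsToDelete);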
.put("storage.table-supports-delta-delete-default", "true") .put("storage.ejector-interval", "9h") .put("storage.max-recovery-threads", "12") .put("storage.max-organization-threads", "12") @@ -126,6 +128,7 @@ public void testExplicitPropertyMappings() .setCompactionEnabled(false) .setCompactionInterval(new Duration(4, HOURS)) .setOrganizationEnabled(false) + .setTableSupportsDeltaDelete(true) .setOrganizationInterval(new Duration(4, HOURS)) .setOrganizationDiscoveryInterval(new Duration(2, HOURS)) .setShardEjectorInterval(new Duration(9, HOURS)) diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestCompactionSetCreator.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestCompactionSetCreator.java index 0ec4924562d55..d7e82bc28dd09 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestCompactionSetCreator.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestCompactionSetCreator.java @@ -16,13 +16,15 @@ import com.facebook.presto.raptor.metadata.Table; import com.facebook.presto.spi.type.Type; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.airlift.units.DataSize; import org.testng.annotations.Test; import java.time.Duration; -import java.util.HashSet; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; @@ -40,10 +42,10 @@ public class TestCompactionSetCreator { private static final long MAX_SHARD_ROWS = 100; private static final DataSize MAX_SHARD_SIZE = new DataSize(100, DataSize.Unit.BYTE); - private static final Table tableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.empty(), false); - private static final Table temporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.of(1), false); - private static final Table bucketedTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.of(3), OptionalLong.empty(), false); - private static final Table bucketedTemporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.of(3), OptionalLong.of(1), false); + private static final Table tableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.empty(), false, false); + private static final Table temporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.of(1), false, false); + private static final Table bucketedTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.of(3), OptionalLong.empty(), false, false); + private static final Table bucketedTemporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.of(3), OptionalLong.of(1), false, false); private final CompactionSetCreator compactionSetCreator = new CompactionSetCreator(new TemporalFunction(UTC), MAX_SHARD_SIZE, MAX_SHARD_ROWS); @@ -57,7 +59,7 @@ public void testNonTemporalOrganizationSetSimple() Set compactionSets = compactionSetCreator.createCompactionSets(tableInfo, inputShards); assertEquals(compactionSets.size(), 1); - assertEquals(getOnlyElement(compactionSets).getShards(), extractIndexes(inputShards, 0, 1, 2)); + assertEquals(getOnlyElement(compactionSets).getShardsMap(), extractIndexes(inputShards, 0, 
1, 2)); } @Test @@ -71,11 +73,11 @@ public void testNonTemporalSizeBasedOrganizationSet() Set compactionSets = compactionSetCreator.createCompactionSets(tableInfo, inputShards); - Set actual = new HashSet<>(); + Map> actual = new HashMap<>(); for (OrganizationSet set : compactionSets) { - actual.addAll(set.getShards()); + actual.putAll(set.getShardsMap()); } - assertTrue(extractIndexes(inputShards, 0, 1, 2).containsAll(actual)); + assertTrue(extractIndexes(inputShards, 0, 1, 2).keySet().containsAll(actual.keySet())); } @Test @@ -89,12 +91,12 @@ public void testNonTemporalRowCountBasedOrganizationSet() Set compactionSets = compactionSetCreator.createCompactionSets(tableInfo, inputShards); - Set actual = new HashSet<>(); + Map> actual = new HashMap<>(); for (OrganizationSet set : compactionSets) { - actual.addAll(set.getShards()); + actual.putAll(set.getShardsMap()); } - assertTrue(extractIndexes(inputShards, 0, 2, 3).containsAll(actual)); + assertTrue(extractIndexes(inputShards, 0, 2, 3).keySet().containsAll(actual.keySet())); } @Test @@ -115,8 +117,8 @@ public void testTemporalCompactionNoCompactionAcrossDays() assertEquals(actual.size(), 2); Set expected = ImmutableSet.of( - new OrganizationSet(temporalTableInfo.getTableId(), extractIndexes(inputShards, 0, 3), OptionalInt.empty()), - new OrganizationSet(temporalTableInfo.getTableId(), extractIndexes(inputShards, 1, 2), OptionalInt.empty())); + new OrganizationSet(temporalTableInfo.getTableId(), false, extractIndexes(inputShards, 0, 3), OptionalInt.empty()), + new OrganizationSet(temporalTableInfo.getTableId(), false, extractIndexes(inputShards, 1, 2), OptionalInt.empty())); assertEquals(actual, expected); } @@ -143,8 +145,8 @@ public void testTemporalCompactionSpanningDays() assertEquals(compactionSets.size(), 2); Set expected = ImmutableSet.of( - new OrganizationSet(tableId, extractIndexes(inputShards, 0, 1, 5, 6), OptionalInt.empty()), - new OrganizationSet(tableId, extractIndexes(inputShards, 2, 3, 4), OptionalInt.empty())); + new OrganizationSet(tableId, false, extractIndexes(inputShards, 0, 1, 5, 6), OptionalInt.empty()), + new OrganizationSet(tableId, false, extractIndexes(inputShards, 2, 3, 4), OptionalInt.empty())); assertEquals(compactionSets, expected); } @@ -169,8 +171,8 @@ public void testTemporalCompactionDate() assertEquals(actual.size(), 2); Set expected = ImmutableSet.of( - new OrganizationSet(tableId, extractIndexes(inputShards, 0, 3, 5), OptionalInt.empty()), - new OrganizationSet(tableId, extractIndexes(inputShards, 1, 4), OptionalInt.empty())); + new OrganizationSet(tableId, false, extractIndexes(inputShards, 0, 3, 5), OptionalInt.empty()), + new OrganizationSet(tableId, false, extractIndexes(inputShards, 1, 4), OptionalInt.empty())); assertEquals(actual, expected); } @@ -191,16 +193,16 @@ public void testBucketedTableCompaction() assertEquals(actual.size(), 2); Set expected = ImmutableSet.of( - new OrganizationSet(tableId, extractIndexes(inputShards, 0, 3, 5), OptionalInt.of(1)), - new OrganizationSet(tableId, extractIndexes(inputShards, 1, 2, 4), OptionalInt.of(2))); + new OrganizationSet(tableId, false, extractIndexes(inputShards, 0, 3, 5), OptionalInt.of(1)), + new OrganizationSet(tableId, false, extractIndexes(inputShards, 1, 2, 4), OptionalInt.of(2))); assertEquals(actual, expected); } - static Set extractIndexes(List inputShards, int... indexes) + static Map> extractIndexes(List inputShards, int... 
indexes) { - ImmutableSet.Builder builder = ImmutableSet.builder(); + ImmutableMap.Builder> builder = ImmutableMap.builder(); for (int index : indexes) { - builder.add(inputShards.get(index).getShardUuid()); + builder.put(inputShards.get(index).getShardUuid(), inputShards.get(index).getDeltaUuid()); } return builder.build(); } @@ -227,8 +229,8 @@ public void testBucketedTemporalTableCompaction() assertEquals(actual.size(), 2); Set expected = ImmutableSet.of( - new OrganizationSet(tableId, extractIndexes(inputShards, 0, 2), OptionalInt.of(1)), - new OrganizationSet(tableId, extractIndexes(inputShards, 1, 3), OptionalInt.of(2))); + new OrganizationSet(tableId, false, extractIndexes(inputShards, 0, 2), OptionalInt.of(1)), + new OrganizationSet(tableId, false, extractIndexes(inputShards, 1, 3), OptionalInt.of(2))); assertEquals(actual, expected); } @@ -238,6 +240,8 @@ private static ShardIndexInfo shardWithSize(long rows, long size) 1, OptionalInt.empty(), UUID.randomUUID(), + false, + Optional.empty(), rows, size, Optional.empty(), @@ -255,6 +259,8 @@ private static ShardIndexInfo shardWithBucket(int bucketNumber) 1, OptionalInt.of(bucketNumber), UUID.randomUUID(), + false, + Optional.empty(), 1, 1, Optional.empty(), @@ -268,6 +274,8 @@ private static ShardIndexInfo shardWithTemporalBucket(OptionalInt bucketNumber, 1, bucketNumber, UUID.randomUUID(), + false, + Optional.empty(), 1, 1, Optional.empty(), @@ -277,6 +285,8 @@ private static ShardIndexInfo shardWithTemporalBucket(OptionalInt bucketNumber, 1, bucketNumber, UUID.randomUUID(), + false, + Optional.empty(), 1, 1, Optional.empty(), diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardCompactor.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardCompactor.java index 164bdeebabc17..ebf1209da049d 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardCompactor.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardCompactor.java @@ -32,6 +32,7 @@ import com.facebook.presto.testing.MaterializedResult; import com.facebook.presto.testing.MaterializedRow; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import io.airlift.units.DataSize; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.Handle; @@ -43,12 +44,16 @@ import java.io.File; import java.io.IOException; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.UUID; import static com.facebook.airlift.concurrent.MoreFutures.getFutureValue; +import static com.facebook.presto.RowPagesBuilder.rowPagesBuilder; import static com.facebook.presto.raptor.storage.TestOrcStorageManager.createOrcStorageManager; import static com.facebook.presto.spi.block.SortOrder.ASC_NULLS_FIRST; import static com.facebook.presto.spi.type.BigintType.BIGINT; @@ -119,7 +124,10 @@ public void testShardCompactor(boolean useOptimizedOrcWriter) .sum(); long expectedOutputShards = computeExpectedOutputShards(totalRows); - Set inputUuids = inputShards.stream().map(ShardInfo::getShardUuid).collect(toSet()); + Map> inputUuids = new HashMap<>(); + for (ShardInfo shardInfo : inputShards) { + inputUuids.put(shardInfo.getShardUuid(), Optional.empty()); + } long transactionId = 1; ShardCompactor compactor = new ShardCompactor(storageManager, READER_ATTRIBUTES); @@ -127,7 +135,47 @@ public void 
testShardCompactor(boolean useOptimizedOrcWriter) assertEquals(outputShards.size(), expectedOutputShards); Set outputUuids = outputShards.stream().map(ShardInfo::getShardUuid).collect(toSet()); - assertShardEqualsIgnoreOrder(storageManager, inputUuids, outputUuids, columnIds, columnTypes); + assertShardEqualsIgnoreOrder(storageManager, inputUuids.keySet(), outputUuids, columnIds, columnTypes); + } + + @Test + public void testShardCompactorWithDelta() + throws Exception + { + StorageManager storageManager = createOrcStorageManager(dbi, temporary, MAX_SHARD_ROWS); + List columnIds = ImmutableList.of(3L, 7L, 2L, 1L, 5L); + List columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE, DATE, TIMESTAMP); + + List inputShards = createShards(storageManager, columnIds, columnTypes, 3); + assertEquals(inputShards.size(), 3); + + List deltaColumnIds = ImmutableList.of(1L); + List deltaColumnTypes = ImmutableList.of(BIGINT); + StoragePageSink deltaSink = createStoragePageSink(storageManager, deltaColumnIds, deltaColumnTypes); + List deltaPages = rowPagesBuilder(deltaColumnTypes) + .row(1L) + .row(2L) + .build(); + deltaSink.appendPages(deltaPages); + List deltaShards = getFutureValue(deltaSink.commit()); + + long totalRows = inputShards.stream() + .mapToLong(ShardInfo::getRowCount) + .sum(); + long expectedOutputShardsCount = computeExpectedOutputShards(totalRows - 2); + + Map> inputUuidsMap = new HashMap<>(); + inputUuidsMap.put(inputShards.get(0).getShardUuid(), Optional.of(deltaShards.get(0).getShardUuid())); + inputUuidsMap.put(inputShards.get(1).getShardUuid(), Optional.empty()); + inputUuidsMap.put(inputShards.get(2).getShardUuid(), Optional.empty()); + + long transactionId = 1; + ShardCompactor compactor = new ShardCompactor(storageManager, READER_ATTRIBUTES); + List outputShards = compactor.compact(transactionId, OptionalInt.empty(), inputUuidsMap, getColumnInfo(columnIds, columnTypes)); + assertEquals(outputShards.size(), expectedOutputShardsCount); + + Set outputUuids = outputShards.stream().map(ShardInfo::getShardUuid).collect(toSet()); + assertShardEqualsIgnoreOrder(storageManager, inputUuidsMap, outputUuids, columnIds, columnTypes); } @Test(dataProvider = "useOptimizedOrcWriter") @@ -149,7 +197,10 @@ public void testShardCompactorSorted(boolean useOptimizedOrcWriter) long totalRows = inputShards.stream().mapToLong(ShardInfo::getRowCount).sum(); long expectedOutputShards = computeExpectedOutputShards(totalRows); - Set inputUuids = inputShards.stream().map(ShardInfo::getShardUuid).collect(toSet()); + Map> inputUuids = new HashMap<>(); + for (ShardInfo shardInfo : inputShards) { + inputUuids.put(shardInfo.getShardUuid(), Optional.empty()); + } long transactionId = 1; ShardCompactor compactor = new ShardCompactor(storageManager, READER_ATTRIBUTES); @@ -159,7 +210,7 @@ public void testShardCompactorSorted(boolean useOptimizedOrcWriter) .collect(toList()); assertEquals(outputShards.size(), expectedOutputShards); - assertShardEqualsSorted(storageManager, inputUuids, outputUuids, columnIds, columnTypes, sortIndexes, sortOrders); + assertShardEqualsSorted(storageManager, inputUuids.keySet(), outputUuids, columnIds, columnTypes, sortIndexes, sortOrders); } private static long computeExpectedOutputShards(long totalRows) @@ -176,6 +227,15 @@ private void assertShardEqualsIgnoreOrder(StorageManager storageManager, Set> inputUuidsMap, Set outputUuids, List columnIds, List columnTypes) + throws IOException + { + MaterializedResult inputRows = getMaterializedRows(storageManager, 
@@ -176,6 +227,15 @@ private void assertShardEqualsIgnoreOrder(StorageManager storageManager, Set<UUID> inputUuids, Set<UUID> outputUuids, List<Long> columnIds, List<Type> columnTypes)
+    private void assertShardEqualsIgnoreOrder(StorageManager storageManager, Map<UUID, Optional<UUID>> inputUuidsMap, Set<UUID> outputUuids, List<Long> columnIds, List<Type> columnTypes)
+            throws IOException
+    {
+        MaterializedResult inputRows = getMaterializedRows(storageManager, ImmutableMap.copyOf(inputUuidsMap), columnIds, columnTypes);
+        MaterializedResult outputRows = getMaterializedRows(storageManager, ImmutableList.copyOf(outputUuids), columnIds, columnTypes);
+
+        assertEqualsIgnoreOrder(outputRows, inputRows);
+    }
+
     private void assertShardEqualsSorted(StorageManager storageManager, Set<UUID> inputUuids, List<UUID> outputUuids, List<Long> columnIds, List<Type> columnTypes, List<Integer> sortIndexes, List<SortOrder> sortOrders)
             throws IOException
     {
@@ -236,7 +296,7 @@ private List<Page> getPages(StorageManager storageManager, Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
     {
         ImmutableList.Builder<Page> pages = ImmutableList.builder();
         for (UUID uuid : uuids) {
-            try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid)) {
+            try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid, Optional.empty(), false)) {
                 while (!pageSource.isFinished()) {
                     Page outputPage = pageSource.getNextPage();
                     if (outputPage == null) {
@@ -254,7 +314,22 @@ private MaterializedResult getMaterializedRows(StorageManager storageManager, List<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
     {
         MaterializedResult.Builder rows = MaterializedResult.resultBuilder(SESSION, columnTypes);
         for (UUID uuid : uuids) {
-            try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid)) {
+            try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid, Optional.empty(), false)) {
+                MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
+                rows.rows(result.getMaterializedRows());
+            }
+        }
+        return rows.build();
+    }
+
+    private MaterializedResult getMaterializedRows(StorageManager storageManager, Map<UUID, Optional<UUID>> uuidsMap, List<Long> columnIds, List<Type> columnTypes)
+            throws IOException
+    {
+        MaterializedResult.Builder rows = MaterializedResult.resultBuilder(SESSION, columnTypes);
+        for (Map.Entry<UUID, Optional<UUID>> entry : uuidsMap.entrySet()) {
+            UUID uuid = entry.getKey();
+            Optional<UUID> deltaUuid = entry.getValue();
+            try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid, deltaUuid, true)) {
                 MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
                 rows.rows(result.getMaterializedRows());
             }
@@ -262,9 +337,9 @@ private MaterializedResult getMaterializedRows(StorageManager storageManager, List<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
         return rows.build();
     }

-    private ConnectorPageSource getPageSource(StorageManager storageManager, List<Long> columnIds, List<Type> columnTypes, UUID uuid)
+    private ConnectorPageSource getPageSource(StorageManager storageManager, List<Long> columnIds, List<Type> columnTypes, UUID uuid, Optional<UUID> deltaShardUuid, boolean tableSupportsDeltaDelete)
     {
-        return storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, OptionalInt.empty(), columnIds, columnTypes, TupleDomain.all(), READER_ATTRIBUTES);
+        return storageManager.getPageSource(FileSystemContext.DEFAULT_RAPTOR_CONTEXT, uuid, deltaShardUuid, tableSupportsDeltaDelete, OptionalInt.empty(), columnIds, columnTypes, TupleDomain.all(), READER_ATTRIBUTES);
     }

     private static List<ShardInfo> createSortedShards(StorageManager storageManager, List<Long> columnIds, List<Type> columnTypes, List<Integer> sortChannels, List<SortOrder> sortOrders, int shardCount)
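/*
 * A minimal sketch (not part of the patch) of the widened StorageManager.getPageSource call used
 * by the helper above: the reader now also receives the optional delta-delete shard UUID and the
 * table-level tableSupportsDeltaDelete flag. shardUuid, deltaShardUuid, columnIds and columnTypes
 * are placeholders assumed from the test context.
 */
ConnectorPageSource pageSource = storageManager.getPageSource(
        FileSystemContext.DEFAULT_RAPTOR_CONTEXT,
        shardUuid,
        Optional.of(deltaShardUuid),   // rows recorded in this delta shard are filtered out
        true,                          // tableSupportsDeltaDelete
        OptionalInt.empty(),
        columnIds,
        columnTypes,
        TupleDomain.all(),
        READER_ATTRIBUTES);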
diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizationManager.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizationManager.java
index e65d4483e2752..202c7eb341593 100644
--- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizationManager.java
+++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizationManager.java
@@ -58,8 +58,8 @@ public class TestShardOrganizationManager
     private MetadataDao metadataDao;
     private ShardOrganizerDao organizerDao;

-    private static final Table tableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.empty(), true);
-    private static final Table temporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.of(1), true);
+    private static final Table tableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.empty(), true, false);
+    private static final Table temporalTableInfo = new Table(1L, OptionalLong.empty(), Optional.empty(), OptionalInt.empty(), OptionalLong.of(1), true, false);
     private static final List<Type> types = ImmutableList.of(BIGINT, VARCHAR, DATE, TIMESTAMP);

     private static final TemporalFunction TEMPORAL_FUNCTION = new TemporalFunction(UTC);
@@ -84,11 +84,11 @@ public void teardown()
     @Test
     public void testOrganizationEligibleTables()
     {
-        long table1 = metadataDao.insertTable("schema", "table1", false, true, null, 0);
+        long table1 = metadataDao.insertTable("schema", "table1", false, true, null, 0, false);
         metadataDao.insertColumn(table1, 1, "foo", 1, "bigint", 1, null);

-        metadataDao.insertTable("schema", "table2", false, true, null, 0);
-        metadataDao.insertTable("schema", "table3", false, false, null, 0);
+        metadataDao.insertTable("schema", "table2", false, true, null, 0, false);
+        metadataDao.insertTable("schema", "table3", false, false, null, 0, false);
         assertEquals(metadataDao.getOrganizationEligibleTables(), ImmutableSet.of(table1));
     }
@@ -96,13 +96,13 @@ public void testTableDiscovery()
             throws Exception
     {
-        long table1 = metadataDao.insertTable("schema", "table1", false, true, null, 0);
+        long table1 = metadataDao.insertTable("schema", "table1", false, true, null, 0, false);
         metadataDao.insertColumn(table1, 1, "foo", 1, "bigint", 1, null);

-        long table2 = metadataDao.insertTable("schema", "table2", false, true, null, 0);
+        long table2 = metadataDao.insertTable("schema", "table2", false, true, null, 0, false);
         metadataDao.insertColumn(table2, 1, "foo", 1, "bigint", 1, null);

-        metadataDao.insertTable("schema", "table3", false, false, null, 0);
+        metadataDao.insertTable("schema", "table3", false, false, null, 0, false);

         long intervalMillis = 100;
         ShardOrganizationManager organizationManager = createShardOrganizationManager(intervalMillis);
@@ -140,7 +140,7 @@ public void testSimple()
         assertEquals(actual.size(), 1);

         // Shards 0, 1 and 2 are overlapping, so we should get an organization set with these shards
-        assertEquals(getOnlyElement(actual).getShards(), extractIndexes(shards, 0, 1, 2));
+        assertEquals(getOnlyElement(actual).getShardsMap(), extractIndexes(shards, 0, 1, 2));
     }

     @Test
@@ -167,7 +167,7 @@ public void testSimpleTemporal()
         // expect 2 organization sets, of overlapping shards (0, 2) and (1, 3)
         assertEquals(organizationSets.size(), 2);

-        assertEquals(actual, ImmutableSet.of(extractIndexes(shards, 0, 2), extractIndexes(shards, 1, 3)));
+        assertEquals(actual, ImmutableSet.of(extractIndexes(shards, 0, 2).keySet(), extractIndexes(shards, 1, 3).keySet()));
     }

     private static ShardIndexInfo shardWithSortRange(int bucketNumber, ShardRange sortRange)
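/*
 * A minimal sketch (not part of the patch) of the extended metadata calls exercised above:
 * insertTable takes the new trailing tableSupportsDeltaDelete flag, and an OrganizationSet is now
 * built from a shard UUID to Optional delta UUID map exposed via getShardsMap(). The table name
 * and UUIDs are placeholders.
 */
long tableId = metadataDao.insertTable("schema", "delta_table", false, true, null, 0, true);
OrganizationSet set = new OrganizationSet(tableId, true, ImmutableMap.of(UUID.randomUUID(), Optional.empty()), OptionalInt.empty());
assertEquals(set.getShardsMap().size(), 1);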
@@ -176,6 +178,8 @@ private static ShardIndexInfo shardWithSortRange(int bucketNumber, ShardRange sortRange)
                 1,
                 OptionalInt.of(bucketNumber),
                 UUID.randomUUID(),
+                false,
+                Optional.empty(),
                 1,
                 1,
                 Optional.of(sortRange),
@@ -188,6 +190,8 @@ private static ShardIndexInfo shardWithTemporalRange(int bucketNumber, ShardRange temporalRange)
                 1,
                 OptionalInt.of(bucketNumber),
                 UUID.randomUUID(),
+                false,
+                Optional.empty(),
                 1,
                 1,
                 Optional.of(sortRange),
diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizer.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizer.java
index 43126d6f3ee4f..496b9188de091 100644
--- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizer.java
+++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizer.java
@@ -13,11 +13,12 @@
  */
 package com.facebook.presto.raptor.storage.organization;

-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMap;
 import org.testng.annotations.Test;

+import java.util.Map;
+import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Set;
 import java.util.UUID;

 import static com.google.common.collect.Iterables.getOnlyElement;
@@ -35,18 +36,18 @@ public void testShardOrganizerInProgress()
     {
         ShardOrganizer organizer = createShardOrganizer();

-        Set<UUID> shards = ImmutableSet.of(UUID.randomUUID());
-        OrganizationSet organizationSet = new OrganizationSet(1L, shards, OptionalInt.empty());
+        Map<UUID, Optional<UUID>> shards = ImmutableMap.of(UUID.randomUUID(), Optional.empty());
+        OrganizationSet organizationSet = new OrganizationSet(1L, false, shards, OptionalInt.empty());
         organizer.enqueue(organizationSet);

-        assertTrue(organizer.inProgress(getOnlyElement(shards)));
+        assertTrue(organizer.inProgress(getOnlyElement(shards.keySet())));
         assertEquals(organizer.getShardsInProgress(), 1);

-        while (organizer.inProgress(getOnlyElement(shards))) {
+        while (organizer.inProgress(getOnlyElement(shards.keySet()))) {
             MILLISECONDS.sleep(10);
         }
-        assertFalse(organizer.inProgress(getOnlyElement(shards)));
+        assertFalse(organizer.inProgress(getOnlyElement(shards.keySet())));
         assertEquals(organizer.getShardsInProgress(), 0);
         organizer.shutdown();
     }
diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizerUtil.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizerUtil.java
index 55d4be526b094..3676f6706fe38 100644
--- a/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizerUtil.java
+++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/storage/organization/TestShardOrganizerUtil.java
@@ -110,6 +110,7 @@ public void testGetOrganizationEligibleShards()
                 .column("orderstatus", createVarcharType(3))
                 .property("ordering", ImmutableList.of("orderstatus", "orderkey"))
                 .property("temporal_column", "orderdate")
+                .property("table_supports_delta_delete", false)
                 .build(), false);
         Table tableInfo = metadataDao.getTableInformation(tableName.getSchemaName(), tableName.getTableName());
@@ -226,6 +227,8 @@ private static List<ShardIndexInfo> getShardIndexInfo(Table tableInfo, List
 tupleDomain = TupleDomain.withColumnDomains(
@@ -183,11 +187,13 @@ public void testNoTableFilter()
         // Create "orders" table in a different schema
         createTable(tableMetadataBuilder(new SchemaTableName("test", "orders2"))
                 .column("orderkey", BIGINT)
+                .property(TABLE_SUPPORTS_DELTA_DELETE, false)
                 .build());

         // Create another table that should not be selected
         createTable(tableMetadataBuilder(new SchemaTableName("schema1", "foo"))
                 .column("orderkey", BIGINT)
+                .property(TABLE_SUPPORTS_DELTA_DELETE, false)
                 .build());

         TupleDomain tupleDomain = TupleDomain.withColumnDomains(