diff --git a/Dockerfile-native b/Dockerfile-native
new file mode 100644
index 0000000000000..22d42de070717
--- /dev/null
+++ b/Dockerfile-native
@@ -0,0 +1,59 @@
+ARG PRESTO_VERSION
+
+FROM prestocpp/prestocpp-avx-centos:root-20230613 as Builder
+
+WORKDIR /app
+COPY . .
+# Build the S3-only AWS SDK, then the release presto_server binary.
+# If any step fails, print the tail of the CMake error log and exit non-zero so
+# the failed build fails this layer instead of being masked by a successful tail.
+RUN cd presto-native-execution && \
+    make velox-submodule && \
+    source /opt/rh/gcc-toolset-9/enable && \
+    source velox/scripts/setup-helper-functions.sh && \
+    (mkdir -p third_party && cd third_party && github_checkout aws/aws-sdk-cpp 1.9.96 --depth 1 --recurse-submodules --config advice.detachedHead=false && \
+    cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS:BOOL=OFF -DMINIMIZE_SIZE:BOOL=ON -DENABLE_TESTING:BOOL=OFF -DBUILD_ONLY:STRING="s3;identity-management") && \
+    EXTRA_CMAKE_FLAGS=" -DVELOX_ENABLE_INT64_BUILD_PARTITION_BOUND=ON" PRESTO_ENABLE_PARQUET=ON PRESTO_ENABLE_S3=ON PRESTO_ENABLE_TESTING=OFF MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=8 NUM_THREADS=8 TREAT_WARNINGS_AS_ERRORS=0 make release \
+    || (tail -n 500 _build/release/CMakeFiles/CMakeError.log; exit 1)
+
+RUN cd presto-native-execution && \
+    mkdir -p prestissimo && \
+    cp _build/release/presto_cpp/main/presto_server prestissimo && \
+    tar cvf prestissimo.tar entrypoint.sh velox.properties prestissimo
+
+##
+
+FROM prestocpp/prestocpp-avx-centos:root-20230613
+ENV PRESTO_HOME="/opt/presto-server"
+
+RUN dnf update -y && dnf install -y \
+    awscli \
+    gperf \
+    iproute \
+    lsof \
+    procps \
+    python3 \
+    sysstat \
+    tar \
+    vim \
+    wget \
+    which \
+    && \
+    mkdir -p $PRESTO_HOME/etc/catalog && \
+    mkdir -p /var/lib/presto/data && \
+    dnf clean all
+
+WORKDIR /app
+COPY --from=Builder /app/presto-native-execution/prestissimo.tar .
+# An ARG declared before the first FROM is only visible to FROM lines, so it
+# must be redeclared in this stage for ${PRESTO_VERSION} to expand below.
+ARG PRESTO_VERSION
+RUN tar xvf prestissimo.tar && \
+    mkdir -p /opt/presto-server/etc && \
+    mv prestissimo/presto_server /usr/local/bin/ && \
+    mv velox.properties /opt/presto-server/etc/ && \
+    mv entrypoint.sh /opt/ && \
+    touch /opt/presto-native-execution-${PRESTO_VERSION}
+
+ENTRYPOINT ["/opt/entrypoint.sh"]
+
diff --git a/Dockerfile-native-debug b/Dockerfile-native-debug
new file mode 100644
index 0000000000000..6dc86b2679a07
--- /dev/null
+++ b/Dockerfile-native-debug
@@ -0,0 +1,59 @@
+ARG PRESTO_VERSION
+
+FROM prestocpp/prestocpp-avx-centos:root-20230613 as Builder
+
+WORKDIR /app
+COPY . .
+# Build the S3-only AWS SDK, then the debug presto_server binary.
+# If any step fails, print the tail of the CMake error log and exit non-zero so
+# the failed build fails this layer instead of being masked by a successful tail.
+RUN cd presto-native-execution && \
+    make velox-submodule && \
+    source /opt/rh/gcc-toolset-9/enable && \
+    source velox/scripts/setup-helper-functions.sh && \
+    (mkdir -p third_party && cd third_party && github_checkout aws/aws-sdk-cpp 1.9.96 --depth 1 --recurse-submodules --config advice.detachedHead=false && \
+    cmake_install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS:BOOL=OFF -DMINIMIZE_SIZE:BOOL=ON -DENABLE_TESTING:BOOL=OFF -DBUILD_ONLY:STRING="s3;identity-management") && \
+    EXTRA_CMAKE_FLAGS=" -DVELOX_ENABLE_INT64_BUILD_PARTITION_BOUND=ON" PRESTO_ENABLE_PARQUET=ON PRESTO_ENABLE_S3=ON PRESTO_ENABLE_TESTING=OFF MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4 NUM_THREADS=2 TREAT_WARNINGS_AS_ERRORS=0 make debug \
+    || (tail -n 500 _build/debug/CMakeFiles/CMakeError.log; exit 1)
+
+RUN cd presto-native-execution && \
+    mkdir -p prestissimo && \
+    cp _build/debug/presto_cpp/main/presto_server prestissimo && \
+    tar cvf prestissimo.tar entrypoint.sh velox.properties prestissimo
+
+##
+
+FROM prestocpp/prestocpp-avx-centos:root-20230613
+ENV PRESTO_HOME="/opt/presto-server"
+
+RUN dnf update -y && dnf install -y \
+    awscli \
+    gperf \
+    iproute \
+    lsof \
+    procps \
+    python3 \
+    sysstat \
+    tar \
+    vim \
+    wget \
+    which \
+    && \
+    mkdir -p $PRESTO_HOME/etc/catalog && \
+    mkdir -p /var/lib/presto/data && \
+    dnf clean all
+
+WORKDIR /app
+COPY --from=Builder /app/presto-native-execution/prestissimo.tar .
+# An ARG declared before the first FROM is only visible to FROM lines, so it
+# must be redeclared in this stage for ${PRESTO_VERSION} to expand below.
+ARG PRESTO_VERSION
+RUN tar xvf prestissimo.tar && \
+    mkdir -p /opt/presto-server/etc && \
+    mv prestissimo/presto_server /usr/local/bin/ && \
+    mv velox.properties /opt/presto-server/etc/ && \
+    mv entrypoint.sh /opt/ && \
+    touch /opt/presto-native-execution-${PRESTO_VERSION}
+
+ENTRYPOINT ["/opt/entrypoint.sh"]
+
diff --git a/Jenkinsfile b/Jenkinsfile index 0a7fa55f65060..6162b0d923116 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -93,6 +93,7 @@ pipeline { sh(script: "git show -s --format=%cd --date=format:'%Y%m%d%H%M%S'", returnStdout: true).trim() + "-" + env.PRESTO_COMMIT_SHA.substring(0, 7) env.DOCKER_IMAGE = env.AWS_ECR + "/presto:${PRESTO_BUILD_VERSION}" + env.DOCKER_NATIVE_IMAGE = env.AWS_ECR + "/presto-native:${PRESTO_BUILD_VERSION}" } sh 'printenv | sort' @@ -168,6 +169,22 @@ pipeline { } } + stage('Docker Native Build') { + steps { + echo "Building ${DOCKER_NATIVE_IMAGE}" + withCredentials([[ + $class: 'AmazonWebServicesCredentialsBinding', + credentialsId: "${AWS_CREDENTIAL_ID}", + accessKeyVariable: 'AWS_ACCESS_KEY_ID', + secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { + sh '''#!/bin/bash -ex + docker buildx build -f Dockerfile-native --load --platform "linux/amd64" -t "${DOCKER_NATIVE_IMAGE}-amd64" \ + --build-arg "PRESTO_VERSION=${PRESTO_VERSION}" . 
+ ''' + } + } + } + stage('Publish Docker') { when { anyOf { @@ -194,6 +211,7 @@ pipeline { docker manifest annotate "${DOCKER_IMAGE}" "${DOCKER_IMAGE}-amd64" --os linux --arch amd64 docker manifest annotate "${DOCKER_IMAGE}" "${DOCKER_IMAGE}-arm64" --os linux --arch arm64 docker manifest push "${DOCKER_IMAGE}" + docker push "${DOCKER_NATIVE_IMAGE}-amd64" ''' } } @@ -202,3 +220,4 @@ pipeline { } } } + diff --git a/jenkins/agent-dind.yaml b/jenkins/agent-dind.yaml index 6dc3a13b2936a..04514d5750a33 100644 --- a/jenkins/agent-dind.yaml +++ b/jenkins/agent-dind.yaml @@ -1,23 +1,23 @@ apiVersion: v1 kind: Pod metadata: - namespace: oss-agent - labels: - containers: dind + namespace: oss-agent + labels: + containers: dind spec: - nodeSelector: - eks.amazonaws.com/nodegroup: eks-oss-presto-dynamic-managed-ng - serviceAccountName: oss-agent - containers: - - name: dind - image: docker:20.10.16-dind-alpine3.15 - securityContext: - privileged: true - tty: true - resources: - requests: - memory: "4Gi" - cpu: "2000m" - limits: - memory: "4Gi" - cpu: "2000m" + nodeSelector: + eks.amazonaws.com/nodegroup: eks-oss-presto-dynamic-managed-ng + serviceAccountName: oss-agent + containers: + - name: dind + image: docker:20.10.16-dind-alpine3.15 + securityContext: + privileged: true + tty: true + resources: + requests: + memory: "24Gi" + cpu: "7000m" + limits: + memory: "24Gi" + cpu: "7000m" diff --git a/jenkins/agent-maven.yaml b/jenkins/agent-maven.yaml index 92651edaf2dde..720a8c74e575c 100644 --- a/jenkins/agent-maven.yaml +++ b/jenkins/agent-maven.yaml @@ -1,26 +1,27 @@ apiVersion: v1 kind: Pod metadata: - namespace: oss-agent - labels: - containers: maven + namespace: oss-agent + labels: + containers: maven spec: - nodeSelector: - eks.amazonaws.com/nodegroup: eks-oss-presto-dynamic-managed-ng - serviceAccountName: oss-agent - containers: - - name: maven - image: maven:3.8.6-openjdk-8-slim - env: - - name: MAVEN_OPTS - value: "-Xmx8000m -Xms8000m" - resources: - requests: - 
memory: "10Gi" - cpu: "4000m" - limits: - memory: "10Gi" - cpu: "4000m" - tty: true - command: - - cat + nodeSelector: + eks.amazonaws.com/nodegroup: eks-oss-presto-dynamic-managed-ng + serviceAccountName: oss-agent + containers: + - name: maven + image: maven:3.8.6-openjdk-8-slim + env: + - name: MAVEN_OPTS + value: "-Xmx8000m -Xms8000m" + resources: + requests: + memory: "10Gi" + cpu: "4000m" + limits: + memory: "10Gi" + cpu: "4000m" + tty: true + command: + - cat + diff --git a/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpcdsCostBasedPlan.java b/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpcdsCostBasedPlan.java index b66998e5509ed..f9d50fba02d60 100644 --- a/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpcdsCostBasedPlan.java +++ b/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpcdsCostBasedPlan.java @@ -24,6 +24,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static com.facebook.presto.SystemSessionProperties.GENERATE_DOMAIN_FILTERS; import static com.facebook.presto.SystemSessionProperties.HANDLE_COMPLEX_EQUI_JOINS; import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY; @@ -56,7 +57,8 @@ public TestTpcdsCostBasedPlan() .setSystemProperty(JOIN_REORDERING_STRATEGY, JoinReorderingStrategy.AUTOMATIC.name()) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name()) .setSystemProperty(OPTIMIZE_JOINS_WITH_EMPTY_SOURCES, "false") - .setSystemProperty(HANDLE_COMPLEX_EQUI_JOINS, "true"); + .setSystemProperty(HANDLE_COMPLEX_EQUI_JOINS, "true") + .setSystemProperty(GENERATE_DOMAIN_FILTERS, "true"); LocalQueryRunner queryRunner = LocalQueryRunner.queryRunnerWithFakeNodeCountForStats(sessionBuilder.build(), 8); queryRunner.createCatalog( diff --git 
a/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpchCostBasedPlan.java b/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpchCostBasedPlan.java index ecb20d8918652..729078bad848b 100644 --- a/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpchCostBasedPlan.java +++ b/presto-benchto-benchmarks/src/test/java/com/facebook/presto/sql/planner/TestTpchCostBasedPlan.java @@ -25,6 +25,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static com.facebook.presto.SystemSessionProperties.GENERATE_DOMAIN_FILTERS; import static com.facebook.presto.SystemSessionProperties.HANDLE_COMPLEX_EQUI_JOINS; import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE; import static com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY; @@ -56,7 +57,8 @@ public TestTpchCostBasedPlan() .setSystemProperty("task_concurrency", "1") // these tests don't handle exchanges from local parallel .setSystemProperty(JOIN_REORDERING_STRATEGY, JoinReorderingStrategy.AUTOMATIC.name()) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name()) - .setSystemProperty(HANDLE_COMPLEX_EQUI_JOINS, "true"); + .setSystemProperty(HANDLE_COMPLEX_EQUI_JOINS, "true") + .setSystemProperty(GENERATE_DOMAIN_FILTERS, "true"); LocalQueryRunner queryRunner = LocalQueryRunner.queryRunnerWithFakeNodeCountForStats(sessionBuilder.build(), 8); queryRunner.createCatalog( diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q13.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q13.plan.txt index 6b3cceffe01c6..05d924028b1a5 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q13.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q13.plan.txt @@ -5,22 +5,21 @@ final aggregation over () join (INNER, REPLICATED): join (INNER, REPLICATED): join (INNER, REPLICATED): - join 
(INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [ss_addr_sk]) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [ca_address_sk]) - scan customer_address + remote exchange (REPLICATE, BROADCAST, []) + scan customer_demographics local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan household_demographics local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan customer_demographics + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan store diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q48.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q48.plan.txt index 409a033a02359..d621b13f173af 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q48.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q48.plan.txt @@ -3,20 +3,19 @@ final aggregation over () remote exchange (GATHER, SINGLE, []) partial aggregation over () join (INNER, REPLICATED): - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [ss_addr_sk]) + join (INNER, REPLICATED): + join (INNER, REPLICATED): join (INNER, REPLICATED): - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_demographics + scan store_sales local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + scan customer_demographics + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address local 
exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [ca_address_sk]) - scan customer_address + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan store diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q69.plan.txt b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q69.plan.txt index 27e5ae2f2ad92..f9c0c37d03bcf 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q69.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q69.plan.txt @@ -4,41 +4,7 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, [cd_credit_rating, cd_education_status, cd_gender, cd_marital_status, cd_purchase_estimate]) partial aggregation over (cd_credit_rating, cd_education_status, cd_gender, cd_marital_status, cd_purchase_estimate) - join (LEFT, PARTITIONED): - join (RIGHT, PARTITIONED): - final aggregation over (ws_bill_customer_sk) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [ws_bill_customer_sk]) - partial aggregation over (ws_bill_customer_sk) - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [c_customer_sk]) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [cd_demo_sk]) - scan customer_demographics - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [c_current_cdemo_sk]) - join (INNER, PARTITIONED): - final aggregation over (ss_customer_sk) - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [ss_customer_sk]) - partial aggregation over (ss_customer_sk) - join (INNER, REPLICATED): - scan store_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) 
- scan date_dim - local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [c_customer_sk]) - join (INNER, REPLICATED): - scan customer - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_address + join (RIGHT, PARTITIONED): final aggregation over (cs_ship_customer_sk) local exchange (GATHER, SINGLE, []) remote exchange (REPARTITION, HASH, [cs_ship_customer_sk]) @@ -48,3 +14,38 @@ local exchange (GATHER, SINGLE, []) local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan date_dim + local exchange (GATHER, SINGLE, []) + join (RIGHT, PARTITIONED): + final aggregation over (ws_bill_customer_sk) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, [ws_bill_customer_sk]) + partial aggregation over (ws_bill_customer_sk) + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, [c_customer_sk]) + join (INNER, PARTITIONED): + remote exchange (REPARTITION, HASH, [cd_demo_sk]) + scan customer_demographics + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, [c_current_cdemo_sk]) + join (INNER, PARTITIONED): + final aggregation over (ss_customer_sk) + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, [ss_customer_sk]) + partial aggregation over (ss_customer_sk) + join (INNER, REPLICATED): + scan store_sales + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim + local exchange (GATHER, SINGLE, []) + remote exchange (REPARTITION, HASH, [c_customer_sk]) + join (INNER, REPLICATED): + scan customer + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_address diff --git a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q85.plan.txt 
b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q85.plan.txt index 3f5ed949fc2a2..4acdbba49c802 100644 --- a/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q85.plan.txt +++ b/presto-benchto-benchmarks/src/test/resources/sql/presto/tpcds/q85.plan.txt @@ -7,29 +7,28 @@ local exchange (GATHER, SINGLE, []) join (INNER, REPLICATED): join (INNER, REPLICATED): join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [cd_demo_sk_0, cd_education_status_3, cd_marital_status_2]) - scan customer_demographics + remote exchange (REPARTITION, HASH, [ca_address_sk]) + scan customer_address local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [cd_education_status, cd_marital_status, wr_returning_cdemo_sk]) + remote exchange (REPARTITION, HASH, [wr_refunded_addr_sk]) join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [wr_refunded_addr_sk]) - join (INNER, PARTITIONED): - remote exchange (REPARTITION, HASH, [ws_item_sk, ws_order_number]) - join (INNER, REPLICATED): - scan web_sales - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan date_dim + remote exchange (REPARTITION, HASH, [wr_item_sk, wr_order_number]) + join (INNER, REPLICATED): + join (INNER, REPLICATED): + scan web_returns + local exchange (GATHER, SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan customer_demographics local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [wr_item_sk, wr_order_number]) - join (INNER, REPLICATED): - scan web_returns - local exchange (GATHER, SINGLE, []) - remote exchange (REPLICATE, BROADCAST, []) - scan customer_demographics + remote exchange (REPLICATE, BROADCAST, []) + scan customer_demographics local exchange (GATHER, SINGLE, []) - remote exchange (REPARTITION, HASH, [ca_address_sk]) - scan customer_address + remote exchange (REPARTITION, HASH, [ws_item_sk, ws_order_number]) + join (INNER, REPLICATED): + scan web_sales + local exchange (GATHER, 
SINGLE, []) + remote exchange (REPLICATE, BROADCAST, []) + scan date_dim local exchange (GATHER, SINGLE, []) remote exchange (REPLICATE, BROADCAST, []) scan web_page diff --git a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java index 9237ff568815d..ef44bd319700a 100644 --- a/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -296,6 +296,7 @@ public final class SystemSessionProperties public static final String ENABLE_HISTORY_BASED_SCALED_WRITER = "enable_history_based_scaled_writer"; public static final String REMOVE_REDUNDANT_CAST_TO_VARCHAR_IN_JOIN = "remove_redundant_cast_to_varchar_in_join"; public static final String HANDLE_COMPLEX_EQUI_JOINS = "handle_complex_equi_joins"; + public static final String GENERATE_DOMAIN_FILTERS = "generate_domain_filters"; // TODO: Native execution related session properties that are temporarily put here. They will be relocated in the future. 
public static final String NATIVE_SIMPLIFIED_EXPRESSION_EVALUATION_ENABLED = "native_simplified_expression_evaluation_enabled"; @@ -1781,6 +1782,11 @@ public SystemSessionProperties( HANDLE_COMPLEX_EQUI_JOINS, "Handle complex equi-join conditions to open up join space for join reordering", featuresConfig.getHandleComplexEquiJoins(), + false), + booleanProperty( + GENERATE_DOMAIN_FILTERS, + "Generate extra domain filters from complex predicates for restricting column domains", + false, false)); } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java index 57caee0d9b240..dbc1f6f958421 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java @@ -40,6 +40,7 @@ import com.facebook.presto.sql.planner.iterative.rule.EvaluateZeroSample; import com.facebook.presto.sql.planner.iterative.rule.ExtractSpatialJoins; import com.facebook.presto.sql.planner.iterative.rule.GatherAndMergeWindows; +import com.facebook.presto.sql.planner.iterative.rule.GenerateDomainFilters; import com.facebook.presto.sql.planner.iterative.rule.ImplementBernoulliSampleAsFilter; import com.facebook.presto.sql.planner.iterative.rule.ImplementFilteredAggregations; import com.facebook.presto.sql.planner.iterative.rule.ImplementOffset; @@ -105,7 +106,9 @@ import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantIdentityProjections; import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantLimit; import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantSort; +import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantSortColumns; import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantTopN; +import com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantTopNColumns; import 
com.facebook.presto.sql.planner.iterative.rule.RemoveTrivialFilters; import com.facebook.presto.sql.planner.iterative.rule.RemoveUnreferencedScalarApplyNodes; import com.facebook.presto.sql.planner.iterative.rule.RemoveUnreferencedScalarLateralNodes; @@ -393,6 +396,7 @@ public PlanOptimizers( new PruneOrderByInAggregation(metadata.getFunctionAndTypeManager()), new RewriteSpatialPartitioningAggregation(metadata))) .build()), + new GenerateDomainFilters(metadata, sqlParser), new IterativeOptimizer( metadata, ruleStats, @@ -545,7 +549,9 @@ public PlanOptimizers( ImmutableSet.of( new RemoveRedundantDistinct(), new RemoveRedundantTopN(), + new RemoveRedundantTopNColumns(), new RemoveRedundantSort(), + new RemoveRedundantSortColumns(), new RemoveRedundantLimit(), new RemoveRedundantDistinctLimit(), new RemoveRedundantAggregateDistinct(), diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/Key.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/Key.java index db562e496d659..9fa48fbf91203 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/Key.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/Key.java @@ -48,6 +48,11 @@ public Key(Set variables) this.variables = ImmutableSet.copyOf(variables); } + public Set getVariables() + { + return variables; + } + /** * Determines if a provided key requirement is satisfied by this key. * This is true if the variables in this key are a subset of the variables in the key requirement. 
diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesImpl.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesImpl.java index e9e2a12dc4336..d293deff8c297 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesImpl.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/properties/LogicalPropertiesImpl.java @@ -21,11 +21,14 @@ import com.facebook.presto.sql.relational.FunctionResolution; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.BinaryOperator; import java.util.stream.Collectors; import static com.facebook.presto.sql.planner.iterative.properties.Key.getNormalizedKey; @@ -151,6 +154,16 @@ public boolean isDistinct(Set keyVars) return keyRequirementSatisfied(new Key(keyVars)); } + @Override + public Set getSmallestKeyVariablesSet(Set candidateVariables) + { + return keyProperty.getKeys().stream() + .map(Key::getVariables) + .filter(candidateVariables::containsAll) + .reduce(BinaryOperator.minBy(Comparator.comparingInt(Set::size))) + .orElseGet(Collections::emptySet); + } + @Override public boolean isAtMostSingleRow() { diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/GenerateDomainFilters.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/GenerateDomainFilters.java new file mode 100644 index 0000000000000..8932c324102ce --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/GenerateDomainFilters.java @@ -0,0 +1,132 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner.iterative.rule; + +import com.facebook.presto.Session; +import com.facebook.presto.common.predicate.TupleDomain; +import com.facebook.presto.expressions.LogicalRowExpressions; +import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.spi.VariableAllocator; +import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.plan.FilterNode; +import com.facebook.presto.spi.plan.PlanNode; +import com.facebook.presto.spi.plan.PlanNodeIdAllocator; +import com.facebook.presto.spi.relation.ExpressionOptimizer; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.parser.SqlParser; +import com.facebook.presto.sql.planner.TypeProvider; +import com.facebook.presto.sql.planner.optimizations.ExpressionEquivalence; +import com.facebook.presto.sql.planner.optimizations.PlanOptimizer; +import com.facebook.presto.sql.planner.optimizations.PlanOptimizerResult; +import com.facebook.presto.sql.planner.plan.SimplePlanRewriter; +import com.facebook.presto.sql.relational.FunctionResolution; +import com.facebook.presto.sql.relational.RowExpressionDeterminismEvaluator; +import com.facebook.presto.sql.relational.RowExpressionDomainTranslator; +import com.facebook.presto.sql.relational.RowExpressionOptimizer; + +import static com.facebook.presto.SystemSessionProperties.GENERATE_DOMAIN_FILTERS; +import static java.util.Objects.requireNonNull; + +public class GenerateDomainFilters + implements 
PlanOptimizer +{ + private final RowExpressionDomainTranslator rowExpressionDomainTranslator; + private final LogicalRowExpressions logicalRowExpressions; + private final SqlParser sqlParser; + private final Metadata metadata; + + public GenerateDomainFilters(Metadata metadata, SqlParser sqlParser) + { + requireNonNull(metadata, "metadata is null"); + this.metadata = metadata; + this.rowExpressionDomainTranslator = new RowExpressionDomainTranslator(metadata); + this.sqlParser = sqlParser; + this.logicalRowExpressions = new LogicalRowExpressions( + new RowExpressionDeterminismEvaluator(metadata.getFunctionAndTypeManager()), + new FunctionResolution(metadata.getFunctionAndTypeManager().getFunctionAndTypeResolver()), + metadata.getFunctionAndTypeManager()); + } + + @Override + public boolean isEnabled(Session session) + { + return session.getSystemProperty(GENERATE_DOMAIN_FILTERS, Boolean.class); + } + + @Override + public PlanOptimizerResult optimize(PlanNode plan, Session session, TypeProvider types, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator, WarningCollector warningCollector) + { + if (!isEnabled(session)) { + return PlanOptimizerResult.optimizerResult(plan, false); + } + + Rewriter rewriter = new Rewriter(logicalRowExpressions, rowExpressionDomainTranslator, sqlParser, metadata, session); + PlanNode rewrittenPlan = SimplePlanRewriter.rewriteWith(rewriter, plan); + return PlanOptimizerResult.optimizerResult(rewrittenPlan, true); + } + + private static class Rewriter + extends SimplePlanRewriter + { + private final LogicalRowExpressions logicalRowExpressions; + private final RowExpressionDomainTranslator rowExpressionDomainTranslator; + private final ExpressionEquivalence expressionEquivalence; + private final Metadata metadata; + private final Session session; + + public Rewriter(LogicalRowExpressions logicalRowExpressions, + RowExpressionDomainTranslator rowExpressionDomainTranslator, + SqlParser sqlParser, + Metadata metadata, + Session 
session) + { + this.logicalRowExpressions = logicalRowExpressions; + this.rowExpressionDomainTranslator = rowExpressionDomainTranslator; + this.expressionEquivalence = new ExpressionEquivalence(metadata, sqlParser); + this.metadata = metadata; + this.session = session; + } + + @Override + public PlanNode visitFilter(FilterNode node, RewriteContext context) + { + RowExpression predicate = node.getPredicate(); + TupleDomain inferredTupleDomain = rowExpressionDomainTranslator.fromPredicate(session.toConnectorSession(), predicate).getTupleDomain(); + + if (inferredTupleDomain.isAll()) { + return node; + } + + RowExpression withTupleDomainPredicates = logicalRowExpressions.combineConjuncts(predicate, + rowExpressionDomainTranslator.toPredicate(inferredTupleDomain)); + + return areExpressionsEquivalent(predicate, withTupleDomainPredicates) ? node : new FilterNode( + node.getSourceLocation(), + node.getId(), + node.getSource(), + withTupleDomainPredicates); + } + + private boolean areExpressionsEquivalent(RowExpression leftExpression, RowExpression rightExpression) + { + return expressionEquivalence.areExpressionsEquivalent(simplifyExpression(leftExpression), simplifyExpression(rightExpression)); + } + + private RowExpression simplifyExpression(RowExpression expression) + { + return new RowExpressionOptimizer(metadata).optimize(expression, ExpressionOptimizer.Level.SERIALIZABLE, session.toConnectorSession()); + } + } +}
diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantSortColumns.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantSortColumns.java
new file mode 100644
index 0000000000000..03cb40575929c
--- /dev/null
+++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantSortColumns.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.sql.planner.iterative.rule;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.matching.Captures;
+import com.facebook.presto.matching.Pattern;
+import com.facebook.presto.spi.plan.LogicalProperties;
+import com.facebook.presto.spi.plan.OrderingScheme;
+import com.facebook.presto.spi.plan.PlanNode;
+import com.facebook.presto.spi.relation.VariableReferenceExpression;
+import com.facebook.presto.sql.planner.iterative.GroupReference;
+import com.facebook.presto.sql.planner.iterative.Rule;
+import com.facebook.presto.sql.planner.plan.SortNode;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.facebook.presto.sql.planner.plan.Patterns.sort;
+import static com.google.common.collect.ImmutableList.toImmutableList;
+
+/**
+ * Removes trailing sort columns once a prefix of the ordering columns contains a Key of the source
+ */
+public class RemoveRedundantSortColumns
+        implements Rule<SortNode>
+{
+    private static final Logger log = Logger.get(RemoveRedundantSortColumns.class);
+    private static final Pattern<SortNode> PATTERN = sort();
+
+    @Override
+    public Pattern<SortNode> getPattern()
+    {
+        return PATTERN;
+    }
+
+    @Override
+    public Result apply(SortNode node, Captures captures, Context context)
+    {
+        OrderingScheme orderingScheme = node.getOrderingScheme();
+        PlanNode source = node.getSource();
+        log.debug("[%s] SortNode : %s", node.getId(), node);
+
+        Optional<LogicalProperties> sourceLogicalProperties = ((GroupReference) source).getLogicalProperties();
+        OrderingScheme newOrderingScheme = pruneOrderingColumns(orderingScheme, source, sourceLogicalProperties);
+
+        if (newOrderingScheme.equals(orderingScheme)) {
+            return Result.empty();
+        }
+
+        return Result.ofPlanNode(new SortNode(node.getSourceLocation(), node.getId(), node.getStatsEquivalentPlanNode(), source, newOrderingScheme, node.isPartial()));
+    }
+
+    /**
+     * Drops the ordering columns that follow the shortest ORDER BY prefix containing a key of the source.
+     * <p>
+     * Rows cannot tie on a prefix that contains every variable of a key, so later columns never
+     * influence the result. Only a suffix is removed: dropping a column that precedes the key columns
+     * (e.g. pruning ORDER BY a, k to ORDER BY k) would change the output order.
+     *
+     * @return the pruned ordering scheme, or {@code nodeOrderingScheme} when the source has no logical
+     * properties or no prefix of the ordering columns contains a key
+     */
+    public static OrderingScheme pruneOrderingColumns(OrderingScheme nodeOrderingScheme, PlanNode source, Optional<LogicalProperties> sourceLogicalProperties)
+    {
+        if (!sourceLogicalProperties.isPresent()) {
+            return nodeOrderingScheme;
+        }
+
+        LogicalProperties logicalProperties = sourceLogicalProperties.get();
+        List<VariableReferenceExpression> orderByVariables = nodeOrderingScheme.getOrderBy().stream()
+                .map(ordering -> ordering.getVariable())
+                .collect(toImmutableList());
+        log.debug("Current Node order variables: %s%nLogical properties for source [%s] : %s", nodeOrderingScheme.getOrderingsMap().keySet(), source.getId(), logicalProperties);
+
+        // Grow the prefix one column at a time and stop at the first prefix that contains a key
+        Set<VariableReferenceExpression> prefixVariables = new HashSet<>();
+        int prefixLength = 0;
+        for (VariableReferenceExpression variable : orderByVariables) {
+            prefixVariables.add(variable);
+            prefixLength++;
+            if (!logicalProperties.getSmallestKeyVariablesSet(prefixVariables).isEmpty()) {
+                return new OrderingScheme(nodeOrderingScheme.getOrderBy().stream()
+                        .limit(prefixLength)
+                        .collect(toImmutableList()));
+            }
+        }
+
+        log.debug("No key variables found");
+        return nodeOrderingScheme;
+    }
+}
diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantTopNColumns.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantTopNColumns.java new file mode 100644 index 0000000000000..7d59057ca035e --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/RemoveRedundantTopNColumns.java @@ -0,0 +1,62 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.sql.planner.iterative.rule;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.matching.Captures;
+import com.facebook.presto.matching.Pattern;
+import com.facebook.presto.spi.plan.LogicalProperties;
+import com.facebook.presto.spi.plan.OrderingScheme;
+import com.facebook.presto.spi.plan.PlanNode;
+import com.facebook.presto.spi.plan.TopNNode;
+import com.facebook.presto.sql.planner.iterative.GroupReference;
+import com.facebook.presto.sql.planner.iterative.Rule;
+
+import java.util.Optional;
+
+import static com.facebook.presto.sql.planner.iterative.rule.RemoveRedundantSortColumns.pruneOrderingColumns;
+import static com.facebook.presto.sql.planner.plan.Patterns.topN;
+
+/**
+ * Rewrites a TopN node with the ordering returned by RemoveRedundantSortColumns.pruneOrderingColumns, which drops columns made redundant by a key of the source
+ */
+public class RemoveRedundantTopNColumns
+        implements Rule<TopNNode>
+{
+    private static final Logger log = Logger.get(RemoveRedundantTopNColumns.class);
+    private static final Pattern<TopNNode> PATTERN = topN();
+
+    @Override
+    public Pattern<TopNNode> getPattern()
+    {
+        return PATTERN;
+    }
+
+    @Override
+    public Result apply(TopNNode node, Captures captures, Context context)
+    {
+        OrderingScheme orderingScheme = node.getOrderingScheme();
+        PlanNode source = node.getSource();
+        log.debug("[%s] TopNNode : %s", node.getId(), node);
+
+        Optional<LogicalProperties> sourceLogicalProperties = ((GroupReference) source).getLogicalProperties();
+        OrderingScheme newOrderingScheme = pruneOrderingColumns(orderingScheme, source, sourceLogicalProperties);
+
+        if (newOrderingScheme.equals(orderingScheme)) {
+            return Result.empty();
+        }
+
+        return Result.ofPlanNode(new TopNNode(node.getSourceLocation(), node.getId(), source, node.getCount(), newOrderingScheme, node.getStep()));
+    }
+}
diff --git a/presto-main/src/main/java/com/facebook/presto/sql/relational/RowExpressionDomainTranslator.java b/presto-main/src/main/java/com/facebook/presto/sql/relational/RowExpressionDomainTranslator.java
index 40cb1ceea8589..d863d94ece208 100644
--- a/presto-main/src/main/java/com/facebook/presto/sql/relational/RowExpressionDomainTranslator.java
+++ b/presto-main/src/main/java/com/facebook/presto/sql/relational/RowExpressionDomainTranslator.java
@@ -132,7 +132,7 @@ public ExtractionResult fromPredicate(ConnectorSession session, RowExpres
         return predicate.accept(new Visitor<>(metadata, session, columnExtractor), false);
     }
 
-    private RowExpression toPredicate(Domain domain, RowExpression reference)
+    public RowExpression toPredicate(Domain domain, RowExpression reference)
     {
         if (domain.getValues().isNone()) {
             return domain.isNullAllowed() ? isNull(reference) : FALSE_CONSTANT;
diff --git a/presto-native-execution/entrypoint.sh b/presto-native-execution/entrypoint.sh
new file mode 100755
index 0000000000000..5d631faffc544
--- /dev/null
+++ b/presto-native-execution/entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+mkdir -p /var/log/presto-server # tee at the end of this script does not create the directory itself
+echo "node.id=$HOSTNAME" >> /opt/presto-server/etc/node.properties
+# Check for the reason of setting split_preload_per_driver:
+# https://github.com/prestodb/presto/issues/20020#issuecomment-1785083459
+GLOG_logtostderr=1 presto_server \
+    --etc-dir=/opt/presto-server/etc \
+    --split_preload_per_driver=0 \
+    2>&1 | tee /var/log/presto-server/console.log
diff --git a/presto-native-execution/velox.properties b/presto-native-execution/velox.properties
new file mode 100644
index 0000000000000..2ea74450059e3
--- /dev/null
+++ b/presto-native-execution/velox.properties
@@ -0,0 +1,3 @@
+mutable-config=true
+expression.eval_simplified=false
+
diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/LogicalProperties.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/LogicalProperties.java
index 53f47873a64fd..385f7813039e4 100644
--- a/presto-spi/src/main/java/com/facebook/presto/spi/plan/LogicalProperties.java
+++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/LogicalProperties.java
@@ -38,6 +38,15 @@ public interface LogicalProperties
      */
     boolean isDistinct(Set<VariableReferenceExpression> keyVars);
 
+    /**
+     * Determines the smallest set of variables that form a unique constraint for a node
+     * from a list of candidate variables
+     *
+     * @param candidateVariables variables the returned key may be drawn from
+     * @return If a key set exists, returns the smallest such set
+     */
+    Set<VariableReferenceExpression> getSmallestKeyVariablesSet(Set<VariableReferenceExpression> candidateVariables);
+
     /**
      * Determines if there is provably at most one tuple in a final or
      * intermediate result set produced by a PlanNode.