diff --git a/presto/docker/config/template/etc_coordinator/config_native_cpu.properties b/presto/docker/config/template/etc_coordinator/config_native_cpu.properties new file mode 100644 index 00000000..094597b4 --- /dev/null +++ b/presto/docker/config/template/etc_coordinator/config_native_cpu.properties @@ -0,0 +1,112 @@ +# Run this node as the cluster coordinator; it schedules and manages queries. +coordinator=true +# Do not schedule worker tasks on the coordinator to avoid resource contention. +node-scheduler.include-coordinator=false +# Coordinator REST/HTTP port for clients and workers. +http-server.http.port=8080 +# Embedded service that provides node discovery for workers. +discovery-server.enabled=true +# Address workers use to register with the discovery service. +discovery.uri=http://presto-coordinator:8080 + +# Set Presto version string to match workers for compatibility in tests. +presto.version=testversion + +# Keep up to 30 rolled log files to bound disk usage. +log.max-history=30 +# Rotate logs at ~100MB per file for manageable artifacts. +log.max-size=104857600B +# Reserve heap headroom per node to reduce full GC and OOM risk. +memory.heap-headroom-per-node={{ .HeadroomGb }}GB + +# Limit pending splits per task to avoid excessive memory usage. +node-scheduler.max-pending-splits-per-task=2000 +# Cap concurrent splits per node for balanced scheduling. +node-scheduler.max-splits-per-node=2000 + +# Optimizer flags +# Use a default filter factor to estimate the selectivity of filters in queries. Good for GPU, bad for CPU, so it is disabled here. +optimizer.default-filter-factor-enabled=false +# Use known constraints to simplify plan and filters. +optimizer.exploit-constraints=true +# Rewrite large IN lists as joins for performance in some cases. +optimizer.in-predicates-as-inner-joins-enabled=true +# Allow partial aggregations to reduce data shuffled across stages. +optimizer.partial-aggregation-strategy=automatic +# Prefer partial aggregations when beneficial. 
+optimizer.prefer-partial-aggregation=true +# Default selectivity heuristic for joins when stats are missing. +optimizer.default-join-selectivity-coefficient=0.1 +# Infer additional range predicates to improve filtering. +optimizer.infer-inequality-predicates=true +# Support complex equi-join patterns in the optimizer. +optimizer.handle-complex-equi-joins=true +# Add dynamic domain filters to reduce scanned data. +optimizer.generate-domain-filters=true +# Upper limit for broadcasted table size to avoid memory blowups. +# See: https://github.com/prestodb/presto/issues/22161#issuecomment-1994128619 +join-max-broadcast-table-size={{ .JoinMaxBroadcastTableSizeMb }}MB + +# Client request timeout to avoid hung queries. +query.client.timeout=30m +# Use phased execution policy for improved large query scheduling. +query.execution-policy=phased +# Kill queries based on total reservation on blocked nodes to recover memory. +query.low-memory-killer.policy=total-reservation-on-blocked-nodes +# Upper limit on query wall time to keep tests bounded. +query.max-execution-time=30m +# Keep metadata of up to 1000 queries for UI and debugging. +query.max-history=1000 +# Memory quotas per node and cluster to protect stability. +query.max-total-memory-per-node={{ .JavaQueryMaxTotalMemPerNodeGb }}GB +query.max-total-memory={{ mul .JavaQueryMaxTotalMemPerNodeGb .NumberOfWorkers }}GB +query.max-memory-per-node={{ .JavaQueryMaxMemPerNodeGb }}GB +query.max-memory={{ mul .JavaQueryMaxMemPerNodeGb .NumberOfWorkers }}GB +# Allow deep stage DAGs required by certain benchmark queries. +query.max-stage-count=1300 +# Retain query info at least this long for diagnostics. +query.min-expire-age=120.00m +# Larger scheduling batches for better throughput in benchmarks. +query.min-schedule-split-batch-size=2000 +# Raise warning threshold to align with higher max stage count. +query.stage-count-warning-threshold=150 +# Increase serialized plan/query length limit for complex benchmark queries. 
+query.max-length=2000000 + +# Disable dynamic filtering for deterministic benchmarking. +experimental.enable-dynamic-filtering=false +# Cap revocable memory per node to avoid overcommit. +experimental.max-revocable-memory-per-node=50GB +# Limit disk spill usage per node to bound IO and disk usage. +experimental.max-spill-per-node=50GB +# Enable repartitioning improvements for shuffle efficiency. +experimental.optimized-repartitioning=true +# Enable dereference and subfield pushdown to reduce scanned data. +experimental.pushdown-dereference-enabled=true +experimental.pushdown-subfields-enabled=true +# Cap the spill space a single query may use on each node. +experimental.query-max-spill-per-node=50GB +# Disable reserved memory pool to simplify test behavior. +experimental.reserved-pool-enabled=false +# Stop spilling when disk usage exceeds this fraction. +experimental.spiller-max-used-space-threshold=0.7 +# Directory for spill files during execution. +experimental.spiller-spill-path=/tmp + + +# Min workers before query starts; keep minimal for quick tests. +query-manager.required-workers=1 +# Maximum wait for required workers to join. +query-manager.required-workers-max-wait=10s + +# Set required configuration for Presto C++ workers as indicated in https://prestodb.io/docs/current/presto_cpp/properties.html#coordinator-properties +native-execution-enabled=true +# Disable Java-side hash generation optimizations not used by native workers. +optimizer.optimize-hash-generation=false +# Use RE2J regex engine for performance and determinism. +regex-library=RE2J +# Enable alternative function signatures for native compatibility. +use-alternative-function-signatures=true + +# Optimize for queries that can run entirely on a single worker. 
+single-node-execution-enabled=true diff --git a/presto/docker/config/template/etc_coordinator/config_native.properties b/presto/docker/config/template/etc_coordinator/config_native_gpu.properties similarity index 97% rename from presto/docker/config/template/etc_coordinator/config_native.properties rename to presto/docker/config/template/etc_coordinator/config_native_gpu.properties index b08eca85..18013b0a 100644 --- a/presto/docker/config/template/etc_coordinator/config_native.properties +++ b/presto/docker/config/template/etc_coordinator/config_native_gpu.properties @@ -25,6 +25,8 @@ node-scheduler.max-pending-splits-per-task=2000 node-scheduler.max-splits-per-node=2000 # Optimizer flags +# Use a default filter factor to estimate the selectivity of filters in queries. Good for GPU, bad for CPU, so it is enabled here. +optimizer.default-filter-factor-enabled=true # Use known constraints to simplify plan and filters. optimizer.exploit-constraints=true # Rewrite large IN lists as joins for performance in some cases. 
diff --git a/presto/docker/docker-compose.native-cpu.yml b/presto/docker/docker-compose.native-cpu.yml index 09fd5a87..d68451c3 100644 --- a/presto/docker/docker-compose.native-cpu.yml +++ b/presto/docker/docker-compose.native-cpu.yml @@ -4,7 +4,7 @@ services: file: docker-compose.common.yml service: presto-base-coordinator volumes: - - ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_coordinator/config_native_cpu.properties:/opt/presto-server/etc/config.properties presto-native-worker-cpu: extends: diff --git a/presto/docker/docker-compose.native-gpu.yml b/presto/docker/docker-compose.native-gpu.yml index 376c5167..3fc04566 100644 --- a/presto/docker/docker-compose.native-gpu.yml +++ b/presto/docker/docker-compose.native-gpu.yml @@ -4,7 +4,7 @@ services: file: docker-compose.common.yml service: presto-base-coordinator volumes: - - ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_coordinator/config_native_gpu.properties:/opt/presto-server/etc/config.properties presto-native-worker-gpu: extends: