Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Run this node as the cluster coordinator; it schedules and manages queries.
coordinator=true
# Do not schedule worker tasks on the coordinator to avoid resource contention.
node-scheduler.include-coordinator=false
# Coordinator REST/HTTP port for clients and workers.
http-server.http.port=8080
# Embedded service that provides node discovery for workers.
discovery-server.enabled=true
# Address workers use to register with the discovery service.
discovery.uri=http://presto-coordinator:8080

# Set Presto version string to match workers for compatibility in tests.
presto.version=testversion

# Keep up to 30 rolled log files to bound disk usage.
log.max-history=30
# Rotate logs at ~100MB per file for manageable artifacts.
log.max-size=104857600B
# Reserve heap headroom per node to reduce full GC and OOM risk.
memory.heap-headroom-per-node={{ .HeadroomGb }}GB

# Limit pending splits per task to avoid excessive memory usage.
node-scheduler.max-pending-splits-per-task=2000
# Cap concurrent splits per node for balanced scheduling.
node-scheduler.max-splits-per-node=2000

# Optimizer flags
# Use a default filter factor to estimate the selectivity of filters in queries. Good for GPU, bad for CPU (hence disabled below).
optimizer.default-filter-factor-enabled=false
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is identical to the GPU version other than this one flag

# Use known constraints to simplify plan and filters.
optimizer.exploit-constraints=true
# Rewrite large IN lists as joins for performance in some cases.
optimizer.in-predicates-as-inner-joins-enabled=true
# Let the optimizer decide when partial aggregation is worthwhile (it reduces data shuffled across stages).
optimizer.partial-aggregation-strategy=automatic
# Prefer partial aggregations when beneficial.
optimizer.prefer-partial-aggregation=true
# Default selectivity heuristic for joins when stats are missing.
optimizer.default-join-selectivity-coefficient=0.1
# Infer additional range predicates to improve filtering.
optimizer.infer-inequality-predicates=true
# Support complex equi-join patterns in the optimizer.
optimizer.handle-complex-equi-joins=true
# Infer extra predicates from column domains during predicate pushdown to reduce scanned data.
optimizer.generate-domain-filters=true
# Upper limit for broadcasted table size to avoid memory blowups.
# See: https://github.com/prestodb/presto/issues/22161#issuecomment-1994128619
join-max-broadcast-table-size={{ .JoinMaxBroadcastTableSizeMb }}MB

# Client request timeout to avoid hung queries.
query.client.timeout=30m
# Use phased execution policy for improved large query scheduling.
query.execution-policy=phased
# Kill queries based on total reservation on blocked nodes to recover memory.
query.low-memory-killer.policy=total-reservation-on-blocked-nodes
# Upper limit on query wall time to keep tests bounded.
query.max-execution-time=30m
# Keep metadata of up to 1000 queries for UI and debugging.
query.max-history=1000
# Memory quotas per node and cluster to protect stability.
query.max-total-memory-per-node={{ .JavaQueryMaxTotalMemPerNodeGb }}GB
query.max-total-memory={{ mul .JavaQueryMaxTotalMemPerNodeGb .NumberOfWorkers }}GB
query.max-memory-per-node={{ .JavaQueryMaxMemPerNodeGb }}GB
query.max-memory={{ mul .JavaQueryMaxMemPerNodeGb .NumberOfWorkers }}GB
# Allow deep stage DAGs required by certain benchmark queries.
query.max-stage-count=1300
# Retain query info at least this long for diagnostics.
query.min-expire-age=120.00m
# Larger scheduling batches for better throughput in benchmarks.
query.min-schedule-split-batch-size=2000
# Raise warning threshold to align with higher max stage count.
query.stage-count-warning-threshold=150
# Increase the maximum allowed length of the SQL query text for complex benchmark queries.
query.max-length=2000000

# Disable dynamic filtering for deterministic benchmarking.
experimental.enable-dynamic-filtering=false
# Cap revocable memory per node to avoid overcommit.
experimental.max-revocable-memory-per-node=50GB
# Limit disk spill usage per node to bound IO and disk usage.
experimental.max-spill-per-node=50GB
# Enable repartitioning improvements for shuffle efficiency.
experimental.optimized-repartitioning=true
# Enable dereference and subfield pushdown to reduce scanned data.
experimental.pushdown-dereference-enabled=true
experimental.pushdown-subfields-enabled=true
# Limit the spill space a single query may use on any one node.
experimental.query-max-spill-per-node=50GB
# Disable reserved memory pool to simplify test behavior.
experimental.reserved-pool-enabled=false
# Stop spilling when disk usage exceeds this fraction.
experimental.spiller-max-used-space-threshold=0.7
# Directory for spill files during execution.
experimental.spiller-spill-path=/tmp


# Min workers before query starts; keep minimal for quick tests.
query-manager.required-workers=1
# Maximum wait for required workers to join.
query-manager.required-workers-max-wait=10s

# Set required configuration for Presto C++ workers as indicated in https://prestodb.io/docs/current/presto_cpp/properties.html#coordinator-properties
native-execution-enabled=true
# Disable Java-side hash generation optimizations not used by native workers.
optimizer.optimize-hash-generation=false
# Use RE2J regex engine for performance and determinism.
regex-library=RE2J
# Enable alternative function signatures for native compatibility.
use-alternative-function-signatures=true

# Optimize for queries that can run entirely on a single worker.
single-node-execution-enabled=true
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ node-scheduler.max-pending-splits-per-task=2000
node-scheduler.max-splits-per-node=2000

# Optimizer flags
# Use a default filter factor to estimate the selectivity of filters in queries. Good for GPU, bad for CPU.
optimizer.default-filter-factor-enabled=true
# Use known constraints to simplify plan and filters.
optimizer.exploit-constraints=true
# Rewrite large IN lists as joins for performance in some cases.
Expand Down
2 changes: 1 addition & 1 deletion presto/docker/docker-compose.native-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
file: docker-compose.common.yml
service: presto-base-coordinator
volumes:
- ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties
- ./config/generated/etc_coordinator/config_native_cpu.properties:/opt/presto-server/etc/config.properties

presto-native-worker-cpu:
extends:
Expand Down
2 changes: 1 addition & 1 deletion presto/docker/docker-compose.native-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
file: docker-compose.common.yml
service: presto-base-coordinator
volumes:
- ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties
- ./config/generated/etc_coordinator/config_native_gpu.properties:/opt/presto-server/etc/config.properties

presto-native-worker-gpu:
extends:
Expand Down