From 57be8f71d0f322e23b1ac5c723c05fa77eb3632c Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Feb 2026 03:55:57 +0800 Subject: [PATCH 01/33] feat(helm): Add Presto query engine support to the Helm chart. --- docs/src/user-docs/guides-k8s-deployment.md | 73 +++++++++++ docs/src/user-docs/guides-using-presto.md | 81 ++++++++++++- .../templates/api-server-deployment.yaml | 2 +- .../templates/api-server-logs-pv.yaml | 2 +- .../templates/api-server-logs-pvc.yaml | 2 +- .../templates/api-server-service.yaml | 2 +- .../package-helm/templates/configmap.yaml | 92 ++++++++++++++ .../presto-coordinator-deployment.yaml | 82 +++++++++++++ .../templates/presto-coordinator-service.yaml | 17 +++ .../templates/presto-worker-deployment.yaml | 113 ++++++++++++++++++ .../templates/query-scheduler-deployment.yaml | 2 + .../templates/query-scheduler-logs-pv.yaml | 2 + .../templates/query-scheduler-logs-pvc.yaml | 2 + .../templates/query-scheduler-service.yaml | 2 + .../templates/query-worker-deployment.yaml | 2 + .../templates/query-worker-logs-pv.yaml | 2 + .../templates/query-worker-logs-pvc.yaml | 2 + .../query-worker-staged-streams-pv.yaml | 2 + .../query-worker-staged-streams-pvc.yaml | 2 + .../templates/reducer-deployment.yaml | 2 + .../templates/reducer-logs-pv.yaml | 2 + .../templates/reducer-logs-pvc.yaml | 2 + .../templates/reducer-service.yaml | 2 + tools/deployment/package-helm/values.yaml | 37 ++++++ 24 files changed, 522 insertions(+), 7 deletions(-) create mode 100644 tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml create mode 100644 tools/deployment/package-helm/templates/presto-coordinator-service.yaml create mode 100644 tools/deployment/package-helm/templates/presto-worker-deployment.yaml diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index e6c7249e0d..5cb469b6df 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -298,11 
+298,82 @@ helm template clp . -f custom-values.yaml :::: +### Using Presto as the query engine + +To use [Presto][presto-guide] as the query engine instead of the default clp-s query pipeline, set +`query_engine` to `"presto"` and configure the Presto-specific settings: + +```{code-block} yaml +:caption: presto-values.yaml + +image: + prestoCoordinator: + repository: "ghcr.io/y-scope/presto/coordinator" + tag: "dev" + prestoWorker: + repository: "ghcr.io/y-scope/presto/prestissimo-worker" + tag: "dev" + +prestoWorker: + # See below "Worker scheduling" for more details on configuring Presto scheduling + replicas: 2 + +clpConfig: + package: + storage_engine: "clp-s" + query_engine: "presto" + + # Disable results cache retention since the Presto integration doesn't yet support garbage + # collection of search results. + results_cache: + retention_period: null + + presto: + coordinator: + logging_level: "INFO" + query_max_memory_gb: 1 + query_max_memory_per_node_gb: 1 + worker: + query_memory_gb: 4 + system_memory_gb: 8 + # Split filter config for the Presto CLP connector. For each dataset you want to query, add a + # filter entry. Replace <dataset> with the dataset name (use "default" if you didn't specify one + # when compressing) and <timestamp-key> with the timestamp key used during compression. + # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file + split_filter: + clp.default.<dataset>: + - columnName: "<timestamp-key>" + customOptions: + rangeMapping: + lowerBound: "begin_timestamp" + upperBound: "end_timestamp" + required: false +``` + +Install with the Presto values: + +```bash +helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml +``` + +:::{note} +When `query_engine` is set to `"presto"`, the chart deploys a Presto coordinator and Presto +worker(s) instead of the API server, query scheduler, query workers, and reducers. +::: + +For more details on querying logs through Presto, see the [Using Presto][presto-guide] guide. 
+ ### Worker scheduling You can control where workers are scheduled using standard Kubernetes scheduling primitives (`nodeSelector`, `affinity`, `tolerations`, `topologySpreadConstraints`). +:::{note} +When using Presto as the query engine, use `prestoWorker:` instead of `queryWorker:` and `reducer:` +to configure Presto worker scheduling. The `prestoWorker:` key supports the same `scheduling:` +options. +::: + #### Dedicated node pools To run compression workers, query workers, and reducers in separate node pools: @@ -588,6 +659,7 @@ To tear down a `kubeadm` cluster: * [External database setup][external-db-guide]: Using external MariaDB and MongoDB * [Using object storage][s3-storage]: Configuring S3 storage * [Configuring retention periods][retention-guide]: Setting up data retention policies +* [Using Presto][presto-guide]: Distributed SQL queries on compressed logs [aks]: https://azure.microsoft.com/en-us/products/kubernetes-service [api-server]: guides-using-the-api-server.md @@ -605,6 +677,7 @@ To tear down a `kubeadm` cluster: [kubeadm]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/ [kubectl]: https://kubernetes.io/docs/tasks/tools/ [logging-infra-issue]: https://github.com/y-scope/clp/issues/1760 +[presto-guide]: guides-using-presto.md [quick-start]: quick-start/index.md [retention-guide]: guides-retention.md [rfc-1918]: https://datatracker.ietf.org/doc/html/rfc1918#section-3 diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index a598347917..c103a1cce2 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -14,7 +14,80 @@ maintained in a [fork][yscope-presto] of the Presto project. At some point, thes been merged into the main Presto repository so that you can use official Presto releases with CLP. 
::: -## Requirements +## Deployment options + +CLP supports Presto through two deployment methods: + +* **[Kubernetes (Helm)](#kubernetes-helm)**: Presto is deployed as part of the CLP Helm chart. This + is the simplest option if you are already using the [Kubernetes deployment][k8s-deployment]. +* **[Docker Compose](#docker-compose)**: Presto is deployed separately using Docker Compose alongside + a CLP package installation. + +## Kubernetes (Helm) + +When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting the `query_engine` to +`"presto"` in your Helm values. + +### Requirements + +* A running CLP Kubernetes deployment (see the [Kubernetes deployment guide][k8s-deployment]) + +### Set up + +1. Create a values file to enable Presto: + + ```{code-block} yaml + :caption: presto-values.yaml + + clpConfig: + package: + query_engine: "presto" + + # Disable results cache retention since the Presto integration doesn't yet support + # garbage collection of search results. + results_cache: + retention_period: null + + presto: + worker: + # Split filter config for the Presto CLP connector. For each dataset, add a filter entry. + # Replace <dataset> with the dataset name (use "default" if you didn't specify one when + # compressing) and <timestamp-key> with the timestamp key used during compression. + # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file + split_filter: + clp.default.<dataset>: + - columnName: "<timestamp-key>" + customOptions: + rangeMapping: + lowerBound: "begin_timestamp" + upperBound: "end_timestamp" + required: false + ``` + +2. Install (or upgrade) the Helm chart with the Presto values: + + ```bash + helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml + ``` + +3. 
Verify that the Presto coordinator and worker pods are running: + + ```bash + kubectl get pods -l "app.kubernetes.io/component in (presto-coordinator, presto-worker)" + ``` + +Once the pods are ready, you can [query your logs through Presto](#querying-your-logs-through-presto) +using CLP's Web UI. + +:::{note} +When using Kubernetes, Presto worker scheduling can be configured using the `prestoWorker.scheduling` +key in Helm values. See the [worker scheduling][k8s-scheduling] section of the Kubernetes deployment +guide for details. +::: + +## Docker Compose + +### Requirements * [CLP][clp-releases] (clp-json) v0.5.0 or higher * [Docker] v28 or higher @@ -22,9 +95,9 @@ been merged into the main Presto repository so that you can use official Presto * Python * python3-venv (for the version of Python installed) -## Set up +### Set up -Using Presto with CLP requires: +Using Presto with CLP via Docker Compose requires: * [Setting up CLP](#setting-up-clp) and compressing some logs. * [Setting up Presto](#setting-up-presto) to query CLP's metadata database and archives. 
@@ -227,6 +300,8 @@ These limitations will be addressed in a future release of the Presto integratio [clp-releases]: https://github.com/y-scope/clp/releases [docker-compose]: https://docs.docker.com/compose/install/ [Docker]: https://docs.docker.com/engine/install/ +[k8s-deployment]: guides-k8s-deployment.md +[k8s-scheduling]: guides-k8s-deployment.md#worker-scheduling [postgresql]: https://zenodo.org/records/10516401 [Presto]: https://prestodb.io/ [y-scope/presto#8]: https://github.com/y-scope/presto/issues/8 diff --git a/tools/deployment/package-helm/templates/api-server-deployment.yaml b/tools/deployment/package-helm/templates/api-server-deployment.yaml index 4ecaea6e4e..090a56403d 100644 --- a/tools/deployment/package-helm/templates/api-server-deployment.yaml +++ b/tools/deployment/package-helm/templates/api-server-deployment.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.api_server }} +{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/api-server-logs-pv.yaml b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml index e117f83bd1..73e3ad878c 100644 --- a/tools/deployment/package-helm/templates/api-server-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.api_server }} +{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} {{- include "clp.createStaticPv" (dict "root" . 
"component_category" "api-server" diff --git a/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml index d9429b6dad..1d1ec61af7 100644 --- a/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.api_server }} +{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} {{- include "clp.createPvc" (dict "root" . "component_category" "api-server" diff --git a/tools/deployment/package-helm/templates/api-server-service.yaml b/tools/deployment/package-helm/templates/api-server-service.yaml index 0aed0e7efa..a5cbbae19f 100644 --- a/tools/deployment/package-helm/templates/api-server-service.yaml +++ b/tools/deployment/package-helm/templates/api-server-service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.api_server }} +{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} apiVersion: "v1" kind: "Service" metadata: diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 6021d71863..b060a6dd90 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -219,6 +219,13 @@ data: {{- else }} mcp_server: null {{- end }} + {{- if eq .Values.clpConfig.package.query_engine "presto" }} + presto: + host: "{{ include "clp.fullname" . 
}}-presto-coordinator" + port: 8080 + {{- else }} + presto: null + {{- end }} mysql-logging.cnf: | [mysqld] @@ -321,6 +328,91 @@ data: {{ .Values.clpConfig.archive_output.target_segment_size | int }}, "ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }}, "ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, + {{- if eq .Values.clpConfig.package.query_engine "presto" }} + "PrestoHost": "{{ include "clp.fullname" . }}-presto-coordinator", + "PrestoPort": 8080 + {{- else }} "PrestoHost": null, "PrestoPort": null + {{- end }} } + +{{- if eq .Values.clpConfig.package.query_engine "presto" }} +{{- with .Values.clpConfig.presto }} + presto-coordinator-catalog-clp.properties: | + connector.name=clp + clp.metadata-provider-type=mysql + clp.metadata-db-url=jdbc:mysql://{{ include "clp.fullname" $ }}-database:3306 + clp.metadata-db-name={{ $.Values.clpConfig.database.names.clp }} + clp.metadata-db-user={{ $.Values.credentials.database.username }} + clp.metadata-db-password={{ $.Values.credentials.database.password }} + clp.metadata-table-prefix=clp_ + clp.split-provider-type=mysql + clp.split-filter-provider-type=mysql + clp.split-filter-config=/opt/presto-server/etc/split-filter.json + + presto-coordinator-config-config.properties: | + coordinator=true + node-scheduler.include-coordinator=false + http-server.http.port=8080 + query.max-memory={{ .coordinator.query_max_memory_gb }}GB + query.max-memory-per-node={{ .coordinator.query_max_memory_per_node_gb }}GB + discovery-server.enabled=true + discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080 + optimizer.optimize-hash-generation=false + regex-library=RE2J + use-alternative-function-signatures=true + inline-sql-functions=false + nested-data-serialization-enabled=false + native-execution-enabled=true + + presto-coordinator-config-jvm.config: | + -server + -Xmx4G + -XX:+UseG1GC + -XX:G1HeapRegionSize=32M + -XX:+UseGCOverheadLimit + -XX:+ExplicitGCInvokesConcurrent 
+ -XX:+HeapDumpOnOutOfMemoryError + -XX:+ExitOnOutOfMemoryError + -Djdk.attach.allowAttachSelf=true + + presto-coordinator-config-log.properties: | + com.facebook.presto={{ .coordinator.logging_level }} + + presto-coordinator-config-node.properties: | + node.environment=production + node.id=presto-coordinator + + presto-coordinator-config-split-filter.json: | + {{ .split_filter | toJson }} + + presto-worker-catalog-clp.properties: | + connector.name=clp + {{- with $.Values.clpConfig.archive_output.storage }} + {{- if eq .type "s3" }} + clp.storage-type=s3 + clp.s3-auth-provider=clp_package + clp.s3-access-key-id={{ .s3_config.aws_authentication.credentials.access_key_id }} + clp.s3-end-point=https://{{ .s3_config.bucket }}.s3.{{ .s3_config.region_code }}.amazonaws.com/ + clp.s3-secret-access-key={{ .s3_config.aws_authentication.credentials.secret_access_key }} + {{- end }}{{/* if eq .type "s3" */}} + {{- end }}{{/* with $.Values.clpConfig.archive_output.storage */}} + + presto-worker-config-config.properties: | + discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080 + http-server.http.port=8080 + query-memory-gb={{ .worker.query_memory_gb }} + shutdown-onset-sec=1 + system-memory-gb={{ .worker.system_memory_gb }} + register-test-functions=false + runtime-metrics-collection-enabled=false + + presto-worker-config-node.properties: | + node.environment=production + node.location=worker-location + + presto-worker-config-velox.properties: | + mutable-config=true +{{- end }}{{/* with .Values.clpConfig.presto */}} +{{- end }}{{/* if eq .Values.clpConfig.package.query_engine "presto" */}} diff --git a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml new file mode 100644 index 0000000000..c7c208dd0c --- /dev/null +++ b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml @@ -0,0 +1,82 @@ +{{- if eq 
.Values.clpConfig.package.query_engine "presto" }} +apiVersion: "apps/v1" +kind: "Deployment" +metadata: + name: {{ include "clp.fullname" . }}-presto-coordinator + labels: + {{- include "clp.labels" . | nindent 4 }} + app.kubernetes.io/component: "presto-coordinator" +spec: + replicas: 1 + selector: + matchLabels: + {{- include "clp.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "presto-coordinator" + template: + metadata: + labels: + {{- include "clp.labels" . | nindent 8 }} + app.kubernetes.io/component: "presto-coordinator" + spec: + serviceAccountName: {{ include "clp.fullname" . }}-job-watcher + terminationGracePeriodSeconds: 30 + {{- include "clp.createSchedulingConfigs" (dict + "root" . + "component" "prestoCoordinator" + ) | nindent 6 }} + initContainers: + - {{- include "clp.waitFor" (dict + "root" . + "type" "job" + "name" "db-table-creator" + ) | nindent 10 }} + containers: + - name: "presto-coordinator" + image: "{{ .Values.image.prestoCoordinator.repository }}:{{ .Values.image.prestoCoordinator.tag }}" + imagePullPolicy: "{{ .Values.image.prestoCoordinator.pullPolicy }}" + ports: + - name: "presto-coord" + containerPort: 8080 + volumeMounts: + - name: "presto-catalog" + mountPath: "/opt/presto-server/etc/catalog" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/config.properties" + subPath: "presto-coordinator-config-config.properties" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/jvm.config" + subPath: "presto-coordinator-config-jvm.config" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/log.properties" + subPath: "presto-coordinator-config-log.properties" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/node.properties" + subPath: "presto-coordinator-config-node.properties" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/split-filter.json" + subPath: 
"presto-coordinator-config-split-filter.json" + readOnly: true + readinessProbe: + {{- include "clp.readinessProbeTimings" . | nindent 12 }} + httpGet: &presto-coordinator-health-check + path: "/v1/info" + port: "presto-coord" + livenessProbe: + {{- include "clp.livenessProbeTimings" . | nindent 12 }} + httpGet: *presto-coordinator-health-check + volumes: + - name: "presto-catalog" + configMap: + name: {{ include "clp.fullname" . }}-config + items: + - key: "presto-coordinator-catalog-clp.properties" + path: "clp.properties" + - name: "presto-config" + configMap: + name: {{ include "clp.fullname" . }}-config +{{- end }} diff --git a/tools/deployment/package-helm/templates/presto-coordinator-service.yaml b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml new file mode 100644 index 0000000000..e96aabe873 --- /dev/null +++ b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml @@ -0,0 +1,17 @@ +{{- if eq .Values.clpConfig.package.query_engine "presto" }} +apiVersion: "v1" +kind: "Service" +metadata: + name: {{ include "clp.fullname" . }}-presto-coordinator + labels: + {{- include "clp.labels" . | nindent 4 }} + app.kubernetes.io/component: "presto-coordinator" +spec: + clusterIP: "None" + selector: + {{- include "clp.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "presto-coordinator" + ports: + - port: 8080 + targetPort: "presto-coord" +{{- end }} diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml new file mode 100644 index 0000000000..17a5bb05a8 --- /dev/null +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -0,0 +1,113 @@ +{{- if eq .Values.clpConfig.package.query_engine "presto" }} +apiVersion: "apps/v1" +kind: "Deployment" +metadata: + name: {{ include "clp.fullname" . }}-presto-worker + labels: + {{- include "clp.labels" . 
| nindent 4 }} + app.kubernetes.io/component: "presto-worker" +spec: + replicas: {{ .Values.prestoWorker.replicas }} + selector: + matchLabels: + {{- include "clp.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "presto-worker" + template: + metadata: + labels: + {{- include "clp.labels" . | nindent 8 }} + app.kubernetes.io/component: "presto-worker" + spec: + serviceAccountName: {{ include "clp.fullname" . }}-job-watcher + terminationGracePeriodSeconds: 30 + {{- include "clp.createSchedulingConfigs" (dict + "root" . + "component" "prestoWorker" + ) | nindent 6 }} + initContainers: + - {{- include "clp.waitFor" (dict + "root" . + "type" "service" + "name" "presto-coordinator" + ) | nindent 10 }} + - name: "setup-configs" + image: "busybox:latest" + command: + - "/bin/sh" + - "-c" + - | + set -e + cp /etc/presto-config/presto-worker-config-config.properties /opt/presto-server/etc/config.properties + cp /etc/presto-config/presto-worker-config-node.properties /opt/presto-server/etc/node.properties + + # Query coordinator for version and add it to config + DISCOVERY_URI="http://{{ include "clp.fullname" . 
}}-presto-coordinator:8080" + VERSION=$(wget -q -O - "${DISCOVERY_URI}/v1/info") + PRESTO_VERSION=$(echo "$VERSION" \ + | sed 's/.*"version":"//' \ + | sed 's/".*//') + echo "presto.version=${PRESTO_VERSION}" >> /opt/presto-server/etc/config.properties + + # Set node identity from hostname + HOSTNAME=$(hostname) + HOST_IP=$(hostname -i) + echo "node.internal-address=${HOST_IP}" >> /opt/presto-server/etc/node.properties + echo "node.id=${HOSTNAME}" >> /opt/presto-server/etc/node.properties + volumeMounts: + - name: "presto-config" + mountPath: "/etc/presto-config" + readOnly: true + - name: "presto-etc" + mountPath: "/opt/presto-server/etc" + containers: + - name: "presto-worker" + image: "{{ .Values.image.prestoWorker.repository }}:{{ .Values.image.prestoWorker.tag }}" + imagePullPolicy: "{{ .Values.image.prestoWorker.pullPolicy }}" + ports: + - name: "presto-worker" + containerPort: 8080 + volumeMounts: + - name: "presto-catalog" + mountPath: "/opt/presto-server/etc/catalog" + readOnly: true + - name: "presto-config" + mountPath: "/opt/presto-server/etc/velox.properties" + subPath: "presto-worker-config-velox.properties" + readOnly: true + - name: "presto-etc" + mountPath: "/opt/presto-server/etc" + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} + - name: {{ include "clp.volumeName" (dict + "component_category" "shared-data" + "name" "archives" + ) | quote }} + mountPath: "/var/data/archives" + readOnly: true + {{- end }} + readinessProbe: + {{- include "clp.readinessProbeTimings" . | nindent 12 }} + tcpSocket: &presto-worker-health-check + port: "presto-worker" + livenessProbe: + {{- include "clp.livenessProbeTimings" . | nindent 12 }} + tcpSocket: *presto-worker-health-check + volumes: + - name: "presto-catalog" + configMap: + name: {{ include "clp.fullname" . }}-config + items: + - key: "presto-worker-catalog-clp.properties" + path: "clp.properties" + - name: "presto-config" + configMap: + name: {{ include "clp.fullname" . 
}}-config + - name: "presto-etc" + emptyDir: {} + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} + - {{- include "clp.pvcVolume" (dict + "root" . + "component_category" "shared-data" + "name" "archives" + ) | nindent 10 }} + {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml b/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml index 5eba71bc30..c5e1260a45 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -101,3 +102,4 @@ spec: - name: "config" configMap: name: {{ include "clp.fullname" . }}-config +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml index bc66ac1ae3..321678d800 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createStaticPv" (dict "root" . "component_category" "query-scheduler" @@ -6,3 +7,4 @@ "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/query_scheduler" .Values.clpConfig.logs_directory) ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml b/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml index fa56f7af7e..e4ab9015d6 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createPvc" (dict "root" . 
"component_category" "query-scheduler" @@ -5,3 +6,4 @@ "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-scheduler-service.yaml b/tools/deployment/package-helm/templates/query-scheduler-service.yaml index 9f2ffa7172..ac31b2d801 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-service.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-service.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} apiVersion: "v1" kind: "Service" metadata: @@ -13,3 +14,4 @@ spec: ports: - port: 7000 targetPort: "query-scheduler" +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-deployment.yaml b/tools/deployment/package-helm/templates/query-worker-deployment.yaml index 33e4e20b86..539daf0fdf 100644 --- a/tools/deployment/package-helm/templates/query-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/query-worker-deployment.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -124,3 +125,4 @@ spec: "name" "staged-streams" ) | nindent 10 }} {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml index 1fd12c2e84..b676c53b37 100644 --- a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createStaticPv" (dict "root" . 
"component_category" "query-worker" @@ -6,3 +7,4 @@ "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/query_worker" .Values.clpConfig.logs_directory) ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml b/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml index 84d478b40a..83833d6fc9 100644 --- a/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createPvc" (dict "root" . "component_category" "query-worker" @@ -5,3 +6,4 @@ "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml index 1223310166..9af7ad531b 100644 --- a/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} {{- include "clp.createStaticPv" (dict "root" . @@ -8,3 +9,4 @@ "hostPath" .Values.clpConfig.stream_output.storage.staging_directory ) }} {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml index e909a57b21..55fb4b342a 100644 --- a/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} {{- include "clp.createPvc" (dict "root" . 
@@ -7,3 +8,4 @@ "accessModes" (list "ReadWriteOnce") ) }} {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/reducer-deployment.yaml b/tools/deployment/package-helm/templates/reducer-deployment.yaml index 0c8714f59e..99c880e1b6 100644 --- a/tools/deployment/package-helm/templates/reducer-deployment.yaml +++ b/tools/deployment/package-helm/templates/reducer-deployment.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -79,3 +80,4 @@ spec: - name: "config" configMap: name: {{ include "clp.fullname" . }}-config +{{- end }} diff --git a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml index 4c467877ae..699316845b 100644 --- a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createStaticPv" (dict "root" . "component_category" "reducer" @@ -6,3 +7,4 @@ "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/reducer" .Values.clpConfig.logs_directory) ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml b/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml index 40a3affaea..40c6ca72ff 100644 --- a/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} {{- include "clp.createPvc" (dict "root" . 
"component_category" "reducer" @@ -5,3 +6,4 @@ "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") ) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/reducer-service.yaml b/tools/deployment/package-helm/templates/reducer-service.yaml index f015db46b4..b10f7f0f6b 100644 --- a/tools/deployment/package-helm/templates/reducer-service.yaml +++ b/tools/deployment/package-helm/templates/reducer-service.yaml @@ -1,3 +1,4 @@ +{{- if ne .Values.clpConfig.package.query_engine "presto" }} apiVersion: "v1" kind: "Service" metadata: @@ -15,3 +16,4 @@ spec: - name: "reducer-{{ $i }}" port: {{ add 14009 $i }} {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 29ec4b7683..2644fb309d 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -17,6 +17,14 @@ image: repository: "ghcr.io/y-scope/clp/clp-package" pullPolicy: "Always" tag: "main" + prestoCoordinator: + repository: "ghcr.io/y-scope/presto/coordinator" + pullPolicy: "Always" + tag: "dev" + prestoWorker: + repository: "ghcr.io/y-scope/presto/prestissimo-worker" + pullPolicy: "Always" + tag: "dev" # - If false: Single-node deployment. # - PVs use local storage bound to a single node. @@ -63,6 +71,22 @@ queryWorker: # topologyKey: "kubernetes.io/hostname" # whenUnsatisfiable: "DoNotSchedule" +prestoWorker: + replicas: 1 + # Controls which nodes run Presto workers + # scheduling: + # nodeSelector: + # yscope.io/nodeType: compute + # tolerations: + # - key: "yscope.io/dedicated" + # operator: "Equal" + # value: "query" + # effect: "NoSchedule" + # topologySpreadConstraints: + # - maxSkew: 1 + # topologyKey: "kubernetes.io/hostname" + # whenUnsatisfiable: "DoNotSchedule" + reducer: replicas: 1 # Controls which nodes run reducers @@ -156,6 +180,19 @@ clpConfig: # port: 30800 # logging_level: "INFO" + # Presto query engine config. Only used when package.query_engine is "presto". 
+ presto: + coordinator: + logging_level: "INFO" + query_max_memory_gb: 1 + query_max_memory_per_node_gb: 1 + worker: + query_memory_gb: 4 + system_memory_gb: 8 + # Split filter config for Presto CLP connector. + # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file + split_filter: {} + # log-ingestor config. Currently, the config is applicable only if `logs_input.type` is "s3". log_ingestor: port: 30302 From 96e9a45e59de1ff93de8b045f03c29cfcef19001 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Feb 2026 04:35:06 +0800 Subject: [PATCH 02/33] docs: Remove redundant worker configuration comment in Presto guide --- docs/src/user-docs/guides-using-presto.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index c103a1cce2..6a16fbcc00 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -49,7 +49,6 @@ When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting th retention_period: null presto: - worker: # Split filter config for the Presto CLP connector. For each dataset, add a filter entry. # Replace with the dataset name (use "default" if you didn't specify one when # compressing) and with the timestamp key used during compression. 
From 80892548d65351cf78a22494448393d8ac724233 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Feb 2026 04:42:24 +0800 Subject: [PATCH 03/33] chore(helm): Bump chart version to 0.1.4-dev.4 --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 2a38fb8b15..448124ff84 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.1.4-dev.2" +version: "0.1.4-dev.4" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.9.1-dev" From 00d7292de1ed8cb51c0fbcf9986862d1f34a161c Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Feb 2026 21:35:49 +0800 Subject: [PATCH 04/33] feat(helm): Improve modularity and add Presto support refinements - Replace `query_engine` conditions with modular flags in Helm templates. - Refactor Presto configurations to use dedicated flags and scripts. - Enhance worker and reducer node scheduling logic. - Update documentation to reflect Presto integration and better configuration flexibility. 
--- docs/src/user-docs/guides-k8s-deployment.md | 40 +++++++++++-- docs/src/user-docs/guides-using-presto.md | 16 +++++ .../deployment/package-helm/.set-up-common.sh | 3 + .../set-up-multi-dedicated-test.sh | 20 ++++++- .../package-helm/set-up-multi-shared-test.sh | 5 +- .../templates/api-server-deployment.yaml | 2 +- .../templates/api-server-logs-pv.yaml | 2 +- .../templates/api-server-logs-pvc.yaml | 2 +- .../templates/api-server-service.yaml | 2 +- .../package-helm/templates/configmap.yaml | 60 +++++++++++++------ .../presto-coordinator-deployment.yaml | 4 +- .../templates/presto-coordinator-service.yaml | 7 ++- .../templates/presto-worker-deployment.yaml | 38 +++++------- .../templates/query-scheduler-deployment.yaml | 2 +- .../templates/query-scheduler-logs-pv.yaml | 2 +- .../templates/query-scheduler-logs-pvc.yaml | 2 +- .../templates/query-scheduler-service.yaml | 2 +- .../templates/query-worker-deployment.yaml | 2 +- .../templates/query-worker-logs-pv.yaml | 2 +- .../templates/query-worker-logs-pvc.yaml | 2 +- .../query-worker-staged-streams-pv.yaml | 2 +- .../query-worker-staged-streams-pvc.yaml | 2 +- .../templates/reducer-deployment.yaml | 2 +- .../templates/reducer-logs-pv.yaml | 2 +- .../templates/reducer-logs-pvc.yaml | 2 +- .../templates/reducer-service.yaml | 2 +- tools/deployment/package-helm/values.yaml | 53 ++++++++-------- 27 files changed, 179 insertions(+), 101 deletions(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 5cb469b6df..88c178bbd9 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -323,12 +323,21 @@ clpConfig: storage_engine: "clp-s" query_engine: "presto" + # Disable the clp-s query pipeline since Presto replaces it. + # NOTE: The API server currently depends on the clp-s query pipeline and does not work with + # Presto. Keep it enabled if you need the API server; disable it if not. 
+ api_server: null + query_scheduler: null + query_worker: null + reducer: null + # Disable results cache retention since the Presto integration doesn't yet support garbage # collection of search results. results_cache: retention_period: null presto: + port: 30889 coordinator: logging_level: "INFO" query_max_memory_gb: 1 @@ -386,6 +395,9 @@ To run compression workers, query workers, and reducers in separate node pools: # Label query nodes kubectl label nodes node3 node4 yscope.io/nodeType=query + + # Label Presto nodes (if using Presto as the query engine) + kubectl label nodes node5 node6 yscope.io/nodeType=presto ``` 2. Configure scheduling: @@ -399,19 +411,27 @@ To run compression workers, query workers, and reducers in separate node pools: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: compression + yscope.io/nodeType: "compression" queryWorker: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: query + yscope.io/nodeType: "query" reducer: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: query + yscope.io/nodeType: "query" + + # If using Presto as the query engine, configure prestoWorker instead of + # queryWorker and reducer. + prestoWorker: + replicas: 2 + scheduling: + nodeSelector: + yscope.io/nodeType: "presto" ``` 3. Install: @@ -441,7 +461,7 @@ To run all worker types in the same node pool: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: compute + yscope.io/nodeType: "compute" topologySpreadConstraints: - maxSkew: 1 topologyKey: "kubernetes.io/hostname" @@ -454,13 +474,21 @@ To run all worker types in the same node pool: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: compute + yscope.io/nodeType: "compute" reducer: replicas: 2 scheduling: nodeSelector: - yscope.io/nodeType: compute + yscope.io/nodeType: "compute" + + # If using Presto as the query engine, configure prestoWorker instead of + # queryWorker and reducer. 
+ prestoWorker: + replicas: 2 + scheduling: + nodeSelector: + yscope.io/nodeType: "compute" ``` 3. Install: diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index 6a16fbcc00..2e92f61cf8 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -43,12 +43,28 @@ When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting th package: query_engine: "presto" + # Disable the clp-s query pipeline since Presto replaces it. + # NOTE: The API server currently depends on the clp-s query pipeline and does not work + # with Presto. Keep it enabled if you need the API server; disable it if not. + api_server: null + query_scheduler: null + query_worker: null + reducer: null + # Disable results cache retention since the Presto integration doesn't yet support # garbage collection of search results. results_cache: retention_period: null presto: + port: 30889 + coordinator: + logging_level: "INFO" + query_max_memory_gb: 1 + query_max_memory_per_node_gb: 1 + worker: + query_memory_gb: 4 + system_memory_gb: 8 # Split filter config for the Presto CLP connector. For each dataset, add a filter entry. # Replace with the dataset name (use "default" if you didn't specify one when # compressing) and with the timestamp key used during compression. 
diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index 2a0204d0d2..c9d5a18f05 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ b/tools/deployment/package-helm/.set-up-common.sh @@ -82,6 +82,9 @@ nodes: - containerPort: 30800 hostPort: 30800 protocol: TCP + - containerPort: 30889 + hostPort: 30889 + protocol: TCP EOF for ((i = 0; i < num_workers; i++)); do diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 2e8ab3c23d..e85afbcf56 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -10,9 +10,11 @@ CLP_HOME="${CLP_HOME:-/tmp/clp}" CLUSTER_NAME="${CLUSTER_NAME:-clp-test}" NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}" NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}" +NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" +PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}" # shellcheck source=.set-up-common.sh source "${script_dir}/.set-up-common.sh" @@ -21,14 +23,16 @@ echo "=== Multi-node setup with dedicated worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Compression nodes: ${NUM_COMPRESSION_NODES}" echo "Query nodes: ${NUM_QUERY_NODES}" +echo "Presto nodes: ${NUM_PRESTO_NODES}" echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" +echo "Presto workers: ${PRESTO_WORKER_REPLICAS}" echo "" prepare_environment "${CLUSTER_NAME}" -total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES)) +total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES + NUM_PRESTO_NODES)) echo "Creating kind cluster..." 
generate_kind_config "${total_workers}" | kind create cluster --name "${CLUSTER_NAME}" --config=- @@ -43,11 +47,18 @@ for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do done # Label query nodes -for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do +query_end=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES)) +for ((i = NUM_COMPRESSION_NODES; i < query_end; i++)); do echo "Labeling ${worker_nodes[$i]} as query node" kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite done +# Label Presto nodes +for ((i = query_end; i < total_workers; i++)); do + echo "Labeling ${worker_nodes[$i]} as presto node" + kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=presto --overwrite +done + echo "Installing Helm chart..." helm uninstall test --ignore-not-found sleep 2 @@ -57,6 +68,9 @@ helm install test "${script_dir}" \ --set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \ - --set "reducer.replicas=${REDUCER_REPLICAS}" + --set "reducer.replicas=${REDUCER_REPLICAS}" \ + --set "reducer.scheduling.nodeSelector.yscope\.io/nodeType=query" \ + --set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \ + --set "prestoWorker.scheduling.nodeSelector.yscope\.io/nodeType=presto" wait_for_cluster_ready diff --git a/tools/deployment/package-helm/set-up-multi-shared-test.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh index 766e836ac3..6fe6015fcc 100755 --- a/tools/deployment/package-helm/set-up-multi-shared-test.sh +++ b/tools/deployment/package-helm/set-up-multi-shared-test.sh @@ -12,6 +12,7 @@ NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" +PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}" # shellcheck source=.set-up-common.sh source 
"${script_dir}/.set-up-common.sh" @@ -22,6 +23,7 @@ echo "Worker nodes: ${NUM_WORKER_NODES}" echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" +echo "Presto workers: ${PRESTO_WORKER_REPLICAS}" echo "" prepare_environment "${CLUSTER_NAME}" @@ -36,6 +38,7 @@ helm install test "${script_dir}" \ --set "distributedDeployment=true" \ --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ - --set "reducer.replicas=${REDUCER_REPLICAS}" + --set "reducer.replicas=${REDUCER_REPLICAS}" \ + --set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" wait_for_cluster_ready diff --git a/tools/deployment/package-helm/templates/api-server-deployment.yaml b/tools/deployment/package-helm/templates/api-server-deployment.yaml index 090a56403d..4ecaea6e4e 100644 --- a/tools/deployment/package-helm/templates/api-server-deployment.yaml +++ b/tools/deployment/package-helm/templates/api-server-deployment.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} +{{- if .Values.clpConfig.api_server }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/api-server-logs-pv.yaml b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml index 73e3ad878c..e117f83bd1 100644 --- a/tools/deployment/package-helm/templates/api-server-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} +{{- if .Values.clpConfig.api_server }} {{- include "clp.createStaticPv" (dict "root" . 
"component_category" "api-server" diff --git a/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml index 1d1ec61af7..d9429b6dad 100644 --- a/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} +{{- if .Values.clpConfig.api_server }} {{- include "clp.createPvc" (dict "root" . "component_category" "api-server" diff --git a/tools/deployment/package-helm/templates/api-server-service.yaml b/tools/deployment/package-helm/templates/api-server-service.yaml index a5cbbae19f..0aed0e7efa 100644 --- a/tools/deployment/package-helm/templates/api-server-service.yaml +++ b/tools/deployment/package-helm/templates/api-server-service.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }} +{{- if .Values.clpConfig.api_server }} apiVersion: "v1" kind: "Service" metadata: diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index b060a6dd90..da3f3783cf 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -129,15 +129,18 @@ data: package: query_engine: {{ .Values.clpConfig.package.query_engine | quote }} storage_engine: {{ .Values.clpConfig.package.storage_engine | quote }} + {{- with .Values.clpConfig.query_scheduler }} query_scheduler: - host: "{{ include "clp.fullname" . 
}}-query-scheduler" - jobs_poll_delay: {{ .Values.clpConfig.query_scheduler.jobs_poll_delay }} - logging_level: {{ .Values.clpConfig.query_scheduler.logging_level | quote }} - num_archives_to_search_per_sub_job: {{ - .Values.clpConfig.query_scheduler.num_archives_to_search_per_sub_job | int }} + host: "{{ include "clp.fullname" $ }}-query-scheduler" + jobs_poll_delay: {{ .jobs_poll_delay }} + logging_level: {{ .logging_level | quote }} + num_archives_to_search_per_sub_job: {{ .num_archives_to_search_per_sub_job | int }} port: 7000 + {{- end }} + {{- with .Values.clpConfig.query_worker }} query_worker: - logging_level: {{ .Values.clpConfig.query_worker.logging_level | quote }} + logging_level: {{ .logging_level | quote }} + {{- end }} queue: host: "{{ include "clp.fullname" . }}-queue" port: 5672 @@ -146,11 +149,13 @@ data: host: "{{ include "clp.fullname" . }}-redis" port: 6379 query_backend_database: {{ .Values.clpConfig.redis.query_backend_database | int }} + {{- with .Values.clpConfig.reducer }} reducer: base_port: 14009 - host: "{{ include "clp.fullname" . }}-reducer" - logging_level: {{ .Values.clpConfig.reducer.logging_level | quote }} - upsert_interval: {{ .Values.clpConfig.reducer.upsert_interval | int }} + host: "{{ include "clp.fullname" $ }}-reducer" + logging_level: {{ .logging_level | quote }} + upsert_interval: {{ .upsert_interval | int }} + {{- end }} results_cache: db_name: {{ .Values.clpConfig.results_cache.db_name | quote }} host: "{{ include "clp.fullname" . }}-results-cache" @@ -219,10 +224,10 @@ data: {{- else }} mcp_server: null {{- end }} - {{- if eq .Values.clpConfig.package.query_engine "presto" }} + {{- if .Values.clpConfig.presto }} presto: host: "{{ include "clp.fullname" . 
}}-presto-coordinator" - port: 8080 + port: 8889 {{- else }} presto: null {{- end }} @@ -328,16 +333,15 @@ data: {{ .Values.clpConfig.archive_output.target_segment_size | int }}, "ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }}, "ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, - {{- if eq .Values.clpConfig.package.query_engine "presto" }} + {{- if .Values.clpConfig.presto }} "PrestoHost": "{{ include "clp.fullname" . }}-presto-coordinator", - "PrestoPort": 8080 + "PrestoPort": 8889 {{- else }} "PrestoHost": null, "PrestoPort": null {{- end }} } -{{- if eq .Values.clpConfig.package.query_engine "presto" }} {{- with .Values.clpConfig.presto }} presto-coordinator-catalog-clp.properties: | connector.name=clp @@ -354,11 +358,11 @@ data: presto-coordinator-config-config.properties: | coordinator=true node-scheduler.include-coordinator=false - http-server.http.port=8080 + http-server.http.port=8889 query.max-memory={{ .coordinator.query_max_memory_gb }}GB query.max-memory-per-node={{ .coordinator.query_max_memory_per_node_gb }}GB discovery-server.enabled=true - discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080 + discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8889 optimizer.optimize-hash-generation=false regex-library=RE2J use-alternative-function-signatures=true @@ -400,8 +404,8 @@ data: {{- end }}{{/* with $.Values.clpConfig.archive_output.storage */}} presto-worker-config-config.properties: | - discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080 - http-server.http.port=8080 + discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8889 + http-server.http.port=8889 query-memory-gb={{ .worker.query_memory_gb }} shutdown-onset-sec=1 system-memory-gb={{ .worker.system_memory_gb }} @@ -414,5 +418,23 @@ data: presto-worker-config-velox.properties: | mutable-config=true + + presto-worker-setup-configs.sh: | + #!/bin/sh + set -e + + 
CONFIG_TEMPLATE_DIR="/etc/presto-config" + PRESTO_CONFIG_DIR="/opt/presto-server/etc" + + cp "${CONFIG_TEMPLATE_DIR}/presto-worker-config-config.properties" "${PRESTO_CONFIG_DIR}/config.properties" + cp "${CONFIG_TEMPLATE_DIR}/presto-worker-config-node.properties" "${PRESTO_CONFIG_DIR}/node.properties" + + # Query coordinator for version and append to config + DISCOVERY_URI="http://{{ include "clp.fullname" $ }}-presto-coordinator:8889" + PRESTO_VERSION=$(wget -q -O - "${DISCOVERY_URI}/v1/info" | jq -r '.version') + echo "presto.version=${PRESTO_VERSION}" >> "${PRESTO_CONFIG_DIR}/config.properties" + + # Set node identity from hostname + echo "node.internal-address=$(hostname -i)" >> "${PRESTO_CONFIG_DIR}/node.properties" + echo "node.id=$(hostname)" >> "${PRESTO_CONFIG_DIR}/node.properties" {{- end }}{{/* with .Values.clpConfig.presto */}} -{{- end }}{{/* if eq .Values.clpConfig.package.query_engine "presto" */}} diff --git a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml index c7c208dd0c..acae4e4278 100644 --- a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml @@ -1,4 +1,4 @@ -{{- if eq .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.presto }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -36,7 +36,7 @@ spec: imagePullPolicy: "{{ .Values.image.prestoCoordinator.pullPolicy }}" ports: - name: "presto-coord" - containerPort: 8080 + containerPort: 8889 volumeMounts: - name: "presto-catalog" mountPath: "/opt/presto-server/etc/catalog" diff --git a/tools/deployment/package-helm/templates/presto-coordinator-service.yaml b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml index e96aabe873..f69ff79b78 100644 --- a/tools/deployment/package-helm/templates/presto-coordinator-service.yaml +++ 
b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml @@ -1,4 +1,4 @@ -{{- if eq .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.presto }} apiVersion: "v1" kind: "Service" metadata: @@ -7,11 +7,12 @@ metadata: {{- include "clp.labels" . | nindent 4 }} app.kubernetes.io/component: "presto-coordinator" spec: - clusterIP: "None" + type: "NodePort" selector: {{- include "clp.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: "presto-coordinator" ports: - - port: 8080 + - port: 8889 targetPort: "presto-coord" + nodePort: {{ .Values.clpConfig.presto.port }} {{- end }} diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml index 17a5bb05a8..0556e95c2b 100644 --- a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -1,4 +1,4 @@ -{{- if eq .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.presto }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -31,41 +31,24 @@ spec: "name" "presto-coordinator" ) | nindent 10 }} - name: "setup-configs" - image: "busybox:latest" - command: - - "/bin/sh" - - "-c" - - | - set -e - cp /etc/presto-config/presto-worker-config-config.properties /opt/presto-server/etc/config.properties - cp /etc/presto-config/presto-worker-config-node.properties /opt/presto-server/etc/node.properties - - # Query coordinator for version and add it to config - DISCOVERY_URI="http://{{ include "clp.fullname" . 
}}-presto-coordinator:8080" - VERSION=$(wget -q -O - "${DISCOVERY_URI}/v1/info") - PRESTO_VERSION=$(echo "$VERSION" \ - | sed 's/.*"version":"//' \ - | sed 's/".*//') - echo "presto.version=${PRESTO_VERSION}" >> /opt/presto-server/etc/config.properties - - # Set node identity from hostname - HOSTNAME=$(hostname) - HOST_IP=$(hostname -i) - echo "node.internal-address=${HOST_IP}" >> /opt/presto-server/etc/node.properties - echo "node.id=${HOSTNAME}" >> /opt/presto-server/etc/node.properties + image: "bitnami/kubectl:latest" + command: ["/bin/sh", "/scripts/presto-worker-setup-configs.sh"] volumeMounts: - name: "presto-config" mountPath: "/etc/presto-config" readOnly: true - name: "presto-etc" mountPath: "/opt/presto-server/etc" + - name: "presto-scripts" + mountPath: "/scripts" + readOnly: true containers: - name: "presto-worker" image: "{{ .Values.image.prestoWorker.repository }}:{{ .Values.image.prestoWorker.tag }}" imagePullPolicy: "{{ .Values.image.prestoWorker.pullPolicy }}" ports: - name: "presto-worker" - containerPort: 8080 + containerPort: 8889 volumeMounts: - name: "presto-catalog" mountPath: "/opt/presto-server/etc/catalog" @@ -103,6 +86,13 @@ spec: name: {{ include "clp.fullname" . }}-config - name: "presto-etc" emptyDir: {} + - name: "presto-scripts" + configMap: + name: {{ include "clp.fullname" . }}-config + items: + - key: "presto-worker-setup-configs.sh" + path: "presto-worker-setup-configs.sh" + defaultMode: 0755 {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . 
diff --git a/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml b/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml index c5e1260a45..298356100e 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-deployment.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_scheduler }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml index 321678d800..14722f15aa 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_scheduler }} {{- include "clp.createStaticPv" (dict "root" . "component_category" "query-scheduler" diff --git a/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml b/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml index e4ab9015d6..d0772a7627 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-logs-pvc.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_scheduler }} {{- include "clp.createPvc" (dict "root" . 
"component_category" "query-scheduler" diff --git a/tools/deployment/package-helm/templates/query-scheduler-service.yaml b/tools/deployment/package-helm/templates/query-scheduler-service.yaml index ac31b2d801..a09f0163dd 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-service.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-service.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_scheduler }} apiVersion: "v1" kind: "Service" metadata: diff --git a/tools/deployment/package-helm/templates/query-worker-deployment.yaml b/tools/deployment/package-helm/templates/query-worker-deployment.yaml index 539daf0fdf..c87c0368bb 100644 --- a/tools/deployment/package-helm/templates/query-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/query-worker-deployment.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_worker }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml index b676c53b37..013b454566 100644 --- a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_worker }} {{- include "clp.createStaticPv" (dict "root" . 
"component_category" "query-worker" diff --git a/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml b/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml index 83833d6fc9..c4ea208895 100644 --- a/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-worker-logs-pvc.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_worker }} {{- include "clp.createPvc" (dict "root" . "component_category" "query-worker" diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml index 9af7ad531b..9e56424e63 100644 --- a/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_worker }} {{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} {{- include "clp.createStaticPv" (dict "root" . diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml index 55fb4b342a..a8cf05f1ff 100644 --- a/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.query_worker }} {{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} {{- include "clp.createPvc" (dict "root" . 
diff --git a/tools/deployment/package-helm/templates/reducer-deployment.yaml b/tools/deployment/package-helm/templates/reducer-deployment.yaml index 99c880e1b6..c01a551748 100644 --- a/tools/deployment/package-helm/templates/reducer-deployment.yaml +++ b/tools/deployment/package-helm/templates/reducer-deployment.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.reducer }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml index 699316845b..f274774a0d 100644 --- a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.reducer }} {{- include "clp.createStaticPv" (dict "root" . "component_category" "reducer" diff --git a/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml b/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml index 40c6ca72ff..a41514a03a 100644 --- a/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/reducer-logs-pvc.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.reducer }} {{- include "clp.createPvc" (dict "root" . 
"component_category" "reducer" diff --git a/tools/deployment/package-helm/templates/reducer-service.yaml b/tools/deployment/package-helm/templates/reducer-service.yaml index b10f7f0f6b..fa0b21d93b 100644 --- a/tools/deployment/package-helm/templates/reducer-service.yaml +++ b/tools/deployment/package-helm/templates/reducer-service.yaml @@ -1,4 +1,4 @@ -{{- if ne .Values.clpConfig.package.query_engine "presto" }} +{{- if .Values.clpConfig.reducer }} apiVersion: "v1" kind: "Service" metadata: diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 2644fb309d..1d6051dca6 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -44,7 +44,7 @@ compressionWorker: # Controls which nodes run compression workers # scheduling: # nodeSelector: - # yscope.io/nodeType: compute + # yscope.io/nodeType: "compression" # tolerations: # - key: "yscope.io/dedicated" # operator: "Equal" @@ -60,7 +60,7 @@ queryWorker: # Controls which nodes run query workers # scheduling: # nodeSelector: - # yscope.io/nodeType: compute + # yscope.io/nodeType: "query" # tolerations: # - key: "yscope.io/dedicated" # operator: "Equal" @@ -71,32 +71,32 @@ queryWorker: # topologyKey: "kubernetes.io/hostname" # whenUnsatisfiable: "DoNotSchedule" -prestoWorker: +reducer: replicas: 1 - # Controls which nodes run Presto workers + # Controls which nodes run reducers # scheduling: # nodeSelector: - # yscope.io/nodeType: compute + # yscope.io/nodeType: "query" # tolerations: # - key: "yscope.io/dedicated" # operator: "Equal" - # value: "query" + # value: "reducer" # effect: "NoSchedule" # topologySpreadConstraints: # - maxSkew: 1 # topologyKey: "kubernetes.io/hostname" # whenUnsatisfiable: "DoNotSchedule" -reducer: +prestoWorker: replicas: 1 - # Controls which nodes run reducers + # Controls which nodes run Presto workers # scheduling: # nodeSelector: - # yscope.io/nodeType: compute + # yscope.io/nodeType: "presto" # 
tolerations: # - key: "yscope.io/dedicated" # operator: "Equal" - # value: "reducer" + # value: "presto" # effect: "NoSchedule" # topologySpreadConstraints: # - maxSkew: 1 @@ -153,10 +153,6 @@ clpConfig: query_backend_database: 0 compression_backend_database: 1 - reducer: - logging_level: "INFO" - upsert_interval: 100 # milliseconds - results_cache: port: 30017 db_name: "clp-query-results" @@ -171,6 +167,10 @@ clpConfig: query_worker: logging_level: "INFO" + reducer: + logging_level: "INFO" + upsert_interval: 100 # milliseconds + webui: port: 30000 results_metadata_collection_name: "results-metadata" @@ -180,18 +180,19 @@ clpConfig: # port: 30800 # logging_level: "INFO" - # Presto query engine config. Only used when package.query_engine is "presto". - presto: - coordinator: - logging_level: "INFO" - query_max_memory_gb: 1 - query_max_memory_per_node_gb: 1 - worker: - query_memory_gb: 4 - system_memory_gb: 8 - # Split filter config for Presto CLP connector. - # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file - split_filter: {} + # Presto query engine config. Uncomment to enable Presto deployments. + presto: null + # port: 30889 + # coordinator: + # logging_level: "INFO" + # query_max_memory_gb: 1 + # query_max_memory_per_node_gb: 1 + # worker: + # query_memory_gb: 4 + # system_memory_gb: 8 + # # Split filter config for Presto CLP connector. + # # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file + # split_filter: {} # log-ingestor config. Currently, the config is applicable only if `logs_input.type` is "s3". 
log_ingestor: From 9c83eb39b1e1ecd65c10b87e8cde0ae46a09edf4 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 11 Mar 2026 01:03:28 +0800 Subject: [PATCH 05/33] chore(helm): Bump chart version to 0.2.1-dev.1 --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index f50a438cd3..83984a7a31 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.2.1-dev.0" +version: "0.2.1-dev.1" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.10.1-dev" From 5fc55fcb0b1767e8c1afe89a189b6026b4d4dc16 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 11 Mar 2026 02:05:22 +0800 Subject: [PATCH 06/33] docs: Clarify Helm configuration for enabling Presto and resource optimization --- docs/src/user-docs/guides-k8s-deployment.md | 16 +++++++++------- docs/src/user-docs/guides-using-presto.md | 11 ++++++----- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 0348c81e57..700e9fdf05 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -248,8 +248,11 @@ helm template clp . -f custom-values.yaml ### Using Presto as the query engine -To use [Presto][presto-guide] as the query engine instead of the default clp-s query pipeline, set -`query_engine` to `"presto"` and configure the Presto-specific settings: +To use [Presto][presto-guide] as the query engine, set `query_engine` to `"presto"` and configure +the Presto-specific settings. The `query_engine` setting controls which search interface the Web UI +displays. 
Presto runs alongside the existing compression pipeline; setting the clp-s native query +components to `null` is optional but recommended to save resources when you don't need both query +paths: ```{code-block} yaml :caption: presto-values.yaml @@ -271,9 +274,8 @@ clpConfig: storage_engine: "clp-s" query_engine: "presto" - # Disable the clp-s query pipeline since Presto replaces it. - # NOTE: The API server currently depends on the clp-s query pipeline and does not work with - # Presto. Keep it enabled if you need the API server; disable it if not. + # Optional: Disable the clp-s native query pipeline to save resources. + # NOTE: The API server depends on the clp-s native query pipeline. api_server: null query_scheduler: null query_worker: null @@ -314,8 +316,8 @@ helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml ``` :::{note} -When `query_engine` is set to `"presto"`, the chart deploys a Presto coordinator and Presto -worker(s) instead of the query scheduler, query workers, reducers, and results cache. +Presto is deployed when `clpConfig.presto` is set to a non-null value. To disable the clp-s native query +components, set their config keys to `null` as shown above. ::: For more details on querying logs through Presto, see the [Using Presto][presto-guide] guide. diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index 2e92f61cf8..1b9e84de2f 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -25,8 +25,10 @@ CLP supports Presto through two deployment methods: ## Kubernetes (Helm) -When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting the `query_engine` to -`"presto"` in your Helm values. +When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting `clpConfig.presto` to +a non-null configuration and `query_engine` to `"presto"`. 
The `query_engine` setting controls which +search interface the Web UI displays. Presto runs alongside the existing compression pipeline; the +clp-s native query components can optionally be disabled to save resources. ### Requirements @@ -43,9 +45,8 @@ When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting th package: query_engine: "presto" - # Disable the clp-s query pipeline since Presto replaces it. - # NOTE: The API server currently depends on the clp-s query pipeline and does not work - # with Presto. Keep it enabled if you need the API server; disable it if not. + # Optional: Disable the clp-s native query pipeline to save resources. + # NOTE: The API server depends on the clp-s native query pipeline. api_server: null query_scheduler: null query_worker: null From 6611a4631adfd51c5ba5e5149dc0db82e9f00e4a Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 11 Mar 2026 02:23:36 +0800 Subject: [PATCH 07/33] docs: Remove redundant comments on Presto configuration in K8s guide --- docs/src/user-docs/guides-k8s-deployment.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 700e9fdf05..bef106851a 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -375,8 +375,6 @@ To run compression workers, query workers, and reducers in separate node pools: nodeSelector: yscope.io/nodeType: "query" - # If using Presto as the query engine, configure prestoWorker instead of - # queryWorker and reducer. prestoWorker: replicas: 2 scheduling: @@ -432,8 +430,6 @@ To run all worker types in the same node pool: nodeSelector: yscope.io/nodeType: "compute" - # If using Presto as the query engine, configure prestoWorker instead of - # queryWorker and reducer. 
prestoWorker: replicas: 2 scheduling: From 4ac6a7145e9e02e83d6bce9ac00095c99cdf165b Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 11 Mar 2026 02:48:10 +0800 Subject: [PATCH 08/33] chore(helm): Remove extraneous newline in `values.yaml` --- tools/deployment/package-helm/values.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index d1f3467cb0..3e260870b9 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -94,7 +94,6 @@ prestoWorker: # topologyKey: "kubernetes.io/hostname" # whenUnsatisfiable: "DoNotSchedule" - clpConfig: # List of third-party services bundled (deployed) as part of the chart. # Remove a service from this list to use an external instance instead, and configure its host/port From 3ea167011774dd51679fd02ac56badf1d2c71304 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 12 Mar 2026 13:31:01 -0400 Subject: [PATCH 09/33] docs(helm): Clarify Presto SQL configuration in `values.yaml` --- tools/deployment/package-helm/values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 6876b52a46..0341295a3e 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -193,7 +193,8 @@ clpConfig: # port: 30800 # logging_level: "INFO" - # Presto query engine config. Uncomment to enable Presto deployments. + # Presto query engine config. Uncomment to enable Presto deployments. If you want the Web UI to + # expose Presto SQL, also set `package.query_engine: "presto"`. 
presto: null # port: 30889 # coordinator: From 80204d7f5f185d4576f76de1518683515cc73037 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 12 Mar 2026 13:31:21 -0400 Subject: [PATCH 10/33] chore(helm): Bump chart version to 0.2.1-dev.2 --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 83984a7a31..dedbceb015 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.2.1-dev.1" +version: "0.2.1-dev.2" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.10.1-dev" From dda97e2def6a32de42eaf99e0b02699f4537b53b Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 12 Mar 2026 14:46:17 -0400 Subject: [PATCH 11/33] refactor(helm/docs): Rename `package.query_engine` to `webui.query_engine` and update Presto configurations --- docs/src/user-docs/guides-k8s-deployment.md | 19 ++++++++++--------- docs/src/user-docs/guides-using-presto.md | 8 ++++---- docs/src/user-docs/quick-start/clp-text.md | 2 +- .../package-helm/templates/configmap.yaml | 6 +++--- tools/deployment/package-helm/values.yaml | 12 ++++++------ 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index bef106851a..297f98749a 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -200,6 +200,8 @@ clpConfig: # Use clp-text, instead of clp-json (default) package: storage_engine: "clp" # Use "clp-s" for clp-json, "clp" for clp-text + + webui: query_engine: "clp" # Use "clp-s" for clp-json, "clp" for clp-text, "presto" for Presto # Configure archive output @@ -248,11 +250,11 @@ helm template clp . 
-f custom-values.yaml ### Using Presto as the query engine -To use [Presto][presto-guide] as the query engine, set `query_engine` to `"presto"` and configure -the Presto-specific settings. The `query_engine` setting controls which search interface the Web UI -displays. Presto runs alongside the existing compression pipeline; setting the clp-s native query -components to `null` is optional but recommended to save resources when you don't need both query -paths: +To use [Presto][presto-guide] as the query engine, set `webui.query_engine` to `"presto"` and +configure the Presto-specific settings. The `query_engine` setting controls which search interface +the Web UI displays. Presto runs alongside the existing compression pipeline; setting the clp-s +native query components to `null` is optional but recommended to save resources when you don't need +both query paths: ```{code-block} yaml :caption: presto-values.yaml @@ -260,18 +262,17 @@ paths: image: prestoCoordinator: repository: "ghcr.io/y-scope/presto/coordinator" - tag: "dev" + tag: "clp-v0.10.0" prestoWorker: repository: "ghcr.io/y-scope/presto/prestissimo-worker" - tag: "dev" + tag: "clp-v0.10.0" prestoWorker: # See below "Worker scheduling" for more details on configuring Presto scheduling replicas: 2 clpConfig: - package: - storage_engine: "clp-s" + webui: query_engine: "presto" # Optional: Disable the clp-s native query pipeline to save resources. diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index 1b9e84de2f..5539abccff 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -26,9 +26,9 @@ CLP supports Presto through two deployment methods: ## Kubernetes (Helm) When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting `clpConfig.presto` to -a non-null configuration and `query_engine` to `"presto"`. The `query_engine` setting controls which -search interface the Web UI displays. 
Presto runs alongside the existing compression pipeline; the -clp-s native query components can optionally be disabled to save resources. +a non-null configuration and `webui.query_engine` to `"presto"`. The `query_engine` setting controls +which search interface the Web UI displays. Presto runs alongside the existing compression pipeline; +the clp-s native query components can optionally be disabled to save resources. ### Requirements @@ -42,7 +42,7 @@ clp-s native query components can optionally be disabled to save resources. :caption: presto-values.yaml clpConfig: - package: + webui: query_engine: "presto" # Optional: Disable the clp-s native query pipeline to save resources. diff --git a/docs/src/user-docs/quick-start/clp-text.md b/docs/src/user-docs/quick-start/clp-text.md index d8c436edd7..fc0a735d38 100644 --- a/docs/src/user-docs/quick-start/clp-text.md +++ b/docs/src/user-docs/quick-start/clp-text.md @@ -100,7 +100,7 @@ helm repo update clp helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG \ --set clpConfig.package.storage_engine=clp \ - --set clpConfig.package.query_engine=clp \ + --set clpConfig.webui.query_engine=clp \ --set clpConfig.webui.port="$CLP_WEBUI_PORT" \ --set clpConfig.results_cache.port="$CLP_RESULTS_CACHE_PORT" \ --set clpConfig.database.port="$CLP_DATABASE_PORT" \ diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 4ff9a92e38..b724381837 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -127,7 +127,7 @@ data: log_ingestor: null {{- end }} package: - query_engine: {{ .Values.clpConfig.package.query_engine | quote }} + query_engine: {{ .Values.clpConfig.webui.query_engine | quote }} storage_engine: {{ .Values.clpConfig.package.storage_engine | quote }} {{- with .Values.clpConfig.query_scheduler }} query_scheduler: @@ -254,7 +254,7 @@ data: webui-client-settings.json: | { 
"ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, - "ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }}, + "ClpQueryEngine": {{ .Values.clpConfig.webui.query_engine | quote }}, "LogsInputType": {{ .Values.clpConfig.logs_input.type | quote }}, {{- if eq .Values.clpConfig.logs_input.type "fs" }} "LogsInputRootDir": "/mnt/logs", @@ -323,7 +323,7 @@ data: {{ .Values.clpConfig.archive_output.target_encoded_file_size | int }}, "ArchiveOutputTargetSegmentSize": {{ .Values.clpConfig.archive_output.target_segment_size | int }}, - "ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }}, + "ClpQueryEngine": {{ .Values.clpConfig.webui.query_engine | quote }}, "ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, {{- if .Values.clpConfig.presto }} "PrestoHost": "{{ include "clp.fullname" . }}-presto-coordinator", diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 0341295a3e..e03fcbb42d 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -11,12 +11,12 @@ image: tag: "main" prestoCoordinator: repository: "ghcr.io/y-scope/presto/coordinator" - pullPolicy: "Always" - tag: "dev" + pullPolicy: "IfNotPresent" + tag: "clp-v0.10.0" prestoWorker: repository: "ghcr.io/y-scope/presto/prestissimo-worker" - pullPolicy: "Always" - tag: "dev" + pullPolicy: "IfNotPresent" + tag: "clp-v0.10.0" # - If false: Single-node deployment. # - Pods automatically tolerate control-plane taints. @@ -106,7 +106,6 @@ clpConfig: package: storage_engine: "clp-s" - query_engine: "clp-s" # API server config api_server: @@ -185,6 +184,7 @@ clpConfig: upsert_interval: 100 # milliseconds webui: + query_engine: "clp-s" port: 30000 results_metadata_collection_name: "results-metadata" rate_limit: 1000 @@ -194,7 +194,7 @@ clpConfig: # logging_level: "INFO" # Presto query engine config. Uncomment to enable Presto deployments. 
If you want the Web UI to - # expose Presto SQL, also set `package.query_engine: "presto"`. + # expose Presto SQL, also set `webui.query_engine: "presto"`. presto: null # port: 30889 # coordinator: From 5525e7e6cdfcaac14724dd3c7866507d51473877 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:02:02 -0400 Subject: [PATCH 12/33] refactor(helm): Use dynamic host and port for CLP metadata DB in Presto configurations --- tools/deployment/package-helm/templates/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index b724381837..3721282923 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -338,7 +338,7 @@ data: presto-coordinator-catalog-clp.properties: | connector.name=clp clp.metadata-provider-type=mysql - clp.metadata-db-url=jdbc:mysql://{{ include "clp.fullname" $ }}-database:3306 + clp.metadata-db-url=jdbc:mysql://{{ include "clp.databaseHost" $ }}:{{ include "clp.databasePort" $ }} clp.metadata-db-name={{ $.Values.clpConfig.database.names.clp }} clp.metadata-db-user={{ $.Values.credentials.database.username }} clp.metadata-db-password={{ $.Values.credentials.database.password }} From 55d32d4adea3e45927c477c91dfb54747c452ccf Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:03:06 -0400 Subject: [PATCH 13/33] refactor(helm): Rename service account for Presto deployments to `service-account` --- .../package-helm/templates/presto-coordinator-deployment.yaml | 2 +- .../package-helm/templates/presto-worker-deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml index acae4e4278..12cb654165 100644 --- 
a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml @@ -18,7 +18,7 @@ spec: {{- include "clp.labels" . | nindent 8 }} app.kubernetes.io/component: "presto-coordinator" spec: - serviceAccountName: {{ include "clp.fullname" . }}-job-watcher + serviceAccountName: {{ include "clp.fullname" . }}-service-account terminationGracePeriodSeconds: 30 {{- include "clp.createSchedulingConfigs" (dict "root" . diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml index 0556e95c2b..d728fceb01 100644 --- a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -18,7 +18,7 @@ spec: {{- include "clp.labels" . | nindent 8 }} app.kubernetes.io/component: "presto-worker" spec: - serviceAccountName: {{ include "clp.fullname" . }}-job-watcher + serviceAccountName: {{ include "clp.fullname" . }}-service-account terminationGracePeriodSeconds: 30 {{- include "clp.createSchedulingConfigs" (dict "root" . From 72160c1001294f52b20a6975319dfa0bfa6c82f9 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:05:07 -0400 Subject: [PATCH 14/33] docs: Simplify Presto setup instructions in K8s deployment guide --- docs/src/user-docs/guides-k8s-deployment.md | 74 +-------------------- 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 297f98749a..36471d0922 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -250,78 +250,8 @@ helm template clp . 
-f custom-values.yaml ### Using Presto as the query engine -To use [Presto][presto-guide] as the query engine, set `webui.query_engine` to `"presto"` and -configure the Presto-specific settings. The `query_engine` setting controls which search interface -the Web UI displays. Presto runs alongside the existing compression pipeline; setting the clp-s -native query components to `null` is optional but recommended to save resources when you don't need -both query paths: - -```{code-block} yaml -:caption: presto-values.yaml - -image: - prestoCoordinator: - repository: "ghcr.io/y-scope/presto/coordinator" - tag: "clp-v0.10.0" - prestoWorker: - repository: "ghcr.io/y-scope/presto/prestissimo-worker" - tag: "clp-v0.10.0" - -prestoWorker: - # See below "Worker scheduling" for more details on configuring Presto scheduling - replicas: 2 - -clpConfig: - webui: - query_engine: "presto" - - # Optional: Disable the clp-s native query pipeline to save resources. - # NOTE: The API server depends on the clp-s native query pipeline. - api_server: null - query_scheduler: null - query_worker: null - reducer: null - - # Disable results cache retention since the Presto integration doesn't yet support garbage - # collection of search results. - results_cache: - retention_period: null - - presto: - port: 30889 - coordinator: - logging_level: "INFO" - query_max_memory_gb: 1 - query_max_memory_per_node_gb: 1 - worker: - query_memory_gb: 4 - system_memory_gb: 8 - # Split filter config for the Presto CLP connector. For each dataset you want to query, add a - # filter entry. Replace <dataset> with the dataset name (use "default" if you didn't specify one - # when compressing) and <timestamp-key> with the timestamp key used during compression. 
- # See https://docs.yscope.com/presto/connector/clp.html#split-filter-config-file - split_filter: - clp.default.<dataset>: - - columnName: "<timestamp-key>" - customOptions: - rangeMapping: - lowerBound: "begin_timestamp" - upperBound: "end_timestamp" - required: false -``` - -Install with the Presto values: - -```bash -helm install clp clp/clp DOCS_VAR_HELM_VERSION_FLAG -f presto-values.yaml -``` - -:::{note} -Presto is deployed when `clpConfig.presto` is set to a non-null value. To disable the clp-s native query -components, set their config keys to `null` as shown above. -::: - -For more details on querying logs through Presto, see the [Using Presto][presto-guide] guide. +To use Presto as the query engine, see the [Using Presto][presto-guide] guide for setup +instructions, including the Helm values file and installation steps. ### Worker scheduling From 4337cd12e13fc9830e42c1d6a860e4fa369671df Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:10:20 -0400 Subject: [PATCH 15/33] docs: Remove dead link to logging infrastructure issue in K8s deployment guide --- docs/src/user-docs/guides-k8s-deployment.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 36471d0922..30fba7cc68 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -588,7 +588,6 @@ To tear down a `kubeadm` cluster: [kind]: https://kind.sigs.k8s.io/ [kubeadm]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/ [kubectl]: https://kubernetes.io/docs/tasks/tools/ -[logging-infra-issue]: https://github.com/y-scope/clp/issues/1760 [presto-guide]: guides-using-presto.md [quick-start]: quick-start/index.md [retention-guide]: guides-retention.md From efb640bd328852877829462e2571167df2e199c5 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:13:52 -0400 Subject: [PATCH 16/33] refactor(helm): Replace wget with curl 
for fetching Presto version in configmap template --- tools/deployment/package-helm/templates/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 3721282923..20c97df50a 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -423,7 +423,7 @@ data: # Query coordinator for version and append to config DISCOVERY_URI="http://{{ include "clp.fullname" $ }}-presto-coordinator:8889" - PRESTO_VERSION=$(wget -q -O - "${DISCOVERY_URI}/v1/info" | jq -r '.version') + PRESTO_VERSION=$(curl -sf "${DISCOVERY_URI}/v1/info" | jq -r '.version') echo "presto.version=${PRESTO_VERSION}" >> "${PRESTO_CONFIG_DIR}/config.properties" # Set node identity from hostname From 9df3643e912362a124d10a9e59f235ec479ea24a Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:44:23 -0400 Subject: [PATCH 17/33] refactor(helm): Update Presto version extraction to use `nodeVersion.version` in configmap template --- tools/deployment/package-helm/templates/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 20c97df50a..9ab3d504ad 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -423,7 +423,7 @@ data: # Query coordinator for version and append to config DISCOVERY_URI="http://{{ include "clp.fullname" $ }}-presto-coordinator:8889" - PRESTO_VERSION=$(curl -sf "${DISCOVERY_URI}/v1/info" | jq -r '.version') + PRESTO_VERSION=$(curl -sf "${DISCOVERY_URI}/v1/info" | jq -r '.nodeVersion.version') echo "presto.version=${PRESTO_VERSION}" >> "${PRESTO_CONFIG_DIR}/config.properties" # Set node identity from hostname From 
c8fd988b2d65c395071f05e2b8c52a0092c4c737 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 15:44:28 -0400 Subject: [PATCH 18/33] docs: Fix incorrect property name in Presto setup instructions for K8s deployment --- docs/src/user-docs/guides-using-presto.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/user-docs/guides-using-presto.md b/docs/src/user-docs/guides-using-presto.md index 5539abccff..c87b454020 100644 --- a/docs/src/user-docs/guides-using-presto.md +++ b/docs/src/user-docs/guides-using-presto.md @@ -26,7 +26,7 @@ CLP supports Presto through two deployment methods: ## Kubernetes (Helm) When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting `clpConfig.presto` to -a non-null configuration and `webui.query_engine` to `"presto"`. The `query_engine` setting controls +a non-null configuration and `clpConfig.webui.query_engine` to `"presto"`. The `query_engine` setting controls which search interface the Web UI displays. Presto runs alongside the existing compression pipeline; the clp-s native query components can optionally be disabled to save resources. 
From 1939ce193b256b98177d1907c138dbdd92f9f603 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 16:05:20 -0400 Subject: [PATCH 19/33] refactor(helm): Set default Presto replicas to 0 in multi-shared and multi-dedicated test setups --- tools/deployment/package-helm/set-up-multi-dedicated-test.sh | 4 ++-- tools/deployment/package-helm/set-up-multi-shared-test.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 85fab34ffd..e5762e4010 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -10,11 +10,11 @@ CLP_HOME="${CLP_HOME:-/tmp/clp}" CLUSTER_NAME="${CLUSTER_NAME:-clp-test}" NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}" NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}" -NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}" +NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-0}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" -PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}" +PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-0}" # shellcheck source=.set-up-common.sh source "${script_dir}/.set-up-common.sh" diff --git a/tools/deployment/package-helm/set-up-multi-shared-test.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh index 44afcc9697..991ce0f54b 100755 --- a/tools/deployment/package-helm/set-up-multi-shared-test.sh +++ b/tools/deployment/package-helm/set-up-multi-shared-test.sh @@ -12,7 +12,7 @@ NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" -PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}" +PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-0}" # shellcheck 
source=.set-up-common.sh source "${script_dir}/.set-up-common.sh" From 6688324fa72736d618b3fc34c4cd8a13f148e2a2 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 16:20:58 -0400 Subject: [PATCH 20/33] Add support for enabling Presto in test setups with configurable Helm flags --- .../deployment/package-helm/.set-up-common.sh | 31 ++++++++++++++++++- .../set-up-multi-dedicated-test.sh | 13 +++++++- .../package-helm/set-up-multi-shared-test.sh | 9 +++++- tools/deployment/package-helm/set-up-test.sh | 4 ++- 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index 529260f8f9..ab01cb7aea 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ b/tools/deployment/package-helm/.set-up-common.sh @@ -73,11 +73,12 @@ get_image_helm_args() { } # Parses common arguments shared across set-up scripts. -# Sets CLP_PACKAGE_IMAGE global variable. +# Sets CLP_PACKAGE_IMAGE and ENABLE_PRESTO global variables. # # @param {string[]} args Script arguments parse_common_args() { CLP_PACKAGE_IMAGE="" + ENABLE_PRESTO="false" while [[ $# -gt 0 ]]; do case "$1" in --clp-package-image) @@ -88,6 +89,10 @@ parse_common_args() { CLP_PACKAGE_IMAGE="$2" shift 2 ;; + --presto) + ENABLE_PRESTO="true" + shift + ;; *) echo "Unknown argument: $1" >&2 exit 1 @@ -96,6 +101,30 @@ parse_common_args() { done } +# Returns helm --set flags to enable Presto with a minimal config. +# Only prints flags when ENABLE_PRESTO is "true"; otherwise prints nothing. 
+# +# @return Prints helm --set flags to stdout +get_presto_helm_args() { + if [[ "${ENABLE_PRESTO}" != "true" ]]; then + return + fi + + echo "--set" "clpConfig.webui.query_engine=presto" \ + "--set" "clpConfig.presto.port=30889" \ + "--set" "clpConfig.presto.coordinator.logging_level=INFO" \ + "--set" "clpConfig.presto.coordinator.query_max_memory_gb=1" \ + "--set" "clpConfig.presto.coordinator.query_max_memory_per_node_gb=1" \ + "--set" "clpConfig.presto.worker.query_memory_gb=4" \ + "--set" "clpConfig.presto.worker.system_memory_gb=8" \ + "--set-json" "clpConfig.presto.split_filter={}" \ + "--set" "clpConfig.api_server=null" \ + "--set" "clpConfig.query_scheduler=null" \ + "--set" "clpConfig.query_worker=null" \ + "--set" "clpConfig.reducer=null" \ + "--set" "clpConfig.results_cache.retention_period=null" +} + # Generates kind cluster configuration YAML # # @param {int} num_workers Number of worker nodes (0 for single-node cluster) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index e5762e4010..091eadc294 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -21,6 +21,15 @@ source "${script_dir}/.set-up-common.sh" parse_common_args "$@" +# When --presto is passed, use non-zero defaults for Presto nodes/replicas if not explicitly set. 
+if [[ "${ENABLE_PRESTO}" == "true" ]]; then + NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}" + : "${NUM_PRESTO_NODES:=2}" + [[ "${NUM_PRESTO_NODES}" -eq 0 ]] && NUM_PRESTO_NODES=2 + PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}" + [[ "${PRESTO_WORKER_REPLICAS}" -eq 0 ]] && PRESTO_WORKER_REPLICAS=2 +fi + echo "=== Multi-node setup with dedicated worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Compression nodes: ${NUM_COMPRESSION_NODES}" @@ -30,6 +39,7 @@ echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" echo "Presto workers: ${PRESTO_WORKER_REPLICAS}" +echo "Presto: ${ENABLE_PRESTO}" echo "" prepare_environment "${CLUSTER_NAME}" @@ -64,7 +74,7 @@ done echo "Installing Helm chart..." helm uninstall test --ignore-not-found sleep 2 -# Word splitting is intentional: get_image_helm_args returns multiple --set flags. +# Word splitting is intentional: helper functions return multiple --set flags. # shellcheck disable=SC2046 helm install test "${script_dir}" \ --set "distributedDeployment=true" \ @@ -76,6 +86,7 @@ helm install test "${script_dir}" \ --set "reducer.scheduling.nodeSelector.yscope\.io/nodeType=query" \ --set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \ --set "prestoWorker.scheduling.nodeSelector.yscope\.io/nodeType=presto" \ + $(get_presto_helm_args) \ $(get_image_helm_args "${CLUSTER_NAME}" "${CLP_PACKAGE_IMAGE}") wait_for_cluster_ready diff --git a/tools/deployment/package-helm/set-up-multi-shared-test.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh index 991ce0f54b..92ed7b61be 100755 --- a/tools/deployment/package-helm/set-up-multi-shared-test.sh +++ b/tools/deployment/package-helm/set-up-multi-shared-test.sh @@ -19,6 +19,11 @@ source "${script_dir}/.set-up-common.sh" parse_common_args "$@" +# When --presto is passed, use non-zero defaults for Presto replicas if not explicitly set. 
+if [[ "${ENABLE_PRESTO}" == "true" ]]; then + [[ "${PRESTO_WORKER_REPLICAS}" -eq 0 ]] && PRESTO_WORKER_REPLICAS=2 +fi + echo "=== Multi-node setup with shared worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Worker nodes: ${NUM_WORKER_NODES}" @@ -26,6 +31,7 @@ echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" echo "Presto workers: ${PRESTO_WORKER_REPLICAS}" +echo "Presto: ${ENABLE_PRESTO}" echo "" prepare_environment "${CLUSTER_NAME}" @@ -36,7 +42,7 @@ generate_kind_config "${NUM_WORKER_NODES}" | kind create cluster --name "${CLUST echo "Installing Helm chart..." helm uninstall test --ignore-not-found sleep 2 -# Word splitting is intentional: get_image_helm_args returns multiple --set flags. +# Word splitting is intentional: helper functions return multiple --set flags. # shellcheck disable=SC2046 helm install test "${script_dir}" \ --set "distributedDeployment=true" \ @@ -44,6 +50,7 @@ helm install test "${script_dir}" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ --set "reducer.replicas=${REDUCER_REPLICAS}" \ --set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \ + $(get_presto_helm_args) \ $(get_image_helm_args "${CLUSTER_NAME}" "${CLP_PACKAGE_IMAGE}") wait_for_cluster_ready diff --git a/tools/deployment/package-helm/set-up-test.sh b/tools/deployment/package-helm/set-up-test.sh index 4bbfc5ee5d..f396ef9e14 100755 --- a/tools/deployment/package-helm/set-up-test.sh +++ b/tools/deployment/package-helm/set-up-test.sh @@ -15,6 +15,7 @@ parse_common_args "$@" echo "=== Single-node setup ===" echo "Cluster: ${CLUSTER_NAME}" +echo "Presto: ${ENABLE_PRESTO}" echo "" prepare_environment "${CLUSTER_NAME}" @@ -25,9 +26,10 @@ generate_kind_config 0 | kind create cluster --name "${CLUSTER_NAME}" --config=- echo "Installing Helm chart..." 
helm uninstall test --ignore-not-found sleep 2 -# Word splitting is intentional: get_image_helm_args returns multiple --set flags. +# Word splitting is intentional: helper functions return multiple --set flags. # shellcheck disable=SC2046 helm install test "${script_dir}" \ + $(get_presto_helm_args) \ $(get_image_helm_args "${CLUSTER_NAME}" "${CLP_PACKAGE_IMAGE}") wait_for_cluster_ready From f6ff83a7e133055ee5391a0d3a090d458774b5c0 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 17:09:18 -0400 Subject: [PATCH 21/33] fix(deployment): Pre-create shared-data PVs in `set-up-multi-dedicated-test.sh` to avoid node-affinity conflict with `nodeSelector`. --- .../design-deployment-orchestration.md | 6 ++- docs/src/user-docs/guides-k8s-deployment.md | 7 +++- .../set-up-multi-dedicated-test.sh | 41 +++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index 904ca5a43f..0046df73e4 100755 --- a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -244,8 +244,10 @@ Services require persistent storage for logs, data, archives, and streams. * **Docker Compose**: Uses bind mounts for host directories and named volumes for database data. Conditional mounts use variable interpolation to mount empty tmpfs when not needed. * **Kubernetes**: Uses dynamically provisioned PersistentVolumeClaims for persistent data (database, - results cache, archives, streams) and `emptyDir` volumes for ephemeral state (Redis, staging - directories). Service logs are emitted to pod stdout/stderr. + results cache) and `emptyDir` volumes for ephemeral state (Redis, staging directories). Service + logs are emitted to pod stdout/stderr. 
For shared-data volumes (archives, streams), single-node + deployments use dynamic provisioning while distributed `fs` deployments require pre-provisioned + PersistentVolumes backed by shared storage (e.g., NFS/CephFS). ### Deployment types diff --git a/docs/src/user-docs/guides-k8s-deployment.md b/docs/src/user-docs/guides-k8s-deployment.md index 9d88356b8a..9c4d8223fa 100644 --- a/docs/src/user-docs/guides-k8s-deployment.md +++ b/docs/src/user-docs/guides-k8s-deployment.md @@ -132,8 +132,11 @@ this section for testing or development. require shared local storage between workers. If you use S3 storage, you can skip this section. ::: - If storage type is set to `fs`, users must manually provision the persistent volumes and update - `accessModes` of PVCs. + If storage type is set to `fs`, the shared-data directories (`/var/data/archives`, + `/var/data/streams`) must be accessible from all worker nodes (e.g., via NFS/CephFS mounted at + the same path). Users must pre-create PersistentVolumes backed by this shared storage and use + `claimRef` to bind them to the chart's PVCs (`-clp-shared-data-archives` and + `-clp-shared-data-streams`). 2. **External databases** (recommended for production): * See the [external database setup guide][external-db-guide] for using external diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index cdef35d492..e3772a588c 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -50,6 +50,47 @@ for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite done +# Pre-create shared-data PVs with hostPath (no node affinity) so PVCs bind to them instead of +# dynamically provisioned node-local volumes. 
Without this, the local-path-provisioner pins PVs to +# whichever node claims them first, which conflicts with nodeSelector when workers are on dedicated +# node pools. +echo "Creating shared-data PersistentVolumes..." +kubectl apply -f - <<'PVEOF' +apiVersion: v1 +kind: PersistentVolume +metadata: + name: test-clp-shared-data-archives +spec: + capacity: + storage: 50Gi + accessModes: [ReadWriteOnce] + persistentVolumeReclaimPolicy: Retain + storageClassName: standard + claimRef: + namespace: default + name: test-clp-shared-data-archives + hostPath: + path: /var/data/archives + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: test-clp-shared-data-streams +spec: + capacity: + storage: 20Gi + accessModes: [ReadWriteOnce] + persistentVolumeReclaimPolicy: Retain + storageClassName: standard + claimRef: + namespace: default + name: test-clp-shared-data-streams + hostPath: + path: /var/data/streams + type: DirectoryOrCreate +PVEOF + echo "Installing Helm chart..." 
helm uninstall test --ignore-not-found sleep 2 From eec670947acb8bd6d4ed97c6bae49227af4a6174 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 17:15:46 -0400 Subject: [PATCH 22/33] chore(helm): bump chart version to 0.2.1-dev.2 --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 83984a7a31..dedbceb015 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.2.1-dev.1" +version: "0.2.1-dev.2" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.10.1-dev" From 48aa730cf7888e02451812fb3e70680435dc9e99 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 16 Mar 2026 17:17:03 -0400 Subject: [PATCH 23/33] fix(deployment): correct heredoc identifier in `set-up-multi-dedicated-test.sh` --- tools/deployment/package-helm/set-up-multi-dedicated-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index e3772a588c..1300199ce3 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -55,7 +55,7 @@ done # whichever node claims them first, which conflicts with nodeSelector when workers are on dedicated # node pools. echo "Creating shared-data PersistentVolumes..." -kubectl apply -f - <<'PVEOF' +kubectl apply -f - <<'EOF' apiVersion: v1 kind: PersistentVolume metadata: @@ -89,7 +89,7 @@ spec: hostPath: path: /var/data/streams type: DirectoryOrCreate -PVEOF +EOF echo "Installing Helm chart..." 
helm uninstall test --ignore-not-found From f6d3bc0012d65445165374d0ac90b79505d68af0 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 17 Mar 2026 12:03:58 -0400 Subject: [PATCH 24/33] update PV configuration in `set-up-multi-dedicated-test.sh` (remove reclaim policy, set empty storage class) --- .../package-helm/set-up-multi-dedicated-test.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 1300199ce3..3625beea1d 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -55,7 +55,7 @@ done # whichever node claims them first, which conflicts with nodeSelector when workers are on dedicated # node pools. echo "Creating shared-data PersistentVolumes..." -kubectl apply -f - <<'EOF' +kubectl apply -f - < Date: Tue, 17 Mar 2026 15:28:35 -0400 Subject: [PATCH 25/33] fix(deployment): pre-create shared-data directories on nodes in `set-up-multi-dedicated-test.sh` and update PV configuration --- .../package-helm/set-up-multi-dedicated-test.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 3625beea1d..334aa8e5db 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -54,6 +54,12 @@ done # dynamically provisioned node-local volumes. Without this, the local-path-provisioner pins PVs to # whichever node claims them first, which conflicts with nodeSelector when workers are on dedicated # node pools. +echo "Creating shared-data directories on all nodes..." 
+for node in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do + docker exec "${node}" mkdir -p /var/data/archives /var/data/streams + docker exec "${node}" chmod 777 /var/data/archives /var/data/streams +done + echo "Creating shared-data PersistentVolumes..." kubectl apply -f - < Date: Tue, 17 Mar 2026 15:37:52 -0400 Subject: [PATCH 26/33] fix(deployment): update shared-data directory creation to use `CLP_HOME` and adjust PV paths in `set-up-multi-dedicated-test.sh` --- .../package-helm/set-up-multi-dedicated-test.sh | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 334aa8e5db..f775c5c970 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -54,11 +54,10 @@ done # dynamically provisioned node-local volumes. Without this, the local-path-provisioner pins PVs to # whichever node claims them first, which conflicts with nodeSelector when workers are on dedicated # node pools. -echo "Creating shared-data directories on all nodes..." -for node in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do - docker exec "${node}" mkdir -p /var/data/archives /var/data/streams - docker exec "${node}" chmod 777 /var/data/archives /var/data/streams -done +# Pre-create shared-data directories under CLP_HOME, which kind's extraMounts expose on every node. +# Without a shared path, hostPath PVs would only contain data on the node that wrote it. +shared_data_dir="${CLP_HOME}/data" +mkdir -p "${shared_data_dir}/archives" "${shared_data_dir}/streams" echo "Creating shared-data PersistentVolumes..." 
kubectl apply -f - < Date: Wed, 18 Mar 2026 01:04:54 -0400 Subject: [PATCH 27/33] bump chart version --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index dedbceb015..430e13ff20 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.2.1-dev.2" +version: "0.2.1-dev.3" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.10.1-dev" From 578fb0d1c6163f7782431e0a78028a6eff1b7cee Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Mar 2026 05:08:05 -0400 Subject: [PATCH 28/33] fix(helm): update Presto configurations and add S3 storage support in Helm templates --- tools/deployment/package-helm/.set-up-common.sh | 16 +++++++++++++++- .../package-helm/templates/configmap.yaml | 7 ++++++- .../templates/presto-worker-deployment.yaml | 9 +++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index ab01cb7aea..6e9918f3b1 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ b/tools/deployment/package-helm/.set-up-common.sh @@ -117,7 +117,21 @@ get_presto_helm_args() { "--set" "clpConfig.presto.coordinator.query_max_memory_per_node_gb=1" \ "--set" "clpConfig.presto.worker.query_memory_gb=4" \ "--set" "clpConfig.presto.worker.system_memory_gb=8" \ - "--set-json" "clpConfig.presto.split_filter={}" \ + "--set-json" "$(cat <<'SPLIT_FILTER' +clpConfig.presto.split_filter={ + "clp.default.default": [{ + "columnName": "timestamp", + "customOptions": { + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + } + }, + "required": false + }] +} +SPLIT_FILTER +)" \ "--set" "clpConfig.api_server=null" \ "--set" 
"clpConfig.query_scheduler=null" \ "--set" "clpConfig.query_worker=null" \ diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 9ab3d504ad..4d14c50ad2 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -390,7 +390,12 @@ data: clp.storage-type=s3 clp.s3-auth-provider=clp_package clp.s3-access-key-id={{ .s3_config.aws_authentication.credentials.access_key_id }} - clp.s3-end-point=https://{{ .s3_config.bucket }}.s3.{{ .s3_config.region_code }}.amazonaws.com/ + clp.s3-bucket={{ .s3_config.bucket }} + {{- if .s3_config.endpoint_url }} + clp.s3-end-point={{ trimSuffix "/" .s3_config.endpoint_url }} + {{- else }} + clp.s3-end-point=https://s3.{{ .s3_config.region_code }}.amazonaws.com + {{- end }} clp.s3-secret-access-key={{ .s3_config.aws_authentication.credentials.secret_access_key }} {{- end }}{{/* if eq .type "s3" */}} {{- end }}{{/* with $.Values.clpConfig.archive_output.storage */}} diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml index d728fceb01..a123c09f66 100644 --- a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -66,6 +66,10 @@ spec: ) | quote }} mountPath: "/var/data/archives" readOnly: true + {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} + - name: "host-ca-certs" + mountPath: "/etc/ssl/certs" + readOnly: true {{- end }} readinessProbe: {{- include "clp.readinessProbeTimings" . 
| nindent 12 }} @@ -99,5 +103,10 @@ spec: "component_category" "shared-data" "name" "archives" ) | nindent 10 }} + {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} + - name: "host-ca-certs" + hostPath: + path: "/etc/ssl/certs" + type: "Directory" {{- end }} {{- end }} From e144e6988b611bdb6f4b99dcabc7354a08c38182 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Mar 2026 17:59:37 -0400 Subject: [PATCH 29/33] fix(helm): increase liveness probe initial delay to 180s in `_helpers.tpl` --- tools/deployment/package-helm/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 933e5c45fe..8f19af6da8 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -88,7 +88,7 @@ Creates timings for liveness probes. @return {string} YAML-formatted liveness probe timing configuration */}} {{- define "clp.livenessProbeTimings" -}} -initialDelaySeconds: 60 +initialDelaySeconds: 180 periodSeconds: 30 timeoutSeconds: 4 failureThreshold: 3 From 333941cda46e2b5bc61e7b6db14bcd41a6bb02a3 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Mar 2026 18:55:36 -0400 Subject: [PATCH 30/33] remove cert hack --- .../package-helm/templates/presto-worker-deployment.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml index a123c09f66..d728fceb01 100644 --- a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -66,10 +66,6 @@ spec: ) | quote }} mountPath: "/var/data/archives" readOnly: true - {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} - - name: "host-ca-certs" - mountPath: 
"/etc/ssl/certs" - readOnly: true {{- end }} readinessProbe: {{- include "clp.readinessProbeTimings" . | nindent 12 }} @@ -103,10 +99,5 @@ spec: "component_category" "shared-data" "name" "archives" ) | nindent 10 }} - {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} - - name: "host-ca-certs" - hostPath: - path: "/etc/ssl/certs" - type: "Directory" {{- end }} {{- end }} From 8c5f6d87f0467faa5de2d4e1d1f60a641178e4db Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 18 Mar 2026 19:05:22 -0400 Subject: [PATCH 31/33] fix(deployment): strip whitespace in JSON heredoc for `split_filter` configuration --- tools/deployment/package-helm/.set-up-common.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index 6e9918f3b1..294de226a1 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ b/tools/deployment/package-helm/.set-up-common.sh @@ -117,8 +117,8 @@ get_presto_helm_args() { "--set" "clpConfig.presto.coordinator.query_max_memory_per_node_gb=1" \ "--set" "clpConfig.presto.worker.query_memory_gb=4" \ "--set" "clpConfig.presto.worker.system_memory_gb=8" \ - "--set-json" "$(cat <<'SPLIT_FILTER' -clpConfig.presto.split_filter={ + "--set-json" "clpConfig.presto.split_filter=$(tr -d '[:space:]' <<'SPLIT_FILTER' +{ "clp.default.default": [{ "columnName": "timestamp", "customOptions": { From b6aa271b30bb32b1861d96a3e9cd351fa4b5cc85 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 19 Mar 2026 02:10:21 -0400 Subject: [PATCH 32/33] fix(helm): update Presto worker image tag to `clp-v0.10.0-fix.1` in values.yaml --- tools/deployment/package-helm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index e03fcbb42d..f575ef5c54 100644 --- a/tools/deployment/package-helm/values.yaml +++ 
b/tools/deployment/package-helm/values.yaml @@ -16,7 +16,7 @@ image: prestoWorker: repository: "ghcr.io/y-scope/presto/prestissimo-worker" pullPolicy: "IfNotPresent" - tag: "clp-v0.10.0" + tag: "clp-v0.10.0-fix.1" # - If false: Single-node deployment. # - Pods automatically tolerate control-plane taints. From 38aa1ad6b3bc7c691ed79c6e68f219597aee1a43 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 19 Mar 2026 03:30:33 -0400 Subject: [PATCH 33/33] feat(helm): add configurability for external Presto instances and conditional chart deployment --- .../package-helm/templates/_helpers.tpl | 41 +++++++++++++------ .../package-helm/templates/configmap.yaml | 10 +++-- .../presto-coordinator-deployment.yaml | 2 +- .../templates/presto-coordinator-service.yaml | 2 +- .../templates/presto-worker-deployment.yaml | 2 +- tools/deployment/package-helm/values.yaml | 6 ++- 6 files changed, 41 insertions(+), 22 deletions(-) diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 8f19af6da8..db2d5a80d9 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -151,19 +151,6 @@ persistentVolumeClaim: claimName: {{ include "clp.fullname" .root }}-{{ include "clp.volumeName" . }} {{- end }} -{{/* -Checks if a given service is in the bundled list. - -@param {object} root Root template context -@param {string} service The service name to check (e.g., "database", "queue", "redis", - "results_cache") -@return {string} "true" if bundled, empty string otherwise -*/}} -{{- define "clp.isBundled" -}} -{{- if has .service .root.Values.clpConfig.bundled -}}true{{- end -}} -{{- end }} - - {{/* Gets the host for the database service. @@ -276,6 +263,34 @@ Gets the port for the results cache service. {{- end -}} {{- end }} +{{/* +Gets the host for the Presto service. + +@param {object} . 
Root template context +@return {string} The Presto host +*/}} +{{- define "clp.prestoHost" -}} +{{- if has "presto" .Values.clpConfig.bundled -}} +{{- printf "%s-presto-coordinator" (include "clp.fullname" .) -}} +{{- else -}} +{{- .Values.clpConfig.presto.host -}} +{{- end -}} +{{- end }} + +{{/* +Gets the port for the Presto service. + +@param {object} . Root template context +@return {string} The Presto port +*/}} +{{- define "clp.prestoPort" -}} +{{- if has "presto" .Values.clpConfig.bundled -}} +8889 +{{- else -}} +{{- .Values.clpConfig.presto.port -}} +{{- end -}} +{{- end }} + {{/* Gets the BROKER_URL env var for Celery workers. diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 4d14c50ad2..48e3e32587 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -226,8 +226,8 @@ data: {{- end }} {{- if .Values.clpConfig.presto }} presto: - host: "{{ include "clp.fullname" . }}-presto-coordinator" - port: 8889 + host: "{{ include "clp.prestoHost" . }}" + port: {{ include "clp.prestoPort" . | int }} {{- else }} presto: null {{- end }} @@ -326,14 +326,15 @@ data: "ClpQueryEngine": {{ .Values.clpConfig.webui.query_engine | quote }}, "ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, {{- if .Values.clpConfig.presto }} - "PrestoHost": "{{ include "clp.fullname" . }}-presto-coordinator", - "PrestoPort": 8889 + "PrestoHost": "{{ include "clp.prestoHost" . }}", + "PrestoPort": {{ include "clp.prestoPort" . 
| int }} {{- else }} "PrestoHost": null, "PrestoPort": null {{- end }} } +{{- if and .Values.clpConfig.presto (has "presto" .Values.clpConfig.bundled) }} {{- with .Values.clpConfig.presto }} presto-coordinator-catalog-clp.properties: | connector.name=clp @@ -435,3 +436,4 @@ data: echo "node.internal-address=$(hostname -i)" >> "${PRESTO_CONFIG_DIR}/node.properties" echo "node.id=$(hostname)" >> "${PRESTO_CONFIG_DIR}/node.properties" {{- end }}{{/* with .Values.clpConfig.presto */}} +{{- end }}{{/* if has "presto" .Values.clpConfig.bundled */}} diff --git a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml index 12cb654165..f340156652 100644 --- a/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-coordinator-deployment.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.presto }} +{{- if and .Values.clpConfig.presto (has "presto" .Values.clpConfig.bundled) }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/templates/presto-coordinator-service.yaml b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml index f69ff79b78..da856a687e 100644 --- a/tools/deployment/package-helm/templates/presto-coordinator-service.yaml +++ b/tools/deployment/package-helm/templates/presto-coordinator-service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.clpConfig.presto }} +{{- if and .Values.clpConfig.presto (has "presto" .Values.clpConfig.bundled) }} apiVersion: "v1" kind: "Service" metadata: diff --git a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml index d728fceb01..8329a28f6b 100644 --- a/tools/deployment/package-helm/templates/presto-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/presto-worker-deployment.yaml @@ -1,4 +1,4 @@ -{{- if 
.Values.clpConfig.presto }} +{{- if and .Values.clpConfig.presto (has "presto" .Values.clpConfig.bundled) }} apiVersion: "apps/v1" kind: "Deployment" metadata: diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index f575ef5c54..9211e85491 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -103,6 +103,7 @@ clpConfig: - "queue" - "redis" - "results_cache" + - "presto" package: storage_engine: "clp-s" @@ -193,9 +194,10 @@ clpConfig: # port: 30800 # logging_level: "INFO" - # Presto query engine config. Uncomment to enable Presto deployments. If you want the Web UI to - # expose Presto SQL, also set `webui.query_engine: "presto"`. + # Presto query engine config. When "presto" is in `bundled`, the chart deploys Presto coordinator + # and worker pods. When not in `bundled`, set `host` and `port` to an external Presto instance. presto: null + # host: "localhost" # port: 30889 # coordinator: # logging_level: "INFO"