From 4fda75571068f4929afc5cbf85f0fee52540ba89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Mon, 18 May 2026 17:10:07 +0100 Subject: [PATCH 1/2] chore(chart): align lobu chart to owletto fork (pre-consolidation, byte-identical) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace charts/lobu/* with the contents of packages/owletto/deploy/charts/lobu/ at owletto-main (chart version 0.3.0, last bumped by owletto#179 = lobu#878's worker-smoke gate). The two charts have diverged since lobu's "public Helm chart" landed (#573), and prod's Flux still pulls the owletto fork — every chart change since has needed double-PRs to keep the two in sync. This commit makes `helm template charts/lobu` produce byte-identical output to the owletto fork given any values, so the follow-up Flux repoint is a no-op render diff. The repoint itself happens in a separate owletto PR; deletion of the owletto fork happens in a third PR after prod verifies the new source. No version bump. No behavior change. The chart stays at 0.3.0. Files dropped (orphaned by the alignment): - charts/lobu/templates/secret.yaml — referenced `.Values.secrets.create` and `include "lobu.secretName"`, neither of which exist in the owletto-style values.yaml / _helpers.tpl. Without `secrets.create`, the chart-managed Secret feature was unreachable; production has always used an externally managed Secret via `secretName`. - charts/lobu/values.example.yaml — its walkthrough was for `secrets.create`, which no longer exists. README updated to point at values.yaml + an operator-provided values file instead. CI: - .github/workflows/helm-chart.yml: drop the `Render install example` step that fed values.example.yaml. Verified: - `helm lint charts/lobu` → 0 errors - `helm template ... 
charts/lobu -f VALUES_FILE` vs same on the owletto fork: 0 line diff - `helm template lobu charts/lobu` (defaults): 0 line diff - `make build-packages` + `make typecheck`: clean --- .github/workflows/helm-chart.yml | 3 - README.md | 5 +- charts/lobu/Chart.yaml | 9 +- charts/lobu/templates/NOTES.txt | 38 ++- charts/lobu/templates/_helpers.tpl | 45 ++-- charts/lobu/templates/app-pvc.yaml | 4 +- charts/lobu/templates/deployment.yaml | 113 ++------ .../lobu/templates/embeddings-deployment.yaml | 20 +- charts/lobu/templates/embeddings-pvc.yaml | 5 +- charts/lobu/templates/embeddings-service.yaml | 3 +- charts/lobu/templates/ingress.yaml | 4 +- charts/lobu/templates/migration-job.yaml | 10 +- charts/lobu/templates/secret.yaml | 13 - charts/lobu/templates/service.yaml | 3 +- charts/lobu/templates/smoke-test-job.yaml | 108 ++------ charts/lobu/templates/worker-deployment.yaml | 17 +- charts/lobu/templates/worker-pvc.yaml | 5 +- charts/lobu/values.example.yaml | 87 ------- charts/lobu/values.yaml | 245 ++++++------------ 19 files changed, 188 insertions(+), 549 deletions(-) delete mode 100644 charts/lobu/templates/secret.yaml delete mode 100644 charts/lobu/values.example.yaml diff --git a/.github/workflows/helm-chart.yml b/.github/workflows/helm-chart.yml index 236fbfca6..edc26f37a 100644 --- a/.github/workflows/helm-chart.yml +++ b/.github/workflows/helm-chart.yml @@ -40,9 +40,6 @@ jobs: - name: Render default values run: helm template lobu charts/lobu --namespace lobu >/tmp/lobu-default.yaml - - name: Render install example - run: helm template lobu charts/lobu --namespace lobu -f charts/lobu/values.example.yaml >/tmp/lobu-example.yaml - publish: needs: lint if: ${{ github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.publish) }} diff --git a/README.md b/README.md index 4cf134184..9904fa068 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,11 @@ Single-process Node remains the simplest deployment: run it with `node`, `pm2`, ```bash helm install 
lobu oci://ghcr.io/lobu-ai/charts/lobu \ --namespace lobu --create-namespace \ - -f charts/lobu/values.example.yaml + -f your-values.yaml ``` + See `charts/lobu/values.yaml` for the full set of tunables. At minimum supply an + ingress host, a `secretName` Secret containing `DATABASE_URL` + `JWT_SECRET` + + `BETTER_AUTH_SECRET` + provider API keys, and a `database.existingSecret`. ## Architecture diff --git a/charts/lobu/Chart.yaml b/charts/lobu/Chart.yaml index 22992f4a5..de35d77b2 100644 --- a/charts/lobu/Chart.yaml +++ b/charts/lobu/Chart.yaml @@ -1,17 +1,16 @@ apiVersion: v2 name: lobu -description: Lobu self-hosted AI agent platform +description: Lobu Platform - Never forget anything. User content analysis with AI. type: application version: 7.1.0 appVersion: 7.1.0 keywords: - lobu - - agents - - ai - - memory + - content + - analytics home: https://github.com/lobu-ai/lobu sources: - https://github.com/lobu-ai/lobu maintainers: - name: lobu-ai - email: emre@lobu.ai + email: emrekabakci@gmail.com diff --git a/charts/lobu/templates/NOTES.txt b/charts/lobu/templates/NOTES.txt index bbdb04f44..39948e7ab 100644 --- a/charts/lobu/templates/NOTES.txt +++ b/charts/lobu/templates/NOTES.txt @@ -1,35 +1,33 @@ -Lobu has been installed. +Insights Platform has been deployed! +1. Application URL: {{- if .Values.ingress.enabled }} -Application URL: -{{- range .Values.ingress.hosts }} - https://{{ . }} -{{- end }} + https://{{ .Values.ingress.host }} {{- else }} -Port-forward the app service: - kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "lobu.fullname" . }}-app {{ .Values.service.port }}:{{ .Values.service.port }} - open http://localhost:{{ .Values.service.port }} + kubectl port-forward svc/{{ include "lobu.fullname" . }}-app {{ .Values.service.port }}:{{ .Values.service.port }} {{- end }} -Secrets: -{{- $secretName := include "lobu.secretName" . }} -{{- if $secretName }} - Using runtime secret: {{ $secretName }} +2. 
Worker Status: +{{- if .Values.worker.enabled }} + Worker is enabled with {{ .Values.worker.replicaCount }} replica(s) + kubectl get pods -l app.kubernetes.io/component=worker {{- else }} - No runtime secret configured. Create a Secret with JWT_SECRET, BETTER_AUTH_SECRET, - provider API keys, and other Lobu settings, then set secretName. + Worker is disabled {{- end }} -Database: +3. Database: {{- if .Values.database.existingSecret }} - DATABASE_URL comes from Secret {{ .Values.database.existingSecret }} key {{ .Values.database.existingSecretKey }}. + Using credentials from secret: {{ .Values.database.existingSecret }} {{- else }} - Ensure DATABASE_URL is present in the runtime secret. + Ensure DATABASE_URL is set in your secrets {{- end }} -Useful checks: - kubectl -n {{ .Release.Namespace }} get pods -l app.kubernetes.io/instance={{ .Release.Name }} - kubectl -n {{ .Release.Namespace }} logs deploy/{{ include "lobu.fullname" . }}-app +4. Secrets: +{{- if .Values.secretName }} + Using secret: {{ .Values.secretName }} +{{- else }} + WARNING: No secretName configured. Ensure all required env vars are set. +{{- end }} {{- if and (gt (int (.Values.app.replicaCount | default 1)) 1) (or (not .Values.service.sessionAffinity) (eq (.Values.service.sessionAffinity | default "None") "None")) }} diff --git a/charts/lobu/templates/_helpers.tpl b/charts/lobu/templates/_helpers.tpl index 07a549231..6dad111f5 100644 --- a/charts/lobu/templates/_helpers.tpl +++ b/charts/lobu/templates/_helpers.tpl @@ -22,14 +22,14 @@ Create a default fully qualified app name. {{- end }} {{/* -Create chart name and version as used by labels. +Create chart name and version as used by the chart label. */}} {{- define "lobu.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* -Common labels. +Common labels */}} {{- define "lobu.labels" -}} helm.sh/chart: {{ include "lobu.chart" . 
}} @@ -41,54 +41,57 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end }} {{/* -Selector labels. +Selector labels */}} {{- define "lobu.selectorLabels" -}} app.kubernetes.io/name: {{ include "lobu.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} +{{/* +App selector labels +*/}} {{- define "lobu.appSelectorLabels" -}} {{ include "lobu.selectorLabels" . }} app.kubernetes.io/component: api {{- end }} +{{/* +Worker selector labels +*/}} {{- define "lobu.workerSelectorLabels" -}} {{ include "lobu.selectorLabels" . }} app.kubernetes.io/component: worker {{- end }} +{{/* +Embeddings selector labels +*/}} {{- define "lobu.embeddingsSelectorLabels" -}} {{ include "lobu.selectorLabels" . }} app.kubernetes.io/component: embeddings {{- end }} {{/* -Resolve the image tag. +Create the app image name */}} -{{- define "lobu.imageTag" -}} -{{- default .Chart.AppVersion .Values.image.tag }} -{{- end }} - {{- define "lobu.appImage" -}} -{{- printf "%s/%s-app:%s" .Values.image.registry .Values.image.repository (include "lobu.imageTag" .) }} +{{- $tag := .Values.image.tag | default .Chart.AppVersion }} +{{- printf "%s/%s-app:%s" .Values.image.registry .Values.image.repository $tag }} {{- end }} +{{/* +Create the worker image name +*/}} {{- define "lobu.workerImage" -}} -{{- printf "%s/%s-worker:%s" .Values.image.registry .Values.image.repository (include "lobu.imageTag" .) }} -{{- end }} - -{{- define "lobu.embeddingsImage" -}} -{{- printf "%s/%s-embeddings:%s" .Values.image.registry .Values.image.repository (include "lobu.imageTag" .) }} +{{- $tag := .Values.image.tag | default .Chart.AppVersion }} +{{- printf "%s/%s-worker:%s" .Values.image.registry .Values.image.repository $tag }} {{- end }} {{/* -The Secret loaded into pods via envFrom, if configured. 
+Create the embeddings service image name */}} -{{- define "lobu.secretName" -}} -{{- if .Values.secrets.create }} -{{- default (printf "%s-secrets" (include "lobu.fullname" .)) .Values.secrets.name }} -{{- else }} -{{- .Values.secretName }} -{{- end }} +{{- define "lobu.embeddingsImage" -}} +{{- $tag := .Values.image.tag | default .Chart.AppVersion }} +{{- printf "%s/%s-embeddings:%s" .Values.image.registry .Values.image.repository $tag }} {{- end }} diff --git a/charts/lobu/templates/app-pvc.yaml b/charts/lobu/templates/app-pvc.yaml index 2ffdf7604..5658c2f34 100644 --- a/charts/lobu/templates/app-pvc.yaml +++ b/charts/lobu/templates/app-pvc.yaml @@ -8,9 +8,9 @@ metadata: app.kubernetes.io/component: api spec: accessModes: - {{- toYaml .Values.app.workspaces.accessModes | nindent 4 }} + - ReadWriteOnce {{- if .Values.app.workspaces.storageClass }} - storageClassName: {{ .Values.app.workspaces.storageClass | quote }} + storageClassName: {{ .Values.app.workspaces.storageClass }} {{- end }} resources: requests: diff --git a/charts/lobu/templates/deployment.yaml b/charts/lobu/templates/deployment.yaml index fa43e8cb5..d0aec6812 100644 --- a/charts/lobu/templates/deployment.yaml +++ b/charts/lobu/templates/deployment.yaml @@ -7,55 +7,15 @@ metadata: app.kubernetes.io/component: api spec: replicas: {{ .Values.app.replicaCount }} - {{- /* - Deploy strategy resolution: - 1. Explicit `app.strategy` override always wins. - 2. Else if `app.allowMultiReplica: true` AND workspaces is RWX - (or disabled) → RollingUpdate (maxSurge: 1, maxUnavailable: 0). - This is the operator opt-in path for true blue/green deploys. - 3. Else → Recreate (the safe default). - - Phase 5 (workspaces PVC is OFF by default + LOBU_SESSION_STORE - defaults to snapshot mode) means most deploys naturally land on the - rolling-update branch as soon as `allowMultiReplica: true` is set. - The RWX check still applies for self-hosters who re-enable the PVC. 
- - Why `allowMultiReplica` is an explicit flag, not auto-detected: - several in-memory components break with >1 gateway replicas OR - during the brief RollingUpdate overlap: - * `SseManager` (gateway/services/sse-manager.ts) — SSE streams are - pod-local; a job claimed by pod B broadcasts to no-one if the - client is on pod A. - * AskUser question routing - (gateway/connections/interaction-bridge.ts:193-214) — pending - questions live in a per-pod Map, button clicks can land on the - wrong pod and be dropped. - * Telegram polling mode (gateway/connections/chat-instance-manager - .ts:610-613) — every replica long-polls the same bot, causing - conflicts. - Snapshot-mode session state and the per-conversation advisory lock - are necessary but not sufficient. The flag forces operators to - acknowledge "I have only webhook-mode Chat connections AND accept - the SSE / AskUser handoff caveats" before opting in. - */}} - {{- $rwxConfigured := has "ReadWriteMany" (.Values.app.workspaces.accessModes | default (list)) }} - {{- $rollSafe := and .Values.app.allowMultiReplica (or (not .Values.app.workspaces.enabled) $rwxConfigured) }} - {{- if .Values.app.strategy }} + {{- with .Values.app.strategy }} strategy: - {{- toYaml .Values.app.strategy | nindent 4 }} - {{- else if $rollSafe }} - # Operator opted in via app.allowMultiReplica + RWX workspaces. - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 + {{- toYaml . | nindent 4 }} {{- else }} - # Safe default. RWO PVC + in-memory SSE/AskUser/Telegram-polling - # state make rolling overlap unsafe — see comment above. + {{- if .Values.app.workspaces.enabled }} strategy: type: Recreate {{- end }} + {{- end }} selector: matchLabels: {{- include "lobu.appSelectorLabels" . | nindent 6 }} @@ -63,56 +23,34 @@ spec: metadata: labels: {{- include "lobu.appSelectorLabels" . | nindent 8 }} - {{- with .Values.app.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} spec: {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.app.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - # Grace period must exceed preStopDelaySeconds + reasonable shutdown - # time for the gateway (graceful_shutdown in server.ts cleans up - # task scheduler, embedded gateway, DB pool, HTTP server). Default - # k8s grace period is 30s; bump so the preStop sleep doesn't eat - # the whole window. - terminationGracePeriodSeconds: {{ .Values.app.terminationGracePeriodSeconds | default 45 }} containers: - name: app image: {{ include "lobu.appImage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- with .Values.app.securityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} ports: - - name: http - containerPort: 8787 + - containerPort: 8787 protocol: TCP env: {{- range $key, $value := .Values.app.env }} - name: {{ $key }} value: {{ $value | quote }} {{- end }} - {{- if and .Values.ingress.hosts (not (hasKey .Values.app.env "PUBLIC_WEB_URL")) }} - - name: PUBLIC_WEB_URL + {{- if .Values.ingress.hosts }} + - name: BASE_URL value: {{ printf "https://%s" (first .Values.ingress.hosts) | quote }} {{- end }} {{- $workerSmoke := .Values.releaseGates.smokeTest.workerSmoke | default dict }} {{- if and $workerSmoke (hasKey $workerSmoke "enabled") $workerSmoke.enabled }} - # Pin SMOKE_TEST_ALLOWED_HOST to the in-cluster app Service DNS - # name so /api/internal/smoke/dispatch refuses any request - # whose Host header is not the cluster-internal service. The - # smoke Job hits this exact hostname via its curl URL; public - # ingress traffic always carries the operator's external host - # in Host, so this is the second layer of ingress-bypass - # defense (the first is the x-forwarded-* refusal in the - # route handler). 
+ # Pin SMOKE_TEST_ALLOWED_HOST to the in-cluster app Service + # DNS name so /api/internal/smoke/dispatch refuses any + # request whose Host header is the operator's public + # ingress hostname. Belt-and-braces with the + # x-forwarded-* refusal in the route handler. - name: SMOKE_TEST_ALLOWED_HOST value: {{ printf "%s-app" (include "lobu.fullname" .) | quote }} {{- end }} @@ -147,29 +85,10 @@ spec: key: {{ .Values.database.existingSecretKey }} {{- end }} {{- end }} - {{- $secretName := include "lobu.secretName" . }} - {{- if $secretName }} + {{- if .Values.secretName }} envFrom: - secretRef: - name: {{ $secretName }} - {{- end }} - {{- /* - PreStop hook is only useful under RollingUpdate (the new pod is - already serving, so deregistering the old pod via Service - endpoint removal + giving downstream caches time to notice it - shrinks the "old pod kept getting traffic during drain" window). - Under `Recreate`, the new pod doesn't start until the old one - fully terminates — adding a preStop sleep would EXTEND the - no-available-server window by its duration. We only emit the - hook when preStopDelaySeconds is explicitly > 0; ops repos - using RollingUpdate set it, Recreate-mode deploys leave it at - the default 0. - */ -}} - {{- if gt (int (.Values.app.preStopDelaySeconds | default 0)) 0 }} - lifecycle: - preStop: - exec: - command: ["sh", "-c", "sleep {{ .Values.app.preStopDelaySeconds }}"] + name: {{ .Values.secretName }} {{- end }} # Readiness probes the DB too (/health/ready does SELECT 1). 
# Failing readiness pulls the pod out of the Service endpoint set @@ -179,7 +98,7 @@ spec: readinessProbe: httpGet: path: /health/ready - port: http + port: 8787 initialDelaySeconds: {{ .Values.healthCheck.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.healthCheck.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.healthCheck.readinessProbe.timeoutSeconds }} @@ -187,7 +106,7 @@ spec: livenessProbe: httpGet: path: /health - port: http + port: 8787 initialDelaySeconds: {{ .Values.healthCheck.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.healthCheck.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.healthCheck.livenessProbe.timeoutSeconds }} diff --git a/charts/lobu/templates/embeddings-deployment.yaml b/charts/lobu/templates/embeddings-deployment.yaml index c3e5fbd38..8ff640a9b 100644 --- a/charts/lobu/templates/embeddings-deployment.yaml +++ b/charts/lobu/templates/embeddings-deployment.yaml @@ -24,10 +24,6 @@ spec: metadata: labels: {{- include "lobu.embeddingsSelectorLabels" . | nindent 8 }} - {{- with .Values.embeddings.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} spec: {{- with .Values.imagePullSecrets }} imagePullSecrets: @@ -41,13 +37,8 @@ spec: - name: embeddings image: {{ include "lobu.embeddingsImage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- with .Values.embeddings.securityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} ports: - - name: http - containerPort: {{ .Values.embeddings.service.port }} + - containerPort: {{ .Values.embeddings.service.port }} protocol: TCP env: - name: PORT @@ -58,16 +49,15 @@ spec: - name: {{ $key }} value: {{ $value | quote }} {{- end }} - {{- $secretName := include "lobu.secretName" . 
}} - {{- if $secretName }} + {{- if .Values.secretName }} envFrom: - secretRef: - name: {{ $secretName }} + name: {{ .Values.secretName }} {{- end }} readinessProbe: httpGet: path: /health - port: http + port: {{ .Values.embeddings.service.port }} initialDelaySeconds: {{ .Values.healthCheck.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.healthCheck.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.healthCheck.readinessProbe.timeoutSeconds }} @@ -75,7 +65,7 @@ spec: livenessProbe: httpGet: path: /health - port: http + port: {{ .Values.embeddings.service.port }} initialDelaySeconds: {{ .Values.healthCheck.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.healthCheck.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.healthCheck.livenessProbe.timeoutSeconds }} diff --git a/charts/lobu/templates/embeddings-pvc.yaml b/charts/lobu/templates/embeddings-pvc.yaml index ceead71dd..13030c420 100644 --- a/charts/lobu/templates/embeddings-pvc.yaml +++ b/charts/lobu/templates/embeddings-pvc.yaml @@ -5,12 +5,11 @@ metadata: name: {{ include "lobu.fullname" . }}-embeddings-cache labels: {{- include "lobu.labels" . | nindent 4 }} - app.kubernetes.io/component: embeddings spec: accessModes: - {{- toYaml .Values.embeddings.cache.accessModes | nindent 4 }} + - ReadWriteOnce {{- if .Values.embeddings.cache.storageClass }} - storageClassName: {{ .Values.embeddings.cache.storageClass | quote }} + storageClassName: {{ .Values.embeddings.cache.storageClass }} {{- end }} resources: requests: diff --git a/charts/lobu/templates/embeddings-service.yaml b/charts/lobu/templates/embeddings-service.yaml index f8cc56496..2b743dc24 100644 --- a/charts/lobu/templates/embeddings-service.yaml +++ b/charts/lobu/templates/embeddings-service.yaml @@ -5,12 +5,11 @@ metadata: name: {{ include "lobu.fullname" . }}-embeddings labels: {{- include "lobu.labels" . 
| nindent 4 }} - app.kubernetes.io/component: embeddings spec: type: ClusterIP ports: - port: {{ .Values.embeddings.service.port }} - targetPort: http + targetPort: {{ .Values.embeddings.service.port }} protocol: TCP name: http selector: diff --git a/charts/lobu/templates/ingress.yaml b/charts/lobu/templates/ingress.yaml index 2423d4825..477e9b70a 100644 --- a/charts/lobu/templates/ingress.yaml +++ b/charts/lobu/templates/ingress.yaml @@ -5,10 +5,10 @@ metadata: name: {{ include "lobu.fullname" . }} labels: {{- include "lobu.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} annotations: + {{- with .Values.ingress.annotations }} {{- toYaml . | nindent 4 }} - {{- end }} + {{- end }} spec: {{- if .Values.ingress.className }} ingressClassName: {{ .Values.ingress.className }} diff --git a/charts/lobu/templates/migration-job.yaml b/charts/lobu/templates/migration-job.yaml index ef55033c3..447bb7ea8 100644 --- a/charts/lobu/templates/migration-job.yaml +++ b/charts/lobu/templates/migration-job.yaml @@ -5,7 +5,6 @@ metadata: name: {{ include "lobu.fullname" . }}-migrate labels: {{- include "lobu.labels" . 
| nindent 4 }} - app.kubernetes.io/component: migrate annotations: "helm.sh/hook": pre-install,pre-upgrade "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded @@ -72,10 +71,6 @@ spec: - name: DB_PREFLIGHT_TIMEOUT_SECONDS value: {{ .Values.releaseGates.databasePreflight.timeoutSeconds | quote }} {{- end }} - {{- if and .Values.secrets.create (hasKey .Values.secrets.stringData "DATABASE_URL") (not .Values.migrations.database.existingSecret) (not (hasKey .Values.app.env "DATABASE_URL")) }} - - name: DATABASE_URL - value: {{ index .Values.secrets.stringData "DATABASE_URL" | quote }} - {{- end }} {{- if .Values.migrations.database.existingSecret }} {{- if .Values.migrations.database.usePooler }} - name: DB_USER @@ -98,10 +93,9 @@ spec: key: {{ .Values.migrations.database.existingSecretKey }} {{- end }} {{- end }} - {{- $secretName := include "lobu.secretName" . }} - {{- if and $secretName (not .Values.secrets.create) }} + {{- if .Values.secretName }} envFrom: - secretRef: - name: {{ $secretName }} + name: {{ .Values.secretName }} {{- end }} {{- end }} diff --git a/charts/lobu/templates/secret.yaml b/charts/lobu/templates/secret.yaml deleted file mode 100644 index 9367f9d69..000000000 --- a/charts/lobu/templates/secret.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.secrets.create }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "lobu.secretName" . }} - labels: - {{- include "lobu.labels" . | nindent 4 }} -type: Opaque -stringData: - {{- range $key, $value := .Values.secrets.stringData }} - {{ $key }}: {{ $value | quote }} - {{- end }} -{{- end }} diff --git a/charts/lobu/templates/service.yaml b/charts/lobu/templates/service.yaml index 6ce0a457d..34abdc142 100644 --- a/charts/lobu/templates/service.yaml +++ b/charts/lobu/templates/service.yaml @@ -4,7 +4,6 @@ metadata: name: {{ include "lobu.fullname" . }}-app labels: {{- include "lobu.labels" . 
| nindent 4 }} - app.kubernetes.io/component: api spec: type: {{ .Values.service.type }} {{- with .Values.service.sessionAffinity }} @@ -19,7 +18,7 @@ spec: {{- end }} ports: - port: {{ .Values.service.port }} - targetPort: http + targetPort: 8787 protocol: TCP name: http selector: diff --git a/charts/lobu/templates/smoke-test-job.yaml b/charts/lobu/templates/smoke-test-job.yaml index 32c00d227..d3956ac64 100644 --- a/charts/lobu/templates/smoke-test-job.yaml +++ b/charts/lobu/templates/smoke-test-job.yaml @@ -31,36 +31,23 @@ spec: - name: smoke-test image: {{ include "lobu.appImage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- if $workerSmokeEnabled }} - {{- $secretName := include "lobu.secretName" . }} - {{- if $secretName }} + {{- if and $workerSmokeEnabled .Values.secretName }} envFrom: - secretRef: - # Phase 3 needs SMOKE_TEST_TOKEN from the deployment Secret - # to authenticate against /api/internal/smoke/dispatch. - # DATABASE_URL also comes from the same Secret on production - # installs (chart-managed secrets.create path also exposes it - # via the inline `env` block below). - name: {{ $secretName }} - {{- end }} + # Phase 3 needs SMOKE_TEST_TOKEN + DATABASE_URL from the + # deployment Secret to authenticate against + # /api/internal/smoke/dispatch and poll the snapshot row. 
+ name: {{ .Values.secretName }} {{- end }} + {{- if $workerSmokeEnabled }} env: - {{- if and .Values.secrets.create (hasKey .Values.secrets.stringData "DATABASE_URL") }} - - name: DATABASE_URL - value: {{ index .Values.secrets.stringData "DATABASE_URL" | quote }} - {{- end }} - - name: REQUIRED_SCHEMA - value: {{ join "," (default (list) .Values.releaseGates.smokeTest.requiredSchema) | quote }} - {{- if $workerSmokeEnabled }} - name: WORKER_SMOKE_ENABLED value: "1" - # NOTE: the smoke agentId and organizationId are NOT passed - # to the dispatch endpoint — the gateway pins them - # server-side from SMOKE_TEST_AGENT_ID / SMOKE_TEST_ORG_ID - # in the deployment Secret so a leaked SMOKE_TEST_TOKEN - # cannot target a real tenant. The chart values below are - # only here so operators can keep the chart-side knobs in - # sync with what they configure in the Secret. + # The smoke agentId + organizationId are NOT passed to the + # dispatch endpoint — the gateway pins them server-side from + # SMOKE_TEST_AGENT_ID / SMOKE_TEST_ORG_ID in the deployment + # Secret. This makes a leaked SMOKE_TEST_TOKEN unable to + # target real tenants. See lobu#878 for the codex finding. - name: WORKER_SMOKE_CONV_PREFIX value: {{ default "smoke-" $workerSmoke.conversationIdPrefix | quote }} - name: WORKER_SMOKE_TIMEOUT @@ -71,14 +58,14 @@ spec: value: {{ .Release.Name | quote }} - name: WORKER_SMOKE_REVISION value: {{ .Release.Revision | quote }} - {{- end }} + {{- end }} command: - /bin/bash - -ec - | url="http://{{ include "lobu.fullname" . 
}}-app:{{ .Values.service.port }}{{ .Values.releaseGates.smokeTest.path }}" deadline=$((SECONDS + {{ .Values.releaseGates.smokeTest.timeoutSeconds | int }})) - echo "waiting for $url" + echo "phase 1: waiting for $url" while true; do if curl -fsS --max-time 5 "$url" >/tmp/lobu-smoke-response 2>/tmp/lobu-smoke-error; then @@ -97,77 +84,18 @@ spec: sleep {{ .Values.releaseGates.smokeTest.intervalSeconds | int }} done - if [ -z "${REQUIRED_SCHEMA}" ]; then - echo "no requiredSchema configured — skipping schema check" - exit 0 - fi - if [ -z "${DATABASE_URL}" ]; then - echo "DATABASE_URL not set in smoke-test env — skipping schema check" >&2 - exit 0 - fi - - echo "checking required schema columns: ${REQUIRED_SCHEMA}" - node --input-type=module <<'NODE' - import postgres from "postgres"; - - const required = (process.env.REQUIRED_SCHEMA || "") - .split(",") - .map((s) => s.trim()) - .filter(Boolean) - .map((entry) => { - const [table, column] = entry.split("."); - if (!table || !column) { - console.error(`ERROR: bad requiredSchema entry "${entry}" — must be table.column`); - process.exit(2); - } - return { table, column }; - }); - - const sql = postgres(process.env.DATABASE_URL, { - max: 1, - connect_timeout: 10, - idle_timeout: 1, - onnotice: () => {}, - }); - - try { - const missing = []; - for (const { table, column } of required) { - const rows = await sql` - SELECT 1 FROM information_schema.columns - WHERE table_schema = 'public' - AND table_name = ${table} - AND column_name = ${column} - LIMIT 1 - `; - if (rows.length === 0) { - missing.push(`${table}.${column}`); - } - } - if (missing.length > 0) { - console.error("ERROR: required schema columns missing after migrate:"); - for (const m of missing) console.error(` - ${m}`); - console.error( - "This usually means a migration in db/migrations/ was edited after\n" + - "it was first applied to this database. Ship a new dated migration\n" + - "instead of editing an existing one." 
- ); - process.exit(1); - } - console.log(`schema check passed — all ${required.length} required column(s) present`); - } finally { - await sql.end({ timeout: 1 }).catch(() => {}); - } - NODE - if [ -z "${WORKER_SMOKE_ENABLED:-}" ]; then - echo "workerSmoke disabled — skipping phase 3" + echo "workerSmoke disabled — gateway-only smoke check passed" exit 0 fi if [ -z "${SMOKE_TEST_TOKEN:-}" ]; then echo "SMOKE_TEST_TOKEN env not set — cannot run worker smoke" >&2 exit 1 fi + if [ -z "${DATABASE_URL:-}" ]; then + echo "DATABASE_URL env not set — cannot poll snapshot" >&2 + exit 1 + fi conv_id="${WORKER_SMOKE_CONV_PREFIX}${WORKER_SMOKE_RELEASE}-${WORKER_SMOKE_REVISION}" dispatch_url="http://{{ include "lobu.fullname" . }}-app:{{ .Values.service.port }}/api/internal/smoke/dispatch" diff --git a/charts/lobu/templates/worker-deployment.yaml b/charts/lobu/templates/worker-deployment.yaml index 3d1a14f29..0d6eacb6b 100644 --- a/charts/lobu/templates/worker-deployment.yaml +++ b/charts/lobu/templates/worker-deployment.yaml @@ -24,10 +24,6 @@ spec: metadata: labels: {{- include "lobu.workerSelectorLabels" . | nindent 8 }} - {{- with .Values.worker.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} spec: {{- with .Values.imagePullSecrets }} imagePullSecrets: @@ -38,6 +34,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} initContainers: + # Wait for the app pod's /health before starting the worker daemon. + # Without this the daemon's hard health check (worker.ts:77) crashes + # the container when both pods restart simultaneously, costing + # 30-60s of connector ingestion downtime per deploy. - name: wait-for-app image: {{ include "lobu.workerImage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} @@ -67,10 +67,6 @@ spec: - name: worker image: {{ include "lobu.workerImage" . }} imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- with .Values.worker.securityContext }} - securityContext: - {{- toYaml . 
| nindent 12 }} - {{- end }} env: - name: API_URL value: http://{{ include "lobu.fullname" . }}-app:{{ .Values.service.port }} @@ -115,11 +111,10 @@ spec: key: {{ .Values.database.existingSecretKey }} {{- end }} {{- end }} - {{- $secretName := include "lobu.secretName" . }} - {{- if $secretName }} + {{- if .Values.secretName }} envFrom: - secretRef: - name: {{ $secretName }} + name: {{ .Values.secretName }} {{- end }} {{- if .Values.worker.cache.enabled }} volumeMounts: diff --git a/charts/lobu/templates/worker-pvc.yaml b/charts/lobu/templates/worker-pvc.yaml index b9d791e4a..bd10d1063 100644 --- a/charts/lobu/templates/worker-pvc.yaml +++ b/charts/lobu/templates/worker-pvc.yaml @@ -5,12 +5,11 @@ metadata: name: {{ include "lobu.fullname" . }}-worker-cache labels: {{- include "lobu.labels" . | nindent 4 }} - app.kubernetes.io/component: worker spec: accessModes: - {{- toYaml .Values.worker.cache.accessModes | nindent 4 }} + - ReadWriteOnce {{- if .Values.worker.cache.storageClass }} - storageClassName: {{ .Values.worker.cache.storageClass | quote }} + storageClassName: {{ .Values.worker.cache.storageClass }} {{- end }} resources: requests: diff --git a/charts/lobu/values.example.yaml b/charts/lobu/values.example.yaml deleted file mode 100644 index d72fc8a7c..000000000 --- a/charts/lobu/values.example.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Minimal public Helm install template for Lobu. -# -# 1. Create the namespace and secrets: -# -# kubectl create namespace lobu -# kubectl -n lobu create secret generic lobu-db \ -# --from-literal=uri='postgresql://USER:PASSWORD@HOST:5432/lobu?sslmode=require' -# kubectl -n lobu create secret generic lobu-secrets \ -# --from-literal=JWT_SECRET='replace-with-a-long-random-value' \ -# --from-literal=BETTER_AUTH_SECRET='replace-with-a-long-random-value' \ -# --from-literal=ANTHROPIC_API_KEY='sk-ant-...' -# -# 2. 
Copy this file, fill in your hostname/storage classes as needed, then run: -# -# helm install lobu oci://ghcr.io/lobu-ai/charts/lobu \ -# --namespace lobu --create-namespace \ -# -f values.example.yaml - -image: - # The image pipeline publishes timestamp tags plus latest. Pin a timestamp - # tag from GHCR in production; latest keeps first-time installs simple. - tag: "latest" - -# Existing Secret loaded into all Lobu pods. Put JWT_SECRET, -# BETTER_AUTH_SECRET, provider API keys, OAuth client secrets, and other Lobu -# runtime secrets here. DATABASE_URL can live here too if you leave -# database.existingSecret empty. -secretName: lobu-secrets - -database: - # Secret key must contain a full Postgres URL. Postgres must have pgvector. - existingSecret: lobu-db - existingSecretKey: uri - -app: - replicaCount: 1 - env: - NODE_ENV: production - # PUBLIC_WEB_URL is derived from ingress.hosts[0] below. Set it here only - # when your public URL differs from that host. - # Migrations are handled by the Helm pre-install/pre-upgrade Job below. - SKIP_MIGRATIONS: "1" - # Optional but recommended: start restrictive and add domains needed by - # your enabled skills/connectors. Use "*" only for trusted dev clusters. - WORKER_ALLOWED_DOMAINS: "" - workspaces: - enabled: true - size: 20Gi - # storageClass: fast-ssd - -worker: - enabled: true - replicaCount: 1 - cache: - enabled: true - size: 5Gi - # storageClass: fast-ssd - -embeddings: - enabled: true - cache: - enabled: true - size: 5Gi - # storageClass: fast-ssd - -migrations: - enabled: true - # If you use secrets.create and place DATABASE_URL in secrets.stringData - # instead of the external lobu-db Secret above, leave this database block empty; - # the migration hook reads secrets.stringData.DATABASE_URL directly. - database: - existingSecret: lobu-db - existingSecretKey: uri - -ingress: - enabled: true - className: nginx - annotations: - # Uncomment when using cert-manager. 
- # cert-manager.io/cluster-issuer: letsencrypt-prod - hosts: - - lobu.example.com - tls: - # Uncomment after configuring TLS for your ingress controller. - # - secretName: lobu-tls - # hosts: - # - lobu.example.com diff --git a/charts/lobu/values.yaml b/charts/lobu/values.yaml index 625385950..6192fc72e 100644 --- a/charts/lobu/values.yaml +++ b/charts/lobu/values.yaml @@ -1,125 +1,67 @@ -# Default values for the public Lobu chart. -# See values.example.yaml for a minimal copy/paste install template. +# Default values for lobu chart +# Global settings nameOverride: "" fullnameOverride: "" +# Image settings (shared between app, worker, and embeddings) image: registry: ghcr.io - # The chart appends -app, -worker, and -embeddings for the three images. repository: lobu-ai/lobu - pullPolicy: IfNotPresent - # The image pipeline publishes timestamp tags plus latest. Pin a timestamp - # tag for production upgrades; latest keeps first-time installs simple. + pullPolicy: Always + # Tag defaults to chart appVersion tag: "latest" -# Public images do not require pull secrets. Set this if your cluster must -# authenticate to GHCR or you mirror images into a private registry. -imagePullSecrets: [] - -# Existing Kubernetes Secret loaded into app, worker, embeddings, and migration -# pods via envFrom. It commonly contains DATABASE_URL, JWT_SECRET, -# BETTER_AUTH_SECRET, provider API keys, and integration OAuth credentials. -secretName: "" - -# Optional chart-managed Secret. Prefer an external secret manager for -# production; this is here for small test clusters and quick demos. -secrets: - create: false - name: "" - stringData: {} +imagePullSecrets: + - name: ghcr-credentials +# App deployment settings (API backend) app: replicaCount: 1 + # With workspaces.enabled=false (Phase 5 default), the chart leaves + # strategy unset so the Deployment uses kube-default RollingUpdate. + # Setting `workspaces.enabled: true` (legacy RWO path) still pins to + # Recreate below. 
strategy: {} - podAnnotations: {} - podSecurityContext: {} - securityContext: {} - # Seconds the preStop hook sleeps before SIGTERM is sent. Gives Service - # endpoint deregistration + downstream LB cache time to notice the pod - # is going away, so in-flight requests during the deregistration lag - # still hit a live process. - # - # Default 0 (preStop hook NOT emitted). Set to ~15 when running with - # `app.strategy.type: RollingUpdate` and replicaCount > 1. The - # workspaces PVC is OFF by default in Phase 5 so the chart now picks - # RollingUpdate automatically when `allowMultiReplica: true`. - preStopDelaySeconds: 0 - # Total time k8s waits for the pod to stop before SIGKILL. Must be - # > preStopDelaySeconds + actual shutdown time. The gateway's graceful - # shutdown closes the HTTP server, stops the task scheduler, drains the - # DB pool — empirically ~5s. - terminationGracePeriodSeconds: 45 resources: requests: cpu: 100m - memory: 512Mi + memory: 256Mi limits: - cpu: 2000m - memory: 2Gi + cpu: 1000m + memory: 1Gi + # Non-secret environment variables env: NODE_ENV: production - # Opt-in to multi-replica / rolling deploys. DEFAULT FALSE — leaving - # this off keeps the safe `strategy: Recreate` behavior. Setting it - # true makes the chart pick `RollingUpdate` (maxSurge: 1, - # maxUnavailable: 0) as long as `app.workspaces.enabled` is false (the - # Phase 5 default) or workspaces is RWX-configured. - # - # Prerequisites BEFORE setting true — chart cannot detect these: - # 1. NO active Chat connections in `mode: "polling"` (Telegram). - # Multiple replicas long-polling the same bot conflict. Use - # webhook mode only — see - # gateway/connections/chat-instance-manager.ts:610. - # 2. Acknowledge that the gateway has in-memory state for SSE - # streams (gateway/services/sse-manager.ts) and AskUser - # questions (gateway/connections/interaction-bridge.ts:193). 
- # A request whose SSE stream / AskUser click lands on a - # different replica than the one holding the state will be - # silently dropped. Migrating these to Postgres LISTEN/NOTIFY + - # durable storage is tracked as separate hardening work — until - # then, occasional dropped streams / button clicks are the cost - # of zero-downtime deploys on this configuration. - # - # If you re-enable the workspaces PVC (legacy path), you must either - # provision RWX-capable storage (NFS / EFS / CephFS / Longhorn-RWX) - # or leave `allowMultiReplica: false` — RWO + multi-replica is - # rejected by the chart's strategy helper and falls back to Recreate. - # - # Single-replica + RollingUpdate (replicaCount: 1, allowMultiReplica: true) - # still creates a brief overlap window where both pods are running. - # The same in-memory caveats apply during that window, just for a - # shorter span (~5-15s). - allowMultiReplica: false - - # Persistent workspaces volume. Embedded agent workers used to store - # session and workspace state below /app/workspaces (session.jsonl, - # input/output/temp scratch). Phase 5 moved session.jsonl to Postgres - # (`agent_transcript_snapshot`) and made input/output/temp pod-local - # ephemeral storage — a per-conversation advisory lock pins all turns - # of one conversation to a single pod for its run lifetime, so no PVC - # is required for correctness. Set `enabled: true` only if you have a - # specific reason to persist scratch files across pod restarts; the - # PVC blocks rolling deploys on the default RWO storage class. + # Persistent workspaces volume. 
The embedded worker used to store + # session state below /app/workspaces/<workspace>/.openclaw/session.jsonl, + # but Phase 5 (LOBU_SESSION_STORE default-on, snapshot mode) moved + # the durable transcript into Postgres (`agent_transcript_snapshot`) + # and made the rest of {WORKSPACE_DIR} (input/output/temp) pod-local + # ephemeral — a per-conversation advisory lock pins runs to one pod + # for the run lifetime. Disabled by default so the deployment can + # roll across replicas; flip to `true` only if you have a specific + # reason to persist scratch files across restarts. workspaces: enabled: false size: 20Gi storageClass: "" - accessModes: - - ReadWriteOnce +# Worker deployment settings worker: - # Connector ingestion worker. Disable if you only need the API/chat gateway. enabled: true replicaCount: 1 + # RWO cache PVCs cannot be mounted by old and new pods at once. + # Defaults to Recreate when cache.enabled; override for RWX/no-cache deployments. strategy: {} - podAnnotations: {} + + # Cache PVCs are mounted as root-owned by default; grant the image user group write access. podSecurityContext: fsGroup: 1001 fsGroupChangePolicy: OnRootMismatch - securityContext: {} resources: requests: @@ -132,24 +74,25 @@ worker: env: POLL_INTERVAL_MS: "10000" + # Transformer model cache volume cache: enabled: true size: 5Gi storageClass: "" - accessModes: - - ReadWriteOnce +# Embeddings service deployment settings embeddings: enabled: true replicaCount: 1 + # RWO cache PVCs cannot be mounted by old and new pods at once. + # Defaults to Recreate when cache.enabled; override for RWX/no-cache deployments. strategy: {} - podAnnotations: {} + + # Cache PVCs are mounted as root-owned by default; grant the image user group write access. podSecurityContext: fsGroup: 1001 fsGroupChangePolicy: OnRootMismatch - securityContext: {} - # If set, app and worker use this URL instead of the in-chart service. 
serviceUrl: "" resources: @@ -168,9 +111,8 @@ embeddings: enabled: true size: 5Gi storageClass: "" - accessModes: - - ReadWriteOnce +# Service settings service: type: ClusterIP port: 8787 @@ -189,35 +131,38 @@ service: sessionAffinity: None sessionAffinityTimeoutSeconds: 10800 -# Optional Ingress for public webhooks and the admin UI. When hosts are set, -# the app Deployment derives PUBLIC_WEB_URL from the first host unless you set -# app.env.PUBLIC_WEB_URL explicitly. +# Ingress settings ingress: - enabled: false - className: "" + enabled: true + className: traefik annotations: {} - hosts: [] + hosts: [] # List of hosts (e.g., [summaries.now, app.lobu.ai]) tls: [] -# Database configuration. For most installs, create a Secret with DATABASE_URL -# and set database.existingSecret below. Alternatively, keep database empty and -# provide DATABASE_URL through secretName/secrets. When migrations.enabled and -# secrets.create are both true, put DATABASE_URL in secrets.stringData or set -# migrations.database.existingSecret so the hook does not depend on a Secret -# that Helm has not created yet. +# Secret reference (must exist in namespace) +# Contains: JWT_SECRET, GITHUB_TOKEN, etc. 
+secretName: "" + +# Database settings (for connecting to CNPG cluster) database: + # If set, overrides DATABASE_URL from secret + host: "" + port: 5432 + name: "app" # Database name (required when usePooler=true) + # Secret containing database credentials (from CNPG) existingSecret: "" - existingSecretKey: uri - name: app + existingSecretKey: "uri" + # Use PgBouncer pooler instead of direct connection usePooler: false - poolerService: "" + poolerService: "" # e.g., "db-pooler-rw" +# Health check settings healthCheck: readinessProbe: initialDelaySeconds: 10 periodSeconds: 10 timeoutSeconds: 3 - failureThreshold: 6 + failureThreshold: 3 livenessProbe: initialDelaySeconds: 30 periodSeconds: 20 @@ -225,80 +170,52 @@ healthCheck: failureThreshold: 3 migrations: - # When enabled, Helm runs migrations as a pre-install/pre-upgrade Job and you - # should set app.env.SKIP_MIGRATIONS: "1" so app pods do not repeat them. If - # secrets.create is true and DATABASE_URL lives in that chart-managed Secret, - # the Job reads secrets.stringData.DATABASE_URL directly because normal chart - # resources are not created before pre-install hooks run. enabled: false database: existingSecret: "" - existingSecretKey: uri - name: app + existingSecretKey: "uri" + name: "app" usePooler: false poolerService: "" releaseGates: databasePreflight: + # When migrations are enabled, verify the target database is reachable + # before invoking dbmate. This fails fast on wrong DB names instead of + # letting dbmate attempt to create a missing production database. enabled: true timeoutSeconds: 10 smokeTest: + # Helm post-install/post-upgrade hook. The release is not considered + # healthy until the in-cluster app Service responds successfully. enabled: true path: /api/health timeoutSeconds: 300 intervalSeconds: 2 - # Post-migration schema verification. Each entry is `<table>.<column>`; - # the smoke job fails the rollout if any listed column is missing from - # `information_schema.columns`. 
Catches the case where a migration was - # edited after it was first applied to prod — the new ALTER never re-runs - # (its version is already in `schema_migrations`), but `dbmate up` on a - # fresh DB applies the edited file cleanly, so PR CI stays green. - # Add a column here whenever a new migration introduces one that app - # routes assume exists. - requiredSchema: - - device_workers.id - - device_workers.organization_id - - connections.device_worker_id - - connections.organization_id - # Phase 3 of the smoke job — drive an actual worker run end-to-end. + # Phase 3 — drive an actual worker run end-to-end. + # + # When enabled, the Job POSTs to /api/internal/smoke/dispatch, which + # inserts a synthetic chat_message run. The runs-queue MessageConsumer + # claims it, spawns a worker subprocess, and on completion writes a + # row into `agent_transcript_snapshot`. The Job polls for that row + # and fails the deploy if `terminal_status='completed'` doesn't + # materialise within `timeoutSeconds`. # - # When enabled, the Job POSTs to the internal /api/internal/smoke/dispatch - # endpoint, which inserts a synthetic chat_message run. The runs-queue - # MessageConsumer in the app pod claims it, spawns a worker subprocess, - # the worker runs, and on terminal cleanup writes a row into - # `agent_transcript_snapshot`. The Job polls that row and fails the - # deploy if `terminal_status='completed'` doesn't materialise inside - # `workerSmokeTimeoutSeconds`. This makes the recurring class of - # "gateway boots fine but workers can't process a single message" - # regressions un-shippable. + # Default OFF. Operators MUST add three keys to the deployment Secret + # before enabling, AND preprovision a matching agent row: # - # Default OFF: the chart cannot preprovision the synthetic agent for - # you. 
Operators MUST add three keys to the deployment Secret before - # enabling, AND preprovision a matching agent row: + # SMOKE_TEST_TOKEN=<random token, ≥32 chars> + # SMOKE_TEST_AGENT_ID=<smoke agent id> + # SMOKE_TEST_ORG_ID=<smoke org id> # - # 1. Generate a random token (≥32 chars) and add to the Secret: - # SMOKE_TEST_TOKEN=<random token, ≥32 chars> - # SMOKE_TEST_AGENT_ID=<smoke agent id> - # SMOKE_TEST_ORG_ID=<smoke org id> - # The gateway PINS the smoke agentId + organizationId from the - # env at dispatch time — caller-supplied values are ignored. - # This makes it structurally impossible for a leaked - # SMOKE_TEST_TOKEN to dispatch a synthetic run against a real - # tenant's agent. - # 2. Preprovision the synthetic agent. The simplest path is - # `lobu apply` against a dedicated "smoke" org/agent project - # whose only agent.id matches `SMOKE_TEST_AGENT_ID`. - # 3. Bump this stanza to `enabled: true` and roll the chart. + # The gateway PINS the smoke agentId + organizationId from the env + # at dispatch time — caller-supplied values in the dispatch body are + # ignored. This makes a leaked SMOKE_TEST_TOKEN structurally unable + # to dispatch a synthetic run against a real tenant. Mirrored from + # lobu#878. workerSmoke: enabled: false - # The smoke Job appends a release-scoped suffix so each helm - # upgrade gets its own conversation id within this prefix. conversationIdPrefix: "smoke-" - # How long to wait for the snapshot row to appear with - # `terminal_status='completed'`. Worker spawn + LLM round-trip + - # snapshot POST is ~10-30s on the prod cluster; 90s leaves - # headroom for a cold image pull on the worker subprocess. timeoutSeconds: 90 - # DB poll interval inside the Job. 
intervalSeconds: 3 From cfdcb52ce3251a5bbb3fc4d91f4c834fbdd337cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Mon, 18 May 2026 17:19:26 +0100 Subject: [PATCH 2/2] docs(deployment): drop kubernetes.mdx refs to removed chart bits After lobu#882's chart alignment to the owletto fork, the docs were pointing at: - charts/lobu/values.example.yaml (file deleted) - secrets.create / secrets.stringData (feature removed with the secret.yaml template) Replace the curl-the-example flow with an inline values.yaml stub and explicit kubectl-create-secret commands matching the new owletto-style chart values shape. Drop the Chart-managed-secrets section since the feature no longer exists. --- .../content/docs/deployment/kubernetes.mdx | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/packages/landing/src/content/docs/deployment/kubernetes.mdx b/packages/landing/src/content/docs/deployment/kubernetes.mdx index 9ddcbf335..1effead6e 100644 --- a/packages/landing/src/content/docs/deployment/kubernetes.mdx +++ b/packages/landing/src/content/docs/deployment/kubernetes.mdx @@ -35,14 +35,7 @@ Add any other provider, platform, or OAuth secrets your deployment needs to `lob ## Prepare values -Start from the chart's example values file: - -```bash -curl -fsSLo values.yaml \ - https://raw.githubusercontent.com/lobu-ai/lobu/main/charts/lobu/values.example.yaml -``` - -Edit at least these fields: +Start a `values.yaml` with at least these fields: ```yaml secretName: lobu-secrets @@ -51,11 +44,22 @@ database: existingSecret: lobu-db existingSecretKey: uri +# Run migrations as a Helm pre-install/pre-upgrade Job. SKIP_MIGRATIONS +# below stops app pods from also trying — the Job is the single writer. 
+migrations: + enabled: true + database: + existingSecret: lobu-db + existingSecretKey: uri + app: env: NODE_ENV: production SKIP_MIGRATIONS: "1" WORKER_ALLOWED_DOMAINS: "" + # Read by server.ts to mint OAuth redirect URIs, webhook callbacks, + # and invite links. MUST match your public origin. + PUBLIC_WEB_URL: "https://lobu.example.com" ingress: enabled: true @@ -64,7 +68,18 @@ ingress: - lobu.example.com ``` -When `ingress.hosts` is set, the app derives `PUBLIC_WEB_URL` from the first host. If your public URL differs from that host, set `app.env.PUBLIC_WEB_URL` explicitly. +Create the referenced secrets before installing: + +```bash +kubectl -n lobu create secret generic lobu-db \ + --from-literal=uri='postgresql://USER:PASSWORD@HOST:5432/lobu?sslmode=require' +kubectl -n lobu create secret generic lobu-secrets \ + --from-literal=JWT_SECRET='replace-with-a-long-random-value' \ + --from-literal=BETTER_AUTH_SECRET='replace-with-a-long-random-value' \ + --from-literal=ANTHROPIC_API_KEY='sk-ant-...' +``` + +Always set `app.env.PUBLIC_WEB_URL` to your public origin (e.g. `https://lobu.example.com`). The server reads this to mint OAuth redirect URIs, webhook callbacks, and invite links; without it those URLs default to `http://localhost:8787` and the install will be broken end-to-end. ## Install @@ -109,12 +124,6 @@ helm upgrade lobu oci://ghcr.io/lobu-ai/charts/lobu \ -f values.yaml ``` -## Chart-managed secrets - -For production, prefer an external secret manager or pre-created Kubernetes Secrets. For demos, the chart can create a Secret with `secrets.create: true`. - -If you enable both `secrets.create` and `migrations.enabled` and put `DATABASE_URL` in `secrets.stringData`, leave `migrations.database.existingSecret` empty. Pre-install hooks run before normal Helm resources are created, so the migration job reads `secrets.stringData.DATABASE_URL` directly in that setup. 
- ## Next steps - Configure chat platforms from the [admin UI](/guides/admin-ui/) or the connection API.