diff --git a/apps/operator/config/crd/bases/app.helios.io_heliosapps.yaml b/apps/operator/config/crd/bases/app.helios.io_heliosapps.yaml index c620b1c..02d412b 100644 --- a/apps/operator/config/crd/bases/app.helios.io_heliosapps.yaml +++ b/apps/operator/config/crd/bases/app.helios.io_heliosapps.yaml @@ -101,9 +101,9 @@ spec: description: ContextSubpath is the path where the Dockerfile is located type: string databaseSecretRef: - default: api-db-secret - description: DatabaseSecretRef is the name of the secret containing - database credentials for migrations + description: |- + DatabaseSecretRef is the name of the secret containing database credentials for migrations. + Defaults to {appName}-db-secret if not set. type: string description: description: Description of the application diff --git a/apps/operator/config/manager/kustomization.yaml b/apps/operator/config/manager/kustomization.yaml index 9215473..ad13e96 100644 --- a/apps/operator/config/manager/kustomization.yaml +++ b/apps/operator/config/manager/kustomization.yaml @@ -4,3 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller + newName: controller + newTag: latest diff --git a/apps/operator/config/rbac/role.yaml b/apps/operator/config/rbac/role.yaml index 11eea45..3fa8bfe 100644 --- a/apps/operator/config/rbac/role.yaml +++ b/apps/operator/config/rbac/role.yaml @@ -70,6 +70,8 @@ rules: - apiGroups: - rbac.authorization.k8s.io resources: + - clusterrolebindings + - clusterroles - rolebindings - roles verbs: diff --git a/apps/operator/internal/controller/argocd/application.go b/apps/operator/internal/controller/argocd/application.go index 907a87f..2841ff8 100644 --- a/apps/operator/internal/controller/argocd/application.go +++ b/apps/operator/internal/controller/argocd/application.go @@ -8,13 +8,68 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -// GenerateArgoApplication creates an ArgoCD Application manifest. 
+// GenerateArgoApplication creates an ArgoCD Application manifest with PreSync hooks +// if the HeliosApp has a database trait for automatic database migrations. func GenerateArgoApplication(heliosApp *appv1alpha1.HeliosApp) (*unstructured.Unstructured, error) { appName := heliosApp.Name + "-argocd" targetNamespace := cmp.Or(heliosApp.Spec.ArgoCDNamespace, "argocd") project := cmp.Or(heliosApp.Spec.ArgoCDProject, "default") gitOpsBranch := cmp.Or(heliosApp.Spec.GitOpsBranch, "main") + spec := map[string]any{ + "project": project, + "source": map[string]any{ + "repoURL": shared.RewriteGiteaURL(heliosApp.Spec.GitOpsRepo), + "targetRevision": gitOpsBranch, + "path": heliosApp.Spec.GitOpsPath, + }, + "destination": map[string]any{ + "server": "https://kubernetes.default.svc", + "namespace": heliosApp.Namespace, + }, + "syncPolicy": map[string]any{ + "automated": map[string]any{ + "prune": true, + "selfHeal": true, + }, + "syncOptions": []any{ + "CreateNamespace=true", + }, + }, + "ignoreDifferences": []any{ + map[string]any{ + "group": "apps", + "kind": "Deployment", + "jqPathExpressions": []any{ + `.spec.template.spec.containers[].env[]? 
| select(.name | test("^DB_"))`, + }, + }, + }, + } + + // Add PreSync hook if database trait exists + if HasDatabaseTrait(heliosApp) { + spec["syncPolicy"] = map[string]any{ + "automated": map[string]any{ + "prune": true, + "selfHeal": true, + }, + "syncOptions": []any{ + "CreateNamespace=true", + }, + } + + // Add PreSync hook to application + // Note: PreSync Job is created and managed by PreSyncReconciler + // This is referenced via Job annotations, not stored in Application spec + syncPolicy := spec["syncPolicy"].(map[string]any) + syncPolicy["syncOptions"] = append( + syncPolicy["syncOptions"].([]any), + "SkipDryRunOnMissingResource=true", + ) + spec["syncPolicy"] = syncPolicy + } + app := map[string]any{ "apiVersion": "argoproj.io/v1alpha1", "kind": "Application", @@ -26,36 +81,7 @@ func GenerateArgoApplication(heliosApp *appv1alpha1.HeliosApp) (*unstructured.Un "app.kubernetes.io/managed-by": "helios-operator", }, }, - "spec": map[string]any{ - "project": project, - "source": map[string]any{ - "repoURL": shared.RewriteGiteaURL(heliosApp.Spec.GitOpsRepo), - "targetRevision": gitOpsBranch, - "path": heliosApp.Spec.GitOpsPath, - }, - "destination": map[string]any{ - "server": "https://kubernetes.default.svc", - "namespace": heliosApp.Namespace, - }, - "syncPolicy": map[string]any{ - "automated": map[string]any{ - "prune": true, - "selfHeal": true, - }, - "syncOptions": []any{ - "CreateNamespace=true", - }, - }, - "ignoreDifferences": []any{ - map[string]any{ - "group": "apps", - "kind": "Deployment", - "jqPathExpressions": []any{ - `.spec.template.spec.containers[].env[]? 
| select(.name | test("^DB_"))`, - }, - }, - }, - }, + "spec": spec, } return &unstructured.Unstructured{Object: app}, nil diff --git a/apps/operator/internal/controller/argocd/presync.go b/apps/operator/internal/controller/argocd/presync.go new file mode 100644 index 0000000..467360a --- /dev/null +++ b/apps/operator/internal/controller/argocd/presync.go @@ -0,0 +1,431 @@ +package argocd + +import ( + "context" + "encoding/json" + "fmt" + + appv1alpha1 "github.com/helios-platform-team/helios-platform/apps/operator/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + // preSyncFinalizerKey is used to ensure cluster-scoped RBAC resources + // (ClusterRole and ClusterRoleBinding) are properly cleaned up when a HeliosApp is deleted. + preSyncFinalizerKey = "argocd.helios.io/presync-cleanup" + + // databaseTraitType is the type identifier for database traits in components. + databaseTraitType = "database" +) + +// PreSyncReconciler creates PreSync Jobs and supporting resources for database migrations. +type PreSyncReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +// NewPreSyncReconciler creates a new PreSyncReconciler. +func NewPreSyncReconciler(c client.Client, scheme *runtime.Scheme) *PreSyncReconciler { + return &PreSyncReconciler{ + Client: c, + Scheme: scheme, + } +} + +// ReconcilePreSyncResources creates PreSync Job, EventListener, and supporting RBAC +// when a HeliosApp has a database trait. This enables automatic database migrations +// before ArgoCD deploys the application. 
+func (r *PreSyncReconciler) ReconcilePreSyncResources(
+	ctx context.Context,
+	heliosApp *appv1alpha1.HeliosApp,
+) error {
+	log := logf.FromContext(ctx)
+
+	// Use the package-level helper so trait detection cannot drift from the
+	// check GenerateArgoApplication performs.
+	if !HasDatabaseTrait(heliosApp) {
+		log.Info("No database trait found, skipping PreSync resource creation")
+		return nil
+	}
+
+	log.Info("Creating PreSync resources for database migrations", "app", heliosApp.Name)
+
+	// The finalizer goes on before any cluster-scoped object is created, so
+	// the deletion path always gets a chance to remove the ClusterRole and
+	// ClusterRoleBinding. AddPreSyncFinalizer already wraps its error with
+	// context, so it is returned unchanged.
+	if err := r.AddPreSyncFinalizer(ctx, heliosApp); err != nil {
+		return err
+	}
+
+	// Order matters: the binding references both the ServiceAccount and the
+	// ClusterRole, and the Job definition references the ServiceAccount.
+	steps := []struct {
+		resource  string
+		reconcile func(context.Context, *appv1alpha1.HeliosApp) error
+	}{
+		{"ServiceAccount", r.reconcileServiceAccount},
+		{"ClusterRole", r.reconcileRole},
+		{"ClusterRoleBinding", r.reconcileRoleBinding},
+		// The Job is not executed here; its definition is only stored.
+		{"PreSync Job config", r.reconcilePreSyncJobconfig},
+	}
+	for _, step := range steps {
+		if err := step.reconcile(ctx, heliosApp); err != nil {
+			return fmt.Errorf("failed to create %s: %w", step.resource, err)
+		}
+	}
+
+	log.Info("PreSync resources created successfully", "app", heliosApp.Name)
+	return nil
+}
+
+// reconcileServiceAccount creates a ServiceAccount for PreSync Job execution.
+func (r *PreSyncReconciler) reconcileServiceAccount(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	account := &corev1.ServiceAccount{}
+	account.Name = fmt.Sprintf("%s-migrator", heliosApp.Name)
+	account.Namespace = heliosApp.Namespace
+	account.Labels = map[string]string{
+		"app.kubernetes.io/name":       heliosApp.Name,
+		"app.kubernetes.io/component":  "database-migration",
+		"app.kubernetes.io/managed-by": "helios-operator",
+	}
+
+	// Record the HeliosApp as the controlling owner of the ServiceAccount.
+	if err := ctrl.SetControllerReference(heliosApp, account, r.Scheme); err != nil {
+		return err
+	}
+
+	// An already existing ServiceAccount is left exactly as it is.
+	if err := r.Create(ctx, account); err != nil && !apierrors.IsAlreadyExists(err) {
+		return err
+	}
+	return nil
+}
+
+// reconcileRole creates the ClusterRole that allows managing Jobs and reading
+// Pods and Pod logs. An existing ClusterRole with the same name is not modified.
+//
+// NOTE(review): the name is built from the app name only and the object is
+// cluster-scoped, so HeliosApps with the same name in different namespaces
+// would resolve to the same ClusterRole - confirm this is intended.
+func (r *PreSyncReconciler) reconcileRole(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	rules := []rbacv1.PolicyRule{
+		{
+			APIGroups: []string{"batch"},
+			Resources: []string{"jobs"},
+			Verbs:     []string{"get", "list", "watch", "create", "delete"},
+		},
+		{
+			APIGroups: []string{""},
+			Resources: []string{"pods"},
+			Verbs:     []string{"get", "list", "watch"},
+		},
+		{
+			APIGroups: []string{""},
+			Resources: []string{"pods/log"},
+			Verbs:     []string{"get"},
+		},
+	}
+
+	clusterRole := &rbacv1.ClusterRole{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: fmt.Sprintf("%s-presync-job-role", heliosApp.Name),
+			Labels: map[string]string{
+				"app.kubernetes.io/name":       heliosApp.Name,
+				"app.kubernetes.io/managed-by": "helios-operator",
+			},
+		},
+		Rules: rules,
+	}
+
+	err := r.Create(ctx, clusterRole)
+	if err == nil || apierrors.IsAlreadyExists(err) {
+		return nil
+	}
+	return err
+}
+
+// reconcileRoleBinding creates a ClusterRoleBinding for the ServiceAccount.
+func (r *PreSyncReconciler) reconcileRoleBinding(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	// Grants the ClusterRole created by reconcileRole ...
+	roleRef := rbacv1.RoleRef{
+		APIGroup: "rbac.authorization.k8s.io",
+		Kind:     "ClusterRole",
+		Name:     fmt.Sprintf("%s-presync-job-role", heliosApp.Name),
+	}
+	// ... to the ServiceAccount created by reconcileServiceAccount.
+	migrator := rbacv1.Subject{
+		Kind:      "ServiceAccount",
+		Name:      fmt.Sprintf("%s-migrator", heliosApp.Name),
+		Namespace: heliosApp.Namespace,
+	}
+
+	clusterBinding := &rbacv1.ClusterRoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: fmt.Sprintf("%s-presync-job-binding", heliosApp.Name),
+			Labels: map[string]string{
+				"app.kubernetes.io/name":       heliosApp.Name,
+				"app.kubernetes.io/managed-by": "helios-operator",
+			},
+		},
+		RoleRef:  roleRef,
+		Subjects: []rbacv1.Subject{migrator},
+	}
+
+	// An already existing binding is left exactly as it is.
+	err := r.Create(ctx, clusterBinding)
+	if err == nil || apierrors.IsAlreadyExists(err) {
+		return nil
+	}
+	return err
+}
+
+// reconcilePreSyncJobconfig stores the PreSync Job definition as a JSON
+// annotation on the HeliosApp. The Job carries the ArgoCD PreSync hook annotations.
+func (r *PreSyncReconciler) reconcilePreSyncJobconfig(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	const (
+		jobAnnotation   = "helios.io/presync-job"
+		traitAnnotation = "helios.io/has-database-trait"
+	)
+
+	log := logf.FromContext(ctx)
+
+	// The first named component carrying a database trait decides which
+	// secret the migration Job reads its connection string from.
+	var databaseComponentName string
+	for _, comp := range heliosApp.Spec.Components {
+		for _, trait := range comp.Traits {
+			if trait.Type == databaseTraitType {
+				databaseComponentName = comp.Name
+				break
+			}
+		}
+		if databaseComponentName != "" {
+			break
+		}
+	}
+
+	if databaseComponentName == "" {
+		log.V(1).Info("No database component found, skipping PreSync Job config creation")
+		return nil
+	}
+
+	// The database secret is named {componentName}-db-secret following the operator's convention.
+	databaseSecretName := fmt.Sprintf("%s-db-secret", databaseComponentName)
+
+	// For now the image follows the naming convention <app>-migrate:latest.
+	// NOTE(review): the {{.Values.dockerOrg}} placeholder is written verbatim
+	// into the image string and nothing in this function renders it - confirm
+	// that a later step substitutes it.
+	migrateImage := fmt.Sprintf("index.docker.io/{{.Values.dockerOrg}}/%s-migrate:latest", heliosApp.Name)
+
+	jobBytes, err := json.Marshal(preSyncJobManifest(heliosApp, databaseSecretName, migrateImage))
+	if err != nil {
+		return fmt.Errorf("failed to marshal PreSync Job: %w", err)
+	}
+	jobJSON := string(jobBytes)
+
+	// Refetch the latest object to avoid stale object conflicts after finalizer addition.
+	latest := &appv1alpha1.HeliosApp{}
+	if err := r.Get(ctx, client.ObjectKeyFromObject(heliosApp), latest); err != nil {
+		return fmt.Errorf("failed to refetch HeliosApp before annotation update: %w", err)
+	}
+
+	// Nothing to write when both annotations already hold the desired values;
+	// this avoids an API write on every reconcile. encoding/json emits map
+	// keys in sorted order, so equal manifests produce equal strings.
+	if latest.Annotations[jobAnnotation] == jobJSON && latest.Annotations[traitAnnotation] == "true" {
+		return nil
+	}
+
+	if latest.Annotations == nil {
+		latest.Annotations = make(map[string]string)
+	}
+	latest.Annotations[jobAnnotation] = jobJSON
+	latest.Annotations[traitAnnotation] = "true"
+
+	if err := r.Update(ctx, latest); err != nil {
+		return fmt.Errorf("failed to update HeliosApp with PreSync Job config: %w", err)
+	}
+
+	return nil
+}
+
+// preSyncJobManifest builds the batch/v1 Job, annotated as an ArgoCD PreSync
+// hook, that runs migrateImage with PGRST_DB_URI taken from databaseSecretName.
+func preSyncJobManifest(heliosApp *appv1alpha1.HeliosApp, databaseSecretName, migrateImage string) map[string]any {
+	migrateContainer := map[string]any{
+		"name":            "db-migrate",
+		"image":           migrateImage,
+		"imagePullPolicy": "Always",
+		"env": []map[string]any{
+			{
+				"name": "PGRST_DB_URI",
+				"valueFrom": map[string]any{
+					"secretKeyRef": map[string]any{
+						"name": databaseSecretName,
+						"key":  "PGRST_DB_URI",
+					},
+				},
+			},
+		},
+		"resources": map[string]any{
+			"requests": map[string]any{
+				"cpu":    "100m",
+				"memory": "128Mi",
+			},
+			"limits": map[string]any{
+				"cpu":    "500m",
+				"memory": "512Mi",
+			},
+		},
+		"securityContext": map[string]any{
+			"readOnlyRootFilesystem": true,
+		},
+	}
+
+	return map[string]any{
+		"apiVersion": "batch/v1",
+		"kind":       "Job",
+		"metadata": map[string]any{
+			"name":      fmt.Sprintf("%s-db-migrate-presync", heliosApp.Name),
+			"namespace": heliosApp.Namespace,
+			"labels": map[string]any{
+				"app":      heliosApp.Name,
+				"job-type": "db-migration",
+			},
+			"annotations": map[string]any{
+				"argocd.argoproj.io/hook":               "PreSync",
+				"argocd.argoproj.io/hook-delete-policy": "BeforeHookCreation",
+			},
+		},
+		"spec": map[string]any{
+			"backoffLimit":            3,
+			"ttlSecondsAfterFinished": 3600,
+			"template": map[string]any{
+				"metadata": map[string]any{
+					"labels": map[string]any{
+						"app":      heliosApp.Name,
+						"job-type": "db-migration",
+					},
+				},
+				"spec": map[string]any{
+					"serviceAccountName": fmt.Sprintf("%s-migrator", heliosApp.Name),
+					"containers":         []map[string]any{migrateContainer},
+					"restartPolicy":      "Never",
+					"securityContext": map[string]any{
+						"runAsNonRoot": true,
+						"runAsUser":    1000,
+					},
+				},
+			},
+		},
+	}
+}
+
+// HasDatabaseTrait checks if the HeliosApp has any component with database trait.
+func HasDatabaseTrait(heliosApp *appv1alpha1.HeliosApp) bool {
+	// A nil app has no components and therefore no database trait.
+	if heliosApp == nil {
+		return false
+	}
+	for _, comp := range heliosApp.Spec.Components {
+		for _, trait := range comp.Traits {
+			if trait.Type == databaseTraitType {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// AddPreSyncFinalizer adds the presync cleanup finalizer to the HeliosApp.
+// This finalizer ensures cluster-scoped RBAC resources are properly cleaned up
+// when the HeliosApp is deleted.
+//
+// It is a no-op when the finalizer is already present. On success the caller's
+// heliosApp is brought in line with what was written (finalizer list and
+// resourceVersion), so it does not keep pointing at the superseded revision.
+func (r *PreSyncReconciler) AddPreSyncFinalizer(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	if HasPreSyncFinalizer(heliosApp) {
+		return nil // Finalizer already added
+	}
+
+	// Work on a copy so a failed Update leaves the caller's object untouched.
+	updated := heliosApp.DeepCopy()
+	updated.Finalizers = append(updated.Finalizers, preSyncFinalizerKey)
+
+	if err := r.Update(ctx, updated); err != nil {
+		return fmt.Errorf("failed to add presync finalizer: %w", err)
+	}
+
+	// The Update was sent with the caller's own revision, so the accepted
+	// object is the caller's object plus the finalizer; only these two fields
+	// need to be carried back.
+	heliosApp.Finalizers = updated.Finalizers
+	heliosApp.ResourceVersion = updated.ResourceVersion
+
+	return nil
+}
+
+// HandlePreSyncCleanup cleans up cluster-scoped RBAC resources when HeliosApp is deleted.
+// This method should be called when the HeliosApp has a deletion timestamp and the
+// presync cleanup finalizer is present.
+func (r *PreSyncReconciler) HandlePreSyncCleanup(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) error {
+	log := logf.FromContext(ctx)
+
+	// ClusterRole: a missing object counts as already cleaned up.
+	roleName := fmt.Sprintf("%s-presync-job-role", heliosApp.Name)
+	clusterRole := &rbacv1.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: roleName}}
+	switch err := r.Delete(ctx, clusterRole); {
+	case err == nil:
+		log.Info("Deleted ClusterRole", "name", roleName)
+	case apierrors.IsNotFound(err):
+		log.Info("ClusterRole not found, skipping deletion", "name", roleName)
+	default:
+		return fmt.Errorf("failed to delete ClusterRole '%s': %w", roleName, err)
+	}
+
+	// ClusterRoleBinding: same handling as the ClusterRole.
+	bindingName := fmt.Sprintf("%s-presync-job-binding", heliosApp.Name)
+	clusterBinding := &rbacv1.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: bindingName}}
+	switch err := r.Delete(ctx, clusterBinding); {
+	case err == nil:
+		log.Info("Deleted ClusterRoleBinding", "name", bindingName)
+	case apierrors.IsNotFound(err):
+		log.Info("ClusterRoleBinding not found, skipping deletion", "name", bindingName)
+	default:
+		return fmt.Errorf("failed to delete ClusterRoleBinding '%s': %w", bindingName, err)
+	}
+
+	// Both objects are gone: drop our finalizer and keep every other one.
+	released := heliosApp.DeepCopy()
+	remaining := make([]string, 0, len(released.Finalizers))
+	for _, key := range released.Finalizers {
+		if key == preSyncFinalizerKey {
+			continue
+		}
+		remaining = append(remaining, key)
+	}
+	released.Finalizers = remaining
+
+	if err := r.Update(ctx, released); err != nil {
+		return fmt.Errorf("failed to remove presync finalizer: %w", err)
+	}
+
+	log.Info("Presync cleanup completed and finalizer removed", "app", heliosApp.Name)
+	return nil
+}
+
+// HasPreSyncFinalizer checks if the HeliosApp has the presync cleanup finalizer.
+func HasPreSyncFinalizer(heliosApp *appv1alpha1.HeliosApp) bool { + for _, finalizer := range heliosApp.Finalizers { + if finalizer == preSyncFinalizerKey { + return true + } + } + return false +} + +// GetPreSyncFinalizerKey returns the presync cleanup finalizer key. +// This is exported for use by the main controller. +func GetPreSyncFinalizerKey() string { + return preSyncFinalizerKey +} diff --git a/apps/operator/internal/controller/argocd/reconciler.go b/apps/operator/internal/controller/argocd/reconciler.go index 2bc5839..41fecdf 100644 --- a/apps/operator/internal/controller/argocd/reconciler.go +++ b/apps/operator/internal/controller/argocd/reconciler.go @@ -25,10 +25,17 @@ func NewReconciler(c client.Client, scheme *runtime.Scheme) *Reconciler { return &Reconciler{Client: c, Scheme: scheme} } -// Reconcile ensures the ArgoCD Application and sync RBAC exist. +// Reconcile ensures the ArgoCD Application, PreSync resources, and sync RBAC exist. func (r *Reconciler) Reconcile(ctx context.Context, app *appv1alpha1.HeliosApp) error { log := logf.FromContext(ctx) + // Reconcile PreSync resources if database trait exists + preSyncReconciler := NewPreSyncReconciler(r.Client, r.Scheme) + if err := preSyncReconciler.ReconcilePreSyncResources(ctx, app); err != nil { + log.Error(err, "Failed to reconcile PreSync resources") + return fmt.Errorf("failed to reconcile PreSync resources: %w", err) + } + log.Info("Ensuring ArgoCD Application exists") argoApp, err := GenerateArgoApplication(app) if err != nil { diff --git a/apps/operator/internal/controller/database/reconciler.go b/apps/operator/internal/controller/database/reconciler.go index a091406..ad490b2 100644 --- a/apps/operator/internal/controller/database/reconciler.go +++ b/apps/operator/internal/controller/database/reconciler.go @@ -9,6 +9,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -29,6 +30,97 @@ func NewReconciler(c client.Client, scheme *runtime.Scheme) *Reconciler { return &Reconciler{Client: c, Scheme: scheme} } +// ReconcileSystemSecrets copies system-level secrets (docker-credentials, etc.) +// from the default namespace to the app's namespace. This ensures that Tekton +// tasks have access to required secrets for image building and pushing. +func (r *Reconciler) ReconcileSystemSecrets(ctx context.Context, app *appv1alpha1.HeliosApp) error { + log := logf.FromContext(ctx) + + // System secrets to provision to each app namespace + systemSecrets := []string{"docker-credentials", "helios-gitops-bot"} + + for _, secretName := range systemSecrets { + // Check if secret already exists in app namespace + appSecret := &corev1.Secret{} + err := r.Client.Get(ctx, types.NamespacedName{ + Name: secretName, + Namespace: app.Namespace, + }, appSecret) + + if err == nil { + log.V(1).Info("System secret already exists in app namespace, skipping", + "secret", secretName, + "namespace", app.Namespace) + continue + } + + if !errors.IsNotFound(err) { + log.Error(err, "Failed to check for system secret in app namespace", + "secret", secretName, + "namespace", app.Namespace) + return fmt.Errorf("failed to check for system secret %s in namespace %s: %w", secretName, app.Namespace, err) + } + + // Read secret from default namespace + defaultSecret := &corev1.Secret{} + err = r.Client.Get(ctx, types.NamespacedName{ + Name: secretName, + Namespace: "default", + }, defaultSecret) + + if err != nil { + if errors.IsNotFound(err) { + return fmt.Errorf("required system secret %s not found in default namespace: %w", secretName, err) + } + log.Error(err, "Failed to read system secret from default namespace", + "secret", secretName) + return fmt.Errorf("failed to read system secret %s from default namespace: %w", secretName, err) + } + + // Copy secret to app 
namespace + newSecret := r.copySecret(defaultSecret, app.Namespace) + + // Note: Do NOT set controller reference for system secrets + // They are not "owned" by the app and should persist if the app is deleted + + if err := r.Client.Create(ctx, newSecret); err != nil { + if errors.IsAlreadyExists(err) { + log.Info("System secret was created concurrently, skipping", + "secret", secretName, + "namespace", app.Namespace) + continue + } + log.Error(err, "Failed to create system secret in app namespace", + "secret", secretName, + "namespace", app.Namespace) + return fmt.Errorf("failed to create system secret %s in namespace %s: %w", secretName, app.Namespace, err) + } + + log.Info("Successfully provisioned system secret to app namespace", + "secret", secretName, + "namespace", app.Namespace) + } + + return nil +} + +// copySecret creates a copy of a secret with a new namespace. +// It preserves the secret type and data, but resets metadata. +func (r *Reconciler) copySecret(source *corev1.Secret, targetNamespace string) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: source.Name, + Namespace: targetNamespace, + Labels: map[string]string{ + "helios.io/managed-by": "operator", + "helios.io/system-secret": "true", + }, + }, + Type: source.Type, + Data: source.Data, + } +} + // ReconcileSecrets ensures database credential secrets exist for all // components with database traits. If a secret already exists, it is not // modified to preserve existing credentials. 
diff --git a/apps/operator/internal/controller/heliosapp_controller.go b/apps/operator/internal/controller/heliosapp_controller.go index 3e19b18..9166011 100644 --- a/apps/operator/internal/controller/heliosapp_controller.go +++ b/apps/operator/internal/controller/heliosapp_controller.go @@ -79,7 +79,7 @@ func NewHeliosAppReconciler( // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings;clusterroles;clusterrolebindings,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=argoproj.io,resources=applications,verbs=get;list;watch;create;update;patch;delete // Reconcile handles the reconciliation loop for HeliosApp. 
@@ -98,6 +98,12 @@ func (r *HeliosAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( log.Info("Reconciling HeliosApp", "name", heliosApp.Name, "namespace", heliosApp.Namespace) + // Handle deletion: Clean up cluster-scoped RBAC resources if needed + if !heliosApp.DeletionTimestamp.IsZero() { + log.Info("HeliosApp is being deleted, handling cleanup", "app", heliosApp.Name) + return r.handlePreSyncCleanup(ctx, &heliosApp) + } + // Pre-flight validation: Check if all referenced secrets exist if err := r.validateSecretReferences(ctx, &heliosApp); err != nil { log.Error(err, "Pre-flight validation failed: referenced secret does not exist") @@ -130,6 +136,18 @@ func (r *HeliosAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, err } + // ------------------------------------------------------------------ + // PHASE 0.3: System Secrets Provisioning + // Copy system-level secrets (docker-credentials, etc.) from the default + // namespace to the app's namespace. This ensures Tekton tasks have access + // to required secrets for image building and pushing. + // ------------------------------------------------------------------ + if err := r.Database.ReconcileSystemSecrets(ctx, &heliosApp); err != nil { + log.Error(err, "Failed to provision system secrets") + r.updateStatus(ctx, &heliosApp, appv1alpha1.PhaseFailed, fmt.Sprintf("System secret provisioning failed: %v", err)) + return ctrl.Result{}, err + } + // ------------------------------------------------------------------ // PHASE 0.5: Database Credential Secrets // Generate and store secure credentials for components with database traits. @@ -288,3 +306,32 @@ func (r *HeliosAppReconciler) validateSecretReferences(ctx context.Context, app return nil } + +// handlePreSyncCleanup handles cleanup of cluster-scoped resources when HeliosApp is deleted. 
+// It removes the presync finalizer and triggers cleanup of ClusterRole and ClusterRoleBinding +// resources that were created for database migration PreSync hooks. +func (r *HeliosAppReconciler) handlePreSyncCleanup(ctx context.Context, heliosApp *appv1alpha1.HeliosApp) (ctrl.Result, error) { + log := logf.FromContext(ctx) + + // Check if the presync cleanup finalizer is present + hasPreSyncFinalizer := false + for _, finalizer := range heliosApp.Finalizers { + if finalizer == argocd.GetPreSyncFinalizerKey() { + hasPreSyncFinalizer = true + break + } + } + + if hasPreSyncFinalizer { + // Create a PreSyncReconciler to handle cleanup + preSyncReconciler := argocd.NewPreSyncReconciler(r.Client, r.Scheme) + + // Clean up cluster-scoped RBAC resources + if err := preSyncReconciler.HandlePreSyncCleanup(ctx, heliosApp); err != nil { + log.Error(err, "Failed to cleanup presync resources") + return ctrl.Result{}, fmt.Errorf("failed to cleanup presync resources: %w", err) + } + } + + return ctrl.Result{}, nil +} diff --git a/apps/operator/internal/controller/heliosapp_controller_unit_test.go b/apps/operator/internal/controller/heliosapp_controller_unit_test.go index 9a5edcf..a18492f 100644 --- a/apps/operator/internal/controller/heliosapp_controller_unit_test.go +++ b/apps/operator/internal/controller/heliosapp_controller_unit_test.go @@ -93,11 +93,32 @@ func TestHeliosAppReconciler_Reconcile_Success(t *testing.T) { }, } + dockerCredentialsSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "docker-credentials", + Namespace: "default", + }, + Type: corev1.SecretTypeDockercfg, + Data: map[string][]byte{ + ".dockercfg": []byte(`{}`), + }, + } + + heliosGitopsBotSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "helios-gitops-bot", + Namespace: "default", + }, + Data: map[string][]byte{ + "token": []byte("dummy-token"), + }, + } + // 3. Setup Fake Client // We init with the object existing fakeClient := fake.NewClientBuilder(). 
WithScheme(scheme). - WithObjects(heliosApp, gitOpsSecret). + WithObjects(heliosApp, gitOpsSecret, dockerCredentialsSecret, heliosGitopsBotSecret). WithStatusSubresource(heliosApp). Build() @@ -177,7 +198,28 @@ func TestHeliosAppReconciler_Reconcile_PendingImage(t *testing.T) { }, } - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(heliosApp).WithStatusSubresource(heliosApp).Build() + dockerCredentialsSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "docker-credentials", + Namespace: "default", + }, + Type: corev1.SecretTypeDockercfg, + Data: map[string][]byte{ + ".dockercfg": []byte(`{}`), + }, + } + + heliosGitopsBotSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "helios-gitops-bot", + Namespace: "default", + }, + Data: map[string][]byte{ + "token": []byte("dummy-token"), + }, + } + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(heliosApp, dockerCredentialsSecret, heliosGitopsBotSecret).WithStatusSubresource(heliosApp).Build() r := &HeliosAppReconciler{ Client: fakeClient, diff --git a/apps/operator/internal/controller/interfaces.go b/apps/operator/internal/controller/interfaces.go index 2901043..14c3e0c 100644 --- a/apps/operator/internal/controller/interfaces.go +++ b/apps/operator/internal/controller/interfaces.go @@ -19,6 +19,7 @@ type ArgoCDReconciler interface { // DatabaseReconciler handles database provisioning and secret management. 
type DatabaseReconciler interface { + ReconcileSystemSecrets(ctx context.Context, app *appv1alpha1.HeliosApp) error ReconcileSecrets(ctx context.Context, app *appv1alpha1.HeliosApp) error ReconcileInstances(ctx context.Context, app *appv1alpha1.HeliosApp) error ReconcileInjection(ctx context.Context, app *appv1alpha1.HeliosApp) (pending bool, err error) diff --git a/apps/portal/examples/postgrest-template/content/source/Dockerfile.migrate b/apps/portal/examples/postgrest-template/content/source/Dockerfile.migrate new file mode 100644 index 0000000..0e136fb --- /dev/null +++ b/apps/portal/examples/postgrest-template/content/source/Dockerfile.migrate @@ -0,0 +1,28 @@ +# Dockerfile.migrate: Build a Docker image containing golang-migrate and SQL migration scripts +# This image is used by ArgoCD PreSync hooks to run database migrations before deploying PostgREST + +FROM golang:1.22-alpine AS builder + +# Install wget for downloading golang-migrate +RUN apk add --no-cache wget + +# Download golang-migrate binary +RUN wget -qO- https://github.com/golang-migrate/migrate/releases/download/v4.17.0/migrate-linux-amd64.tar.gz | tar xz -C /tmp/ + +# Final stage: minimal runtime image +FROM alpine:3.19 + +# Install PostgreSQL client for schema inspection if needed +RUN apk add --no-cache postgresql-client bash + +# Copy golang-migrate binary from builder +COPY --from=builder /tmp/migrate /usr/local/bin/migrate + +# Copy migration scripts from source repo +# They are baked into the image at build time, so the PreSync Job needs no volume mount +COPY db/migrations /migrations + +# Create a simple wrapper script to run migrations (printf, not echo: echo only expands \n in some shells) +RUN printf '%s\n' '#!/bin/bash' 'set -e' 'echo "Running database migrations..."' 'migrate -path /migrations -database "$PGRST_DB_URI" up' 'echo "Migrations completed successfully"' > /entrypoint.sh && chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/apps/portal/examples/postgrest-template/template.yaml b/apps/portal/examples/postgrest-template/template.yaml index
35e5da1..c92edf4 100644 --- a/apps/portal/examples/postgrest-template/template.yaml +++ b/apps/portal/examples/postgrest-template/template.yaml @@ -18,12 +18,11 @@ spec: - name - port - dockerOrg - - repoName properties: name: - title: Name + title: Service Name type: string - description: Unique name of the PostgREST API service + description: Unique name for the PostgREST API service and Docker repository (e.g. my-api) ui:autofocus: true port: title: API Port @@ -34,10 +33,6 @@ spec: title: Docker Registry Org/User type: string description: Your Docker Hub username or Organization - repoName: - title: Docker Repository Name - type: string - description: The name of the Docker repository (e.g. my-api) - title: PostgREST Configuration properties: @@ -108,11 +103,11 @@ spec: url: ./content/source targetPath: ./source values: - name: ${{ parameters.repoName }} + name: ${{ parameters.name }} owner: ${{ user.entity.metadata.name or 'guest' }} port: ${{ parameters.port }} description: "PostgREST API: ${{ parameters.name }}" - image: index.docker.io/${{ parameters.dockerOrg }}/${{ parameters.repoName }} + image: index.docker.io/${{ parameters.dockerOrg }}/${{ parameters.name }} apiSchema: ${{ parameters.apiSchema }} jwtSecret: ${{ secrets.jwtSecret }} jwtRole: ${{ parameters.jwtRole }} @@ -132,8 +127,8 @@ spec: action: gitea:create-webhook input: repoUrl: ${{ parameters.repoUrl }} - webhookUrl: http://el-${{ parameters.repoName }}-db-migrate-listener.${{ parameters.namespace }}.svc.cluster.local:8080 - webhookSecret: ${{ parameters.repoName }} + webhookUrl: http://el-${{ parameters.name }}-db-migrate-listener.${{ parameters.namespace }}.svc.cluster.local:8080 + webhookSecret: ${{ parameters.name }} events: - push @@ -145,10 +140,9 @@ spec: url: ./content/gitops targetPath: ./gitops values: - name: ${{ parameters.repoName }} - image: index.docker.io/${{ parameters.dockerOrg }}/${{ parameters.repoName }} + name: ${{ parameters.name }} + image: index.docker.io/${{ 
parameters.dockerOrg }}/${{ parameters.name }} dockerOrg: ${{ parameters.dockerOrg }} - repoName: ${{ parameters.repoName }} port: ${{ parameters.port }} namespace: ${{ parameters.namespace }} databaseType: ${{ parameters.databaseConfig.dbType }} @@ -161,13 +155,15 @@ spec: owner: ${{ user.entity.metadata.name or 'guest' }} sourceRepo: ${{ steps['publish-source'].output.remoteUrl }} gitopsRepo: ${{ steps['publish-source'].output.remoteUrl | replace(".git", "") }}-gitops + # Migration image configuration for PreSync Job + migrateImage: index.docker.io/${{ parameters.dockerOrg }}/${{ parameters.name }}-migrate:latest - id: publish-gitops name: Publish GitOps Manifests action: publish:gitea input: description: GitOps Manifests for ${{ parameters.name }} - repoUrl: ${{ parameters.repoUrl }}-gitops + repoUrl: ${{ parameters.repoUrl | replace('.git', '') }}-gitops sourcePath: ./gitops repoVisibility: public @@ -184,12 +180,12 @@ spec: action: kubernetes:create-secret input: namespace: ${{ parameters.namespace }} - secretName: ${{ parameters.repoName }}-webhook-secret + secretName: ${{ parameters.name }}-webhook-secret data: # Tekton Triggers interceptor expects the shared secret under key `secret`. - secret: ${{ parameters.repoName }} + secret: ${{ parameters.name }} # Keep `secretToken` for compatibility with any legacy consumers. - secretToken: ${{ parameters.repoName }} + secretToken: ${{ parameters.name }} # 5. 
Deploy HeliosApp to cluster - id: apply-heliosapp diff --git a/cue/definitions/argocd/pre-sync-job.cue b/cue/definitions/argocd/pre-sync-job.cue new file mode 100644 index 0000000..bc7cc3c --- /dev/null +++ b/cue/definitions/argocd/pre-sync-job.cue @@ -0,0 +1,179 @@ +// pre-sync-job.cue: ArgoCD PreSync Job template for database migrations +// This job runs before ArgoCD syncs the application, ensuring migrations complete successfully before deployment +package argocd + +// #DatabaseMigrationPreSyncJob generates a Kubernetes Job with ArgoCD PreSync hook annotation +// Used by the operator to create a Job that runs before PostgREST pods are deployed +#DatabaseMigrationPreSyncJob: { + // Input parameters + appName: string + namespace: string + migrateImageRef: string // e.g., "myorg/my-app-migrate:latest" + databaseSecretRef: string // Secret containing database credentials + backoffLimit: int | *3 + ttlSecondsAfterFinished: int | *3600 + serviceAccountName: string | *"\(appName)-migrator" + // Local aliases: inside output, a field such as `namespace: namespace` would refer to itself rather than to the input parameter + let _namespace = namespace + let _backoffLimit = backoffLimit + let _ttlSecondsAfterFinished = ttlSecondsAfterFinished + let _serviceAccountName = serviceAccountName + + // Output: Kubernetes Job object with PreSync hook + output: { + apiVersion: "batch/v1" + kind: "Job" + metadata: { + name: "\(appName)-db-migrate-presync" + namespace: _namespace + labels: { + "app": appName + "job-type": "db-migration" + // ArgoCD hook annotations + "argocd.argoproj.io/hook": "PreSync" + "argocd.argoproj.io/hook-deletion-policy": "BeforeHookCreation" + } + annotations: { + "argocd.argoproj.io/hook": "PreSync" + "argocd.argoproj.io/hook-deletion-policy": "BeforeHookCreation" + } + } + spec: { + backoffLimit: _backoffLimit + ttlSecondsAfterFinished: _ttlSecondsAfterFinished + template: { + metadata: { + labels: { + "app": appName + "job-type": "db-migration" + } + } + spec: { + // serviceAccountName is a Pod-level field, so it lives in the pod template rather than the Job spec + serviceAccountName: _serviceAccountName + containers: [{ + name: "db-migrate" + image: migrateImageRef + imagePullPolicy: "Always" + env: [ + { + name: "PGRST_DB_URI" + valueFrom: secretKeyRef: { + name: databaseSecretRef + key: "uri" + } + }, + ] + resources: { +
requests: { + cpu: "100m" + memory: "128Mi" + } + limits: { + cpu: "500m" + memory: "512Mi" + } + } + securityContext: { + runAsNonRoot: true + runAsUser: 1000 + readOnlyRootFilesystem: true + } + }] + restartPolicy: "Never" + securityContext: { + runAsNonRoot: true + runAsUser: 1000 + } + } + } + } + } +} + +// #DatabaseMigrationServiceAccount generates ServiceAccount for migration Job +#DatabaseMigrationServiceAccount: { + appName: string + namespace: string + let _namespace = namespace // alias: `namespace: namespace` inside metadata would refer to itself + + output: { + apiVersion: "v1" + kind: "ServiceAccount" + metadata: { + name: "\(appName)-migrator" + namespace: _namespace + labels: { + "app": appName + } + } + } +} + +// #DatabaseMigrationPostSyncJob generates a PostSync hook to restart PostgREST pods +// This ensures pods pull the latest migration image and schema changes take effect +#DatabaseMigrationPostSyncJob: { + appName: string + namespace: string + postgreName: string + ttlSecondsAfterFinished: int | *600 + let _namespace = namespace // aliases: a same-named field inside output would refer to itself + let _ttlSecondsAfterFinished = ttlSecondsAfterFinished + + output: { + apiVersion: "batch/v1" + kind: "Job" + metadata: { + name: "\(appName)-postgrest-restart-postsync" + namespace: _namespace + labels: { + "app": appName + "job-type": "pod-restart" + "argocd.argoproj.io/hook": "PostSync" + "argocd.argoproj.io/hook-deletion-policy": "BeforeHookCreation" + } + annotations: { + "argocd.argoproj.io/hook": "PostSync" + "argocd.argoproj.io/hook-deletion-policy": "BeforeHookCreation" + } + } + spec: { + ttlSecondsAfterFinished: _ttlSecondsAfterFinished + template: { + metadata: { + labels: { + "app": appName + "job-type": "pod-restart" + } + } + spec: { + // Restart PostgREST Deployment to pull latest migration image + serviceAccountName: "\(appName)-migrator" + containers: [{ + name: "kubectl" + image: "bitnami/kubectl:latest" + command: [ + "kubectl", + "rollout", + "restart", + "deployment/\(postgreName)", + "-n", + _namespace, + ] + resources: { + requests: { + cpu: "50m" + memory: "64Mi" + } + limits: { + cpu: "200m" + memory: "256Mi" + } + } + }] + restartPolicy: "Never" + } + } + } + } +} diff
--git a/cue/definitions/tekton/pipelines/db-migrate-image.cue b/cue/definitions/tekton/pipelines/db-migrate-image.cue new file mode 100644 index 0000000..050b50f --- /dev/null +++ b/cue/definitions/tekton/pipelines/db-migrate-image.cue @@ -0,0 +1,66 @@ +// db-migrate-image pipeline definition. +// Builds a Docker image containing golang-migrate and SQL migration scripts. +// This image is used by ArgoCD PreSync hooks to run database migrations before deploying PostgREST. +package pipelines + +import "helios.io/cue/definitions/tekton" + +// ===================================================== +// PIPELINE DEFINITION +// Simple 2-task pipeline: fetch source code, then build migration image +// ===================================================== + +// Simplified params for db-migrate-image pipeline +#DbMigrateImageParams: [ + // App source and image params + tekton.#CommonParams.app.repoUrl, + tekton.#CommonParams.app.repoRevision, + tekton.#CommonParams.app.imageRepo, + tekton.#CommonParams.image.contextSubpath, + tekton.#CommonParams.image.dockerSecret, +] + +// Only needs source workspace +#DbMigrateImageWorkspaces: [ + // #PipelineWorkspaces is local (patterns.cue) + #PipelineWorkspaces.source, +] + +// Define the pipeline configuration +_dbMigrateImageConfig: { + description: "Build database migration Docker image with golang-migrate tool and migration scripts" + + // Use simplified params for migration image build + params: #DbMigrateImageParams + + // Use source workspace + workspaces: #DbMigrateImageWorkspaces + + // Compose tasks from patterns + tasks: [ + // 1. Fetch source code (includes db/migrations/) + (#FetchSourcePattern & {}).task, + + // 2. 
Build and push migration image (tagged as :latest) + (#BuildMigrateImagePattern & { + _runAfter: ["fetch-source-code"] + }).task, + ] +} + +// Register pipeline in the registry +#PipelineRegistry: "db-migrate-image": { + name: "db-migrate-image" + description: "Build migration image with database migration tool and scripts" + config: _dbMigrateImageConfig +} + +// ===================================================== +// DIRECT EXPORT +// ===================================================== + +// Convenience: render pipeline for default namespace +DbMigrateImage: (#RenderPipeline & { + pipelineType: "db-migrate-image" + namespace: "default" +}).output diff --git a/cue/definitions/tekton/pipelines/patterns.cue b/cue/definitions/tekton/pipelines/patterns.cue index e414dd8..665fac2 100644 --- a/cue/definitions/tekton/pipelines/patterns.cue +++ b/cue/definitions/tekton/pipelines/patterns.cue @@ -180,6 +180,31 @@ import "helios.io/cue/definitions/tekton" } } +// #BuildMigrateImagePattern - Build Docker image with database migration tool and scripts +// Tags image as /-migrate:latest for use by PreSync Jobs +#BuildMigrateImagePattern: { + _name: string | *"build-migrate-image" + _runAfter: [...string] + + task: { + name: _name + taskRef: name: #TaskNames.kanikoBuild + runAfter: _runAfter + workspaces: [{ + name: "source" + workspace: #PipelineWorkspaces.source.name + }] + params: [ + // Build migration image with :latest tag (will be pulled by PreSync Job) + {name: tekton.#CommonParams.image.name.name, value: "$(params.\(#PipelineParams.imageRepo.name))-migrate:latest"}, + {name: tekton.#CommonParams.image.contextSubpath.name, value: "."}, + {name: tekton.#CommonParams.image.dockerSecret.name, value: "$(params.\(#PipelineParams.dockerSecret.name))"}, + // Override Dockerfile to use Dockerfile.migrate + {name: "DOCKERFILE", value: "./Dockerfile.migrate"}, + ] + } +} + // #UpdateGitOpsPattern #UpdateGitOpsPattern: { _name: string | *"update-gitops-manifest" diff --git 
a/cue/definitions/tekton/triggers/db-migrate-trigger.cue b/cue/definitions/tekton/triggers/db-migrate-trigger.cue index c5938d5..e65569a 100644 --- a/cue/definitions/tekton/triggers/db-migrate-trigger.cue +++ b/cue/definitions/tekton/triggers/db-migrate-trigger.cue @@ -41,7 +41,7 @@ import ( {name: "git-revision", description: "Git commit SHA from webhook"}, ] - // PipelineRun for db-migrate pipeline + // PipelineRun for db-migrate pipeline (runs database migrations via PreSync hook) resourcetemplates: [{ apiVersion: "tekton.dev/v1beta1" kind: "PipelineRun" @@ -69,7 +69,7 @@ import ( params: [ {name: "app-repo-url", value: "$(tt.params.git-repo-url)"}, {name: "app-repo-revision", value: "$(tt.params.git-revision)"}, - {name: "db-secret-name", value: _bp.databaseSecretRef}, + {name: "db-secret-name", value: "\(_bp.appName)-db-secret"}, {name: "migration-source", value: "db/migrations"}, {name: "namespace", value: _bp.namespace}, ] diff --git a/docs/OPERATOR.md b/docs/OPERATOR.md index 7063ea1..c73a84e 100644 --- a/docs/OPERATOR.md +++ b/docs/OPERATOR.md @@ -171,3 +171,191 @@ make -C apps/operator uninstall - `config/`: Kustomize manifests for CRDs, RBAC, and deployment. - `internal/controller/`: Reconciliation logic for `HeliosApp`. - `internal/cue/`: Go wrapper for the CUE rendering engine. + +## 🗄️ PostgREST Database Migration Flow + +The operator supports automatic database migrations for PostgREST services via an integrated CI/CD → GitOps → ArgoCD workflow. + +### Overview + +When a HeliosApp component has a `database` trait, the platform automatically: + +1. **Builds Migration Image** (Tekton): Creates a Docker image with `golang-migrate` tool and SQL migration scripts +2. **Pushes to Registry** (Tekton): Tags the image as `/-migrate:latest` +3. **Runs PreSync Job** (ArgoCD): Before syncing PostgREST Deployment, ArgoCD runs the PreSync Job to execute migrations +4. 
**Blocks on Failure** (ArgoCD): If migrations fail, ArgoCD sync is blocked and PostgREST is not updated +5. **Restarts Pods** (ArgoCD): After migrations succeed, PostSync Job restarts PostgREST pods to invalidate schema cache + +### Components + +#### Tekton Pipeline: `db-migrate-image` +- **Trigger**: Activates when `db/migrations/` path changes in source repository +- **Tasks**: + 1. Clone source repository + 2. Build Docker image with `Dockerfile.migrate` and migration scripts + 3. Push image to registry with `:latest` tag +- **Location**: `cue/definitions/tekton/pipelines/db-migrate-image.cue` + +#### GitOps Manifests (created via Template Scaffolder) +- **Dockerfile.migrate**: Multi-stage build with golang-migrate +- **presync-job.yaml**: ArgoCD PreSync hook Job that runs migrations +- **kustomization.yaml**: Bundles namespace, HeliosApp, and presync-job resources +- **Location**: `apps/portal/examples/postgrest-template/content/gitops/` + +#### ArgoCD Hooks +- **PreSync**: Runs migration Job before Deployment sync +- **PostSync**: Restarts PostgREST pods after successful sync +- **Hook Deletion Policy**: `BeforeHookCreation` ensures old Jobs are cleaned up before new ones start + +### Operation + +#### Scaffolding a PostgREST App with Migrations + +```bash +# Use the Backstage scaffolder: choose "PostgREST API Template" +# The template will: +# 1. Create source repository with Dockerfile, migrations, etc. +# 2. Create GitOps repository with kustomization.yaml, helios-app.yaml, presync-job.yaml +# 3. Create db-migrate-image Tekton trigger to watch db/migrations/ changes +# 4. 
Apply HeliosApp and namespace to cluster +``` + +#### Triggering Database Migrations + +```bash +# Add or modify a migration file +echo "CREATE TABLE new_table (id SERIAL PRIMARY KEY);" > db/migrations/000002_add_table.up.sql + +# Push to source repository +git add db/migrations/000002_add_table.up.sql +git commit -m "Add new_table migration" +git push origin main + +# Automated flow: +# 1. Webhook triggers db-migrate-image Tekton pipeline +# 2. Pipeline builds migration image with tag :latest +# 3. Pipeline pushes image to registry +# 4. ArgoCD detects presync-job.yaml referencing <appName>-migrate:latest +# 5. ArgoCD's PreSync hook runs the Job with fresh image +# 6. Job executes: migrate -path /migrations -database $PGRST_DB_URI up +# 7. If successful: ArgoCD syncs Deployment, PostSync restarts pods +# 8. If failed: ArgoCD blocks sync, operator logs failure +``` + +#### Migration Script in Docker Image + +The `Dockerfile.migrate` embeds an entrypoint script: + +```bash +#!/bin/bash +set -e +echo "Running database migrations..." +migrate -path /migrations -database "$PGRST_DB_URI" up +echo "Migrations completed successfully" +``` + +The `PGRST_DB_URI` environment variable is injected from a database Secret created by the operator's database trait. The secret is named following the convention `{componentName}-db-secret`, where `componentName` is the name of the component with the database trait (e.g., `postgrest-api-db-secret` for a component named `postgrest-api`). The PreSync Job template (`cue/definitions/argocd/pre-sync-job.cue`) reads the PostgreSQL connection string from the secret's `uri` key and exposes it to the migration container as `PGRST_DB_URI`.
+ +### Configuration + +#### HeliosApp Example + +```yaml +apiVersion: app.helios.io/v1alpha1 +kind: HeliosApp +metadata: + name: my-api +spec: + components: + - name: postgrest-api + type: web-service + properties: + image: index.docker.io/postgrest/postgrest:latest + traits: + - type: database + properties: + dbType: postgres + version: "16" + storage: "1Gi" +``` + +When the `database` trait is present, the operator automatically creates: +- Database credential Secret named `{componentName}-db-secret` (e.g., `postgrest-api-db-secret`) +- Database StatefulSet and Service for the component +- PreSync Job configured to use `<dockerOrg>/{appName}-migrate:latest` image and injected with `PGRST_DB_URI` from the database secret +- ServiceAccount with permissions to run the PreSync Job +- ClusterRole and ClusterRoleBinding for Job and Pod management + +### Troubleshooting + +#### Migration Job Fails + +Check the ArgoCD Application status: + +```bash +kubectl get application <app-name>-argocd -n argocd -o yaml +# Look for: syncResult.syncPhase = Failed or Synced=false +``` + +View Job logs: + +```bash +kubectl logs job/<app-name>-db-migrate-presync -n <namespace> +``` + +Retry migration: + +```bash +# Edit source repository migration file or create new migration +# Commit and push changes +# db-migrate-image pipeline will re-run and retry migrations +``` + +#### Image Not Pulling + +Verify image exists in registry: + +```bash +docker pull <docker-org>/<app-name>-migrate:latest +# or check via registry UI +``` + +Verify PreSync Job has correct image reference: + +```bash +kubectl get job <app-name>-db-migrate-presync -n <namespace> -o yaml | grep image +``` + +#### Pod Not Restarting After Migration + +Verify PostSync Job ran: + +```bash +kubectl get job <app-name>-postgrest-restart-postsync -n <namespace> -o yaml +``` + +Manually restart pods if needed: + +```bash +kubectl rollout restart deployment/<deployment-name> -n <namespace> +``` + +### RBAC Permissions + +The operator requires these permissions for migration management: + +```yaml +apiGroups: + - batch +resources: + - jobs +verbs: + - create + -
delete + - get + - list + - patch + - watch +``` + +These are configured in `apps/operator/config/rbac/role.yaml`. diff --git a/docs/POSTGREST_MIGRATION_TEST_GUIDE.md b/docs/POSTGREST_MIGRATION_TEST_GUIDE.md new file mode 100644 index 0000000..4aa0f6b --- /dev/null +++ b/docs/POSTGREST_MIGRATION_TEST_GUIDE.md @@ -0,0 +1,813 @@ +# PostgREST Database Migration Testing Guide + +## Overview + +This guide provides step-by-step instructions to verify that database migrations run automatically before PostgREST deployment, and that failed migrations block ArgoCD syncs. + +**What we're testing:** +1. ✅ Tekton pipeline builds migration Docker image +2. ✅ Migration image is pushed to registry +3. ✅ ArgoCD PreSync Job runs migrations before deploying PostgREST +4. ✅ Failed migrations block ArgoCD sync +5. ✅ PostgREST reflects schema changes immediately after sync + +--- + +## Prerequisites + +### Cluster Setup +```bash +# Verify cluster is running +kubectl cluster-info + +# Verify required namespaces +kubectl get ns | grep -E "tekton|argocd|default" +``` + +### Dependencies +- Kubernetes 1.24+ +- Tekton Pipelines v0.50+ +- ArgoCD v2.8+ +- Helios Operator v0.1+ +- Backstage Portal with PostgREST template + +### Install/Verify Components +```bash +# Check Tekton Pipelines +kubectl get pods -n tekton-pipelines | grep tekton-pipelines-controller + +# Check ArgoCD +kubectl get pods -n argocd | grep argocd- + +# Check Helios Operator +kubectl get pods -n helios-system | grep operator +``` + +--- + +## Test 1: Scaffold a New PostgREST Application + +### Step 1.1: Create Application via Backstage + +1. Open Backstage Portal: `http://localhost:3000` +2. Navigate to **Create** → **PostgREST API Template** +3. 
Fill in the form: + - **Service Name:** `test-api` (will be used for all resources) + - **API Port:** `3000` + - **Docker Registry Org:** `` (e.g., `mycompany`) + - **API Schema:** `public` + - **JWT Secret:** `your-secret-key-min-32-chars-required-here` (min 32 chars) + - **Repository URL:** Point to your Gitea instance + +### Step 1.2: Verify Scaffolding Completed + +The template should create: +- **Source repository** with `db/migrations/` directory +- **GitOps repository** with minimal manifests (namespace.yaml, helios-app.yaml) +- **HeliosApp CRD** that triggers operator to: + - Auto-generate PreSync Job from CUE definitions + - Auto-generate Tekton EventListener for migration triggers + - Auto-generate RBAC permissions for Job execution + +```bash +# Verify source repo was created +git clone +cd test-api +ls -la db/migrations/ + +# Verify GitOps repo (now minimal - just base files) +git clone +cd test-api-gitops +ls -la # Should show: namespace.yaml, helios-app.yaml +``` + +--- + +## Test 2: Verify Operator Auto-Generated Resources + +### Step 2.1: Check HeliosApp Status + +```bash +# Verify HeliosApp CRD was created by scaffolder +kubectl get heliosapp -n +kubectl describe heliosapp test-api -n + +# Check operator generated annotations +kubectl get heliosapp test-api -o jsonpath='{.metadata.annotations}' | jq . +# Look for: helios.io/has-database-trait=true, helios.io/presync-job=... 
+``` + +### Step 2.2: Verify Operator Generated PreSync Resources + +```bash +# Check ServiceAccount was auto-created +kubectl get sa -n <namespace> | grep migrator + +# Verify ClusterRole for Job management +kubectl get clusterrole | grep presync-job-role + +# Verify ClusterRoleBinding +kubectl get clusterrolebinding | grep presync-job-binding +``` + +### Step 2.3: Check Tekton EventListener (Auto-Generated) + +```bash +# Once HeliosApp is created, operator should create EventListener +kubectl get eventlistener -n <namespace> | grep test-api + +# Find the external URL +kubectl get svc -n el-services | grep test-api-db-migrate-listener +``` + +### Step 2.4: Verify Pipeline Trigger Configuration + +```bash +# Check Trigger definition +kubectl get triggers -n default +kubectl describe trigger test-api-db-migrate-trigger + +# Verify PipelineRun definition +kubectl logs -n tekton-pipelines -l app=tekton-pipelines-controller -f +``` + +--- + +## Test 3: Trigger Migration Pipeline + +### Step 3.1: Create Initial Migration File + +```bash +# In the source repository +cd test-api +mkdir -p db/migrations + +# Create first migration +# (golang-migrate only applies files named {version}_{title}.up.sql) +cat > db/migrations/001_init.up.sql << 'EOF' +-- Create initial schema +CREATE SCHEMA IF NOT EXISTS api; + +CREATE TABLE api.users ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + email TEXT UNIQUE NOT NULL, + created_at TIMESTAMP DEFAULT NOW() +); + +-- Grant permissions for PostgREST +GRANT USAGE ON SCHEMA api TO anon; +GRANT SELECT ON api.users TO anon; +EOF + +git add db/migrations/ +git commit -m "Add initial migration" +git push +``` + +### Step 3.2: Monitor Pipeline Execution + +```bash +# Watch for PipelineRun creation +kubectl get pipelinerun -n default -w + +# Get latest PipelineRun +PIPELINERUN=$(kubectl get pipelinerun -n default --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[-1].metadata.name}') + +# Monitor task execution +kubectl describe pipelinerun $PIPELINERUN -n default + +# Watch task logs +kubectl get taskrun -n default -w +kubectl logs
-n default -f $(kubectl get taskrun -n default --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[-1].metadata.name}') +``` + +### Step 3.3: Verify Image Build + +```bash +# Check if image was built and pushed +# Option 1: Check Docker registry +docker pull /test-api-migrate:latest +docker inspect /test-api-migrate:latest + +# Option 2: Check Tekton task results +kubectl get taskrun -n default -o yaml | grep -A 5 "image-digest" +``` + +--- + +## Test 4: Verify Migration Image Contents + +### Step 4.1: Inspect Migration Image + +```bash +# Verify golang-migrate binary is present +docker run --rm /test-api-migrate:latest which migrate + +# Verify migration scripts are copied +docker run --rm /test-api-migrate:latest ls -la /migrations/ + +# Test migration execution (dry run) +docker run --rm \ + -e PGRST_DB_URI="postgres://user:password@localhost:5432/testdb" \ + /test-api-migrate:latest \ + migrate -path /migrations -database "$PGRST_DB_URI" version +``` + +--- + +## Test 5: Deploy HeliosApp and Check ArgoCD + +### Step 5.1: Verify HeliosApp Creation + +```bash +# Check if HeliosApp was created in cluster +kubectl get heliosapp -n default +kubectl describe heliosapp test-api -n default + +# Verify HeliosApp status +kubectl get heliosapp test-api -o jsonpath='{.status}' | jq . 
+``` + +### Step 5.2: Verify ArgoCD Application Created + +```bash +# Check ArgoCD Application +kubectl get application -n argocd test-api +kubectl describe application test-api -n argocd + +# Get ArgoCD UI URL +kubectl port-forward svc/argocd-server -n argocd 8080:443 + +# Login and navigate to Applications +# Look for: test-api (should show PreSync hook status) +``` + +### Step 5.3: Monitor ArgoCD Application Sync + +```bash +# Watch application status +kubectl get application -n argocd test-api -w + +# Check sync result +kubectl get application test-api -n argocd -o jsonpath='{.status.operationState.phase}' + +# View detailed status +kubectl describe application test-api -n argocd +``` + +--- + +## Test 6: Verify PreSync Job Execution + +### Step 6.1: Check PreSync Job Created + +```bash +# List Jobs in default namespace +kubectl get jobs -n default + +# Find PreSync Job (print one name per line so grep selects only the matching Job) +PRESYNC_JOB=$(kubectl get jobs -n default -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep presync) +kubectl describe job $PRESYNC_JOB -n default + +# Get Job Pod +PRESYNC_POD=$(kubectl get pods -n default -l job-name=$PRESYNC_JOB -o jsonpath='{.items[0].metadata.name}') +``` + +### Step 6.2: View PreSync Job Logs + +```bash +# Get logs from migration Job +kubectl logs $PRESYNC_POD -n default + +# Expected output should show: +# - The entrypoint banner "Running database migrations..." +# - One line per applied migration (golang-migrate prints <version>/u <name> (<duration>)) +# - "Migrations completed successfully" + +# Example successful output: +# Running database migrations...
+# 1/u init (23.4ms) +# Migrations completed successfully +``` + +### Step 6.3: Verify Job Status + +```bash +# Check Job completion +kubectl get job $PRESYNC_JOB -n default -o jsonpath='{.status}' + +# Expected: { "succeeded": 1 } (or active: 0, failed: 0, succeeded: 1) + +# Verify Job cleanup (TTL after finished) +kubectl get job $PRESYNC_JOB -n default -o jsonpath='{.spec.ttlSecondsAfterFinished}' +# Expected: 3600 (1 hour) +``` + +--- + +## Test 7: Verify PostgREST Deployment + +### Step 7.1: Check PostgREST Pod + +```bash +# Get PostgREST pods +kubectl get pod -n default -l app=test-api + +# Describe Pod +kubectl describe pod -n default -l app=test-api + +# Verify environment variables +kubectl get pod -n default -l app=test-api -o jsonpath='{.items[0].spec.containers[0].env}' | jq . +``` + +### Step 7.2: Verify PostgREST is Ready + +```bash +# Port-forward to PostgREST +kubectl port-forward svc/test-api 3000:3000 -n default + +# Test API endpoint +curl http://localhost:3000/ + +# Should return either: +# - OpenAPI documentation (if available) +# - API version info +# - Or 404 (if no default route configured) + +# Query created table via REST +curl http://localhost:3000/api/users + +# Expected response: [] (empty array for new table) +``` + +### Step 7.3: Verify Database Connection + +```bash +# Check PGRST_DB_URI environment variable +kubectl get pod -n default -l app=test-api -o jsonpath='{.items[0].spec.containers[0].env[?(@.name=="PGRST_DB_URI")].value}' + +# Exec into pod and verify database connectivity +kubectl exec -it $(kubectl get pod -n default -l app=test-api -o jsonpath='{.items[0].metadata.name}') -n default -- bash + +# Inside pod: +psql $PGRST_DB_URI -c "\dt api.*" # List tables in api schema +psql $PGRST_DB_URI -c "SELECT * FROM api.users;" # Query the table +``` + +--- + +## Test 8: Verify Schema Changes Reflected + +### Step 8.1: Add New Migration + +```bash +# In source
repository +cd test-api + +cat > db/migrations/002_add_posts.sql << 'EOF' +CREATE TABLE api.posts ( + id SERIAL PRIMARY KEY, + user_id INTEGER REFERENCES api.users(id) ON DELETE CASCADE, + title TEXT NOT NULL, + content TEXT, + created_at TIMESTAMP DEFAULT NOW() +); + +GRANT SELECT ON api.posts TO anon; +EOF + +git add db/migrations/ +git commit -m "Add posts table" +git push +``` + +### Step 8.2: Trigger New Pipeline Run + +```bash +# Webhook should trigger automatically +# Monitor PipelineRun creation +kubectl get pipelinerun -n default --sort-by=.metadata.creationTimestamp + +# Wait for pipeline to complete +kubectl wait --for=condition=Succeeded pipelinerun/ -n default --timeout=5m +``` + +### Step 8.3: Sync ArgoCD Application + +```bash +# Force sync ArgoCD Application +argocd app sync test-api --server $ARGOCD_SERVER + +# OR via kubectl +kubectl patch application test-api -n argocd --type merge -p '{"metadata":{"annotations":{"argocd.argoproj.io/refresh":"hard"}}}' + +# Monitor sync +kubectl get application test-api -n argocd -w +``` + +### Step 8.4: Test New API Endpoint + +```bash +# Port-forward to PostgREST (if not already) +kubectl port-forward svc/test-api 3000:3000 -n default & + +# Test new endpoint +curl http://localhost:3000/api/posts + +# Expected: [] (empty array) + +# Test with data +curl -X POST http://localhost:3000/api/users \ + -H "Content-Type: application/json" \ + -d '{"name":"Alice","email":"alice@example.com"}' + +curl -X POST http://localhost:3000/api/posts \ + -H "Content-Type: application/json" \ + -d '{"user_id":1,"title":"Hello World","content":"This is my first post"}' + +curl http://localhost:3000/api/posts +# Expected: [{"id":1,"user_id":1,"title":"Hello World",...}] +``` + +--- + +## Test 9: Test Migration Failure Scenario + +### Step 9.1: Create Intentional Migration Error + +```bash +# In source repository +cat > db/migrations/003_bad_migration.sql << 'EOF' +-- This will fail: invalid SQL syntax +CREAT TABLE invalid_table ( + 
id INTEGER +); +EOF + +git add db/migrations/ +git commit -m "Add intentional bad migration" +git push +``` + +### Step 9.2: Monitor Pipeline Failure + +```bash +# Watch PipelineRun +kubectl get pipelinerun -n default -w + +# Get failed PipelineRun +kubectl get pipelinerun -n default --sort-by=.metadata.creationTimestamp -o name | tail -1 + +# Check failure details (use the PipelineRun name returned above) +kubectl describe pipelinerun <pipelinerun-name> -n default + +# Check task logs for error +kubectl get taskrun -n default -o jsonpath='{.items[-1].metadata.name}' | xargs -I {} kubectl logs {} -n default +``` + +### Step 9.3: Verify PreSync Job Fails + +```bash +# The Job should fail after backoff limit +kubectl get job -n default -w + +# Find PreSync Job +PRESYNC_JOB=$(kubectl get jobs -n default -o jsonpath='{.items[*].metadata.name}' | grep presync | tail -1) + +# Check job status +kubectl get job $PRESYNC_JOB -n default -o jsonpath='{.status}' +# Expected: { "failed": 1 } (failed after 3 retries) + +# View failure logs +kubectl logs -n default $(kubectl get pods -n default -l job-name=$PRESYNC_JOB -o jsonpath='{.items[0].metadata.name}') +# Expected: SQL syntax error message +``` + +### Step 9.4: Verify ArgoCD Blocks Sync + +```bash +# Check Application status +kubectl describe application test-api -n argocd | grep -A 10 "OperationState" + +# Expected status: +# - Phase: Failed +# - Message: PreSync job failed + +# Verify PostgREST pods are NOT updated +kubectl describe pod -n default -l app=test-api | grep -A 3 "Image:" +# Should still show old image + +# Verify existing tables are still there (no schema corruption) +kubectl exec -it $(kubectl get pod -n default -l app=test-api -o jsonpath='{.items[0].metadata.name}') -n default -- \ + psql $PGRST_DB_URI -c "SELECT * FROM api.posts;" +``` + +### Step 9.5: Fix and Recover + +```bash +# Fix the migration file +cat > db/migrations/003_good_migration.sql << 'EOF' +CREATE TABLE api.comments ( + id SERIAL PRIMARY KEY, + post_id INTEGER REFERENCES api.posts(id) ON DELETE 
CASCADE, + content TEXT NOT NULL, + created_at TIMESTAMP DEFAULT NOW() +); + +GRANT SELECT ON api.comments TO anon; +EOF + +# Stage the corrected migration +git add db/migrations/003_good_migration.sql + +# Remove bad migration +git rm db/migrations/003_bad_migration.sql + +git commit -m "Fix migration" +git push +``` + +```bash +# Monitor new PipelineRun (should succeed) +kubectl get pipelinerun -n default -w + +# Wait for migration to complete +PRESYNC_JOB=$(kubectl get jobs -n default -o jsonpath='{.items[*].metadata.name}' | grep presync | tail -1) +kubectl wait --for=condition=complete job/$PRESYNC_JOB -n default --timeout=2m + +# Check ArgoCD sync now succeeds +kubectl get application test-api -n argocd -o jsonpath='{.status.sync.status}' +# Expected: Synced +``` + +--- + +## Test 10: Kubernetes Resource Verification Checklist + +### Resources Created + +```bash +# Namespace +kubectl get ns | grep default + +# HeliosApp CRD +kubectl get heliosapp -n default + +# ArgoCD Application +kubectl get application -n argocd + +# Tekton PipelineRun +kubectl get pipelinerun -n default + +# Tekton Triggers resources +kubectl get triggerbinding -n default +kubectl get trigger -n default +kubectl get eventlistener -n default + +# Kubernetes Jobs (PreSync) +kubectl get jobs -n default + +# Kubernetes Deployment (PostgREST) +kubectl get deployment -n default + +# Kubernetes Service (PostgREST) +kubectl get svc -n default + +# Kubernetes Secret (Database credentials, Webhook) +kubectl get secret -n default | grep -E "test-api|webhook" + +# ServiceAccount (for Jobs) +kubectl get sa -n default | grep migrator +``` + +### Verify RBAC Permissions + +```bash +# Check Operator ClusterRole has required permissions +kubectl get clusterrole manager-role -o yaml | grep -A 20 "batch" + +# Expected: +# - apiGroups: +# - batch +# resources: +# - jobs +# verbs: +# - create +# - delete +# - get +# - list +# - patch +# - watch + +# Verify ServiceAccount bindings +kubectl get clusterrolebinding | grep manager-role +``` + +--- + +## Troubleshooting Guide + 
+### Issue: Webhook Not Triggering Pipeline + +**Symptoms:** Push to repository but no PipelineRun created + +**Diagnosis:** +```bash +# Check EventListener is accessible +kubectl get svc -n el-services +curl http://el-test-api-db-migrate-listener.el-services.svc.cluster.local:8080 + +# Check EventListener logs +kubectl logs -n tekton-pipelines -l app=tekton-events-controller -f + +# Verify webhook secret matches +kubectl get secret test-api-webhook-secret -n default -o jsonpath='{.data.secret}' | base64 -d + +# Verify CEL filter in trigger +kubectl get trigger test-api-db-migrate-trigger -n default -o yaml | grep -A 5 "filter:" +``` + +**Solution:** +- Verify webhook URL is correct and accessible +- Ensure webhook secret matches in repository settings +- Check CEL filter matches your migration file paths (should be `db/migrations/`) + +--- + +### Issue: PreSync Job Stuck in Pending + +**Symptoms:** Job created but Pod never starts + +**Diagnosis:** +```bash +# Check Job status +kubectl describe job <job-name> -n default + +# Check Pod events +kubectl describe pod <pod-name> -n default + +# Check ServiceAccount permissions +kubectl auth can-i create jobs --as=system:serviceaccount:default:<serviceaccount-name> + +# Check resource availability +kubectl describe nodes | grep -A 5 "Allocated resources" +``` + +**Solution:** +- Verify ServiceAccount has required permissions (RBAC) +- Check cluster has sufficient resources (CPU, memory) +- Verify migration image is accessible from cluster + +--- + +### Issue: Migration Runs But Schema Not Visible in PostgREST + +**Symptoms:** Migration succeeds, but new tables not accessible via REST API + +**Diagnosis:** +```bash +# Verify migration actually ran in database +kubectl exec -it $(kubectl get pod -n default -l app=test-api -o jsonpath='{.items[0].metadata.name}') -n default -- \ + psql $PGRST_DB_URI -c "\dt" + +# Check PostgREST logs +kubectl logs -n default -l app=test-api | grep -i schema + +# Verify PGRST_DB_SCHEMA matches migration schema +kubectl get pod -n 
default -l app=test-api -o jsonpath='{.items[0].spec.containers[0].env[?(@.name=="PGRST_DB_SCHEMA")].value}' + +# Check permissions in database +psql $PGRST_DB_URI -c "\z" # List all permissions +``` + +**Solution:** +- Grant proper permissions in migration: `GRANT SELECT ON schema.table TO anon;` +- Restart PostgREST pod to reload schema: `kubectl rollout restart deployment/test-api -n default` +- Verify `PGRST_DB_SCHEMA` environment variable matches schema name + +--- + +### Issue: ArgoCD Application Stuck in Syncing + +**Symptoms:** Application shows "Syncing" status indefinitely + +**Diagnosis:** +```bash +# Check Application status +kubectl get application test-api -n argocd -o yaml | grep -A 10 "operationState" + +# Check ArgoCD server logs +kubectl logs -n argocd -l app.kubernetes.io/name=argocd-server -f + +# Check PreSync Job status +PRESYNC_JOB=$(kubectl get jobs -n default -o jsonpath='{.items[*].metadata.name}' | grep presync | tail -1) +kubectl get job $PRESYNC_JOB -n default -o yaml + +# Check if Job Pod is stuck +kubectl get pod -n default -l job-name=$PRESYNC_JOB +``` + +**Solution:** +- Delete stuck job: `kubectl delete job <job-name> -n default` +- Sync application again: `argocd app sync test-api` +- Check application manifests have valid syntax: `argocd app get test-api` + +--- + +### Issue: Image 404 When Running Migration Job + +**Symptoms:** Migration Job fails with "ImagePullBackOff" + +**Diagnosis:** +```bash +# Check image exists in registry +docker pull <registry>/test-api-migrate:latest + +# Verify image is accessible from cluster +kubectl run -it --rm debug --image=<registry>/test-api-migrate:latest --restart=Never -- bash + +# Check ImagePullSecret if using private registry +kubectl get secret -n default | grep docker +``` + +**Solution:** +- Verify pipeline successfully built and pushed image +- Check Docker registry credentials in cluster +- Verify image name in presync-job.yaml matches registry + +--- + +## Success Criteria + +✅ **All tests pass if:** + +1. 
✅ Tekton pipeline triggered on migration file changes +2. ✅ Pipeline builds and pushes migration image successfully +3. ✅ ArgoCD PreSync Job runs before PostgREST deployment +4. ✅ PostgREST pods deploy after successful migration +5. ✅ Schema changes immediately visible via PostgREST API +6. ✅ Failed migration Job blocks ArgoCD sync +7. ✅ ArgoCD Application status shows correct sync phase +8. ✅ PostgREST pods NOT updated if migration fails +9. ✅ Migration can be fixed and deployment retried +10. ✅ All Kubernetes resources properly created and configured + +--- + +## Quick Health Check Script + +```bash +#!/bin/bash +set -e + +echo "🔍 PostgREST Migration Setup Health Check" +echo "" + +# Check Kubernetes cluster +echo "✅ Kubernetes cluster:" +kubectl cluster-info | head -2 + +# Check namespaces +echo "✅ Tekton namespace:" +kubectl get ns tekton-pipelines -o name + +echo "✅ ArgoCD namespace:" +kubectl get ns argocd -o name + +# Check Tekton controller +echo "✅ Tekton controller running:" +kubectl get pods -n tekton-pipelines -l app=tekton-pipelines-controller -o name + +# Check ArgoCD components +echo "✅ ArgoCD server running:" +kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-server -o name + +# Check example app +if kubectl get heliosapp test-api -n default 2>/dev/null; then + echo "✅ HeliosApp 'test-api' exists" + echo "✅ ArgoCD Application:" + kubectl get application test-api -n argocd -o name 2>/dev/null || echo "⚠️ Not found yet" +else + echo "⚠️ HeliosApp 'test-api' not found (scaffold first)" +fi + +echo "" +echo "🎉 Health check complete!" 
+``` + +Save as `scripts/health-check-postgrest.sh` and run: +```bash +bash scripts/health-check-postgrest.sh +``` + +--- + +## References + +- [Tekton Pipelines Documentation](https://tekton.dev/docs/) +- [ArgoCD Hooks Documentation](https://argo-cd.readthedocs.io/en/stable/user-guide/resource_hooks/) +- [PostgREST API Documentation](https://postgrest.org/en/stable/) +- [golang-migrate Documentation](https://github.com/golang-migrate/migrate) +- [Kubernetes Jobs Documentation](https://kubernetes.io/docs/concepts/workloads/controllers/job/) +