diff --git a/packages/documentation/astro.config.mjs b/packages/documentation/astro.config.mjs index 682347efc1..ad855c8a97 100644 --- a/packages/documentation/astro.config.mjs +++ b/packages/documentation/astro.config.mjs @@ -168,6 +168,36 @@ export default defineConfig({ { label: 'Helm and Kubernetes', link: '/integration/deployment/helm-k8s' + }, + { + label: 'Deployment example', + collapsed: true, + items: [ + { + label: 'Overview', + link: '/integration/deploy-to-prod/01-architecture-overview' + }, + { + label: 'Configuration variables', + link: '/integration/deploy-to-prod/02-configuration-variables' + }, + { + label: 'Secrets management', + link: '/integration/deploy-to-prod/03-secrets-management' + }, + { + label: 'Infrastructure setup', + link: '/integration/deploy-to-prod/04-infrastructure-setup' + }, + { + label: 'Services deployment', + link: '/integration/deploy-to-prod/05-services-deployment' + }, + { + label: 'Troubleshooting', + link: '/integration/deploy-to-prod/06-troubleshooting' + } + ] } ] }, diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/01-architecture-overview.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/01-architecture-overview.mdx new file mode 100644 index 0000000000..f3c17c8f88 --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/01-architecture-overview.mdx @@ -0,0 +1,191 @@ +--- +title: Overview +--- + +import { + MermaidWrapper, + Mermaid, + LinkOut, + Tooltip +} from '@interledger/docs-design-system' + +This guide provides an approach for you to deploy and integrate Rafiki on Google Cloud Platform (GCP) using Terraform, Kubernetes, Helm charts, and Argo CD. The reference architecture used in this guide is the Interledger Test Network. For this example we'll assume you are a digital wallet provider that wants to deploy your wallet application and Rafiki in your Kubernetes cluster. 
+ +:::caution +As the Interledger Test Network is used to showcase Rafiki's functionalities and to serve as a sandbox environment, this example is intended for informational purposes only and should not be used for a production deployment. +::: + +## Prerequisites + +Before beginning the integration, ensure you have the following: + +- A GCP account with billing enabled +- The gcloud CLI installed and + authenticated +- Hashicorp Terraform installed +- A Kubernetes cluster deployed via GKE +- Helm installed +- kubectl installed + and configured +- The argocd CLI +- Domain name for your wallet (required for SSL/TLS certificates) + +## Architecture overview + +The deployment follows the Interledger Test Network reference architecture, which includes: + +- **GKE Cluster**: Managed Kubernetes cluster on GCP +- **Rafiki Services**: Core Interledger functionality (Admin API, Auth Service, Backend) +- **PostgreSQL**: Database for Rafiki and wallet data +- **Redis**: Caching and session management +- **NGINX Ingress**: Load balancing and SSL termination +- **Argo CD**: GitOps continuous deployment +- **Digital Wallet**: Your wallet application integrated with Rafiki +- **GateHub Integration**: Payment backend and ledger services + +## Reference architecture + +The following diagram illustrates the architecture based on the Interledger Test Network wallet application with GateHub integration: + + + + Rafiki + PostgreSQL1 --> Rafiki + + Rafiki --> AdminAPI + Rafiki --> AuthAPI + + AdminAPI --> WalletBackend + AuthAPI --> WalletBackend + + FreecurrencyAPI --> WalletBackend + GateHub --> WalletBackend + + WalletBackend --> PostgreSQL2 + WalletBackend --> WalletFrontend + + OpenPayments --> PostgreSQL2 + OpenPayments --> WalletBackend + + WalletFrontend --> ClientDevices + + %% Styling + classDef redisStyle fill:#DC382D,stroke:#333,stroke-width:2px,color:#fff + classDef postgresStyle fill:#336791,stroke:#333,stroke-width:2px,color:#fff + classDef rafikiStyle 
fill:#4ECDC4,stroke:#333,stroke-width:2px,color:#fff + classDef openPaymentsStyle fill:#00A86B,stroke:#333,stroke-width:2px,color:#fff + classDef apiStyle fill:#FF69B4,stroke:#333,stroke-width:2px,color:#fff + classDef walletBackendStyle fill:#E6F3FF,stroke:#333,stroke-width:2px + classDef frontendStyle fill:#E8F5E8,stroke:#333,stroke-width:2px + classDef clientStyle fill:#E6E6FA,stroke:#333,stroke-width:2px + classDef nodeStyle fill:#68A063,stroke:#333,stroke-width:2px,color:#fff + classDef gatehubStyle fill:#1E3A8A,stroke:#333,stroke-width:2px,color:#fff + + class Redis redisStyle + class PostgreSQL1,PostgreSQL2 postgresStyle + class Rafiki rafikiStyle + class OpenPayments openPaymentsStyle + class AdminAPI,AuthAPI apiStyle + class WalletBackend walletBackendStyle + class WalletFrontend frontendStyle + class ClientDevices clientStyle + class NodeJS nodeStyle + class GateHub,GateHubService gatehubStyle`} +/> + + + +## Component details + +### External services + +- **Redis**: Provides caching and session management for the Rafiki system +- **FreecurrencyAPI**: External service providing real-time currency exchange rates +- **GateHub**: Payment backend service providing ledger and payment processing capabilities + +### Database layer + +- **PostgreSQL (Rafiki)**: Primary database storing Interledger accounts, wallet addresses, and core transaction data +- **PostgreSQL (Open Payments)**: Dedicated database for Open Payments protocol data and wallet-specific information + +### Rafiki core services + +- **Rafiki**: Core Interledger payment engine handling protocol implementation and payment processing +- **Admin API**: GraphQL interface for administrative functions and account management +- **Auth API**: Authentication and authorization service implementing Open Payments standards + +### GKE cluster - wallet services + +#### Wallet backend namespace + +- **Wallet Backend**: Main Node.js/Express application orchestrating wallet functionality +- **Rates Service**: 
Handles currency exchange rate management and calculations +- **Webhook Service**: Manages incoming webhook notifications from external services +- **Rafiki Integration**: Interface layer for communication with Rafiki services +- **GateHub Service**: Integration component for GateHub payment backend +- **Internal Ledger System**: Manages internal transaction ledgering and accounting +- **IDP & Ledgering**: Identity provider integration and additional ledger management + +#### Wallet frontend namespace + +- **Wallet Frontend**: Next.js/React application providing the user interface + +#### Ingress layer + +- **NGINX Ingress**: Routes external traffic to appropriate services based on hostname and path +- **cert-manager**: Automatically provisions and manages SSL/TLS certificates + +#### Platform services + +- **Argo CD**: GitOps continuous deployment system managing application deployments +- **Monitoring**: Prometheus metrics collection and Grafana dashboards for system observability + +## Next steps + +1. **[Configuration variables](/integration/deploy-to-prod/02-configuration-variables)**: Review all variables that need customization for your environment +2. **[Secrets management](/integration/deploy-to-prod/03-secrets-management)**: Set up secure secret generation, storage, and rotation strategies +3. **[Infrastructure setup](/integration/deploy-to-prod/04-infrastructure-setup)**: Deploy GCP infrastructure using Terraform +4. **[Rafiki services deployment](/integration/deploy-to-prod/05-services-deployment)**: Install and configure all services using Helm and Argo CD +5. 
**[Troubleshooting](/integration/deploy-to-prod/06-troubleshooting)**: Verify end-to-end payment flows and resolve common issues diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/02-configuration-variables.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/02-configuration-variables.mdx new file mode 100644 index 0000000000..a965195053 --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/02-configuration-variables.mdx @@ -0,0 +1,372 @@ +--- +title: Configure variables +--- + +This document outlines all variables you must customize for your specific environment when deploying Rafiki with your digital wallet. + +## Google Cloud environment variables + +These are the primary variables you'll need to define for your deployment: + +
+ +| Variable | Example Value | Description | +| ----------------- | ------------------------------ | ------------------------------------------------------- | +| `YOUR_PROJECT_ID` | `my-wallet-project-123` | Your Google Cloud Project ID | +| `YOUR_DOMAIN` | `mywallet.com` | Your primary domain name | +| `YOUR_COMPANY` | `MyWallet Inc` | Your company/organization name | +| `YOUR_EMAIL` | `admin@mywallet.com` | Administrative email for certificates and notifications | +| `YOUR_REGION` | `us-central1` | Primary GCP region for deployment | +| `YOUR_REGISTRY` | `gcr.io/my-wallet-project-123` | Container registry for your wallet images | + +
+ +## Terraform variables + +_Found in: `terraform/variables.tf` (definitions) and `terraform.tfvars` (values)_ + +Configure your infrastructure deployment with these variables: + +
+ +| Variable | Default Value | Required | Description | +| ------------------------ | ----------------------- | -------- | ----------------------------------------------------- | +| `project_id` | - | Yes | GCP Project ID where resources will be created | +| `domain_name` | - | Yes | Primary domain for your wallet (e.g., `mywallet.com`) | +| `region` | `us-central1` | No | Primary GCP region for cluster and resources | +| `cluster_name` | `rafiki-wallet-cluster` | No | Name of the GKE cluster | +| `node_pool_machine_type` | `e2-standard-4` | No | GCE machine type for Kubernetes nodes | +| `min_node_count` | `1` | No | Minimum number of nodes in the cluster | +| `max_node_count` | `10` | No | Maximum number of nodes for autoscaling | +| `disk_size_gb` | `100` | No | Boot disk size for each node in GB | +| `enable_network_policy` | `true` | No | Enable Kubernetes network policies for security | + +
+ +## DNS configuration + +_Found in: DNS provider settings and `terraform/dns.tf`_ + +Set up your domain and subdomains according to the following convention: + +
+ +| Subdomain | Example FQDN | Purpose | Description | +| ------------ | ------------------------- | -------------- | --------------------------------- | +| `wallet` | `wallet.mywallet.com` | User Interface | Main wallet web application | +| `api` | `api.mywallet.com` | Wallet API | Wallet backend REST/GraphQL API | +| `auth` | `auth.mywallet.com` | Rafiki Auth | Rafiki authentication server | +| `backend` | `backend.mywallet.com` | Rafiki Backend | Rafiki payment processing backend | +| `admin` | `admin.mywallet.com` | Rafiki Admin | Rafiki administrative GraphQL API | +| `argocd` | `argocd.mywallet.com` | GitOps | Argo CD web interface | +| `grafana` | `grafana.mywallet.com` | Monitoring | Grafana dashboards | +| `prometheus` | `prometheus.mywallet.com` | Metrics | Prometheus metrics endpoint | + +
+ +## Database configuration + +### PostgreSQL variables + +
+ +| Variable | Example Value | Description | +| ------------------- | -------------------- | ------------------------------------------ | +| `POSTGRES_DB` | `rafiki` | Primary database name for Rafiki | +| `POSTGRES_USER` | `rafiki` | Database username for Rafiki services | +| `POSTGRES_PASSWORD` | `<secure-password>` | Database password (use Kubernetes secrets) | +| `WALLET_DB_NAME` | `wallet` | Database name for wallet application | +| `WALLET_DB_USER` | `wallet_user` | Database username for wallet application | +| `DB_HOST` | `rafiki-postgresql` | Database service hostname within cluster | +| `DB_PORT` | `5432` | Database port | +| `DB_SSL_MODE` | `require` | SSL mode for database connections | + +
+ +### Redis variables + +
+ +| Variable | Example Value | Description | +| ---------------- | ---------------------------------- | ------------------------------------------ | +| `REDIS_HOST` | `rafiki-redis-master` | Redis service hostname within cluster | +| `REDIS_PORT` | `6379` | Redis port | +| `REDIS_PASSWORD` | `<secure-password>` | Redis password (if authentication enabled) | +| `REDIS_DB` | `0` | Redis database number | +| `REDIS_URL` | `redis://rafiki-redis-master:6379` | Complete Redis connection URL | + +
+ +## Rafiki service configuration + +Configure your Rafiki services with these variables: + +### Rafiki auth service variables + +_Found in: `helm-values/rafiki/values.yaml` under `rafiki-auth.env` section_ + +
+ +| Variable | Example Value | Description | +| -------------------- | ---------------------------------------- | -------------------------------------- | +| `AUTH_DATABASE_URL` | `postgresql://rafiki:***@host:5432/auth` | Database connection for auth service | +| `AUTH_SERVER_URL` | `https://auth.mywallet.com` | Public URL for the auth server | +| `COOKIE_KEY` | `<32-byte-secret>` | Secret key for cookie encryption | +| `REDIS_URL` | `redis://redis:6379` | Redis connection for session storage | +| `NODE_ENVIRONMENT` | `production` | Runtime environment | +| `LOG_LEVEL` | `info` | Logging level | +| `TRUST_PROXY` | `true` | Enable when behind load balancer/proxy | +| `ADMIN_PORT` | `3003` | Port for auth admin API | +| `AUTH_PORT` | `3006` | Port for auth server | +| `INTROSPECTION_PORT` | `3007` | Port for token introspection | +| `INTERACTION_PORT` | `3009` | Port for user interaction flows | + +
+ +### Rafiki backend service variables + +_Found in: `helm-values/rafiki/values.yaml` under `rafiki-backend.env` section_ + +
+ +| Variable | Example Value | Description | +| -------------------- | ------------------------------------------ | ------------------------------------------ | +| `DATABASE_URL` | `postgresql://rafiki:***@host:5432/rafiki` | Main database connection | +| `REDIS_URL` | `redis://redis:6379` | Redis connection for caching | +| `WEBHOOK_URL` | `https://api.mywallet.com/webhooks/rafiki` | Webhook endpoint for payment notifications | +| `OPEN_PAYMENTS_URL` | `https://backend.mywallet.com` | Public URL for Open Payments API | +| `ILP_ADDRESS` | `test.mywallet` | ILP address for this instance | +| `ILP_CONNECTOR_URL` | `https://ilp.mywallet.com` | ILP connector endpoint | +| `EXCHANGE_RATES_URL` | `https://api.mywallet.com/rates` | Exchange rates endpoint | +| `PRIVATE_KEY_FILE` | `/path/to/private.key` | Private key file path | + +
+ +### Worker configuration variables + +_Found in: `helm-values/rafiki/values.yaml` under `rafiki-backend.env` section_ + +
+ +| Variable | Example Value | Description | +| ------------------------------ | ------------- | ------------------------------------------- | +| `OUTGOING_PAYMENT_WORKERS` | `1` | Number of outgoing payment workers | +| `INCOMING_PAYMENT_WORKERS` | `1` | Number of incoming payment workers | +| `WALLET_ADDRESS_WORKERS` | `1` | Number of wallet address processing workers | +| `WEBHOOK_WORKERS` | `1` | Number of webhook processing workers | +| `OUTGOING_PAYMENT_WORKER_IDLE` | `200` | Idle time (ms) for outgoing payment workers | +| `INCOMING_PAYMENT_WORKER_IDLE` | `200` | Idle time (ms) for incoming payment workers | +| `WALLET_ADDRESS_WORKER_IDLE` | `1000` | Idle time (ms) for wallet address workers | +| `WEBHOOK_WORKER_IDLE` | `200` | Idle time (ms) for webhook workers | + +
+ +### Additional Rafiki configuration + +_Found in: `helm-values/rafiki/values.yaml` under various sections (global, rafiki-backend.env)_ + +
+ +| Variable | Example Value | Description | +| --------------------- | --------------------------------------------- | ----------------------------------- | +| `PAYMENT_POINTER_URL` | `https://wallet.mywallet.com/.well-known/pay` | Payment pointer well-known endpoint | +| `AUTH_SERVER_SECRET` | `<32-byte-secret>` | Secret for auth server JWT signing | +| `USE_TIGERBEETLE` | `false` | Disable TigerBeetle for accounting | + +
+ +## TLS certificate configuration + +_Found in: `k8s-manifests/cert-manager/cluster-issuer.yaml`_ + +
+ +| Variable | Example Value | Description | +| --------------------- | ------------------------------------------------ | ------------------------------------------------ | +| `CERT_MANAGER_EMAIL` | `certificates@mywallet.com` | Email for Let's Encrypt certificate registration | +| `CLUSTER_ISSUER_NAME` | `letsencrypt-prod` | Name of the cert-manager cluster issuer | +| `ACME_SERVER` | `https://acme-v02.api.letsencrypt.org/directory` | ACME server URL (prod vs staging) | +| `CERT_SECRET_NAME` | `{service}-tls` | Pattern for TLS secret names | + +
+ +## Container image configuration + +_Found in: `helm-values/rafiki/values.yaml` and `helm-values/wallet/values.yaml` under image sections_ + +
+ +| Variable | Example Value | Description | +| ---------------------- | ------------------------------------------- | ---------------------------------- | +| `RAFIKI_AUTH_IMAGE` | `ghcr.io/interledger/rafiki-auth:latest` | Rafiki auth server container image | +| `RAFIKI_BACKEND_IMAGE` | `ghcr.io/interledger/rafiki-backend:latest` | Rafiki backend container image | +| `RAFIKI_ADMIN_IMAGE` | `ghcr.io/interledger/rafiki-admin:latest` | Rafiki admin API container image | +| `WALLET_UI_IMAGE` | `gcr.io/my-project/wallet-ui:v1.0.0` | Your wallet UI container image | +| `WALLET_API_IMAGE` | `gcr.io/my-project/wallet-api:v1.0.0` | Your wallet API container image | +| `POSTGRES_IMAGE` | `postgres:14` | PostgreSQL container image version | +| `REDIS_IMAGE` | `redis:7-alpine` | Redis container image version | + +
+ +## Resource limits + +_Found in: `helm-values/rafiki/values.yaml` and `helm-values/wallet/values.yaml` under resources sections_ + +
+ +| Resource Type | CPU Request | CPU Limit | Memory Request | Memory Limit | Description | +| -------------- | ----------- | --------- | -------------- | ------------ | ------------------------------------ | +| Rafiki Auth | `100m` | `500m` | `128Mi` | `512Mi` | Authentication server resources | +| Rafiki Backend | `200m` | `1000m` | `256Mi` | `1Gi` | Payment processing backend resources | +| Rafiki Admin | `100m` | `500m` | `128Mi` | `512Mi` | Admin API resources | +| Wallet API | `200m` | `1000m` | `256Mi` | `1Gi` | Wallet backend API resources | +| Wallet UI | `50m` | `200m` | `64Mi` | `256Mi` | Wallet frontend resources | +| PostgreSQL | `250m` | `1000m` | `256Mi` | `2Gi` | Database resources | +| Redis | `100m` | `500m` | `128Mi` | `512Mi` | Cache resources | + +
+ +## Storage configuration + +_Found in: `helm-values/rafiki/values.yaml` and `helm-values/wallet/values.yaml` under persistence sections_ + +
+ +| Storage Type | Size | Storage Class | Description | +| --------------- | ------- | ------------- | ---------------------------------------- | +| Rafiki Database | `20Gi` | `ssd` | Persistent storage for Rafiki PostgreSQL | +| Wallet Database | `50Gi` | `ssd` | Persistent storage for wallet PostgreSQL | +| Redis | `5Gi` | `ssd` | Persistent storage for Redis cache | +| Backup Storage | `100Gi` | `standard` | Storage for database backups | + +
+ +## Monitoring configuration + +_Found in: `k8s-manifests/monitoring/values.yaml`_ + +
+ +| Variable | Example Value | Description | +| ------------------------ | ----------------------------- | ------------------------------------- | +| `GRAFANA_ADMIN_PASSWORD` | `<secure-password>` | Grafana admin user password | +| `PROMETHEUS_RETENTION` | `15d` | How long to retain Prometheus metrics | +| `ALERTMANAGER_SLACK_URL` | `https://hooks.slack.com/...` | Slack webhook for alerts | +| `GRAFANA_SMTP_HOST` | `smtp.gmail.com:587` | SMTP server for Grafana notifications | +| `GRAFANA_SMTP_USER` | `notifications@mywallet.com` | SMTP username for email alerts | + +
+ +## Security configuration + +_Found in: `helm-values/rafiki/values.yaml` and `helm-values/wallet/values.yaml` under various security sections_ + +
+ +| Variable | Example Value | Description | +| ------------------------ | ------------- | ------------------------------------------- | +| `NETWORK_POLICY_ENABLED` | `true` | Enable Kubernetes network policies | +| `POD_SECURITY_STANDARD` | `restricted` | Pod security standard level | +| `SERVICE_ACCOUNT_NAME` | `rafiki-sa` | Kubernetes service account name | +| `RBAC_ENABLED` | `true` | Enable role-based access control | +| `ADMISSION_CONTROLLER` | `gatekeeper` | Admission controller for policy enforcement | + +
+ +## Backup configuration + +_Found in: `k8s-manifests/backup/postgres-backup.yaml`_ + +
+ +| Variable | Example Value | Description | +| ----------------------- | ----------------------- | ---------------------------------- | +| `BACKUP_SCHEDULE` | `0 2 * * *` | Cron schedule for database backups | +| `BACKUP_RETENTION_DAYS` | `30` | How many days to keep backups | +| `BACKUP_STORAGE_BUCKET` | `gs://mywallet-backups` | GCS bucket for storing backups | +| `BACKUP_ENCRYPTION_KEY` | `<kms-key-id>` | KMS key for backup encryption | + +
+ +## Environment-specific overrides + +### Development environment + +
+ +| Variable | Development Value | Description | +| ------------------- | -------------------------------------------------------- | --------------------------------- | +| `ACME_SERVER` | `https://acme-staging-v02.api.letsencrypt.org/directory` | Use Let's Encrypt staging for dev | +| `LOG_LEVEL` | `debug` | Enable debug logging | +| `REPLICA_COUNT` | `1` | Single replica for all services | +| `RESOURCE_REQUESTS` | `50% of production` | Lower resource requests | +| `STORAGE_SIZE` | `10Gi` | Smaller storage volumes | + +
+ +### Staging environment + +
+ +| Variable | Staging Value | Description | +| -------------------- | ------------------- | -------------------------------- | +| `REPLICA_COUNT` | `2` | Moderate replicas for testing | +| `RESOURCE_REQUESTS` | `75% of production` | Higher than dev, lower than prod | +| `MONITORING_ENABLED` | `true` | Full monitoring enabled | +| `BACKUP_ENABLED` | `false` | No backups in staging | + +
+ +### Production environment + +
+ +| Variable | Production Value | Description | +| ----------------------- | ----------------- | -------------------------- | +| `REPLICA_COUNT` | `3-5` | High availability replicas | +| `RESOURCE_REQUESTS` | `Full allocation` | Production resource limits | +| `MONITORING_ENABLED` | `true` | Comprehensive monitoring | +| `BACKUP_ENABLED` | `true` | Full backup strategy | +| `NETWORK_POLICIES` | `true` | Enhanced security | +| `POD_SECURITY_POLICIES` | `true` | Strict security policies | + +
+ +## DNS record configuration + +Configure these DNS A records pointing to your static IP: + +
+ | Name | Value | TTL | + | ---------------------------- | ---------------- | --- | + | `wallet.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `api.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `auth.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `backend.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `admin.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `argocd.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `grafana.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + | `prometheus.YOUR_DOMAIN.com` | `YOUR_STATIC_IP` | 300 | + +
+ +## Note about placeholders + +All configuration files use these placeholder patterns that you must replace: + +- `YOUR_DOMAIN.com` - Replace with your actual domain +- `YOUR_PROJECT_ID` - Replace with your GCP project ID +- `YOUR_REGISTRY` - Replace with your container registry +- `YOUR_EMAIL` - Replace with your administrative email +- `<32-byte-secret>` - Generate using `openssl rand -base64 32` (refer to the [Secrets Management](/integration/deploy-to-prod/03-secrets-management) guide) +- `<secure-password>` - Generate strong passwords for admin accounts + +Ensure you systematically replace all placeholder values before deployment to avoid configuration errors. + +## Next steps + +Now that you understand all the configuration variables, proceed to: + +1. **[Secrets management](/integration/deploy-to-prod/03-secrets-management)**: Generate and securely store all required secrets before infrastructure deployment +2. **[Infrastructure setup](/integration/deploy-to-prod/04-infrastructure-setup)**: Deploy GCP infrastructure using Terraform +3. **[Services deployment](/integration/deploy-to-prod/05-services-deployment)**: Install and configure Rafiki and wallet services diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/03-secrets-management.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/03-secrets-management.mdx new file mode 100644 index 0000000000..5fc1048ed0 --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/03-secrets-management.mdx @@ -0,0 +1,84 @@ +--- +title: Secrets management +--- + +Proper secrets management is critical for securing your Rafiki deployment. This section covers all types of secrets used in the architecture. + +:::note +You should generate and prepare all secrets before beginning infrastructure deployment. +::: + +## Types of secrets + +### Database credentials + +Database credentials are used by all services to connect to PostgreSQL and Redis instances. + +
+ +| Secret Type | Usage | +| ------------------ | ----------------------- | +| PostgreSQL Admin | Database administration | +| PostgreSQL Service | Application connections | +| Redis Password | Cache connections | + +
+ +### Application secrets + +Application-specific secrets used by Rafiki services for encryption and authentication. + +
+ +| Secret Type | Usage | +| -------------------- | ----------------------------- | +| `COOKIE_KEY` | Session cookie encryption | +| `AUTH_SERVER_SECRET` | JWT token signing | +| `WEBHOOK_SECRET` | Webhook signature validation | +| `PRIVATE_KEY_FILE` | Open Payments request signing | + +
+ +### TLS certificates + +TLS certificates for securing communications between services and external clients. + +
+ +| Certificate Type | Usage | +| ---------------- | ---------------------- | +| Let's Encrypt | Public HTTPS endpoints | +| Internal CA | Service-to-service | + +
+ +### External service credentials + +Credentials for connecting to external services and APIs. + +
+ +| Credential Type | Usage | +| ------------------- | -------------------- | +| GCP Service Account | Google Cloud APIs | +| Container Registry | Image pulls | +| Monitoring APIs | Metrics and alerting | + +
+ +## Best practices + +- **Generate strong secrets**: Use cryptographically secure methods to generate secrets +- **Secure Kubernetes namespaces**: Create Kubernetes secrets for each namespace +- **Use a secrets manager**: For enhanced security, store secrets in Google Cloud Secret Manager or HashiCorp Vault +- **Rotate secrets on a regular basis**: Automate secret rotations with CronJobs + +## Next steps + +With your secrets securely generated and managed: + +1. **Secure storage**: Store your generated secrets in a secure location (not in Git!) +2. **[Set up infrastructure](/integration/deploy-to-prod/04-infrastructure-setup)**: Deploy GCP infrastructure using Terraform - you'll need these secrets during the deployment process +3. **[Deploy services](/integration/deploy-to-prod/05-services-deployment)**: Configure and deploy Rafiki and wallet services using your pre-generated secrets + +Proper secrets management is foundational to a secure Rafiki deployment. Regular rotation, secure storage, and comprehensive monitoring ensure your payment infrastructure remains protected. diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/04-infrastructure-setup.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/04-infrastructure-setup.mdx new file mode 100644 index 0000000000..b20a295187 --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/04-infrastructure-setup.mdx @@ -0,0 +1,270 @@ +--- +title: Infrastructure setup +--- + +This guide covers the infrastructure setup phase, including Terraform configuration, Google Kubernetes Engine (GKE) cluster deployment, and core Kubernetes components. + +:::caution +Before starting infrastructure deployment, ensure you have completed [configuration variables](/integration/deploy-to-prod/02-configuration-variables) review and [secrets management](/integration/deploy-to-prod/03-secrets-management) setup. 
You'll need generated secrets ready for the deployment process. +::: + +## Project structure + +Create the following directory structure for your infrastructure: + +``` +rafiki-wallet-infrastructure/ +├── terraform/ +│ ├── main.tf +│ ├── variables.tf +│ ├── outputs.tf +│ ├── gke.tf +│ ├── networking.tf +│ └── dns.tf +├── k8s-manifests/ +│ ├── argocd/ +│ ├── ingress-nginx/ +│ └── cert-manager/ +└── helm-values/ + ├── rafiki/ + └── wallet/ +``` + +## Terraform configuration + +### Main configuration + +Configure the Terraform providers and backend: + +```hcl +# terraform/main.tf +terraform { + required_version = ">= 1.0" + required_providers { + google = { + source = "hashicorp/google" + version = "~> 4.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +data "google_client_config" "default" {} + +provider "kubernetes" { + host = "https://${google_container_cluster.primary.endpoint}" + token = data.google_client_config.default.access_token + cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth.0.cluster_ca_certificate) +} +``` + +### Variables definition + +Define all the variables needed for your deployment: + +```hcl +# terraform/variables.tf +variable "project_id" { + description = "GCP Project ID" + type = string +} + +variable "region" { + description = "GCP Region" + type = string + default = "us-central1" +} + +variable "cluster_name" { + description = "GKE Cluster name" + type = string + default = "rafiki-wallet-cluster" +} + +variable "domain_name" { + description = "Domain name for the wallet" + type = string +} + +variable "node_pool_machine_type" { + description = "Machine type for GKE nodes" + type = string + default = "e2-standard-4" +} + +variable "min_node_count" { + description = "Minimum number of nodes in the cluster" + type = number + default = 1 +} + +variable "max_node_count" { + description = "Maximum 
number of nodes for autoscaling" + type = number + default = 10 +} + +variable "disk_size_gb" { + description = "Boot disk size for each node in GB" + type = number + default = 100 +} + +variable "enable_network_policy" { + description = "Enable Kubernetes network policies" + type = bool + default = true +} +``` + +### GKE cluster configuration + +Create a GKE cluster with security and scalability features: + +```hcl +# terraform/gke.tf +resource "google_container_cluster" "primary" { + name = var.cluster_name + location = var.region + + # We can't create a cluster with no node pool defined, but we want to only use + # separately managed node pools. So we create the smallest possible default + # node pool and immediately delete it. + remove_default_node_pool = true + initial_node_count = 1 + + network = google_compute_network.vpc.name + subnetwork = google_compute_subnetwork.subnet.name + + workload_identity_config { + workload_pool = "${var.project_id}.svc.id.goog" + } + + addons_config { + http_load_balancing { + disabled = false + } + horizontal_pod_autoscaling { + disabled = false + } + } + + network_policy { + enabled = var.enable_network_policy + } + + # Enable network policy addon if network policy is enabled + dynamic "addons_config" { + for_each = var.enable_network_policy ? 
[1] : [] + content { + network_policy_config { + disabled = false + } + } + } +} + +resource "google_container_node_pool" "primary_nodes" { + name = "${var.cluster_name}-node-pool" + location = var.region + cluster = google_container_cluster.primary.name + node_count = var.min_node_count + + node_config { + preemptible = false + machine_type = var.node_pool_machine_type + disk_size_gb = var.disk_size_gb + + service_account = google_service_account.kubernetes.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + + workload_metadata_config { + mode = "GKE_METADATA" + } + + # Security settings + shielded_instance_config { + enable_secure_boot = true + enable_integrity_monitoring = true + } + } + + autoscaling { + min_node_count = var.min_node_count + max_node_count = var.max_node_count + } + + management { + auto_repair = true + auto_upgrade = true + } +} + +resource "google_service_account" "kubernetes" { + account_id = "${var.cluster_name}-sa" + display_name = "GKE Service Account for ${var.cluster_name}" +} + +# IAM binding for the service account +resource "google_project_iam_member" "kubernetes" { + project = var.project_id + role = "roles/container.nodeServiceAccount" + member = "serviceAccount:${google_service_account.kubernetes.email}" +} +``` + +### Networking configuration + +Set up virtual private cloud (VPC) networking with proper IP ranges and firewall rules: + +```hcl +# terraform/networking.tf +resource "google_compute_network" "vpc" { + name = "${var.cluster_name}-vpc" + auto_create_subnetworks = "false" +} + +resource "google_compute_subnetwork" "subnet" { + name = "${var.cluster_name}-subnet" + region = var.region + network = google_compute_network.vpc.name + ip_cidr_range = "10.10.0.0/24" + + secondary_ip_range { + range_name = "services-range" + ip_cidr_range = "192.168.1.0/24" + } + + secondary_ip_range { + range_name = "pod-ranges" + ip_cidr_range = "192.168.64.0/22" + } +} + +resource "google_compute_global_address" 
"ingress_ip" { + name = "${var.cluster_name}-ingress-ip" +} + +resource "google_compute_firewall" "allow_ingress" { + name = "${var.cluster_name}-allow-ingress" + network = google_compute_network.vpc.name + + allow { + protocol = "tcp" + ports = ["80", "443"] + } + + source_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} +``` diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/05-services-deployment.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/05-services-deployment.mdx new file mode 100644 index 0000000000..f005514aa7 --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/05-services-deployment.mdx @@ -0,0 +1,964 @@ +--- +title: Services deployment +--- + +This guide covers deploying Rafiki services and your digital wallet application using Helm charts and Argo CD. + +## Rafiki deployment + +Set up the Rafiki namespace and required secrets: + +:::note +If you followed the recommended sequence, you should have already generated your secrets using the [secrets management guide](/integration/deploy-to-prod/03-secrets-management). The commands below assume you have the secrets ready to deploy. 
+::: + +```bash +# Create Rafiki namespace +kubectl create namespace rafiki + +# Create database credentials +kubectl create secret generic rafiki-db-secret \ + --from-literal=username=rafiki \ + --from-literal=password=$(openssl rand -base64 32) \ + --from-literal=postgres-password=$(openssl rand -base64 32) \ + --namespace rafiki + +# Create application secrets +kubectl create secret generic rafiki-secrets \ + --from-literal=auth-server-secret=$(openssl rand -base64 32) \ + --from-literal=cookie-key=$(openssl rand -base64 32) \ + --from-literal=webhook-secret=$(openssl rand -base64 32) \ + --namespace rafiki + +# Verify secrets +kubectl get secrets -n rafiki +``` + +### Rafiki Helm configuration + +Configure the Rafiki services with the following settings: + +```yaml +# helm-values/rafiki/values.yaml - CUSTOMIZE all YOUR_DOMAIN references +global: + image: + registry: ghcr.io + tag: 'latest' + +postgresql: + enabled: true + primary: + persistence: + size: 20Gi + storageClass: 'ssd' # Use SSD for better performance + auth: + existingSecret: rafiki-db-secret + secretKeys: + adminPasswordKey: postgres-password + userPasswordKey: password + metrics: + enabled: true # Enable PostgreSQL metrics + +redis: + enabled: true + architecture: standalone + auth: + enabled: false + master: + persistence: + size: 5Gi + metrics: + enabled: true # Enable Redis metrics + +rafiki-auth: + enabled: true + image: + repository: interledger/rafiki-auth + replicaCount: 2 # High availability + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/rate-limit: '100' # Rate limiting + hosts: + - host: auth.YOUR_DOMAIN.com # Replace with your domain + paths: + - path: / + pathType: Prefix + tls: + - secretName: rafiki-auth-tls + hosts: + - auth.YOUR_DOMAIN.com # Replace with your domain + env: + AUTH_DATABASE_URL: 
'postgresql://rafiki:$(POSTGRES_PASSWORD)@rafiki-postgresql:5432/rafiki' + REDIS_URL: 'redis://rafiki-redis-master:6379' + COOKIE_KEY: + valueFrom: + secretKeyRef: + name: rafiki-secrets + key: cookie-key + TRUST_PROXY: 'true' # Enable proxy trust for GCP Load Balancer + NODE_ENVIRONMENT: 'production' # Set environment mode + LOG_LEVEL: 'info' # Set logging level + +rafiki-backend: + enabled: true + image: + repository: interledger/rafiki-backend + replicaCount: 3 # Scale for payment processing + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 1000m + memory: 1Gi + ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/rate-limit: '1000' # Higher rate limit for payments + hosts: + - host: backend.YOUR_DOMAIN.com # Replace with your domain + paths: + - path: / + pathType: Prefix + tls: + - secretName: rafiki-backend-tls + hosts: + - backend.YOUR_DOMAIN.com # Replace with your domain + env: + DATABASE_URL: 'postgresql://rafiki:$(POSTGRES_PASSWORD)@rafiki-postgresql:5432/rafiki' + REDIS_URL: 'redis://rafiki-redis-master:6379' + WEBHOOK_URL: 'https://api.YOUR_DOMAIN.com/webhooks/rafiki' # Replace with your domain + OPEN_PAYMENTS_URL: 'https://backend.YOUR_DOMAIN.com' # Replace with your domain + ILP_ADDRESS: 'test.YOUR_DOMAIN' # Replace with your ILP address + ILP_CONNECTOR_URL: 'https://ilp.YOUR_DOMAIN.com' # Replace with your domain + EXCHANGE_RATES_URL: 'https://api.YOUR_DOMAIN.com/rates' # Replace with your domain + TRUST_PROXY: 'true' + NODE_ENVIRONMENT: 'production' + LOG_LEVEL: 'info' + # Worker configuration + OUTGOING_PAYMENT_WORKERS: '1' + INCOMING_PAYMENT_WORKERS: '1' + WALLET_ADDRESS_WORKERS: '1' + WEBHOOK_WORKERS: '1' + # Worker idle times (milliseconds) + OUTGOING_PAYMENT_WORKER_IDLE: '200' + INCOMING_PAYMENT_WORKER_IDLE: '200' + WALLET_ADDRESS_WORKER_IDLE: '1000' + WEBHOOK_WORKER_IDLE: '200' + # TigerBeetle configuration + USE_TIGERBEETLE: 'true' + 
TIGERBEETLE_CLUSTER_ID: '0' + TIGERBEETLE_REPLICA_ADDRESSES: '10.5.0.50:4342' + # Private key configuration + # PRIVATE_KEY_FILE: "/path/to/private.key" # Uncomment and configure as needed + # Volume mounts for private key file (if needed) + # volumes: + # - name: private-key + # secret: + # secretName: rafiki-private-key + # volumeMounts: + # - name: private-key + # mountPath: /path/to/private.key + # subPath: private.key + # readOnly: true + +rafiki-admin: + enabled: true + image: + repository: interledger/rafiki-admin + replicaCount: 2 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/rate-limit: '50' # Lower rate limit for admin + hosts: + - host: admin.YOUR_DOMAIN.com # Replace with your domain + paths: + - path: / + pathType: Prefix + tls: + - secretName: rafiki-admin-tls + hosts: + - admin.YOUR_DOMAIN.com # Replace with your domain + env: + DATABASE_URL: 'postgresql://rafiki:$(POSTGRES_PASSWORD)@rafiki-postgresql:5432/rafiki' + TRUST_PROXY: 'true' + +# Monitoring and observability +serviceMonitor: + enabled: true # Enable Prometheus monitoring + +# Network policies for security +networkPolicy: + enabled: true + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - from: + - namespaceSelector: + matchLabels: + name: monitoring +``` + +### Deploy Rafiki with Argo CD + +Create an Argo CD application for Rafiki deployment: + +```yaml +# k8s-manifests/argocd/rafiki-application.yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: rafiki + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://interledger.github.io/charts/interledger + chart: rafiki + targetRevision: '*' + helm: + valueFiles: + - values.yaml + values: | + # Include your customized values here 
or reference from Git repo + global: + image: + registry: ghcr.io + tag: "latest" + # Add other values from helm-values/rafiki/values.yaml + destination: + server: https://kubernetes.default.svc + namespace: rafiki + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m +``` + +Deploy Rafiki using Argo CD: + +```bash +# Deploy Rafiki using Argo CD +kubectl apply -f k8s-manifests/argocd/rafiki-application.yaml + +# Monitor deployment +kubectl get applications -n argocd +argocd app sync rafiki +argocd app wait rafiki --health +``` + +## Digital wallet deployment + +Set up the wallet namespace and required secrets. Each `$(openssl rand -base64 32)` call below produces a different value, so before running the commands generate the wallet database password once and use that same value (URL-encoded) for both the `password` key and the `database-url` connection string; otherwise the wallet application cannot authenticate to PostgreSQL: + +```bash +# Create wallet namespace +kubectl create namespace wallet + +# Create wallet database secret +kubectl create secret generic wallet-db-secret \ + --from-literal=username=wallet \ + --from-literal=password=$(openssl rand -base64 32) \ + --from-literal=postgres-password=$(openssl rand -base64 32) \ + --from-literal=database-url="postgresql://wallet:$(openssl rand -base64 32)@wallet-postgresql:5432/wallet" \ + --namespace wallet + +# Create wallet application secrets +kubectl create secret generic wallet-secrets \ + --from-literal=jwt-secret=$(openssl rand -base64 32) \ + --from-literal=webhook-secret=$(openssl rand -base64 32) \ + --from-literal=session-secret=$(openssl rand -base64 32) \ + --namespace wallet + +# Create Redis secret +kubectl create secret generic wallet-redis-secret \ + --from-literal=password=$(openssl rand -base64 32) \ + --namespace wallet + +# If using private container registry, create image pull secret +kubectl create secret docker-registry gcr-json-key \ + --docker-server=gcr.io \ + --docker-username=_json_key \ + --docker-password="$(cat path/to/service-account-key.json)" \ + --docker-email=YOUR_EMAIL@YOUR_DOMAIN.com \ + --namespace wallet +``` + +### Wallet Helm configuration + +Configure your digital
wallet application: + +```yaml +# helm-values/wallet/values.yaml - CUSTOMIZE all YOUR_DOMAIN and YOUR_REGISTRY references +replicaCount: 3 # High availability + +image: + repository: YOUR_REGISTRY/digital-wallet # Replace with your container registry + tag: 'v1.0.0' # Replace with your wallet version + pullPolicy: IfNotPresent + +imagePullSecrets: + - name: gcr-json-key # If using private registry + +service: + type: ClusterIP + port: 80 + targetPort: 3000 + +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/rate-limit: '1000' + nginx.ingress.kubernetes.io/ssl-redirect: 'true' + hosts: + - host: wallet.YOUR_DOMAIN.com # Replace with your domain + paths: + - path: / + pathType: Prefix + tls: + - secretName: wallet-tls + hosts: + - wallet.YOUR_DOMAIN.com # Replace with your domain + +resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 1000m + memory: 1Gi + +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + +config: + rafiki: + authServerUrl: 'https://auth.YOUR_DOMAIN.com' # Replace with your domain + backendUrl: 'https://backend.YOUR_DOMAIN.com' # Replace with your domain + adminUrl: 'https://admin.YOUR_DOMAIN.com' # Replace with your domain + database: + url: 'postgresql://wallet:$(POSTGRES_PASSWORD)@wallet-postgresql:5432/wallet' + redis: + url: 'redis://wallet-redis-master:6379' + +# Separate PostgreSQL for wallet data +postgresql: + enabled: true + nameOverride: 'wallet-postgresql' + primary: + persistence: + size: 50Gi + storageClass: 'ssd' + auth: + database: 'wallet' + username: 'wallet' + existingSecret: wallet-db-secret + secretKeys: + adminPasswordKey: postgres-password + userPasswordKey: password + +# Separate Redis for wallet sessions +redis: + enabled: true + nameOverride: 'wallet-redis' + architecture: standalone + auth: + enabled: true + existingSecret: wallet-redis-secret + 
existingSecretPasswordKey: password + +# Environment variables for wallet application +env: + - name: RAFIKI_AUTH_SERVER_URL + value: 'https://auth.YOUR_DOMAIN.com' # Replace with your domain + - name: RAFIKI_BACKEND_URL + value: 'https://backend.YOUR_DOMAIN.com' # Replace with your domain + - name: RAFIKI_ADMIN_URL + value: 'https://admin.YOUR_DOMAIN.com' # Replace with your domain + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: wallet-db-secret + key: database-url + - name: REDIS_URL + value: 'redis://wallet-redis-master:6379' + - name: JWT_SECRET + valueFrom: + secretKeyRef: + name: wallet-secrets + key: jwt-secret + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: wallet-secrets + key: webhook-secret + - name: NODE_ENV + value: 'production' + - name: LOG_LEVEL + value: 'info' +``` + +### Create wallet API ingress + +Set up additional ingress for wallet API: + +```yaml +# k8s-manifests/wallet/api-ingress.yaml - CUSTOMIZE host field +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wallet-api-ingress + namespace: wallet + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/ssl-redirect: 'true' + nginx.ingress.kubernetes.io/rate-limit: '500' +spec: + ingressClassName: nginx + tls: + - hosts: + - api.YOUR_DOMAIN.com # Replace with your domain + secretName: wallet-api-tls + rules: + - host: api.YOUR_DOMAIN.com # Replace with your domain + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: wallet-api + port: + number: 80 +``` + +### Deploy wallet with Argo CD + +Create an Argo CD application for wallet deployment: + +```yaml +# k8s-manifests/argocd/wallet-application.yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: digital-wallet + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://github.com/YOUR_ORG/wallet-helm-charts # Replace with your repo + path: 
charts/digital-wallet + targetRevision: HEAD + helm: + valueFiles: + - values.yaml + destination: + server: https://kubernetes.default.svc + namespace: wallet + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: 5 + backoff: + duration: 5s + factor: 2 + maxDuration: 3m +``` + +Deploy the wallet application: + +```bash +# Apply wallet API ingress +kubectl apply -f k8s-manifests/wallet/api-ingress.yaml + +# Deploy wallet using Argo CD +kubectl apply -f k8s-manifests/argocd/wallet-application.yaml + +# Monitor deployment +kubectl get applications -n argocd +argocd app sync digital-wallet +argocd app wait digital-wallet --health +``` + +## Service verification + +### Health checks + +Verify all services are running correctly: + +```bash +# Check Rafiki service health endpoints +curl -k https://auth.YOUR_DOMAIN.com/health +curl -k https://backend.YOUR_DOMAIN.com/health +curl -k https://admin.YOUR_DOMAIN.com/health + +# Check wallet application +curl -k https://wallet.YOUR_DOMAIN.com/health +curl -k https://api.YOUR_DOMAIN.com/health + +# Verify payment pointer resolution +curl -k https://backend.YOUR_DOMAIN.com/.well-known/pay +``` + +### Database Initialization + +Initialize databases with required schemas: + +```bash +# Connect to Rafiki database and run migrations (if needed) +kubectl exec -it -n rafiki deployment/rafiki-admin -- npm run migrate + +# Verify database tables +kubectl exec -it -n rafiki rafiki-postgresql-0 -- psql -U rafiki -d rafiki -c "\dt" + +# Connect to wallet database and run migrations +kubectl exec -it -n wallet deployment/wallet-api -- npm run migrate + +# Verify wallet database +kubectl exec -it -n wallet wallet-postgresql-0 -- psql -U wallet -d wallet -c "\dt" +``` + +## Monitoring setup + +### Install monitoring stack + +Configure monitoring with Prometheus and Grafana: + +```yaml +# k8s-manifests/monitoring/values.yaml - CUSTOMIZE domain references +prometheus: + prometheusSpec: 
+ retention: 15d + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: ssd + accessModes: ['ReadWriteOnce'] + resources: + requests: + storage: 30Gi + ingress: + enabled: true + ingressClassName: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + hosts: + - prometheus.YOUR_DOMAIN.com # Replace with your domain + tls: + - secretName: prometheus-tls + hosts: + - prometheus.YOUR_DOMAIN.com # Replace with your domain + +grafana: + adminPassword: 'CHANGE_THIS_PASSWORD' # Replace with secure password + ingress: + enabled: true + ingressClassName: nginx + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + hosts: + - grafana.YOUR_DOMAIN.com # Replace with your domain + tls: + - secretName: grafana-tls + hosts: + - grafana.YOUR_DOMAIN.com # Replace with your domain + + # SMTP configuration for alerts + smtp: + enabled: true + host: 'smtp.gmail.com:587' # Replace with your SMTP server + user: 'notifications@YOUR_DOMAIN.com' # Replace with your email + password: 'YOUR_SMTP_PASSWORD' # Replace with your SMTP password + from_address: 'notifications@YOUR_DOMAIN.com' # Replace with your email + +alertmanager: + alertmanagerSpec: + storage: + volumeClaimTemplate: + spec: + storageClassName: ssd + accessModes: ['ReadWriteOnce'] + resources: + requests: + storage: 5Gi + config: + global: + smtp_smarthost: 'smtp.gmail.com:587' # Replace with your SMTP server + smtp_from: 'alerts@YOUR_DOMAIN.com' # Replace with your email + route: + group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'web.hook' + receivers: + - name: 'web.hook' + slack_configs: + - api_url: 'YOUR_SLACK_WEBHOOK_URL' # Replace with your Slack webhook + channel: '#alerts' + title: 'Rafiki Wallet Alert' +``` + +Install the monitoring stack: + +```bash +# Add Prometheus community repo +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update + +# Install kube-prometheus-stack with 
custom values +helm install monitoring prometheus-community/kube-prometheus-stack \ + --namespace monitoring \ + --create-namespace \ + -f k8s-manifests/monitoring/values.yaml + +# Verify monitoring installation +kubectl get pods -n monitoring +kubectl get svc -n monitoring +kubectl get ingress -n monitoring +``` + +## Post-deployment configuration + +### Initialize Rafiki assets + +Create initial assets and configuration for Rafiki: + +```bash +# Create a script to initialize Rafiki with basic assets +cat > initialize-rafiki.sh << 'EOF' +#!/bin/bash + +# Get admin API endpoint +ADMIN_URL="https://admin.YOUR_DOMAIN.com" + +# Create USD asset +curl -X POST $ADMIN_URL/graphql \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -d '{ + "query": "mutation CreateAsset($input: CreateAssetInput!) { createAsset(input: $input) { asset { id code scale } } }", + "variables": { + "input": { + "code": "USD", + "scale": 2 + } + } + }' + +# Create EUR asset +curl -X POST $ADMIN_URL/graphql \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $ADMIN_TOKEN" \ + -d '{ + "query": "mutation CreateAsset($input: CreateAssetInput!) 
{ createAsset(input: $input) { asset { id code scale } } }", + "variables": { + "input": { + "code": "EUR", + "scale": 2 + } + } + }' + +echo "Assets created successfully" +EOF + +chmod +x initialize-rafiki.sh +``` + +### Set up backup jobs + +Configure automated database backups. The CronJob below runs in the `rafiki` namespace and reads a `database-url` key from both `rafiki-db-secret` and `wallet-db-secret`; because a `secretKeyRef` can only reference secrets in the Pod's own namespace, first add a `database-url` key to `rafiki-db-secret` and create a copy of the wallet connection string in the `rafiki` namespace: + +```yaml +# k8s-manifests/backup/postgres-backup.yaml - CUSTOMIZE GCS bucket and project ID +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup + namespace: rafiki +spec: + schedule: '0 2 * * *' # Daily at 2 AM + jobTemplate: + spec: + template: + spec: + serviceAccountName: backup-sa + containers: + - name: postgres-backup + image: google/cloud-sdk:alpine + command: + - /bin/bash + - -c + - | + BACKUP_DATE=$(date +%Y%m%d-%H%M%S) + + # Backup Rafiki database + pg_dump $RAFIKI_DATABASE_URL | gzip > /tmp/rafiki-backup-${BACKUP_DATE}.sql.gz + gsutil cp /tmp/rafiki-backup-${BACKUP_DATE}.sql.gz gs://YOUR_PROJECT-backups/rafiki/ + + # Backup Wallet database + pg_dump $WALLET_DATABASE_URL | gzip > /tmp/wallet-backup-${BACKUP_DATE}.sql.gz + gsutil cp /tmp/wallet-backup-${BACKUP_DATE}.sql.gz gs://YOUR_PROJECT-backups/wallet/ + + # Cleanup old backups (keep last 30 days) + gsutil -m rm gs://YOUR_PROJECT-backups/rafiki/rafiki-backup-$(date -d '30 days ago' +%Y%m%d)*.sql.gz || true + gsutil -m rm gs://YOUR_PROJECT-backups/wallet/wallet-backup-$(date -d '30 days ago' +%Y%m%d)*.sql.gz || true + env: + - name: RAFIKI_DATABASE_URL + valueFrom: + secretKeyRef: + name: rafiki-db-secret + key: database-url + - name: WALLET_DATABASE_URL + valueFrom: + secretKeyRef: + name: wallet-db-secret + key: database-url + restartPolicy: OnFailure +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: backup-sa + namespace: rafiki + annotations: + iam.gke.io/gcp-service-account: backup-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com # Replace with your project ID +``` + +Create the backup infrastructure: + +```bash +# Create GCS bucket for backups +gsutil mb gs://YOUR_PROJECT-backups + +# Apply
backup job +kubectl apply -f k8s-manifests/backup/postgres-backup.yaml + +# Test backup job manually +kubectl create job --from=cronjob/postgres-backup manual-backup -n rafiki +``` + +## Security hardening + +### Network policies + +Implement network policies for enhanced security: + +```yaml +# k8s-manifests/security/network-policies.yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rafiki-network-policy + namespace: rafiki +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - from: + - namespaceSelector: + matchLabels: + name: wallet + ports: + - protocol: TCP + port: 80 + egress: + - to: [] + ports: + - protocol: TCP + port: 53 + - protocol: UDP + port: 53 + - to: + - namespaceSelector: + matchLabels: + name: wallet +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: wallet-network-policy + namespace: wallet +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - from: + - namespaceSelector: + matchLabels: + name: rafiki + egress: + - to: [] + ports: + - protocol: TCP + port: 53 + - protocol: UDP + port: 53 + - to: + - namespaceSelector: + matchLabels: + name: rafiki +``` + +Apply network policies: + +```bash +# Label namespaces for network policies +kubectl label namespace ingress-nginx name=ingress-nginx +kubectl label namespace rafiki name=rafiki +kubectl label namespace wallet name=wallet + +# Apply network policies +kubectl apply -f k8s-manifests/security/network-policies.yaml +``` + +## Deployment verification + +### Service status verification + +Check all services are running correctly: + +```bash +# Verify all pods are running +kubectl get pods -n rafiki +kubectl get pods -n wallet +kubectl get pods -n monitoring + +# Check services +kubectl get svc -n rafiki +kubectl get svc -n wallet + +# Check ingress +kubectl 
get ingress -A + +# Check certificates +kubectl get certificates -A +``` + +## Service URLs summary + +Your deployed services are available at these URLs: + +| Service | URL | Purpose | +| ------------------ | ------------------------------------ | ------------------------------ | +| **Wallet UI** | `https://wallet.YOUR_DOMAIN.com` | User-facing wallet application | +| **Wallet API** | `https://api.YOUR_DOMAIN.com` | Wallet backend API | +| **Rafiki Auth** | `https://auth.YOUR_DOMAIN.com` | Authentication server | +| **Rafiki Backend** | `https://backend.YOUR_DOMAIN.com` | Payment processing | +| **Rafiki Admin** | `https://admin.YOUR_DOMAIN.com` | Administrative API | +| **Argo CD** | `https://argocd.YOUR_DOMAIN.com` | GitOps management | +| **Grafana** | `https://grafana.YOUR_DOMAIN.com` | Monitoring dashboards | +| **Prometheus** | `https://prometheus.YOUR_DOMAIN.com` | Metrics collection | + +## Performance optimization + +### Database performance tuning + +Optimize PostgreSQL settings: + +```sql +-- Connect to PostgreSQL and optimize settings +-- For Rafiki database +ALTER SYSTEM SET shared_buffers = '256MB'; +ALTER SYSTEM SET effective_cache_size = '1GB'; +ALTER SYSTEM SET maintenance_work_mem = '64MB'; +ALTER SYSTEM SET checkpoint_completion_target = 0.9; +ALTER SYSTEM SET wal_buffers = '16MB'; +ALTER SYSTEM SET default_statistics_target = 100; + +-- Reload configuration +SELECT pg_reload_conf(); + +-- Create indexes for better performance +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_payments_wallet_address_id ON payments(wallet_address_id); +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_payments_created_at ON payments(created_at); +``` + +### Resource scaling + +Monitor and adjust resource allocations based on actual usage: + +```bash +# Check resource usage +kubectl top nodes +kubectl top pods -n rafiki +kubectl top pods -n wallet + +# Scale deployments based on load +kubectl scale deployment rafiki-backend --replicas=5 -n rafiki +kubectl scale deployment 
wallet-api --replicas=5 -n wallet + +# Enable horizontal pod autoscaling if not already enabled +kubectl autoscale deployment rafiki-backend --cpu-percent=70 --min=3 --max=10 -n rafiki +kubectl autoscale deployment wallet-api --cpu-percent=70 --min=3 --max=10 -n wallet +``` + +## Next steps + +With your services deployed, you can now: + +1. **Configure monitoring alerts**: Define Prometheus alerting rules and confirm that Alertmanager delivers notifications +2. **Implement backup verification**: Test backup and restore procedures +3. **Test performance**: Conduct load testing to validate scaling configuration +4. **Review security**: Perform a security audit and penetration testing +5. **[Troubleshoot issues](/integration/deploy-to-prod/06-troubleshooting)**: Address any deployment issues + +Your Rafiki infrastructure is now fully deployed and ready for integration with your digital wallet application! diff --git a/packages/documentation/src/content/docs/integration/deploy-to-prod/06-troubleshooting.mdx b/packages/documentation/src/content/docs/integration/deploy-to-prod/06-troubleshooting.mdx new file mode 100644 index 0000000000..9eeac3583a --- /dev/null +++ b/packages/documentation/src/content/docs/integration/deploy-to-prod/06-troubleshooting.mdx @@ -0,0 +1,181 @@ +--- +title: Troubleshooting +--- + +This guide helps you diagnose and resolve common issues when deploying and integrating Rafiki with your digital wallet. + +## Pre-deployment checklist + +Before troubleshooting issues, ensure you've completed all required customizations: + +### Required customizations + +
+ +| File/Location | Variables to Update | +| ----------------------------- | ------------------------------------- | +| `terraform.tfvars` | `project_id`, `domain_name`, `region` | +| `cluster-issuer.yaml` | `email` field | +| `argocd/ingress.yaml` | `host` fields | +| `ingress-nginx/values.yaml` | `loadBalancerIP` | +| `rafiki/values.yaml` | All `YOUR_DOMAIN.com` references | +| `wallet/values.yaml` | `YOUR_DOMAIN.com`, `YOUR_REGISTRY` | +| `monitoring/values.yaml` | Domain references, passwords, SMTP | +| `backup/postgres-backup.yaml` | GCS bucket, project ID | +| Environment secrets | Database passwords, API tokens | +| DNS records | Point domains to static IP | + +
+ +### Security checklist + +
+ +| Security Item | Description | +| ------------------ | -------------------------------------------------------- | +| TLS Certificates | Let's Encrypt configured for all domains | +| Database Passwords | Strong, randomly generated passwords | +| API Secrets | 32-byte secrets for auth and webhooks | +| Network Policies | Enabled to restrict pod-to-pod communication | +| RBAC | Proper service accounts and permissions | +| Image Security | Official images, kept up to date so known vulnerabilities are patched | +| Backup Encryption | KMS encryption for backup data | + +
+ +## Common issues and solutions + +### Infrastructure issues + +#### Issue: Error creating cluster + +**Solutions:** + +1. **Check GCP permissions:** + + ```bash + # Verify current user has required permissions + gcloud auth list + gcloud projects get-iam-policy PROJECT_ID + + # Add required roles + gcloud projects add-iam-policy-binding PROJECT_ID \ + --member="user:your-email@domain.com" \ + --role="roles/container.admin" + ``` + +2. **Enable required APIs:** + + ```bash + gcloud services enable container.googleapis.com + gcloud services enable compute.googleapis.com + gcloud services enable dns.googleapis.com + ``` + +3. **Check quota limits:** + ```bash + gcloud compute project-info describe --project=PROJECT_ID + ``` + +#### Issue: GKE cluster creation hangs + +**Symptoms:** + +- Terraform hangs on cluster creation +- Cluster shows "PROVISIONING" status for extended time + +**Solutions:** + +1. **Check region availability:** + + ```bash + # List available zones in region + gcloud compute zones list --filter="region:us-central1" + + # Try different region + terraform apply -var="region=us-east1" + ``` + +2. **Reduce initial node count:** + + ```bash + terraform apply -var="min_node_count=1" + ``` + +3. **Check for resource conflicts:** + + ```bash + # List existing clusters + gcloud container clusters list + + # Clean up if needed + gcloud container clusters delete OLD_CLUSTER_NAME --region=REGION + ``` + +### DNS and certificate issues + +#### Issue: certificate not issued + +**Symptoms:** + +- Certificate shows `READY: False` status +- TLS certificate secret not created +- HTTPS connections fail with certificate errors +- Let's Encrypt certificate challenges failing + +```bash +kubectl get certificates -A +NAME READY SECRET AGE +rafiki-auth-tls False rafiki-auth-tls 10m +``` + +**Solutions:** + +1. 
**Verify DNS records are correctly configured:** + + ```bash + # Check if domain resolves to your cluster IP + nslookup auth.YOUR_DOMAIN.com + dig auth.YOUR_DOMAIN.com + + # Verify static IP is assigned + kubectl get ingress -A + ``` + +2. **Check cert-manager is running properly:** + + ```bash + # Verify cert-manager pods are healthy + kubectl get pods -n cert-manager + + # Check cluster-issuer status + kubectl get clusterissuer + kubectl describe clusterissuer letsencrypt-prod + ``` + +3. **Check firewall rules allow HTTP/HTTPS traffic:** + + ```bash + # Ensure HTTP challenge can reach your domain + gcloud compute firewall-rules list + curl -I http://auth.YOUR_DOMAIN.com/.well-known/acme-challenge/test + ``` + +**Debugging:** + +```bash +# Check certificate status +kubectl describe certificate rafiki-auth-tls -n rafiki + +# Check certificate request +kubectl get certificaterequests -A +kubectl describe certificaterequest -n rafiki + +# Check cert-manager logs +kubectl logs -n cert-manager deployment/cert-manager +kubectl logs -n cert-manager deployment/cert-manager-webhook +kubectl logs -n cert-manager deployment/cert-manager-cainjector + +# Check ingress-nginx logs for HTTP challenges +kubectl logs -n ingress-nginx deployment/ingress-nginx-controller +```