diff --git a/QUICKSTART-DEPLOY.md b/QUICKSTART-DEPLOY.md index 5e34c271ad..ad09a2e3bd 100644 --- a/QUICKSTART-DEPLOY.md +++ b/QUICKSTART-DEPLOY.md @@ -5,6 +5,7 @@ This guide will help you get the Unkey deployment platform up and running locall ## Prerequisites - Docker and Docker Compose +- Go 1.24 or later - A terminal/command line ## Step 1: Start the Platform @@ -12,21 +13,12 @@ This guide will help you get the Unkey deployment platform up and running locall 1. Start all services using Docker Compose: ```bash -docker-compose up -d +docker-compose up metald-aio dashboard ctrl -d ``` -This will start: +2. Wait for all services to be healthy -- MySQL database (port 3306) -- Dashboard (port 3000) -- Control plane services -- Supporting infrastructure - -2. Wait for all services to be healthy (this may take 1-2 minutes): - -```bash -docker-compose ps -``` +The platform now uses a Docker backend that creates containers instead of VMs, making it much faster and easier to run locally. ## Step 2: Set Up Your Workspace @@ -36,31 +28,17 @@ docker-compose ps http://localhost:3000 ``` -2. Sign in or create an account through the authentication flow +2. Create a workspace and copy its id -3. Once logged in, you'll automatically have a workspace created. Navigate to: +3. Create a new project by filling out the form: -``` -http://localhost:3000/projects -``` +Go to http://localhost:3000/projects -4. Create a new project by filling out the form: +- **Name**: Choose any name (e.g., "My Test App") +- **Slug**: This will auto-generate based on the name +- **Git URL**: Optional, leave blank for testing - - **Name**: Choose any name (e.g., "My Test App") - - **Slug**: This will auto-generate based on the name - - **Git URL**: Optional, leave blank for testing - -5. After creating the project, **copy the Project ID** from the project details. It will look like: - -``` -proj_xxxxxxxxxxxxxxxxxx -``` - -6. Also note your **Workspace ID** (you can find this settings). 
It will look like: - -``` -ws_xxxxxxxxxxxxxxxxxx -``` +4. After creating the project, **copy the Project ID** from the project details. It will look like `proj_xxxxxxxxxxxxxxxxxx`. ## Step 3: Deploy a Version @@ -82,21 +60,71 @@ go run . version create \ Keep the context as shown, there's a demo api in that folder. Replace `YOUR_WORKSPACE_ID` and `YOUR_PROJECT_ID` with the actual values you copied from the dashboard. -3. The CLI will show real-time progress as your deployment goes through these stages: - - Downloading Docker image - - Building rootfs - - Uploading rootfs - - Creating VM - - Booting VM - - Assigning domains - - Completed +3. The CLI will: + - Always build a fresh Docker image from your code + - Set the PORT environment variable to 8080 in the container + - Use the Docker backend to create a container instead of a VM + - Automatically allocate a random host port (e.g., 35432) to avoid conflicts + - Show real-time progress as your deployment goes through the stages ## Step 4: View Your Deployment -1. Return to the dashboard and navigate to: +1. Once the deployment completes, the CLI will show you the available domains: + +``` +Deployment Complete + Version ID: v_xxxxxxxxxxxxxxxxxx + Status: Ready + Environment: Production + +Domains + https://main-commit-workspace.unkey.app + http://localhost:35432 +``` + +2. If you're using the `demo_api`, you can curl the `/v1/liveness` endpoint on the `localhost` URL printed above (e.g., `curl http://localhost:35432/v1/liveness`) +3. Return to the dashboard and navigate to: ``` http://localhost:3000/versions http://localhost:3000/deployments ``` +### Important: Your Application Must Listen on the PORT Environment Variable + +**Your deployed application MUST read the `PORT` environment variable and listen on that port.** The platform sets `PORT=8080` in the container, and your code needs to use this value. 
+ +**Example for different languages:** + +```javascript +// Node.js +const port = process.env.PORT || 3000; +app.listen(port, () => { + console.log(`Server running on port ${port}`); +}); +``` + +```python +# Python +import os +port = int(os.environ.get('PORT', 3000)) +app.run(host='0.0.0.0', port=port) +``` + +```go +// Go +port := os.Getenv("PORT") +if port == "" { + port = "3000" +} +http.ListenAndServe(":"+port, handler) +``` + +The demo_api already follows this pattern and listens on the PORT environment variable. + +## Troubleshooting + +- If you see "port is already allocated" errors, the system will automatically retry with a new random port +- Check container logs: `docker logs <container-id>` +- Verify the demo_api is listening on the PORT environment variable (should be 8080) +- Make sure your Dockerfile configures the correct port (the demo_api Dockerfile sets `ENV PORT 8080`) diff --git a/go/apps/ctrl/services/version/deploy_workflow.go b/go/apps/ctrl/services/version/deploy_workflow.go index 4818d6e455..577515aa3c 100644 --- a/go/apps/ctrl/services/version/deploy_workflow.go +++ b/go/apps/ctrl/services/version/deploy_workflow.go @@ -7,6 +7,7 @@ import ( "strings" "time" + "connectrpc.com/connect" vmprovisionerv1 "github.com/unkeyed/unkey/go/gen/proto/metal/vmprovisioner/v1" "github.com/unkeyed/unkey/go/gen/proto/metal/vmprovisioner/v1/vmprovisionerv1connect" "github.com/unkeyed/unkey/go/pkg/db" @@ -188,100 +189,53 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro createResult, err := hydra.Step(ctx, "metald-create-vm", func(stepCtx context.Context) (*vmprovisionerv1.CreateVmResponse, error) { w.logger.Info("creating VM for deployment", "version_id", req.VersionID, "docker_image", req.DockerImage, "workspace_id", req.WorkspaceID, "project_id", req.ProjectID) - // MOCK: VM configuration no longer needed since we're bypassing metald - // TODO: Remove this comment when real metald calls are restored - _ = &vmprovisionerv1.VmConfig{ + // Create VM 
configuration for Docker backend + vmConfig := &vmprovisionerv1.VmConfig{ Cpu: &vmprovisionerv1.CpuConfig{ - VcpuCount: 2, - MaxVcpuCount: 4, - Topology: nil, - Features: nil, + VcpuCount: 1, }, Memory: &vmprovisionerv1.MemoryConfig{ - SizeBytes: 2 * 1024 * 1024 * 1024, // 2GB - MaxSizeBytes: 8 * 1024 * 1024 * 1024, // 8GB - HotplugEnabled: true, - Backing: nil, + SizeBytes: 536870912, // 512MB }, Boot: &vmprovisionerv1.BootConfig{ - KernelPath: "/opt/vm-assets/vmlinux", - KernelArgs: "console=ttyS0 reboot=k panic=1 pci=off", - InitrdPath: "", - BootOptions: nil, + KernelPath: "/boot/vmlinux", + InitrdPath: "/boot/initrd", + KernelArgs: "console=ttyS0 quiet", }, Storage: []*vmprovisionerv1.StorageDevice{{ - Id: "rootfs", - Path: "/opt/vm-assets/rootfs.ext4", - ReadOnly: false, - IsRootDevice: true, - InterfaceType: "virtio-blk", - Options: map[string]string{ - "docker_image": req.DockerImage, - "auto_build": "true", - }, + Id: "root", + Path: "/dev/vda", }}, - Network: []*vmprovisionerv1.NetworkInterface{{ - Id: "eth0", - InterfaceType: "virtio-net", - Mode: vmprovisionerv1.NetworkMode_NETWORK_MODE_DUAL_STACK, - Ipv4Config: &vmprovisionerv1.IPv4Config{ - Dhcp: true, - Address: "", - Netmask: "", - Gateway: "", - DnsServers: nil, - }, - Ipv6Config: &vmprovisionerv1.IPv6Config{ - Slaac: true, - PrivacyExtensions: true, - Address: "", - PrefixLength: 0, - Gateway: "", - DnsServers: nil, - LinkLocal: "", - }, - }}, - Console: &vmprovisionerv1.ConsoleConfig{ - Enabled: true, - Output: "/tmp/standard-vm-console.log", - Input: "", - ConsoleType: "serial", - }, Metadata: map[string]string{ - "template": "standard", - "purpose": "general", - "docker_image": req.DockerImage, - "runtime": "docker", - "version_id": req.VersionID, - "workspace_id": req.WorkspaceID, - "project_id": req.ProjectID, - "created_by": "deploy-workflow", + "docker_image": req.DockerImage, + "exposed_ports": "8080/tcp", + "env_vars": "PORT=8080", + "version_id": req.VersionID, + "workspace_id": 
req.WorkspaceID, + "project_id": req.ProjectID, + "created_by": "deploy-workflow", }, } - // MOCK: Bypassing metald CreateVm call due to missing VM infrastructure - // TODO: Remove this mock and use real metald call once VM assets are available - w.logger.Info("MOCK: Simulating VM creation request", "docker_image", req.DockerImage) - - // Generate realistic mock VM ID and response - mockVMID := uid.New("vm") // Generate mock VM ID - resp := &vmprovisionerv1.CreateVmResponse{ - VmId: mockVMID, - State: vmprovisionerv1.VmState_VM_STATE_CREATED, + // Make real metald CreateVm call + resp, err := w.metaldClient.CreateVm(stepCtx, connect.NewRequest(&vmprovisionerv1.CreateVmRequest{ + Config: vmConfig, + })) + if err != nil { + w.logger.Error("metald CreateVm call failed", "error", err, "docker_image", req.DockerImage) + return nil, fmt.Errorf("failed to create VM: %w", err) } - w.logger.Info("MOCK: VM creation simulated successfully", "vm_id", mockVMID, "docker_image", req.DockerImage) - - w.logger.Info("VM created successfully", "vm_id", resp.GetVmId(), "state", resp.GetState().String(), "docker_image", req.DockerImage) + w.logger.Info("VM created successfully", "vm_id", resp.Msg.VmId, "state", resp.Msg.State.String(), "docker_image", req.DockerImage) - return resp, nil + return resp.Msg, nil }) if err != nil { w.logger.Error("VM creation failed", "error", err, "version_id", req.VersionID) return err } - w.logger.Info("VM creation completed", "vm_id", createResult.GetVmId(), "state", createResult.GetState().String()) + w.logger.Info("VM creation completed", "vm_id", createResult.VmId, "state", createResult.State.String()) // Step 8: Log building rootfs err = hydra.StepVoid(ctx, "log-building-rootfs", func(stepCtx context.Context) error { @@ -365,97 +319,68 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro return err } - // Step 13: Poll VM status (network calls to metald) - w.logger.Info("starting VM status polling", "vm_id", 
createResult.GetVmId(), "max_attempts", 30) - - _, err = hydra.Step(ctx, "metald-poll-vm-status", func(stepCtx context.Context) (*struct{}, error) { - for attempt := 1; attempt <= 30; attempt++ { - w.logger.Info("checking VM status", "vm_id", createResult.GetVmId(), "attempt", attempt) - - // MOCK: Bypassing metald GetVmInfo call - simulating realistic VM preparation - // TODO: Remove this mock and use real metald call once VM assets are available - w.logger.Info("MOCK: Simulating VM status request", "vm_id", createResult.GetVmId(), "attempt", attempt) - - // Simulate realistic VM preparation progression - var mockState vmprovisionerv1.VmState - if attempt <= 2 { - mockState = vmprovisionerv1.VmState_VM_STATE_UNSPECIFIED // Use UNSPECIFIED to simulate building state - w.logger.Info("MOCK: VM still building", "vm_id", createResult.GetVmId(), "attempt", attempt) - } else { - mockState = vmprovisionerv1.VmState_VM_STATE_CREATED - w.logger.Info("MOCK: VM preparation complete", "vm_id", createResult.GetVmId(), "attempt", attempt) - } - - resp := &vmprovisionerv1.GetVmInfoResponse{ - VmId: createResult.GetVmId(), - State: mockState, - Config: nil, - Metrics: nil, - BackendInfo: nil, - NetworkInfo: nil, - } + // Step 13: Skip VM status polling for Docker backend (VM is immediately ready) + w.logger.Info("skipping VM status polling for Docker backend", "vm_id", createResult.VmId) - w.logger.Info("VM status check", "vm_id", createResult.GetVmId(), "state", resp.GetState().String(), "attempt", attempt) - - // Check if VM is ready for boot - if resp.GetState() == vmprovisionerv1.VmState_VM_STATE_CREATED || - resp.GetState() == vmprovisionerv1.VmState_VM_STATE_RUNNING { - w.logger.Info("VM is ready", "vm_id", createResult.GetVmId(), "state", resp.GetState().String()) - return &struct{}{}, nil - } + // Step 14: Boot VM (network call to metald) + _, err = hydra.Step(ctx, "metald-boot-vm", func(stepCtx context.Context) (*vmprovisionerv1.BootVmResponse, error) { + 
w.logger.Info("booting VM", "vm_id", createResult.VmId) + + // Make real metald BootVm call + resp, err := w.metaldClient.BootVm(stepCtx, connect.NewRequest(&vmprovisionerv1.BootVmRequest{ + VmId: createResult.VmId, + })) + if err != nil { + w.logger.Error("metald BootVm call failed", "error", err, "vm_id", createResult.VmId) + return nil, fmt.Errorf("failed to boot VM: %w", err) + } - // Sleep before next attempt (except on last attempt) - if attempt < 30 { - w.logger.Info("VM not ready yet, sleeping before next check", "vm_id", createResult.GetVmId(), "state", resp.GetState().String(), "attempt", attempt, "sleep_duration", "1s") - time.Sleep(1 * time.Second) - } + if !resp.Msg.Success { + w.logger.Error("VM boot was not successful", "vm_id", createResult.VmId, "state", resp.Msg.State.String()) + return nil, fmt.Errorf("VM boot was not successful, state: %s", resp.Msg.State.String()) } - // If we reach here, we exceeded max attempts - return nil, fmt.Errorf("VM polling timed out after 30 attempts (30 seconds)") + w.logger.Info("VM booted successfully", "vm_id", createResult.VmId, "state", resp.Msg.State.String()) + return resp.Msg, nil }) if err != nil { - w.logger.Error("VM status polling failed", "error", err, "vm_id", createResult.GetVmId()) + w.logger.Error("VM boot failed", "error", err, "vm_id", createResult.VmId) return err } - // Step 14: Boot VM (network call to metald) - _, err = hydra.Step(ctx, "metald-boot-vm", func(stepCtx context.Context) (*vmprovisionerv1.BootVmResponse, error) { - w.logger.Info("booting VM", "vm_id", createResult.GetVmId()) + w.logger.Info("VM boot completed successfully", "vm_id", createResult.VmId) - // MOCK: Bypassing metald BootVm call - simulating successful boot - // TODO: Remove this mock and use real metald call once VM assets are available - w.logger.Info("MOCK: Simulating VM boot request", "vm_id", createResult.GetVmId()) + // Step 15: Get VM info to retrieve port mappings + vmInfo, err := hydra.Step(ctx, 
"metald-get-vm-info", func(stepCtx context.Context) (*vmprovisionerv1.GetVmInfoResponse, error) { + w.logger.Info("getting VM info for port mappings", "vm_id", createResult.VmId) - // Simulate successful VM boot - resp := &vmprovisionerv1.BootVmResponse{ - Success: true, - State: vmprovisionerv1.VmState_VM_STATE_RUNNING, + resp, err := w.metaldClient.GetVmInfo(stepCtx, connect.NewRequest(&vmprovisionerv1.GetVmInfoRequest{ + VmId: createResult.VmId, + })) + if err != nil { + w.logger.Error("metald GetVmInfo call failed", "error", err, "vm_id", createResult.VmId) + return nil, fmt.Errorf("failed to get VM info: %w", err) } - w.logger.Info("MOCK: VM boot simulated successfully", "vm_id", createResult.GetVmId()) - - if !resp.GetSuccess() { - w.logger.Error("VM boot was not successful", "vm_id", createResult.GetVmId(), "state", resp.GetState().String()) - return nil, fmt.Errorf("VM boot was not successful, state: %s", resp.GetState().String()) + if resp.Msg.NetworkInfo != nil { + w.logger.Info("VM info retrieved successfully", "vm_id", createResult.VmId, "port_mappings", len(resp.Msg.NetworkInfo.PortMappings)) + } else { + w.logger.Warn("VM info retrieved but no network info", "vm_id", createResult.VmId) } - w.logger.Info("VM booted successfully", "vm_id", createResult.GetVmId(), "state", resp.GetState().String()) - return resp, nil + return resp.Msg, nil }) if err != nil { - w.logger.Error("VM boot failed", "error", err, "vm_id", createResult.GetVmId()) + w.logger.Error("failed to get VM info", "error", err, "vm_id", createResult.VmId) return err } - w.logger.Info("VM boot completed successfully", "vm_id", createResult.GetVmId()) - // Step 16: Log booting VM err = hydra.StepVoid(ctx, "log-booting-vm", func(stepCtx context.Context) error { return db.Query.InsertVersionStep(stepCtx, w.db.RW(), db.InsertVersionStepParams{ VersionID: req.VersionID, Status: "booting_vm", - Message: sql.NullString{String: fmt.Sprintf("VM booted successfully: %s", createResult.GetVmId()), 
Valid: true}, + Message: sql.NullString{String: fmt.Sprintf("VM booted successfully: %s", createResult.VmId), Valid: true}, ErrorMessage: sql.NullString{String: "", Valid: false}, CreatedAt: time.Now().UnixMilli(), }) @@ -469,7 +394,9 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro assignedHostnames, err := hydra.Step(ctx, "assign-domains", func(stepCtx context.Context) ([]string, error) { w.logger.Info("assigning domains to version", "version_id", req.VersionID) - // Generate hostnames for this deployment + var hostnames []string + + // Generate primary hostname for this deployment // Use Git info for hostname generation gitInfo := git.GetInfo() branch := "main" // Default branch @@ -487,26 +414,63 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro // Generate hostnames: branch-identifier-workspace.unkey.app // Replace underscores with dashes for valid hostname format cleanIdentifier := strings.ReplaceAll(identifier, "_", "-") - hostname := fmt.Sprintf("%s-%s-%s.unkey.app", branch, cleanIdentifier, req.WorkspaceID) - // Create route entry + primaryHostname := fmt.Sprintf("%s-%s-%s.unkey.app", branch, cleanIdentifier, req.WorkspaceID) + + // Create route entry for primary hostname routeID := uid.New("route") insertErr := db.Query.InsertHostnameRoute(stepCtx, w.db.RW(), db.InsertHostnameRouteParams{ ID: routeID, WorkspaceID: req.WorkspaceID, ProjectID: req.ProjectID, - Hostname: hostname, + Hostname: primaryHostname, VersionID: req.VersionID, IsEnabled: true, CreatedAt: time.Now().UnixMilli(), UpdatedAt: sql.NullInt64{Valid: true, Int64: time.Now().UnixMilli()}, }) if insertErr != nil { - w.logger.Error("failed to create route", "error", insertErr, "hostname", hostname, "version_id", req.VersionID) - return nil, fmt.Errorf("failed to create route for hostname %s: %w", hostname, insertErr) + w.logger.Error("failed to create route", "error", insertErr, "hostname", primaryHostname, "version_id", 
req.VersionID) + return nil, fmt.Errorf("failed to create route for hostname %s: %w", primaryHostname, insertErr) + } + + hostnames = append(hostnames, primaryHostname) + w.logger.Info("primary domain assigned successfully", "hostname", primaryHostname, "version_id", req.VersionID, "route_id", routeID) + + // Add localhost:port hostname for development + w.logger.Info("checking for port mappings", "has_network_info", vmInfo.NetworkInfo != nil, "port_mappings_count", func() int { + if vmInfo.NetworkInfo != nil { + return len(vmInfo.NetworkInfo.PortMappings) + } + return 0 + }()) + + if vmInfo.NetworkInfo != nil && len(vmInfo.NetworkInfo.PortMappings) > 0 { + for _, portMapping := range vmInfo.NetworkInfo.PortMappings { + localhostHostname := fmt.Sprintf("localhost:%d", portMapping.HostPort) + + // Create route entry for localhost:port + localhostRouteID := uid.New("route") + insertErr := db.Query.InsertHostnameRoute(stepCtx, w.db.RW(), db.InsertHostnameRouteParams{ + ID: localhostRouteID, + WorkspaceID: req.WorkspaceID, + ProjectID: req.ProjectID, + Hostname: localhostHostname, + VersionID: req.VersionID, + IsEnabled: true, + CreatedAt: time.Now().UnixMilli(), + UpdatedAt: sql.NullInt64{Valid: true, Int64: time.Now().UnixMilli()}, + }) + if insertErr != nil { + w.logger.Error("failed to create localhost route", "error", insertErr, "hostname", localhostHostname, "version_id", req.VersionID) + return nil, fmt.Errorf("failed to create route for hostname %s: %w", localhostHostname, insertErr) + } + + hostnames = append(hostnames, localhostHostname) + w.logger.Info("localhost domain assigned successfully", "hostname", localhostHostname, "version_id", req.VersionID, "route_id", localhostRouteID, "container_port", portMapping.ContainerPort, "host_port", portMapping.HostPort) + } } - w.logger.Info("domain assigned successfully", "hostname", hostname, "version_id", req.VersionID, "route_id", routeID) - return []string{hostname}, nil + return hostnames, nil }) if err != nil { 
w.logger.Error("domain assignment failed", "error", err, "version_id", req.VersionID) @@ -517,7 +481,7 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro err = hydra.StepVoid(ctx, "log-assigning-domains", func(stepCtx context.Context) error { var message string if len(assignedHostnames) > 0 { - message = fmt.Sprintf("Assigned hostname: %s", assignedHostnames[0]) + message = fmt.Sprintf("Assigned hostnames: %s", strings.Join(assignedHostnames, ", ")) } else { message = "Domain assignment completed" } @@ -575,7 +539,7 @@ func (w *DeployWorkflow) Run(ctx hydra.WorkflowContext, req *DeployRequest) erro return err } - w.logger.Info("deployment workflow stage completed successfully", "version_id", req.VersionID, "vm_id", createResult.GetVmId()) + w.logger.Info("deployment workflow stage completed successfully", "version_id", req.VersionID, "vm_id", createResult.VmId) w.logger.Info("deployment workflow completed", "execution_id", ctx.ExecutionID(), diff --git a/go/cmd/version/main.go b/go/cmd/version/main.go index 3df75e423f..85cdbaa7c1 100644 --- a/go/cmd/version/main.go +++ b/go/cmd/version/main.go @@ -55,6 +55,10 @@ var createCmd = &cli.Command{ Usage: "Docker image tag (e.g., ghcr.io/user/app:tag). 
If not provided, builds from current directory", Required: false, }, + &cli.BoolFlag{ + Name: "force-build", + Usage: "Force build Docker image even if --docker-image is provided", + }, &cli.StringFlag{ Name: "dockerfile", Usage: "Path to Dockerfile", @@ -123,29 +127,34 @@ func createAction(ctx context.Context, cmd *cli.Command) error { dockerfile := cmd.String("dockerfile") buildContext := cmd.String("context") + // Always build the image, ignoring any provided docker-image + dockerImage = "" + return runDeploymentSteps(ctx, cmd, workspaceID, projectID, branch, dockerImage, dockerfile, buildContext, commit, logger) } -func printDeploymentComplete(versionID, workspace, branch string) { - // Use actual Git info for hostname generation - gitInfo := git.GetInfo() - identifier := versionID - if gitInfo.IsRepo && gitInfo.CommitSHA != "" { - identifier = gitInfo.CommitSHA - } - +func printDeploymentComplete(version *ctrlv1.Version) { fmt.Println() fmt.Println("Deployment Complete") - fmt.Printf(" Version ID: %s\n", versionID) + fmt.Printf(" Version ID: %s\n", version.GetId()) fmt.Printf(" Status: Ready\n") fmt.Printf(" Environment: Production\n") fmt.Println() fmt.Println("Domains") - // Replace underscores with dashes for valid hostname format - cleanIdentifier := strings.ReplaceAll(identifier, "_", "-") - fmt.Printf(" https://%s-%s-%s.unkey.app\n", branch, cleanIdentifier, workspace) - fmt.Printf(" https://api.acme.com\n") + hostnames := version.GetHostnames() + if len(hostnames) > 0 { + for _, hostname := range hostnames { + // Check if it's a localhost hostname (don't add https://) + if strings.HasPrefix(hostname, "localhost:") { + fmt.Printf(" http://%s\n", hostname) + } else { + fmt.Printf(" https://%s\n", hostname) + } + } + } else { + fmt.Printf(" No hostnames assigned\n") + } } func runDeploymentSteps(ctx context.Context, cmd *cli.Command, workspace, project, branch, dockerImage, dockerfile, buildContext, commit string, logger logging.Logger) error { @@ -324,17 
+333,18 @@ func runDeploymentSteps(ctx context.Context, cmd *cli.Command, workspace, projec fmt.Printf(" Version ID: %s\n", versionID) // Poll for version status updates - if err := pollVersionStatus(ctx, logger, client, versionID); err != nil { + finalVersion, err := pollVersionStatus(ctx, logger, client, versionID) + if err != nil { return fmt.Errorf("deployment failed: %w", err) } - printDeploymentComplete(versionID, workspace, branch) + printDeploymentComplete(finalVersion) return nil } // pollVersionStatus polls the control plane API and displays deployment steps as they occur -func pollVersionStatus(ctx context.Context, logger logging.Logger, client ctrlv1connect.VersionServiceClient, versionID string) error { +func pollVersionStatus(ctx context.Context, logger logging.Logger, client ctrlv1connect.VersionServiceClient, versionID string) (*ctrlv1.Version, error) { ticker := time.NewTicker(2 * time.Second) defer ticker.Stop() @@ -346,10 +356,10 @@ func pollVersionStatus(ctx context.Context, logger logging.Logger, client ctrlv1 for { select { case <-ctx.Done(): - return ctx.Err() + return nil, ctx.Err() case <-timeout.C: fmt.Printf("Error: Deployment timeout after 5 minutes\n") - return fmt.Errorf("deployment timeout") + return nil, fmt.Errorf("deployment timeout") case <-ticker.C: // Always poll version status getReq := connect.NewRequest(&ctrlv1.GetVersionRequest{ @@ -377,12 +387,12 @@ func pollVersionStatus(ctx context.Context, logger logging.Logger, client ctrlv1 // Check if deployment is complete if version.GetStatus() == ctrlv1.VersionStatus_VERSION_STATUS_ACTIVE { - return nil + return version, nil } // Check if deployment failed if version.GetStatus() == ctrlv1.VersionStatus_VERSION_STATUS_FAILED { - return fmt.Errorf("deployment failed") + return nil, fmt.Errorf("deployment failed") } } } diff --git a/go/demo_api/Dockerfile b/go/demo_api/Dockerfile index 2e069edfb7..bbb1b96c87 100644 --- a/go/demo_api/Dockerfile +++ b/go/demo_api/Dockerfile @@ -17,6 
+17,6 @@ WORKDIR /root/ COPY --from=builder /app/main . -EXPOSE 8080 +ENV PORT 8080 CMD ["./main"] diff --git a/go/deploy/Dockerfile.dev b/go/deploy/Dockerfile.dev index b5deff6e8d..c867950f72 100644 --- a/go/deploy/Dockerfile.dev +++ b/go/deploy/Dockerfile.dev @@ -1,61 +1,53 @@ # Dockerfile.dev - Development environment for all Unkey deploy services # Based on LOCAL_DEPLOYMENT_GUIDE.md for maximum production parity -# Build stage - compile all services -FROM fedora:42 AS builder +# Install stage - install all dependencies once +FROM fedora:42 AS install -# Install development tools (following LOCAL_DEPLOYMENT_GUIDE.md) +# Install all dependencies (dev tools + runtime deps + Docker CLI) RUN dnf install -y dnf-plugins-core && \ dnf group install -y development-tools && \ - dnf install -y git make golang curl wget iptables-legacy && \ + dnf install -y git make golang curl wget iptables-legacy \ + systemd systemd-devel procps-ng util-linux && \ + dnf config-manager addrepo --from-repofile=https://download.docker.com/linux/fedora/docker-ce.repo && \ + dnf install -y docker-ce-cli && \ dnf clean all - # Set up Go environment ENV GOPATH=/go ENV PATH=$PATH:/go/bin:/usr/local/go/bin +# Base build stage with source code +FROM install AS build-base + # Copy source code COPY . 
/src/go WORKDIR /src/go # Protobuf files are already generated in go/proto/ - no need to generate them again -# Build all services directly using go build (protobufs already generated) -# Go will download dependencies as needed during build +# Build assetmanagerd +FROM build-base AS build-assetmanagerd WORKDIR /src/go/deploy/assetmanagerd RUN go build -o assetmanagerd ./cmd/assetmanagerd +# Build billaged +FROM build-base AS build-billaged WORKDIR /src/go/deploy/billaged RUN go build -o billaged ./cmd/billaged +# Build builderd +FROM build-base AS build-builderd WORKDIR /src/go/deploy/builderd RUN go build -o builderd ./cmd/builderd +# Build metald +FROM build-base AS build-metald WORKDIR /src/go/deploy/metald RUN go build -o metald ./cmd/metald -# Runtime stage - Fedora with systemd -FROM fedora:42 - -# Install runtime dependencies -RUN dnf update -y && \ - dnf install -y \ - systemd \ - systemd-devel \ - iptables-legacy \ - curl \ - wget \ - procps-ng \ - util-linux \ - && \ - dnf clean all - -# Install Docker CLI for metald Docker backend -RUN dnf install -y dnf-plugins-core && \ - dnf config-manager addrepo --from-repofile=https://download.docker.com/linux/fedora/docker-ce.repo && \ - dnf install -y docker-ce-cli && \ - dnf clean all +# Runtime stage - reuse install stage (all deps already installed) +FROM install AS runtime # Create billaged user (following systemd service requirements) RUN useradd -r -s /bin/false billaged @@ -72,11 +64,11 @@ RUN mkdir -p /opt/assetmanagerd/{cache,data} && \ # Set ownership for service directories RUN chown -R billaged:billaged /opt/billaged /var/log/billaged -# Copy built binaries from builder stage -COPY --from=builder /src/go/deploy/assetmanagerd/assetmanagerd /usr/local/bin/ -COPY --from=builder /src/go/deploy/billaged/billaged /usr/local/bin/ -COPY --from=builder /src/go/deploy/builderd/builderd /usr/local/bin/ -COPY --from=builder /src/go/deploy/metald/metald /usr/local/bin/ +# Copy built binaries from respective build 
stages +COPY --from=build-assetmanagerd /src/go/deploy/assetmanagerd/assetmanagerd /usr/local/bin/ +COPY --from=build-billaged /src/go/deploy/billaged/billaged /usr/local/bin/ +COPY --from=build-builderd /src/go/deploy/builderd/builderd /usr/local/bin/ +COPY --from=build-metald /src/go/deploy/metald/metald /usr/local/bin/ # Make binaries executable @@ -217,7 +209,7 @@ LABEL org.unkey.component="deploy-services" \ # AIDEV-NOTE: This Dockerfile follows the LOCAL_DEPLOYMENT_GUIDE.md as closely as possible # Key features: # 1. Uses Fedora 42 (production parity) -# 2. Multi-stage build with development tools +# 2. Multi-stage build with parallel service compilation for faster builds # 3. systemd as process manager # 4. All services built using existing Makefiles # 5. TLS disabled for development diff --git a/go/deploy/metald/internal/backend/docker/client.go b/go/deploy/metald/internal/backend/docker/client.go index 7e03174298..ba43700fde 100644 --- a/go/deploy/metald/internal/backend/docker/client.go +++ b/go/deploy/metald/internal/backend/docker/client.go @@ -4,14 +4,16 @@ import ( "context" "encoding/json" "fmt" + "io" "log/slog" - "net" + "math/rand" "strconv" "strings" "sync" "time" "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/image" "github.com/docker/docker/client" "github.com/docker/go-connections/nat" backendtypes "github.com/unkeyed/unkey/go/deploy/metald/internal/backend/types" @@ -64,7 +66,7 @@ func NewDockerBackend(logger *slog.Logger, config *DockerBackendConfig) (*Docker // Verify Docker connection ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - + if _, err := dockerClient.Ping(ctx); err != nil { return nil, fmt.Errorf("failed to connect to Docker daemon: %w", err) } @@ -126,6 +128,7 @@ func NewDockerBackend(logger *slog.Logger, config *DockerBackendConfig) (*Docker vmErrorCounter: vmErrorCounter, } + return backend, nil } @@ -222,6 +225,17 @@ func (d *DockerBackend) 
BootVM(ctx context.Context, vmID string) error { slog.String("container_id", vm.ContainerID), ) + // Check if container still exists before starting + _, err := d.dockerClient.ContainerInspect(ctx, vm.ContainerID) + if err != nil { + span.RecordError(err) + d.vmErrorCounter.Add(ctx, 1, metric.WithAttributes( + attribute.String("operation", "boot"), + attribute.String("error", "container_not_found"), + )) + return fmt.Errorf("container not found before start: %w", err) + } + // Start container if err := d.dockerClient.ContainerStart(ctx, vm.ContainerID, container.StartOptions{}); err != nil { span.RecordError(err) @@ -235,7 +249,7 @@ func (d *DockerBackend) BootVM(ctx context.Context, vmID string) error { // Update VM state and network info d.mutex.Lock() vm.State = metaldv1.VmState_VM_STATE_RUNNING - + // Get container network info networkInfo, err := d.getContainerNetworkInfo(ctx, vm.ContainerID) if err != nil { @@ -549,10 +563,10 @@ func (d *DockerBackend) GetVMMetrics(ctx context.Context, vmID string) (*backend Timestamp: time.Now(), CpuTimeNanos: int64(dockerStats.CPUStats.CPUUsage.TotalUsage), MemoryUsageBytes: int64(dockerStats.MemoryStats.Usage), - DiskReadBytes: 0, // TODO: Calculate from BlkioStats - DiskWriteBytes: 0, // TODO: Calculate from BlkioStats - NetworkRxBytes: 0, // TODO: Calculate from NetworkStats - NetworkTxBytes: 0, // TODO: Calculate from NetworkStats + DiskReadBytes: 0, // TODO: Calculate from BlkioStats + DiskWriteBytes: 0, // TODO: Calculate from BlkioStats + NetworkRxBytes: 0, // TODO: Calculate from NetworkStats + NetworkTxBytes: 0, // TODO: Calculate from NetworkStats } // Calculate disk I/O @@ -609,12 +623,12 @@ func (d *DockerBackend) vmConfigToContainerSpec(ctx context.Context, vmID string CPUs: float64(config.GetCpu().GetVcpuCount()), } - // Determine image from metadata or use default - if dockerImage, ok := config.Metadata["docker_image"]; ok { - spec.Image = dockerImage - } else { - spec.Image = d.config.DefaultImage + // 
Docker image must be specified in metadata + dockerImage, ok := config.Metadata["docker_image"] + if !ok || dockerImage == "" { + return nil, fmt.Errorf("docker_image must be specified in VM config metadata") } + spec.Image = dockerImage // Extract exposed ports from metadata if exposedPorts, ok := config.Metadata["exposed_ports"]; ok { @@ -626,6 +640,16 @@ func (d *DockerBackend) vmConfigToContainerSpec(ctx context.Context, vmID string } } + // Extract environment variables from metadata + if envVars, ok := config.Metadata["env_vars"]; ok { + vars := strings.Split(envVars, ",") + for _, envVar := range vars { + if envVar = strings.TrimSpace(envVar); envVar != "" { + spec.Env = append(spec.Env, envVar) + } + } + } + // Allocate host ports for exposed ports for _, exposedPort := range spec.ExposedPorts { containerPort, err := strconv.Atoi(strings.Split(exposedPort, "/")[0]) @@ -633,19 +657,15 @@ func (d *DockerBackend) vmConfigToContainerSpec(ctx context.Context, vmID string continue } - hostPort, err := d.portAllocator.allocatePort(vmID) - if err != nil { - return nil, fmt.Errorf("failed to allocate port: %w", err) - } - protocol := "tcp" if strings.Contains(exposedPort, "/udp") { protocol = "udp" } + // We'll allocate the port during container creation with retry logic spec.PortMappings = append(spec.PortMappings, PortMapping{ ContainerPort: containerPort, - HostPort: hostPort, + HostPort: 0, // Will be allocated during creation Protocol: protocol, }) } @@ -660,6 +680,27 @@ func (d *DockerBackend) createContainer(ctx context.Context, spec *ContainerSpec ) defer span.End() + d.logger.Info("checking if image exists locally", "image", spec.Image) + _, err := d.dockerClient.ImageInspect(ctx, spec.Image) + if err != nil { + d.logger.Info("image not found locally, pulling image", "image", spec.Image, "error", err.Error()) + pullResponse, err := d.dockerClient.ImagePull(ctx, spec.Image, image.PullOptions{}) + if err != nil { + return "", fmt.Errorf("failed to pull image 
%s: %w", spec.Image, err) + } + defer pullResponse.Close() + + // Read the pull response to completion to ensure pull finishes + _, err = io.ReadAll(pullResponse) + if err != nil { + return "", fmt.Errorf("failed to read pull response for image %s: %w", spec.Image, err) + } + + d.logger.Info("image pulled successfully", "image", spec.Image) + } else { + d.logger.Info("image found locally, skipping pull", "image", spec.Image) + } + // Build container configuration config := &container.Config{ Image: spec.Image, @@ -670,6 +711,9 @@ func (d *DockerBackend) createContainer(ctx context.Context, spec *ContainerSpec WorkingDir: spec.WorkingDir, } + // Log the container command for debugging + d.logger.Info("container configuration", "image", spec.Image, "cmd", config.Cmd, "env", config.Env) + // Set up exposed ports for _, mapping := range spec.PortMappings { port := nat.Port(fmt.Sprintf("%d/%s", mapping.ContainerPort, mapping.Protocol)) @@ -679,35 +723,77 @@ func (d *DockerBackend) createContainer(ctx context.Context, spec *ContainerSpec // Build host configuration hostConfig := &container.HostConfig{ PortBindings: make(nat.PortMap), - AutoRemove: d.config.AutoRemove, + AutoRemove: false, // Don't auto-remove containers for debugging Privileged: d.config.Privileged, Resources: container.Resources{ - Memory: spec.Memory, + Memory: spec.Memory, NanoCPUs: int64(spec.CPUs * 1e9), }, } - // Set up port bindings - for _, mapping := range spec.PortMappings { - containerPort := nat.Port(fmt.Sprintf("%d/%s", mapping.ContainerPort, mapping.Protocol)) - hostConfig.PortBindings[containerPort] = []nat.PortBinding{ - { - HostIP: "0.0.0.0", - HostPort: strconv.Itoa(mapping.HostPort), - }, + // Set up port bindings with retry logic + maxRetries := 5 + for retry := 0; retry < maxRetries; retry++ { + // Clear previous port bindings + hostConfig.PortBindings = make(nat.PortMap) + + // Allocate ports for this attempt + var allocatedPorts []int + portAllocationFailed := false + + for i, 
mapping := range spec.PortMappings { + if mapping.HostPort == 0 { + // Allocate a new port + hostPort, err := d.portAllocator.allocatePort(spec.Labels["unkey.vm.id"]) + if err != nil { + // Release any ports allocated in this attempt + for _, port := range allocatedPorts { + d.portAllocator.releasePort(port, spec.Labels["unkey.vm.id"]) + } + portAllocationFailed = true + break + } + spec.PortMappings[i].HostPort = hostPort + allocatedPorts = append(allocatedPorts, hostPort) + } + + containerPort := nat.Port(fmt.Sprintf("%d/%s", mapping.ContainerPort, mapping.Protocol)) + hostConfig.PortBindings[containerPort] = []nat.PortBinding{ + { + HostIP: "0.0.0.0", + HostPort: strconv.Itoa(spec.PortMappings[i].HostPort), + }, + } + } + + if portAllocationFailed { + continue // Try again with new ports } - } - // Create container - containerName := d.config.ContainerPrefix + spec.Labels["unkey.vm.id"] - resp, err := d.dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, containerName) - if err != nil { - span.RecordError(err) - return "", fmt.Errorf("failed to create container: %w", err) + // Create container + containerName := d.config.ContainerPrefix + spec.Labels["unkey.vm.id"] + resp, err := d.dockerClient.ContainerCreate(ctx, config, hostConfig, nil, nil, containerName) + if err != nil { + // If it's a port binding error, release ports and try again + if strings.Contains(err.Error(), "port is already allocated") || strings.Contains(err.Error(), "bind") { + for _, port := range allocatedPorts { + d.portAllocator.releasePort(port, spec.Labels["unkey.vm.id"]) + } + d.logger.Warn("port binding failed, retrying with new ports", "error", err, "retry", retry+1) + continue + } + // Other errors are not retryable + span.RecordError(err) + return "", fmt.Errorf("failed to create container: %w", err) + } + + // Success! 
+ span.SetAttributes(attribute.String("container_id", resp.ID)) + return resp.ID, nil } - span.SetAttributes(attribute.String("container_id", resp.ID)) - return resp.ID, nil + // If we get here, all retries failed + return "", fmt.Errorf("failed to create container after %d retries due to port conflicts", maxRetries) } // getContainerNetworkInfo gets network information for a container @@ -739,6 +825,47 @@ func (d *DockerBackend) getContainerNetworkInfo(ctx context.Context, containerID } } + // Add port mappings from container inspect + if inspect.NetworkSettings != nil && inspect.NetworkSettings.Ports != nil { + var portMappings []*metaldv1.PortMapping + for containerPort, hostBindings := range inspect.NetworkSettings.Ports { + if len(hostBindings) > 0 { + // Parse container port (e.g., "3000/tcp" -> 3000) + portStr := strings.Split(string(containerPort), "/")[0] + containerPortNum, err := strconv.Atoi(portStr) + if err != nil { + continue + } + + // Get protocol (tcp/udp) + protocol := "tcp" + if strings.Contains(string(containerPort), "/udp") { + protocol = "udp" + } + + // Add mapping for each host binding + for _, hostBinding := range hostBindings { + hostPortNum, err := strconv.Atoi(hostBinding.HostPort) + if err != nil { + continue + } + + portMappings = append(portMappings, &metaldv1.PortMapping{ + ContainerPort: int32(containerPortNum), + HostPort: int32(hostPortNum), + Protocol: protocol, + }) + } + } + } + + // Initialize networkInfo if it doesn't exist + if networkInfo == nil { + networkInfo = &metaldv1.VmNetworkInfo{} + } + networkInfo.PortMappings = portMappings + } + return networkInfo, nil } @@ -749,18 +876,17 @@ func (pa *portAllocator) allocatePort(vmID string) (int, error) { pa.mutex.Lock() defer pa.mutex.Unlock() - // Find available port - for port := pa.minPort; port <= pa.maxPort; port++ { + // Try random ports to avoid conflicts + maxAttempts := 100 + for attempt := 0; attempt < maxAttempts; attempt++ { + port := 
rand.Intn(pa.maxPort-pa.minPort+1) + pa.minPort if _, exists := pa.allocated[port]; !exists { - // Check if port is actually available - if pa.isPortAvailable(port) { - pa.allocated[port] = vmID - return port, nil - } + pa.allocated[port] = vmID + return port, nil } } - return 0, fmt.Errorf("no available ports in range %d-%d", pa.minPort, pa.maxPort) + return 0, fmt.Errorf("no available ports in range %d-%d after %d attempts", pa.minPort, pa.maxPort, maxAttempts) } // releasePort releases a port from a VM @@ -773,15 +899,6 @@ func (pa *portAllocator) releasePort(port int, vmID string) { } } -// isPortAvailable checks if a port is available on the host -func (pa *portAllocator) isPortAvailable(port int) bool { - conn, err := net.Listen("tcp", fmt.Sprintf(":%d", port)) - if err != nil { - return false - } - conn.Close() - return true -} // Ensure DockerBackend implements Backend interface var _ backendtypes.Backend = (*DockerBackend)(nil) @@ -793,4 +910,4 @@ var _ backendtypes.Backend = (*DockerBackend)(nil) // 2. No privileged operations - Docker daemon handles isolation // 3. Familiar container semantics - easier debugging and monitoring // 4. Fast startup times - containers start instantly vs VM boot time -// 5. Resource efficiency - shared kernel, no VM overhead \ No newline at end of file +// 5. 
Resource efficiency - shared kernel, no VM overhead diff --git a/go/deploy/metald/internal/backend/docker/types.go b/go/deploy/metald/internal/backend/docker/types.go index 2022890f8b..87d26876ed 100644 --- a/go/deploy/metald/internal/backend/docker/types.go +++ b/go/deploy/metald/internal/backend/docker/types.go @@ -50,8 +50,6 @@ type DockerBackendConfig struct { // ContainerPrefix is the prefix for container names (defaults to unkey-vm-) ContainerPrefix string `json:"container_prefix,omitempty"` - // DefaultImage is the default image to use if none specified - DefaultImage string `json:"default_image,omitempty"` // PortRange defines the range of host ports to allocate PortRange struct { @@ -72,7 +70,6 @@ func DefaultDockerBackendConfig() *DockerBackendConfig { DockerHost: "", // Use default Docker socket NetworkName: "bridge", ContainerPrefix: "unkey-vm-", - DefaultImage: "alpine:latest", PortRange: struct { Min int `json:"min"` Max int `json:"max"`