diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..f756647b8a --- /dev/null +++ b/Makefile @@ -0,0 +1,39 @@ +.PHONY: pull build down up migrate-clickhouse migrate-clickhouse-reset integration generate-sql nuke-docker + +pull: + docker compose -f ./deployment/docker-compose.yaml pull + +build: pull + docker compose -f ./deployment/docker-compose.yaml build + +down: + docker compose -f ./deployment/docker-compose.yaml down + +up: down build + docker compose -f ./deployment/docker-compose.yaml up -d + +migrate-clickhouse: + @export GOOSE_DRIVER=clickhouse && \ + export GOOSE_DBSTRING="tcp://default:password@127.0.0.1:9000" && \ + export GOOSE_MIGRATION_DIR=./internal/clickhouse/schema && \ + goose up + +migrate-clickhouse-reset: + @export GOOSE_DRIVER=clickhouse && \ + export GOOSE_DBSTRING="tcp://default:password@127.0.0.1:9000" && \ + export GOOSE_MIGRATION_DIR=./internal/clickhouse/schema && \ + goose down-to 0 + +integration: up + @cd apps/api && \ + $(MAKE) seed && \ + pnpm test:integration + +generate-sql: + @cd internal/db && \ + pnpm drizzle-kit generate --dialect=mysql + +nuke-docker: # docker stop fails when no containers exist, so ignore its status and still prune + docker stop $$(docker ps -aq) || true + docker system prune -af + docker volume prune --all -f diff --git a/Taskfile.yml b/Taskfile.yml deleted file mode 100644 index cbe10e4c2a..0000000000 --- a/Taskfile.yml +++ /dev/null @@ -1,63 +0,0 @@ -version: "3" - -tasks: - pull: - cmds: - - docker compose -f ./deployment/docker-compose.yaml pull - build: - deps: [pull] - cmds: - - docker compose -f ./deployment/docker-compose.yaml build - - down: - cmds: - - docker compose -f ./deployment/docker-compose.yaml down - up: - deps: [down, build] - cmds: - - docker compose -f ./deployment/docker-compose.yaml up -d - - migrate: - cmds: - - task: migrate-db - - task: migrate-clickhouse - - migrate-clickhouse: - env: - GOOSE_DRIVER: clickhouse - GOOSE_DBSTRING: "tcp://default:password@127.0.0.1:9000" - GOOSE_MIGRATION_DIR: ./internal/clickhouse/schema - cmds: - - goose up - 
migrate-clickhouse-reset: - env: - GOOSE_DRIVER: clickhouse - GOOSE_DBSTRING: "tcp://default:password@127.0.0.1:9000" - GOOSE_MIGRATION_DIR: ./internal/clickhouse/schema - cmds: - - goose down-to 0 - - migrate-db: - env: - DRIZZLE_DATABASE_URL: "mysql://unkey:password@localhost:3306/unkey" - dir: internal/db - cmds: - - pnpm drizzle-kit push - - integration: - deps: [up] - dir: apps/api - cmds: - - task: seed - - pnpm test:integration - - generate-sql: - dir: internal/db - cmds: - - pnpm drizzle-kit generate --dialect=mysql - - nuke-docker: - cmds: - - docker stop $(docker ps -aq) || true - - docker system prune -af - - docker volume prune --all -f diff --git a/apps/agent/Makefile b/apps/agent/Makefile new file mode 100644 index 0000000000..07c5835eb2 --- /dev/null +++ b/apps/agent/Makefile @@ -0,0 +1,26 @@ +.PHONY: install fmt test build race lint generate + +install: + go mod tidy + +fmt: # format first, then lint the formatted tree + go fmt ./... && $(MAKE) lint + +test: + go test -cover -json -failfast ./... | tparse -all -progress + +build: + go build -o unkey ./cmd/main.go + +race: + go install github.com/amit-davidson/Chronos/cmd/chronos + ~/go/bin/chronos --file=./cmd/main.go --mod=$$(pwd) + +lint: + golangci-lint run + +generate: + go get github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen + mkdir -p ./pkg/openapi + go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen --config=./pkg/openapi/config.yaml ./pkg/openapi/openapi.json + buf generate \ No newline at end of file diff --git a/apps/agent/Taskfile.yml b/apps/agent/Taskfile.yml deleted file mode 100644 index 444ca5d876..0000000000 --- a/apps/agent/Taskfile.yml +++ /dev/null @@ -1,34 +0,0 @@ -version: '3' - -tasks: - install: - cmd: - go mod tidy - fmt: - cmds: - - go fmt ./... - - task: lint - test: - cmds: - - go test -cover -json -failfast ./... 
| tparse -all -progress - - build: - cmds: - - go build -o unkey ./cmd/main.go - - race: - cmds: - - go install github.com/amit-davidson/Chronos/cmd/chronos - - ~/go/bin/chronos --file=./cmd/main.go --mod={{.TASKFILE_DIR}} - - lint: - cmds: - - golangci-lint run - - generate: - cmds: - - go get github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen - - mkdir -p ./pkg/openapi - - go run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen --config=./pkg/openapi/config.yaml ./pkg/openapi/openapi.json - - - buf generate diff --git a/go/Makefile b/go/Makefile new file mode 100644 index 0000000000..2b767d0b9c --- /dev/null +++ b/go/Makefile @@ -0,0 +1,34 @@ +.PHONY: install fmt test-unit test-full build generate lint pull build-docker + +install: + go mod tidy + +fmt: # format first, then lint the formatted tree + @go fmt ./... && $(MAKE) lint + +pull: + docker pull mysql:latest + docker pull redis:latest + docker pull grafana/otel-lgtm:latest + +build-docker: + docker build -t apiv2:latest . + +test-full: pull build-docker + @export INTEGRATION_TEST=true && \ + export SIMULATION_TEST=false && \ + echo "Running full tests... this can take more than 30min... run 'make test-unit' for faster tests" && \ + go test -failfast -timeout=60m -shuffle=on -v -json ./... | tparse -all -progress -smallscreen + +test-unit: + go test -json -race -failfast -timeout=30m ./... | tparse -all -progress -smallscreen + +build: + go build -o unkey ./main.go + +generate: + go generate ./... + # buf generate + +lint: + @golangci-lint run diff --git a/go/Taskfile.yml b/go/Taskfile.yml deleted file mode 100644 index f25f92d5e4..0000000000 --- a/go/Taskfile.yml +++ /dev/null @@ -1,49 +0,0 @@ -version: "3" - -tasks: - install: - cmd: go mod tidy - fmt: - cmds: - - go fmt ./... - - task: lint - - pull: - cmds: - - docker pull mysql:latest - - docker pull redis:latest - - docker pull grafana/otel-lgtm:latest - - build-docker: - cmds: - - docker build -t apiv2:latest . 
- - test-full: - deps: - - pull - - build-docker - env: - INTEGRATION_TEST: true - SIMULATION_TEST: false - cmds: - - cmd: echo "Running full tests... this can take more than 30min... run 'task test-unit' for faster tests" - silent: true - - cmd: go test -failfast -timeout=60m -shuffle=on -v -json ./... | tparse -all -progress -smallscreen - silent: true - - test-unit: - cmds: - - go test -json -race -failfast -timeout=30m ./... | tparse -all -progress -smallscreen - - build: - cmds: - - go build -o unkey ./main.go - - generate: - cmds: - - go generate ./... - # - buf generate - - lint: - cmds: - - golangci-lint run diff --git a/go/pkg/hydra/MIGRATION_BEST_PRACTICES.md b/go/pkg/hydra/MIGRATION_BEST_PRACTICES.md deleted file mode 100644 index 6d17c168e2..0000000000 --- a/go/pkg/hydra/MIGRATION_BEST_PRACTICES.md +++ /dev/null @@ -1,226 +0,0 @@ -# Database Layer Migration Best Practices - -This document captures the key learnings from migrating Hydra from GORM to SQLC, providing guidance for future database layer migrations. - -## Migration Strategy - -### 1. Incremental Migration with Panic Protection - -**Problem**: Ensuring complete migration without missing any operations. - -**Solution**: Use panic-driven migration validation: - -```go -// GORM implementation - add panic to force migration -func (s *gormStore) CreateWorkflow(ctx context.Context, workflow *WorkflowExecution) error { - panic("CreateWorkflow has been migrated to SQLC - use engine.GetSQLCStore().CreateWorkflow() instead") -} -``` - -**Benefits**: -- Forces immediate identification of unmigrated code paths -- Prevents partial migrations -- Provides clear migration instructions - -### 2. Dual Store Architecture During Migration - -**Problem**: Maintaining system availability during migration. 
- -**Solution**: Run both stores temporarily: - -```go -type Engine struct { - store store.Store // GORM store - sqlc store.Store // SQLC store for migration -} -``` - -**Benefits**: -- Allows gradual migration -- Enables A/B testing of implementations -- Provides rollback capability - -### 3. Operation-by-Operation Migration - -**Problem**: Managing complexity of large-scale migrations. - -**Solution**: Migrate operations in logical groups: - -1. **Core Operations**: CreateWorkflow, GetWorkflow, etc. -2. **Step Operations**: CreateStep, UpdateStepStatus, etc. -3. **Worker Coordination**: AcquireWorkflowLease, HeartbeatLease, etc. -4. **Advanced Features**: Cron jobs, cleanup operations, etc. -5. **Testing Helpers**: GetAllWorkflows, GetAllSteps, etc. - -## Code Quality Patterns - -### 1. Type Conversion Helpers - -Create reusable conversion functions: - -```go -// Helper functions for converting from nullable types -func nullInt64ToPtr(n sql.NullInt64) *int64 { - if !n.Valid { - return nil - } - return &n.Int64 -} - -func nullStringToPtr(n sql.NullString) *string { - if !n.Valid { - return nil - } - return &n.String -} -``` - -### 2. Consistent Error Handling - -```go -func (s *sqlcStore) GetWorkflow(ctx context.Context, namespace, id string) (*WorkflowExecution, error) { - workflow, err := s.queries.GetWorkflow(ctx, sqlcstore.GetWorkflowParams{ - ID: id, - Namespace: namespace, - }) - if err != nil { - if err == sql.ErrNoRows { - return nil, errors.New("workflow not found") - } - return nil, err - } - // ... conversion logic -} -``` - -### 3. 
Transaction Support - -Leverage SQLC's built-in transaction support: - -```go -func (s *sqlcStore) WithTx(ctx context.Context, fn func(Store) error) error { - tx, err := s.db.BeginTx(ctx, nil) - if err != nil { - return err - } - defer tx.Rollback() - - txQueries := s.queries.WithTx(tx) - txStore := &sqlcStore{ - db: s.db, - queries: txQueries, - clock: s.clock, - } - - if err := fn(txStore); err != nil { - return err - } - return tx.Commit() -} -``` - -## Testing Strategy - -### 1. Migration Validation Tests - -- **Architectural constraints**: Prevent GORM dependencies from returning -- **Interface completeness**: Ensure all methods are implemented -- **Type conversion validation**: Verify model mappings are correct - -### 2. Operation Coverage Tests - -- **End-to-end testing**: Test complete workflows through the new store -- **Error path testing**: Verify error handling and edge cases -- **Performance testing**: Ensure new implementation meets performance requirements - -### 3. Integration Tests - -- **Cross-operation testing**: Verify operations work together correctly -- **Concurrency testing**: Test multiple workers and operations -- **Failure scenario testing**: Test behavior under database failures - -## Performance Considerations - -### 1. Query Optimization - -- Use SQLC's type-safe queries for optimal performance -- Leverage database indexes for frequently queried fields -- Use prepared statements through SQLC for repeated operations - -### 2. Connection Management - -- Share database connections between SQLC and other components -- Configure appropriate connection pool settings -- Use context for query timeouts and cancellation - -### 3. Type Conversions - -- Minimize allocations in conversion functions -- Use nullable types appropriately to avoid unnecessary conversions -- Cache conversion results where appropriate - -## Common Pitfalls - -### 1. Incomplete Migration - -**Problem**: Missing some usage sites during migration. 
- -**Prevention**: -- Use panic-driven validation -- Comprehensive grep/search for old patterns -- Architectural tests to prevent regression - -### 2. Type Conversion Errors - -**Problem**: Incorrect mapping between SQLC and domain models. - -**Prevention**: -- Create comprehensive type conversion tests -- Use consistent conversion patterns -- Validate all nullable field handling - -### 3. Transaction Boundary Issues - -**Problem**: Incorrect transaction usage with SQLC. - -**Prevention**: -- Understand SQLC's transaction patterns -- Test transaction rollback scenarios -- Use appropriate isolation levels - -## Migration Checklist - -### Pre-Migration -- [ ] Analyze current database usage patterns -- [ ] Design new schema and SQLC queries -- [ ] Create comprehensive test suite -- [ ] Plan migration phases - -### During Migration -- [ ] Implement panic protection in old code -- [ ] Migrate operations in logical groups -- [ ] Test each phase thoroughly -- [ ] Monitor system performance - -### Post-Migration -- [ ] Remove old implementation code -- [ ] Clean up unused dependencies -- [ ] Update documentation -- [ ] Add architectural constraint tests - -### Final Verification -- [ ] Run full test suite -- [ ] Performance testing -- [ ] Security review -- [ ] Documentation updates - -## Lessons Learned - -1. **Panic-driven migration** is highly effective for ensuring completeness -2. **Incremental migration** reduces risk and allows for validation at each step -3. **Type conversion helpers** reduce code duplication and errors -4. **Comprehensive testing** is crucial for migration confidence -5. **Architectural tests** prevent regression over time -6. **Documentation updates** are essential for team knowledge transfer - -This migration strategy successfully migrated a complex workflow orchestration engine with zero downtime and improved performance characteristics. 
\ No newline at end of file diff --git a/go/pkg/hydra/MIGRATION_PROGRESS.md b/go/pkg/hydra/MIGRATION_PROGRESS.md deleted file mode 100644 index 6cc0df3f76..0000000000 --- a/go/pkg/hydra/MIGRATION_PROGRESS.md +++ /dev/null @@ -1,273 +0,0 @@ -# Hydra GORM to SQLC Migration Progress - -This document tracks the progress of migrating the Hydra workflow orchestration engine from GORM to SQLC for better performance and security. - -## Project Overview - -**Objective**: Replace GORM ORM with SQLC for type-safe, performant database operations while maintaining MySQL-only support. - -**Approach**: Incremental migration using a dual store architecture to enable zero-downtime migration and constant testing. - -## Completed Work - -### 1. Security Analysis ✅ -- **File**: `hydra-security-analysis.md` -- **Scope**: Comprehensive security audit of the Hydra package -- **Key Findings**: - - Missing authentication/authorization controls - - DoS vulnerabilities from unlimited payload sizes - - Race conditions in lease-based coordination - - Potential information disclosure through error messages - - SQL injection risks (mitigated by GORM, but SQLC provides better protection) - -### 2. Database Schema Design ✅ -- **File**: `store/schema.sql` -- **Technology**: MySQL DDL with security improvements -- **Key Changes**: - - Used `VARBINARY` instead of `LONGBLOB` for size limits: - - `input_data VARBINARY(10485760)` (10MB limit for workflow inputs) - - `output_data VARBINARY(1048576)` (1MB limit for workflow outputs) - - Implemented ENUMs for type safety: - - `workflow_status ENUM('pending', 'running', 'completed', 'failed', 'sleeping')` - - `step_status ENUM('pending', 'running', 'completed', 'failed')` - - `trigger_type ENUM('api', 'cron', 'webhook')` - - Ensured compatibility with existing GORM schema - -### 3. 
SQLC Configuration ✅ -- **File**: `store/sqlc.json` (JSON format) -- **File**: `store/generate.go` (go:generate directive) -- **Configuration**: - ```json - { - "version": "2", - "sql": [ - { - "schema": "./schema.sql", - "queries": "./queries.sql", - "engine": "mysql", - "gen": { - "go": { - "package": "sqlc", - "out": "./sqlc", - "emit_json_tags": true, - "emit_db_tags": true, - "emit_prepared_queries": false, - "emit_interface": false, - "emit_exact_table_names": false, - "emit_empty_slices": true, - "overrides": [ - { - "column": "workflow_executions.input_data", - "go_type": {"type": "[]byte"} - }, - { - "column": "workflow_executions.output_data", - "go_type": {"type": "[]byte"} - }, - { - "column": "workflow_steps.output_data", - "go_type": {"type": "[]byte"} - } - ] - } - } - } - ] - } - ``` - -### 4. Query Analysis & Documentation ✅ -- **File**: `store/sqlc-queries-analysis.md` -- **Scope**: Analysis of all 25 database operations that need migration -- **Categories**: - - Workflow Execution Operations (10 operations) - - Workflow Step Operations (4 operations) - - Lease Operations (6 operations) - - Cron Job Operations (5 operations) -- **Key Insights**: Identified performance bottlenecks and race condition risks in current GORM implementation - -### 5. Dual Store Architecture ✅ -- **File**: `dual_store.go` -- **Purpose**: Enable both GORM and SQLC to coexist during incremental migration -- **Implementation**: All 25 methods implemented with GORM delegation and TODO comments for SQLC -- **Structure**: - ```go - type dualStore struct { - gorm store.Store // GORM implementation (fallback) - sqlc *sqlcstore.Queries // SQLC implementation (migration target) - db *sql.DB // Underlying database for transactions - clock clock.Clock - } - ``` - -### 6. 
Engine Constructor Simplification ✅ -- **Files**: `engine.go`, `apps/ctrl/run.go`, `test_helpers.go`, `workflow_performance_test.go` -- **Change**: Simplified constructor to take only DSN instead of pre-created Store -- **Benefits**: - - **Before**: Manual store creation required by consumers - ```go - gormStore, err := gorm.NewMySQLStore(dsn, clock) - engine := hydra.New(hydra.Config{Store: gormStore, ...}) - ``` - - **After**: Automatic store creation from DSN - ```go - engine := hydra.New(hydra.Config{DSN: dsn, ...}) - ``` - -### 7. SQLC Store Implementation ✅ -- **File**: `store/sqlc_store.go` -- **Features**: - - Independent MySQL connection (no GORM dependency) - - `NewSQLCStoreFromDSN()` function for direct DSN-based creation - - Type-safe database operations using generated SQLC code - - Proper `[]byte` handling for VARBINARY fields - -### 8. Dual Store Engine Integration ✅ -- **Implementation**: Engine now creates both GORM and SQLC stores automatically -- **Architecture**: - ```go - type Engine struct { - store store.Store // Main store (GORM) - current operations - sqlc store.Store // SQLC store - migration target - // ... other fields - } - ``` -- **Access Methods**: - - `GetStore()`: Returns GORM store (current operations) - - `GetSQLCStore()`: Returns SQLC store (for migration testing) - -## Current State - -### What Works ✅ -1. **Builds Successfully**: All packages compile without errors -2. **Both Stores Created**: Engine automatically creates GORM and SQLC stores from DSN -3. **MySQL-Only Support**: Removed SQLite dependencies, MySQL-only as requested -4. **Test Infrastructure**: Updated all test helpers to use DSN-based constructor -5. **Security Improvements**: VARBINARY size limits prevent DoS attacks -6. 
**Type Safety**: ENUMs and SQLC type generation provide compile-time safety - -### Code Architecture -``` -Engine Constructor (New) -├── DSN Input -├── Creates GORM Store ──── Current Operations (100%) -├── Creates SQLC Store ──── Future Migration Target (0% migrated) -└── Both Accessible via Methods -``` - -## Next Steps (Not Yet Started) - -### Immediate Next Tasks -1. **Generate SQLC Code**: - ```bash - cd store && go generate - ``` - -2. **Implement First SQLC Query**: Start with simple read operation like `GetWorkflow` - - Implement SQLC version in `store/sqlc_store.go` - - Add feature flag or method to switch between stores - - Write comparison tests to ensure identical behavior - -3. **Add Migration Testing**: - - Create tests that call both GORM and SQLC versions - - Verify identical results and performance characteristics - - Establish benchmarks for comparison - -### Migration Strategy -1. **Phase 1**: Read Operations (safest to start) - - `GetWorkflow` - - `GetStep` - - `GetLease` - - `GetCronJob` - -2. **Phase 2**: Simple Write Operations - - `CreateWorkflow` - - `CreateStep` - - `UpdateWorkflowStatus` - -3. **Phase 3**: Complex Operations with Transactions - - `AcquireWorkflowLease` (race condition sensitive) - - `WithTx` (transaction support) - -4. 
**Phase 4**: Complete Migration - - Switch default store from GORM to SQLC - - Remove GORM dependency - - Performance validation - -## Technical Decisions Made - -### Database Technology -- **Choice**: MySQL only (no SQLite support) -- **Rationale**: Simplified maintenance, production focus - -### Schema Improvements -- **VARBINARY vs LONGBLOB**: Size limits prevent DoS attacks -- **ENUMs vs VARCHAR**: Type safety and storage efficiency -- **Constraints**: Proper foreign keys and indexes for performance - -### Architecture Patterns -- **Dual Store**: Enables incremental migration with zero downtime -- **DSN-Based Constructor**: Simplifies consumer code -- **Interface Consistency**: Both stores implement same `store.Store` interface - -### Security Enhancements -- **Payload Size Limits**: 10MB workflow inputs, 1MB outputs -- **Type Safety**: SQLC generates type-safe Go code -- **SQL Injection Prevention**: SQLC uses prepared statements exclusively - -## Files Modified/Created - -### Core Implementation -- `store/schema.sql` - MySQL DDL schema -- `store/sqlc.json` - SQLC configuration -- `store/generate.go` - Code generation directive -- `store/sqlc_store.go` - SQLC store implementation -- `dual_store.go` - Dual store wrapper -- `engine.go` - Engine constructor with dual store support - -### Documentation -- `hydra-security-analysis.md` - Security audit results -- `store/sqlc-queries-analysis.md` - Database operations analysis -- `MIGRATION_PROGRESS.md` - This file - -### Test & Build Updates -- `test_helpers.go` - DSN-based test engine creation -- `workflow_performance_test.go` - MySQL container for benchmarks -- `go/apps/ctrl/run.go` - Updated to use DSN constructor - -## Performance & Security Improvements Expected - -### Performance Benefits -- **Prepared Statements**: SQLC generates optimized prepared statements -- **Reduced Allocations**: Direct struct mapping without reflection -- **Better Query Plans**: Hand-tuned SQL vs ORM-generated queries -- **Connection 
Pooling**: Direct control over database/sql connection pool - -### Security Benefits -- **SQL Injection Prevention**: Prepared statements only, no dynamic SQL -- **Type Safety**: Compile-time verification of SQL queries and parameters -- **Size Limits**: VARBINARY constraints prevent unbounded data growth -- **Input Validation**: Strong typing prevents parameter type confusion - -## Key Learnings - -1. **Incremental Migration**: Dual store approach allows safe, testable migration -2. **MySQL-Only Focus**: Simplified architecture and better performance optimization -3. **Security-First**: Size limits and type safety prevent common vulnerabilities -4. **Test Infrastructure**: DSN-based constructors simplify test setup -5. **SQLC Benefits**: Type safety and performance without sacrificing SQL control - -## Migration Metrics (To Be Tracked) - -When migration begins, track: -- **Coverage**: X/25 operations migrated to SQLC -- **Performance**: Query execution time comparisons -- **Memory**: Allocation differences between GORM and SQLC -- **Test Coverage**: Ensure identical behavior verification -- **Error Rates**: Monitor for any behavioral differences - ---- - -**Status**: Foundation Complete - Ready for incremental SQLC migration -**Last Updated**: 2025-07-03 -**Next Milestone**: Generate SQLC code and implement first query migration \ No newline at end of file