diff --git a/.github/workflows/job_bazel.yaml b/.github/workflows/job_bazel.yaml index cf8df657da..1d4ccd118b 100644 --- a/.github/workflows/job_bazel.yaml +++ b/.github/workflows/job_bazel.yaml @@ -33,6 +33,6 @@ jobs: # Running containers is temporary until we moved them inside of bazel, # at that point they are only created if they are actually needed - name: Start containers - run: docker compose -f ./dev/docker-compose.yaml up s3 clickhouse kafka mysql vault -d --wait + run: docker compose -f ./dev/docker-compose.yaml up s3 clickhouse mysql vault -d --wait - name: Run tests run: bazel test //... --test_output=errors diff --git a/MODULE.bazel b/MODULE.bazel index 70fa10b9d7..5a4c4978e1 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -39,6 +39,7 @@ use_repo( "com_github_go_sql_driver_mysql", "com_github_google_go_containerregistry", "com_github_google_go_containerregistry_pkg_authn_k8schain", + "com_github_hashicorp_memberlist", "com_github_maypok86_otter", "com_github_moby_buildkit", "com_github_oapi_codegen_nullable", @@ -50,7 +51,6 @@ use_repo( "com_github_prometheus_client_golang", "com_github_redis_go_redis_v9", "com_github_restatedev_sdk_go", - "com_github_segmentio_kafka_go", "com_github_shirou_gopsutil_v4", "com_github_spiffe_go_spiffe_v2", "com_github_sqlc_dev_plugin_sdk_go", diff --git a/Makefile b/Makefile index 65db120cc8..4708b5e8d3 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,7 @@ pull: ## Pull latest Docker images for services .PHONY: up up: pull ## Start all infrastructure services - @docker compose -f ./dev/docker-compose.yaml up -d planetscale mysql redis clickhouse s3 otel kafka restate ctrl-api --wait + @docker compose -f ./dev/docker-compose.yaml up -d planetscale mysql redis clickhouse s3 otel restate ctrl-api --wait .PHONY: clean clean: ## Stop and remove all services with volumes @@ -85,13 +85,14 @@ generate: generate-sql ## Generate code from protobuf and other sources rm -rf ./gen || true rm ./pkg/db/*_generated.go || true go 
generate ./... + go run ./tools/exportoneof ./gen/proto bazel run //:gazelle go fmt ./... pnpm --dir=web fmt .PHONY: test test: ## Run tests with bazel - docker compose -f ./dev/docker-compose.yaml up -d mysql clickhouse s3 kafka vault --wait + docker compose -f ./dev/docker-compose.yaml up -d mysql clickhouse s3 vault --wait bazel test //... make clean-docker-test diff --git a/cmd/api/main.go b/cmd/api/main.go index 1c03d22dda..014d7bb09e 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -73,9 +73,21 @@ var Cmd = &cli.Command{ cli.String("vault-token", "Bearer token for vault service authentication", cli.EnvVar("UNKEY_VAULT_TOKEN")), - // Kafka Configuration - cli.StringSlice("kafka-brokers", "Comma-separated list of Kafka broker addresses for distributed cache invalidation", - cli.EnvVar("UNKEY_KAFKA_BROKERS")), + // Gossip Cluster Configuration + cli.Bool("gossip-enabled", "Enable gossip-based distributed cache invalidation", + cli.Default(false), cli.EnvVar("UNKEY_GOSSIP_ENABLED")), + cli.String("gossip-bind-addr", "Address for gossip listeners. Default: 0.0.0.0", + cli.Default("0.0.0.0"), cli.EnvVar("UNKEY_GOSSIP_BIND_ADDR")), + cli.Int("gossip-lan-port", "LAN memberlist port. Default: 7946", + cli.Default(7946), cli.EnvVar("UNKEY_GOSSIP_LAN_PORT")), + cli.Int("gossip-wan-port", "WAN memberlist port for bridges. Default: 7947", + cli.Default(7947), cli.EnvVar("UNKEY_GOSSIP_WAN_PORT")), + cli.StringSlice("gossip-lan-seeds", "LAN seed addresses (e.g. 
k8s headless service DNS)", + cli.EnvVar("UNKEY_GOSSIP_LAN_SEEDS")), + cli.StringSlice("gossip-wan-seeds", "Cross-region bridge seed addresses", + cli.EnvVar("UNKEY_GOSSIP_WAN_SEEDS")), + cli.String("gossip-secret-key", "Base64-encoded AES-256 key for encrypting gossip traffic", + cli.EnvVar("UNKEY_GOSSIP_SECRET_KEY")), // ClickHouse Proxy Service Configuration cli.String( @@ -142,10 +154,9 @@ func action(ctx context.Context, cmd *cli.Command) error { config := api.Config{ // Basic configuration - CacheInvalidationTopic: "", - Platform: cmd.String("platform"), - Image: cmd.String("image"), - Region: cmd.String("region"), + Platform: cmd.String("platform"), + Image: cmd.String("image"), + Region: cmd.String("region"), // Database configuration DatabasePrimary: cmd.String("database-primary"), @@ -176,8 +187,14 @@ func action(ctx context.Context, cmd *cli.Command) error { VaultURL: cmd.String("vault-url"), VaultToken: cmd.String("vault-token"), - // Kafka configuration - KafkaBrokers: cmd.StringSlice("kafka-brokers"), + // Gossip cluster configuration + GossipEnabled: cmd.Bool("gossip-enabled"), + GossipBindAddr: cmd.String("gossip-bind-addr"), + GossipLANPort: cmd.Int("gossip-lan-port"), + GossipWANPort: cmd.Int("gossip-wan-port"), + GossipLANSeeds: cmd.StringSlice("gossip-lan-seeds"), + GossipWANSeeds: cmd.StringSlice("gossip-wan-seeds"), + GossipSecretKey: cmd.String("gossip-secret-key"), // ClickHouse proxy configuration ChproxyToken: cmd.String("chproxy-auth-token"), diff --git a/cmd/frontline/main.go b/cmd/frontline/main.go index 7023a1aeda..2d6577ef5e 100644 --- a/cmd/frontline/main.go +++ b/cmd/frontline/main.go @@ -75,6 +75,22 @@ var Cmd = &cli.Command{ cli.String("ctrl-addr", "Address of the control plane", cli.Default("localhost:8080"), cli.EnvVar("UNKEY_CTRL_ADDR")), + // Gossip Cluster Configuration + cli.Bool("gossip-enabled", "Enable gossip-based distributed cache invalidation", + cli.Default(false), cli.EnvVar("UNKEY_GOSSIP_ENABLED")), + 
cli.String("gossip-bind-addr", "Address for gossip listeners. Default: 0.0.0.0", + cli.Default("0.0.0.0"), cli.EnvVar("UNKEY_GOSSIP_BIND_ADDR")), + cli.Int("gossip-lan-port", "LAN memberlist port. Default: 7946", + cli.Default(7946), cli.EnvVar("UNKEY_GOSSIP_LAN_PORT")), + cli.Int("gossip-wan-port", "WAN memberlist port for bridges. Default: 7947", + cli.Default(7947), cli.EnvVar("UNKEY_GOSSIP_WAN_PORT")), + cli.StringSlice("gossip-lan-seeds", "LAN seed addresses (e.g. k8s headless service DNS)", + cli.EnvVar("UNKEY_GOSSIP_LAN_SEEDS")), + cli.StringSlice("gossip-wan-seeds", "Cross-region bridge seed addresses", + cli.EnvVar("UNKEY_GOSSIP_WAN_SEEDS")), + cli.String("gossip-secret-key", "Base64-encoded AES-256 key for encrypting gossip traffic", + cli.EnvVar("UNKEY_GOSSIP_SECRET_KEY")), + // Logging Sampler Configuration cli.Float("log-sample-rate", "Baseline probability (0.0-1.0) of emitting log events. Default: 1.0", cli.Default(1.0), cli.EnvVar("UNKEY_LOG_SAMPLE_RATE")), @@ -118,6 +134,15 @@ func action(ctx context.Context, cmd *cli.Command) error { VaultURL: cmd.String("vault-url"), VaultToken: cmd.String("vault-token"), + // Gossip cluster configuration + GossipEnabled: cmd.Bool("gossip-enabled"), + GossipBindAddr: cmd.String("gossip-bind-addr"), + GossipLANPort: cmd.Int("gossip-lan-port"), + GossipWANPort: cmd.Int("gossip-wan-port"), + GossipLANSeeds: cmd.StringSlice("gossip-lan-seeds"), + GossipWANSeeds: cmd.StringSlice("gossip-wan-seeds"), + GossipSecretKey: cmd.String("gossip-secret-key"), + // Logging sampler configuration LogSampleRate: cmd.Float("log-sample-rate"), LogSlowThreshold: cmd.Duration("log-slow-threshold"), diff --git a/cmd/sentinel/main.go b/cmd/sentinel/main.go index db7341b6a2..38a9ef8d20 100644 --- a/cmd/sentinel/main.go +++ b/cmd/sentinel/main.go @@ -53,6 +53,19 @@ var Cmd = &cli.Command{ cli.Default(0.25), cli.EnvVar("UNKEY_OTEL_TRACE_SAMPLING_RATE")), cli.Int("prometheus-port", "Enable Prometheus /metrics endpoint on specified port. 
Set to 0 to disable.", cli.EnvVar("UNKEY_PROMETHEUS_PORT")), + // Gossip Cluster Configuration + cli.Bool("gossip-enabled", "Enable gossip-based distributed cache invalidation", + cli.Default(false), cli.EnvVar("UNKEY_GOSSIP_ENABLED")), + cli.String("gossip-bind-addr", "Address for gossip listeners. Default: 0.0.0.0", + cli.Default("0.0.0.0"), cli.EnvVar("UNKEY_GOSSIP_BIND_ADDR")), + cli.Int("gossip-lan-port", "LAN memberlist port. Default: 7946", + cli.Default(7946), cli.EnvVar("UNKEY_GOSSIP_LAN_PORT")), + cli.Int("gossip-wan-port", "WAN memberlist port for bridges. Default: 7947", + cli.Default(7947), cli.EnvVar("UNKEY_GOSSIP_WAN_PORT")), + cli.StringSlice("gossip-lan-seeds", "LAN seed addresses (e.g. k8s headless service DNS)", + cli.EnvVar("UNKEY_GOSSIP_LAN_SEEDS")), + cli.StringSlice("gossip-wan-seeds", "Cross-region bridge seed addresses", + cli.EnvVar("UNKEY_GOSSIP_WAN_SEEDS")), // Logging Sampler Configuration cli.Float("log-sample-rate", "Baseline probability (0.0-1.0) of emitting log events. 
Default: 1.0", cli.Default(1.0), cli.EnvVar("UNKEY_LOG_SAMPLE_RATE")), @@ -83,6 +96,14 @@ func action(ctx context.Context, cmd *cli.Command) error { OtelTraceSamplingRate: cmd.Float("otel-trace-sampling-rate"), PrometheusPort: cmd.Int("prometheus-port"), + // Gossip cluster configuration + GossipEnabled: cmd.Bool("gossip-enabled"), + GossipBindAddr: cmd.String("gossip-bind-addr"), + GossipLANPort: cmd.Int("gossip-lan-port"), + GossipWANPort: cmd.Int("gossip-wan-port"), + GossipLANSeeds: cmd.StringSlice("gossip-lan-seeds"), + GossipWANSeeds: cmd.StringSlice("gossip-wan-seeds"), + // Logging sampler configuration LogSampleRate: cmd.Float("log-sample-rate"), LogSlowThreshold: cmd.Duration("log-slow-threshold"), diff --git a/dev/Tiltfile b/dev/Tiltfile index 9c6d7f70f5..75126804f6 100644 --- a/dev/Tiltfile +++ b/dev/Tiltfile @@ -180,6 +180,8 @@ docker_build_with_restart( live_update=[sync('./bin/unkey', '/unkey')] ) + + # Vault service k8s_yaml('k8s/manifests/vault.yaml') k8s_resource( diff --git a/dev/docker-compose.yaml b/dev/docker-compose.yaml index 92ab4b1836..33faad041f 100644 --- a/dev/docker-compose.yaml +++ b/dev/docker-compose.yaml @@ -76,8 +76,6 @@ services: condition: service_healthy clickhouse: condition: service_healthy - kafka: - condition: service_started ctrl-api: condition: service_started environment: @@ -111,13 +109,6 @@ services: start_period: 10s interval: 5s - # The Kafka broker, available at localhost:9092 - kafka: - container_name: kafka - image: bufbuild/bufstream:0.4.4 - network_mode: host - command: ["serve", "--inmemory"] - # Vault service for encryption and key management vault: networks: @@ -438,7 +429,6 @@ volumes: clickhouse: clickhouse-keeper: s3: - kafka_data: networks: default: diff --git a/dev/k8s/manifests/api.yaml b/dev/k8s/manifests/api.yaml index f179e7989a..30f5e8db33 100644 --- a/dev/k8s/manifests/api.yaml +++ b/dev/k8s/manifests/api.yaml @@ -23,6 +23,12 @@ spec: imagePullPolicy: Never # Use local images ports: - 
containerPort: 7070 + - containerPort: 7946 + name: gossip-lan + protocol: TCP + - containerPort: 7946 + name: gossip-lan-udp + protocol: UDP env: # Server Configuration - name: UNKEY_HTTP_PORT @@ -38,8 +44,6 @@ spec: value: "unkey:local" - name: UNKEY_REGION value: "local" - - name: UNKEY_INSTANCE_ID - value: "api-dev" # Database Configuration - name: UNKEY_DATABASE_PRIMARY value: "unkey:password@tcp(mysql:3306)/unkey?parseTime=true&interpolateParams=true" @@ -71,6 +75,13 @@ spec: # Request Body Configuration - name: UNKEY_MAX_REQUEST_BODY_SIZE value: "10485760" + # Gossip Configuration + - name: UNKEY_GOSSIP_ENABLED + value: "true" + - name: UNKEY_GOSSIP_LAN_PORT + value: "7946" + - name: UNKEY_GOSSIP_LAN_SEEDS + value: "api-gossip-lan" readinessProbe: httpGet: path: /health/ready @@ -129,3 +140,23 @@ spec: targetPort: 7070 protocol: TCP type: LoadBalancer + +--- +apiVersion: v1 +kind: Service +metadata: + name: api-gossip-lan + namespace: unkey +spec: + clusterIP: None + selector: + app: api + ports: + - name: gossip-lan + port: 7946 + targetPort: 7946 + protocol: TCP + - name: gossip-lan-udp + port: 7946 + targetPort: 7946 + protocol: UDP diff --git a/dev/k8s/manifests/cilium-policies.yaml b/dev/k8s/manifests/cilium-policies.yaml index ae986d5c5c..5657fd7b92 100644 --- a/dev/k8s/manifests/cilium-policies.yaml +++ b/dev/k8s/manifests/cilium-policies.yaml @@ -7,6 +7,48 @@ # a CiliumNetworkPolicy in the customer namespace, Cilium automatically enables # default deny for the selected endpoints. We don't need an explicit deny-all policy. --- +# 1. Allow gossip traffic between API pods +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: api-gossip-lan + namespace: unkey +spec: + endpointSelector: + matchLabels: + app: api + ingress: + - fromEndpoints: + - matchLabels: + app: api + toPorts: + - ports: + - port: "7946" + protocol: TCP + - port: "7946" + protocol: UDP +--- +# 1b. 
Allow gossip traffic between Frontline pods +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: frontline-gossip-lan + namespace: unkey +spec: + endpointSelector: + matchLabels: + app: frontline + ingress: + - fromEndpoints: + - matchLabels: + app: frontline + toPorts: + - ports: + - port: "7946" + protocol: TCP + - port: "7946" + protocol: UDP +--- # 2. Block K8s API server access from customer pods # Prevents customer workloads from accessing the Kubernetes API apiVersion: cilium.io/v2 @@ -102,6 +144,17 @@ spec: - ports: - port: "53" protocol: ANY + # Gossip between sentinel pods + - toEndpoints: + - matchLabels: + io.kubernetes.pod.namespace: sentinel + app.kubernetes.io/component: sentinel + toPorts: + - ports: + - port: "7946" + protocol: TCP + - port: "7946" + protocol: UDP # MySQL in unkey namespace - toEndpoints: - matchLabels: diff --git a/dev/k8s/manifests/frontline.yaml b/dev/k8s/manifests/frontline.yaml index c8f27b8ccb..0fa0651b85 100644 --- a/dev/k8s/manifests/frontline.yaml +++ b/dev/k8s/manifests/frontline.yaml @@ -26,6 +26,12 @@ spec: name: http - containerPort: 7443 name: https + - containerPort: 7946 + name: gossip-lan + protocol: TCP + - containerPort: 7946 + name: gossip-lan-udp + protocol: UDP env: - name: UNKEY_HTTP_PORT value: "7070" @@ -51,6 +57,13 @@ spec: value: "vault-test-token-123" - name: UNKEY_OTEL value: "false" + # Gossip Configuration + - name: UNKEY_GOSSIP_ENABLED + value: "true" + - name: UNKEY_GOSSIP_LAN_PORT + value: "7946" + - name: UNKEY_GOSSIP_LAN_SEEDS + value: "frontline-gossip-lan" volumeMounts: - name: tls-certs mountPath: /certs @@ -97,3 +110,23 @@ spec: port: 443 targetPort: 7443 type: LoadBalancer + +--- +apiVersion: v1 +kind: Service +metadata: + name: frontline-gossip-lan + namespace: unkey +spec: + clusterIP: None + selector: + app: frontline + ports: + - name: gossip-lan + port: 7946 + targetPort: 7946 + protocol: TCP + - name: gossip-lan-udp + port: 7946 + targetPort: 7946 + protocol: UDP 
diff --git a/gen/proto/cache/v1/BUILD.bazel b/gen/proto/cache/v1/BUILD.bazel index f7326eaeea..5b906ab214 100644 --- a/gen/proto/cache/v1/BUILD.bazel +++ b/gen/proto/cache/v1/BUILD.bazel @@ -2,7 +2,10 @@ load("@rules_go//go:def.bzl", "go_library") go_library( name = "cache", - srcs = ["invalidation.pb.go"], + srcs = [ + "invalidation.pb.go", + "oneof_interfaces.go", + ], importpath = "github.com/unkeyed/unkey/gen/proto/cache/v1", visibility = ["//visibility:public"], deps = [ diff --git a/gen/proto/cache/v1/invalidation.pb.go b/gen/proto/cache/v1/invalidation.pb.go index 513fa08838..5183017d55 100644 --- a/gen/proto/cache/v1/invalidation.pb.go +++ b/gen/proto/cache/v1/invalidation.pb.go @@ -26,14 +26,17 @@ type CacheInvalidationEvent struct { state protoimpl.MessageState `protogen:"open.v1"` // The name/identifier of the cache to invalidate CacheName string `protobuf:"bytes,1,opt,name=cache_name,json=cacheName,proto3" json:"cache_name,omitempty"` - // The cache key to invalidate - CacheKey string `protobuf:"bytes,2,opt,name=cache_key,json=cacheKey,proto3" json:"cache_key,omitempty"` // Unix millisecond timestamp when the invalidation was triggered Timestamp int64 `protobuf:"varint,3,opt,name=timestamp,proto3" json:"timestamp,omitempty"` // Optional: The node that triggered the invalidation (to avoid self-invalidation) SourceInstance string `protobuf:"bytes,4,opt,name=source_instance,json=sourceInstance,proto3" json:"source_instance,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // Types that are valid to be assigned to Action: + // + // *CacheInvalidationEvent_CacheKey + // *CacheInvalidationEvent_ClearAll + Action isCacheInvalidationEvent_Action `protobuf_oneof:"action"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *CacheInvalidationEvent) Reset() { @@ -73,13 +76,6 @@ func (x *CacheInvalidationEvent) GetCacheName() string { return "" } -func (x *CacheInvalidationEvent) GetCacheKey() string 
{ - if x != nil { - return x.CacheKey - } - return "" -} - func (x *CacheInvalidationEvent) GetTimestamp() int64 { if x != nil { return x.Timestamp @@ -94,17 +90,62 @@ func (x *CacheInvalidationEvent) GetSourceInstance() string { return "" } +func (x *CacheInvalidationEvent) GetAction() isCacheInvalidationEvent_Action { + if x != nil { + return x.Action + } + return nil +} + +func (x *CacheInvalidationEvent) GetCacheKey() string { + if x != nil { + if x, ok := x.Action.(*CacheInvalidationEvent_CacheKey); ok { + return x.CacheKey + } + } + return "" +} + +func (x *CacheInvalidationEvent) GetClearAll() bool { + if x != nil { + if x, ok := x.Action.(*CacheInvalidationEvent_ClearAll); ok { + return x.ClearAll + } + } + return false +} + +type isCacheInvalidationEvent_Action interface { + isCacheInvalidationEvent_Action() +} + +type CacheInvalidationEvent_CacheKey struct { + // Invalidate a specific cache key + CacheKey string `protobuf:"bytes,2,opt,name=cache_key,json=cacheKey,proto3,oneof"` +} + +type CacheInvalidationEvent_ClearAll struct { + // Clear the entire cache + ClearAll bool `protobuf:"varint,5,opt,name=clear_all,json=clearAll,proto3,oneof"` +} + +func (*CacheInvalidationEvent_CacheKey) isCacheInvalidationEvent_Action() {} + +func (*CacheInvalidationEvent_ClearAll) isCacheInvalidationEvent_Action() {} + var File_cache_v1_invalidation_proto protoreflect.FileDescriptor const file_cache_v1_invalidation_proto_rawDesc = "" + "\n" + - "\x1bcache/v1/invalidation.proto\x12\bcache.v1\"\x9b\x01\n" + + "\x1bcache/v1/invalidation.proto\x12\bcache.v1\"\xc6\x01\n" + "\x16CacheInvalidationEvent\x12\x1d\n" + "\n" + - "cache_name\x18\x01 \x01(\tR\tcacheName\x12\x1b\n" + - "\tcache_key\x18\x02 \x01(\tR\bcacheKey\x12\x1c\n" + + "cache_name\x18\x01 \x01(\tR\tcacheName\x12\x1c\n" + "\ttimestamp\x18\x03 \x01(\x03R\ttimestamp\x12'\n" + - "\x0fsource_instance\x18\x04 \x01(\tR\x0esourceInstanceB\x97\x01\n" + + "\x0fsource_instance\x18\x04 \x01(\tR\x0esourceInstance\x12\x1d\n" + + 
"\tcache_key\x18\x02 \x01(\tH\x00R\bcacheKey\x12\x1d\n" + + "\tclear_all\x18\x05 \x01(\bH\x00R\bclearAllB\b\n" + + "\x06actionB\x97\x01\n" + "\fcom.cache.v1B\x11InvalidationProtoP\x01Z3github.com/unkeyed/unkey/gen/proto/cache/v1;cachev1\xa2\x02\x03CXX\xaa\x02\bCache.V1\xca\x02\bCache\\V1\xe2\x02\x14Cache\\V1\\GPBMetadata\xea\x02\tCache::V1b\x06proto3" var ( @@ -136,6 +177,10 @@ func file_cache_v1_invalidation_proto_init() { if File_cache_v1_invalidation_proto != nil { return } + file_cache_v1_invalidation_proto_msgTypes[0].OneofWrappers = []any{ + (*CacheInvalidationEvent_CacheKey)(nil), + (*CacheInvalidationEvent_ClearAll)(nil), + } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/gen/proto/cache/v1/oneof_interfaces.go b/gen/proto/cache/v1/oneof_interfaces.go new file mode 100644 index 0000000000..2e46f4576e --- /dev/null +++ b/gen/proto/cache/v1/oneof_interfaces.go @@ -0,0 +1,6 @@ +// Code generated by tools/exportoneof. DO NOT EDIT. + +package cachev1 + +// IsCacheInvalidationEvent_Action is the exported form of the protobuf oneof interface isCacheInvalidationEvent_Action. 
+type IsCacheInvalidationEvent_Action = isCacheInvalidationEvent_Action diff --git a/gen/proto/cluster/v1/BUILD.bazel b/gen/proto/cluster/v1/BUILD.bazel new file mode 100644 index 0000000000..5083f5a5c0 --- /dev/null +++ b/gen/proto/cluster/v1/BUILD.bazel @@ -0,0 +1,16 @@ +load("@rules_go//go:def.bzl", "go_library") + +go_library( + name = "cluster", + srcs = [ + "envelope.pb.go", + "oneof_interfaces.go", + ], + importpath = "github.com/unkeyed/unkey/gen/proto/cluster/v1", + visibility = ["//visibility:public"], + deps = [ + "//gen/proto/cache/v1:cache", + "@org_golang_google_protobuf//reflect/protoreflect", + "@org_golang_google_protobuf//runtime/protoimpl", + ], +) diff --git a/gen/proto/cluster/v1/envelope.pb.go b/gen/proto/cluster/v1/envelope.pb.go new file mode 100644 index 0000000000..f404a24e11 --- /dev/null +++ b/gen/proto/cluster/v1/envelope.pb.go @@ -0,0 +1,257 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.8 +// protoc (unknown) +// source: cluster/v1/envelope.proto + +package clusterv1 + +import ( + v1 "github.com/unkeyed/unkey/gen/proto/cache/v1" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Direction int32 + +const ( + Direction_DIRECTION_UNSPECIFIED Direction = 0 + Direction_DIRECTION_LAN Direction = 1 + Direction_DIRECTION_WAN Direction = 2 +) + +// Enum value maps for Direction. 
+var ( + Direction_name = map[int32]string{ + 0: "DIRECTION_UNSPECIFIED", + 1: "DIRECTION_LAN", + 2: "DIRECTION_WAN", + } + Direction_value = map[string]int32{ + "DIRECTION_UNSPECIFIED": 0, + "DIRECTION_LAN": 1, + "DIRECTION_WAN": 2, + } +) + +func (x Direction) Enum() *Direction { + p := new(Direction) + *p = x + return p +} + +func (x Direction) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Direction) Descriptor() protoreflect.EnumDescriptor { + return file_cluster_v1_envelope_proto_enumTypes[0].Descriptor() +} + +func (Direction) Type() protoreflect.EnumType { + return &file_cluster_v1_envelope_proto_enumTypes[0] +} + +func (x Direction) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Direction.Descriptor instead. +func (Direction) EnumDescriptor() ([]byte, []int) { + return file_cluster_v1_envelope_proto_rawDescGZIP(), []int{0} +} + +// ClusterMessage is the envelope for all gossip broadcast messages. +// The oneof field routes the payload to the correct handler via MessageMux. +type ClusterMessage struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Which pool this message was sent on (LAN or WAN). + Direction Direction `protobuf:"varint,1,opt,name=direction,proto3,enum=cluster.v1.Direction" json:"direction,omitempty"` + // The region of the node that originated this message. + SourceRegion string `protobuf:"bytes,2,opt,name=source_region,json=sourceRegion,proto3" json:"source_region,omitempty"` + // The node ID that originated this message. + SenderNode string `protobuf:"bytes,3,opt,name=sender_node,json=senderNode,proto3" json:"sender_node,omitempty"` + // Unix millisecond timestamp when the message was created. + // Used to measure transport latency on the receiving end. 
+ SentAtMs int64 `protobuf:"varint,4,opt,name=sent_at_ms,json=sentAtMs,proto3" json:"sent_at_ms,omitempty"` + // Types that are valid to be assigned to Payload: + // + // *ClusterMessage_CacheInvalidation + Payload isClusterMessage_Payload `protobuf_oneof:"payload"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ClusterMessage) Reset() { + *x = ClusterMessage{} + mi := &file_cluster_v1_envelope_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ClusterMessage) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ClusterMessage) ProtoMessage() {} + +func (x *ClusterMessage) ProtoReflect() protoreflect.Message { + mi := &file_cluster_v1_envelope_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ClusterMessage.ProtoReflect.Descriptor instead. 
+func (*ClusterMessage) Descriptor() ([]byte, []int) { + return file_cluster_v1_envelope_proto_rawDescGZIP(), []int{0} +} + +func (x *ClusterMessage) GetDirection() Direction { + if x != nil { + return x.Direction + } + return Direction_DIRECTION_UNSPECIFIED +} + +func (x *ClusterMessage) GetSourceRegion() string { + if x != nil { + return x.SourceRegion + } + return "" +} + +func (x *ClusterMessage) GetSenderNode() string { + if x != nil { + return x.SenderNode + } + return "" +} + +func (x *ClusterMessage) GetSentAtMs() int64 { + if x != nil { + return x.SentAtMs + } + return 0 +} + +func (x *ClusterMessage) GetPayload() isClusterMessage_Payload { + if x != nil { + return x.Payload + } + return nil +} + +func (x *ClusterMessage) GetCacheInvalidation() *v1.CacheInvalidationEvent { + if x != nil { + if x, ok := x.Payload.(*ClusterMessage_CacheInvalidation); ok { + return x.CacheInvalidation + } + } + return nil +} + +type isClusterMessage_Payload interface { + isClusterMessage_Payload() +} + +type ClusterMessage_CacheInvalidation struct { + CacheInvalidation *v1.CacheInvalidationEvent `protobuf:"bytes,5,opt,name=cache_invalidation,json=cacheInvalidation,proto3,oneof"` // next payload type = 6 +} + +func (*ClusterMessage_CacheInvalidation) isClusterMessage_Payload() {} + +var File_cluster_v1_envelope_proto protoreflect.FileDescriptor + +const file_cluster_v1_envelope_proto_rawDesc = "" + + "\n" + + "\x19cluster/v1/envelope.proto\x12\n" + + "cluster.v1\x1a\x1bcache/v1/invalidation.proto\"\x87\x02\n" + + "\x0eClusterMessage\x123\n" + + "\tdirection\x18\x01 \x01(\x0e2\x15.cluster.v1.DirectionR\tdirection\x12#\n" + + "\rsource_region\x18\x02 \x01(\tR\fsourceRegion\x12\x1f\n" + + "\vsender_node\x18\x03 \x01(\tR\n" + + "senderNode\x12\x1c\n" + + "\n" + + "sent_at_ms\x18\x04 \x01(\x03R\bsentAtMs\x12Q\n" + + "\x12cache_invalidation\x18\x05 \x01(\v2 .cache.v1.CacheInvalidationEventH\x00R\x11cacheInvalidationB\t\n" + + "\apayload*L\n" + + "\tDirection\x12\x19\n" + + 
"\x15DIRECTION_UNSPECIFIED\x10\x00\x12\x11\n" + + "\rDIRECTION_LAN\x10\x01\x12\x11\n" + + "\rDIRECTION_WAN\x10\x02B\xa1\x01\n" + + "\x0ecom.cluster.v1B\rEnvelopeProtoP\x01Z7github.com/unkeyed/unkey/gen/proto/cluster/v1;clusterv1\xa2\x02\x03CXX\xaa\x02\n" + + "Cluster.V1\xca\x02\n" + + "Cluster\\V1\xe2\x02\x16Cluster\\V1\\GPBMetadata\xea\x02\vCluster::V1b\x06proto3" + +var ( + file_cluster_v1_envelope_proto_rawDescOnce sync.Once + file_cluster_v1_envelope_proto_rawDescData []byte +) + +func file_cluster_v1_envelope_proto_rawDescGZIP() []byte { + file_cluster_v1_envelope_proto_rawDescOnce.Do(func() { + file_cluster_v1_envelope_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_cluster_v1_envelope_proto_rawDesc), len(file_cluster_v1_envelope_proto_rawDesc))) + }) + return file_cluster_v1_envelope_proto_rawDescData +} + +var file_cluster_v1_envelope_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_cluster_v1_envelope_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_cluster_v1_envelope_proto_goTypes = []any{ + (Direction)(0), // 0: cluster.v1.Direction + (*ClusterMessage)(nil), // 1: cluster.v1.ClusterMessage + (*v1.CacheInvalidationEvent)(nil), // 2: cache.v1.CacheInvalidationEvent +} +var file_cluster_v1_envelope_proto_depIdxs = []int32{ + 0, // 0: cluster.v1.ClusterMessage.direction:type_name -> cluster.v1.Direction + 2, // 1: cluster.v1.ClusterMessage.cache_invalidation:type_name -> cache.v1.CacheInvalidationEvent + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_cluster_v1_envelope_proto_init() } +func file_cluster_v1_envelope_proto_init() { + if File_cluster_v1_envelope_proto != nil { + return + } + file_cluster_v1_envelope_proto_msgTypes[0].OneofWrappers = []any{ + 
(*ClusterMessage_CacheInvalidation)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_cluster_v1_envelope_proto_rawDesc), len(file_cluster_v1_envelope_proto_rawDesc)), + NumEnums: 1, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_cluster_v1_envelope_proto_goTypes, + DependencyIndexes: file_cluster_v1_envelope_proto_depIdxs, + EnumInfos: file_cluster_v1_envelope_proto_enumTypes, + MessageInfos: file_cluster_v1_envelope_proto_msgTypes, + }.Build() + File_cluster_v1_envelope_proto = out.File + file_cluster_v1_envelope_proto_goTypes = nil + file_cluster_v1_envelope_proto_depIdxs = nil +} diff --git a/gen/proto/cluster/v1/oneof_interfaces.go b/gen/proto/cluster/v1/oneof_interfaces.go new file mode 100644 index 0000000000..f1ca341a30 --- /dev/null +++ b/gen/proto/cluster/v1/oneof_interfaces.go @@ -0,0 +1,6 @@ +// Code generated by tools/exportoneof. DO NOT EDIT. + +package clusterv1 + +// IsClusterMessage_Payload is the exported form of the protobuf oneof interface isClusterMessage_Payload. +type IsClusterMessage_Payload = isClusterMessage_Payload diff --git a/gen/proto/ctrl/v1/BUILD.bazel b/gen/proto/ctrl/v1/BUILD.bazel index bceaeacfcb..0b134f9e4a 100644 --- a/gen/proto/ctrl/v1/BUILD.bazel +++ b/gen/proto/ctrl/v1/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "custom_domain.pb.go", "deployment.pb.go", "environment.pb.go", + "oneof_interfaces.go", "openapi.pb.go", "secrets.pb.go", "service.pb.go", diff --git a/gen/proto/ctrl/v1/oneof_interfaces.go b/gen/proto/ctrl/v1/oneof_interfaces.go new file mode 100644 index 0000000000..10894fb398 --- /dev/null +++ b/gen/proto/ctrl/v1/oneof_interfaces.go @@ -0,0 +1,15 @@ +// Code generated by tools/exportoneof. DO NOT EDIT. + +package ctrlv1 + +// IsCiliumNetworkPolicyState_State is the exported form of the protobuf oneof interface isCiliumNetworkPolicyState_State. 
+type IsCiliumNetworkPolicyState_State = isCiliumNetworkPolicyState_State + +// IsReportDeploymentStatusRequest_Change is the exported form of the protobuf oneof interface isReportDeploymentStatusRequest_Change. +type IsReportDeploymentStatusRequest_Change = isReportDeploymentStatusRequest_Change + +// IsSentinelState_State is the exported form of the protobuf oneof interface isSentinelState_State. +type IsSentinelState_State = isSentinelState_State + +// IsDeploymentState_State is the exported form of the protobuf oneof interface isDeploymentState_State. +type IsDeploymentState_State = isDeploymentState_State diff --git a/gen/proto/hydra/v1/BUILD.bazel b/gen/proto/hydra/v1/BUILD.bazel index 7dfde2a9d9..86f8ffef2c 100644 --- a/gen/proto/hydra/v1/BUILD.bazel +++ b/gen/proto/hydra/v1/BUILD.bazel @@ -15,6 +15,7 @@ go_library( "deployment_restate.pb.go", "key_refill.pb.go", "key_refill_restate.pb.go", + "oneof_interfaces.go", "quota_check.pb.go", "quota_check_restate.pb.go", "routing.pb.go", diff --git a/gen/proto/hydra/v1/oneof_interfaces.go b/gen/proto/hydra/v1/oneof_interfaces.go new file mode 100644 index 0000000000..d0fdf32a8c --- /dev/null +++ b/gen/proto/hydra/v1/oneof_interfaces.go @@ -0,0 +1,6 @@ +// Code generated by tools/exportoneof. DO NOT EDIT. + +package hydrav1 + +// IsDeployRequest_Source is the exported form of the protobuf oneof interface isDeployRequest_Source. 
+type IsDeployRequest_Source = isDeployRequest_Source diff --git a/go.mod b/go.mod index 2bc234b055..a9096eb9a1 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/google/go-containerregistry v0.20.7 github.com/google/go-containerregistry/pkg/authn/k8schain v0.0.0-20260114192324-795787c558e1 github.com/gordonklaus/ineffassign v0.2.0 + github.com/hashicorp/memberlist v0.5.4 github.com/kisielk/errcheck v1.9.0 github.com/maypok86/otter v1.2.4 github.com/moby/buildkit v0.26.3 @@ -65,7 +66,6 @@ require ( github.com/prometheus/client_golang v1.23.2 github.com/redis/go-redis/v9 v9.17.2 github.com/restatedev/sdk-go v0.23.0 - github.com/segmentio/kafka-go v0.4.50 github.com/shirou/gopsutil/v4 v4.25.6 github.com/spiffe/go-spiffe/v2 v2.6.0 github.com/sqlc-dev/plugin-sdk-go v1.23.0 @@ -132,6 +132,7 @@ require ( github.com/TwiN/go-color v1.4.1 // indirect github.com/andybalholm/brotli v1.2.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.1 // indirect + github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect @@ -235,6 +236,7 @@ require ( github.com/golang-jwt/jwt/v5 v5.3.0 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/protobuf v1.5.4 // indirect + github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.27.0 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect @@ -242,7 +244,14 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect + github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect 
+ github.com/hashicorp/go-metrics v0.5.4 // indirect + github.com/hashicorp/go-msgpack/v2 v2.1.5 // indirect + github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/go-sockaddr v1.0.7 // indirect + github.com/hashicorp/golang-lru v0.5.0 // indirect github.com/in-toto/in-toto-golang v0.9.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/invopop/jsonschema v0.13.0 // indirect @@ -314,6 +323,7 @@ require ( github.com/sagikazarmark/locafero v0.11.0 // indirect github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect github.com/sasha-s/go-deadlock v0.3.5 // indirect + github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect github.com/secure-systems-lab/go-securesystemslib v0.9.1 // indirect github.com/segmentio/asm v1.2.1 // indirect github.com/segmentio/encoding v0.5.3 // indirect diff --git a/go.sum b/go.sum index 3c2b30818f..fc048e6ae7 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,7 @@ buf.build/go/standard v0.1.0 h1:g98T9IyvAl0vS3Pq8iVk6Cvj2ZiFvoUJRtfyGa0120U= buf.build/go/standard v0.1.0/go.mod h1:PiqpHz/7ZFq+kqvYhc/SK3lxFIB9N/aiH2CFC2JHIQg= cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= @@ -85,6 +86,7 @@ github.com/ClickHouse/ch-go v0.69.0 h1:nO0OJkpxOlN/eaXFj0KzjTz5p7vwP1/y3GN4qc5z/ github.com/ClickHouse/ch-go v0.69.0/go.mod h1:9XeZpSAT4S0kVjOpaJ5186b7PY/NH/hhF8R6u0WIjwg= github.com/ClickHouse/clickhouse-go/v2 v2.42.0 h1:MdujEfIrpXesQUH0k0AnuVtJQXk6RZmxEhsKUCcv5xk= github.com/ClickHouse/clickhouse-go/v2 v2.42.0/go.mod h1:riWnuo4YMVdajYll0q6FzRBomdyCrXyFY3VXeXczA8s= 
+github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= @@ -93,12 +95,19 @@ github.com/Microsoft/hcsshim v0.14.0-rc.1 h1:qAPXKwGOkVn8LlqgBN8GS0bxZ83hOJpcjxz github.com/Microsoft/hcsshim v0.14.0-rc.1/go.mod h1:hTKFGbnDtQb1wHiOWv4v0eN+7boSWAHyK/tNAaYZL0c= github.com/TwiN/go-color v1.4.1 h1:mqG0P/KBgHKVqmtL5ye7K0/Gr4l6hTksPgTgMk3mUzc= github.com/TwiN/go-color v1.4.1/go.mod h1:WcPf/jtiW95WBIsEeY1Lc/b8aaWoiqQpu5cf8WFxu+s= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 h1:aM1rlcoLz8y5B2r4tTLMiVTrMtpfY0O8EScKJxaSaEc= github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092/go.mod h1:rYqSE9HbjzpHTI74vwPvae4ZVYZd1lue2ta6xHPdblA= github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= +github.com/armon/go-metrics v0.4.1 
h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU= @@ -152,6 +161,8 @@ github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xW github.com/basgys/goxml2json v1.1.1-0.20231018121955-e66ee54ceaad h1:3swAvbzgfaI6nKuDDU7BiKfZRdF+h2ZwKgMHd8Ha4t8= github.com/basgys/goxml2json v1.1.1-0.20231018121955-e66ee54ceaad/go.mod h1:9+nBLYNWkvPcq9ep0owWUsPTLgL9ZXTsZWcCSVGGLJ0= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bitly/go-simplejson v0.5.1 h1:xgwPbetQScXt1gh9BmoJ6j9JMr3TElvuIyjR8pgdoow= @@ -176,6 +187,7 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589 h1:krfRl01rzPzxSxyLyrChD+U+MzsBXbm0OwYYB67uF+4= @@ -192,6 +204,8 @@ github.com/cilium/statedb v0.4.6 h1:pundFmW0Dhinsv0ZINdFsxzlb6d3ZQkQM7aJW9eMtD8= github.com/cilium/statedb v0.4.6/go.mod h1:DlxX9OQi/nM8oumUuz8VjxXUtVRiEfbfo8Ri1YWNCGI= github.com/cilium/stream v0.0.1 h1:82zuM/WwkLiac2Jg5FrzPxZHvIBbxXTi4VY7M+EYLs0= github.com/cilium/stream v0.0.1/go.mod h1:/e83AwqvNKpyg4n3C41qmnmj1x2G9DwzI+jb7GkF4lI= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/cli/browser v1.3.0 h1:LejqCrpWr+1pRqmEPDGnTZOjsMe7sehifLynZJuqJpo= github.com/cli/browser v1.3.0/go.mod h1:HH8s+fOAxjhQoBUAsKuPCbqUuxZDhQ2/aD+SzsEfBTk= github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y+bSQAYZnetRJ70VMVKm5CKI0= @@ -306,6 +320,12 @@ github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AY github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 
h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -364,6 +384,7 @@ github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6 h1:teYtXy9B7y5 github.com/go-quicktest/qt v1.101.1-0.20240301121107-c6c8733fa1e6/go.mod h1:p4lGIVX+8Wa6ZPNDvqcxq36XpUDLh42FLetFU7odllI= github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= @@ -372,6 +393,7 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= @@ -384,16 +406,30 @@ github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArs github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/golang/protobuf v1.2.0/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/cel-go v0.27.0 h1:e7ih85+4qVrBuqQWTW4FKSqZYokVuc3HnhH5keboFTo= github.com/google/cel-go v0.27.0/go.mod h1:tTJ11FWqnhw5KKpnWpvW9CJC3Y9GK4EIS0WXnBbebzw= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -414,8 +450,30 @@ github.com/gordonklaus/ineffassign v0.2.0 h1:Uths4KnmwxNJNzq87fwQQDDnbNb7De00VOk github.com/gordonklaus/ineffassign v0.2.0/go.mod h1:TIpymnagPSexySzs7F9FnO1XFTy8IT3a59vmZp5Y9Lw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= +github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-metrics v0.5.4 h1:8mmPiIJkTPPEbAiV97IxdAGNdRdaWwVap1BU6elejKY= +github.com/hashicorp/go-metrics v0.5.4/go.mod h1:CG5yz4NZ/AI/aQt9Ucm/vdBnbh7fvmv4lxZ350i+QQI= +github.com/hashicorp/go-msgpack/v2 v2.1.5 h1:Ue879bPnutj/hXfmUk6s/jtIK90XxgiUIcXRl656T44= +github.com/hashicorp/go-msgpack/v2 v2.1.5/go.mod 
h1:bjCsRXpZ7NsJdk45PoCQnzRGDaK8TKm5ZnDI/9y3J4M= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-sockaddr v1.0.7 h1:G+pTkSO01HpR5qCxg7lxfsFEZaG+C0VssTy/9dbT+Fw= +github.com/hashicorp/go-sockaddr v1.0.7/go.mod h1:FZQbEYa1pxkQ7WLpyXJ6cbjpT8q0YgQaK/JakXqGyWw= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/memberlist v0.5.4 h1:40YY+3qq2tAUhZIMEK8kqusKZBBjdwJ3NUjvYkcxh74= +github.com/hashicorp/memberlist v0.5.4/go.mod h1:OgN6xiIo6RlHUWk+ALjP9e32xWCoQrsOCmHrWCm2MWA= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/in-toto/in-toto-golang v0.9.0 h1:tHny7ac4KgtsfrG6ybU8gVOZux2H8jN05AXJ9EBM1XU= github.com/in-toto/in-toto-golang v0.9.0/go.mod h1:xsBVrVsHNsB61++S6Dy2vWosKhuA3lUTQd+eF9HdeMo= @@ -439,8 +497,15 @@ github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 
+github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.13-0.20220915233716-71ac16282d12 h1:9Nu54bhS/H/Kgo2/7xNSUuC5G28VR8ljfrLKU2G4IjU= github.com/json-iterator/go v1.1.13-0.20220915233716-71ac16282d12/go.mod h1:TBzl5BIHNXfS9+C35ZyJaklL7mLDbgUkcgXzSLa8Tk0= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/errcheck v1.9.0 h1:9xt1zI9EBfcYBvdU1nVrzMzzUPUtPKs9bVSIM3TAb3M= github.com/kisielk/errcheck v1.9.0/go.mod h1:kQxWMMVZgIkDq7U8xtG/n2juOjbLgZtedi0D+/VL/i8= @@ -450,6 +515,9 @@ github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+ github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -471,6 +539,7 @@ github.com/mattn/go-colorable v0.1.14 
h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHP github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/maypok86/otter v1.2.4 h1:HhW1Pq6VdJkmWwcZZq19BlEQkHtI8xgsQzBVXJU0nfc= github.com/maypok86/otter v1.2.4/go.mod h1:mKLfoI7v1HOmQMwFgX4QkRk23mX6ge3RDvjdHOWG4R4= github.com/miekg/dns v1.1.69 h1:Kb7Y/1Jo+SG+a2GtfoFUfDkG//csdRPwRLkCsxDG9Sc= @@ -506,6 +575,8 @@ github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFL github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= @@ -518,6 +589,8 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nishanths/exhaustive v0.12.0 h1:vIY9sALmw6T/yxiASewa4TQcFsVYZQQRUQJhKRf3Swg= @@ -557,6 +630,8 @@ github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplU github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= +github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/paulmach/orb v0.12.0 h1:z+zOwjmG3MyEEqzv92UN49Lg1JFYx0L9GpGKNVDKk1s= github.com/paulmach/orb v0.12.0/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= @@ -590,6 +665,7 @@ github.com/pingcap/log v1.1.0 h1:ELiPxACz7vdo1qAvvaWJg1NrYFoY6gqAh/+Uo6aXdD8= github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/tidb/pkg/parser v0.0.0-20250806091815-327a22d5ebf8 h1:q/BiM/E7N9M7zWhTwyRbVVmU2XQ/1PrYuefr5Djni0g= github.com/pingcap/tidb/pkg/parser v0.0.0-20250806091815-327a22d5ebf8/go.mod h1:mpCcwRdMnmvNkBxcT4AqiE0yuvfJTdmCJs7cfznJw1w= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -600,12 +676,29 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.4.1/go.mod 
h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc= github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= github.com/protocolbuffers/protoscope v0.0.0-20221109213918-8e7a6aafa2c9 h1:arwj11zP0yJIxIRiDn22E0H8PxfF7TsTrc2wIPFIsf4= @@ -638,6 +731,8 @@ github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEV github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/sasha-s/go-deadlock v0.3.5 h1:tNCOEEDG6tBqrNDOX35j/7hL5FcFViG6awUGROb2NsU= github.com/sasha-s/go-deadlock v0.3.5/go.mod h1:bugP6EGbdGYObIlx7pUZtWqlvo8k9H6vCBBsiChJQ5U= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= +github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= 
github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= github.com/secure-systems-lab/go-securesystemslib v0.9.1 h1:nZZaNz4DiERIQguNy0cL5qTdn9lR8XKHf4RUyG1Sx3g= @@ -646,8 +741,6 @@ github.com/segmentio/asm v1.2.1 h1:DTNbBqs57ioxAD4PrArqftgypG4/qNpXoJx8TVXxPR0= github.com/segmentio/asm v1.2.1/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= github.com/segmentio/encoding v0.5.3 h1:OjMgICtcSFuNvQCdwqMCv9Tg7lEOXGwm1J5RPQccx6w= github.com/segmentio/encoding v0.5.3/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= -github.com/segmentio/kafka-go v0.4.50 h1:mcyC3tT5WeyWzrFbd6O374t+hmcu1NKt2Pu1L3QaXmc= -github.com/segmentio/kafka-go v0.4.50/go.mod h1:Y1gn60kzLEEaW28YshXyk2+VCUKbJ3Qr6DrnT3i4+9E= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= @@ -657,6 +750,9 @@ github.com/shirou/gopsutil/v4 v4.25.6 h1:kLysI2JsKorfaFPcYmcJqbzROzsBWEOAtw6A7dI github.com/shirou/gopsutil/v4 v4.25.6/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= @@ -687,10 +783,12 @@ github.com/sqlc-dev/plugin-sdk-go v1.23.0/go.mod 
h1:I1r4THOfyETD+LI2gogN2LX8wCjw github.com/sqlc-dev/sqlc v1.30.0 h1:H4HrNwPc0hntxGWzAbhlfplPRN4bQpXFx+CaEMcKz6c= github.com/sqlc-dev/sqlc v1.30.0/go.mod h1:QnEN+npugyhUg1A+1kkYM3jc2OMOFsNlZ1eh8mdhad0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= @@ -730,6 +828,7 @@ github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea h1:SXhTLE6pb6eld/ github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea/go.mod h1:WPnis/6cRcDZSUvVmezrxJPkiO87ThFYsoUiMwWNDJk= github.com/tonistiigi/vt100 v0.0.0-20240514184818-90bafcd6abab h1:H6aJ0yKQ0gF49Qb2z5hI1UHxSQt4JMyxebFR15KnApw= github.com/tonistiigi/vt100 v0.0.0-20240514184818-90bafcd6abab/go.mod h1:ulncasL3N9uLrVann0m+CDlJKWsIAP34MPcOJF6VRvc= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/unkeyed/sdks/api/go/v2 v2.6.0 h1:xJwxkst+vCyUODKF1OYiUtWGJ4rQZVZH3YRlDplKxi8= @@ -754,14 +853,9 @@ github.com/woodsbury/decimal128 v1.3.0 
h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIj github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= -github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= -github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= -github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= -github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= -github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc= @@ -852,6 +946,7 @@ go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go= go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0= go4.org/netipx v0.0.0-20231129151722-fdeea329fbba h1:0b9z3AuHCjxk0x/opv64kcgZLBseWJUpBw5I82+2U4M= go4.org/netipx v0.0.0-20231129151722-fdeea329fbba/go.mod h1:PLyyIXexvUFg3Owu6p/WfdlivPbZJsZdgWZlrGope/Y= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto 
v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -873,11 +968,16 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= @@ -887,25 +987,39 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.13.0/go.mod 
h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -930,6 +1044,7 
@@ golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0= golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= @@ -960,17 +1075,25 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod 
h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -989,7 +1112,9 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20191026110619-0b21df46bc1d/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/services/caches/BUILD.bazel b/internal/services/caches/BUILD.bazel index e730a5079f..48559bc5d1 100644 --- a/internal/services/caches/BUILD.bazel +++ b/internal/services/caches/BUILD.bazel @@ -10,13 +10,11 @@ go_library( importpath = "github.com/unkeyed/unkey/internal/services/caches", visibility = ["//:__subpackages__"], deps = [ - "//gen/proto/cache/v1:cache", "//pkg/cache", "//pkg/cache/clustering", "//pkg/cache/middleware", "//pkg/clock", "//pkg/db", - "//pkg/eventstream", "//pkg/uid", ], ) diff --git a/internal/services/caches/caches.go b/internal/services/caches/caches.go index f69c4c2038..1aa1fdda71 100644 --- a/internal/services/caches/caches.go +++ b/internal/services/caches/caches.go @@ -5,13 +5,11 @@ import ( "os" "time" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" "github.com/unkeyed/unkey/pkg/cache" "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/cache/middleware" "github.com/unkeyed/unkey/pkg/clock" "github.com/unkeyed/unkey/pkg/db" - "github.com/unkeyed/unkey/pkg/eventstream" "github.com/unkeyed/unkey/pkg/uid" ) @@ -62,39 +60,35 @@ type Config struct { // Clock provides time functionality, allowing easier testing. Clock clock.Clock - // Topic for distributed cache invalidation - CacheInvalidationTopic *eventstream.Topic[*cachev1.CacheInvalidationEvent] + // Broadcaster for distributed cache invalidation via gossip. + // If nil, caches operate in local-only mode (no distributed invalidation). + Broadcaster clustering.Broadcaster // NodeID identifies this node in the cluster (defaults to hostname-uniqueid to ensure uniqueness) NodeID string } +// clusterOpts bundles the dispatcher and key converter functions needed for +// distributed cache invalidation. 
These are coupled because converters are only +// meaningful when clustering is enabled (i.e., when a dispatcher exists). +// Pass nil when clustering is disabled. +type clusterOpts[K comparable] struct { + dispatcher *clustering.InvalidationDispatcher + broadcaster clustering.Broadcaster + nodeID string + keyToString func(K) string + stringToKey func(string) (K, error) +} + // createCache creates a cache instance with optional clustering support. // // This is a generic helper function that: // 1. Creates a local cache with the provided configuration -// 2. If a CacheInvalidationTopic is provided, wraps it with clustering for distributed invalidation +// 2. If clustering opts are provided, wraps it with clustering for distributed invalidation // 3. Returns the cache (either local or clustered) -// -// Type parameters: -// - K: The key type (must be comparable) -// - V: The value type to be stored in the cache -// -// Parameters: -// - config: The main configuration containing clustering settings -// - cacheConfig: The specific cache configuration (freshness, staleness, size, etc.) -// - keyToString: Optional converter from key type to string for serialization -// - stringToKey: Optional converter from string to key type for deserialization -// -// Returns: -// - cache.Cache[K, V]: The initialized cache instance -// - error: An error if cache creation failed func createCache[K comparable, V any]( - config Config, - dispatcher *clustering.InvalidationDispatcher, cacheConfig cache.Config[K, V], - keyToString func(K) string, - stringToKey func(string) (K, error), + opts *clusterOpts[K], ) (cache.Cache[K, V], error) { // Create local cache localCache, err := cache.New(cacheConfig) @@ -105,7 +99,7 @@ func createCache[K comparable, V any]( // If no clustering is enabled, return the local cache directly. // This avoids the ClusterCache wrapper overhead when clustering isn't needed, // keeping cache operations (Get/Set/etc) as fast as possible on the hot path. 
- if dispatcher == nil { + if opts == nil { return localCache, nil } @@ -113,11 +107,11 @@ func createCache[K comparable, V any]( // The cluster cache will automatically register with the dispatcher clusterCache, err := clustering.New(clustering.Config[K, V]{ LocalCache: localCache, - Topic: config.CacheInvalidationTopic, - Dispatcher: dispatcher, - NodeID: config.NodeID, - KeyToString: keyToString, - StringToKey: stringToKey, + Broadcaster: opts.broadcaster, + Dispatcher: opts.dispatcher, + NodeID: opts.nodeID, + KeyToString: opts.keyToString, + StringToKey: opts.stringToKey, }) if err != nil { return nil, err @@ -130,32 +124,6 @@ func createCache[K comparable, V any]( // // It configures each cache with specific freshness/staleness windows, size limits, // resource names for tracing, and wraps them with distributed invalidation if configured. -// -// Parameters: -// - config: Configuration options including logger, clock, and optional topic for distributed invalidation. -// -// Returns: -// - Caches: A struct containing all initialized cache instances. -// - error: An error if any cache failed to initialize. -// -// All caches are thread-safe and can be accessed concurrently. If a CacheInvalidationTopic -// is provided, the caches will automatically handle distributed cache invalidation across -// cluster nodes when entries are modified. 
-// -// Example: -// -// clock := clock.RealClock{} -// -// caches, err := caches.New(caches.Config{ -// Clock: clock, -// CacheInvalidationTopic: topic, // optional for distributed invalidation -// }) -// if err != nil { -// log.Fatalf("Failed to initialize caches: %v", err) -// } -// -// // Use the caches - invalidation is automatic -// key, err := caches.KeyByHash.Get(ctx, "some-hash") func New(config Config) (Caches, error) { // Apply default NodeID if not provided // Format: hostname-uniqueid to ensure uniqueness across nodes @@ -168,23 +136,46 @@ func New(config Config) (Caches, error) { config.NodeID = fmt.Sprintf("%s-%s", hostname, uid.New("node")) } - // Create invalidation dispatcher if clustering is enabled. - // We intentionally leave dispatcher as nil when clustering is disabled to avoid - // wrapping caches with ClusterCache. This eliminates wrapper overhead on the hot path - // (cache Get/Set operations) when clustering isn't needed. + // Build clustering options if a broadcaster is configured. + // When nil, createCache returns unwrapped local caches (no clustering overhead). 
var dispatcher *clustering.InvalidationDispatcher - if config.CacheInvalidationTopic != nil { + var scopedKeyOpts *clusterOpts[cache.ScopedKey] + var stringKeyOpts *clusterOpts[string] + + if config.Broadcaster != nil { var err error - dispatcher, err = clustering.NewInvalidationDispatcher(config.CacheInvalidationTopic) + dispatcher, err = clustering.NewInvalidationDispatcher(config.Broadcaster) if err != nil { return Caches{}, err } + + scopedKeyOpts = &clusterOpts[cache.ScopedKey]{ + dispatcher: dispatcher, + broadcaster: config.Broadcaster, + nodeID: config.NodeID, + keyToString: cache.ScopedKeyToString, + stringToKey: cache.ScopedKeyFromString, + } + stringKeyOpts = &clusterOpts[string]{ + dispatcher: dispatcher, + broadcaster: config.Broadcaster, + nodeID: config.NodeID, + keyToString: nil, // defaults handle string keys + stringToKey: nil, + } + } + + // Ensure the dispatcher is closed if any subsequent cache creation fails. + initialized := false + if dispatcher != nil { + defer func() { + if !initialized { + _ = dispatcher.Close() + } + }() } - // Create ratelimit namespace cache (uses ScopedKey) ratelimitNamespace, err := createCache( - config, - dispatcher, cache.Config[cache.ScopedKey, db.FindRatelimitNamespace]{ Fresh: time.Minute, Stale: 24 * time.Hour, @@ -192,17 +183,13 @@ func New(config Config) (Caches, error) { Resource: "ratelimit_namespace", Clock: config.Clock, }, - cache.ScopedKeyToString, - cache.ScopedKeyFromString, + scopedKeyOpts, ) if err != nil { return Caches{}, err } - // Create verification key cache (uses string keys, no conversion needed) verificationKeyByHash, err := createCache( - config, - dispatcher, cache.Config[string, db.CachedKeyData]{ Fresh: 10 * time.Second, Stale: 10 * time.Minute, @@ -210,17 +197,13 @@ func New(config Config) (Caches, error) { Resource: "verification_key_by_hash", Clock: config.Clock, }, - nil, // String keys don't need custom converters - nil, + stringKeyOpts, ) if err != nil { return Caches{}, err } - 
// Create API cache (uses ScopedKey) liveApiByID, err := createCache( - config, - dispatcher, cache.Config[cache.ScopedKey, db.FindLiveApiByIDRow]{ Fresh: 10 * time.Second, Stale: 24 * time.Hour, @@ -228,16 +211,13 @@ func New(config Config) (Caches, error) { Resource: "live_api_by_id", Clock: config.Clock, }, - cache.ScopedKeyToString, - cache.ScopedKeyFromString, + scopedKeyOpts, ) if err != nil { return Caches{}, err } clickhouseSetting, err := createCache( - config, - dispatcher, cache.Config[string, db.FindClickhouseWorkspaceSettingsByWorkspaceIDRow]{ Fresh: time.Minute, Stale: 24 * time.Hour, @@ -245,17 +225,13 @@ func New(config Config) (Caches, error) { Resource: "clickhouse_setting", Clock: config.Clock, }, - nil, - nil, + stringKeyOpts, ) if err != nil { return Caches{}, err } - // Create key_auth_id -> api row cache keyAuthToApiRow, err := createCache( - config, - dispatcher, cache.Config[cache.ScopedKey, db.FindKeyAuthsByKeyAuthIdsRow]{ Fresh: 10 * time.Minute, Stale: 24 * time.Hour, @@ -263,17 +239,13 @@ func New(config Config) (Caches, error) { Resource: "key_auth_to_api_row", Clock: config.Clock, }, - cache.ScopedKeyToString, - cache.ScopedKeyFromString, + scopedKeyOpts, ) if err != nil { return Caches{}, err } - // Create api_id -> key_auth row cache apiToKeyAuthRow, err := createCache( - config, - dispatcher, cache.Config[cache.ScopedKey, db.FindKeyAuthsByIdsRow]{ Fresh: 10 * time.Minute, Stale: 24 * time.Hour, @@ -281,13 +253,13 @@ func New(config Config) (Caches, error) { Resource: "api_to_key_auth_row", Clock: config.Clock, }, - cache.ScopedKeyToString, - cache.ScopedKeyFromString, + scopedKeyOpts, ) if err != nil { return Caches{}, err } + initialized = true return Caches{ RatelimitNamespace: middleware.WithTracing(ratelimitNamespace), LiveApiByID: middleware.WithTracing(liveApiByID), diff --git a/pkg/cache/clustering/BUILD.bazel b/pkg/cache/clustering/BUILD.bazel index 424bdbde9a..180ce278b3 100644 --- a/pkg/cache/clustering/BUILD.bazel +++ 
b/pkg/cache/clustering/BUILD.bazel @@ -3,6 +3,9 @@ load("@rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "clustering", srcs = [ + "broadcaster.go", + "broadcaster_gossip.go", + "broadcaster_noop.go", "cluster_cache.go", "dispatcher.go", "noop.go", @@ -11,10 +14,11 @@ go_library( visibility = ["//visibility:public"], deps = [ "//gen/proto/cache/v1:cache", + "//gen/proto/cluster/v1:cluster", "//pkg/assert", "//pkg/batch", "//pkg/cache", - "//pkg/eventstream", + "//pkg/cluster", "//pkg/logger", ], ) @@ -22,19 +26,12 @@ go_library( go_test( name = "clustering_test", size = "small", - srcs = [ - "consume_events_test.go", - "e2e_test.go", - "produce_events_test.go", - ], + srcs = ["gossip_e2e_test.go"], deps = [ ":clustering", - "//gen/proto/cache/v1:cache", "//pkg/cache", "//pkg/clock", - "//pkg/eventstream", - "//pkg/testutil/containers", - "//pkg/uid", + "//pkg/cluster", "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/cache/clustering/broadcaster.go b/pkg/cache/clustering/broadcaster.go new file mode 100644 index 0000000000..21cd9c9dad --- /dev/null +++ b/pkg/cache/clustering/broadcaster.go @@ -0,0 +1,21 @@ +package clustering + +import ( + "context" + + cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" +) + +// Broadcaster defines the interface for broadcasting cache invalidation events +// across cluster nodes. Implementations handle serialization and transport. +type Broadcaster interface { + // Broadcast sends one or more cache invalidation events to other nodes. + Broadcast(ctx context.Context, events ...*cachev1.CacheInvalidationEvent) error + + // Subscribe sets the single handler for incoming invalidation events from other nodes. + // Calling Subscribe again replaces the previous handler. + Subscribe(ctx context.Context, handler func(context.Context, *cachev1.CacheInvalidationEvent) error) + + // Close shuts down the broadcaster and releases resources. 
+ Close() error +} diff --git a/pkg/cache/clustering/broadcaster_gossip.go b/pkg/cache/clustering/broadcaster_gossip.go new file mode 100644 index 0000000000..9bfa827a37 --- /dev/null +++ b/pkg/cache/clustering/broadcaster_gossip.go @@ -0,0 +1,80 @@ +package clustering + +import ( + "context" + "sync" + "sync/atomic" + + cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + "github.com/unkeyed/unkey/pkg/cluster" + "github.com/unkeyed/unkey/pkg/logger" +) + +// invalidationHandler wraps the handler func so we can use atomic.Pointer +// (atomic.Pointer requires a named type, not a bare func signature). +type invalidationHandler struct { + fn func(context.Context, *cachev1.CacheInvalidationEvent) error +} + +// GossipBroadcaster implements Broadcaster using the gossip cluster for +// cache invalidation. It builds ClusterMessage envelopes with the oneof +// variant directly, avoiding double serialization. +type GossipBroadcaster struct { + cluster cluster.Cluster + handler atomic.Pointer[invalidationHandler] + + closeOnce sync.Once + closeErr error +} + +var _ Broadcaster = (*GossipBroadcaster)(nil) + +// NewGossipBroadcaster creates a new gossip-based broadcaster wired to the +// given cluster instance. +func NewGossipBroadcaster(c cluster.Cluster) *GossipBroadcaster { + return &GossipBroadcaster{ + cluster: c, + handler: atomic.Pointer[invalidationHandler]{}, + closeOnce: sync.Once{}, + closeErr: nil, + } +} + +// HandleCacheInvalidation is the typed handler for cache invalidation messages. +// Register it with cluster.Subscribe(mux, broadcaster.HandleCacheInvalidation). 
+func (b *GossipBroadcaster) HandleCacheInvalidation(ci *clusterv1.ClusterMessage_CacheInvalidation) { + if h := b.handler.Load(); h != nil { + if err := h.fn(context.Background(), ci.CacheInvalidation); err != nil { + logger.Error("Failed to handle gossip cache event", "error", err) + } + } +} + +// Broadcast serializes the events and sends them via the gossip cluster. +func (b *GossipBroadcaster) Broadcast(_ context.Context, events ...*cachev1.CacheInvalidationEvent) error { + for _, event := range events { + if err := b.cluster.Broadcast(&clusterv1.ClusterMessage_CacheInvalidation{ + CacheInvalidation: event, + }); err != nil { + logger.Error("Failed to broadcast cache invalidation", "error", err) + } + } + + return nil +} + +// Subscribe sets the single handler for incoming invalidation events. +// Calling Subscribe again replaces the previous handler. +func (b *GossipBroadcaster) Subscribe(_ context.Context, handler func(context.Context, *cachev1.CacheInvalidationEvent) error) { + b.handler.Store(&invalidationHandler{fn: handler}) +} + +// Close shuts down the underlying cluster. It is safe to call multiple times; +// only the first call closes the cluster, subsequent calls return the original result. +func (b *GossipBroadcaster) Close() error { + b.closeOnce.Do(func() { + b.closeErr = b.cluster.Close() + }) + return b.closeErr +} diff --git a/pkg/cache/clustering/broadcaster_noop.go b/pkg/cache/clustering/broadcaster_noop.go new file mode 100644 index 0000000000..363ebda2b3 --- /dev/null +++ b/pkg/cache/clustering/broadcaster_noop.go @@ -0,0 +1,29 @@ +package clustering + +import ( + "context" + + cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" +) + +// noopBroadcaster is a no-op implementation of Broadcaster. +// Used when clustering is disabled. +type noopBroadcaster struct{} + +var _ Broadcaster = (*noopBroadcaster)(nil) + +// NewNoopBroadcaster returns a Broadcaster that does nothing. 
+func NewNoopBroadcaster() Broadcaster { + return &noopBroadcaster{} +} + +func (b *noopBroadcaster) Broadcast(_ context.Context, _ ...*cachev1.CacheInvalidationEvent) error { + return nil +} + +func (b *noopBroadcaster) Subscribe(_ context.Context, _ func(context.Context, *cachev1.CacheInvalidationEvent) error) { +} + +func (b *noopBroadcaster) Close() error { + return nil +} diff --git a/pkg/cache/clustering/cluster_cache.go b/pkg/cache/clustering/cluster_cache.go index d6705a131a..427b05303c 100644 --- a/pkg/cache/clustering/cluster_cache.go +++ b/pkg/cache/clustering/cluster_cache.go @@ -9,23 +9,21 @@ import ( "github.com/unkeyed/unkey/pkg/assert" "github.com/unkeyed/unkey/pkg/batch" "github.com/unkeyed/unkey/pkg/cache" - "github.com/unkeyed/unkey/pkg/eventstream" "github.com/unkeyed/unkey/pkg/logger" ) // ClusterCache wraps a local cache and automatically handles distributed invalidation -// across cluster nodes using an event stream. +// across cluster nodes using a Broadcaster. type ClusterCache[K comparable, V any] struct { localCache cache.Cache[K, V] - topic *eventstream.Topic[*cachev1.CacheInvalidationEvent] - producer eventstream.Producer[*cachev1.CacheInvalidationEvent] + broadcaster Broadcaster cacheName string nodeID string keyToString func(K) string stringToKey func(string) (K, error) onInvalidation func(ctx context.Context, key K) - // Batch processor for broadcasting invalidation events + // batchProcessor batches and sends invalidation events to other nodes. 
batchProcessor *batch.BatchProcessor[*cachev1.CacheInvalidationEvent] } @@ -34,8 +32,8 @@ type Config[K comparable, V any] struct { // Local cache instance to wrap LocalCache cache.Cache[K, V] - // Topic for broadcasting invalidations - Topic *eventstream.Topic[*cachev1.CacheInvalidationEvent] + // Broadcaster for sending/receiving invalidations + Broadcaster Broadcaster // Dispatcher routes invalidation events to this cache // Required for receiving invalidations from other nodes @@ -58,7 +56,7 @@ type Config[K comparable, V any] struct { func New[K comparable, V any](config Config[K, V]) (*ClusterCache[K, V], error) { // Validate required config err := assert.All( - assert.NotNilAndNotZero(config.Topic, "Topic is required for ClusterCache"), + assert.NotNilAndNotZero(config.Broadcaster, "Broadcaster is required for ClusterCache"), assert.NotNilAndNotZero(config.Dispatcher, "Dispatcher is required for ClusterCache"), ) if err != nil { @@ -91,10 +89,9 @@ func New[K comparable, V any](config Config[K, V]) (*ClusterCache[K, V], error) } c := &ClusterCache[K, V]{ - producer: nil, - batchProcessor: nil, + broadcaster: config.Broadcaster, + batchProcessor: nil, // set below; Flush closure captures c localCache: config.LocalCache, - topic: config.Topic, cacheName: config.LocalCache.Name(), nodeID: config.NodeID, keyToString: keyToString, @@ -104,9 +101,6 @@ func New[K comparable, V any](config Config[K, V]) (*ClusterCache[K, V], error) }, } - // Create a reusable producer from the topic - c.producer = config.Topic.NewProducer() - // Create batch processor for broadcasting invalidations // This avoids creating a goroutine for every cache write c.batchProcessor = batch.New(batch.Config[*cachev1.CacheInvalidationEvent]{ @@ -117,7 +111,7 @@ func New[K comparable, V any](config Config[K, V]) (*ClusterCache[K, V], error) FlushInterval: 100 * time.Millisecond, Consumers: 2, Flush: func(ctx context.Context, events []*cachev1.CacheInvalidationEvent) { - err := 
c.producer.Produce(ctx, events...) + err := c.broadcaster.Broadcast(ctx, events...) if err != nil { logger.Error("Failed to broadcast cache invalidations", "error", err, @@ -143,8 +137,6 @@ func (c *ClusterCache[K, V]) GetMany(ctx context.Context, keys []K) (values map[ return c.localCache.GetMany(ctx, keys) } -// Set stores a value in the local cache and broadcasts an invalidation event -// to other nodes in the cluster // Set stores a value in the local cache without broadcasting. // This is used when populating the cache after a database read. // The stale/fresh timers handle cache expiration, so there's no need to @@ -226,9 +218,17 @@ func (c *ClusterCache[K, V]) Restore(ctx context.Context, data []byte) error { return c.localCache.Restore(ctx, data) } -// Clear removes all entries from the local cache +// Clear removes all entries from the local cache and broadcasts a clear-all +// event to other nodes so they also clear this cache. func (c *ClusterCache[K, V]) Clear(ctx context.Context) { c.localCache.Clear(ctx) + + c.batchProcessor.Buffer(&cachev1.CacheInvalidationEvent{ + CacheName: c.cacheName, + Action: &cachev1.CacheInvalidationEvent_ClearAll{ClearAll: true}, + Timestamp: time.Now().UnixMilli(), + SourceInstance: c.nodeID, + }) } // Name returns the name of this cache instance @@ -249,29 +249,34 @@ func (c *ClusterCache[K, V]) HandleInvalidation(ctx context.Context, event *cach return false } - // Convert string key back to K type - key, err := c.stringToKey(event.GetCacheKey()) - if err != nil { - logger.Warn( - "Failed to convert cache key", - "cache", c.cacheName, - "key", event.GetCacheKey(), - "error", err, - ) + switch event.Action.(type) { + case *cachev1.CacheInvalidationEvent_ClearAll: + c.localCache.Clear(ctx) + return true + + case *cachev1.CacheInvalidationEvent_CacheKey: + key, err := c.stringToKey(event.GetCacheKey()) + if err != nil { + logger.Warn( + "Failed to convert cache key", + "cache", c.cacheName, + "key", event.GetCacheKey(), + 
"error", err, + ) + return false + } + c.onInvalidation(ctx, key) + return true + default: + logger.Warn("Unknown cache invalidation action", "cache", c.cacheName) return false } - - // Call the invalidation handler - c.onInvalidation(ctx, key) - return true } // Close gracefully shuts down the cluster cache and flushes any pending invalidation events. func (c *ClusterCache[K, V]) Close() error { - if c.batchProcessor != nil { - c.batchProcessor.Close() - } + c.batchProcessor.Close() return nil } @@ -279,7 +284,7 @@ func (c *ClusterCache[K, V]) Close() error { // Events are batched and sent asynchronously via the batch processor to avoid // creating a goroutine for every cache write operation. func (c *ClusterCache[K, V]) broadcastInvalidation(ctx context.Context, keys ...K) { - if c.batchProcessor == nil || len(keys) == 0 { + if len(keys) == 0 { return } @@ -287,7 +292,7 @@ func (c *ClusterCache[K, V]) broadcastInvalidation(ctx context.Context, keys ... for _, key := range keys { c.batchProcessor.Buffer(&cachev1.CacheInvalidationEvent{ CacheName: c.cacheName, - CacheKey: c.keyToString(key), + Action: &cachev1.CacheInvalidationEvent_CacheKey{CacheKey: c.keyToString(key)}, Timestamp: time.Now().UnixMilli(), SourceInstance: c.nodeID, }) diff --git a/pkg/cache/clustering/consume_events_test.go b/pkg/cache/clustering/consume_events_test.go deleted file mode 100644 index fa94b48c97..0000000000 --- a/pkg/cache/clustering/consume_events_test.go +++ /dev/null @@ -1,115 +0,0 @@ -package clustering_test - -import ( - "context" - "fmt" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/cache" - "github.com/unkeyed/unkey/pkg/clock" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/uid" -) - -func TestClusterCache_ConsumesInvalidationAndRemovesFromCache(t *testing.T) { - - brokers := 
containers.Kafka(t) - - // Create unique topic and instance ID for this test run to ensure fresh consumer group - topicName := fmt.Sprintf("test-clustering-consume-%s", uid.New(uid.TestPrefix)) - - // Create eventstream topic - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: uid.New(uid.TestPrefix), - }) - require.NoError(t, err) - - err = topic.EnsureExists(1, 1) - require.NoError(t, err) - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully created in Kafka - ctx := context.Background() - waitCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err) - - // Create local cache and populate it - localCache, err := cache.New(cache.Config[string, string]{ - Fresh: 5 * time.Minute, - Stale: 10 * time.Minute, - MaxSize: 1000, - Resource: "test-cache", - Clock: clock.New(), - }) - require.NoError(t, err) - - // Populate cache with test data - localCache.Set(ctx, "key1", "value1") - localCache.Set(ctx, "key2", "value2") - - // Verify data is in cache - value1, hit1 := localCache.Get(ctx, "key1") - require.Equal(t, cache.Hit, hit1, "key1 should be in cache initially") - require.Equal(t, "value1", value1, "key1 should have correct value") - - value2, hit2 := localCache.Get(ctx, "key2") - require.Equal(t, cache.Hit, hit2, "key2 should be in cache initially") - require.Equal(t, "value2", value2, "key2 should have correct value") - - // Set up consumer that will remove data from cache when invalidation event is received - consumer := topic.NewConsumer() - defer func() { require.NoError(t, consumer.Close()) }() - - consumerCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - var invalidationProcessed atomic.Bool - - consumer.Consume(consumerCtx, func(ctx context.Context, event *cachev1.CacheInvalidationEvent) error { - // 
Simulate the cache invalidation logic that would be in the main application - if event.GetCacheName() == "test-cache" { - localCache.Remove(ctx, event.GetCacheKey()) - } - - invalidationProcessed.Store(true) - return nil - }) - - // Wait for consumer to be ready and actually positioned - time.Sleep(5 * time.Second) - - // Produce an invalidation event - producer := topic.NewProducer() - invalidationEvent := &cachev1.CacheInvalidationEvent{ - CacheName: "test-cache", - CacheKey: "key1", - Timestamp: time.Now().UnixMilli(), - SourceInstance: "other-node", - } - - err = producer.Produce(consumerCtx, invalidationEvent) - require.NoError(t, err, "Failed to produce invalidation event") - - // Wait for event to be processed - require.Eventually(t, func() bool { - return invalidationProcessed.Load() - }, 5*time.Second, 100*time.Millisecond, "Cache invalidation event should be consumed and processed within 5 seconds") - - // Verify key1 was removed from cache - _, hit1After := localCache.Get(ctx, "key1") - require.Equal(t, cache.Miss, hit1After, "key1 should be removed from cache after invalidation event") - - // Verify key2 is still in cache (wasn't invalidated) - value2After, hit2After := localCache.Get(ctx, "key2") - require.Equal(t, cache.Hit, hit2After, "key2 should remain in cache (not invalidated)") - require.Equal(t, "value2", value2After, "key2 should retain correct value") -} diff --git a/pkg/cache/clustering/dispatcher.go b/pkg/cache/clustering/dispatcher.go index 3c87be6ade..1b841b5496 100644 --- a/pkg/cache/clustering/dispatcher.go +++ b/pkg/cache/clustering/dispatcher.go @@ -6,7 +6,6 @@ import ( cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" "github.com/unkeyed/unkey/pkg/assert" - "github.com/unkeyed/unkey/pkg/eventstream" ) // InvalidationHandler is an interface that cluster caches implement @@ -16,38 +15,37 @@ type InvalidationHandler interface { Name() string } -// InvalidationDispatcher routes cache invalidation events from Kafka -// to the 
appropriate cache instances within a single process. +// InvalidationDispatcher routes cache invalidation events from the +// broadcaster to the appropriate cache instances within a single process. // // In a distributed system, each process (server) has one dispatcher -// that consumes invalidation events and routes them to all local caches +// that receives invalidation events and routes them to all local caches // based on the cache name in the event. type InvalidationDispatcher struct { - mu sync.RWMutex - handlers map[string]InvalidationHandler // keyed by cache name - consumer eventstream.Consumer[*cachev1.CacheInvalidationEvent] + mu sync.RWMutex + handlers map[string]InvalidationHandler // keyed by cache name + broadcaster Broadcaster } // NewInvalidationDispatcher creates a new dispatcher that routes invalidation // events to registered caches. // -// Returns an error if topic is nil - use NewNoopDispatcher() if clustering is disabled. -func NewInvalidationDispatcher(topic *eventstream.Topic[*cachev1.CacheInvalidationEvent]) (*InvalidationDispatcher, error) { +// Returns an error if broadcaster is nil - use NewNoopDispatcher() if clustering is disabled. 
+func NewInvalidationDispatcher(broadcaster Broadcaster) (*InvalidationDispatcher, error) { err := assert.All( - assert.NotNil(topic, "topic is required for InvalidationDispatcher - use NewNoopDispatcher() if clustering is disabled"), + assert.NotNil(broadcaster, "broadcaster is required for InvalidationDispatcher - use NewNoopDispatcher() if clustering is disabled"), ) if err != nil { return nil, err } d := &InvalidationDispatcher{ - mu: sync.RWMutex{}, - consumer: nil, - handlers: make(map[string]InvalidationHandler), + mu: sync.RWMutex{}, + handlers: make(map[string]InvalidationHandler), + broadcaster: broadcaster, } - d.consumer = topic.NewConsumer() - d.consumer.Consume(context.Background(), d.handleEvent) + broadcaster.Subscribe(context.Background(), d.handleEvent) return d, nil } @@ -78,8 +76,8 @@ func (d *InvalidationDispatcher) Register(handler InvalidationHandler) { // Close stops the dispatcher and cleans up resources. func (d *InvalidationDispatcher) Close() error { - if d.consumer != nil { - return d.consumer.Close() + if d.broadcaster != nil { + return d.broadcaster.Close() } return nil } diff --git a/pkg/cache/clustering/e2e_test.go b/pkg/cache/clustering/e2e_test.go deleted file mode 100644 index 9f9b1a21c0..0000000000 --- a/pkg/cache/clustering/e2e_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package clustering_test - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/cache" - "github.com/unkeyed/unkey/pkg/cache/clustering" - "github.com/unkeyed/unkey/pkg/clock" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/uid" -) - -func TestClusterCache_EndToEndDistributedInvalidation(t *testing.T) { - - brokers := containers.Kafka(t) - - // Create unique topic and instance ID for this test run to ensure fresh consumer group - topicName := 
fmt.Sprintf("test-clustering-e2e-%s", uid.New(uid.TestPrefix)) - - // Create eventstream topic with real logger for debugging - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: uid.New(uid.TestPrefix), - }) - require.NoError(t, err) - - err = topic.EnsureExists(1, 1) - require.NoError(t, err) - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully created in Kafka - waitCtx, waitCancel := context.WithTimeout(context.Background(), 10*time.Second) - defer waitCancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err) - - // Create dispatcher (one per process in production) - dispatcher, err := clustering.NewInvalidationDispatcher(topic) - require.NoError(t, err) - defer func() { require.NoError(t, dispatcher.Close()) }() - - // Wait for dispatcher's consumer to be ready - time.Sleep(5 * time.Second) - - // Create two cache instances (simulating two nodes) - createCache := func(nodeID string) (*clustering.ClusterCache[string, string], cache.Cache[string, string], error) { - var localCache cache.Cache[string, string] - localCache, err = cache.New(cache.Config[string, string]{ - Fresh: 5 * time.Minute, - Stale: 10 * time.Minute, - MaxSize: 1000, - Resource: "test-cache", - Clock: clock.New(), - }) - if err != nil { - return nil, nil, err - } - - var clusterCache *clustering.ClusterCache[string, string] - clusterCache, err = clustering.New(clustering.Config[string, string]{ - LocalCache: localCache, - Topic: topic, - Dispatcher: dispatcher, - NodeID: nodeID, - }) - if err != nil { - return nil, nil, err - } - - return clusterCache, localCache, nil - } - - // Create cache instances for two nodes - clusterCache1, localCache1, err := createCache("node-1") - require.NoError(t, err) - - clusterCache2, localCache2, err := createCache("node-2") - require.NoError(t, err) - - ctx := context.Background() - - // Populate both caches with 
the same data - clusterCache1.Set(ctx, "shared-key", "initial-value") - clusterCache2.Set(ctx, "shared-key", "initial-value") - - // Verify both caches have the data - value1, hit1 := localCache1.Get(ctx, "shared-key") - require.Equal(t, cache.Hit, hit1, "node-1 should have cached data initially") - require.Equal(t, "initial-value", value1, "node-1 should have correct initial value") - - value2, hit2 := localCache2.Get(ctx, "shared-key") - require.Equal(t, cache.Hit, hit2, "node-2 should have cached data initially") - require.Equal(t, "initial-value", value2, "node-2 should have correct initial value") - - // Node 1 removes the key (simulating a database deletion) - // This should invalidate Node 2's cache via dispatcher - t.Logf("Node 1 calling Remove() - should broadcast invalidation") - clusterCache1.Remove(ctx, "shared-key") - t.Logf("Node 1 Remove() returned") - - // Wait for invalidation to propagate through dispatcher - require.Eventually(t, func() bool { - _, hit := localCache2.Get(ctx, "shared-key") - return hit == cache.Miss - }, 10*time.Second, 100*time.Millisecond, "Node 2's cache should be invalidated within 10 seconds") - - // Verify Node 1 also has the key removed - _, hit1After := localCache1.Get(ctx, "shared-key") - require.Equal(t, cache.Miss, hit1After, "Node 1 should have removed the key") - - // Verify Node 2's cache was invalidated (already checked in Eventually above) - _, hit2After := localCache2.Get(ctx, "shared-key") - require.Equal(t, cache.Miss, hit2After, "Node 2's cache should be invalidated after receiving event from Node 1") -} diff --git a/pkg/cache/clustering/gossip_e2e_test.go b/pkg/cache/clustering/gossip_e2e_test.go new file mode 100644 index 0000000000..3232fa99d4 --- /dev/null +++ b/pkg/cache/clustering/gossip_e2e_test.go @@ -0,0 +1,141 @@ +package clustering_test + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/unkeyed/unkey/pkg/cache" + 
"github.com/unkeyed/unkey/pkg/cache/clustering" + "github.com/unkeyed/unkey/pkg/clock" + "github.com/unkeyed/unkey/pkg/cluster" +) + +// twoNodeCluster sets up a two-node gossip cluster with a ClusterCache on each node. +// Both caches share the same cache name ("test_cache") so invalidation events route correctly. +type twoNodeCluster struct { + Cache1 *clustering.ClusterCache[string, string] + Cache2 *clustering.ClusterCache[string, string] +} + +func setupTwoNodeCluster(t *testing.T) twoNodeCluster { + t.Helper() + clk := clock.New() + + // --- Node 1 --- + mux1 := cluster.NewMessageMux() + c1, err := cluster.New(cluster.Config{ + Region: "us-east-1", + NodeID: "node-1", + BindAddr: "127.0.0.1", + OnMessage: mux1.OnMessage, + }) + require.NoError(t, err) + b1 := clustering.NewGossipBroadcaster(c1) + cluster.Subscribe(mux1, b1.HandleCacheInvalidation) + + d1, err := clustering.NewInvalidationDispatcher(b1) + require.NoError(t, err) + + lc1, err := cache.New(cache.Config[string, string]{ + Fresh: time.Minute, Stale: time.Hour, MaxSize: 1000, + Resource: "test_cache", Clock: clk, + }) + require.NoError(t, err) + + cc1, err := clustering.New(clustering.Config[string, string]{ + LocalCache: lc1, Broadcaster: b1, Dispatcher: d1, NodeID: "node-1", + }) + require.NoError(t, err) + + // --- Node 2 --- + mux2 := cluster.NewMessageMux() + c1Addr := c1.Members()[0].FullAddress().Addr + time.Sleep(50 * time.Millisecond) + + c2, err := cluster.New(cluster.Config{ + Region: "us-east-1", + NodeID: "node-2", + BindAddr: "127.0.0.1", + LANSeeds: []string{c1Addr}, + OnMessage: mux2.OnMessage, + }) + require.NoError(t, err) + b2 := clustering.NewGossipBroadcaster(c2) + cluster.Subscribe(mux2, b2.HandleCacheInvalidation) + + d2, err := clustering.NewInvalidationDispatcher(b2) + require.NoError(t, err) + + lc2, err := cache.New(cache.Config[string, string]{ + Fresh: time.Minute, Stale: time.Hour, MaxSize: 1000, + Resource: "test_cache", Clock: clk, + }) + require.NoError(t, err) + + 
cc2, err := clustering.New(clustering.Config[string, string]{ + LocalCache: lc2, Broadcaster: b2, Dispatcher: d2, NodeID: "node-2", + }) + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, cc1.Close()) + require.NoError(t, cc2.Close()) + require.NoError(t, c2.Close()) + require.NoError(t, c1.Close()) + }) + + // Wait for cluster to form + require.Eventually(t, func() bool { + return len(c1.Members()) == 2 && len(c2.Members()) == 2 + }, 5*time.Second, 100*time.Millisecond, "nodes should discover each other") + + return twoNodeCluster{Cache1: cc1, Cache2: cc2} +} + +func TestGossipCacheInvalidation_Remove(t *testing.T) { + ctx := context.Background() + tc := setupTwoNodeCluster(t) + + t.Run("remove propagates to peer", func(t *testing.T) { + // Set a value on node 2 + tc.Cache2.Set(ctx, "test-key", "test-value") + val, hit := tc.Cache2.Get(ctx, "test-key") + require.Equal(t, cache.Hit, hit) + require.Equal(t, "test-value", val) + + // Remove on node 1 — should propagate to node 2 + tc.Cache1.Remove(ctx, "test-key") + + require.Eventually(t, func() bool { + _, hit := tc.Cache2.Get(ctx, "test-key") + return hit == cache.Miss + }, 5*time.Second, 100*time.Millisecond, "key should be invalidated on node 2") + }) +} + +func TestGossipCacheInvalidation_Clear(t *testing.T) { + ctx := context.Background() + tc := setupTwoNodeCluster(t) + + t.Run("clear propagates to peers", func(t *testing.T) { + // Populate node 2's cache with multiple keys + tc.Cache2.Set(ctx, "key-a", "value-a") + tc.Cache2.Set(ctx, "key-b", "value-b") + tc.Cache2.Set(ctx, "key-c", "value-c") + + _, hit := tc.Cache2.Get(ctx, "key-a") + require.Equal(t, cache.Hit, hit) + + // Clear on node 1 — should propagate and clear node 2's cache + tc.Cache1.Clear(ctx) + + require.Eventually(t, func() bool { + _, hitA := tc.Cache2.Get(ctx, "key-a") + _, hitB := tc.Cache2.Get(ctx, "key-b") + _, hitC := tc.Cache2.Get(ctx, "key-c") + return hitA == cache.Miss && hitB == cache.Miss && hitC == cache.Miss + 
}, 5*time.Second, 100*time.Millisecond, "all keys should be cleared on node 2") + }) +} diff --git a/pkg/cache/clustering/produce_events_test.go b/pkg/cache/clustering/produce_events_test.go deleted file mode 100644 index 3bb803c8cb..0000000000 --- a/pkg/cache/clustering/produce_events_test.go +++ /dev/null @@ -1,131 +0,0 @@ -package clustering_test - -import ( - "context" - "fmt" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/cache" - "github.com/unkeyed/unkey/pkg/cache/clustering" - "github.com/unkeyed/unkey/pkg/clock" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/uid" -) - -func TestClusterCache_ProducesInvalidationOnRemoveAndSetNull(t *testing.T) { - - brokers := containers.Kafka(t) - - // Create unique topic and instance ID for this test run to ensure fresh consumer group - topicName := fmt.Sprintf("test-clustering-produce-%s", uid.New(uid.TestPrefix)) - - // Create eventstream topic - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: uid.New(uid.TestPrefix), - }) - require.NoError(t, err) - - err = topic.EnsureExists(1, 1) - require.NoError(t, err) - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully created in Kafka - ctx := context.Background() - waitCtx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err) - - // Create dispatcher with noop - we won't use it to consume, just need it for ClusterCache creation - dispatcher := clustering.NewNoopDispatcher() - defer func() { require.NoError(t, dispatcher.Close()) }() - - // Create local cache - localCache, err := cache.New(cache.Config[string, string]{ - Fresh: 5 * time.Minute, - Stale: 10 * 
time.Minute, - MaxSize: 1000, - Resource: "test-cache", - Clock: clock.New(), - }) - require.NoError(t, err) - - // Create cluster cache - this will produce events when we call Set/SetNull - clusterCache, err := clustering.New(clustering.Config[string, string]{ - LocalCache: localCache, - Topic: topic, - Dispatcher: dispatcher, - NodeID: "test-node-1", - }) - require.NoError(t, err) - - // Track received events - var receivedEventCount atomic.Int32 - var receivedEvents []*cachev1.CacheInvalidationEvent - var eventsMutex sync.Mutex - - consumer := topic.NewConsumer() - defer func() { require.NoError(t, consumer.Close()) }() - - consumerCtx, cancelConsumer := context.WithTimeout(context.Background(), 30*time.Second) - defer cancelConsumer() - - consumer.Consume(consumerCtx, func(ctx context.Context, event *cachev1.CacheInvalidationEvent) error { - eventsMutex.Lock() - receivedEvents = append(receivedEvents, event) - eventsMutex.Unlock() - - receivedEventCount.Add(1) - return nil - }) - - // Wait for consumer to be ready and actually positioned - time.Sleep(5 * time.Second) - - // Test Remove operation produces invalidation event - clusterCache.Set(ctx, "key1", "value1") // populate cache first - clusterCache.Remove(ctx, "key1") // then remove it - - // Test SetNull operation produces invalidation event - clusterCache.SetNull(ctx, "key2") - - // Wait for both events to be received - require.Eventually(t, func() bool { - return receivedEventCount.Load() == 2 - }, 5*time.Second, 100*time.Millisecond, "ClusterCache should produce invalidation events for Remove and SetNull operations within 5 seconds") - - // Verify events - eventsMutex.Lock() - defer eventsMutex.Unlock() - - require.Len(t, receivedEvents, 2, "Should receive exactly 2 events") - - // Find events by key - var removeEvent, setNullEvent *cachev1.CacheInvalidationEvent - for _, event := range receivedEvents { - switch event.GetCacheKey() { - case "key1": - removeEvent = event - case "key2": - setNullEvent = 
event - } - } - - require.NotNil(t, removeEvent, "Remove operation should produce invalidation event") - require.Equal(t, "test-cache", removeEvent.GetCacheName(), "Remove event should have correct cache name") - require.Equal(t, "key1", removeEvent.GetCacheKey(), "Remove event should have correct cache key") - require.Equal(t, "test-node-1", removeEvent.GetSourceInstance(), "Remove event should have correct source instance") - - require.NotNil(t, setNullEvent, "SetNull operation should produce invalidation event") - require.Equal(t, "test-cache", setNullEvent.GetCacheName(), "SetNull event should have correct cache name") - require.Equal(t, "key2", setNullEvent.GetCacheKey(), "SetNull event should have correct cache key") - require.Equal(t, "test-node-1", setNullEvent.GetSourceInstance(), "SetNull event should have correct source instance") -} diff --git a/pkg/cluster/BUILD.bazel b/pkg/cluster/BUILD.bazel new file mode 100644 index 0000000000..01d6afb358 --- /dev/null +++ b/pkg/cluster/BUILD.bazel @@ -0,0 +1,40 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "cluster", + srcs = [ + "bridge.go", + "cluster.go", + "config.go", + "delegate_lan.go", + "delegate_wan.go", + "discovery.go", + "doc.go", + "message.go", + "mux.go", + "noop.go", + ], + importpath = "github.com/unkeyed/unkey/pkg/cluster", + visibility = ["//visibility:public"], + deps = [ + "//gen/proto/cluster/v1:cluster", + "//pkg/logger", + "@com_github_hashicorp_memberlist//:memberlist", + "@org_golang_google_protobuf//proto", + ], +) + +go_test( + name = "cluster_test", + srcs = [ + "bridge_test.go", + "cluster_test.go", + "mux_test.go", + ], + embed = [":cluster"], + deps = [ + "//gen/proto/cache/v1:cache", + "//gen/proto/cluster/v1:cluster", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/cluster/bridge.go b/pkg/cluster/bridge.go new file mode 100644 index 0000000000..450db8d3f5 --- /dev/null +++ b/pkg/cluster/bridge.go @@ -0,0 +1,126 @@ +package 
cluster + +import ( + "io" + "time" + + "github.com/hashicorp/memberlist" + "github.com/unkeyed/unkey/pkg/logger" +) + +// evaluateBridge checks whether this node should be the bridge. +// The node with the lexicographically smallest name wins. +func (c *gossipCluster) evaluateBridge() { + // Don't evaluate during shutdown to avoid deadlocks + if c.closing.Load() { + return + } + + c.mu.RLock() + lan := c.lan + c.mu.RUnlock() + + if lan == nil { + return + } + + members := lan.Members() + if len(members) == 0 { + return + } + + // Find the node with the smallest name + smallest := members[0] + for _, m := range members[1:] { + if m.Name < smallest.Name { + smallest = m + } + } + + localName := lan.LocalNode().Name + shouldBeBridge := smallest.Name == localName + + if shouldBeBridge && !c.IsBridge() { + c.promoteToBridge() + } else if !shouldBeBridge && c.IsBridge() { + c.demoteFromBridge() + } +} + +// promoteToBridge creates a WAN memberlist and joins WAN seeds. +func (c *gossipCluster) promoteToBridge() { + c.mu.Lock() + if c.isBridge { + c.mu.Unlock() + return + } + + logger.Info("Promoting to bridge", "node", c.config.NodeID, "region", c.config.Region) + + wanCfg := memberlist.DefaultWANConfig() + wanCfg.Name = c.config.NodeID + "-wan" + wanCfg.BindAddr = c.config.BindAddr + wanCfg.BindPort = c.config.WANBindPort + wanCfg.AdvertisePort = c.config.WANBindPort + wanCfg.LogOutput = io.Discard + wanCfg.SecretKey = c.config.SecretKey + + wanCfg.Delegate = newWANDelegate(c) + + wanList, err := memberlist.Create(wanCfg) + if err != nil { + c.mu.Unlock() + logger.Error("Failed to create WAN memberlist", "error", err) + return + } + + c.wan = wanList + c.wanQueue = &memberlist.TransmitLimitedQueue{ + NumNodes: func() int { return wanList.NumMembers() }, + RetransmitMult: 4, + } + + c.isBridge = true + seeds := c.config.WANSeeds + c.mu.Unlock() + + // Join WAN seeds outside the lock with retries + if len(seeds) > 0 { + go c.joinSeeds("WAN", func() *memberlist.Memberlist 
{ + c.mu.RLock() + defer c.mu.RUnlock() + return c.wan + }, seeds, nil) + } +} + +// demoteFromBridge shuts down the WAN memberlist. +func (c *gossipCluster) demoteFromBridge() { + c.mu.Lock() + if !c.isBridge { + c.mu.Unlock() + return + } + + logger.Info("Demoting from bridge", + "node", c.config.NodeID, + "region", c.config.Region, + ) + + wan := c.wan + c.wan = nil + c.wanQueue = nil + c.isBridge = false + c.mu.Unlock() + + // Leave and shutdown outside the lock since Leave can trigger callbacks + if wan != nil { + if err := wan.Leave(5 * time.Second); err != nil { + logger.Warn("Error leaving WAN pool", "error", err) + } + + if err := wan.Shutdown(); err != nil { + logger.Warn("Error shutting down WAN memberlist", "error", err) + } + } +} diff --git a/pkg/cluster/bridge_test.go b/pkg/cluster/bridge_test.go new file mode 100644 index 0000000000..0b032e8079 --- /dev/null +++ b/pkg/cluster/bridge_test.go @@ -0,0 +1,27 @@ +package cluster + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBridgeElection_SmallestNameWins(t *testing.T) { + t.Run("smallest name wins", func(t *testing.T) { + names := []string{ + "node-3", + "node-1", // smallest + "node-2", + } + + // Find smallest (same logic as evaluateBridge) + smallest := names[0] + for _, name := range names[1:] { + if name < smallest { + smallest = name + } + } + + require.Equal(t, "node-1", smallest) + }) +} diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go new file mode 100644 index 0000000000..f9228e12f2 --- /dev/null +++ b/pkg/cluster/cluster.go @@ -0,0 +1,283 @@ +package cluster + +import ( + "fmt" + "io" + "sync" + "sync/atomic" + "time" + + "github.com/hashicorp/memberlist" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + "github.com/unkeyed/unkey/pkg/logger" + "google.golang.org/protobuf/proto" +) + +const maxJoinAttempts = 10 + +// Cluster is the public interface for gossip-based cluster membership. 
+type Cluster interface { + Broadcast(msg clusterv1.IsClusterMessage_Payload) error + Members() []*memberlist.Node + IsBridge() bool + WANAddr() string + Close() error +} + +// gossipCluster manages a two-tier gossip membership: a LAN pool for intra-region +// communication and, on the elected bridge node, a WAN pool for cross-region +// communication. +type gossipCluster struct { + config Config + + mu sync.RWMutex + lan *memberlist.Memberlist + lanQueue *memberlist.TransmitLimitedQueue + wan *memberlist.Memberlist + wanQueue *memberlist.TransmitLimitedQueue + isBridge bool + closing atomic.Bool + + // evalCh is used to trigger async bridge evaluation from memberlist + // callbacks. This avoids calling Members() inside NotifyJoin/NotifyLeave + // where memberlist holds its internal state lock. + evalCh chan struct{} + done chan struct{} +} + +// New creates a new cluster node, starts the LAN memberlist, joins LAN seeds, +// and begins bridge evaluation. +func New(cfg Config) (Cluster, error) { + cfg.setDefaults() + + c := &gossipCluster{ + config: cfg, + mu: sync.RWMutex{}, + lan: nil, + lanQueue: nil, + wan: nil, + wanQueue: nil, + isBridge: false, + closing: atomic.Bool{}, + evalCh: make(chan struct{}, 1), + done: make(chan struct{}), + } + + // Start the async bridge evaluator + go c.bridgeEvalLoop() + + // Configure LAN memberlist + lanCfg := memberlist.DefaultLANConfig() + lanCfg.Name = cfg.NodeID + lanCfg.BindAddr = cfg.BindAddr + lanCfg.BindPort = cfg.BindPort + lanCfg.AdvertisePort = cfg.BindPort + lanCfg.LogOutput = io.Discard + lanCfg.SecretKey = cfg.SecretKey + lanCfg.Delegate = newLANDelegate(c) + lanCfg.Events = newLANEventDelegate(c) + + lan, err := memberlist.Create(lanCfg) + if err != nil { + close(c.done) + return nil, fmt.Errorf("failed to create LAN memberlist: %w", err) + } + + c.mu.Lock() + c.lan = lan + c.lanQueue = &memberlist.TransmitLimitedQueue{ + NumNodes: func() int { return lan.NumMembers() }, + RetransmitMult: 3, + } + c.mu.Unlock() + 
+ // Join LAN seeds with retries — the headless service DNS may not be + // resolvable immediately at pod startup. + if len(cfg.LANSeeds) > 0 { + go c.joinSeeds("LAN", func() *memberlist.Memberlist { + c.mu.RLock() + defer c.mu.RUnlock() + return c.lan + }, cfg.LANSeeds, c.triggerEvalBridge) + } + + // Trigger initial bridge evaluation + c.triggerEvalBridge() + + return c, nil +} + +// joinSeeds attempts to join seeds on the given memberlist with exponential backoff. +// pool is used for logging ("LAN" or "WAN"). onSuccess is called after a successful join. +func (c *gossipCluster) joinSeeds(pool string, list func() *memberlist.Memberlist, seeds []string, onSuccess func()) { + backoff := 500 * time.Millisecond + + for attempt := 1; attempt <= maxJoinAttempts; attempt++ { + select { + case <-c.done: + return + default: + } + + ml := list() + if ml == nil { + return + } + + _, err := ml.Join(seeds) + if err == nil { + logger.Info("Joined "+pool+" seeds", "seeds", seeds, "attempt", attempt) + if onSuccess != nil { + onSuccess() + } + return + } + + logger.Warn("Failed to join "+pool+" seeds, retrying", + "error", err, + "seeds", seeds, + "attempt", attempt, + "next_backoff", backoff, + ) + + select { + case <-c.done: + return + case <-time.After(backoff): + } + + backoff = min(backoff*2, 10*time.Second) + } + + logger.Error("Exhausted retries joining "+pool+" seeds", + "seeds", seeds, + "attempts", maxJoinAttempts, + ) +} + +// triggerEvalBridge sends a non-blocking signal to the bridge evaluator goroutine. +func (c *gossipCluster) triggerEvalBridge() { + select { + case c.evalCh <- struct{}{}: + default: + // Already pending evaluation + } +} + +// bridgeEvalLoop runs in a goroutine and processes bridge evaluation requests. +func (c *gossipCluster) bridgeEvalLoop() { + for { + select { + case <-c.done: + return + case <-c.evalCh: + c.evaluateBridge() + } + } +} + +// Broadcast queues a message for delivery to all cluster members. 
+// The message is broadcast on the LAN pool. If this node is the bridge, +// it is also broadcast on the WAN pool. +func (c *gossipCluster) Broadcast(payload clusterv1.IsClusterMessage_Payload) error { + msg := &clusterv1.ClusterMessage{ + Payload: payload, + SourceRegion: c.config.Region, + SenderNode: c.config.NodeID, + SentAtMs: time.Now().UnixMilli(), + } + + c.mu.RLock() + lanQ := c.lanQueue + isBr := c.isBridge + wanQ := c.wanQueue + c.mu.RUnlock() + + if lanQ != nil { + msg.Direction = clusterv1.Direction_DIRECTION_LAN + lanBytes, err := proto.Marshal(msg) + if err != nil { + return fmt.Errorf("failed to marshal LAN message: %w", err) + } + lanQ.QueueBroadcast(newBroadcast(lanBytes)) + } + + if isBr && wanQ != nil { + msg.Direction = clusterv1.Direction_DIRECTION_WAN + wanBytes, err := proto.Marshal(msg) + if err != nil { + return fmt.Errorf("failed to marshal WAN message: %w", err) + } + wanQ.QueueBroadcast(newBroadcast(wanBytes)) + } + + return nil +} + +// IsBridge returns whether this node is currently the WAN bridge. +func (c *gossipCluster) IsBridge() bool { + c.mu.RLock() + defer c.mu.RUnlock() + return c.isBridge +} + +// WANAddr returns the WAN pool's advertise address (e.g. "127.0.0.1:54321") +// if this node is the bridge, or an empty string otherwise. +func (c *gossipCluster) WANAddr() string { + c.mu.RLock() + wan := c.wan + c.mu.RUnlock() + + if wan == nil { + return "" + } + + return wan.LocalNode().FullAddress().Addr +} + +// Members returns the current LAN memberlist nodes. +func (c *gossipCluster) Members() []*memberlist.Node { + c.mu.RLock() + lan := c.lan + c.mu.RUnlock() + + if lan == nil { + return nil + } + + return lan.Members() +} + +// Close gracefully leaves both LAN and WAN pools and shuts down. +// The closing flag prevents evaluateBridge from running during Leave. +// Safe to call multiple times; only the first call performs the shutdown. 
+func (c *gossipCluster) Close() error { + if alreadyClosing := c.closing.Swap(true); alreadyClosing { + return nil + } + close(c.done) + + // Demote from bridge first (leaves WAN). + c.demoteFromBridge() + + // Grab the LAN memberlist reference then nil it under lock. + c.mu.Lock() + lan := c.lan + c.lan = nil + c.lanQueue = nil + c.mu.Unlock() + + // Leave and shutdown without holding mu, since Leave triggers + // NotifyLeave callbacks. + if lan != nil { + if err := lan.Leave(5 * time.Second); err != nil { + logger.Warn("Error leaving LAN pool", "error", err) + } + + if err := lan.Shutdown(); err != nil { + return fmt.Errorf("failed to shutdown LAN memberlist: %w", err) + } + } + + return nil +} diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go new file mode 100644 index 0000000000..d4a465d610 --- /dev/null +++ b/pkg/cluster/cluster_test.go @@ -0,0 +1,362 @@ +package cluster + +import ( + "fmt" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" +) + +func testMessage(key string) *clusterv1.ClusterMessage_CacheInvalidation { + return &clusterv1.ClusterMessage_CacheInvalidation{ + CacheInvalidation: &cachev1.CacheInvalidationEvent{ + CacheName: "test", + Action: &cachev1.CacheInvalidationEvent_CacheKey{CacheKey: key}, + }, + } +} + +func TestCluster_SingleNode_BroadcastAndReceive(t *testing.T) { + c, err := New(Config{ + Region: "us-east-1", + NodeID: "test-node-1", + BindAddr: "127.0.0.1", + OnMessage: func(msg *clusterv1.ClusterMessage) { + }, + }) + require.NoError(t, err) + defer func() { require.NoError(t, c.Close()) }() + + // Single node should be bridge + require.Eventually(t, func() bool { + return c.IsBridge() + }, 2*time.Second, 50*time.Millisecond, "single node should become bridge") + + require.Len(t, c.Members(), 1, "should have 1 member") + + // Broadcast should succeed even with 
no peers (gossip has no one to deliver to) + require.NoError(t, c.Broadcast(testMessage("hello"))) +} + +func TestCluster_MultiNode_BroadcastDelivery(t *testing.T) { + const nodeCount = 3 + var clusters []Cluster + var received [nodeCount]atomic.Int32 + + // Create first node + c1, err := New(Config{ + Region: "us-east-1", + NodeID: "node-0", + BindAddr: "127.0.0.1", + OnMessage: func(msg *clusterv1.ClusterMessage) { + received[0].Add(1) + }, + }) + require.NoError(t, err) + clusters = append(clusters, c1) + + // Get the first node's address for seeding + c1Addr := c1.Members()[0].FullAddress().Addr + + // Create remaining nodes, seeding with first node + for i := 1; i < nodeCount; i++ { + idx := i + // Delay to ensure deterministic ordering for bridge election + time.Sleep(50 * time.Millisecond) + + cn, createErr := New(Config{ + Region: "us-east-1", + NodeID: fmt.Sprintf("node-%d", idx), + BindAddr: "127.0.0.1", + LANSeeds: []string{c1Addr}, + OnMessage: func(msg *clusterv1.ClusterMessage) { + received[idx].Add(1) + }, + }) + require.NoError(t, createErr) + clusters = append(clusters, cn) + } + + defer func() { + for i := len(clusters) - 1; i >= 0; i-- { + require.NoError(t, clusters[i].Close()) + } + }() + + // Wait for all nodes to see each other + require.Eventually(t, func() bool { + for _, c := range clusters { + if len(c.Members()) != nodeCount { + return false + } + } + return true + }, 5*time.Second, 100*time.Millisecond, "all nodes should see each other") + + // Wait for bridge election to settle + require.Eventually(t, func() bool { + bridgeCount := 0 + for _, c := range clusters { + if c.IsBridge() { + bridgeCount++ + } + } + return bridgeCount == 1 + }, 5*time.Second, 100*time.Millisecond, "exactly one node should be bridge") + + // The first node (oldest) should be bridge + require.True(t, clusters[0].IsBridge(), "oldest node should be bridge") + + t.Run("broadcast delivers to other nodes", func(t *testing.T) { + require.NoError(t, 
clusters[0].Broadcast(testMessage("multi-node-hello"))) + + // Gossip delivers to other nodes, not back to the sender (node-0). + for i := 1; i < nodeCount; i++ { + idx := i + require.Eventually(t, func() bool { + return received[idx].Load() >= 1 + }, 5*time.Second, 50*time.Millisecond, "node %d should have received the broadcast", idx) + } + }) +} + +func TestCluster_BridgeFailover(t *testing.T) { + // Create first node (will be bridge) + var recv1, recv2 atomic.Int32 + + c1, err := New(Config{ + Region: "us-east-1", + NodeID: "node-1", + BindAddr: "127.0.0.1", + OnMessage: func(msg *clusterv1.ClusterMessage) { + recv1.Add(1) + }, + }) + require.NoError(t, err) + + c1Addr := c1.Members()[0].FullAddress().Addr + + // Delay to ensure c1 is older + time.Sleep(50 * time.Millisecond) + + c2, err := New(Config{ + Region: "us-east-1", + NodeID: "node-2", + BindAddr: "127.0.0.1", + LANSeeds: []string{c1Addr}, + OnMessage: func(msg *clusterv1.ClusterMessage) { + recv2.Add(1) + }, + }) + require.NoError(t, err) + defer func() { require.NoError(t, c2.Close()) }() + + // Wait for both to see each other + require.Eventually(t, func() bool { + return len(c1.Members()) == 2 && len(c2.Members()) == 2 + }, 5*time.Second, 100*time.Millisecond) + + // Wait for bridge to settle: c1 should be bridge (oldest) + require.Eventually(t, func() bool { + return c1.IsBridge() && !c2.IsBridge() + }, 5*time.Second, 100*time.Millisecond, "c1 should be bridge, c2 should not") + + // Kill c1 (the bridge) + require.NoError(t, c1.Close()) + + // c2 should become bridge + require.Eventually(t, func() bool { + return c2.IsBridge() + }, 10*time.Second, 100*time.Millisecond, "c2 should become bridge after c1 leaves") +} + +func TestCluster_MultiRegion_WANBroadcast(t *testing.T) { + var recvA, recvB atomic.Int32 + var muB sync.Mutex + var lastKeyB string + + // --- Region A: single node (auto-promotes to bridge) --- + nodeA, err := New(Config{ + Region: "us-east-1", + NodeID: "node-a", + BindAddr: 
"127.0.0.1", + OnMessage: func(msg *clusterv1.ClusterMessage) { + recvA.Add(1) + }, + }) + require.NoError(t, err) + + // Wait for node A to become bridge + require.Eventually(t, func() bool { + return nodeA.IsBridge() + }, 5*time.Second, 50*time.Millisecond, "node A should become bridge") + + // Get node A's WAN address (assigned after promotion) + var wanAddrA string + require.Eventually(t, func() bool { + wanAddrA = nodeA.WANAddr() + return wanAddrA != "" + }, 5*time.Second, 50*time.Millisecond, "node A WAN address should be available") + + // --- Region B: single node, seeds WAN with region A's bridge --- + nodeB, err := New(Config{ + Region: "eu-west-1", + NodeID: "node-b", + BindAddr: "127.0.0.1", + WANSeeds: []string{wanAddrA}, + OnMessage: func(msg *clusterv1.ClusterMessage) { + muB.Lock() + lastKeyB = msg.GetCacheInvalidation().GetCacheKey() + muB.Unlock() + recvB.Add(1) + }, + }) + require.NoError(t, err) + + defer func() { + require.NoError(t, nodeB.Close()) + require.NoError(t, nodeA.Close()) + }() + + // Wait for node B to become bridge + require.Eventually(t, func() bool { + return nodeB.IsBridge() + }, 5*time.Second, 50*time.Millisecond, "node B should become bridge") + + // Wait for WAN pools to see each other (each bridge sees 2 WAN members) + implA := nodeA.(*gossipCluster) + implB := nodeB.(*gossipCluster) + require.Eventually(t, func() bool { + implA.mu.RLock() + wanA := implA.wan + implA.mu.RUnlock() + + implB.mu.RLock() + wanB := implB.wan + implB.mu.RUnlock() + + if wanA == nil || wanB == nil { + return false + } + return wanA.NumMembers() == 2 && wanB.NumMembers() == 2 + }, 10*time.Second, 100*time.Millisecond, "WAN pools should see each other") + + // Broadcast from region A + require.NoError(t, nodeA.Broadcast(testMessage("cross-region-hello"))) + + // Verify region B receives it via the WAN relay + require.Eventually(t, func() bool { + return recvB.Load() >= 1 + }, 10*time.Second, 100*time.Millisecond, "node B should receive cross-region 
broadcast") + + muB.Lock() + require.Equal(t, "cross-region-hello", lastKeyB) + muB.Unlock() +} + +func TestCluster_MultiRegion_BidirectionalBroadcast(t *testing.T) { + var muA, muB sync.Mutex + var msgsA, msgsB []string + + // --- Region A --- + nodeA, err := New(Config{ + Region: "us-east-1", + NodeID: "node-a", + BindAddr: "127.0.0.1", + OnMessage: func(msg *clusterv1.ClusterMessage) { + muA.Lock() + msgsA = append(msgsA, msg.GetCacheInvalidation().GetCacheKey()) + muA.Unlock() + }, + }) + require.NoError(t, err) + + require.Eventually(t, func() bool { + return nodeA.IsBridge() && nodeA.WANAddr() != "" + }, 5*time.Second, 50*time.Millisecond) + + wanAddrA := nodeA.WANAddr() + + // --- Region B --- + nodeB, err := New(Config{ + Region: "eu-west-1", + NodeID: "node-b", + BindAddr: "127.0.0.1", + WANSeeds: []string{wanAddrA}, + OnMessage: func(msg *clusterv1.ClusterMessage) { + muB.Lock() + msgsB = append(msgsB, msg.GetCacheInvalidation().GetCacheKey()) + muB.Unlock() + }, + }) + require.NoError(t, err) + + defer func() { + require.NoError(t, nodeB.Close()) + require.NoError(t, nodeA.Close()) + }() + + require.Eventually(t, func() bool { + return nodeB.IsBridge() + }, 5*time.Second, 50*time.Millisecond) + + // Wait for WAN connectivity + implA := nodeA.(*gossipCluster) + implB := nodeB.(*gossipCluster) + require.Eventually(t, func() bool { + implA.mu.RLock() + wanA := implA.wan + implA.mu.RUnlock() + + implB.mu.RLock() + wanB := implB.wan + implB.mu.RUnlock() + + if wanA == nil || wanB == nil { + return false + } + return wanA.NumMembers() == 2 && wanB.NumMembers() == 2 + }, 10*time.Second, 100*time.Millisecond, "WAN pools should connect") + + // Broadcast from A → B + require.NoError(t, nodeA.Broadcast(testMessage("from-east"))) + + require.Eventually(t, func() bool { + muB.Lock() + defer muB.Unlock() + for _, m := range msgsB { + if m == "from-east" { + return true + } + } + return false + }, 10*time.Second, 100*time.Millisecond, "B should receive message from 
A") + + // Broadcast from B → A + require.NoError(t, nodeB.Broadcast(testMessage("from-west"))) + + require.Eventually(t, func() bool { + muA.Lock() + defer muA.Unlock() + for _, m := range msgsA { + if m == "from-west" { + return true + } + } + return false + }, 10*time.Second, 100*time.Millisecond, "A should receive message from B") +} + +func TestCluster_Noop(t *testing.T) { + c := NewNoop() + + require.False(t, c.IsBridge()) + require.Nil(t, c.Members()) + require.NoError(t, c.Broadcast(testMessage("test"))) + require.NoError(t, c.Close()) +} diff --git a/pkg/cluster/config.go b/pkg/cluster/config.go new file mode 100644 index 0000000000..a5c828e132 --- /dev/null +++ b/pkg/cluster/config.go @@ -0,0 +1,45 @@ +package cluster + +import clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + +// Config configures a gossip cluster node. +type Config struct { + // Region identifies the geographic region (e.g. "us-east-1"). + Region string + + // NodeID is a unique identifier for this instance. + NodeID string + + // BindAddr is the address to bind memberlist listeners on. Default "0.0.0.0". + BindAddr string + + // BindPort is the LAN memberlist port. Default 0 (ephemeral). + // In production, set explicitly (e.g. 7946). + BindPort int + + // WANBindPort is the WAN memberlist port (used when this node becomes bridge). Default 0 (ephemeral). + // In production, set explicitly (e.g. 7947). + WANBindPort int + + // LANSeeds are addresses of existing LAN cluster members to join (e.g. k8s headless service). + LANSeeds []string + + // WANSeeds are addresses of cross-region bridges to join. + WANSeeds []string + + // SecretKey is a shared secret used for AES-256 encryption of all gossip traffic. + // When set, both LAN and WAN pools require this key to join and communicate. + // Must be 16, 24, or 32 bytes for AES-128, AES-192, or AES-256 respectively. + SecretKey []byte + + // OnMessage is called when a broadcast message is received from the cluster. 
+ OnMessage func(msg *clusterv1.ClusterMessage) +} + +func (c *Config) setDefaults() { + if c.BindAddr == "" { + c.BindAddr = "0.0.0.0" + } + // BindPort and WANBindPort default to 0, which lets the OS pick ephemeral + // ports. In production, callers should set these explicitly (e.g. 7946/7947). +} diff --git a/pkg/cluster/delegate_lan.go b/pkg/cluster/delegate_lan.go new file mode 100644 index 0000000000..e147369f06 --- /dev/null +++ b/pkg/cluster/delegate_lan.go @@ -0,0 +1,91 @@ +package cluster + +import ( + "github.com/hashicorp/memberlist" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + "github.com/unkeyed/unkey/pkg/logger" + "google.golang.org/protobuf/proto" +) + +// lanDelegate handles memberlist callbacks for the LAN pool. +type lanDelegate struct { + cluster *gossipCluster +} + +var _ memberlist.Delegate = (*lanDelegate)(nil) + +func newLANDelegate(c *gossipCluster) *lanDelegate { + return &lanDelegate{cluster: c} +} + +func (d *lanDelegate) NodeMeta(limit int) []byte { return nil } +func (d *lanDelegate) LocalState(join bool) []byte { return nil } +func (d *lanDelegate) MergeRemoteState(buf []byte, join bool) {} +func (d *lanDelegate) GetBroadcasts(overhead, limit int) [][]byte { + d.cluster.mu.RLock() + q := d.cluster.lanQueue + d.cluster.mu.RUnlock() + + if q == nil { + return nil + } + return q.GetBroadcasts(overhead, limit) +} + +// NotifyMsg is called when a message is received via the LAN pool. +func (d *lanDelegate) NotifyMsg(data []byte) { + if len(data) == 0 { + return + } + + var msg clusterv1.ClusterMessage + if err := proto.Unmarshal(data, &msg); err != nil { + logger.Warn("Failed to unmarshal LAN cluster message", "error", err) + return + } + + // Deliver to the application callback + if d.cluster.config.OnMessage != nil { + d.cluster.config.OnMessage(&msg) + } + + // If this node is the bridge and the message originated locally (LAN direction), + // relay it to the WAN pool for cross-region delivery. 
+ if d.cluster.IsBridge() && msg.Direction == clusterv1.Direction_DIRECTION_LAN { + d.cluster.mu.RLock() + wanQ := d.cluster.wanQueue + d.cluster.mu.RUnlock() + + if wanQ != nil { + relay := proto.Clone(&msg).(*clusterv1.ClusterMessage) + relay.Direction = clusterv1.Direction_DIRECTION_WAN + wanBytes, err := proto.Marshal(relay) + if err != nil { + logger.Warn("Failed to marshal WAN relay message", "error", err) + return + } + wanQ.QueueBroadcast(newBroadcast(wanBytes)) + } + } +} + +// lanEventDelegate handles join/leave events for bridge election. +type lanEventDelegate struct { + cluster *gossipCluster +} + +var _ memberlist.EventDelegate = (*lanEventDelegate)(nil) + +func newLANEventDelegate(c *gossipCluster) *lanEventDelegate { + return &lanEventDelegate{cluster: c} +} + +func (d *lanEventDelegate) NotifyJoin(node *memberlist.Node) { + d.cluster.triggerEvalBridge() +} + +func (d *lanEventDelegate) NotifyLeave(node *memberlist.Node) { + d.cluster.triggerEvalBridge() +} + +func (d *lanEventDelegate) NotifyUpdate(node *memberlist.Node) {} diff --git a/pkg/cluster/delegate_wan.go b/pkg/cluster/delegate_wan.go new file mode 100644 index 0000000000..d6cdd3ea63 --- /dev/null +++ b/pkg/cluster/delegate_wan.go @@ -0,0 +1,73 @@ +package cluster + +import ( + "github.com/hashicorp/memberlist" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + "github.com/unkeyed/unkey/pkg/logger" + "google.golang.org/protobuf/proto" +) + +// wanDelegate handles memberlist callbacks for the WAN pool. 
+type wanDelegate struct { + cluster *gossipCluster +} + +var _ memberlist.Delegate = (*wanDelegate)(nil) + +func newWANDelegate(c *gossipCluster) *wanDelegate { + return &wanDelegate{cluster: c} +} + +func (d *wanDelegate) NodeMeta(limit int) []byte { return nil } +func (d *wanDelegate) LocalState(join bool) []byte { return nil } +func (d *wanDelegate) MergeRemoteState(buf []byte, join bool) {} +func (d *wanDelegate) GetBroadcasts(overhead, limit int) [][]byte { + d.cluster.mu.RLock() + wanQ := d.cluster.wanQueue + d.cluster.mu.RUnlock() + + if wanQ == nil { + return nil + } + return wanQ.GetBroadcasts(overhead, limit) +} + +// NotifyMsg is called when a message is received via the WAN pool. +func (d *wanDelegate) NotifyMsg(data []byte) { + if len(data) == 0 { + return + } + + var msg clusterv1.ClusterMessage + if err := proto.Unmarshal(data, &msg); err != nil { + logger.Warn("Failed to unmarshal WAN cluster message", "error", err) + return + } + + // Skip messages that originated in our own region to avoid loops. + if msg.SourceRegion == d.cluster.config.Region { + return + } + + // Deliver to the application callback on this bridge node + if d.cluster.config.OnMessage != nil { + d.cluster.config.OnMessage(&msg) + } + + // Re-broadcast to the local LAN pool so all nodes in this region receive it. 
+ d.cluster.mu.RLock() + lanQ := d.cluster.lanQueue + d.cluster.mu.RUnlock() + + if lanQ == nil { + return + } + + msg.Direction = clusterv1.Direction_DIRECTION_WAN + lanBytes, err := proto.Marshal(&msg) + if err != nil { + logger.Warn("Failed to marshal LAN relay message", "error", err) + return + } + lanQ.QueueBroadcast(newBroadcast(lanBytes)) +} diff --git a/pkg/cluster/discovery.go b/pkg/cluster/discovery.go new file mode 100644 index 0000000000..9dba5d9f76 --- /dev/null +++ b/pkg/cluster/discovery.go @@ -0,0 +1,32 @@ +package cluster + +import ( + "fmt" + "net" + + "github.com/unkeyed/unkey/pkg/logger" +) + +// ResolveDNSSeeds resolves a list of hostnames to "host:port" addresses. +// Hostnames that resolve to multiple A records (e.g. k8s headless services) +// produce one entry per IP. Literal IPs pass through unchanged. +func ResolveDNSSeeds(hosts []string, port int) []string { + var addrs []string + + for _, host := range hosts { + ips, err := net.LookupHost(host) + if err != nil { + logger.Warn("Failed to resolve seed host", "host", host, "error", err) + // Use the raw host as fallback (might be an IP already) + addrs = append(addrs, fmt.Sprintf("%s:%d", host, port)) + + continue + } + + for _, ip := range ips { + addrs = append(addrs, fmt.Sprintf("%s:%d", ip, port)) + } + } + + return addrs +} diff --git a/pkg/cluster/doc.go b/pkg/cluster/doc.go new file mode 100644 index 0000000000..4cc994b7fd --- /dev/null +++ b/pkg/cluster/doc.go @@ -0,0 +1,16 @@ +// Package cluster provides a two-tier gossip-based cluster membership using +// hashicorp/memberlist (SWIM protocol). 
+// +// Architecture: +// +// - LAN pool: all nodes in a region, using DefaultLANConfig (~1ms propagation) +// - WAN pool: one bridge per region (auto-elected oldest node), DefaultWANConfig +// +// Message flow for cache invalidation: +// +// node → LAN broadcast → bridge → WAN → remote bridges → their LAN pools +// +// Bridge election: the oldest node in the LAN pool (by join time encoded in +// the memberlist node name) automatically becomes the WAN bridge. When the +// bridge leaves, the next oldest node promotes itself. +package cluster diff --git a/pkg/cluster/message.go b/pkg/cluster/message.go new file mode 100644 index 0000000000..65bb389a73 --- /dev/null +++ b/pkg/cluster/message.go @@ -0,0 +1,21 @@ +package cluster + +import ( + "github.com/hashicorp/memberlist" +) + +// clusterBroadcast implements memberlist.Broadcast for the TransmitLimitedQueue. +type clusterBroadcast struct { + msg []byte +} + +var _ memberlist.Broadcast = (*clusterBroadcast)(nil) + +func (b *clusterBroadcast) Invalidates(other memberlist.Broadcast) bool { return false } +func (b *clusterBroadcast) Message() []byte { return b.msg } +func (b *clusterBroadcast) Finished() {} + +// newBroadcast wraps raw bytes in a memberlist.Broadcast for queue submission. +func newBroadcast(msg []byte) *clusterBroadcast { + return &clusterBroadcast{msg: msg} +} diff --git a/pkg/cluster/mux.go b/pkg/cluster/mux.go new file mode 100644 index 0000000000..3b1835b86c --- /dev/null +++ b/pkg/cluster/mux.go @@ -0,0 +1,71 @@ +package cluster + +import ( + "fmt" + "sync" + "time" + + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" + "github.com/unkeyed/unkey/pkg/logger" +) + +// MessageMux fans out incoming cluster messages to all registered subscribers. +// It sits between the cluster transport and application-level handlers, allowing +// multiple subsystems to share the same gossip cluster. 
+type MessageMux struct { + mu sync.RWMutex + handlers []func(*clusterv1.ClusterMessage) +} + +// NewMessageMux creates a new message multiplexer. +func NewMessageMux() *MessageMux { + return &MessageMux{ + mu: sync.RWMutex{}, + handlers: nil, + } +} + +// subscribe adds a raw handler that receives all cluster messages. +func (m *MessageMux) subscribe(handler func(*clusterv1.ClusterMessage)) { + m.mu.Lock() + m.handlers = append(m.handlers, handler) + m.mu.Unlock() +} + +// Subscribe registers a typed handler that only receives messages matching +// the given oneof payload variant. The type assertion is handled automatically. +func Subscribe[T clusterv1.IsClusterMessage_Payload](mux *MessageMux, handler func(T)) { + mux.subscribe(func(msg *clusterv1.ClusterMessage) { + payload, ok := msg.Payload.(T) + if !ok { + return + } + + handler(payload) + }) +} + +// OnMessage dispatches a ClusterMessage to all registered subscribers. +func (m *MessageMux) OnMessage(msg *clusterv1.ClusterMessage) { + now := time.Now().UnixMilli() + latencyMs := now - msg.SentAtMs + + logger.Info("cluster message received", + "latency_ms", latencyMs, + "received_at_ms", now, + "sent_at_ms", msg.SentAtMs, + "source_region", msg.SourceRegion, + "sender_node", msg.SenderNode, + "direction", msg.Direction.String(), + "payload_type", fmt.Sprintf("%T", msg.Payload), + ) + + m.mu.RLock() + snapshot := make([]func(*clusterv1.ClusterMessage), len(m.handlers)) + copy(snapshot, m.handlers) + m.mu.RUnlock() + + for _, h := range snapshot { + h(msg) + } +} diff --git a/pkg/cluster/mux_test.go b/pkg/cluster/mux_test.go new file mode 100644 index 0000000000..0a8d8b9ab3 --- /dev/null +++ b/pkg/cluster/mux_test.go @@ -0,0 +1,62 @@ +package cluster + +import ( + "testing" + + "github.com/stretchr/testify/require" + cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" +) + +func cacheInvalidationMessage(cacheName, cacheKey string) 
*clusterv1.ClusterMessage { + return &clusterv1.ClusterMessage{ + Payload: &clusterv1.ClusterMessage_CacheInvalidation{ + CacheInvalidation: &cachev1.CacheInvalidationEvent{ + CacheName: cacheName, + Action: &cachev1.CacheInvalidationEvent_CacheKey{CacheKey: cacheKey}, + }, + }, + } +} + +func TestMessageMux_RoutesToSubscriber(t *testing.T) { + t.Run("delivers payload to typed subscriber", func(t *testing.T) { + mux := NewMessageMux() + + var received *cachev1.CacheInvalidationEvent + Subscribe(mux, func(payload *clusterv1.ClusterMessage_CacheInvalidation) { + received = payload.CacheInvalidation + }) + + msg := cacheInvalidationMessage("my-cache", "my-key") + mux.OnMessage(msg) + + require.NotNil(t, received) + require.Equal(t, "my-cache", received.GetCacheName()) + require.Equal(t, "my-key", received.GetCacheKey()) + }) +} + +func TestMessageMux_MultipleSubscribers(t *testing.T) { + t.Run("fans out to all subscribers", func(t *testing.T) { + mux := NewMessageMux() + + var count1, count2 int + Subscribe(mux, func(payload *clusterv1.ClusterMessage_CacheInvalidation) { count1++ }) + Subscribe(mux, func(payload *clusterv1.ClusterMessage_CacheInvalidation) { count2++ }) + + mux.OnMessage(cacheInvalidationMessage("c", "k")) + + require.Equal(t, 1, count1) + require.Equal(t, 1, count2) + }) +} + +func TestMessageMux_NoSubscribersNoOp(t *testing.T) { + t.Run("no panic without subscribers", func(t *testing.T) { + mux := NewMessageMux() + + // Should not panic when no subscribers are registered + mux.OnMessage(cacheInvalidationMessage("c", "k")) + }) +} diff --git a/pkg/cluster/noop.go b/pkg/cluster/noop.go new file mode 100644 index 0000000000..2e12e8f434 --- /dev/null +++ b/pkg/cluster/noop.go @@ -0,0 +1,23 @@ +package cluster + +import ( + "github.com/hashicorp/memberlist" + clusterv1 "github.com/unkeyed/unkey/gen/proto/cluster/v1" +) + +// noopCluster is a no-op implementation of Cluster that does not participate in gossip. 
+// All operations are safe to call but do nothing. +type noopCluster struct{} + +var _ Cluster = noopCluster{} + +func (noopCluster) Broadcast(clusterv1.IsClusterMessage_Payload) error { return nil } +func (noopCluster) Members() []*memberlist.Node { return nil } +func (noopCluster) IsBridge() bool { return false } +func (noopCluster) WANAddr() string { return "" } +func (noopCluster) Close() error { return nil } + +// NewNoop returns a no-op cluster that does not participate in gossip. +func NewNoop() Cluster { + return noopCluster{} +} diff --git a/pkg/events/BUILD.bazel b/pkg/events/BUILD.bazel deleted file mode 100644 index 29fa175345..0000000000 --- a/pkg/events/BUILD.bazel +++ /dev/null @@ -1,12 +0,0 @@ -load("@rules_go//go:def.bzl", "go_library") - -go_library( - name = "events", - srcs = ["topic.go"], - importpath = "github.com/unkeyed/unkey/pkg/events", - visibility = ["//visibility:public"], - deps = [ - "//pkg/otel/tracing", - "@io_opentelemetry_go_otel//attribute", - ], -) diff --git a/pkg/events/topic.go b/pkg/events/topic.go deleted file mode 100644 index bd156d354e..0000000000 --- a/pkg/events/topic.go +++ /dev/null @@ -1,79 +0,0 @@ -package events - -import ( - "context" - "fmt" - "sync" - - "github.com/unkeyed/unkey/pkg/otel/tracing" - "go.opentelemetry.io/otel/attribute" -) - -// EventEmitter defines the contract for publishing events to a topic. -// Implementations must broadcast events to all registered subscribers. -type EventEmitter[E any] interface { - Emit(ctx context.Context, event E) -} - -// EventSubscriber defines the contract for receiving events from a topic. -// Subscribers receive events via a channel returned by Subscribe. -type EventSubscriber[E any] interface { - Subscribe(id string) <-chan E -} - -// Topic combines EventEmitter and EventSubscriber into a pub/sub messaging primitive. -// Topics are created with NewTopic and remain active for the lifetime of the application. 
-// Events emitted to a topic are broadcast to all current subscribers synchronously, -// blocking if any subscriber's channel buffer is full. -type Topic[E any] interface { - EventEmitter[E] - EventSubscriber[E] -} - -type listener[E any] struct { - id string - ch chan E -} - -type topic[E any] struct { - mu sync.RWMutex - bufferSize int - listeners []listener[E] -} - -// NewTopic creates a new topic with an optional buffer size. -// Omitting the buffer size will create an unbuffered topic. -func NewTopic[E any](bufferSize ...int) Topic[E] { - n := 0 - if len(bufferSize) > 0 { - n = bufferSize[0] - } - return &topic[E]{ - mu: sync.RWMutex{}, - bufferSize: n, - listeners: []listener[E]{}, - } -} - -func (t *topic[E]) Emit(ctx context.Context, event E) { - - t.mu.Lock() - defer t.mu.Unlock() - for _, l := range t.listeners { - _, span := tracing.Start(ctx, fmt.Sprintf("topic.Emit:%s", l.id)) - span.SetAttributes(attribute.Int("channelSize", len(l.ch))) - l.ch <- event - span.End() - } - -} - -// Subscribe returns a channel that will receive events from the topic. -// The id is used for debugging and tracing, not for uniqueness. 
-func (t *topic[E]) Subscribe(id string) <-chan E { - t.mu.Lock() - defer t.mu.Unlock() - ch := make(chan E, t.bufferSize) - t.listeners = append(t.listeners, listener[E]{id: id, ch: ch}) - return ch -} diff --git a/pkg/eventstream/BUILD.bazel b/pkg/eventstream/BUILD.bazel deleted file mode 100644 index 0295152808..0000000000 --- a/pkg/eventstream/BUILD.bazel +++ /dev/null @@ -1,34 +0,0 @@ -load("@rules_go//go:def.bzl", "go_library", "go_test") - -go_library( - name = "eventstream", - srcs = [ - "consumer.go", - "doc.go", - "interface.go", - "noop.go", - "producer.go", - "topic.go", - ], - importpath = "github.com/unkeyed/unkey/pkg/eventstream", - visibility = ["//visibility:public"], - deps = [ - "//pkg/assert", - "//pkg/logger", - "@com_github_segmentio_kafka_go//:kafka-go", - "@org_golang_google_protobuf//proto", - ], -) - -go_test( - name = "eventstream_test", - size = "small", - srcs = ["eventstream_integration_test.go"], - deps = [ - ":eventstream", - "//gen/proto/cache/v1:cache", - "//pkg/testutil/containers", - "//pkg/uid", - "@com_github_stretchr_testify//require", - ], -) diff --git a/pkg/eventstream/consumer.go b/pkg/eventstream/consumer.go deleted file mode 100644 index bb10ecc7f5..0000000000 --- a/pkg/eventstream/consumer.go +++ /dev/null @@ -1,263 +0,0 @@ -package eventstream - -import ( - "context" - "errors" - "fmt" - "io" - "reflect" - "sync" - "time" - - "github.com/segmentio/kafka-go" - "github.com/unkeyed/unkey/pkg/logger" - "google.golang.org/protobuf/proto" -) - -// isEOF checks if an error is an EOF error from Kafka -func isEOF(err error) bool { - return errors.Is(err, io.EOF) -} - -// consumer handles consuming events from Kafka topics -type consumer[T proto.Message] struct { - brokers []string - topic string - handler func(context.Context, T) error - reader *kafka.Reader - instanceID string - mu sync.Mutex - subscribed bool - fromBeginning bool - isPointerType bool // Cached check to avoid reflection on every message -} - -// NewConsumer 
creates a new consumer for receiving events from this topic. -// -// Returns a Consumer instance configured with the topic's broker addresses, -// topic name, instance ID, and logger. The consumer must have its Consume -// method called to begin processing messages. -// -// Each consumer automatically joins a Kafka consumer group named -// "{topic}::{instanceID}" for load balancing and fault tolerance. Multiple -// consumers with the same group will automatically distribute message -// processing across instances. -// -// The consumer implements single-handler semantics - only one Consume call -// is allowed per consumer instance. This design prevents race conditions -// and ensures clear ownership of message processing. -// -// Performance characteristics: -// - Consumer creation is lightweight (no network calls) -// - Kafka connections are established when Consume is called -// - Automatic offset management and consumer group rebalancing -// - Efficient protobuf deserialization with minimal allocations -// -// Options: -// - WithStartFromBeginning(): Start reading from the beginning of the topic -// -// Examples: -// -// // Default consumer (starts from latest) -// consumer := topic.NewConsumer() -// -// // Consumer that reads from beginning (useful for tests) -// consumer := topic.NewConsumer(eventstream.WithStartFromBeginning()) -// -// consumer.Consume(ctx, func(ctx context.Context, event *MyEvent) error { -// // Process the event -// return nil -// }) -// defer consumer.Close() -func (t *Topic[T]) NewConsumer(opts ...ConsumerOption) Consumer[T] { - cfg := &consumerConfig{ - fromBeginning: false, - } - for _, opt := range opts { - opt(cfg) - } - - t.mu.Lock() - defer t.mu.Unlock() - - // Return noop consumer if brokers are not configured - if len(t.brokers) == 0 { - return newNoopConsumer[T]() - } - - // Check once if T is a pointer type to avoid reflection on every message - isPointerType := reflect.TypeOf((*T)(nil)).Elem().Kind() == reflect.Ptr - - //nolint: 
exhaustruct - consumer := &consumer[T]{ - brokers: t.brokers, - topic: t.topic, - instanceID: t.instanceID, - fromBeginning: cfg.fromBeginning, - isPointerType: isPointerType, - } - - // Track consumer for cleanup - t.consumers = append(t.consumers, consumer) - - return consumer -} - -// Consume starts consuming events from the Kafka topic in a background goroutine. -// -// This method initiates event consumption by starting a background goroutine that -// continuously reads messages from Kafka and calls the provided handler for each -// event. The method returns immediately after starting the background processing. -// -// Single-handler enforcement: -// -// This method can only be called once per consumer instance. Subsequent calls -// are silently ignored to prevent multiple competing handlers and race conditions. -// This design ensures clear ownership of message processing. -// -// Handler function: -// -// The handler is called for each received event with a context that has a 30-second -// timeout. If the handler returns an error, the error is logged but message -// processing continues. Handler errors do not cause the consumer to stop. 
-// -// Message processing guarantees: -// - At-least-once delivery (messages may be redelivered on failure) -// - Messages from the same partition are processed in order -// - Automatic offset commits for successfully processed messages -// - Consumer group rebalancing handles instance failures automatically -// -// Error handling: -// -// All errors are logged rather than returned since this method runs asynchronously: -// - Kafka connection errors are logged and trigger automatic reconnection -// - Protobuf deserialization errors are logged and the message is skipped -// - Handler errors are logged but processing continues -// - Fatal errors (authentication, configuration) cause consumption to stop -// -// Performance characteristics: -// - Automatic message batching for improved throughput -// - Configurable consumer group for load balancing -// - Efficient protobuf deserialization with minimal allocations -// - Consumer group: "{topic}::{instanceID}" for instance-based load balancing -// -// Context handling: -// -// The provided context is used for the entire consumption lifecycle. When the -// context is cancelled, the background goroutine stops and the consumer shuts down -// gracefully. Context cancellation is the primary mechanism for stopping consumption. -// -// Resource management: -// -// The background goroutine automatically manages Kafka connections and consumer -// group membership. Call Close() when the consumer is no longer needed to ensure -// proper cleanup and consumer group departure. -// -// Example: -// -// consumer := topic.NewConsumer() -// -// // Start consuming in background -// consumer.Consume(ctx, func(ctx context.Context, event *MyEvent) error { -// log.Printf("Received event: %+v", event) -// // Process the event... -// return nil // nil = success, error = logged but processing continues -// }) -// -// // Do other work while consuming happens in background... 
-// -// // Clean shutdown -// consumer.Close() -func (c *consumer[T]) Consume(ctx context.Context, handler func(context.Context, T) error) { - c.mu.Lock() - defer c.mu.Unlock() - - if c.subscribed { - // Already consuming, ignore subsequent calls - return - } - - c.handler = handler - c.subscribed = true - - startOffset := kafka.LastOffset - if c.fromBeginning { - startOffset = kafka.FirstOffset - } - - //nolint: exhaustruct - readerConfig := kafka.ReaderConfig{ - Brokers: c.brokers, - Topic: c.topic, - GroupID: fmt.Sprintf("%s::%s", c.topic, c.instanceID), - StartOffset: startOffset, - } - - c.reader = kafka.NewReader(readerConfig) - - // Start consuming in a goroutine - go c.consumeLoop(ctx) -} - -// consumeLoop handles the main consumption loop in a background goroutine. -// This method logs all errors instead of returning them since it runs asynchronously. -func (c *consumer[T]) consumeLoop(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - default: - msg, err := c.reader.ReadMessage(ctx) - if err != nil { - // Check if context was cancelled - if ctx.Err() != nil { - return - } - - // EOF is expected when there are no more messages - don't log it - if !isEOF(err) { - logger.Warn("Failed to read message from Kafka", "error", err.Error(), "topic", c.topic) - } - - continue - } - - // Create new instance of the event type - var t T - // For pointer types, we need to allocate a new instance - // Use cached isPointerType to avoid reflection on every message - if c.isPointerType { - newInstance := reflect.New(reflect.TypeOf(t).Elem()).Interface() - var ok bool - t, ok = newInstance.(T) - if !ok { - logger.Error("Failed to cast reflected type to expected type", "topic", c.topic) - continue - } - } - - // Deserialize protobuf event - if err := proto.Unmarshal(msg.Value, t); err != nil { - logger.Warn("Failed to deserialize protobuf message", "error", err.Error(), "topic", c.topic) - continue - } - - // Call handler - if c.handler != nil { - 
handlerCtx, cancel := context.WithTimeout(ctx, 30*time.Second) - if err := c.handler(handlerCtx, t); err != nil { - logger.Error("Error handling event", "error", err.Error(), "topic", c.topic) - } - cancel() - } - } - } -} - -// Close closes the consumer -func (c *consumer[T]) Close() error { - if c.reader != nil { - return c.reader.Close() - } - return nil -} diff --git a/pkg/eventstream/doc.go b/pkg/eventstream/doc.go deleted file mode 100644 index 11a56d96a6..0000000000 --- a/pkg/eventstream/doc.go +++ /dev/null @@ -1,72 +0,0 @@ -// Package eventstream provides distributed event streaming with strong typing and protobuf serialization. -// -// The package implements a producer-consumer pattern for event-driven architectures using Kafka as the underlying -// message broker. All events are strongly typed using Go generics and serialized using Protocol Buffers for -// efficient network transmission and cross-language compatibility. -// -// This implementation was chosen over simpler approaches because we need strong consistency guarantees for cache -// invalidation across distributed nodes, type safety to prevent runtime errors, and efficient serialization for -// high-throughput scenarios. -// -// # Key Types -// -// The main entry point is [Topic], which provides access to typed producers and consumers for a specific Kafka topic. -// Producers implement the [Producer] interface for publishing events, while consumers implement the [Consumer] -// interface for receiving events. Both interfaces are generic and constrained to protobuf messages. 
-// -// # Usage -// -// Basic event streaming setup: -// -// topic := eventstream.NewTopic[*MyEvent](eventstream.TopicConfig{ -// Brokers: []string{"kafka:9092"}, -// Topic: "my-events", -// InstanceID: "instance-1", -// }) -// -// // Publishing events -// producer := topic.NewProducer() -// event := &MyEvent{Data: "hello"} -// err := producer.Produce(ctx, event) -// if err != nil { -// // Handle production error -// } -// -// // Consuming events -// consumer := topic.NewConsumer() -// err = consumer.Consume(ctx, func(ctx context.Context, event *MyEvent) error { -// // Process the event -// log.Printf("Received: %s", event.Data) -// return nil -// }) -// if err != nil { -// // Handle consumption error -// } -// -// For advanced configuration and cluster setup, see the examples in the package tests. -// -// # Error Handling -// -// The package distinguishes between transient errors (network timeouts, temporary unavailability) and permanent -// errors (invalid configuration, serialization failures). Transient errors are automatically retried by the -// underlying Kafka client, while permanent errors are returned immediately to the caller. -// -// Consumers enforce single-handler semantics and will return an error if [Consumer.Consume] is called multiple -// times on the same consumer instance. -// -// # Performance Characteristics -// -// Producers are designed for high throughput with minimal allocations. Events are serialized once and sent -// asynchronously to Kafka. Typical latency is <1ms for local publishing. -// -// Consumers use efficient protobuf deserialization and support automatic offset management. Memory usage scales -// linearly with the number of active consumer group members. -// -// # Architecture Notes -// -// The package uses Kafka's consumer groups for load balancing and fault tolerance. Each consumer automatically -// joins a consumer group named "{topic}::{instanceID}" to ensure proper message distribution across cluster instances. 
-// -// Messages include metadata headers for content type and source instance identification, enabling advanced routing -// and filtering scenarios. -package eventstream diff --git a/pkg/eventstream/eventstream_integration_test.go b/pkg/eventstream/eventstream_integration_test.go deleted file mode 100644 index e9cd9cd8ae..0000000000 --- a/pkg/eventstream/eventstream_integration_test.go +++ /dev/null @@ -1,194 +0,0 @@ -package eventstream_test - -import ( - "context" - "fmt" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/uid" -) - -func TestEventStreamIntegration(t *testing.T) { - - // Get Kafka brokers from test containers - brokers := containers.Kafka(t) - - // Create unique topic and instance ID for this test run to ensure fresh consumer group - topicName := fmt.Sprintf("test-eventstream-%s", uid.New(uid.TestPrefix)) - instanceID := uid.New(uid.TestPrefix) - - config := eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: instanceID, - } - - t.Logf("Test config: topic=%s, instanceID=%s, brokers=%v", topicName, instanceID, brokers) - - // Create topic instance - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](config) - require.NoError(t, err) - - // Ensure topic exists - t.Logf("Calling EnsureExists for topic...") - err = topic.EnsureExists(1, 1) - require.NoError(t, err, "Failed to create test topic") - t.Logf("Topic created successfully") - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully propagated before using it - waitCtx, waitCancel := context.WithTimeout(context.Background(), 10*time.Second) - defer waitCancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err, "Topic should become ready") - t.Logf("Topic is ready") - - // Test data - 
testEvent := &cachev1.CacheInvalidationEvent{ - CacheName: "test-cache", - CacheKey: "test-key-123", - Timestamp: time.Now().UnixMilli(), - SourceInstance: "test-producer", - } - - var receivedEvent *cachev1.CacheInvalidationEvent - - // Create consumer - t.Logf("Creating consumer...") - consumer := topic.NewConsumer() - defer func() { require.NoError(t, consumer.Close()) }() - - // Start consuming before producing - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - t.Logf("Starting consumer.Consume()...") - consumer.Consume(ctx, func(ctx context.Context, event *cachev1.CacheInvalidationEvent) error { - t.Logf("HANDLER CALLED: Received event: cache=%s, key=%s, timestamp=%d, source=%s", - event.GetCacheName(), event.GetCacheKey(), event.GetTimestamp(), event.GetSourceInstance()) - - receivedEvent = event - return nil - }) - - // Wait for consumer to be ready and actually positioned - // The consumer needs time to join the group, get partition assignment, and fetch metadata - t.Logf("Waiting for consumer to be ready...") - time.Sleep(5 * time.Second) - t.Logf("Consumer should be ready now") - - // Create producer and send test event - producer := topic.NewProducer() - - t.Logf("Producing event: cache=%s, key=%s, timestamp=%d, source=%s", - testEvent.GetCacheName(), testEvent.GetCacheKey(), testEvent.GetTimestamp(), testEvent.GetSourceInstance()) - - err = producer.Produce(ctx, testEvent) - require.NoError(t, err, "Failed to produce test event") - t.Logf("Event produced successfully") - - // Wait for event to be consumed - require.Eventually(t, func() bool { - return receivedEvent != nil - }, 10*time.Second, 100*time.Millisecond, "Event should be received within 10 seconds") - - // Verify the received event - require.Equal(t, testEvent.GetCacheName(), receivedEvent.GetCacheName(), "Cache name should match") - require.Equal(t, testEvent.GetCacheKey(), receivedEvent.GetCacheKey(), "Cache key should match") - require.Equal(t, 
testEvent.GetTimestamp(), receivedEvent.GetTimestamp(), "Timestamp should match") - require.Equal(t, testEvent.GetSourceInstance(), receivedEvent.GetSourceInstance(), "Source instance should match") - - t.Log("Event stream integration test passed - message produced and consumed successfully") -} - -func TestEventStreamMultipleMessages(t *testing.T) { - - brokers := containers.Kafka(t) - - // Create unique topic and instance ID for this test run to ensure fresh consumer group - topicName := fmt.Sprintf("test-multiple-%s", uid.New(uid.TestPrefix)) - - config := eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: uid.New(uid.TestPrefix), - } - - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](config) - require.NoError(t, err) - - err = topic.EnsureExists(1, 1) - require.NoError(t, err) - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully propagated before using it - waitCtx, waitCancel := context.WithTimeout(context.Background(), 10*time.Second) - defer waitCancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err, "Topic should become ready") - - // Test multiple messages - numMessages := 5 - var receivedCount atomic.Int32 - receivedKeys := make(map[string]bool) - var mu sync.Mutex // protect receivedKeys map - - // Create consumer - consumer := topic.NewConsumer() - defer func() { require.NoError(t, consumer.Close()) }() - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - consumer.Consume(ctx, func(ctx context.Context, event *cachev1.CacheInvalidationEvent) error { - t.Logf("Received event: cache=%s, key=%s", event.GetCacheName(), event.GetCacheKey()) - - mu.Lock() - receivedKeys[event.GetCacheKey()] = true - mu.Unlock() - - receivedCount.Add(1) - return nil - }) - - // Wait for consumer to be ready and actually positioned - time.Sleep(5 * time.Second) - - producer := topic.NewProducer() - - // Send multiple events - for i := 
range numMessages { - event := &cachev1.CacheInvalidationEvent{ - CacheName: "test-cache", - CacheKey: fmt.Sprintf("test-key-%d", i), - Timestamp: time.Now().UnixMilli(), - SourceInstance: "test-producer", - } - - err = producer.Produce(ctx, event) - require.NoError(t, err, "Failed to produce event %d", i) - } - - // Wait for all events to be consumed - require.Eventually(t, func() bool { - return int(receivedCount.Load()) == numMessages - }, 15*time.Second, 100*time.Millisecond, "Should receive all messages within 15 seconds") - - // Verify we got all the expected keys - mu.Lock() - defer mu.Unlock() - - for i := range numMessages { - expectedKey := fmt.Sprintf("test-key-%d", i) - require.True(t, receivedKeys[expectedKey], "Should receive key %s", expectedKey) - } - - t.Logf("Multiple messages test passed - sent and received %d messages", numMessages) -} diff --git a/pkg/eventstream/interface.go b/pkg/eventstream/interface.go deleted file mode 100644 index ed4d8253f7..0000000000 --- a/pkg/eventstream/interface.go +++ /dev/null @@ -1,115 +0,0 @@ -package eventstream - -import ( - "context" - - "google.golang.org/protobuf/proto" -) - -// Producer defines the interface for publishing events to a Kafka topic. -// -// Producers are designed for high-throughput scenarios with minimal latency overhead. -// All events are serialized using Protocol Buffers before transmission to ensure -// efficient encoding and cross-language compatibility. -// -// Implementations are safe for concurrent use from multiple goroutines. -type Producer[T proto.Message] interface { - // Produce publishes one or more events to the configured Kafka topic. - // - // The events are serialized to protobuf format and sent to Kafka. - // The method blocks until all messages are accepted by the broker or an error occurs. - // - // Context is used for timeout and cancellation. 
If the context is cancelled before - // the messages are sent, the method returns the context error and the messages are not - // published. - // - // Returns an error if: - // - Event serialization fails (invalid protobuf message) - // - Kafka broker is unreachable (after retries) - // - Context timeout or cancellation - // - Producer has been closed - // - // The method does not guarantee message delivery - use Kafka's acknowledgment - // settings for delivery guarantees. - Produce(ctx context.Context, events ...T) error - - // Close gracefully shuts down the producer and releases all resources. - // - // This method should be called when the producer is no longer needed to ensure - // proper cleanup of Kafka connections and prevent resource leaks. - // - // The method blocks until all pending messages are flushed and the producer - // is properly shut down. After Close returns, the producer cannot be reused. - // - // It is safe to call Close multiple times - subsequent calls are no-ops. - // - // Returns an error only if the underlying Kafka writer encounters an issue during - // shutdown. These errors are typically not actionable as the producer is already - // being shut down. - Close() error -} - -// Consumer defines the interface for consuming events from a Kafka topic. -// -// Consumers implement a single-handler pattern where each consumer instance can only -// have one active consumption handler. This design prevents race conditions and -// ensures clear ownership of message processing. -// -// Consumers automatically join a Kafka consumer group for load balancing and fault -// tolerance across multiple consumer instances. -type Consumer[T proto.Message] interface { - // Consume starts consuming events from the Kafka topic and calls the provided - // handler for each received event. - // - // This method can only be called once per consumer instance. Subsequent calls - // are ignored. 
This design ensures clear ownership of message processing - // and prevents race conditions from multiple handlers. - // - // The method starts consuming in the background and returns immediately. - // The handler function is called for each received event. If the handler returns - // an error, the error is logged but message processing continues. The consumer - // automatically commits offsets for successfully processed messages. - // - // Consumption continues until the context is cancelled or a fatal error occurs. - // All errors (connection failures, deserialization errors, handler errors) are - // logged using the consumer's logger rather than being returned, since this - // method is designed to run in the background. - // - // Message processing guarantees: - // - At-least-once delivery (messages may be redelivered on failure) - // - Messages from the same partition are processed in order - // - Consumer group rebalancing is handled automatically - // - // Performance characteristics: - // - Automatic batching for improved throughput - // - Configurable prefetch buffer for low latency - // - Efficient protobuf deserialization - // - // Error handling: - // - Transient errors (network timeouts) are retried automatically - // - Deserialization errors for individual messages are logged and skipped - // - Handler errors are logged but do not stop message processing - // - Fatal errors (authentication, configuration) are logged and cause consumption to stop - // - // Usage: - // consumer := topic.NewConsumer() - // consumer.Consume(ctx, handleEvent) - // // ... do other work, consumption happens in background - // consumer.Close() // when done - Consume(ctx context.Context, handler func(context.Context, T) error) - - // Close gracefully shuts down the consumer and releases all resources. - // - // This method should be called when the consumer is no longer needed to ensure - // proper cleanup of Kafka connections and consumer group membership. 
- // - // The method blocks until all pending messages are processed and the consumer - // has left its consumer group. After Close returns, the consumer cannot be reused. - // - // It is safe to call Close multiple times - subsequent calls are no-ops. - // - // Returns an error only if the underlying Kafka client encounters an issue during - // shutdown. These errors are typically not actionable as the consumer is already - // being shut down. - Close() error -} diff --git a/pkg/eventstream/noop.go b/pkg/eventstream/noop.go deleted file mode 100644 index daeaa1d029..0000000000 --- a/pkg/eventstream/noop.go +++ /dev/null @@ -1,58 +0,0 @@ -package eventstream - -import ( - "context" - "sync" - - "google.golang.org/protobuf/proto" -) - -// noopProducer is a no-op implementation of Producer -type noopProducer[T proto.Message] struct{} - -// newNoopProducer creates a new no-op producer -func newNoopProducer[T proto.Message]() Producer[T] { - return &noopProducer[T]{} -} - -// Produce does nothing (no-op) -func (n *noopProducer[T]) Produce(ctx context.Context, events ...T) error { - return nil -} - -// Close does nothing (no-op) -func (n *noopProducer[T]) Close() error { - return nil -} - -// noopConsumer is a no-op implementation of Consumer -type noopConsumer[T proto.Message] struct{} - -// newNoopConsumer creates a new no-op consumer -func newNoopConsumer[T proto.Message]() Consumer[T] { - return &noopConsumer[T]{} -} - -// Consume does nothing (no-op) -func (n *noopConsumer[T]) Consume(ctx context.Context, handler func(context.Context, T) error) { - // No-op: does nothing -} - -// Close does nothing (no-op) -func (n *noopConsumer[T]) Close() error { - return nil -} - -// NewNoopTopic creates a new no-op topic that can be safely used when event streaming is disabled. -// All operations (NewProducer, NewConsumer, Close) are no-ops and safe to call. -// The returned Topic will create noop producers and consumers. 
-func NewNoopTopic[T proto.Message]() *Topic[T] { - return &Topic[T]{ - mu: sync.Mutex{}, - brokers: nil, - topic: "", - instanceID: "", - consumers: nil, - producers: nil, - } -} diff --git a/pkg/eventstream/producer.go b/pkg/eventstream/producer.go deleted file mode 100644 index 8818aacbc6..0000000000 --- a/pkg/eventstream/producer.go +++ /dev/null @@ -1,176 +0,0 @@ -package eventstream - -import ( - "context" - "time" - - "github.com/segmentio/kafka-go" - "github.com/unkeyed/unkey/pkg/logger" - "google.golang.org/protobuf/proto" -) - -// producer handles producing events to Kafka topics -type producer[T proto.Message] struct { - writer *kafka.Writer - instanceID string - topic string -} - -// NewProducer creates a new producer for publishing events to this topic. -// -// Returns a Producer instance configured with the topic's broker addresses, -// topic name, instance ID, and logger. The producer is immediately ready to -// publish events using its Produce method. -// -// The returned producer is safe for concurrent use from multiple goroutines. -// Each call to NewProducer creates a fresh producer instance with its own -// underlying Kafka writer that will be created on first use. 
-// -// Performance characteristics: -// - Producer creation is lightweight (no network calls) -// - Kafka connections are established lazily on first Produce call -// - Each producer manages its own connection pool -// -// Example: -// -// producer := topic.NewProducer() -// err := producer.Produce(ctx, &MyEvent{Data: "hello"}) -func (t *Topic[T]) NewProducer() Producer[T] { - t.mu.Lock() - defer t.mu.Unlock() - - // Return noop producer if brokers are not configured - if len(t.brokers) == 0 { - return newNoopProducer[T]() - } - - producer := &producer[T]{ - //nolint: exhaustruct - writer: &kafka.Writer{ - Addr: kafka.TCP(t.brokers...), - Topic: t.topic, - Balancer: &kafka.LeastBytes{}, - RequiredAcks: kafka.RequireOne, // Wait for leader acknowledgment - Async: false, // Synchronous for reliability - ReadTimeout: 1 * time.Second, // Reduced from 10s - WriteTimeout: 1 * time.Second, // Reduced from 10s - BatchSize: 100, // Batch up to 100 messages - BatchBytes: 1048576, // Batch up to 1MB - BatchTimeout: 10 * time.Millisecond, // Send batch after 10ms even if not full - }, - instanceID: t.instanceID, - topic: t.topic, - } - - // Track producer for cleanup - t.producers = append(t.producers, producer) - - return producer -} - -// Produce publishes one or more events to the configured Kafka topic with protobuf serialization. -// -// The events are serialized using Protocol Buffers and sent to Kafka with metadata -// headers including content type and source instance ID. The method blocks until -// all messages are accepted by the Kafka broker or an error occurs. -// -// Message format: -// - Body: Protobuf-serialized event data -// - Headers: content-type=application/x-protobuf, source-instance={instanceID} -// -// Context handling: -// -// The context is used for timeout and cancellation. If the context is cancelled -// before the messages are sent, the method returns the context error and the -// messages are not published. 
A typical timeout of 10-30 seconds is recommended -// for production use. -// -// Performance characteristics: -// - Typical latency: <5ms for local Kafka, <50ms for remote Kafka -// - Throughput: ~10,000 messages/second per producer -// - Memory: Minimal allocations due to efficient protobuf serialization -// - Connection pooling: Reuses connections across multiple Produce calls -// - Batch sending: Multiple events are sent in a single batch for efficiency -// -// Error conditions: -// - Protobuf serialization failure (invalid message structure) -// - Kafka broker unreachable (network issues, broker down) -// - Authentication or authorization failure -// - Context timeout or cancellation -// - Topic does not exist (if auto-creation is disabled) -// -// Concurrency: -// -// This method is safe for concurrent use from multiple goroutines. Internal -// Kafka writer handles synchronization and connection pooling automatically. -// -// Delivery guarantees: -// -// The method uses Kafka's default acknowledgment settings (RequireOne), which -// provides good balance between performance and durability. For stronger -// guarantees, configure the underlying Kafka writer settings. 
-// -// Example: -// -// event1 := &MyEvent{ID: "123", Data: "hello world"} -// event2 := &MyEvent{ID: "124", Data: "goodbye world"} -// ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) -// defer cancel() -// -// if err := producer.Produce(ctx, event1, event2); err != nil { -// log.Printf("Failed to publish events: %v", err) -// return err -// } -func (p *producer[T]) Produce(ctx context.Context, events ...T) error { - if len(events) == 0 { - return nil - } - - // Create messages for all events - messages := make([]kafka.Message, 0, len(events)) - for i, event := range events { - // Serialize event to protobuf - data, err := proto.Marshal(event) - if err != nil { - logger.Error("Failed to serialize event", "error", err.Error(), "topic", p.topic, "event_index", i) - return err - } - - // Create message - // nolint: exhaustruct - msg := kafka.Message{ - Value: data, - Headers: []kafka.Header{ - {Key: "content-type", Value: []byte("application/x-protobuf")}, - {Key: "source-instance", Value: []byte(p.instanceID)}, - }, - } - messages = append(messages, msg) - } - - // Publish all messages in a single batch - err := p.writer.WriteMessages(ctx, messages...) - if err != nil { - logger.Error("Failed to publish events to Kafka", "error", err.Error(), "topic", p.topic, "event_count", len(events)) - return err - } - - return nil -} - -// Close gracefully shuts down the producer and releases its resources. -// -// This method closes the underlying Kafka writer, which will flush any pending -// messages and close network connections. It should be called when the producer -// is no longer needed to prevent resource leaks. -// -// The method blocks until all pending messages are flushed and the writer is -// properly closed. After Close returns, the producer should not be used. -// -// It is safe to call Close multiple times - subsequent calls are no-ops. 
-func (p *producer[T]) Close() error { - if p.writer != nil { - return p.writer.Close() - } - return nil -} diff --git a/pkg/eventstream/topic.go b/pkg/eventstream/topic.go deleted file mode 100644 index a79c91107b..0000000000 --- a/pkg/eventstream/topic.go +++ /dev/null @@ -1,254 +0,0 @@ -package eventstream - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/segmentio/kafka-go" - "github.com/unkeyed/unkey/pkg/assert" - "github.com/unkeyed/unkey/pkg/logger" - "google.golang.org/protobuf/proto" -) - -// TopicConfig configures a Topic instance. -type TopicConfig struct { - // Brokers is the list of Kafka broker addresses. - Brokers []string - - // Topic is the Kafka topic name for event streaming. - Topic string - - // InstanceID is a unique identifier for this instance in the cluster. - InstanceID string -} - -// Topic provides access to producers and consumers for a specific topic -type Topic[T proto.Message] struct { - brokers []string - topic string - instanceID string - - // Track consumers and producers for cleanup - mu sync.Mutex - consumers []Consumer[T] - producers []Producer[T] -} - -// NewTopic creates a new Topic with the provided configuration. -// -// The configuration is validated and a new Topic instance is returned that can be used -// to create producers and consumers for the specified Kafka topic. The topic will be -// automatically created in Kafka if it doesn't exist. 
-// -// Example: -// -// cfg := eventstream.TopicConfig{ -// Brokers: []string{"kafka:9092"}, -// Topic: "events", -// InstanceID: "instance-1", -// } -// topic := eventstream.NewTopic[*MyEvent](cfg) -func NewTopic[T proto.Message](config TopicConfig) (*Topic[T], error) { - // Validate required fields - err := assert.All( - assert.True(len(config.Brokers) > 0, "brokers list cannot be empty"), - assert.NotEmpty(config.Topic, "topic name cannot be empty"), - assert.NotEmpty(config.InstanceID, "instance ID cannot be empty"), - ) - if err != nil { - return nil, err - } - - topic := &Topic[T]{ - mu: sync.Mutex{}, - consumers: nil, - producers: nil, - brokers: config.Brokers, - topic: config.Topic, - instanceID: config.InstanceID, - } - - return topic, nil -} - -// EnsureExists creates the Kafka topic if it doesn't already exist. -// -// This method connects to the Kafka cluster, checks if the topic exists, -// and creates it with the given number of partitions and replication factor if it doesn't. -// This is typically called during application startup to ensure required -// topics are available before producers and consumers start operating. -// -// Parameters: -// - partitions: Number of partitions for the topic (affects parallelism) -// - replicationFactor: Number of replicas for fault tolerance (typically 3 for production) -// -// Topic configuration: -// - Replication factor: As specified by caller (use 3 for production, 1 for development) -// - Partition count: As specified by caller -// - Default retention and cleanup policies -// -// Error conditions: -// - Broker connectivity issues (network problems, authentication) -// - Insufficient permissions to create topics -// - Invalid topic name (contains invalid characters) -// - Cluster controller unavailable -// - All brokers unreachable -// -// Performance considerations: -// -// This operation involves multiple network round-trips and should not be -// called frequently. 
Typically used only during application initialization. -// -// Production usage: -// -// In production environments, topics are often pre-created by operations -// teams rather than created automatically by applications. -// -// Example: -// -// // Development (single broker, no replication) -// err := topic.EnsureExists(3, 1) -// -// // Production (high availability) -// err := topic.EnsureExists(6, 3) -func (t *Topic[T]) EnsureExists(partitions int, replicationFactor int) error { - // Try to connect to each broker until one succeeds - var lastErr error - for _, broker := range t.brokers { - conn, err := kafka.Dial("tcp", broker) - if err != nil { - lastErr = err - continue // Try next broker - } - defer func() { _ = conn.Close() }() - - // Successfully connected, create the topic - err = conn.CreateTopics(kafka.TopicConfig{ - ReplicaAssignments: nil, - ConfigEntries: nil, - Topic: t.topic, - NumPartitions: partitions, - ReplicationFactor: replicationFactor, - }) - return err - } - - // All brokers failed - if lastErr != nil { - return fmt.Errorf("failed to connect to any broker: %w", lastErr) - } - return fmt.Errorf("no brokers configured") -} - -// WaitUntilReady polls Kafka to verify the topic exists and is ready for use. -// It checks every 100ms until the topic is found or the context is cancelled. 
-func (t *Topic[T]) WaitUntilReady(ctx context.Context) error { - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: - // Try to connect to a broker and check if topic exists - for _, broker := range t.brokers { - conn, err := kafka.Dial("tcp", broker) - if err != nil { - continue - } - - partitions, err := conn.ReadPartitions(t.topic) - _ = conn.Close() - - if err == nil && len(partitions) > 0 { - // Topic exists and has partitions - return nil - } - } - } - } -} - -// ConsumerOption configures consumer behavior -type ConsumerOption func(*consumerConfig) - -// consumerConfig holds configuration for consumer creation -type consumerConfig struct { - fromBeginning bool -} - -// WithStartFromBeginning configures the consumer to start reading from the beginning of the topic. -// This is useful for testing scenarios where you want to consume all messages -// that were produced before the consumer started, rather than only new messages. -func WithStartFromBeginning() ConsumerOption { - return func(cfg *consumerConfig) { - cfg.fromBeginning = true - } -} - -// Close gracefully shuts down the topic and all associated consumers. -// -// This method closes all consumers that were created by this topic instance, -// ensuring proper cleanup of Kafka connections and consumer group memberships. -// It blocks until all consumers have been successfully closed. -// -// The method is safe to call multiple times - subsequent calls are no-ops. -// After Close returns, the topic should not be used to create new consumers. -// -// Error handling: -// -// If any consumer fails to close cleanly, the error is logged but Close -// continues attempting to close remaining consumers. This ensures that -// partial failures don't prevent cleanup of other resources. 
-// -// Performance: -// -// Close operations may take several seconds as consumers need to: -// - Finish processing any in-flight messages -// - Commit final offsets to Kafka -// - Leave their consumer groups -// - Close network connections -// -// Usage: -// -// This method is typically called during application shutdown or when -// the topic is no longer needed. It's recommended to use defer for -// automatic cleanup: -// -// topic := eventstream.NewTopic[*MyEvent](config) -// defer topic.Close() -// -// consumer := topic.NewConsumer() -// consumer.Consume(ctx, handler) -// // topic.Close() will automatically close the consumer -func (t *Topic[T]) Close() error { - t.mu.Lock() - defer t.mu.Unlock() - - var lastErr error - - // Close all consumers - for _, consumer := range t.consumers { - if err := consumer.Close(); err != nil { - logger.Error("Failed to close consumer", "error", err, "topic", t.topic) - lastErr = err - } - } - - // Close all producers - for _, producer := range t.producers { - if err := producer.Close(); err != nil { - logger.Error("Failed to close producer", "error", err, "topic", t.topic) - lastErr = err - } - } - - // Clear slices - t.consumers = nil - t.producers = nil - - return lastErr -} diff --git a/proto/cache/v1/invalidation.proto b/proto/cache/v1/invalidation.proto index 28178377bd..4da0e2ddd8 100644 --- a/proto/cache/v1/invalidation.proto +++ b/proto/cache/v1/invalidation.proto @@ -9,12 +9,16 @@ message CacheInvalidationEvent { // The name/identifier of the cache to invalidate string cache_name = 1; - // The cache key to invalidate - string cache_key = 2; - // Unix millisecond timestamp when the invalidation was triggered int64 timestamp = 3; // Optional: The node that triggered the invalidation (to avoid self-invalidation) string source_instance = 4; + + oneof action { + // Invalidate a specific cache key + string cache_key = 2; + // Clear the entire cache + bool clear_all = 5; + } } diff --git a/proto/cluster/v1/envelope.proto 
b/proto/cluster/v1/envelope.proto new file mode 100644 index 0000000000..9990bd4b15 --- /dev/null +++ b/proto/cluster/v1/envelope.proto @@ -0,0 +1,35 @@ +syntax = "proto3"; + +package cluster.v1; + +import "cache/v1/invalidation.proto"; + +option go_package = "github.com/unkeyed/unkey/gen/proto/cluster/v1;clusterv1"; + +enum Direction { + DIRECTION_UNSPECIFIED = 0; + DIRECTION_LAN = 1; + DIRECTION_WAN = 2; +} + +// ClusterMessage is the envelope for all gossip broadcast messages. +// The oneof field routes the payload to the correct handler via MessageMux. +message ClusterMessage { + // Which pool this message was sent on (LAN or WAN). + Direction direction = 1; + + // The region of the node that originated this message. + string source_region = 2; + + // The node ID that originated this message. + string sender_node = 3; + + // Unix millisecond timestamp when the message was created. + // Used to measure transport latency on the receiving end. + int64 sent_at_ms = 4; + + oneof payload { + cache.v1.CacheInvalidationEvent cache_invalidation = 5; + // next payload type = 6 + } +} diff --git a/svc/api/BUILD.bazel b/svc/api/BUILD.bazel index 6a61cc96bf..fbaa63abac 100644 --- a/svc/api/BUILD.bazel +++ b/svc/api/BUILD.bazel @@ -9,7 +9,6 @@ go_library( importpath = "github.com/unkeyed/unkey/svc/api", visibility = ["//visibility:public"], deps = [ - "//gen/proto/cache/v1:cache", "//gen/proto/ctrl/v1/ctrlv1connect", "//gen/proto/vault/v1/vaultv1connect", "//gen/rpc/ctrl", @@ -20,11 +19,12 @@ go_library( "//internal/services/keys", "//internal/services/ratelimit", "//internal/services/usagelimiter", + "//pkg/cache/clustering", "//pkg/clickhouse", "//pkg/clock", + "//pkg/cluster", "//pkg/counter", "//pkg/db", - "//pkg/eventstream", "//pkg/logger", "//pkg/otel", "//pkg/prometheus", diff --git a/svc/api/config.go b/svc/api/config.go index 1bae2ac831..423854fc85 100644 --- a/svc/api/config.go +++ b/svc/api/config.go @@ -8,11 +8,6 @@ import ( "github.com/unkeyed/unkey/pkg/tls" ) 
-const ( - // DefaultCacheInvalidationTopic is the default Kafka topic name for cache invalidation events - DefaultCacheInvalidationTopic = "cache-invalidations" -) - type Config struct { // InstanceID is the unique identifier for this instance of the API server InstanceID string @@ -77,14 +72,30 @@ type Config struct { VaultURL string VaultToken string - // --- Kafka configuration --- + // --- Gossip cluster configuration --- + + // GossipEnabled controls whether gossip-based cache invalidation is active + GossipEnabled bool + + // GossipBindAddr is the address to bind gossip listeners on (default "0.0.0.0") + GossipBindAddr string + + // GossipLANPort is the LAN memberlist port (default 7946) + GossipLANPort int + + // GossipWANPort is the WAN memberlist port for bridges (default 7947) + GossipWANPort int + + // GossipLANSeeds are addresses of existing LAN cluster members (e.g. k8s headless service DNS) + GossipLANSeeds []string - // KafkaBrokers is the list of Kafka broker addresses - KafkaBrokers []string + // GossipWANSeeds are addresses of cross-region bridges + GossipWANSeeds []string - // CacheInvalidationTopic is the Kafka topic name for cache invalidation events - // If empty, defaults to DefaultCacheInvalidationTopic - CacheInvalidationTopic string + // GossipSecretKey is a base64-encoded shared secret for AES-256 encryption of gossip traffic. + // When set, nodes must share this key to join and communicate. 
+ // Generate with: openssl rand -base64 32 + GossipSecretKey string // --- ClickHouse proxy configuration --- diff --git a/svc/api/integration/cluster/cache/BUILD.bazel b/svc/api/integration/cluster/cache/BUILD.bazel index 8a86a3e985..5ab193a42b 100644 --- a/svc/api/integration/cluster/cache/BUILD.bazel +++ b/svc/api/integration/cluster/cache/BUILD.bazel @@ -3,18 +3,9 @@ load("@rules_go//go:def.bzl", "go_test") go_test( name = "cache_test", size = "medium", - srcs = [ - "consume_events_test.go", - "e2e_test.go", - "produce_events_test.go", - ], + srcs = ["e2e_test.go"], deps = [ - "//gen/proto/cache/v1:cache", - "//pkg/cache", - "//pkg/eventstream", - "//pkg/testutil/containers", "//pkg/timing", - "//pkg/uid", "//svc/api/integration", "//svc/api/internal/testutil/seed", "//svc/api/openapi", diff --git a/svc/api/integration/cluster/cache/consume_events_test.go b/svc/api/integration/cluster/cache/consume_events_test.go deleted file mode 100644 index cccf637f52..0000000000 --- a/svc/api/integration/cluster/cache/consume_events_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package cache - -import ( - "context" - "fmt" - "net/http" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/timing" - "github.com/unkeyed/unkey/pkg/uid" - "github.com/unkeyed/unkey/svc/api/integration" - "github.com/unkeyed/unkey/svc/api/internal/testutil/seed" - "github.com/unkeyed/unkey/svc/api/openapi" -) - -func TestAPI_ConsumesInvalidationEvents(t *testing.T) { - - // Start a single API node - h := integration.New(t, integration.Config{NumNodes: 1}) - addr := h.GetClusterAddrs()[0] - - // Create test API - api := h.Seed.CreateAPI(context.Background(), seed.CreateApiRequest{ - WorkspaceID: h.Seed.Resources.UserWorkspace.ID, - }) - rootKey := h.Seed.CreateRootKey(context.Background(), 
api.WorkspaceID, fmt.Sprintf("api.%s.read_api", api.ID)) - - headers := http.Header{ - "Authorization": []string{"Bearer " + rootKey}, - "Content-Type": []string{"application/json"}, - } - - // Step 1: Populate cache by making API call (first call will be MISS) - resp, err := integration.CallNode[openapi.V2ApisGetApiRequestBody, openapi.V2ApisGetApiResponseBody]( - t, addr, "POST", "/v2/apis.getApi", - headers, - openapi.V2ApisGetApiRequestBody{ApiId: api.ID}, - ) - require.NoError(t, err, "Initial API call should succeed") - require.Equal(t, http.StatusOK, resp.Status, "API should exist initially") - - // Step 1.5: Make a second call to populate cache (should be FRESH) - resp2, err := integration.CallNode[openapi.V2ApisGetApiRequestBody, openapi.V2ApisGetApiResponseBody]( - t, addr, "POST", "/v2/apis.getApi", - headers, - openapi.V2ApisGetApiRequestBody{ApiId: api.ID}, - ) - require.NoError(t, err, "Second API call should succeed") - require.Equal(t, http.StatusOK, resp2.Status, "API should exist on second call") - - // Verify cache shows fresh data in debug headers - cacheHeaders := resp2.Headers.Values(timing.HeaderName) - require.NotEmpty(t, cacheHeaders, "Should have cache debug headers") - - // Look for live_api_by_id cache with FRESH status - foundFresh := false - for _, headerValue := range cacheHeaders { - parsedHeader, err := timing.ParseEntry(headerValue) - if err != nil { - continue // Skip invalid headers - } - if parsedHeader.Attributes["cache"] == "live_api_by_id" && parsedHeader.Attributes["status"] == "fresh" { - foundFresh = true - break - } - } - require.True(t, foundFresh, "Cache should show FRESH status for live_api_by_id on second call") - - // Step 2: Produce invalidation event externally (simulating another node's action) - brokers := containers.Kafka(t) - topicName := "cache-invalidations" - - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: 
uid.New(uid.TestPrefix), // Use unique ID to avoid conflicts with API node - }) - require.NoError(t, err) - - // Ensure topic exists before producing - err = topic.EnsureExists(1, 1) - require.NoError(t, err, "Should be able to create topic") - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully propagated before using it - waitCtx, waitCancel := context.WithTimeout(context.Background(), 10*time.Second) - defer waitCancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err, "Topic should become ready") - - producer := topic.NewProducer() - - // Send invalidation event for the API - invalidationEvent := &cachev1.CacheInvalidationEvent{ - CacheName: "live_api_by_id", - CacheKey: api.ID, - Timestamp: time.Now().UnixMilli(), - SourceInstance: "external-node", - } - - ctx := context.Background() - err = producer.Produce(ctx, invalidationEvent) - require.NoError(t, err, "Should be able to produce invalidation event") - - // Step 3: Verify that the API node processes the invalidation and cache shows MISS/stale - var cacheInvalidated atomic.Bool - - require.Eventually(t, func() bool { - resp, err := integration.CallNode[openapi.V2ApisGetApiRequestBody, openapi.V2ApisGetApiResponseBody]( - t, addr, "POST", "/v2/apis.getApi", - headers, - openapi.V2ApisGetApiRequestBody{ApiId: api.ID}, - ) - if err != nil { - return false - } - - // Check cache debug headers for invalidation - cacheHeaders := resp.Headers.Values(timing.HeaderName) - if len(cacheHeaders) == 0 { - return false - } - - // Look for live_api_by_id cache that's no longer FRESH (should be MISS or STALE) - for _, headerValue := range cacheHeaders { - parsedHeader, err := timing.ParseEntry(headerValue) - if err != nil { - continue // Skip invalid headers - } - if parsedHeader.Attributes["cache"] == "live_api_by_id" { - // Cache should no longer be fresh after invalidation - if parsedHeader.Attributes["status"] != "fresh" { - cacheInvalidated.Store(true) - return true - 
} - } - } - - return false - }, 15*time.Second, 200*time.Millisecond, "API node should process invalidation event and cache should no longer be FRESH within 15 seconds") - - require.True(t, cacheInvalidated.Load(), "Cache should be invalidated after receiving external invalidation event") -} diff --git a/svc/api/integration/cluster/cache/produce_events_test.go b/svc/api/integration/cluster/cache/produce_events_test.go deleted file mode 100644 index 103ddc4be5..0000000000 --- a/svc/api/integration/cluster/cache/produce_events_test.go +++ /dev/null @@ -1,135 +0,0 @@ -package cache - -import ( - "context" - "fmt" - "net/http" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/require" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" - "github.com/unkeyed/unkey/pkg/cache" - "github.com/unkeyed/unkey/pkg/eventstream" - "github.com/unkeyed/unkey/pkg/testutil/containers" - "github.com/unkeyed/unkey/pkg/uid" - "github.com/unkeyed/unkey/svc/api/integration" - "github.com/unkeyed/unkey/svc/api/internal/testutil/seed" - "github.com/unkeyed/unkey/svc/api/openapi" -) - -func TestAPI_ProducesInvalidationEvents(t *testing.T) { - - // Set up event stream listener to capture invalidation events BEFORE starting API node - brokers := containers.Kafka(t) - topicName := "cache-invalidations" // Use same topic as API nodes - - // Create topic with unique instance ID for this test run - // This ensures we get a unique consumer group and don't resume from previous test runs - testInstanceID := uid.New(uid.TestPrefix) - topic, err := eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: brokers, - Topic: topicName, - InstanceID: testInstanceID, - }) - require.NoError(t, err) - - // Ensure topic exists - err = topic.EnsureExists(1, 1) - require.NoError(t, err, "Should be able to create topic") - defer func() { require.NoError(t, topic.Close()) }() - - // Wait for topic to be fully propagated before using it - waitCtx, waitCancel := 
context.WithTimeout(context.Background(), 10*time.Second) - defer waitCancel() - err = topic.WaitUntilReady(waitCtx) - require.NoError(t, err, "Topic should become ready") - - // Track received events - var receivedEvents []*cachev1.CacheInvalidationEvent - var eventsMutex sync.Mutex - - // Start consumer from latest offset to avoid old test events - consumer := topic.NewConsumer() - defer func() { require.NoError(t, consumer.Close()) }() - - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - consumer.Consume(ctx, func(ctx context.Context, event *cachev1.CacheInvalidationEvent) error { - eventsMutex.Lock() - receivedEvents = append(receivedEvents, event) - eventsMutex.Unlock() - return nil - }) - - // Wait for consumer to be ready and positioned at latest offset - // Consumer needs time to join group and subscribe to partitions - time.Sleep(3 * time.Second) - - // Now start API node - h := integration.New(t, integration.Config{NumNodes: 1}) - addr := h.GetClusterAddrs()[0] - - // Create test API - api := h.Seed.CreateAPI(context.Background(), seed.CreateApiRequest{ - WorkspaceID: h.Seed.Resources.UserWorkspace.ID, - }) - rootKey := h.Seed.CreateRootKey(context.Background(), api.WorkspaceID, fmt.Sprintf("api.%s.read_api", api.ID), fmt.Sprintf("api.%s.delete_api", api.ID)) - - headers := http.Header{ - "Authorization": []string{"Bearer " + rootKey}, - "Content-Type": []string{"application/json"}, - } - - // Test 1: API deletion should produce cache invalidation events - _, err = integration.CallNode[openapi.V2ApisDeleteApiRequestBody, openapi.V2ApisDeleteApiResponseBody]( - t, addr, "POST", "/v2/apis.deleteApi", - headers, - openapi.V2ApisDeleteApiRequestBody{ApiId: api.ID}, - ) - require.NoError(t, err, "API deletion should succeed") - - // Wait for invalidation events to be produced - require.Eventually(t, func() bool { - eventsMutex.Lock() - defer eventsMutex.Unlock() - return len(receivedEvents) > 0 - }, 15*time.Second, 
200*time.Millisecond, "API deletion should produce cache invalidation events within 15 seconds") - - // Verify events - eventsMutex.Lock() - defer eventsMutex.Unlock() - - require.Greater(t, len(receivedEvents), 0, "Should receive at least one invalidation event") - - // Log all received events for debugging - t.Logf("Received %d invalidation events:", len(receivedEvents)) - for i, event := range receivedEvents { - t.Logf(" Event %d: CacheName=%s, CacheKey=%s, SourceInstance=%s", - i, event.GetCacheName(), event.GetCacheKey(), event.GetSourceInstance()) - } - - // Look for live_api_by_id cache invalidation event - // The cache key is scoped with format "workspaceID:apiID" - expectedCacheKey := cache.ScopedKey{ - WorkspaceID: api.WorkspaceID, - Key: api.ID, - }.String() - var apiByIDEvent *cachev1.CacheInvalidationEvent - for _, event := range receivedEvents { - if event.GetCacheName() == "live_api_by_id" && event.GetCacheKey() == expectedCacheKey { - apiByIDEvent = event - break - } - } - - t.Logf("Looking for cache key: %s", expectedCacheKey) - - require.NotNil(t, apiByIDEvent, "Should receive live_api_by_id invalidation event") - require.Equal(t, "live_api_by_id", apiByIDEvent.GetCacheName(), "Event should be for live_api_by_id cache") - require.Equal(t, expectedCacheKey, apiByIDEvent.GetCacheKey(), "Event should be for correct scoped cache key") - require.NotEmpty(t, apiByIDEvent.GetSourceInstance(), "Event should have source instance") - require.Greater(t, apiByIDEvent.GetTimestamp(), int64(0), "Event should have valid timestamp") -} diff --git a/svc/api/integration/harness.go b/svc/api/integration/harness.go index 0e8eac4c6b..ebe9d7563d 100644 --- a/svc/api/integration/harness.go +++ b/svc/api/integration/harness.go @@ -23,7 +23,6 @@ type ApiConfig struct { Nodes int MysqlDSN string ClickhouseDSN string - KafkaBrokers []string } // ApiCluster represents a cluster of API containers @@ -97,17 +96,11 @@ func New(t *testing.T, config Config) *Harness { 
h.Seed.Seed(ctx) - // For docker DSN, use docker service name - clickhouseDockerDSN := "clickhouse://default:password@clickhouse:9000?secure=false&skip_verify=true&dial_timeout=10s" - - // Create dynamic API container cluster for chaos testing - kafkaBrokers := containers.Kafka(t) - + // Create dynamic API container cluster cluster := h.RunAPI(ApiConfig{ Nodes: config.NumNodes, MysqlDSN: mysqlDockerDSN, - ClickhouseDSN: clickhouseDockerDSN, - KafkaBrokers: kafkaBrokers, + ClickhouseDSN: clickhouseHostDSN, }) h.apiCluster = cluster h.instanceAddrs = cluster.Addrs @@ -134,12 +127,10 @@ func (h *Harness) RunAPI(config ApiConfig) *ApiCluster { // Create API config for this node using host connections mysqlHostCfg := containers.MySQL(h.t) - mysqlHostCfg.DBName = "unkey" // Set the database name + mysqlHostCfg.DBName = "unkey" clickhouseHostDSN := containers.ClickHouse(h.t) - kafkaBrokers := containers.Kafka(h.t) vaultURL, vaultToken := containers.Vault(h.t) apiConfig := api.Config{ - CacheInvalidationTopic: "", MaxRequestBodySize: 0, HttpPort: 7070, ChproxyToken: "", @@ -161,7 +152,13 @@ func (h *Harness) RunAPI(config ApiConfig) *ApiCluster { TLSConfig: nil, VaultURL: vaultURL, VaultToken: vaultToken, - KafkaBrokers: kafkaBrokers, // Use host brokers for test runner connections + GossipEnabled: false, + GossipBindAddr: "", + GossipLANPort: 0, + GossipWANPort: 0, + GossipLANSeeds: nil, + GossipWANSeeds: nil, + GossipSecretKey: "", PprofEnabled: true, PprofUsername: "unkey", PprofPassword: "password", diff --git a/svc/api/internal/testutil/http.go b/svc/api/internal/testutil/http.go index b6faa92913..834ca717ad 100644 --- a/svc/api/internal/testutil/http.go +++ b/svc/api/internal/testutil/http.go @@ -88,9 +88,9 @@ func NewHarness(t *testing.T) *Harness { require.NoError(t, err) caches, err := caches.New(caches.Config{ - CacheInvalidationTopic: nil, - NodeID: "", - Clock: clk, + Broadcaster: nil, + NodeID: "", + Clock: clk, }) require.NoError(t, err) diff --git 
a/svc/api/run.go b/svc/api/run.go index 17d22fb039..382a274521 100644 --- a/svc/api/run.go +++ b/svc/api/run.go @@ -2,6 +2,7 @@ package api import ( "context" + "encoding/base64" "errors" "fmt" "log/slog" @@ -10,7 +11,6 @@ import ( "time" "connectrpc.com/connect" - cachev1 "github.com/unkeyed/unkey/gen/proto/cache/v1" "github.com/unkeyed/unkey/gen/proto/ctrl/v1/ctrlv1connect" "github.com/unkeyed/unkey/gen/proto/vault/v1/vaultv1connect" "github.com/unkeyed/unkey/gen/rpc/ctrl" @@ -21,11 +21,12 @@ import ( "github.com/unkeyed/unkey/internal/services/keys" "github.com/unkeyed/unkey/internal/services/ratelimit" "github.com/unkeyed/unkey/internal/services/usagelimiter" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/clickhouse" "github.com/unkeyed/unkey/pkg/clock" + "github.com/unkeyed/unkey/pkg/cluster" "github.com/unkeyed/unkey/pkg/counter" "github.com/unkeyed/unkey/pkg/db" - "github.com/unkeyed/unkey/pkg/eventstream" "github.com/unkeyed/unkey/pkg/logger" "github.com/unkeyed/unkey/pkg/otel" "github.com/unkeyed/unkey/pkg/prometheus" @@ -196,33 +197,55 @@ func Run(ctx context.Context, cfg Config) error { return fmt.Errorf("unable to create auditlogs service: %w", err) } - // Initialize cache invalidation topic - cacheInvalidationTopic := eventstream.NewNoopTopic[*cachev1.CacheInvalidationEvent]() - if len(cfg.KafkaBrokers) > 0 { - logger.Info("Initializing cache invalidation topic", "brokers", cfg.KafkaBrokers, "instanceID", cfg.InstanceID) + // Initialize gossip-based cache invalidation + var broadcaster clustering.Broadcaster + if cfg.GossipEnabled { + logger.Info("Initializing gossip cluster for cache invalidation", + "region", cfg.Region, + "instanceID", cfg.InstanceID, + ) + + mux := cluster.NewMessageMux() + + lanSeeds := cluster.ResolveDNSSeeds(cfg.GossipLANSeeds, cfg.GossipLANPort) + wanSeeds := cluster.ResolveDNSSeeds(cfg.GossipWANSeeds, cfg.GossipWANPort) - topicName := cfg.CacheInvalidationTopic - if topicName == "" { - 
topicName = DefaultCacheInvalidationTopic + var secretKey []byte + if cfg.GossipSecretKey != "" { + var decodeErr error + secretKey, decodeErr = base64.StdEncoding.DecodeString(cfg.GossipSecretKey) + if decodeErr != nil { + return fmt.Errorf("unable to decode gossip secret key: %w", decodeErr) + } } - cacheInvalidationTopic, err = eventstream.NewTopic[*cachev1.CacheInvalidationEvent](eventstream.TopicConfig{ - Brokers: cfg.KafkaBrokers, - Topic: topicName, - InstanceID: cfg.InstanceID, + gossipCluster, clusterErr := cluster.New(cluster.Config{ + Region: cfg.Region, + NodeID: cfg.InstanceID, + BindAddr: cfg.GossipBindAddr, + BindPort: cfg.GossipLANPort, + WANBindPort: cfg.GossipWANPort, + LANSeeds: lanSeeds, + WANSeeds: wanSeeds, + SecretKey: secretKey, + OnMessage: mux.OnMessage, }) - if err != nil { - return fmt.Errorf("unable to create cache invalidation topic: %w", err) + if clusterErr != nil { + logger.Error("Failed to create gossip cluster, continuing without cluster cache invalidation", + "error", clusterErr, + ) + } else { + gossipBroadcaster := clustering.NewGossipBroadcaster(gossipCluster) + cluster.Subscribe(mux, gossipBroadcaster.HandleCacheInvalidation) + broadcaster = gossipBroadcaster + r.Defer(gossipCluster.Close) } - - // Register topic for graceful shutdown - r.Defer(cacheInvalidationTopic.Close) } caches, err := caches.New(caches.Config{ - Clock: clk, - CacheInvalidationTopic: cacheInvalidationTopic, - NodeID: cfg.InstanceID, + Clock: clk, + Broadcaster: broadcaster, + NodeID: cfg.InstanceID, }) if err != nil { return fmt.Errorf("unable to create caches: %w", err) diff --git a/svc/frontline/BUILD.bazel b/svc/frontline/BUILD.bazel index 747d9ec917..69eb19da87 100644 --- a/svc/frontline/BUILD.bazel +++ b/svc/frontline/BUILD.bazel @@ -13,7 +13,9 @@ go_library( "//gen/proto/vault/v1/vaultv1connect", "//gen/rpc/ctrl", "//gen/rpc/vault", + "//pkg/cache/clustering", "//pkg/clock", + "//pkg/cluster", "//pkg/db", "//pkg/logger", "//pkg/otel", diff --git 
a/svc/frontline/config.go b/svc/frontline/config.go index 3fd289dcf6..ad14c1ae46 100644 --- a/svc/frontline/config.go +++ b/svc/frontline/config.go @@ -73,6 +73,31 @@ type Config struct { // VaultToken is the authentication token for the vault service VaultToken string + // --- Gossip cluster configuration --- + + // GossipEnabled controls whether gossip-based cache invalidation is active + GossipEnabled bool + + // GossipBindAddr is the address to bind gossip listeners on (default "0.0.0.0") + GossipBindAddr string + + // GossipLANPort is the LAN memberlist port (default 7946) + GossipLANPort int + + // GossipWANPort is the WAN memberlist port for bridges (default 7947) + GossipWANPort int + + // GossipLANSeeds are addresses of existing LAN cluster members (e.g. k8s headless service DNS) + GossipLANSeeds []string + + // GossipWANSeeds are addresses of cross-region bridges + GossipWANSeeds []string + + // GossipSecretKey is a base64-encoded shared secret for AES-256 encryption of gossip traffic. + // When set, nodes must share this key to join and communicate. + // Generate with: openssl rand -base64 32 + GossipSecretKey string + // --- Logging sampler configuration --- // LogSampleRate is the baseline probability (0.0-1.0) of emitting log events. 
diff --git a/svc/frontline/run.go b/svc/frontline/run.go index b502013dca..cea8e89733 100644 --- a/svc/frontline/run.go +++ b/svc/frontline/run.go @@ -3,6 +3,7 @@ package frontline import ( "context" "crypto/tls" + "encoding/base64" "errors" "fmt" "log/slog" @@ -14,7 +15,9 @@ import ( "github.com/unkeyed/unkey/gen/proto/vault/v1/vaultv1connect" "github.com/unkeyed/unkey/gen/rpc/ctrl" "github.com/unkeyed/unkey/gen/rpc/vault" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/clock" + "github.com/unkeyed/unkey/pkg/cluster" "github.com/unkeyed/unkey/pkg/db" "github.com/unkeyed/unkey/pkg/logger" "github.com/unkeyed/unkey/pkg/otel" @@ -129,13 +132,61 @@ func Run(ctx context.Context, cfg Config) error { } r.Defer(db.Close) + // Initialize gossip-based cache invalidation + var broadcaster clustering.Broadcaster + if cfg.GossipEnabled { + logger.Info("Initializing gossip cluster for cache invalidation", + "region", cfg.Region, + "instanceID", cfg.FrontlineID, + ) + + mux := cluster.NewMessageMux() + + lanSeeds := cluster.ResolveDNSSeeds(cfg.GossipLANSeeds, cfg.GossipLANPort) + wanSeeds := cluster.ResolveDNSSeeds(cfg.GossipWANSeeds, cfg.GossipWANPort) + + var secretKey []byte + if cfg.GossipSecretKey != "" { + var decodeErr error + secretKey, decodeErr = base64.StdEncoding.DecodeString(cfg.GossipSecretKey) + if decodeErr != nil { + return fmt.Errorf("unable to decode gossip secret key: %w", decodeErr) + } + } + + gossipCluster, clusterErr := cluster.New(cluster.Config{ + Region: cfg.Region, + NodeID: cfg.FrontlineID, + BindAddr: cfg.GossipBindAddr, + BindPort: cfg.GossipLANPort, + WANBindPort: cfg.GossipWANPort, + LANSeeds: lanSeeds, + WANSeeds: wanSeeds, + SecretKey: secretKey, + OnMessage: mux.OnMessage, + }) + if clusterErr != nil { + logger.Error("Failed to create gossip cluster, continuing without cluster cache invalidation", + "error", clusterErr, + ) + } else { + gossipBroadcaster := clustering.NewGossipBroadcaster(gossipCluster) + 
cluster.Subscribe(mux, gossipBroadcaster.HandleCacheInvalidation) + broadcaster = gossipBroadcaster + r.Defer(gossipCluster.Close) + } + } + // Initialize caches cache, err := caches.New(caches.Config{ - Clock: clk, + Clock: clk, + Broadcaster: broadcaster, + NodeID: cfg.FrontlineID, }) if err != nil { return fmt.Errorf("unable to create caches: %w", err) } + r.Defer(cache.Close) // Initialize certificate manager for dynamic TLS var certManager certmanager.Service diff --git a/svc/frontline/services/caches/BUILD.bazel b/svc/frontline/services/caches/BUILD.bazel index a5f7e4783b..f9ae58418b 100644 --- a/svc/frontline/services/caches/BUILD.bazel +++ b/svc/frontline/services/caches/BUILD.bazel @@ -7,8 +7,10 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/cache", + "//pkg/cache/clustering", "//pkg/cache/middleware", "//pkg/clock", "//pkg/db", + "//pkg/uid", ], ) diff --git a/svc/frontline/services/caches/caches.go b/svc/frontline/services/caches/caches.go index 2edfa6f900..a63d1ac2a5 100644 --- a/svc/frontline/services/caches/caches.go +++ b/svc/frontline/services/caches/caches.go @@ -3,12 +3,15 @@ package caches import ( "crypto/tls" "fmt" + "os" "time" "github.com/unkeyed/unkey/pkg/cache" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/cache/middleware" "github.com/unkeyed/unkey/pkg/clock" "github.com/unkeyed/unkey/pkg/db" + "github.com/unkeyed/unkey/pkg/uid" ) // Caches holds all cache instances used throughout frontline. @@ -21,50 +24,156 @@ type Caches struct { // HostName -> Certificate TLSCertificates cache.Cache[string, tls.Certificate] + + // dispatcher handles routing of invalidation events to all caches in this process. + dispatcher *clustering.InvalidationDispatcher +} + +// Close shuts down the caches and cleans up resources. +func (c *Caches) Close() error { + if c.dispatcher != nil { + return c.dispatcher.Close() + } + + return nil } // Config defines the configuration options for initializing caches. 
type Config struct { Clock clock.Clock + + // Broadcaster for distributed cache invalidation via gossip. + // If nil, caches operate in local-only mode (no distributed invalidation). + Broadcaster clustering.Broadcaster + + // NodeID identifies this node in the cluster (defaults to hostname-uniqueid to ensure uniqueness) + NodeID string } -func New(config Config) (Caches, error) { - frontlineRoute, err := cache.New(cache.Config[string, db.FrontlineRoute]{ - Fresh: 30 * time.Second, - Stale: 5 * time.Minute, - MaxSize: 10_000, - Resource: "frontline_route", - Clock: config.Clock, - }) +// clusterOpts bundles the dispatcher and key converter functions needed for +// distributed cache invalidation. +type clusterOpts[K comparable] struct { + dispatcher *clustering.InvalidationDispatcher + broadcaster clustering.Broadcaster + nodeID string + keyToString func(K) string + stringToKey func(string) (K, error) +} + +// createCache creates a cache instance with optional clustering support. +func createCache[K comparable, V any]( + cacheConfig cache.Config[K, V], + opts *clusterOpts[K], +) (cache.Cache[K, V], error) { + localCache, err := cache.New(cacheConfig) if err != nil { - return Caches{}, fmt.Errorf("failed to create sentinel config cache: %w", err) + return nil, err + } + + if opts == nil { + return localCache, nil } - sentinelsByEnvironment, err := cache.New(cache.Config[string, []db.Sentinel]{ - Fresh: 30 * time.Second, - Stale: 2 * time.Minute, - MaxSize: 10_000, - Resource: "sentinels_by_environment", - Clock: config.Clock, + clusterCache, err := clustering.New(clustering.Config[K, V]{ + LocalCache: localCache, + Broadcaster: opts.broadcaster, + Dispatcher: opts.dispatcher, + NodeID: opts.nodeID, + KeyToString: opts.keyToString, + StringToKey: opts.stringToKey, }) if err != nil { - return Caches{}, fmt.Errorf("failed to create instances by deployment cache: %w", err) + return nil, err } - tlsCertificate, err := cache.New(cache.Config[string, tls.Certificate]{ - 
Fresh: time.Hour, - Stale: time.Hour * 12, - MaxSize: 10_000, - Resource: "tls_certificate", - Clock: config.Clock, - }) + return clusterCache, nil +} + +func New(config Config) (*Caches, error) { + if config.NodeID == "" { + hostname, err := os.Hostname() + if err != nil { + hostname = "unknown" + } + config.NodeID = fmt.Sprintf("%s-%s", hostname, uid.New("node")) + } + + var dispatcher *clustering.InvalidationDispatcher + var stringKeyOpts *clusterOpts[string] + + if config.Broadcaster != nil { + var err error + dispatcher, err = clustering.NewInvalidationDispatcher(config.Broadcaster) + if err != nil { + return nil, err + } + + stringKeyOpts = &clusterOpts[string]{ + dispatcher: dispatcher, + broadcaster: config.Broadcaster, + nodeID: config.NodeID, + keyToString: nil, + stringToKey: nil, + } + } + + // Ensure the dispatcher is closed if any subsequent cache creation fails. + initialized := false + if dispatcher != nil { + defer func() { + if !initialized { + _ = dispatcher.Close() + } + }() + } + + frontlineRoute, err := createCache( + cache.Config[string, db.FrontlineRoute]{ + Fresh: 30 * time.Second, + Stale: 5 * time.Minute, + MaxSize: 10_000, + Resource: "frontline_route", + Clock: config.Clock, + }, + stringKeyOpts, + ) + if err != nil { + return nil, fmt.Errorf("failed to create frontline route cache: %w", err) + } + + sentinelsByEnvironment, err := createCache( + cache.Config[string, []db.Sentinel]{ + Fresh: 30 * time.Second, + Stale: 2 * time.Minute, + MaxSize: 10_000, + Resource: "sentinels_by_environment", + Clock: config.Clock, + }, + stringKeyOpts, + ) + if err != nil { + return nil, fmt.Errorf("failed to create sentinels by environment cache: %w", err) + } + + tlsCertificate, err := createCache( + cache.Config[string, tls.Certificate]{ + Fresh: time.Hour, + Stale: time.Hour * 12, + MaxSize: 10_000, + Resource: "tls_certificate", + Clock: config.Clock, + }, + stringKeyOpts, + ) if err != nil { - return Caches{}, fmt.Errorf("failed to create 
certificate cache: %w", err) + return nil, fmt.Errorf("failed to create certificate cache: %w", err) } - return Caches{ + initialized = true + return &Caches{ FrontlineRoutes: middleware.WithTracing(frontlineRoute), SentinelsByEnvironment: middleware.WithTracing(sentinelsByEnvironment), TLSCertificates: middleware.WithTracing(tlsCertificate), + dispatcher: dispatcher, }, nil } diff --git a/svc/krane/internal/sentinel/BUILD.bazel b/svc/krane/internal/sentinel/BUILD.bazel index c878a9dbaf..59e22c5c8e 100644 --- a/svc/krane/internal/sentinel/BUILD.bazel +++ b/svc/krane/internal/sentinel/BUILD.bazel @@ -29,9 +29,12 @@ go_library( "@io_k8s_api//policy/v1:policy", "@io_k8s_apimachinery//pkg/api/errors", "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", + "@io_k8s_apimachinery//pkg/apis/meta/v1/unstructured", + "@io_k8s_apimachinery//pkg/runtime/schema", "@io_k8s_apimachinery//pkg/types", "@io_k8s_apimachinery//pkg/util/intstr", "@io_k8s_apimachinery//pkg/watch", + "@io_k8s_client_go//dynamic", "@io_k8s_client_go//kubernetes", "@io_k8s_sigs_controller_runtime//pkg/client", ], diff --git a/svc/krane/internal/sentinel/apply.go b/svc/krane/internal/sentinel/apply.go index ff63ca700d..6edd3d4635 100644 --- a/svc/krane/internal/sentinel/apply.go +++ b/svc/krane/internal/sentinel/apply.go @@ -16,6 +16,8 @@ import ( policyv1 "k8s.io/api/policy/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -70,6 +72,16 @@ func (c *Controller) ApplySentinel(ctx context.Context, req *ctrlv1.ApplySentine return err } + _, err = c.ensureGossipServiceExists(ctx, req) + if err != nil { + return err + } + + err = c.ensureGossipCiliumPolicyExists(ctx, req) + if err != nil { + return err + } + var health ctrlv1.Health if req.GetReplicas() == 0 { health = ctrlv1.Health_HEALTH_PAUSED @@ -187,12 
+199,16 @@ func (c *Controller) ensureSentinelExists(ctx context.Context, sentinel *ctrlv1. {Name: "UNKEY_ENVIRONMENT_ID", Value: sentinel.GetEnvironmentId()}, {Name: "UNKEY_SENTINEL_ID", Value: sentinel.GetSentinelId()}, {Name: "UNKEY_REGION", Value: c.region}, + {Name: "UNKEY_GOSSIP_ENABLED", Value: "true"}, + {Name: "UNKEY_GOSSIP_LAN_PORT", Value: strconv.Itoa(GossipLANPort)}, + {Name: "UNKEY_GOSSIP_LAN_SEEDS", Value: fmt.Sprintf("%s-gossip-lan", sentinel.GetK8SName())}, }, - Ports: []corev1.ContainerPort{{ - ContainerPort: SentinelPort, - Name: "sentinel", - }}, + Ports: []corev1.ContainerPort{ + {ContainerPort: SentinelPort, Name: "sentinel"}, + {ContainerPort: GossipLANPort, Name: "gossip-lan", Protocol: corev1.ProtocolTCP}, + {ContainerPort: GossipLANPort, Name: "gossip-lan-udp", Protocol: corev1.ProtocolUDP}, + }, LivenessProbe: &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ @@ -368,3 +384,141 @@ func (c *Controller) ensurePDBExists(ctx context.Context, sentinel *ctrlv1.Apply }) return err } + +// ensureGossipServiceExists creates or updates a headless Service for gossip LAN peer +// discovery. The Service uses clusterIP: None so that DNS resolves to individual pod IPs, +// allowing memberlist to discover all peers in the environment. The selector matches all +// sentinel pods in the environment (not just one k8sName) for cross-sentinel peer discovery. +func (c *Controller) ensureGossipServiceExists(ctx context.Context, sentinel *ctrlv1.ApplySentinel) (*corev1.Service, error) { + client := c.clientSet.CoreV1().Services(NamespaceSentinel) + + gossipName := fmt.Sprintf("%s-gossip-lan", sentinel.GetK8SName()) + + desired := &corev1.Service{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "v1", + Kind: "Service", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: gossipName, + Namespace: NamespaceSentinel, + Labels: labels.New(). + WorkspaceID(sentinel.GetWorkspaceId()). + ProjectID(sentinel.GetProjectId()). + EnvironmentID(sentinel.GetEnvironmentId()). 
+ SentinelID(sentinel.GetSentinelId()). + ComponentGossipLAN(), + // No OwnerReferences: this Service is environment-scoped (selector matches all + // sentinel pods in the environment), so it must not be owned by a single Deployment. + // Krane manages its lifecycle via server-side apply. + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + ClusterIP: "None", + Selector: labels.New(). + EnvironmentID(sentinel.GetEnvironmentId()). + ComponentSentinel(), + Ports: []corev1.ServicePort{ + { + Name: "gossip-lan", + Port: GossipLANPort, + TargetPort: intstr.FromInt(GossipLANPort), + Protocol: corev1.ProtocolTCP, + }, + { + Name: "gossip-lan-udp", + Port: GossipLANPort, + TargetPort: intstr.FromInt(GossipLANPort), + Protocol: corev1.ProtocolUDP, + }, + }, + }, + } + + patch, err := json.Marshal(desired) + if err != nil { + return nil, fmt.Errorf("failed to marshal gossip service: %w", err) + } + + return client.Patch(ctx, gossipName, types.ApplyPatchType, patch, metav1.PatchOptions{ + FieldManager: fieldManagerKrane, + }) +} + +// ensureGossipCiliumPolicyExists creates or updates a CiliumNetworkPolicy that allows +// gossip traffic (TCP+UDP on GossipLANPort) between sentinel pods in the same environment. +func (c *Controller) ensureGossipCiliumPolicyExists(ctx context.Context, sentinel *ctrlv1.ApplySentinel) error { + policyName := fmt.Sprintf("%s-gossip-lan", sentinel.GetK8SName()) + + policy := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cilium.io/v2", + "kind": "CiliumNetworkPolicy", + "metadata": map[string]interface{}{ + "name": policyName, + "namespace": NamespaceSentinel, + "labels": labels.New(). + WorkspaceID(sentinel.GetWorkspaceId()). + ProjectID(sentinel.GetProjectId()). + EnvironmentID(sentinel.GetEnvironmentId()). + SentinelID(sentinel.GetSentinelId()). 
+ ComponentGossipLAN(), + // No ownerReferences: this policy is environment-scoped (selects all sentinel + // pods in the environment), so it must not be owned by a single Deployment. + // Krane manages its lifecycle via server-side apply. + }, + "spec": map[string]interface{}{ + "endpointSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + labels.LabelKeyEnvironmentID: sentinel.GetEnvironmentId(), + labels.LabelKeyComponent: "sentinel", + }, + }, + "ingress": []interface{}{ + map[string]interface{}{ + "fromEndpoints": []interface{}{ + map[string]interface{}{ + "matchLabels": map[string]interface{}{ + labels.LabelKeyEnvironmentID: sentinel.GetEnvironmentId(), + labels.LabelKeyComponent: "sentinel", + }, + }, + }, + "toPorts": []interface{}{ + map[string]interface{}{ + "ports": []interface{}{ + map[string]interface{}{ + "port": strconv.Itoa(GossipLANPort), + "protocol": "TCP", + }, + map[string]interface{}{ + "port": strconv.Itoa(GossipLANPort), + "protocol": "UDP", + }, + }, + }, + }, + }, + }, + }, + }, + } + + gvr := schema.GroupVersionResource{ + Group: "cilium.io", + Version: "v2", + Resource: "ciliumnetworkpolicies", + } + + _, err := c.dynamicClient.Resource(gvr).Namespace(NamespaceSentinel).Apply( + ctx, + policyName, + policy, + metav1.ApplyOptions{FieldManager: fieldManagerKrane}, + ) + if err != nil { + return fmt.Errorf("failed to apply gossip cilium network policy: %w", err) + } + + return nil +} diff --git a/svc/krane/internal/sentinel/consts.go b/svc/krane/internal/sentinel/consts.go index 7ab7935d9f..f2ba4f48dd 100644 --- a/svc/krane/internal/sentinel/consts.go +++ b/svc/krane/internal/sentinel/consts.go @@ -9,6 +9,9 @@ const ( // SentinelPort is the port sentinel pods listen on. SentinelPort = 8040 + // GossipLANPort is the port used for gossip protocol LAN communication between sentinel pods. + GossipLANPort = 7946 + // SentinelNodeClass is the node class for sentinel workloads. 
SentinelNodeClass = "sentinel" diff --git a/svc/krane/internal/sentinel/controller.go b/svc/krane/internal/sentinel/controller.go index 66173f1f0d..93b126ad6c 100644 --- a/svc/krane/internal/sentinel/controller.go +++ b/svc/krane/internal/sentinel/controller.go @@ -8,6 +8,7 @@ import ( ctrlv1 "github.com/unkeyed/unkey/gen/proto/ctrl/v1" ctrl "github.com/unkeyed/unkey/gen/rpc/ctrl" "github.com/unkeyed/unkey/pkg/circuitbreaker" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" ) @@ -20,6 +21,7 @@ import ( type Controller struct { clientSet kubernetes.Interface cluster ctrl.ClusterServiceClient + dynamicClient dynamic.Interface cb circuitbreaker.CircuitBreaker[any] done chan struct{} stopOnce sync.Once @@ -29,15 +31,17 @@ type Controller struct { // Config holds the configuration required to create a new [Controller]. type Config struct { - ClientSet kubernetes.Interface - Cluster ctrl.ClusterServiceClient - Region string + Cluster ctrl.ClusterServiceClient + Region string + ClientSet kubernetes.Interface + DynamicClient dynamic.Interface } // New creates a [Controller] ready to be started with [Controller.Start]. 
func New(cfg Config) *Controller { return &Controller{ clientSet: cfg.ClientSet, + dynamicClient: cfg.DynamicClient, cluster: cfg.Cluster, cb: circuitbreaker.New[any]("sentinel_state_update"), done: make(chan struct{}), diff --git a/svc/krane/internal/sentinel/delete.go b/svc/krane/internal/sentinel/delete.go index 07b5767767..d2b3cc11b6 100644 --- a/svc/krane/internal/sentinel/delete.go +++ b/svc/krane/internal/sentinel/delete.go @@ -2,11 +2,13 @@ package sentinel import ( "context" + "fmt" ctrlv1 "github.com/unkeyed/unkey/gen/proto/ctrl/v1" "github.com/unkeyed/unkey/pkg/logger" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -21,7 +23,26 @@ func (c *Controller) DeleteSentinel(ctx context.Context, req *ctrlv1.DeleteSenti "name", req.GetK8SName(), ) - err := c.clientSet.CoreV1().Services(NamespaceSentinel).Delete(ctx, req.GetK8SName(), metav1.DeleteOptions{}) + gossipName := fmt.Sprintf("%s-gossip-lan", req.GetK8SName()) + + // Delete gossip headless service + err := c.clientSet.CoreV1().Services(NamespaceSentinel).Delete(ctx, gossipName, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + + // Delete gossip CiliumNetworkPolicy + gvr := schema.GroupVersionResource{ + Group: "cilium.io", + Version: "v2", + Resource: "ciliumnetworkpolicies", + } + err = c.dynamicClient.Resource(gvr).Namespace(NamespaceSentinel).Delete(ctx, gossipName, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + + err = c.clientSet.CoreV1().Services(NamespaceSentinel).Delete(ctx, req.GetK8SName(), metav1.DeleteOptions{}) if err != nil && !apierrors.IsNotFound(err) { return err } diff --git a/svc/krane/pkg/labels/labels.go b/svc/krane/pkg/labels/labels.go index 3a84f20419..ecb9fbc066 100644 --- a/svc/krane/pkg/labels/labels.go +++ b/svc/krane/pkg/labels/labels.go @@ -117,6 +117,14 @@ 
func (l Labels) ComponentDeployment() Labels { return l } +// ComponentGossipLAN adds component label for gossip LAN resources (headless services, +// network policies). Distinct from ComponentSentinel so label selectors for sentinel +// services don't accidentally pick up gossip infrastructure. +func (l Labels) ComponentGossipLAN() Labels { + l[LabelKeyComponent] = "gossip-lan" + return l +} + // ComponentCiliumNetworkPolicy adds component label for Cilium network policy resources. // // This method sets "app.kubernetes.io/component" label to "ciliumnetworkpolicy" diff --git a/svc/krane/run.go b/svc/krane/run.go index 940f262f25..dbd49e5443 100644 --- a/svc/krane/run.go +++ b/svc/krane/run.go @@ -139,9 +139,10 @@ func Run(ctx context.Context, cfg Config) error { // Start the sentinel controller (independent control loop) sentinelCtrl := sentinel.New(sentinel.Config{ - ClientSet: clientset, - Cluster: cluster, - Region: cfg.Region, + ClientSet: clientset, + DynamicClient: dynamicClient, + Cluster: cluster, + Region: cfg.Region, }) if err := sentinelCtrl.Start(ctx); err != nil { return fmt.Errorf("failed to start sentinel controller: %w", err) diff --git a/svc/sentinel/BUILD.bazel b/svc/sentinel/BUILD.bazel index 2f9243327f..ae620ace03 100644 --- a/svc/sentinel/BUILD.bazel +++ b/svc/sentinel/BUILD.bazel @@ -10,8 +10,10 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/assert", + "//pkg/cache/clustering", "//pkg/clickhouse", "//pkg/clock", + "//pkg/cluster", "//pkg/db", "//pkg/logger", "//pkg/otel", diff --git a/svc/sentinel/config.go b/svc/sentinel/config.go index 992ac4e0e9..eaa917fd06 100644 --- a/svc/sentinel/config.go +++ b/svc/sentinel/config.go @@ -31,6 +31,26 @@ type Config struct { OtelTraceSamplingRate float64 PrometheusPort int + // --- Gossip cluster configuration --- + + // GossipEnabled controls whether gossip-based cache invalidation is active + GossipEnabled bool + + // GossipBindAddr is the address to bind gossip listeners on 
(default "0.0.0.0") + GossipBindAddr string + + // GossipLANPort is the LAN memberlist port (default 7946) + GossipLANPort int + + // GossipWANPort is the WAN memberlist port for bridges (default 7947) + GossipWANPort int + + // GossipLANSeeds are addresses of existing LAN cluster members (e.g. k8s headless service DNS) + GossipLANSeeds []string + + // GossipWANSeeds are addresses of cross-region bridges + GossipWANSeeds []string + // --- Logging sampler configuration --- // LogSampleRate is the baseline probability (0.0-1.0) of emitting log events. diff --git a/svc/sentinel/run.go b/svc/sentinel/run.go index 4d38a7b7d1..a6abca509a 100644 --- a/svc/sentinel/run.go +++ b/svc/sentinel/run.go @@ -7,8 +7,10 @@ import ( "log/slog" "net" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/clickhouse" "github.com/unkeyed/unkey/pkg/clock" + "github.com/unkeyed/unkey/pkg/cluster" "github.com/unkeyed/unkey/pkg/db" "github.com/unkeyed/unkey/pkg/logger" "github.com/unkeyed/unkey/pkg/otel" @@ -106,15 +108,54 @@ func Run(ctx context.Context, cfg Config) error { r.Defer(ch.Close) } + // Initialize gossip-based cache invalidation + var broadcaster clustering.Broadcaster + if cfg.GossipEnabled { + logger.Info("Initializing gossip cluster for cache invalidation", + "region", cfg.Region, + "instanceID", cfg.SentinelID, + ) + + mux := cluster.NewMessageMux() + + lanSeeds := cluster.ResolveDNSSeeds(cfg.GossipLANSeeds, cfg.GossipLANPort) + wanSeeds := cluster.ResolveDNSSeeds(cfg.GossipWANSeeds, cfg.GossipWANPort) + + gossipCluster, clusterErr := cluster.New(cluster.Config{ + Region: cfg.Region, + NodeID: cfg.SentinelID, + BindAddr: cfg.GossipBindAddr, + BindPort: cfg.GossipLANPort, + WANBindPort: cfg.GossipWANPort, + LANSeeds: lanSeeds, + WANSeeds: wanSeeds, + SecretKey: nil, // Sentinel gossip is locked down via CiliumNetworkPolicy + OnMessage: mux.OnMessage, + }) + if clusterErr != nil { + logger.Error("Failed to create gossip cluster, continuing without 
cluster cache invalidation", + "error", clusterErr, + ) + } else { + gossipBroadcaster := clustering.NewGossipBroadcaster(gossipCluster) + cluster.Subscribe(mux, gossipBroadcaster.HandleCacheInvalidation) + broadcaster = gossipBroadcaster + r.Defer(gossipCluster.Close) + } + } + routerSvc, err := router.New(router.Config{ DB: database, Clock: clk, EnvironmentID: cfg.EnvironmentID, Region: cfg.Region, + Broadcaster: broadcaster, + NodeID: cfg.SentinelID, }) if err != nil { return fmt.Errorf("unable to create router service: %w", err) } + r.Defer(routerSvc.Close) svcs := &routes.Services{ RouterService: routerSvc, diff --git a/svc/sentinel/services/router/BUILD.bazel b/svc/sentinel/services/router/BUILD.bazel index 623512f5e4..e4a20eba9b 100644 --- a/svc/sentinel/services/router/BUILD.bazel +++ b/svc/sentinel/services/router/BUILD.bazel @@ -12,10 +12,12 @@ go_library( "//internal/services/caches", "//pkg/array", "//pkg/cache", + "//pkg/cache/clustering", "//pkg/clock", "//pkg/codes", "//pkg/db", "//pkg/fault", "//pkg/logger", + "//pkg/uid", ], ) diff --git a/svc/sentinel/services/router/interface.go b/svc/sentinel/services/router/interface.go index 71cd0ad238..d37c85ab08 100644 --- a/svc/sentinel/services/router/interface.go +++ b/svc/sentinel/services/router/interface.go @@ -3,6 +3,7 @@ package router import ( "context" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/clock" "github.com/unkeyed/unkey/pkg/db" ) @@ -17,4 +18,11 @@ type Config struct { Clock clock.Clock EnvironmentID string Region string + + // Broadcaster for distributed cache invalidation via gossip. + // If nil, caches operate in local-only mode (no distributed invalidation). 
+ Broadcaster clustering.Broadcaster + + // NodeID identifies this node in the cluster + NodeID string } diff --git a/svc/sentinel/services/router/service.go b/svc/sentinel/services/router/service.go index f1b1365812..564b775c19 100644 --- a/svc/sentinel/services/router/service.go +++ b/svc/sentinel/services/router/service.go @@ -3,16 +3,19 @@ package router import ( "context" "fmt" + "os" "time" "github.com/unkeyed/unkey/internal/services/caches" "github.com/unkeyed/unkey/pkg/array" "github.com/unkeyed/unkey/pkg/cache" + "github.com/unkeyed/unkey/pkg/cache/clustering" "github.com/unkeyed/unkey/pkg/clock" "github.com/unkeyed/unkey/pkg/codes" "github.com/unkeyed/unkey/pkg/db" "github.com/unkeyed/unkey/pkg/fault" "github.com/unkeyed/unkey/pkg/logger" + "github.com/unkeyed/unkey/pkg/uid" ) var _ Service = (*service)(nil) @@ -25,31 +28,116 @@ type service struct { deploymentCache cache.Cache[string, db.Deployment] instancesCache cache.Cache[string, []db.Instance] + + // dispatcher handles routing of invalidation events to all caches in this service. + dispatcher *clustering.InvalidationDispatcher } -func New(cfg Config) (*service, error) { - deploymentCache, err := cache.New[string, db.Deployment](cache.Config[string, db.Deployment]{ - Resource: "deployment", - Clock: cfg.Clock, - MaxSize: 1000, - Fresh: 30 * time.Second, - Stale: 5 * time.Minute, - }) +// Close shuts down the service and cleans up resources. +func (s *service) Close() error { + if s.dispatcher != nil { + return s.dispatcher.Close() + } + + return nil +} + +// clusterOpts bundles the dispatcher and key converter functions needed for +// distributed cache invalidation. +type clusterOpts[K comparable] struct { + dispatcher *clustering.InvalidationDispatcher + broadcaster clustering.Broadcaster + nodeID string + keyToString func(K) string + stringToKey func(string) (K, error) +} + +// createCache creates a cache instance with optional clustering support. 
+func createCache[K comparable, V any]( + cacheConfig cache.Config[K, V], + opts *clusterOpts[K], +) (cache.Cache[K, V], error) { + localCache, err := cache.New(cacheConfig) if err != nil { return nil, err } - instancesCache, err := cache.New[string, []db.Instance](cache.Config[string, []db.Instance]{ - Clock: cfg.Clock, - Resource: "instance", - MaxSize: 1000, - Fresh: 10 * time.Second, - Stale: 60 * time.Second, + if opts == nil { + return localCache, nil + } + + clusterCache, err := clustering.New(clustering.Config[K, V]{ + LocalCache: localCache, + Broadcaster: opts.broadcaster, + Dispatcher: opts.dispatcher, + NodeID: opts.nodeID, + KeyToString: opts.keyToString, + StringToKey: opts.stringToKey, }) if err != nil { return nil, err } + return clusterCache, nil +} + +func New(cfg Config) (*service, error) { + nodeID := cfg.NodeID + if nodeID == "" { + hostname, err := os.Hostname() + if err != nil { + hostname = "unknown" + } + nodeID = fmt.Sprintf("%s-%s", hostname, uid.New("node")) + } + + var dispatcher *clustering.InvalidationDispatcher + var stringKeyOpts *clusterOpts[string] + + if cfg.Broadcaster != nil { + var err error + dispatcher, err = clustering.NewInvalidationDispatcher(cfg.Broadcaster) + if err != nil { + return nil, err + } + + stringKeyOpts = &clusterOpts[string]{ + dispatcher: dispatcher, + broadcaster: cfg.Broadcaster, + nodeID: nodeID, + keyToString: nil, + stringToKey: nil, + } + } + + deploymentCache, err := createCache( + cache.Config[string, db.Deployment]{ + Resource: "deployment", + Clock: cfg.Clock, + MaxSize: 1000, + Fresh: 30 * time.Second, + Stale: 5 * time.Minute, + }, + stringKeyOpts, + ) + if err != nil { + return nil, err + } + + instancesCache, err := createCache( + cache.Config[string, []db.Instance]{ + Clock: cfg.Clock, + Resource: "instance", + MaxSize: 1000, + Fresh: 10 * time.Second, + Stale: 60 * time.Second, + }, + stringKeyOpts, + ) + if err != nil { + return nil, err + } + return &service{ db: cfg.DB, clock: cfg.Clock, 
@@ -57,6 +145,7 @@ func New(cfg Config) (*service, error) { region: cfg.Region, deploymentCache: deploymentCache, instancesCache: instancesCache, + dispatcher: dispatcher, }, nil } diff --git a/tools/exportoneof/BUILD.bazel b/tools/exportoneof/BUILD.bazel new file mode 100644 index 0000000000..b757648026 --- /dev/null +++ b/tools/exportoneof/BUILD.bazel @@ -0,0 +1,14 @@ +load("@rules_go//go:def.bzl", "go_binary", "go_library") + +go_library( + name = "exportoneof_lib", + srcs = ["main.go"], + importpath = "github.com/unkeyed/unkey/tools/exportoneof", + visibility = ["//visibility:private"], +) + +go_binary( + name = "exportoneof", + embed = [":exportoneof_lib"], + visibility = ["//visibility:public"], +) diff --git a/tools/exportoneof/main.go b/tools/exportoneof/main.go new file mode 100644 index 0000000000..17772d21c1 --- /dev/null +++ b/tools/exportoneof/main.go @@ -0,0 +1,116 @@ +// Command exportoneof scans protobuf-generated Go files for unexported oneof +// interfaces (e.g. isClusterMessage_Payload) and creates companion files that +// re-export them as public type aliases (e.g. IsClusterMessage_Payload). 
+// +// Usage: +// +// go run ./tools/exportoneof +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" +) + +var oneofPattern = regexp.MustCompile(`^type (is[A-Z]\w+) interface \{$`) + +type oneofInterface struct { + pkg string + unexported string + exported string +} + +func main() { + if len(os.Args) < 2 { + fmt.Fprintln(os.Stderr, "usage: exportoneof ") + os.Exit(1) + } + root := os.Args[1] + + packages := map[string][]oneofInterface{} + + err := filepath.Walk(root, func(path string, info os.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + if info.IsDir() || !strings.HasSuffix(path, ".pb.go") { + return nil + } + + f, openErr := os.Open(path) + if openErr != nil { + return fmt.Errorf("open %s: %w", path, openErr) + } + defer func() { _ = f.Close() }() + + dir := filepath.Dir(path) + scanner := bufio.NewScanner(f) + + var pkgName string + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "package ") { + pkgName = strings.TrimPrefix(line, "package ") + } + + if m := oneofPattern.FindStringSubmatch(line); m != nil { + unexported := m[1] + exported := "I" + unexported[1:] // isXxx → IsXxx + packages[dir] = append(packages[dir], oneofInterface{ + pkg: pkgName, + unexported: unexported, + exported: exported, + }) + } + } + if scanErr := scanner.Err(); scanErr != nil { + return fmt.Errorf("scan %s: %w", path, scanErr) + } + return nil + }) + if err != nil { + _, _ = fmt.Fprintf(os.Stderr, "walk error: %v\n", err) + os.Exit(1) + } + + for dir, ifaces := range packages { + if writeErr := writeFile(dir, ifaces); writeErr != nil { + _, _ = fmt.Fprintf(os.Stderr, "error writing %s: %v\n", dir, writeErr) + os.Exit(1) + } + } +} + +func writeFile(dir string, ifaces []oneofInterface) error { + path := filepath.Join(dir, "oneof_interfaces.go") + f, err := os.Create(path) + if err != nil { + return err + } + + var writeErr error + write := func(format string, args ...any) { + if writeErr 
!= nil { + return + } + _, writeErr = fmt.Fprintf(f, format, args...) + } + + write("// Code generated by tools/exportoneof. DO NOT EDIT.\n\n") + write("package %s\n", ifaces[0].pkg) + + for _, iface := range ifaces { + write("\n// %s is the exported form of the protobuf oneof interface %s.\n", iface.exported, iface.unexported) + write("type %s = %s\n", iface.exported, iface.unexported) + } + + if closeErr := f.Close(); closeErr != nil { + return closeErr + } + + return writeErr +} diff --git a/web/apps/engineering/content/docs/architecture/services/cluster-service.mdx b/web/apps/engineering/content/docs/architecture/services/cluster-service.mdx new file mode 100644 index 0000000000..f22cb7f0e0 --- /dev/null +++ b/web/apps/engineering/content/docs/architecture/services/cluster-service.mdx @@ -0,0 +1,233 @@ +--- +title: Gossip Cluster +--- + +The `pkg/cluster` package provides gossip-based cluster membership and cross-region message propagation. Its primary use case is **cache invalidation** — when one node mutates data, all other nodes (including those in different regions) evict stale cache entries. + +Built on [hashicorp/memberlist](https://github.com/hashicorp/memberlist) (SWIM protocol). + +## Two-Tier Architecture + +The cluster uses a two-tier gossip design: a fast **LAN pool** within each region and a **WAN pool** that connects regions through elected **bridge** nodes. 
+ +``` +┌──────────────────────── Region: us-east-1 ────────────────────────┐ +│ │ +│ ┌────────┐ ┌────────┐ ┌──────────────┐ │ +│ │ API-1 │◄────►│ API-2 │◄────►│ API-3 │ │ +│ │ │ │ │ │ (bridge) │ │ +│ └────────┘ └────────┘ └──────┬───────┘ │ +│ ▲ ▲ │ │ +│ └──── LAN pool (SWIM, ~1ms) ──────┘ │ +│ │ │ +└──────────────────────────────────────────┼─────────────────────────┘ + │ + WAN pool + (SWIM, tuned + for latency) + │ +┌──────────────────────────────────────────┼─────────────────────────┐ +│ │ │ +│ ┌────────┐ ┌────────┐ ┌──────┴───────┐ │ +│ │ API-4 │◄────►│ API-5 │◄────►│ API-6 │ │ +│ │ │ │ │ │ (bridge) │ │ +│ └────────┘ └────────┘ └──────────────┘ │ +│ ▲ ▲ ▲ │ +│ └──── LAN pool (SWIM, ~1ms) ──────┘ │ +│ │ +└──────────────────────── Region: eu-west-1 ────────────────────────┘ +``` + +### LAN Pool (intra-region) + +Every node in a region joins the same LAN pool. Uses `memberlist.DefaultLANConfig()` — tuned for low-latency networks with ~1ms propagation. All nodes broadcast and receive messages. + +- **Port**: `GossipLANPort` (default `7946`) +- **Seeds**: `GossipLANSeeds` — typically a Kubernetes headless service DNS name resolving to all pod IPs in the region +- **Encryption**: AES-256 via `GossipSecretKey` + +### WAN Pool (cross-region) + +Only the **bridge** node in each region participates in the WAN pool. Uses `memberlist.DefaultWANConfig()` — tolerates higher latency and packet loss typical of cross-region links. + +- **Port**: `GossipWANPort` (default `7947`) +- **Seeds**: `GossipWANSeeds` — addresses of bridge-capable nodes in other regions + +## Bridge Election + +Each region auto-elects exactly **one bridge** — the node whose `NodeID` is lexicographically smallest among all LAN pool members. This is fully deterministic and requires no coordination protocol. 
+ +``` +evaluateBridge(): + members = LAN pool members + smallest = member with min(Name) + if smallest == me && !isBridge → promoteToBridge() + if smallest != me && isBridge → demoteFromBridge() +``` + +Election is re-evaluated whenever: +- A node **joins** the LAN pool (`NotifyJoin`) +- A node **leaves** the LAN pool (`NotifyLeave`) +- The initial LAN seed join completes + +### Failover + +When the bridge leaves (crash, scale-down, deployment), `NotifyLeave` fires on remaining nodes, triggering re-evaluation. The node with the next smallest name automatically promotes itself. No manual intervention required. + +## Message Flow + +### Same-region broadcast + +``` +API-1 calls Broadcast(CacheInvalidation{key: "api_123"}) + │ + ├─► Serialized as protobuf ClusterMessage (direction=LAN) + └─► Queued on LAN TransmitLimitedQueue + │ + ├─► API-2 receives via NotifyMsg → OnMessage handler + └─► API-3 receives via NotifyMsg → OnMessage handler +``` + +### Cross-region relay + +``` +API-1 (us-east-1) calls Broadcast(CacheInvalidation{key: "api_123"}) + │ + ├─► LAN broadcast → all us-east-1 nodes receive it + │ + └─► API-3 (bridge) receives LAN message + │ + ├─► Detects: I am bridge AND direction == LAN + ├─► Re-serializes with direction=WAN + └─► Queues on WAN TransmitLimitedQueue + │ + └─► API-6 (eu-west-1 bridge) receives via WAN + │ + ├─► Checks source_region != my region (not a loop) + ├─► Delivers to local OnMessage handler + └─► Re-broadcasts on eu-west-1 LAN pool + │ + ├─► API-4 receives it + └─► API-5 receives it +``` + +### Loop Prevention + +- LAN → WAN relay only happens for messages with `direction=LAN` (prevents re-relaying WAN messages) +- WAN → LAN re-broadcast is tagged `direction=WAN`, so the receiving bridge doesn't relay it again +- `source_region` check on the WAN delegate drops messages originating in the same region + +## Protobuf Envelope + +All messages use a single protobuf envelope (`proto/cluster/v1/envelope.proto`): + +```protobuf +message 
ClusterMessage { + Direction direction = 2; // LAN or WAN + string source_region = 3; // originating region + string sender_node = 4; // originating node ID + int64 sent_at_ms = 5; // creation timestamp (latency measurement) + + oneof payload { + CacheInvalidationEvent cache_invalidation = 1; + // future message types added here + } +} +``` + +Adding a new message type: +1. Add a new `oneof` variant to `ClusterMessage` +2. Call `cluster.Subscribe[*clusterv1.ClusterMessage_YourType](mux, handler)` + +The `MessageMux` handles routing automatically. + +## Wiring: API Service Example + +The API service (`svc/api/run.go`) wires gossip like this: + +```go +// 1. Create a message multiplexer (fan-out to multiple subsystems) +mux := cluster.NewMessageMux() + +// 2. Resolve seed addresses (DNS → IPs for k8s headless services) +lanSeeds := cluster.ResolveDNSSeeds(cfg.GossipLANSeeds, cfg.GossipLANPort) +wanSeeds := cluster.ResolveDNSSeeds(cfg.GossipWANSeeds, cfg.GossipWANPort) + +// 3. Create the gossip cluster +gossipCluster, _ := cluster.New(cluster.Config{ + Region: cfg.Region, + NodeID: cfg.InstanceID, + BindAddr: cfg.GossipBindAddr, + BindPort: cfg.GossipLANPort, + WANBindPort: cfg.GossipWANPort, + LANSeeds: lanSeeds, + WANSeeds: wanSeeds, + SecretKey: secretKey, + OnMessage: mux.OnMessage, +}) + +// 4. Wire cache invalidation +broadcaster := clustering.NewGossipBroadcaster(gossipCluster) +cluster.Subscribe(mux, broadcaster.HandleCacheInvalidation) + +// 5. 
Pass broadcaster to the cache layer
+caches, _ := caches.New(caches.Config{
+	Broadcaster: broadcaster,
+	NodeID:      cfg.InstanceID,
+})
+```
+
+### Component Roles
+
+| Component | Role |
+|---|---|
+| `cluster.Cluster` | Manages LAN/WAN memberlists, bridge election, message transport |
+| `cluster.MessageMux` | Routes incoming `ClusterMessage` payloads to typed handlers |
+| `cluster.Subscribe[T]` | Generic subscription — only receives messages matching the oneof variant |
+| `clustering.GossipBroadcaster` | Bridges the `cache.Broadcaster` interface to gossip `Cluster.Broadcast()` |
+
+## Fail-Open Design
+
+Gossip is designed to **never** take down the API service. Every failure path degrades gracefully to local-only caching:
+
+| Failure | Behavior |
+|---|---|
+| `cluster.New()` fails at startup | Logs the error and continues without gossip (local-only caching) |
+| LAN/WAN seed join exhaustion | Retries in a background goroutine; once all retries are exhausted, it logs and gives up — never crashes |
+| `Broadcast()` fails (proto marshal) | The error is logged and swallowed; `nil` is returned to the caller |
+| Bridge promotion fails | Logs the error; the node stays non-bridge — LAN gossip still works |
+| Incoming message handler errors | Logged, never propagated to request handling |
+| Bridge node dies | The next node auto-promotes; no manual intervention required |
+
+## Configuration Reference
+
+| Config Field | Default | Description |
+|---|---|---|
+| `GossipEnabled` | `false` | Enable gossip cluster |
+| `GossipBindAddr` | `0.0.0.0` | Bind address for memberlist |
+| `GossipLANPort` | `7946` | LAN memberlist port |
+| `GossipWANPort` | `7947` | WAN memberlist port (bridge only) |
+| `GossipLANSeeds` | — | Comma-separated LAN seed addresses |
+| `GossipWANSeeds` | — | Comma-separated WAN seed addresses |
+| `GossipSecretKey` | — | Base64-encoded AES key (`openssl rand -base64 32`) |
+
+## File Map
+
+```
+pkg/cluster/
+├── bridge.go # Bridge election, promote/demote logic
+├── bridge_test.go # Election unit test
+├── cluster.go # Cluster 
interface, gossipCluster impl, Broadcast, Close +├── cluster_test.go # Integration tests (single-node, multi-node, failover, multi-region) +├── config.go # Config struct and defaults +├── delegate_lan.go # LAN pool callbacks (message relay, event-driven election) +├── delegate_wan.go # WAN pool callbacks (cross-region receive + LAN re-broadcast) +├── discovery.go # DNS seed resolution (headless service → IPs) +├── doc.go # Package doc +├── message.go # memberlist.Broadcast wrapper +├── mux.go # MessageMux fan-out + generic Subscribe[T] +└── noop.go # No-op Cluster for when gossip is disabled + +pkg/cache/clustering/ +└── broadcaster_gossip.go # Bridges cache.Broadcaster ↔ cluster.Cluster +```