-
Notifications
You must be signed in to change notification settings - Fork 233
fix(ci): improve reliability of the router-tests #2577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
489 commits
Select commit
Hold shift + click to select a range
a79aeee
chore: CI stability run 48/50
jensneuse b6794af
chore: CI stability run 49/50
jensneuse addec93
chore: CI stability run 50/50
jensneuse b14ce85
chore: CI stability run 1/50
jensneuse ed4abe6
chore: CI stability run 2/50
jensneuse 42f09d3
chore: CI stability run 3/50
jensneuse 3e6a10c
chore: CI stability run 4/50
jensneuse 33c3fd1
chore: CI stability run 5/50
jensneuse 3026cf6
chore: CI stability run 6/50
jensneuse 4231b1b
chore: CI stability run 7/50
jensneuse a90c50d
chore: CI stability run 8/50
jensneuse c08901f
chore: CI stability run 9/50
jensneuse 80e1326
chore: CI stability run 10/50
jensneuse f5f176a
chore: CI stability run 1/50
jensneuse e6b65be
chore: CI stability run 11/50
jensneuse 77568a1
chore: CI stability run 2/50
jensneuse 39d46c8
chore: CI stability run 12/50
jensneuse aede796
chore: CI stability run 3/50
jensneuse 7981684
chore: CI stability run 13/50
jensneuse ed8978c
chore: CI stability run 4/50
jensneuse f6920fa
chore: CI stability run 14/50
jensneuse 844f093
chore: CI stability run 5/50
jensneuse 1b858d2
chore: CI stability run 15/50
jensneuse 6223f84
chore: CI stability run 6/50
jensneuse 5b7e085
chore: CI stability run 16/50
jensneuse 244e42c
chore: CI stability run 7/50
jensneuse 5cf5482
chore: CI stability run 17/50
jensneuse 5af22bb
chore: CI stability run 8/50
jensneuse 33841a3
chore: CI stability run 18/50
jensneuse 3ea6140
chore: CI stability run 9/50
jensneuse d5b542f
chore: CI stability run 19/50
jensneuse d73a102
chore: CI stability run 10/50
jensneuse 96116ba
chore: CI stability run 20/50
jensneuse 4c554b7
chore: CI stability run 11/50
jensneuse bf64f8d
chore: CI stability run 21/50
jensneuse 1d08d30
chore: CI stability run 12/50
jensneuse 694095c
chore: CI stability run 22/50
jensneuse 868dc14
chore: CI stability run 13/50
jensneuse 39bf2b4
chore: CI stability run 23/50
jensneuse e16223f
chore: CI stability run 14/50
jensneuse 5a5da80
chore: CI stability run 24/50
jensneuse c5d910b
chore: CI stability run 15/50
jensneuse 13fcced
chore: CI stability run 25/50
jensneuse 729c3c5
chore: CI stability run 16/50
jensneuse 0dd8dab
chore: CI stability run 26/50
jensneuse 0712b4b
chore: CI stability run 17/50
jensneuse ea207fe
chore: CI stability run 27/50
jensneuse 5622fd1
chore: CI stability run 18/50
jensneuse 008f4d1
chore: CI stability run 28/50
jensneuse 6fa813e
chore: CI stability run 19/50
jensneuse 9b01f94
chore: CI stability run 29/50
jensneuse d81a0a3
chore: CI stability run 20/50
jensneuse 57133c6
chore: CI stability run 30/50
jensneuse 91f0fb4
chore: CI stability run 21/50
jensneuse 4ed7533
chore: CI stability run 31/50
jensneuse c76e2f0
chore: CI stability run 22/50
jensneuse c013b25
chore: CI stability run 32/50
jensneuse 5044bff
chore: CI stability run 23/50
jensneuse 7083d1f
chore: CI stability run 33/50
jensneuse a52c258
chore: CI stability run 24/50
jensneuse d1b5a91
chore: CI stability run 34/50
jensneuse d0f1fa5
chore: CI stability run 25/50
jensneuse 44e3136
chore: CI stability run 35/50
jensneuse 2748789
chore: CI stability run 26/50
jensneuse 6f26dda
chore: CI stability run 36/50
jensneuse 27b65c0
chore: CI stability run 27/50
jensneuse 701a565
chore: CI stability run 37/50
jensneuse 8796b2d
chore: CI stability run 28/50
jensneuse e793702
chore: CI stability run 38/50
jensneuse 9b9645d
chore: CI stability run 29/50
jensneuse b3109c4
chore: CI stability run 39/50
jensneuse efd0511
chore: CI stability run 30/50
jensneuse c8fc80b
chore: CI stability run 40/50
jensneuse 0a32fd8
chore: CI stability run 31/50
jensneuse 840bd5f
chore: CI stability run 41/50
jensneuse 6d92a11
chore: CI stability run 32/50
jensneuse daddcaa
chore: CI stability run 42/50
jensneuse d7f8698
chore: CI stability run 33/50
jensneuse d522dab
chore: CI stability run 43/50
jensneuse d00b736
chore: CI stability run 34/50
jensneuse 8e97839
chore: CI stability run 44/50
jensneuse 212cc18
chore: CI stability run 35/50
jensneuse ab84d6c
chore: CI stability run 45/50
jensneuse b414246
chore: CI stability run 36/50
jensneuse f04d1b1
chore: CI stability run 46/50
jensneuse d28bef5
chore: CI stability run 37/50
jensneuse ab0c39f
chore: CI stability run 47/50
jensneuse b5d7776
chore: CI stability run 38/50
jensneuse b88679d
chore: CI stability run 48/50
jensneuse 86b7239
chore: CI stability run 39/50
jensneuse d9418e6
chore: CI stability run 49/50
jensneuse cf8b766
chore: CI stability run 40/50
jensneuse 5f5fcfa
chore: CI stability run 50/50
jensneuse cae15e5
chore: CI stability run 41/50
jensneuse e76dd9e
chore: CI stability run 1/50
jensneuse 57b7d9a
chore: CI stability run 2/50
jensneuse 36f2b17
chore: CI stability run 3/50
jensneuse f484110
chore: CI stability run 4/50
jensneuse 820350d
chore: CI stability run 5/50
jensneuse f100a2b
chore: CI stability run 6/50
jensneuse 87e4f14
chore: CI stability run 7/50
jensneuse f345aaf
chore: CI stability run 8/50
jensneuse 13910c2
chore: CI stability run 9/50
jensneuse 1e77eb9
chore: CI stability run 10/50
jensneuse ce4489d
chore: CI stability run 11/50
jensneuse bcb5159
chore: CI stability run 12/50
jensneuse 982cc2e
chore: CI stability run 13/50
jensneuse 95abbd2
chore: CI stability run 14/50
jensneuse 684b801
chore: CI stability run 15/50
jensneuse 80dfbc1
chore: CI stability run 16/50
jensneuse 51873bc
chore: CI stability run 17/50
jensneuse e49de83
chore: CI stability run 18/50
jensneuse ab64a6e
chore: CI stability run 19/50
jensneuse 92fff27
chore: CI stability run 20/50
jensneuse b07d931
chore: CI stability run 21/50
jensneuse 7937624
chore: CI stability run 22/50
jensneuse 56aded2
chore: CI stability run 23/50
jensneuse e8e4ccf
chore: CI stability run 24/50
jensneuse 515160b
chore: CI stability run 25/50
jensneuse 2cc9b21
chore: CI stability run 26/50
jensneuse 08f9b40
chore: CI stability run 27/50
jensneuse 124e8ff
chore: CI stability run 28/50
jensneuse a655848
chore: CI stability run 29/50
jensneuse e2ffbce
chore: CI stability run 30/50
jensneuse a51f2d0
chore: CI stability run 31/50
jensneuse d419d6b
chore: CI stability run 32/50
jensneuse 13b0955
chore: CI stability run 33/50
jensneuse 8e8fdb0
chore: CI stability run 34/50
jensneuse f5c6c30
chore: CI stability run 35/50
jensneuse 1fc27f5
chore: CI stability run 36/50
jensneuse 1181f6d
chore: CI stability run 37/50
jensneuse 7a278c8
chore: CI stability run 38/50
jensneuse 71afb71
chore: CI stability run 39/50
jensneuse f127f71
chore: CI stability run 40/50
jensneuse cdfdb20
chore: CI stability run 41/50
jensneuse 408ff5a
chore: CI stability run 42/50
jensneuse 9c3aa19
chore: CI stability run 43/50
jensneuse 0ab6a9c
chore: CI stability run 44/50
jensneuse bcf1a4a
chore: CI stability run 45/50
jensneuse 4b924fe
chore: CI stability run 46/50
jensneuse f320594
chore: CI stability run 47/50
jensneuse 1a3e36c
chore: CI stability run 48/50
jensneuse de154b9
chore: CI stability run 49/50
jensneuse d04fdc3
chore: CI stability run 50/50
jensneuse 4716298
chore: CI stability run 1/50
jensneuse 09d744d
chore: CI stability run 2/50
jensneuse 9a190fd
chore: CI stability run 3/50
jensneuse 8d772d0
chore: CI stability run 4/50
jensneuse 12bca88
chore: CI stability run 5/50
jensneuse c78acb1
chore: CI stability run 6/50
jensneuse 8dd3cf2
chore: CI stability run 7/50
jensneuse f15af76
chore: CI stability run 8/50
jensneuse ea4da2b
chore: CI stability run 9/50
jensneuse 53b070d
chore: CI stability run 10/50
jensneuse 797a096
fix: poll for async hook call count in TestStartSubscriptionHook
jensneuse 94e8381
chore: CI stability run 2/50
jensneuse 8b6b231
chore: CI stability run 3/50
jensneuse a55aaf8
chore: CI stability run 4/50
jensneuse ea720b5
chore: CI stability run 5/50
jensneuse 759a4e5
chore: CI stability run 6/50
jensneuse 13ed13c
chore: CI stability run 7/50
jensneuse fc58f46
chore: CI stability run 8/50
jensneuse 4b99b80
chore: CI stability run 9/50
jensneuse edbc619
chore: CI stability run 9/50 (retry - bun plugin build infra failure)
jensneuse f094a29
chore: CI stability run 10/50
jensneuse c3620b2
chore: CI stability run 11/50
jensneuse 46dd41e
chore: CI stability run 12/50
jensneuse fe62d00
chore: CI stability run 13/50
jensneuse 1026762
chore: CI stability run 14/50
jensneuse 713168a
chore: CI stability run 15/50
jensneuse 557eb1d
chore: CI stability run 16/50
jensneuse 6b54eab
chore: CI stability run 17/50
jensneuse 65568b7
chore: CI stability run 18/50
jensneuse 93bc92f
chore: CI stability run 19/50
jensneuse a05f2b8
chore: CI stability run 20/50
jensneuse 34259eb
chore: CI stability run 21/50
jensneuse 2a902ad
chore: CI stability run 21/50 (retry - GH Actions infra failure)
jensneuse a5dcd51
chore: CI stability run 21/50 (retry2 - GH Actions queue stuck)
jensneuse bc0077d
chore: CI stability run 22/50
jensneuse 6fadbe6
chore: CI stability run 23/50
jensneuse 038bfec
chore: CI stability run 24/50
jensneuse c113864
chore: CI stability run 25/50
jensneuse 62a1fa5
chore: CI stability run 26/50
jensneuse aac7171
chore: CI stability run 27/50
jensneuse 84cd948
chore: CI stability run 28/50
jensneuse d9211e6
chore: CI stability run 29/50
jensneuse a4e35b0
chore: CI stability run 30/50
jensneuse 8162a38
chore: CI stability run 31/50
jensneuse ce36754
chore: CI stability run 32/50
jensneuse bd15f5b
chore: CI stability run 33/50
jensneuse 1fa2c0f
chore: CI stability run 34/50
jensneuse acc101b
chore: CI stability run 35/50
jensneuse 8c8ad50
chore: CI stability run 36/50
jensneuse d7f100c
chore: CI stability run 37/50
jensneuse 43e6541
chore: CI stability run 38/50
jensneuse e88a19b
chore: CI stability run 39/50
jensneuse b200fd9
chore: CI stability run 40/50
jensneuse e643b95
chore: CI stability run 41/50
jensneuse cb063fc
chore: CI stability run 42/50
jensneuse bb9d843
chore: CI stability run 43/50
jensneuse 892343f
chore: CI stability run 44/50
jensneuse b21c146
chore: CI stability run 45/50
jensneuse 823dda3
chore: CI stability run 46/50
jensneuse ccaddd7
chore: CI stability run 47/50
jensneuse ec31861
chore: CI stability run 48/50
jensneuse ad26491
chore: CI stability run 49/50
jensneuse 9c58a10
chore: CI stability run 50/50
jensneuse f4a75a7
chore: CI stability run 1/50
jensneuse 1a7782e
chore: CI stability run 2/50
jensneuse a69b63f
chore: CI stability run 3/50
jensneuse f39a5b3
chore: CI stability run 4/50
jensneuse 1cb9686
chore: CI stability run 5/50
jensneuse c164c27
chore: CI stability run 6/50
jensneuse 556d830
chore: CI stability run 7/50
jensneuse 5cad44c
chore: CI stability run 8/50
jensneuse 26c03d4
chore: CI stability run 9/50
jensneuse b4c28e7
chore: CI stability run 1/10
jensneuse f38551e
chore: CI stability run 2/10
jensneuse de00c8f
chore: CI stability run 3/10
jensneuse c40c4d4
chore: CI stability run 4/10
jensneuse 1e4fc73
chore: CI stability run 5/10
jensneuse 2b9270d
chore: CI stability run 6/10
jensneuse ff25ef1
chore: CI stability run 7/10
jensneuse 8adbfce
chore: CI stability run 8/10
jensneuse 4e677a2
chore: CI stability run 9/10
jensneuse 7e2d1ca
chore: CI stability run 10/10
jensneuse 25416d3
Merge remote-tracking branch 'origin/main' into jensneuse/fix-flaky-t…
jensneuse 55158c4
refactor: colocate testdata with test subdirectories, add AGENTS.md
jensneuse 3d34cf7
refactor: remove os.Chdir hack, use testenv.ResolvePath for all paths
jensneuse 7f03608
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse d54f9a6
refactor: simplify paths, remove ResolvePath abstraction
jensneuse 8dbf9d9
refactor: colocate cache_warmup testdata with operations tests, remov…
jensneuse 3a7de10
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse b089c9a
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse 9bb499f
fix: address PR review feedback from Noroth and StarpTech
jensneuse acd896b
fix: restore cacheHashNotStored constant and comment in persisted ops…
jensneuse a93db46
refactor: move shared test helpers to testutils package and fix impor…
jensneuse 1a5c5b8
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse 4b68521
chore: add .serena/ to .gitignore
jensneuse 9c92bd4
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse 6d52b19
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse 21be65e
fix: replace undefined NatsWaitTimeout with EventWaitTimeout
jensneuse ac05837
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse acb2a02
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse 8d43d87
test(router-tests): address review follow-ups
jensneuse dbfbaf9
test(router-tests): reduce protocol test diff noise
jensneuse 5cbde3c
Merge remote-tracking branch 'origin/main' into jensneuse/fix-flaky-t…
jensneuse e33e467
Merge remote-tracking branch 'origin/main' into jensneuse/fix-flaky-t…
jensneuse 83e5e17
refactor(router-tests): move wait sync into SyncReporter
jensneuse 655cbf5
Merge branch 'main' into jensneuse/fix-flaky-tests
jensneuse File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,3 +24,6 @@ go.work.sum | |
|
|
||
| # Mise local config | ||
| **/mise.local.toml | ||
|
|
||
| # Serena | ||
| .serena/ | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| See [CLAUDE.md](./CLAUDE.md) for test development guidelines. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,207 @@ | ||
| # Router Integration Tests | ||
|
|
||
| ## Directory Structure | ||
|
|
||
| Tests are organized into subdirectories by functional area. New test files added to any subdirectory are automatically included in CI — no matrix or config changes needed. | ||
|
|
||
| | Directory | Purpose | | ||
| |-----------|---------| | ||
| | `observability/` | Prometheus metrics, logging, metrics exporters | | ||
| | `security/` | Auth, TLS, mTLS, error handling, circuit breakers, rate limiting | | ||
| | `operations/` | Caching, persisted ops, normalization, query plans, introspection | | ||
| | `subscriptions/` | WebSocket, HTTP subscriptions | | ||
| | `protocol/` | GraphQL/HTTP protocol, headers, uploads, config, plugins | | ||
| | `events/` | NATS, Kafka, Redis event-driven subscriptions | | ||
| | `telemetry/` | OpenTelemetry tracing and metrics | | ||
| | `connectrpc/` | ConnectRPC/gRPC subgraph integration | | ||
| | `lifecycle/` | Router startup, shutdown, goroutine leaks | | ||
| | `modules/` | Custom router modules | | ||
| | `fuzzquery/` | Fuzz testing for query parsing | | ||
|
|
||
| Shared test helpers live in `testutils/` and `testenv/` (test environment setup). Subdirectories import shared helpers via `"github.com/wundergraph/cosmo/router-tests/testutils"`. | ||
|
|
||
| ### Testdata Layout | ||
|
|
||
| Each subdirectory keeps its own `testdata/` for test fixtures specific to that area. Shared testdata (e.g., TLS certificates used by both `security/` and `events/`) lives in `testdata/` at the router-tests root. | ||
|
|
||
| Go tests run with CWD set to the package directory, so use simple relative paths: `testdata/...` for local fixtures, `../testdata/...` for shared fixtures, `../../router/...` for the router package. | ||
|
|
||
| | Location | Contents | Used By | | ||
| |----------|----------|---------| | ||
| | `operations/testdata/` | Query plan fixtures, introspection schemas, cache warmup fixtures | operations | | ||
| | `protocol/testdata/` | Router configs, query fixtures, MCP operations, tracing | protocol | | ||
| | `fuzzquery/testdata/` | Fuzz corpus | fuzzquery | | ||
| | `testdata/tls/` | TLS certificates (shared) | security, events | | ||
| | `testdata/connectrpc/` | Generated protobuf client stubs (Go package) | connectrpc | | ||
| | `testenv/testdata/` | Embedded router configs, CDN | all (via testenv) | | ||
|
|
||
| ## Test Synchronization Architecture | ||
|
|
||
| Tests synchronize with the engine via `SyncReporter` (`testenv/sync_reporter.go`), a channel-based wrapper around `EngineStats` that is injected into the router at test setup. It emits typed `Event` values on a buffered channel whenever a Reporter method is called (subscription added, trigger fired, message sent, etc.). | ||
|
|
||
| **Two synchronization mechanisms:** | ||
| - **Predicate-based** (`Wait()` on inner `EngineStats`): Used by `WaitForSubscriptionCount`, `WaitForTriggerCount`, etc. Best for "wait until absolute count matches" conditions. | ||
| - **Channel-based** (`Events()` channel): Used by `NATSPublishUntilReceived` and `KafkaPublishUntilReceived`. Best for "wait until this specific event happens" patterns. | ||
|
|
||
| Tests access the `SyncReporter` through `e.syncReporter()` or via the existing `WaitFor*` helpers which delegate internally. | ||
|
|
||
| ## Writing Reliable Tests | ||
|
|
||
| ### 1. NATS/Kafka/Redis subscription tests: use warm-up before publishing | ||
|
|
||
| **Problem:** After `WaitForSubscriptionCount` and `WaitForTriggerCount`, the internal subscription pipeline may not be fully wired up. A direct `Publish()` can be silently lost, causing timeouts. | ||
|
|
||
| **Fix:** Use the `*PublishUntilReceived` methods for the first message. These helpers publish, wait for a `SubscriptionUpdateSent` event on the SyncReporter channel, and retry if the message was lost. All three event systems have a safe variant: | ||
| - `NATSPublishUntilReceived` / `NATSPublishUntilMinMessagesSent` (for fan-out) | ||
| - `KafkaPublishUntilReceived` | ||
| - `RedisPublishUntilReceived` | ||
|
|
||
| ```go | ||
| // WRONG — message can be lost to race condition | ||
| xEnv.WaitForSubscriptionCount(1, timeout) | ||
| xEnv.WaitForTriggerCount(1, timeout) | ||
| err = conn.Publish(subject, data) | ||
| conn.Flush() | ||
| xEnv.WaitForMessagesSent(1, timeout) // may timeout forever | ||
|
|
||
| // RIGHT — retries until delivery is confirmed via SyncReporter event | ||
| xEnv.WaitForSubscriptionCount(1, timeout) | ||
| xEnv.WaitForTriggerCount(1, timeout) | ||
| xEnv.NATSPublishUntilReceived(conn, subject, data, 1, timeout) | ||
| // or: xEnv.KafkaPublishUntilReceived(topic, message, 1, timeout) | ||
| // or: xEnv.RedisPublishUntilReceived(topic, message, timeout) | ||
| ``` | ||
|
|
||
| ### 2. Error-testing with intentionally bad messages: warm up first, then single-send | ||
|
|
||
| **Problem:** `PublishUntilReceived` retries on failure. If the message is intentionally invalid, retries produce duplicates that pollute the subscription channel. | ||
|
|
||
| **Fix:** First confirm the pipeline is active with a valid warm-up message via `PublishUntilReceived`. Then use a single non-retrying publish for the bad message. | ||
|
|
||
| ```go | ||
| // Warm-up: confirm pipeline is active | ||
| xEnv.NATSPublishUntilReceived(conn, subject, validMsg, 1, timeout) | ||
| // read and validate the warm-up response... | ||
|
|
||
| // Now send the intentionally bad message (single publish, no retry) | ||
| conn.Publish(subject, invalidMsg) | ||
| conn.Flush() | ||
| // read and validate the error response... | ||
| ``` | ||
|
|
||
| ### 3. WebSocket reads: use `WSReadJSON` instead of `conn.ReadJSON` | ||
|
|
||
| **Problem:** `conn.ReadJSON()` blocks forever if the expected message never arrives (e.g., due to a lost publish). The test hangs until the 8-minute Go test timeout kills it. | ||
|
|
||
| **Fix:** Use `testenv.WSReadJSON` (or `testenv.WSWriteJSON` for writes) which has built-in retry with 2-second deadlines per attempt and exponential backoff (up to 10 retries). | ||
|
|
||
| ```go | ||
| // WRONG — hangs indefinitely if no message arrives | ||
| err = conn.ReadJSON(&msg) | ||
|
|
||
| // RIGHT — retries with deadline, fails fast after ~20 seconds | ||
| err = testenv.WSReadJSON(t, conn, &msg) | ||
| ``` | ||
|
|
||
| Only use manual `SetReadDeadline` + `conn.ReadJSON` when you expect an error (e.g., websocket close after config hot reload): | ||
|
|
||
| ```go | ||
| conn.SetReadDeadline(time.Now().Add(5 * time.Second)) | ||
| err = conn.ReadJSON(&msg) // may return websocket.CloseError | ||
| conn.SetReadDeadline(time.Time{}) | ||
| ``` | ||
|
|
||
| ### 4. Non-deterministic order: sort before asserting | ||
|
|
||
| **Problem:** Metrics, spans, or other collections may appear in non-deterministic order. Asserting on index positions (`metrics[0]`) fails intermittently. | ||
|
|
||
| **Fix:** Sort the collection by a stable key before making positional assertions. | ||
|
|
||
| ```go | ||
| sort.Slice(metrics, func(i, j int) bool { | ||
| return metrics[i].Labels["subgraph"] < metrics[j].Labels["subgraph"] | ||
| }) | ||
| require.Equal(t, "employees", metrics[0].Labels["subgraph"]) | ||
| ``` | ||
|
|
||
| ### 5. Flaky test prefix convention | ||
|
|
||
| Tests known to be flaky use the `TestFlaky` prefix (e.g., `TestFlakyNatsEvents`). CI runs these with `test_retry_count=3` and a separate `-run '^TestFlaky'` pass. Once the root cause is fixed, move tests back to their non-flaky parent function. | ||
|
|
||
| ### 6. Periodic exporters: wait for ALL expected items, not just one | ||
|
|
||
| **Problem:** When testing a periodic exporter (e.g., metrics log exporter with a 90ms interval), waiting for a single item to appear then asserting all items are present races — the exporter may not have exported everything in one cycle. | ||
|
|
||
| **Fix:** Use `require.Eventually` to wait for ALL expected items to appear, not just one sentinel value. | ||
|
|
||
| ```go | ||
| // WRONG — only waits for one metric, then asserts all are present | ||
| require.Eventually(t, func() bool { | ||
| return findMetricLog(logs, "router.http.requests") != nil | ||
| }, 5*time.Second, 100*time.Millisecond) | ||
| for _, m := range scopeMetric.Metrics { | ||
| require.NotNil(t, findMetricLog(logs, m.Name)) // may fail for other metrics | ||
| } | ||
|
|
||
| // RIGHT — waits for every expected item | ||
| require.Eventually(t, func() bool { | ||
| logs := observer.FilterMessage("Metric").All() | ||
| for _, m := range scopeMetric.Metrics { | ||
| if findMetricLog(logs, m.Name) == nil { | ||
| return false | ||
| } | ||
| } | ||
| return true | ||
| }, 5*time.Second, 100*time.Millisecond) | ||
| ``` | ||
|
|
||
| ### 7. Async hook invocation: poll before asserting counts | ||
|
|
||
| **Problem:** Module hooks (e.g., `OnStartSubscription`) may fire asynchronously after `WaitForSubscriptionCount` returns. Asserting `HookCallCount` immediately can see `0` because the hook hasn't executed yet. | ||
|
|
||
| **Fix:** Use `require.Eventually` to poll for the expected hook count before asserting. | ||
|
|
||
| ```go | ||
| // WRONG — hook may not have fired yet | ||
| xEnv.WaitForSubscriptionCount(1, timeout) | ||
| assert.Equal(t, int32(1), customModule.HookCallCount.Load()) // may be 0 | ||
|
|
||
| // RIGHT — poll until hook fires | ||
| xEnv.WaitForSubscriptionCount(1, timeout) | ||
| require.Eventually(t, func() bool { | ||
| return customModule.HookCallCount.Load() >= 1 | ||
| }, time.Second*10, time.Millisecond*50) | ||
| ``` | ||
|
|
||
| ### 8. Buffered channels in worker pools: prevent goroutine leaks on context cancellation | ||
|
|
||
| **Problem:** When a worker pool uses an unbuffered completion channel and the main goroutine exits early (e.g., via context cancellation), workers block forever on sends, leaking goroutines and potentially crashing the process. | ||
|
|
||
| **Fix:** Always buffer completion channels with capacity equal to the number of workers so sends never block when the receiver has stopped listening. | ||
|
|
||
| ```go | ||
| // WRONG — workers block on send if main goroutine exits via <-done | ||
| itemCompleted := make(chan struct{}) | ||
|
|
||
| // RIGHT — buffered, workers can always send and exit cleanly | ||
| itemCompleted := make(chan struct{}, workerCount) | ||
| ``` | ||
|
|
||
| ## Key Test Helpers | ||
|
|
||
| | Helper | Location | Purpose | | ||
| |--------|----------|---------| | ||
| | `SyncReporter` | `testenv/sync_reporter.go` | Channel-based wrapper around EngineStats for test synchronization | | ||
| | `NATSPublishUntilReceived` | `testenv/testenv.go` | Publish NATS message with retry until `SubscriptionUpdateSent` event received | | ||
| | `KafkaPublishUntilReceived` | `testenv/testenv.go` | Same pattern for Kafka | | ||
| | `RedisPublishUntilReceived` | `testenv/testenv.go` | Same pattern for Redis | | ||
| | `WSReadJSON` / `WSWriteJSON` | `testenv/testenv.go` | WebSocket read/write with retry + read deadline (10 attempts, exponential backoff) | | ||
| | `WSReadMessage` / `WSWriteMessage` | `testenv/testenv.go` | WebSocket raw message read/write with retry + deadline | | ||
| | `WaitForSubscriptionCount` | `testenv/testenv.go` | Wait for subscription count to reach exact value (predicate-based) | | ||
| | `WaitForTriggerCount` | `testenv/testenv.go` | Wait for trigger count to reach at least N (predicate-based) | | ||
| | `WaitForMessagesSent` | `testenv/testenv.go` | Wait for MessagesSent to reach at least N (predicate-based) | | ||
| | `ConfigureAuth` | `testutils/utils.go` | Set up JWKS auth for tests | | ||
| | `ToPtr` | `testutils/utils.go` | Generic pointer helper | | ||
| | `EmployeesIDData` | `testutils/utils.go` | Standard expected response constant | | ||
|
|
||
| <!-- CI stability run: 10 of 10 --> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| package events_test | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/wundergraph/cosmo/router-tests/testenv" | ||
| ) | ||
|
|
||
| const EventWaitTimeout = time.Second * 30 | ||
|
|
||
| func assertLineEquals(t *testing.T, reader *bufio.Reader, expected string) { | ||
| t.Helper() | ||
| line := testenv.ReadSSELine(t, reader) | ||
| assert.Equal(t, expected, line) | ||
| } | ||
|
|
||
| func assertMultipartPrefix(t *testing.T, reader *bufio.Reader) { | ||
| t.Helper() | ||
| assertLineEquals(t, reader, "") | ||
| assertLineEquals(t, reader, "--graphql") | ||
| assertLineEquals(t, reader, "Content-Type: application/json") | ||
| assertLineEquals(t, reader, "") | ||
| } | ||
|
|
||
| func assertMultipartValueEventually(t *testing.T, reader *bufio.Reader, expected string) { | ||
| t.Helper() | ||
| assert.Eventually(t, func() bool { | ||
| assertMultipartPrefix(t, reader) | ||
| line, _, err := reader.ReadLine() | ||
| assert.NoError(t, err) | ||
| if string(line) == "{}" { | ||
| return false | ||
| } | ||
| assert.Equal(t, expected, string(line)) | ||
| return true | ||
| }, EventWaitTimeout, time.Millisecond*100) | ||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.