# Workflow identity and triggers: a daily schedule, pushes to one branch, and
# manual dispatch with overridable duration/timeout values.
name: Nightly Throughput Stress

on:
  schedule:
    # Run at 3 AM PST (11:00 UTC) - offset from existing nightly.
    # Cron schedules are evaluated in UTC, so the local time shifts by one
    # hour while daylight saving time is in effect.
    - cron: '00 11 * * *'
  # NOTE(review): this branch filter looks like a temporary trigger used to
  # exercise the workflow from its feature branch - confirm, and drop it
  # before merging if so.
  push:
    branches:
      - add-nightly-throughput-stress-workflow
  workflow_dispatch:
    inputs:
      # How long the scenario should generate load.
      duration:
        description: 'Test duration (e.g., 6h, 1h)'
        required: false
        default: '5h'
        type: string
      # Upper bound for the scenario itself; keep it above `duration`.
      timeout:
        description: 'Scenario timeout (should always be greater than duration)'
        required: false
        default: '5h30m'
        type: string
      # Upper bound for the whole job, including setup and build steps.
      job_timeout_minutes:
        description: 'GitHub Actions job timeout in minutes'
        required: false
        default: 360
        type: number
27+
env:
  # Scenario timing. Each value resolves in order: the manual-dispatch input,
  # then the matching `vars` configuration variable, then the literal fallback.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}

  # Directory that receives the server and scenario logs; it is uploaded as an
  # artifact when the job fails or is cancelled.
  WORKER_LOG_DIR: /tmp/throughput-stress-logs

  # Omes checkout coordinates and the run identifier passed to the scenario.
  OMES_REPO: temporalio/omes
  OMES_REF: main
  RUN_ID: ${{ github.run_id }}-throughput-stress
40+
jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server, and
  # runs the Omes `throughput_stress` scenario against it. On failure or
  # cancellation the logs are uploaded and a Slack notification is sent.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Resolution order: manual-dispatch input, then the `vars` value, then 360.
    # fromJSON converts a string value into the number this key requires.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 360) }}
    # Least privilege: the steps below only read repository contents.
    # NOTE(review): confirm none of the third-party actions need a wider scope.
    permissions:
      contents: read

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Omes is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      # The Go version and module cache key both come from the Omes checkout.
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Setup Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: temporalio/bridge -> target

      # Quoted so the version stays a string instead of being read as a float.
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup uv
        uses: astral-sh/setup-uv@v5

      - name: Install poethepoet
        run: uv tool install poethepoet

      - name: Install dependencies
        run: uv sync --all-extras

      - name: Build SDK
        run: poe build-develop

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is backgrounded so the step returns immediately; its output
      # goes to the log directory so it can be uploaded on failure.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          # This makes the pipeline return the exit code of the first failing command
          # Otherwise the output of the `tee` command will be used
          # (which is troublesome when the scenario fails but the `tee` command succeeds)
          set -o pipefail

          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          # Expansions are quoted so input-supplied values cannot be
          # word-split or glob-expanded by the shell.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language python \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

      # Runs when an earlier step failed or the run was cancelled.
      - name: Upload logs on failure
        if: failure() || cancelled()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # Same condition as the log upload; posts through an incoming webhook.
      - name: Notify Slack on failure
        if: failure() || cancelled()
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Python throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}
0 commit comments