From cdd6a6084129fe80f47ed621ffe994d73b80dbbf Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 5 Jan 2025 16:25:38 -0500 Subject: [PATCH 1/2] feat: github-env audit checks GITHUB_PATH too --- src/audit/github_env.rs | 149 ++++++++++++++------ tests/snapshot.rs | 4 + tests/snapshots/snapshot__github_env-2.snap | 16 +++ tests/snapshots/snapshot__github_env.snap | 12 +- tests/test-data/github-env/github-path.yml | 14 ++ 5 files changed, 148 insertions(+), 47 deletions(-) create mode 100644 tests/snapshots/snapshot__github_env-2.snap create mode 100644 tests/test-data/github-env/github-path.yml diff --git a/src/audit/github_env.rs b/src/audit/github_env.rs index 90eec2a5..d5e7bc23 100644 --- a/src/audit/github_env.rs +++ b/src/audit/github_env.rs @@ -12,8 +12,9 @@ use std::sync::LazyLock; use streaming_iterator::StreamingIterator; use tree_sitter::{Language, Parser, Query, QueryCapture, QueryCursor, QueryMatches, Tree}; -static GITHUB_ENV_WRITE_CMD: LazyLock = - LazyLock::new(|| Regex::new(r#"(?mi)^.+\s*>>?\s*"?%GITHUB_ENV%"?.*$"#).unwrap()); +static GITHUB_ENV_WRITE_CMD: LazyLock = LazyLock::new(|| { + Regex::new(r#"(?mi)^.+\s*>>?\s*"?%(?GITHUB_ENV|GITHUB_PATH)%"?.*$"#).unwrap() +}); pub(crate) struct GitHubEnv { // NOTE: interior mutability used since Parser::parse requires &mut self @@ -27,13 +28,14 @@ pub(crate) struct GitHubEnv { pwsh_pipeline_query: SpannedQuery, } -audit_meta!(GitHubEnv, "github-env", "dangerous use of GITHUB_ENV"); +audit_meta!(GitHubEnv, "github-env", "dangerous use of environment file"); /// Holds a tree-sitter query that contains a `@span` capture that /// covers the entire range of the query. struct SpannedQuery { inner: Query, span_idx: u32, + destination_idx: u32, } impl Deref for SpannedQuery { @@ -48,10 +50,12 @@ impl SpannedQuery { fn new(query: &'static str, language: &Language) -> Self { let query = Query::new(language, query).expect("malformed query"); let span_idx = query.capture_index_for_name("span").unwrap(); + let destination_idx = query.capture_index_for_name("destination").unwrap(); Self { inner: query, span_idx, + destination_idx, } } } @@ -63,12 +67,12 @@ const BASH_REDIRECT_QUERY: &str = r#" ) (file_redirect ( [ - (string (_ (variable_name))) - (expansion (variable_name)) - (simple_expansion (variable_name)) - ] @destination + (string (_ (variable_name) @destination)) + (expansion (variable_name) @destination) + (simple_expansion (variable_name) @destination) + ] )) - (#match? @destination "GITHUB_ENV") + (#match? @destination "^(GITHUB_ENV|GITHUB_PATH)$") ) @span "#; @@ -77,13 +81,13 @@ const BASH_PIPELINE_QUERY: &str = r#" (command name: (command_name) @cmd argument: [ - (string (_ (variable_name))) - (expansion) - (simple_expansion) - ] @arg + (string (_ (variable_name) @destination)) + (expansion (variable_name) @destination) + (simple_expansion (variable_name) @destination) + ] ) (#match? @cmd "tee") - (#match? @arg "GITHUB_ENV") + (#match? @destination "^(GITHUB_ENV|GITHUB_PATH)$") ) @span "#; @@ -102,7 +106,7 @@ const PWSH_REDIRECT_QUERY: &str = r#" ) (_)* ) - (#match? @destination "(?i)ENV:GITHUB_ENV") + (#match? @destination "(?i)ENV:GITHUB_ENV|ENV:GITHUB_PATH") )) @span "#; @@ -121,7 +125,7 @@ const PWSH_PIPELINE_QUERY: &str = r#" ) (_)*)) (#match? @cmd "(?i)out-file|add-content|set-content|tee-object") - (#match? @destination "(?i)ENV:GITHUB_ENV") + (#match? @destination "(?i)ENV:GITHUB_ENV|ENV:GITHUB_PATH") ) @span "#; @@ -165,7 +169,10 @@ impl GitHubEnv { cursor.matches(query, tree.root_node(), source.as_bytes()) } - fn bash_uses_github_env(&self, script_body: &str) -> Result>> { + fn bash_uses_github_env<'hay>( + &self, + script_body: &'hay str, + ) -> Result)>> { let mut cursor = QueryCursor::new(); let tree = self @@ -208,7 +215,16 @@ impl GitHubEnv { .iter() .find(|cap| cap.index == self.bash_redirect_query.span_idx) .unwrap(); - matching_spans.push(span.node.byte_range()); + + let destination = { + let cap = mat + .captures + .iter() + .find(|cap| cap.index == self.bash_redirect_query.destination_idx) + .unwrap(); + cap.node.utf8_text(script_body.as_bytes()).unwrap() + }; + matching_spans.push((destination, span.node.byte_range())); } }); @@ -221,18 +237,44 @@ impl GitHubEnv { let matches = self.query(query, &mut cursor, &tree, script_body); matches.for_each(|mat| { - for cap in mat.captures { - if cap.index == query.span_idx { - matching_spans.push(cap.node.byte_range()); - } - } + let span = mat + .captures + .iter() + .find(|cap| cap.index == query.span_idx) + .unwrap(); + + let destination = { + let cap = mat + .captures + .iter() + .find(|cap| cap.index == query.destination_idx) + .unwrap(); + cap.node.utf8_text(script_body.as_bytes()).unwrap() + }; + + matching_spans.push((destination, span.node.byte_range())); }); } Ok(matching_spans) } - fn pwsh_uses_github_env(&self, script_body: &str) -> Result { + fn cmd_uses_github_env<'hay>(&self, script_body: &'hay str) -> Vec<(&'hay str, Range)> { + GITHUB_ENV_WRITE_CMD + .captures_iter(script_body) + .map(|c| { + let name = c.name("destination").unwrap().as_str(); + let span = c.name("destination").unwrap().range(); + + (name, span) + }) + .collect() + } + + fn pwsh_uses_github_env<'hay>( + &self, + script_body: &'hay str, + ) -> Result)>> { let tree = &self .pwsh_parser .borrow_mut() @@ -241,24 +283,41 @@ impl GitHubEnv { let mut cursor = QueryCursor::new(); let queries = [&self.pwsh_redirect_query, &self.pwsh_pipeline_query]; + let mut matching_spans = vec![]; for query in queries { - let mut matches = self.query(query, &mut cursor, tree, script_body); - if matches.next().is_some() { - return Ok(true); - } + let matches = self.query(query, &mut cursor, tree, script_body); + matches.for_each(|mat| { + let span = mat + .captures + .iter() + .find(|cap| cap.index == query.span_idx) + .unwrap(); + + let destination = { + let cap = mat + .captures + .iter() + .find(|cap| cap.index == query.destination_idx) + .unwrap(); + cap.node.utf8_text(script_body.as_bytes()).unwrap() + }; + + matching_spans.push((destination, span.node.byte_range())); + }); } - Ok(false) + Ok(matching_spans) } - fn uses_github_env(&self, run_step_body: &str, shell: &str) -> anyhow::Result { + fn uses_github_env<'hay>( + &self, + run_step_body: &'hay str, + shell: &str, + ) -> Result)>> { match shell { - "bash" | "sh" => self - .bash_uses_github_env(run_step_body) - // NOTE: discard the spans for now. - .map(|r| !r.is_empty()), - "cmd" => Ok(GITHUB_ENV_WRITE_CMD.is_match(run_step_body)), + "bash" | "sh" => self.bash_uses_github_env(run_step_body), + "cmd" => Ok(self.cmd_uses_github_env(run_step_body)), "pwsh" | "powershell" => self.pwsh_uses_github_env(run_step_body), // TODO: handle python. &_ => { @@ -266,7 +325,7 @@ impl GitHubEnv { "'{}' shell not supported when evaluating usage of GITHUB_ENV", shell ); - Ok(false) + Ok(vec![]) } } } @@ -326,7 +385,9 @@ impl Audit for GitHubEnv { // nothing we can do about that. "bash" }); - if self.uses_github_env(run, shell)? { + + // TODO: actually use the spanning information here. + for (dest, _span) in self.uses_github_env(run, shell)? { findings.push( Self::finding() .severity(Severity::High) @@ -335,7 +396,7 @@ impl Audit for GitHubEnv { step.location() .primary() .with_keys(&["run".into()]) - .annotated("GITHUB_ENV write may allow code execution"), + .annotated(format!("write to {dest} may allow code execution")), ) .build(step.workflow())?, ) @@ -355,7 +416,8 @@ impl Audit for GitHubEnv { return Ok(findings); }; - if self.uses_github_env(run, shell)? { + // TODO: actually use the spanning information here. + for (dest, _span) in self.uses_github_env(run, shell)? { findings.push( Self::finding() .severity(Severity::High) @@ -364,7 +426,7 @@ impl Audit for GitHubEnv { step.location() .primary() .with_keys(&["run".into()]) - .annotated("GITHUB_ENV write may allow code execution"), + .annotated(format!("write to {dest} may allow code execution")), ) .build(step.action())?, ) @@ -427,6 +489,9 @@ mod tests { ("something | tee \"${$OTHER_ENV}\" # not $GITHUB_ENV", false), // comments not detected ("echo $foo >> GITHUB_ENV", false), // GITHUB_ENV is not a variable ("echo $foo | tee GITHUB_ENV", false), // GITHUB_ENV is not a variable + ("echo $foo | tee $GITHUB", false), // wrong variable, but same prefix + ("echo $foo | tee $GITHUB_", false), // wrong variable, but same prefix + ("echo $foo | tee $GITHUB_ENVX", false), // wrong variable, but same prefix ("echo completely-static >> $GITHUB_ENV", false), // LHS is completely static ("echo 'completely-static' >> $GITHUB_ENV", false), // LHS is completely static ("echo 'completely-static' \"foo\" >> $GITHUB_ENV", false), // LHS is completely static @@ -442,7 +507,8 @@ mod tests { let sut = GitHubEnv::new(audit_state).expect("failed to create audit"); let uses_github_env = sut.uses_github_env(case, "bash").unwrap(); - assert_eq!(uses_github_env, *expected, "failed: {case}"); + + assert!(!uses_github_env.is_empty() == *expected, "failed: {case}"); } } @@ -527,7 +593,8 @@ mod tests { let sut = GitHubEnv::new(audit_state).expect("failed to create audit"); let uses_github_env = sut.uses_github_env(case, "pwsh").unwrap(); - assert_eq!(uses_github_env, *expected, "failed: {case}"); + + assert!(!uses_github_env.is_empty() == *expected, "failed: {case}"); } } } diff --git a/tests/snapshot.rs b/tests/snapshot.rs index c8b3fab0..74d1aa76 100644 --- a/tests/snapshot.rs +++ b/tests/snapshot.rs @@ -381,5 +381,9 @@ fn github_env() -> Result<()> { .workflow(workflow_under_test("github-env/action.yml")) .run()?); + insta::assert_snapshot!(zizmor() + .workflow(workflow_under_test("github-env/github-path.yml")) + .run()?); + Ok(()) } diff --git a/tests/snapshots/snapshot__github_env-2.snap b/tests/snapshots/snapshot__github_env-2.snap new file mode 100644 index 00000000..590ab31a --- /dev/null +++ b/tests/snapshots/snapshot__github_env-2.snap @@ -0,0 +1,16 @@ +--- +source: tests/snapshot.rs +expression: "zizmor().workflow(workflow_under_test(\"github-env/github-path.yml\")).run()?" +snapshot_kind: text +--- +error[github-env]: dangerous use of environment file + --> @@INPUT@@:12:9 + | +12 | / run: | +13 | | message=$(echo "$TITLE" | grep -oP '[{\[][^}\]]+[}\]]' | sed 's/{\|}\|\[\|\]//g') +14 | | echo "$message" >> $GITHUB_PATH + | |__________________________________________^ write to GITHUB_PATH may allow code execution + | + = note: audit confidence → Low + +2 findings (1 ignored): 0 unknown, 0 informational, 0 low, 0 medium, 1 high diff --git a/tests/snapshots/snapshot__github_env.snap b/tests/snapshots/snapshot__github_env.snap index 0b0e8104..66559a79 100644 --- a/tests/snapshots/snapshot__github_env.snap +++ b/tests/snapshots/snapshot__github_env.snap @@ -3,30 +3,30 @@ source: tests/snapshot.rs expression: "zizmor().workflow(workflow_under_test(\"github-env/action.yml\")).run()?" snapshot_kind: text --- -error[github-env]: dangerous use of GITHUB_ENV +error[github-env]: dangerous use of environment file --> @@INPUT@@:10:7 | 10 | / run: | 11 | | echo "foo=$(bar)" >> $GITHUB_ENV - | |________________________________________^ GITHUB_ENV write may allow code execution + | |________________________________________^ write to GITHUB_ENV may allow code execution | = note: audit confidence → Low -error[github-env]: dangerous use of GITHUB_ENV +error[github-env]: dangerous use of environment file --> @@INPUT@@:15:7 | 15 | / run: | 16 | | echo "foo=$env:BAR" >> $env:GITHUB_ENV - | |______________________________________________^ GITHUB_ENV write may allow code execution + | |______________________________________________^ write to $env:GITHUB_ENV may allow code execution | = note: audit confidence → Low -error[github-env]: dangerous use of GITHUB_ENV +error[github-env]: dangerous use of environment file --> @@INPUT@@:20:7 | 20 | / run: | 21 | | echo LIBRARY=%LIBRARY% >> %GITHUB_ENV% - | |______________________________________________^ GITHUB_ENV write may allow code execution + | |______________________________________________^ write to GITHUB_ENV may allow code execution | = note: audit confidence → Low diff --git a/tests/test-data/github-env/github-path.yml b/tests/test-data/github-env/github-path.yml new file mode 100644 index 00000000..35eafe41 --- /dev/null +++ b/tests/test-data/github-env/github-path.yml @@ -0,0 +1,14 @@ +on: + pull_request_target: # zizmor: ignore[dangerous-triggers] + +jobs: + vulnerable: + runs-on: ubuntu-latest + + steps: + - name: Passes the title around + env: + TITLE: ${{ github.event.pull_request.title }} + run: | + message=$(echo "$TITLE" | grep -oP '[{\[][^}\]]+[}\]]' | sed 's/{\|}\|\[\|\]//g') + echo "$message" >> $GITHUB_PATH From 91d7a6040f9b9a1d4d8fca4de92da5158d836865 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 5 Jan 2025 16:32:44 -0500 Subject: [PATCH 2/2] docs: updates --- docs/audits.md | 21 +++++++++++++++------ docs/release-notes.md | 5 +++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/audits.md b/docs/audits.md index 1be9e520..6b9bb7c0 100644 --- a/docs/audits.md +++ b/docs/audits.md @@ -673,12 +673,17 @@ In general, users should use for [GitHub Actions environment files] [github-env.yml]: https://github.com/woodruffw/gha-hazmat/blob/main/.github/workflows/github-env.yml -Detects dangerous usages of the `GITHUB_ENV` environment variable. +Detects dangerous writes to the `GITHUB_ENV` and `GITHUB_PATH` environment variables. When used in workflows with dangerous triggers (such as `pull_request_target` and `workflow_run`), -`GITHUB_ENV` can be an arbitrary code execution risk. In particular, if the attacker is able to set -arbitrary variables or variable contents via `GITHUB_ENV`, they made be able to set `LD_PRELOAD` -or otherwise induce code execution implicitly within subsequent steps. +`GITHUB_ENV` and `GITHUB_PATH` can be an arbitrary code execution risk: + +* If the attacker is able to set arbitrary variables or variable contents via + `GITHUB_ENV`, they may be able to set `LD_PRELOAD` or otherwise induce code + execution implicitly within subsequent steps. +* If the attacker is able to add an arbitrary directory to the `$PATH` via + `GITHUB_PATH`, they may be able to execute arbitrary code by shadowing + ordinary system executables (such as `ssh`). Other resources: @@ -689,8 +694,12 @@ Other resources: ### Remediation -In general, you should avoid setting `GITHUB_ENV` within workflows that are attacker-triggered, -like `pull_request_target`. +In general, you should avoid modifying `GITHUB_ENV` and `GITHUB_PATH` within +sensitive workflows that are attacker-triggered, like `pull_request_target`. + +If you absolutely must use `GITHUB_ENV` or `GITHUB_PATH`, avoid passing +attacker-controlled values into either. Stick with literal strings and +values computed solely from trusted sources. If you need to pass state between steps, consider using `GITHUB_OUTPUT` instead. diff --git a/docs/release-notes.md b/docs/release-notes.md index b7b1b6da..8d93c8b1 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -9,6 +9,11 @@ of `zizmor`. ## Upcoming (UNRELEASED) +### Improved + +* The [github-env] audit now detects dangerous writes to `GITHUB_PATH`, + is more precise, and can produce multiple findings per run block (#391) + ### Fixed * `workflow_call.secrets` keys with missing values are now parsed correctly (#388)