Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improves expression expansion when handling matrices #231

Merged
merged 11 commits into from
Dec 10, 2024
43 changes: 30 additions & 13 deletions src/audit/self_hosted_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ use crate::{
AuditState,
};

use super::{audit_meta, WorkflowAudit};
use crate::models::Matrix;
use anyhow::Result;
use github_actions_models::{
common::expr::{ExplicitExpr, LoE},
workflow::{job::RunsOn, Job},
};

use super::{audit_meta, WorkflowAudit};

pub(crate) struct SelfHostedRunner;

audit_meta!(
Expand Down Expand Up @@ -106,18 +106,35 @@ impl WorkflowAudit for SelfHostedRunner {
),
// The entire `runs-on:` is an expression, which may or may
// not be a self-hosted runner when expanded, like above.
LoE::Expr(_) => results.push(
Self::finding()
.confidence(Confidence::Low)
.severity(Severity::Unknown)
.persona(Persona::Auditor)
.add_location(
job.location()
.with_keys(&["runs-on".into()])
.annotated("expression may expand into a self-hosted runner"),
LoE::Expr(exp) => {
let matrix = Matrix::try_from(&job)?;

let expansions = matrix.expand_values();

let self_hosted = expansions.iter().any(|(path, expansion)| {
exp.as_bare() == path && expansion.contains("self-hosted")
});

if self_hosted {
results.push(
Self::finding()
.confidence(Confidence::High)
.severity(Severity::Unknown)
.persona(Persona::Auditor)
.add_location(
job.location()
.with_keys(&["strategy".into()])
.annotated("matrix declares self-hosted runner"),
)
.add_location(
job.location().with_keys(&["runs-on".into()]).annotated(
"expression may expand into a self-hosted runner",
),
)
.build(workflow)?,
)
.build(workflow)?,
),
}
}
}
}

Expand Down
52 changes: 4 additions & 48 deletions src/audit/template_injection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ use std::ops::Deref;

use github_actions_models::{
common::expr::LoE,
workflow::job::{Matrix, NormalJob, StepBody, Strategy},
workflow::job::{NormalJob, StepBody, Strategy},
};

use super::{audit_meta, WorkflowAudit};
use crate::{
expr::{BinOp, Expr, UnOp},
finding::{Confidence, Persona, Severity},
models,
state::AuditState,
utils::extract_expressions,
};
Expand Down Expand Up @@ -117,50 +118,6 @@ impl TemplateInjection {
}
}

/// Checks whether the given `expr` into `matrix` is static.
fn matrix_is_static(&self, expr: &str, matrix: &Matrix) -> bool {
// If the matrix's dimensions are an expression, then it's not static.
let LoE::Literal(dimensions) = &matrix.dimensions else {
return false;
};

// Our `expr` should be a literal path of `matrix.foo.bar.baz.etc`,
// so we descend through the matrix based on it.
let mut keys = expr.split('.').skip(1);

let Some(key) = keys.next() else {
// No path means that we're effectively expanding the entire matrix,
// meaning *any* non-static component makes the entire expansion
// non-static.

// HACK: The correct way to do this is to walk `matrix.dimensions`,
// but it could be arbitrarily deep. Instead, we YOLO the dimensions
// back into YAML and see if the serialized equivalent has
// any indicators of expansion (`${{ ... }}`) in it.
// NOTE: Safe unwrap since `dimensions` was loaded directly from YAML
let dimensions_yaml = serde_yaml::to_string(&dimensions).unwrap();
return !(dimensions_yaml.contains("${{") && dimensions_yaml.contains("}}"));
};

match dimensions.get(key) {
// This indicates a malformed matrix or matrix ref, which is
// static for our purposes.
None => true,
// If our key is an expression, it's definitely not static.
Some(LoE::Expr(_)) => false,
Some(LoE::Literal(dim)) => {
// TODO: This is imprecise: technically we should walk the
// entire set of keys to determine if a specific index is
// accessed + whether that index is an expression.
// But doing that is hard, so we do the same YOLO reserialize
// trick as above and consider this non-static
// if it has any hint of a template expansion in it.
let dim_yaml = serde_yaml::to_string(&dim).unwrap();
!(dim_yaml.contains("${{") && dim_yaml.contains("}}"))
}
}
}

fn injectable_template_expressions(
&self,
run: &str,
Expand Down Expand Up @@ -222,12 +179,11 @@ impl TemplateInjection {
} else if context.starts_with("matrix.") || context == "matrix" {
if let Some(Strategy { matrix, .. }) = &job.strategy {
let matrix_is_static = match matrix {
// The matrix is statically defined, but one
// or more keys might contain expressions.
Some(LoE::Literal(matrix)) => self.matrix_is_static(context, matrix),
// The matrix is generated by an expression, meaning
// that it's trivially not static.
Some(LoE::Expr(_)) => false,

Some(inner) => models::Matrix::new(inner).is_static(),
// Context specifies a matrix, but there is no matrix defined.
// This is an invalid workflow so there's no point in flagging it.
None => continue,
Expand Down
170 changes: 164 additions & 6 deletions src/models.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
//! Enriching/context-bearing wrappers over GitHub Actions models
//! from the `github-actions-models` crate.

use std::fmt::Debug;
use std::{iter::Enumerate, ops::Deref};

use crate::finding::{Route, SymbolicLocation};
use crate::registry::WorkflowKey;
use anyhow::{Context, Result};
use anyhow::{bail, Context, Result};
use camino::Utf8Path;
use github_actions_models::common::expr::LoE;
use github_actions_models::workflow::event::{BareEvent, OptionalBody};
use github_actions_models::workflow::job::RunsOn;
use github_actions_models::workflow::job::{RunsOn, Strategy};
use github_actions_models::workflow::{
self,
self, job,
job::{NormalJob, StepBody},
Trigger,
};
use indexmap::IndexMap;
use serde_json::{json, Value};
use std::collections::HashMap;
use std::fmt::Debug;
use std::{iter::Enumerate, ops::Deref};
use terminal_link::Link;

/// Represents an entire GitHub Actions workflow.
Expand Down Expand Up @@ -224,6 +226,162 @@ impl<'w> Iterator for Jobs<'w> {
}
}

/// Represents an execution Matrix within a Job.
///
/// This type implements [`Deref`] for [job::NormalJob::Strategy`], providing
/// access to the underlying data model.
#[derive(Clone)]
pub(crate) struct Matrix<'w> {
inner: &'w LoE<job::Matrix>,
}

impl<'w> Deref for Matrix<'w> {
type Target = &'w LoE<job::Matrix>;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

impl<'w> TryFrom<&'w Job<'w>> for Matrix<'w> {
type Error = anyhow::Error;

fn try_from(value: &'w Job<'w>) -> std::result::Result<Self, Self::Error> {
let workflow::Job::NormalJob(job) = value.deref() else {
bail!("job is not a normal job")
};

let Some(Strategy {
matrix: Some(matrix),
..
}) = &job.strategy
else {
bail!("job does not define a strategy or interior matrix")
};

Ok(Matrix { inner: matrix })
}
}

impl<'w> Matrix<'w> {
pub(crate) fn new(inner: &'w LoE<job::Matrix>) -> Self {
Self { inner }
}

/// Expands the current Matrix into all possible values
/// By default, the return is a pair (String, String), in which
/// the first component is the expanded path (e.g. 'matrix.os') and
/// the second component is the string representation for the expanded value
/// (eg ubuntu-latest)
///
pub(crate) fn expand_values(&self) -> Vec<(String, String)> {
match &self.inner {
LoE::Expr(_) => vec![],
LoE::Literal(matrix) => {
let LoE::Literal(dimensions) = &matrix.dimensions else {
return vec![];
};

let mut expansions = Matrix::expand_dimensions(dimensions);

if let LoE::Literal(includes) = &matrix.include {
let additional_expansions = includes
.iter()
.flat_map(Matrix::expand_inclusions_or_exclusions)
.collect::<Vec<_>>();

expansions.extend(additional_expansions);
};

let LoE::Literal(excludes) = &matrix.exclude else {
return expansions;
};

let to_exclude = excludes
.iter()
.flat_map(Matrix::expand_inclusions_or_exclusions)
.collect::<Vec<_>>();

expansions
.into_iter()
.filter(|expanded| !to_exclude.contains(expanded))
.collect()
}
}
}

/// Checks whether some expanded path leads to an expression
pub(crate) fn is_static(&self) -> bool {
let expands_to_expression = self
.expand_values()
.iter()
.any(|(_, expansion)| expansion.starts_with("${{") && expansion.ends_with("}}"));

!expands_to_expression
}

fn expand_inclusions_or_exclusions(
include: &IndexMap<String, serde_yaml::Value>,
) -> Vec<(String, String)> {
let normalized = include
.iter()
.map(|(k, v)| (k.to_owned(), json!(v)))
.collect::<HashMap<_, _>>();

Matrix::expand(normalized)
}

fn expand_dimensions(
dimensions: &IndexMap<String, LoE<Vec<serde_yaml::Value>>>,
) -> Vec<(String, String)> {
let normalized = dimensions
.iter()
.map(|(k, v)| (k.to_owned(), json!(v)))
.collect::<HashMap<_, _>>();

Matrix::expand(normalized)
}

fn expand(values: HashMap<String, serde_json::Value>) -> Vec<(String, String)> {
values
.iter()
.flat_map(|(key, value)| Matrix::walk_path(value, format!("matrix.{}", key)))
.collect()
}

// Walks recursively a serde_json::Value tree, expanding it into a Vec<(String, String)>
// according to the inner value of each node
fn walk_path(tree: &serde_json::Value, current_path: String) -> Vec<(String, String)> {
match tree {
Value::Null => vec![],

// In the case of scalars, we just convert the value to a string
Value::Bool(inner) => vec![(current_path, inner.to_string())],
Value::Number(inner) => vec![(current_path, inner.to_string())],
Value::String(inner) => vec![(current_path, inner.to_string())],

// In the case of an array, we recursively create on expansion pair for each item
Value::Array(inner) => inner
.iter()
.flat_map(|value| Matrix::walk_path(value, current_path.clone()))
.collect(),

// In the case of an object, we recursively create on expansion pair for each
// value in the key/value set, using the key to form the expanded path using
// the dot notation
Value::Object(inner) => inner
.iter()
.flat_map(|(key, value)| {
let mut new_path = current_path.clone();
new_path.push('.');
new_path.push_str(key);
Matrix::walk_path(value, new_path)
})
.collect(),
}
}
}

/// Represents a single step in a normal workflow job.
///
/// This type implements [`Deref`] for [`workflow::job::Step`], which
Expand Down
Loading
Loading