diff --git a/Cargo.lock b/Cargo.lock index 859462a64..f6d5c6da9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,7 +18,7 @@ dependencies = [ "parking_lot 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "pin-project 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "trust-dns-proto 0.18.0-alpha.2 (registry+https://github.com/rust-lang/crates.io-index)", "trust-dns-resolver 0.18.0-alpha.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -34,7 +34,7 @@ dependencies = [ "futures-core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -132,7 +132,7 @@ dependencies = [ "actix-threadpool 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "copyless 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -570,7 +570,7 @@ dependencies = [ "serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", "smart-default 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 
(registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "twoway 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -621,7 +621,7 @@ dependencies = [ "prometheus 0.7.0 (git+https://github.com/pingcap/rust-prometheus.git?rev=6a02b0d2943f8fffce672e236e22c6f925184d93)", "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "twoway 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -743,7 +743,7 @@ dependencies = [ "strum 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", "strum_macros 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", "tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1049,15 +1049,17 @@ dependencies = [ "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)", "smart-default 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", "tar 0.4.26 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 
(registry+https://github.com/rust-lang/crates.io-index)", "test-case 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "twoway 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1074,7 +1076,7 @@ dependencies = [ "indexmap 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1153,7 +1155,7 @@ dependencies = [ "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "pin-project 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "want 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1166,7 +1168,7 @@ dependencies = [ "bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", "native-tls 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 
(registry+https://github.com/rust-lang/crates.io-index)", "tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1520,7 +1522,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1679,7 +1681,7 @@ dependencies = [ "smart-default 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", "twoway 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1778,7 +1780,7 @@ dependencies = [ "serde 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2010,7 +2012,7 @@ dependencies = [ "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", "serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 
(registry+https://github.com/rust-lang/crates.io-index)", "tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "wasm-bindgen 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2056,6 +2058,14 @@ name = "ryu" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "schannel" version = "0.1.15" @@ -2144,7 +2154,7 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.8.9" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2380,7 +2390,7 @@ dependencies = [ [[package]] name = "tokio" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2422,7 +2432,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "native-tls 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2435,7 +2445,7 @@ dependencies = [ "futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "pin-project-lite 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2474,7 +2484,7 @@ 
dependencies = [ "rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "socket2 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -2492,7 +2502,7 @@ dependencies = [ "lru-cache 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "resolv-conf 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)", + "tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "trust-dns-proto 0.18.0-alpha.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -2604,6 +2614,16 @@ name = "version_check" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "want" version = "0.3.0" @@ -3022,6 +3042,7 @@ dependencies = [ "checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" 
+"checksum same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" "checksum schannel 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "f2f6abf258d99c3c1c5c2131d99d064e94b7b3dd5f416483057f308fea253339" "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d" "checksum security-framework 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "eee63d0f4a9ec776eeb30e220f0bc1e092c3ad744b2a379e3993070364d3adc2" @@ -3032,7 +3053,7 @@ dependencies = [ "checksum serde_derive 1.0.104 (registry+https://github.com/rust-lang/crates.io-index)" = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" "checksum serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "051c49229f282f7c6f3813f8286cc1e3323e8051823fce42c7ea80fe13521704" "checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" -"checksum serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)" = "38b08a9a90e5260fe01c6480ec7c811606df6d3a660415808c3c3fa8ed95b582" +"checksum serde_yaml 0.8.11 (registry+https://github.com/rust-lang/crates.io-index)" = "691b17f19fc1ec9d94ec0b5864859290dff279dbd7b03f017afda54eb36c3c35" "checksum sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" "checksum signal-hook-registry 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1797d48f38f91643908bb14e35e79928f9f4b3cefb2420a564dde0991b4358dc" @@ -3060,7 +3081,7 @@ dependencies = [ "checksum thread_local 1.0.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum threadpool 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e2f0c90a5f3459330ac8bc0d2f879c693bb7a2f59689c1083fc4ef83834da865" "checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" -"checksum tokio 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "ffa2fdcfa937b20cb3c822a635ceecd5fc1a27a6a474527e5516aa24b8c8820a" +"checksum tokio 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8fdd17989496f49cdc57978c96f0c9fe5e4a58a8bddc6813c449a4624f6a030b" "checksum tokio-current-thread 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d16217cad7f1b840c5a97dfb3c43b0c871fef423a6e8d2118c604e843662a443" "checksum tokio-executor 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0f27ee0e6db01c5f0b2973824547ce7e637b2ed79b891a9677b0de9bd532b6ac" "checksum tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bde02a3a5291395f59b06ec6945a3077602fac2b07eeeaf0dee2122f3619828" @@ -3087,6 +3108,7 @@ dependencies = [ "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" "checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" +"checksum walkdir 2.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" "checksum want 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" 
"checksum wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" "checksum wasm-bindgen 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)" = "5205e9afdf42282b192e2310a5b463a6d1c1d774e30dc3c791ac37ab42d2616c" diff --git a/cincinnati/src/lib.rs b/cincinnati/src/lib.rs index ae6e6d8dc..d3e2a6af5 100644 --- a/cincinnati/src/lib.rs +++ b/cincinnati/src/lib.rs @@ -68,6 +68,14 @@ impl Release { Release::Concrete(release) => &release.version, } } + + /// Get a mutable borrow of the release metadata if any + pub fn get_metadata_mut(&mut self) -> Option<&mut HashMap> { + match self { + Release::Abstract(_) => None, + Release::Concrete(release) => Some(&mut release.metadata), + } + } } /// Type to represent a Release with all its information. diff --git a/graph-builder/Cargo.toml b/graph-builder/Cargo.toml index 350954b1b..550e9db77 100644 --- a/graph-builder/Cargo.toml +++ b/graph-builder/Cargo.toml @@ -27,10 +27,11 @@ semver = { version = "^0.9.0", features = [ "serde" ] } serde = "^1.0.70" serde_derive = "^1.0.70" serde_json = "^1.0.22" +serde_yaml = "^0.8.11" smart-default = "^0.5.1" structopt = "^0.2.10" tar = "^0.4.16" -tokio = "0.2" +tokio = { version = "0.2.11", features = [ "fs", "stream" ] } toml = "^0.4.10" url = "^1.7.2" parking_lot = "^0.8.0" @@ -42,6 +43,7 @@ custom_debug_derive = "^0.1.7" built = "^0.3.2" [dev-dependencies] +walkdir = "2.3.1" twoway = "^0.2" assert-json-diff = "1.0.0" test-case = "1.0.0" diff --git a/graph-builder/src/plugins/github_openshift_secondary_metadata_scraper/github_v3.rs b/graph-builder/src/plugins/github_openshift_secondary_metadata_scraper/github_v3.rs new file mode 100644 index 000000000..c2fc0a390 --- /dev/null +++ b/graph-builder/src/plugins/github_openshift_secondary_metadata_scraper/github_v3.rs @@ -0,0 +1,76 @@ +//! This is a helper module for accessing the [GitHub API v3][]. +//! +//! 
[GitHub API v3]: https://developer.github.com/v3/ + +/// Commit structure. +#[derive(Default, Clone, Debug, Deserialize, PartialEq, Eq)] +pub(crate) struct Commit { + pub(crate) sha: String, + pub(crate) url: String, +} + +/// Branch structure. +#[derive(Debug, Deserialize, PartialEq, Eq)] +pub(crate) struct Branch { + pub(crate) name: String, + pub(crate) commit: Commit, + pub(crate) protected: bool, +} + +/// Format the URL to request branch information. +pub(crate) fn branches_url(org: &str, repo: &str) -> String { + format!( + "https://api.github.com/repos/{org}/{repo}/branches", + org = &org, + repo = &repo, + ) +} + +/// Format the URL to request a tarball URL. +pub(crate) fn tarball_url(org: &str, repo: &str, commit: &Commit) -> String { + format!( + "https://api.github.com/repos/{org}/{repo}/tarball/{sha}", + org = org, + repo = repo, + sha = commit.sha, + ) +} + +/// Format a subdirectory name for a specific revision's tarball. +pub(crate) fn archive_entry_directory_name(org: &str, repo: &str, commit: &Commit) -> String { + format!("{}-{}-{}", &org, &repo, &commit.sha[0..7],) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn de_serialize_branch() { + let json = r#" + [ + { + "name": "master", + "commit": { + "sha": "fef06adb57b9d965bfc9ae0959bd038f3044207e", + "url": "https://api.github.com/repos/openshift/cincinnati-graph-data/commits/fef06adb57b9d965bfc9ae0959bd038f3044207e" + }, + "protected": true + } + ] + "#; + + let branches = serde_json::from_str::>(&json).unwrap(); + + let branches_expected = vec![Branch { + name: "master".to_string(), + commit: Commit { + sha: "fef06adb57b9d965bfc9ae0959bd038f3044207e".to_string(), + url: "https://api.github.com/repos/openshift/cincinnati-graph-data/commits/fef06adb57b9d965bfc9ae0959bd038f3044207e".to_string() + }, + protected: true + }]; + + assert_eq!(branches_expected, branches); + } +} diff --git a/graph-builder/src/plugins/github_openshift_secondary_metadata_scraper/mod.rs 
/// Default value of the `output_whitelist` setting.
///
/// Every item is the source of a regular expression. The expressions are
/// matched unanchored against the path of each tarball entry, and only
/// matching entries are unpacked. Literal dots are escaped so that they do not
/// act as a wildcard.
pub static DEFAULT_OUTPUT_WHITELIST: &[&str] = &[
    "channels/.+\\.ya+ml",
    "blocked-edges/.+\\.ya+ml",
    "raw/metadata\\.json",
];

/// Value of the `User-Agent` header sent with every request to the GitHub API.
static USER_AGENT: &str = "openshift/cincinnati";
+#[derive(Debug, SmartDefault, Clone, Deserialize)] +#[serde(default)] +pub struct GithubOpenshiftSecondaryMetadataScraperSettings { + github_org: String, + github_repo: String, + branch: String, + output_directory: PathBuf, + #[default(DEFAULT_OUTPUT_WHITELIST.iter().map(|s| (*s).to_string()).collect())] + output_whitelist: Vec, +} + +#[derive(Debug, Default)] +pub struct State { + commit_wanted: Option, + commit_completed: Option, +} + +/// Plugin. +#[derive(Debug, SmartDefault)] +pub struct GithubOpenshiftSecondaryMetadataScraperPlugin { + settings: GithubOpenshiftSecondaryMetadataScraperSettings, + output_whitelist: Vec, + + #[default(FuturesMutex::new(Default::default()))] + state: FuturesMutex, +} + +impl GithubOpenshiftSecondaryMetadataScraperPlugin { + /// Instantiate a new instance of `Self`. + pub fn try_new(settings: GithubOpenshiftSecondaryMetadataScraperSettings) -> Fallible { + let output_whitelist: Vec = settings + .output_whitelist + .iter() + .try_fold( + Vec::with_capacity(settings.output_whitelist.len()), + |mut acc, cur| -> Fallible<_> { + let re = regex::Regex::new(cur)?; + acc.push(re); + Ok(acc) + }, + ) + .context("Parsing output whitelist strings as regex")?; + + Ok(Self { + settings, + output_whitelist, + + ..Default::default() + }) + } + + /// Lookup the latest commit on the given branch and update `self.state.commit_wanted`. + async fn refresh_commit_wanted(&self) -> Fallible { + let url = github_v3::branches_url(&self.settings.github_org, &self.settings.github_repo); + + trace!("Getting branches from {}", &url); + let bytes = reqwest::Client::new() + .get(&url) + .header(reqwest::header::USER_AGENT, USER_AGENT) + .header(reqwest::header::ACCEPT, "application/vnd.github.v3+json") + .send() + .await + .context(format!("Getting branches from {}", &url))? 
+ .bytes() + .await + .context(format!("Getting bytes from request to {}", &url))?; + + let json = std::str::from_utf8(&bytes).context("Parsing body as string")?; + + let branches = serde_json::from_str::>(&json) + .context(format!("Parsing {} to Vec", &json))?; + + let latest_commit = branches + .iter() + .filter_map(|branch| { + if branch.name == self.settings.branch { + Some(branch.commit.clone()) + } else { + None + } + }) + .nth(0) + .ok_or_else(|| { + failure::err_msg(format!( + "{}/{} does not have branch {}: {:#?}", + &self.settings.github_org, + &self.settings.github_repo, + &self.settings.branch, + &branches + )) + })?; + + trace!( + "Latest commit on branch {}: {:?}", + &self.settings.branch, + &latest_commit + ); + + let mut state = self.state.lock().await; + + (*state).commit_wanted = Some(latest_commit.clone()); + + let should_update = if let Some(commit_completed) = &state.commit_completed { + commit_completed != &latest_commit + } else { + true + }; + + Ok(should_update) + } + + /// Fetch the tarball for the latest wanted commit and extract it to the output directory. + async fn download_wanted(&self) -> Fallible<(github_v3::Commit, Box<[u8]>)> { + let commit_wanted = { + let state = &self.state.lock().await; + state + .commit_wanted + .clone() + .ok_or_else(|| failure::err_msg("commit_wanted unset"))? + }; + + let url = github_v3::tarball_url( + &self.settings.github_org, + &self.settings.github_repo, + &commit_wanted, + ); + + reqwest::Client::new() + .get(&url) + .header(reqwest::header::USER_AGENT, USER_AGENT) + .header(reqwest::header::ACCEPT, "application/vnd.github.v3.raw") + .send() + .await + .context(format!("Updating from tarball at {}", &url))? 
+ .bytes() + .await + .context(format!( + "Getting bytes from the request response to {}", + &url, + )) + .map_err(Into::into) + .map(|bytes| (commit_wanted, bytes.to_vec().into_boxed_slice())) + } + + /// Extract a given blob to the output directory, adhering to the output whitelist, and finally update the completed commit state. + async fn extract(&self, commit: github_v3::Commit, bytes: Box<[u8]>) -> Fallible<()> { + // Use a tempdir as intermediary extraction target, and later rename to the destination + let tmpdir = tempfile::tempdir()?; + + { + let settings = self.settings.clone(); + let commit = commit.clone(); + let output_whitelist = self.output_whitelist.clone(); + let tmpdir = tmpdir.path().to_owned(); + + tokio::task::spawn_blocking(move || -> Fallible<()> { + use flate2::read::GzDecoder; + use tar::Archive; + + let mut archive = Archive::new(GzDecoder::new(bytes.as_ref())); + + archive + .entries()? + .filter_map(move |entry_result| match entry_result { + Ok(entry) => { + trace!("Processing entry {:?}", &entry.path()); + Some(entry) + } + + Err(e) => { + warn!( + "Could not process entry in tarball from commit {:?}: {}", + &commit, e + ); + None + } + }) + .try_for_each(|mut entry| -> Fallible<_> { + let path = entry + .path() + .context(format!( + "Getting path from entry {:?}", + &entry.header().clone().path().unwrap_or_default() + ))? + .to_str() + .ok_or_else(|| failure::err_msg("Could not get string from entry"))? + .to_owned(); + trace!("Processing entry with path {:?}", &path); + + if output_whitelist + .iter() + .any(|whitelist_regex| whitelist_regex.is_match(&path)) + { + debug!("Unpacking {:?} to {:?}", &path, &settings.output_directory); + entry + .unpack_in(&tmpdir) + .context(format!("Unpacking {:?} to {:?}", &path, &tmpdir))?; + }; + + Ok(()) + }) + }) + .await?? + }; + + { + // Move all files from the archive specific subdirectory to the output directory. 
+ let rename_from = tmpdir.path().join(github_v3::archive_entry_directory_name( + &self.settings.github_org, + &self.settings.github_repo, + &commit, + )); + let rename_to = &self.settings.output_directory; + let msg = format!("Renaming {:?} -> {:?}", &rename_from, &rename_to); + + // Acquire the state lock as we're going to move files from the + // commit specific directory into the output directory. + let mut state_guard = self.state.lock().await; + + debug!("{}", &msg); + tokio::fs::rename(&rename_from, &rename_to) + .await + .context(msg)?; + + // Set commit_completed to the one we've extracted. + state_guard.commit_completed = Some(commit); + } + + Ok(()) + } +} + +impl PluginSettings for GithubOpenshiftSecondaryMetadataScraperSettings { + fn build_plugin(&self, _: Option<&prometheus::Registry>) -> Fallible { + let plugin = GithubOpenshiftSecondaryMetadataScraperPlugin::try_new(self.clone())?; + Ok(new_plugin!(InternalPluginWrapper(plugin))) + } +} + +#[async_trait] +impl InternalPlugin for GithubOpenshiftSecondaryMetadataScraperPlugin { + async fn run_internal(self: &Self, io: InternalIO) -> Fallible { + let should_update = self + .refresh_commit_wanted() + .await + .context("Checking for new commit")?; + + if should_update { + let (commit, blob) = self + .download_wanted() + .await + .context("Downloading tarball")?; + self.extract(commit, blob) + .await + .context("Extracting tarball")?; + }; + + Ok(io) + } +} + +#[cfg(test)] +#[cfg(feature = "test-net")] +mod network_tests { + use super::*; + use std::collections::HashSet; + #[test] + fn openshift_secondary_metadata_extraction() -> Fallible<()> { + let mut runtime = commons::testing::init_runtime()?; + + let tmpdir = tempfile::tempdir()?; + + let settings = + toml::from_str::(&format!( + r#" + github_org = "openshift" + github_repo = "cincinnati-graph-data" + branch = "master" + output_whitelist = [ {} ] + output_directory = {:?} + "#, + DEFAULT_OUTPUT_WHITELIST + .iter() + .map(|s| format!(r#"{:?}"#, s)) 
+ .collect::>() + .join(", "), + &tmpdir.path(), + ))?; + + debug!("Settings: {:#?}", &settings); + + let plugin = Box::new(GithubOpenshiftSecondaryMetadataScraperPlugin::try_new( + settings, + )?); + + let _ = runtime.block_on(plugin.run_internal(InternalIO { + graph: Default::default(), + parameters: Default::default(), + }))?; + + let regexes = DEFAULT_OUTPUT_WHITELIST + .iter() + .map(|s| regex::Regex::new(s).unwrap()) + .collect::>(); + assert!(!regexes.is_empty(), "no regexes compiled"); + + let extracted_paths: HashSet = walkdir::WalkDir::new(tmpdir.path()) + .into_iter() + .map(Result::unwrap) + .filter(|entry| entry.file_type().is_file()) + .filter_map(|file| { + let path = file.path(); + path.to_str().map(str::to_owned) + }) + .collect(); + assert!(!extracted_paths.is_empty(), "no files were extracted"); + + // ensure all files match the configured regexes + extracted_paths.iter().for_each(|path| { + assert!( + regexes.iter().any(|re| re.is_match(&path)), + "{} doesn't match any of the regexes: {:#?}", + path, + regexes + ) + }); + + // ensure every regex matches at least one file + regexes.iter().for_each(|re| { + assert!( + extracted_paths.iter().any(|path| re.is_match(path)), + "regex {} didn't match a file", + &re + ); + }); + + Ok(()) + } +} diff --git a/graph-builder/src/plugins/mod.rs b/graph-builder/src/plugins/mod.rs index 21fc17308..045cb5b77 100644 --- a/graph-builder/src/plugins/mod.rs +++ b/graph-builder/src/plugins/mod.rs @@ -1,3 +1,4 @@ //! Plugins specific to the graph-builder +pub mod github_openshift_secondary_metadata_scraper; pub mod release_scrape_dockerv2;