From 432a9df7abba12752b714130e96dd7c125718caa Mon Sep 17 00:00:00 2001 From: Isala Piyarisi Date: Fri, 11 Mar 2022 23:19:37 +0530 Subject: [PATCH] refactor(sherlock): Moved to single use tf-serving architecture --- sherlock/Cargo.toml | 4 +- sherlock/Dockerfile | 12 ++--- sherlock/deployment.yaml | 101 +++++++++++++++++++++++++++++++++++++++ sherlock/src/main.rs | 75 +++++++++++++---------------- 4 files changed, 140 insertions(+), 52 deletions(-) create mode 100644 sherlock/deployment.yaml diff --git a/sherlock/Cargo.toml b/sherlock/Cargo.toml index 04b148e..828ed1d 100644 --- a/sherlock/Cargo.toml +++ b/sherlock/Cargo.toml @@ -11,5 +11,5 @@ lazy_static = "1.4.0" hyper = { version = "0.14.17", features = ["full"] } tokio = { version = "1", features = ["full"] } reqwest = { version = "0.11", features = ["blocking", "json"] } -flate2 = "1.0.22" -tar = "0.4.38" \ No newline at end of file +serde = "1.0" +serde_yaml = "0.8" \ No newline at end of file diff --git a/sherlock/Dockerfile b/sherlock/Dockerfile index 797bfd0..6fed15c 100644 --- a/sherlock/Dockerfile +++ b/sherlock/Dockerfile @@ -12,17 +12,15 @@ RUN cargo build --target x86_64-unknown-linux-musl --release # Production Image FROM alpine:latest RUN apk --no-cache add ca-certificates - +WORKDIR /app # COPY the binary -COPY --from=builder /sherlock/target/x86_64-unknown-linux-musl/release/sherlock /sherlock +RUN mkdir -p /app/models +COPY --from=builder /sherlock/target/x86_64-unknown-linux-musl/release/sherlock /app/sherlock # DEFAULT ENV -ENV SERVICE_NAME=sample_service -ENV NAMESPACE=default -ENV END_POINT=http://localhost:8501/v1/models/sherlock:predict +ENV END_POINT=http://localhost:8501/v1/models ENV POOL_DURATION=60 -ENV MODEL_URL=https://static.isala.me/lazy-koala/sherlock/models/sample_service.tar.gz # Define port EXPOSE 9898 -CMD ["/sherlock"] \ No newline at end of file +CMD ["/app/sherlock"] \ No newline at end of file diff --git a/sherlock/deployment.yaml b/sherlock/deployment.yaml new file mode 100644 index 0000000..3352240 --- /dev/null +++ b/sherlock/deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sherlock + namespace: lazy-koala +spec: + selector: + matchLabels: + name: sherlock + template: + metadata: + labels: + name: sherlock + annotations: + lazy-koala/scrape: "true" + spec: + serviceAccountName: gke-workload-identity + containers: + - name: sherlock + image: ghcr.io/mrsupiri/lazy-koala/sherlock:latest + ports: + - containerPort: 9898 + name: metrics + env: + - name: END_POINT + value: "http://localhost:8501/v1/models/sherlock:predict" + - name: POOL_DURATION + value: "1" + volumeMounts: + - name: model-config + mountPath: /app/config + - image: google/cloud-sdk + name: model-poller + imagePullPolicy: IfNotPresent + env: + - name: BUCKET_NAME + value: sherlock.isala.me + command: + - /bin/bash + - -ce + - |- + mkdir -p models + while true; do gsutil -m rsync -d -r gs://$BUCKET_NAME models; sleep 60; done + volumeMounts: + - name: model-files + mountPath: /models + - name: serving + image: tensorflow/serving + imagePullPolicy: IfNotPresent + args: + [ + "--model_config_file_poll_wait_seconds=60", + "--model_config_file=/config/models.config", + ] + ports: + - containerPort: 8501 + name: http + env: + - name: MODEL_NAME + value: "sherlock" + volumeMounts: + - name: model-config + mountPath: /config + - name: model-files + mountPath: /models + volumes: + - name: sherlock-config + configMap: + name: sherlock-config + - name: model-files + emptyDir: {} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sherlock-config + namespace: lazy-koala +data: + models.config: | + model_config_list { + config { + name: 'service_1' + base_path: '/models/service_1/' + model_platform: 'tensorflow' + } + config { + name: 'service_2' + base_path: '/models/service_2/' + model_platform: 'tensorflow' + } + } + services.yaml: | +# --- +## https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity +# apiVersion: v1 +# kind: ServiceAccount +# metadata: +# annotations: +# iam.gke.io/gcp-service-account: sherlock@lazy-koala.iam.gserviceaccount.com +# name: gke-workload-identity +# namespace: lazy-koala \ No newline at end of file diff --git a/sherlock/src/main.rs b/sherlock/src/main.rs index 359ec40..8da83c2 100644 --- a/sherlock/src/main.rs +++ b/sherlock/src/main.rs @@ -3,36 +3,26 @@ use hyper::{ service::{make_service_fn, service_fn}, Body, Request, Response, Server, }; -use prometheus::{Encoder, Gauge, TextEncoder}; +use prometheus::{Encoder, GaugeVec, TextEncoder}; use std::{env::var, thread, time::Duration}; use lazy_static::lazy_static; -use prometheus::{labels, opts, register_gauge}; +use prometheus::{opts, register_gauge_vec}; use std::collections::HashMap; -use flate2::read::GzDecoder; -use tar::Archive; +use serde_yaml; lazy_static! { - static ref SERVICE_NAME: String = var("SERVICE_NAME").unwrap(); - static ref NAMESPACE: String = var("NAMESPACE").unwrap(); static ref END_POINT: String = var("END_POINT").unwrap(); static ref POOL_DURATION: String = var("POOL_DURATION").unwrap(); - static ref MODEL_URL: String = var("MODEL_URL").unwrap(); - static ref ANOMLAY_GAUGE: Gauge = register_gauge!(opts!( - "anomaly_score", - "Reconstruction loss of the autoencoder", - labels! {"serviceName" => SERVICE_NAME.as_str(), "namespace" => NAMESPACE.as_str()} - )) + static ref ANOMLAY_GAUGE: GaugeVec = register_gauge_vec!( + opts!( + "anomaly_score", + "Reconstruction loss of the autoencoder" + ), + &["serviceName"] + ) .unwrap(); } -fn download_model()->Result<(), Box>{ - let resp = reqwest::blocking::get(MODEL_URL.as_str())?; - let tarfile = GzDecoder::new(resp); - let mut archive = Archive::new(tarfile); - archive.unpack("models/")?; - - Ok(()) -} async fn serve_req(_req: Request) -> Result, hyper::Error> { let encoder = TextEncoder::new(); @@ -51,42 +41,41 @@ async fn serve_req(_req: Request) -> Result, hyper::Error> } -fn query_model() ->Result> { - let resp = reqwest::blocking::get(END_POINT.as_str())? - .json::>()?; +fn query_model(service: &String) ->Result> { + println!("Querying {} model", service); + let endpoint = format!("{}/{}:predict", END_POINT.as_str(), service); + let resp = reqwest::blocking::get(endpoint)?.json::>()?; Ok(resp["predictions"]) } fn poll_anomaly_scores(delay: u64) { - loop { - - if let Ok(value) = query_model(){ - ANOMLAY_GAUGE.set(value); - } - + match read_config() { + Ok(services) => { + for service in services.iter(){ + match query_model(service) { + Ok(score) => ANOMLAY_GAUGE.with_label_values(&[service]).set(score), + Err(e) => eprintln!("Error while querying model: {}", e), + } + } + }, + Err(e) => eprintln!("Error while parsing config: {}", e), + }; thread::sleep(Duration::from_secs(delay)); } } -#[allow(unused_must_use)] +fn read_config() -> Result, Box> { + let f = std::fs::File::open("config/config.yaml")?; + let services: Vec = serde_yaml::from_reader(f)?; + Ok(services) +} + #[tokio::main] async fn main() { - - // Forgive me father for i have sinned 🙏 - // I couldn't figure out way to use reqwest's - // async response with GzDecoder 😭 - thread::spawn(|| { - if let Err(err) = download_model() { - eprintln!("failed to download the model: {}", err); - std::process::exit(1); - } - }).join(); - + thread::spawn(|| poll_anomaly_scores(POOL_DURATION.as_str().parse::().unwrap())); - ANOMLAY_GAUGE.set(0.0); - let addr = ([0, 0, 0, 0], 9898).into(); println!("Listening on http://{}", addr);