From 5ab8724594468dacc59e8bde0578a61f7be5ffc3 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Thu, 31 Oct 2024 19:22:31 +0000
Subject: [PATCH 01/21] feat: integrate `transformers.js` with rust backend

Simple integration between `transformers.js` and the Rust `ort` backend,
exposing a JS-owned API.
---
 crates/sb_ai/js/ai.js                     |   2 +-
 crates/sb_ai/js/onnxruntime/onnx.js       |  80 +++++++++++
 crates/sb_ai/lib.rs                       |  13 +-
 crates/sb_ai/onnxruntime/mod.rs           | 160 ++++++++++++++++++++++
 crates/sb_ai/{ => onnxruntime}/onnx.rs    |   0
 crates/sb_ai/{ => onnxruntime}/session.rs |  31 +++++
 examples/generate-embeddings/index.ts     |  20 +--
 examples/ort-rust-backend/index.ts        |  28 ++++
 8 files changed, 318 insertions(+), 16 deletions(-)
 create mode 100644 crates/sb_ai/js/onnxruntime/onnx.js
 create mode 100644 crates/sb_ai/onnxruntime/mod.rs
 rename crates/sb_ai/{ => onnxruntime}/onnx.rs (100%)
 rename crates/sb_ai/{ => onnxruntime}/session.rs (81%)
 create mode 100644 examples/ort-rust-backend/index.ts

diff --git a/crates/sb_ai/js/ai.js b/crates/sb_ai/js/ai.js
index 52f564cd8..174e64ff4 100644
--- a/crates/sb_ai/js/ai.js
+++ b/crates/sb_ai/js/ai.js
@@ -1,4 +1,4 @@
-
+import 'ext:sb_ai/js/onnxruntime/onnx.js';
 import EventSourceStream from 'ext:sb_ai/js/util/event_source_stream.mjs';
 
 const core = globalThis.Deno.core;
diff --git a/crates/sb_ai/js/onnxruntime/onnx.js b/crates/sb_ai/js/onnxruntime/onnx.js
new file mode 100644
index 000000000..aafeaaf08
--- /dev/null
+++ b/crates/sb_ai/js/onnxruntime/onnx.js
@@ -0,0 +1,80 @@
+const core = globalThis.Deno.core;
+
+// Workaround to serialize
+BigInt64Array.prototype.toJSON = function () {
+  return [...this].map(Number);
+};
+
+class Tensor {
+  /** @type {number[]} Dimensions of the tensor. */
+  dims;
+
+  /** @type {DataType} Type of the tensor. */
+  type;
+
+  /** @type {DataArray} The data stored in the tensor. */
+  data;
+
+  /** @type {number} The number of elements in the tensor. */
+  size;
+
+  constructor(type, data, dims) {
+    this.type = type;
+    this.data = data;
+    this.dims = dims;
+
+    // console.log('onnx.js Tensor:', this);
+  }
+}
+
+class InferenceSession {
+  sessionId;
+  inputNames;
+  outputNames;
+
+  constructor(sessionId, inputNames, outputNames) {
+    this.sessionId = sessionId;
+    this.inputNames = inputNames;
+    this.outputNames= outputNames;
+  }
+
+  static async fromBuffer(modelBuffer) {
+    const {id, inputs, outputs} = await core.ops.op_sb_ai_ort_init_session(modelBuffer);
+
+    console.log('onnx.js fromBuffer:', {id, inputs, outputs});
+
+    return new InferenceSession(id, inputs, outputs);
+  }
+
+  async run(inputs) {
+    console.log('onnx.js run: [inputs]', inputs);
+
+    const outputs = await core.ops.op_sb_ai_ort_run_session(this.sessionId, JSON.parse(JSON.stringify(inputs)));
+
+    // Parse to Tensor
+    for(const key in outputs) {
+      if(Object.hasOwn(outputs, key)) {
+        const {type, cpuData, dims} = outputs[key];
+        outputs[key] = new Tensor(type, cpuData, dims);
+      }
+    }
+
+    console.log('onnx.js run: [outputs]', outputs);
+
+    return outputs;
+  }
+}
+
+const onnxruntime = {
+  InferenceSession: {
+    create: InferenceSession.fromBuffer
+  },
+  Tensor,
+  env: {
+    wasm: {
+      proxy: false
+    }
+  }
+};
+
+globalThis[Symbol.for("onnxruntime")] = onnxruntime;
diff --git a/crates/sb_ai/lib.rs b/crates/sb_ai/lib.rs
index 3d3ada169..e59080cb4 100644
--- a/crates/sb_ai/lib.rs
+++ b/crates/sb_ai/lib.rs
@@ -1,5 +1,4 @@
-mod onnx;
-mod session;
+mod onnxruntime;
 
 use anyhow::{anyhow, bail, Error};
 use base_rt::BlockingScopeCPUUsageMetricExt;
@@ -8,8 +7,8 @@ use deno_core::OpState;
 use deno_core::{op2, JsRuntime, V8CrossThreadTaskSpawner, V8TaskSpawner};
 use ndarray::{Array1, Array2, ArrayView3, Axis, Ix3};
 use ndarray_linalg::norm::{normalize, NormalizeAxis};
+use onnxruntime::session::load_session_from_file;
 use ort::inputs;
-use session::load_session_from_file;
 use std::cell::RefCell;
 use std::path::Path;
 use std::rc::Rc;
@@ -18,6 +17,7 @@ use tokenizers::Tokenizer;
 use tokio::sync::{mpsc, oneshot};
 use tokio::task;
 
+use onnxruntime::*;
 use tracing::{error, trace_span};
 
 deno_core::extension!(
@@ -25,13 +25,16 @@ deno_core::extension!(
     ops = [
         op_sb_ai_run_model,
         op_sb_ai_init_model,
-        op_sb_ai_try_cleanup_unused_session
+        op_sb_ai_try_cleanup_unused_session,
+        op_sb_ai_ort_init_session,
+        op_sb_ai_ort_run_session,
     ],
     esm_entry_point = "ext:sb_ai/js/ai.js",
     esm = [
         "js/ai.js",
         "js/util/event_stream_parser.mjs",
-        "js/util/event_source_stream.mjs"
+        "js/util/event_source_stream.mjs",
+        "js/onnxruntime/onnx.js"
     ]
 );
 
diff --git a/crates/sb_ai/onnxruntime/mod.rs b/crates/sb_ai/onnxruntime/mod.rs
new file mode 100644
index 000000000..3995b7ed7
--- /dev/null
+++ b/crates/sb_ai/onnxruntime/mod.rs
@@ -0,0 +1,160 @@
+pub(crate) mod onnx;
+pub(crate) mod session;
+
+use core::panic;
+use std::{borrow::Cow, collections::HashMap};
+
+use anyhow::{anyhow, Result};
+use deno_core::op2;
+use ort::{SessionInputValue, TensorElementType, Value, ValueType};
+use serde::{Deserialize, Serialize};
+
+use onnx::ensure_onnx_env_init;
+use session::{get_session, load_session_from_bytes};
+
+#[derive(Serialize, Debug, Clone)]
+#[serde(rename_all = "camelCase")]
+struct SessionInfo {
+    id: String,
+    inputs: Vec<String>,
+    outputs: Vec<String>,
+}
+
+// TODO: Better tensor convertion
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(rename_all = "camelCase")]
+struct TensorInt64 {
+    #[serde(rename = "type")]
+    data_type: String,
+    dims: Vec<i64>,
+    #[serde(rename = "cpuData")]
+    data: Vec<i64>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(rename_all = "camelCase")]
+struct TensorFloat32 {
+    #[serde(rename = "type")]
+    data_type: String,
+    dims: Vec<i64>,
+    #[serde(rename = "cpuData")]
+    data: Vec<f32>,
+}
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(untagged)]
+enum Tensor {
+    Int64(TensorInt64),
+    Float32(TensorFloat32),
+}
+
+#[op2]
+#[serde]
+pub fn op_sb_ai_ort_init_session(#[buffer] model_bytes: &[u8]) -> Result<SessionInfo> {
+    println!("Hello from ORT");
+
+    println!(
+        "op_sb_ai_init_ort_session: received {} bytes.",
+        model_bytes.len()
+    );
+
+    if let Some(err) = ensure_onnx_env_init() {
+        return Err(anyhow!("failed to create onnx environment: {err}"));
+    }
+
+    let (session_id, session) = load_session_from_bytes(model_bytes)?;
+
+    let session_info = SessionInfo {
+        id: session_id,
+        inputs: session.inputs.iter().map(|i| i.name.to_owned()).collect(),
+        outputs: session.outputs.iter().map(|o| o.name.to_owned()).collect(),
+    };
+
+    Ok(session_info)
+}
+
+#[op2]
+#[serde]
+pub fn op_sb_ai_ort_run_session(
+    #[string] session_id: String,
+    #[serde] inputs: HashMap<String, Tensor>,
+) -> Result<HashMap<String, Tensor>> {
+    // TODO: take inputs a hash map and pass then to session.
+    println!("op_sb_ai_run_ort_session: got {inputs:?}");
+
+    let session = get_session(&session_id).ok_or(anyhow!("session {session_id} not initalized"))?;
+    println!("op_sb_ai_run_ort_session: loaded {session_id} -> {session:?}");
+
+    // Prepare input values
+    let mut inputs = inputs
+        .iter()
+        .map(|(key, value)| {
+            // TODO: Proper conversion
+            let raw_tensor = match value {
+                Tensor::Int64(value) => {
+                    Value::from_array((value.dims.to_owned(), value.data.to_owned())).unwrap()
+                }
+                Tensor::Float32(_) => {
+                    panic!("invalid TensorFloat32")
+                }
+            };
+
+            (key, raw_tensor)
+        })
+        .collect::<HashMap<_, _>>();
+
+    // Create input session map
+    let input_values = session
+        .inputs
+        .iter()
+        .map(|input| {
+            (
+                Cow::from(&input.name),
+                SessionInputValue::from(inputs.remove(&input.name).unwrap()),
+            )
+        })
+        .collect::<Vec<_>>();
+
+    let outputs = session.run(input_values)?;
+    println!("op_sb_ai_run_ort_session: outputs {outputs:?}");
+
+    // Prepare outputs
+    let output_map = session
+        .outputs
+        .iter()
+        .map(|output| {
+            // TODO: Proper pattern matching
+            let ValueType::Tensor { ty, .. } = output.output_type else {
+                panic!("Invalid output_type");
+            };
+            let tensor = if let TensorElementType::Float32 = ty {
+                let (dims, data) = outputs
+                    .get(output.name.as_str())
+                    .unwrap()
+                    .try_extract_raw_tensor::<f32>()
+                    .unwrap();
+
+                Tensor::Float32(TensorFloat32 {
+                    data_type: "float32".into(),
+                    dims,
+                    data: data.to_vec(),
+                })
+            } else {
+                let (dims, data) = outputs
+                    .get(&output.name.as_str())
+                    .unwrap()
+                    .try_extract_raw_tensor::<i64>()
+                    .unwrap();
+
+                Tensor::Int64(TensorInt64 {
+                    data_type: "int64".into(),
+                    dims,
+                    data: data.to_vec(),
+                })
+            };
+
+            (output.name.to_owned(), tensor)
+        })
+        .collect();
+
+    Ok(output_map)
+}
diff --git a/crates/sb_ai/onnx.rs b/crates/sb_ai/onnxruntime/onnx.rs
similarity index 100%
rename from crates/sb_ai/onnx.rs
rename to crates/sb_ai/onnxruntime/onnx.rs
diff --git a/crates/sb_ai/session.rs b/crates/sb_ai/onnxruntime/session.rs
similarity index 81%
rename from crates/sb_ai/session.rs
rename to crates/sb_ai/onnxruntime/session.rs
index 219b6b382..5b372c8d9 100644
--- a/crates/sb_ai/session.rs
+++ b/crates/sb_ai/onnxruntime/session.rs
@@ -1,9 +1,11 @@
 use deno_core::error::AnyError;
 use once_cell::sync::Lazy;
 use std::collections::HashMap;
+use std::hash::Hasher;
 use std::sync::Mutex;
 use std::{path::PathBuf, sync::Arc};
 use tracing::{debug, instrument, trace};
+use xxhash_rust::xxh3::Xxh3;
 
 use anyhow::{anyhow, Error};
 use ort::{
@@ -107,6 +109,35 @@ pub(crate) fn load_session_from_file(
 }
 
 #[instrument(level = "debug", ret)]
+pub(crate) fn load_session_from_bytes(model_bytes: &[u8]) -> Result<(String, Arc<Session>), Error> {
+    let session_id = {
+        let mut model_bytes = model_bytes;
+        let mut hasher = Xxh3::new();
+        let _ = std::io::copy(&mut model_bytes, &mut hasher);
+
+        let hash = hasher.finish().to_be_bytes();
+        faster_hex::hex_string(&hash)
+    };
+
+    let mut sessions = SESSIONS.lock().unwrap();
+
+    if let Some(session) = sessions.get(&session_id) {
+        return Ok((session_id, session.clone()));
+    }
+
+    let session = create_session(model_bytes)?;
+
+    sessions.insert(session_id.to_owned(), session.clone());
+
+    Ok((session_id, session))
+}
+
+pub(crate) fn get_session(session_id: &String) -> Option<Arc<Session>> {
+    let sessions = SESSIONS.lock().unwrap();
+
+    sessions.get(session_id).map(|session| session.clone())
+}
+
 pub fn cleanup() -> Result<usize, AnyError> {
     let mut remove_counter = 0;
     {
diff --git a/examples/generate-embeddings/index.ts b/examples/generate-embeddings/index.ts
index 3b43f47ac..cd5bdebb5 100644
--- a/examples/generate-embeddings/index.ts
+++ b/examples/generate-embeddings/index.ts
@@ -7,17 +7,17 @@ env.allowLocalModels = false;
 const pipe = await pipeline('feature-extraction', 'Supabase/gte-small');
 
 Deno.serve(async (req) => {
-	const params = new URL(req.url).searchParams;
-	const input = params.get('text');
+  const params = new URL(req.url).searchParams;
+  const input = params.get('text');
 
-	const output = await pipe(input, {
-		pooling: 'mean',
-		normalize: true,
-	});
+  const output = await pipe(input, {
+    pooling: 'mean',
+    normalize: true,
+  });
 
-	const embedding = Array.from(output.data);
+  const embedding = Array.from(output.data);
 
-	return new Response(JSON.stringify(embedding), {
-		headers: { 'Content-Type': 'application/json' },
-	});
+  return new Response(JSON.stringify(embedding), {
+    headers: { 'Content-Type': 'application/json' },
+  });
 });
diff --git a/examples/ort-rust-backend/index.ts b/examples/ort-rust-backend/index.ts
new file mode 100644
index 000000000..126deb259
--- /dev/null
+++ b/examples/ort-rust-backend/index.ts
@@ -0,0 +1,28 @@
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.21/dist/transformers.min.js';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('feature-extraction', 'supabase/gte-small', { device: 'auto' });
+
+Deno.serve(async (req: Request) => {
+  const { input } = await req.json();
+
+  const output = await pipe(input, { pooling: 'mean', normalize: true });
+
+  return new Response(
+    JSON.stringify(
+      output,
+    ),
+    {
+      headers: {
+        'Content-Type': 'application/json',
+        'Connection': 'keep-alive',
+      },
+    },
+  );
+});

From 2f2632089e23a0c8a754990f9372d59a642160d6 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Tue, 24 Sep 2024 10:55:19 +0100
Subject: [PATCH 02/21] stamp: refactoring tensors ser/de to try zero-copy

- Implementing the v8 `ToV8` and `FromV8` traits to convert tensors.
- Resolving the `ort::Tensor` type based on the JS tensor's type.
---
 crates/sb_ai/js/onnxruntime/onnx.js       |  59 +--
 crates/sb_ai/onnxruntime/mod.rs           | 167 ++------
 crates/sb_ai/onnxruntime/model_session.rs |  74 ++++
 crates/sb_ai/onnxruntime/tensor.rs        | 485 ++++++++++++++++++++++
 4 files changed, 619 insertions(+), 166 deletions(-)
 create mode 100644 crates/sb_ai/onnxruntime/model_session.rs
 create mode 100644 crates/sb_ai/onnxruntime/tensor.rs

diff --git a/crates/sb_ai/js/onnxruntime/onnx.js b/crates/sb_ai/js/onnxruntime/onnx.js
index aafeaaf08..c0b91a7fc 100644
--- a/crates/sb_ai/js/onnxruntime/onnx.js
+++ b/crates/sb_ai/js/onnxruntime/onnx.js
@@ -1,29 +1,38 @@
 const core = globalThis.Deno.core;
 
-// Workaround to serialize
-BigInt64Array.prototype.toJSON = function () {
-  return [...this].map(Number);
-};
-
 class Tensor {
-  /** @type {number[]} Dimensions of the tensor. */
-  dims;
-
   /** @type {DataType} Type of the tensor. */
   type;
 
   /** @type {DataArray} The data stored in the tensor. */
   data;
 
+  /** @type {number[]} Dimensions of the tensor. */
+  dims;
+
   /** @type {number} The number of elements in the tensor. */
-  size;
+  size = 0;
 
   constructor(type, data, dims) {
     this.type = type;
     this.data = data;
     this.dims = dims;
+  }
 
-    // console.log('onnx.js Tensor:', this);
+  static isTensorLike(object) {
+    return (
+      Object.hasOwn(object, 'type')
+      && Object.hasOwn(object, 'cpuData')
+      && Object.hasOwn(object, 'dims')
+    )
+  }
+
+  static toTuple(tensorLike) {
+    if (!this.isTensorLike(tensorLike)) {
+      throw Error('The given object is not a valid Tensor like.');
+    }
+
+    return [tensorLike.type, tensorLike.cpuData, tensorLike.dims]
   }
 }
 
@@ -39,29 +48,27 @@ class InferenceSession {
   }
 
   static async fromBuffer(modelBuffer) {
-    const {id, inputs, outputs} = await core.ops.op_sb_ai_ort_init_session(modelBuffer);
-
-    console.log('onnx.js fromBuffer:', {id, inputs, outputs});
+    const [id, inputs, outputs] = await core.ops.op_sb_ai_ort_init_session(modelBuffer);
 
     return new InferenceSession(id, inputs, outputs);
   }
 
   async run(inputs) {
-    console.log('onnx.js run: [inputs]', inputs);
-
-    const outputs = await core.ops.op_sb_ai_ort_run_session(this.sessionId, JSON.parse(JSON.stringify(inputs)));
-
-    // Parse to Tensor
-    for(const key in outputs) {
-      if(Object.hasOwn(outputs, key)) {
-        const {type, cpuData, dims} = outputs[key];
-        outputs[key] = new Tensor(type, cpuData, dims);
-      }
+    // We pass values as tuples to avoid string allocation
+    // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
+    const tupledTensors = Object.values(inputs).map(tensor => Tensor.toTuple(tensor));
+    const outputTuples = await core.ops.op_sb_ai_ort_run_session(this.sessionId, tupledTensors);
+
+    // Since we got outputs as tuples we need to re-map it to an object
+    const result = {};
+    for (let idx = 0; idx < this.outputNames.length; idx++) {
+      const key = this.outputNames[idx];
+      const [type, data, dims] = outputTuples[idx];
+
+      result[key] = new Tensor(type, data, dims);
     }
 
-    console.log('onnx.js run: [outputs]', outputs);
-
-    return outputs;
+    return result;
   }
 }
 
diff --git a/crates/sb_ai/onnxruntime/mod.rs b/crates/sb_ai/onnxruntime/mod.rs
index 3995b7ed7..3450ff766 100644
--- a/crates/sb_ai/onnxruntime/mod.rs
+++ b/crates/sb_ai/onnxruntime/mod.rs
@@ -1,160 +1,47 @@
+mod model_session;
 pub(crate) mod onnx;
 pub(crate) mod session;
+mod tensor;
 
-use core::panic;
-use std::{borrow::Cow, collections::HashMap};
-
-use anyhow::{anyhow, Result};
+use anyhow::Result;
 use deno_core::op2;
-use ort::{SessionInputValue, TensorElementType, Value, ValueType};
-use serde::{Deserialize, Serialize};
-
-use onnx::ensure_onnx_env_init;
-use session::{get_session, load_session_from_bytes};
 
-#[derive(Serialize, Debug, Clone)]
-#[serde(rename_all = "camelCase")]
-struct SessionInfo {
-    id: String,
-    inputs: Vec<String>,
-    outputs: Vec<String>,
-}
-
-// TODO: Better tensor convertion
-#[derive(Serialize, Deserialize, Debug, Clone)]
-#[serde(rename_all = "camelCase")]
-struct TensorInt64 {
-    #[serde(rename = "type")]
-    data_type: String,
-    dims: Vec<i64>,
-    #[serde(rename = "cpuData")]
-    data: Vec<i64>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-#[serde(rename_all = "camelCase")]
-struct TensorFloat32 {
-    #[serde(rename = "type")]
-    data_type: String,
-    dims: Vec<i64>,
-    #[serde(rename = "cpuData")]
-    data: Vec<f32>,
-}
-#[derive(Serialize, Deserialize, Debug, Clone)]
-#[serde(untagged)]
-enum Tensor {
-    Int64(TensorInt64),
-    Float32(TensorFloat32),
-}
+use model_session::{ModelInfo, ModelSession};
+use tensor::{JsDynTensorValue, JsSessionInputs, JsSessionOutputs};
 
 #[op2]
-#[serde]
-pub fn op_sb_ai_ort_init_session(#[buffer] model_bytes: &[u8]) -> Result<SessionInfo> {
-    println!("Hello from ORT");
-
-    println!(
-        "op_sb_ai_init_ort_session: received {} bytes.",
-        model_bytes.len()
-    );
+#[to_v8]
+pub fn op_sb_ai_ort_init_session(#[buffer] model_bytes: &[u8]) -> Result<ModelInfo> {
+    let model_info = ModelSession::from_bytes(model_bytes)?;
 
-    if let Some(err) = ensure_onnx_env_init() {
-        return Err(anyhow!("failed to create onnx environment: {err}"));
-    }
-
-    let (session_id, session) = load_session_from_bytes(model_bytes)?;
-
-    let session_info = SessionInfo {
-        id: session_id,
-        inputs: session.inputs.iter().map(|i| i.name.to_owned()).collect(),
-        outputs: session.outputs.iter().map(|o| o.name.to_owned()).collect(),
-    };
-
-    Ok(session_info)
+    Ok(model_info.info())
 }
 
 #[op2]
-#[serde]
+#[to_v8]
 pub fn op_sb_ai_ort_run_session(
-    #[string] session_id: String,
-    #[serde] inputs: HashMap<String, Tensor>,
-) -> Result<HashMap<String, Tensor>> {
-    // TODO: take inputs a hash map and pass then to session.
-    println!("op_sb_ai_run_ort_session: got {inputs:?}");
+    #[string] model_id: String,
+    #[from_v8] input_values: JsSessionInputs,
+) -> Result<JsSessionOutputs> {
+    let model = ModelSession::from_id(model_id).unwrap();
+    let model_session = model.inner();
 
-    let session = get_session(&session_id).ok_or(anyhow!("session {session_id} not initalized"))?;
-    println!("op_sb_ai_run_ort_session: loaded {session_id} -> {session:?}");
+    let input_values = input_values.to_ort_session_inputs(model.info().input_names.into_iter())?;
 
-    // Prepare input values
-    let mut inputs = inputs
-        .iter()
-        .map(|(key, value)| {
-            // TODO: Proper conversion
-            let raw_tensor = match value {
-                Tensor::Int64(value) => {
-                    Value::from_array((value.dims.to_owned(), value.data.to_owned())).unwrap()
-                }
-                Tensor::Float32(_) => {
-                    panic!("invalid TensorFloat32")
-                }
-            };
+    let mut outputs = model_session.run(input_values)?;
+    let mut output_values = vec![];
 
-            (key, raw_tensor)
-        })
-        .collect::<HashMap<_, _>>();
+    // We need to `pop` over outputs to get it ownership, since keys are attached to 'model_session' lifetime
+    // and since we are sending tuples to JS, we don't need the model's output keys.
+    for _ in 0..outputs.len() {
+        let (_, ort_value) = outputs.pop_first().unwrap();
 
-    // Create input session map
-    let input_values = session
-        .inputs
-        .iter()
-        .map(|input| {
-            (
-                Cow::from(&input.name),
-                SessionInputValue::from(inputs.remove(&input.name).unwrap()),
-            )
-        })
-        .collect::<Vec<_>>();
+        let js_value = JsDynTensorValue(ort_value);
 
-    let outputs = session.run(input_values)?;
-    println!("op_sb_ai_run_ort_session: outputs {outputs:?}");
-
-    // Prepare outputs
-    let output_map = session
-        .outputs
-        .iter()
-        .map(|output| {
-            // TODO: Proper pattern matching
-            let ValueType::Tensor { ty, .. } = output.output_type else {
-                panic!("Invalid output_type");
-            };
-            let tensor = if let TensorElementType::Float32 = ty {
-                let (dims, data) = outputs
-                    .get(output.name.as_str())
-                    .unwrap()
-                    .try_extract_raw_tensor::<f32>()
-                    .unwrap();
-
-                Tensor::Float32(TensorFloat32 {
-                    data_type: "float32".into(),
-                    dims,
-                    data: data.to_vec(),
-                })
-            } else {
-                let (dims, data) = outputs
-                    .get(&output.name.as_str())
-                    .unwrap()
-                    .try_extract_raw_tensor::<i64>()
-                    .unwrap();
-
-                Tensor::Int64(TensorInt64 {
-                    data_type: "int64".into(),
-                    dims,
-                    data: data.to_vec(),
-                })
-            };
+        output_values.push(js_value);
+    }
 
-            (output.name.to_owned(), tensor)
-        })
-        .collect();
+    let outputs = JsSessionOutputs(output_values);
 
-    Ok(output_map)
+    Ok(outputs)
 }
diff --git a/crates/sb_ai/onnxruntime/model_session.rs b/crates/sb_ai/onnxruntime/model_session.rs
new file mode 100644
index 000000000..695b40a76
--- /dev/null
+++ b/crates/sb_ai/onnxruntime/model_session.rs
@@ -0,0 +1,74 @@
+use std::sync::Arc;
+
+use anyhow::Result;
+use deno_core::{serde_v8::to_v8, ToV8};
+use ort::Session;
+
+use super::session::{get_session, load_session_from_bytes};
+
+#[derive(Debug, Clone)]
+pub struct ModelInfo {
+    pub id: String,
+    pub input_names: Vec<String>,
+    pub output_names: Vec<String>,
+}
+
+#[derive(Debug)]
+pub struct ModelSession {
+    info: ModelInfo,
+    inner: Arc<Session>,
+}
+
+impl ModelSession {
+    fn new(id: String, session: Arc<Session>) -> Self {
+        let input_names = session
+            .inputs
+            .iter()
+            .map(|input| input.name.to_owned())
+            .collect::<Vec<_>>();
+
+        let output_names = session
+            .outputs
+            .iter()
+            .map(|output| output.name.to_owned())
+            .collect::<Vec<_>>();
+
+        Self {
+            info: ModelInfo {
+                id,
+                input_names,
+                output_names,
+            },
+            inner: session,
+        }
+    }
+
+    pub fn info(&self) -> ModelInfo {
+        self.info.to_owned()
+    }
+
+    pub fn inner(&self) -> Arc<Session> {
+        self.inner.clone()
+    }
+
+    pub fn from_id(id: String) -> Option<Self> {
+        get_session(&id).map(|session| Self::new(id, session))
+    }
+
+    pub fn from_bytes(model_bytes: &[u8]) -> Result<Self> {
+        load_session_from_bytes(model_bytes).map(|(id, session)| Self::new(id, session))
+    }
+}
+
+impl<'a> ToV8<'a> for ModelInfo {
+    type Error = std::convert::Infallible;
+
+    fn to_v8(
+        self,
+        scope: &mut deno_core::v8::HandleScope<'a>,
+    ) -> std::result::Result<deno_core::v8::Local<'a, deno_core::v8::Value>, Self::Error> {
+        let v8_values = to_v8(scope, (self.id, self.input_names, self.output_names));
+
+        Ok(v8_values.unwrap())
+    }
+}
diff --git a/crates/sb_ai/onnxruntime/tensor.rs b/crates/sb_ai/onnxruntime/tensor.rs
new file mode 100644
index 000000000..60b88d07f
--- /dev/null
+++ b/crates/sb_ai/onnxruntime/tensor.rs
@@ -0,0 +1,485 @@
+use core::slice;
+use std::{
+    borrow::Cow,
+    ffi::c_void,
+    fmt::Debug,
+    marker::PhantomData,
+    mem::{size_of, MaybeUninit},
+    ops::{Deref, DerefMut},
+    rc::Rc,
+    str::Bytes,
+    sync::Arc,
+};
+
+use anyhow::anyhow;
+use deno_core::{
+    error::{AnyError, StdAnyError},
+    parking_lot::Mutex,
+    serde_v8::{from_v8, to_v8},
+    v8::{self},
+    FromV8, ToV8,
+};
+use ort::{
+    AllocationDevice, AllocatorType, DynTensor, DynTensorRefMut, DynValue, DynValueTypeMarker,
+    IntoTensorElementType, MemoryInfo, MemoryType, SessionInputValue, SessionInputs,
+    SessionOutputs, Tensor as OrtTensor, TensorElementType, TensorRefMut, ValueRefMut, ValueType,
+};
+use tracing_subscriber::filter::targets;
+
+// We expect that a given JS tensor type string be less than 1 byte
+// since the largest string is 'floatNN': 7
+const EXPECTED_TENSOR_TYPE_STR_LENGHT: usize = 8;
+
+// We zero-copy an ORT Tensor to a JS ArrayBuffer like:
+// https://github.com/denoland/deno_core/blob/7258aa325368a8e2c1271a25c1b4d537ed41e9c5/core/runtime/ops_rust_to_v8.rs#L370
+// We could try `Tensor::try_extract_raw_tensor_mut<T>` with `v8::ArrayBuffer::new_backing_store_from_bytes`
+// but it only allows [u8] instead of [T], so we need to get into `unsafe` path.
+macro_rules! v8_typed_array_from {
+    ($array_type:ident; tensor::<$type:ident>($tensor:expr), $scope:expr) => {{
+        let (_, raw_tensor) = $tensor
+            .try_extract_raw_tensor_mut::<$type>()
+            .map_err(AnyError::from)?;
+
+        let tensor_ptr = raw_tensor.as_ptr();
+        let tensor_len = raw_tensor.len();
+        let tensor_rc = Rc::into_raw(Rc::new(raw_tensor)) as *const c_void;
+
+        let buffer_len = tensor_len * size_of::<$type>();
+
+        extern "C" fn drop_tensor(_ptr: *mut c_void, _len: usize, data: *mut c_void) {
+            // SAFETY: We know that data is a raw Rc from above
+            unsafe { drop(Rc::from_raw(data.cast::<$type>())) }
+        }
+
+        // Zero-Copying using ptr
+        let buf_store = unsafe {
+            v8::ArrayBuffer::new_backing_store_from_ptr(
+                tensor_ptr as _,
+                buffer_len,
+                drop_tensor,
+                tensor_rc as _,
+            )
+        }
+        .make_shared();
+
+        let buffer = v8::ArrayBuffer::with_backing_store($scope, &buf_store);
+
+        v8::$array_type::new($scope, buffer, 0, tensor_len)
+            .ok_or(anyhow!("Could not create '$array_type' from tensor."))?
+    }};
+}
+
+#[derive(Debug)]
+pub struct JsTensorElementType(TensorElementType);
+
+impl Deref for JsTensorElementType {
+    type Target = TensorElementType;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl TryFrom<&str> for JsTensorElementType {
+    type Error = AnyError;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        let tensor_type = match value {
+            "float32" => TensorElementType::Float32,
+            "float64" => TensorElementType::Float64,
+            "string" => TensorElementType::String,
+            "int8" => TensorElementType::Int8,
+            "uint8" => TensorElementType::Uint8,
+            "int16" => TensorElementType::Int16,
+            "uint16" => TensorElementType::Uint16,
+            "int32" => TensorElementType::Int32,
+            "uint32" => TensorElementType::Uint32,
+            "int64" => TensorElementType::Int64,
+            "uint64" => TensorElementType::Uint64,
+            "bool" => TensorElementType::Bool,
+            _ => return Err(anyhow!("value '{value}' is not a valid tensor type.")),
+        };
+
+        Ok(Self(tensor_type))
+    }
+}
+
+impl TryFrom<JsTensorElementType> for &str {
+    type Error = AnyError;
+
+    fn try_from(value: JsTensorElementType) -> Result<Self, Self::Error> {
+        let js_tensor_type = match value.0 {
+            TensorElementType::Float32 => "float32",
+            TensorElementType::Float64 => "float64",
+            TensorElementType::String => "string",
+            TensorElementType::Int8 => "int8",
+            TensorElementType::Uint8 => "uint8",
+            TensorElementType::Int16 => "int16",
+            TensorElementType::Uint16 => "uint16",
+            TensorElementType::Int32 => "int32",
+            TensorElementType::Uint32 => "uint32",
+            TensorElementType::Int64 => "int64",
+            TensorElementType::Uint64 => "uint64",
+            TensorElementType::Bool => "bool",
+            _ => return Err(anyhow!("value '{value:?}' is not a valid tensor type.")),
+        };
+
+        Ok(js_tensor_type)
+    }
+}
+
+impl<'a> FromV8<'a> for JsTensorElementType {
+    type Error = StdAnyError;
+
+    fn from_v8(
+        scope: &mut v8::HandleScope<'a>,
+        value: v8::Local<'a, v8::Value>,
+    ) -> Result<Self, Self::Error> {
+        let value = v8::Local::<v8::String>::try_from(value).map_err(AnyError::from)?;
+
+        // Here we try to zero-copy the given Js string.
+        let mut buffer = [MaybeUninit::<u8>::uninit(); EXPECTED_TENSOR_TYPE_STR_LENGHT];
+        let value_str = value.to_rust_cow_lossy(scope, &mut buffer);
+
+        let tensor_type = match value_str {
+            std::borrow::Cow::Borrowed(value_str) => JsTensorElementType::try_from(value_str)?,
+            std::borrow::Cow::Owned(value_str) => {
+                JsTensorElementType::try_from(value_str.as_str())?
+            }
+        };
+
+        Ok(tensor_type)
+    }
+}
+
+impl<'a> ToV8<'a> for JsTensorElementType {
+    type Error = StdAnyError;
+
+    fn to_v8(
+        self,
+        scope: &mut v8::HandleScope<'a>,
+    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
+        let tensor_type_str = self.try_into()?;
+        let js_tensor_type = v8::String::new(scope, tensor_type_str)
+            .ok_or(anyhow!("could not parse '{tensor_type_str}' to V8 String",))?;
+
+        Ok(js_tensor_type.into())
+    }
+}
+
+#[derive(Debug)]
+pub struct JsDynTensorValue(pub DynValue);
+
+impl Deref for JsDynTensorValue {
+    type Target = DynValue;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for JsDynTensorValue {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl<'a> ToV8<'a> for JsDynTensorValue {
+    type Error = StdAnyError;
+
+    fn to_v8(
+        mut self,
+        scope: &mut v8::HandleScope<'a>,
+    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
+        let ort_type = self.dtype().map_err(AnyError::from)?;
+
+        let ValueType::Tensor { ty, dimensions } = ort_type else {
+            return Err(anyhow!(
+                "JS only support 'ort::Value' of 'Tensor' type, got '{ort_type:?}'."
+            )
+            .into());
+        };
+
+        let tensor_type = JsTensorElementType(ty);
+
+        // Zero-Copying Data from ORT to a JS ArrayBuffer based on type
+        let tensor_buffer = match *tensor_type {
+            TensorElementType::Float32 => {
+                v8_typed_array_from!(Float32Array; tensor::<f32>(self), scope).into()
+            }
+            TensorElementType::Float64 => {
+                v8_typed_array_from!(Float64Array; tensor::<f64>(self), scope).into()
+            }
+            TensorElementType::String => {
+                // TODO: Handle string[] tensors from 'v8::Array'
+                return Err(anyhow!("Can't zero-copy tensor from it: 'String' does not implement the 'IntoTensorElementType' trait.").into());
+            }
+            TensorElementType::Int8 => {
+                v8_typed_array_from!(Int8Array; tensor::<i8>(self), scope).into()
+            }
+            TensorElementType::Uint8 => {
+                v8_typed_array_from!(Uint8Array; tensor::<i8>(self), scope).into()
+            }
+            TensorElementType::Int16 => {
+                v8_typed_array_from!(Int16Array; tensor::<i16>(self), scope).into()
+            }
+            TensorElementType::Uint16 => {
+                v8_typed_array_from!(Uint16Array; tensor::<u16>(self), scope).into()
+            }
+            TensorElementType::Int32 => {
+                v8_typed_array_from!(Int32Array; tensor::<i32>(self), scope).into()
+            }
+            TensorElementType::Uint32 => {
+                v8_typed_array_from!(Uint32Array; tensor::<u32>(self), scope).into()
+            }
+            TensorElementType::Int64 => {
+                v8_typed_array_from!(BigInt64Array; tensor::<i64>(self), scope).into()
+            }
+            TensorElementType::Uint64 => {
+                v8_typed_array_from!(BigUint64Array; tensor::<u64>(self), scope).into()
+            }
+            TensorElementType::Bool => {
+                v8_typed_array_from!(Uint8Array; tensor::<bool>(self), scope).into()
+            }
+            TensorElementType::Float16 => {
+                return Err(anyhow!("'half::f16' is not supported by JS tensor.").into())
+            }
+            TensorElementType::Bfloat16 => {
+                return Err(anyhow!("'half::bf16' is not supported by JS tensor.").into())
+            }
+        };
+
+        let tensor_type = tensor_type.to_v8(scope).map_err(AnyError::from)?;
+        let tensor_dims = to_v8(scope, dimensions).map_err(AnyError::from)?;
+
+        // We pass tensor as tuples to avoid string allocation
+        // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
+        let tensor =
+            v8::Array::new_with_elements(scope, &[tensor_type, tensor_buffer, tensor_dims]);
+
+        Ok(tensor.into())
+    }
+}
+
+#[derive(Debug)]
+pub struct JsTensorBufferView<'v> {
+    data_type: JsTensorElementType,
+    data_ptr: *mut c_void,
+    shape: Vec<i64>,
+    lifetime: PhantomData<&'v ()>,
+}
+
+impl<'v> JsTensorBufferView<'v> {
+    pub fn to_ort_tensor_ref(&self) -> anyhow::Result<ValueRefMut<'v, DynValueTypeMarker>> {
+        // Same impl. as the Tensor::from_array()
+        // https://github.com/pykeio/ort/blob/abd527b6a1df8f566c729a9c4398bdfd185d652f/src/value/impl_tensor/create.rs#L170
+        let memory_info = MemoryInfo::new(
+            AllocationDevice::CPU,
+            0,
+            AllocatorType::Arena,
+            MemoryType::CPUInput,
+        )?;
+
+        // TODO: Try to zero-copy shape also
+        let shape = self.shape.to_owned();
+
+        // Zero-Copying Data to an ORT Tensor based on JS type
+        let tensor = match *self.data_type {
+            TensorElementType::Float32 => unsafe {
+                TensorRefMut::<f32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Float64 => unsafe {
+                TensorRefMut::<f64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::String => {
+                // TODO: Handle string[] tensors from 'v8::Array'
+                return Err(anyhow!("Can't zero-copy tensor from it: 'String' does not implement the 'IntoTensorElementType' trait."));
+            }
+            TensorElementType::Int8 => unsafe {
+                TensorRefMut::<i8>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Uint8 => unsafe {
+                TensorRefMut::<u8>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Int16 => unsafe {
+                TensorRefMut::<i16>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Uint16 => unsafe {
+                TensorRefMut::<u16>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Int32 => unsafe {
+                TensorRefMut::<i32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Uint32 => unsafe {
+                TensorRefMut::<u32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Int64 => unsafe {
+                TensorRefMut::<i64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Uint64 => unsafe {
+                TensorRefMut::<u64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Bool => unsafe {
+                TensorRefMut::<bool>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
+            },
+            TensorElementType::Float16 => {
+                return Err(anyhow!("'half::f16' is not supported by JS tensor."))
+            }
+            TensorElementType::Bfloat16 => {
+                return Err(anyhow!("'half::bf16' is not supported by JS tensor."))
+            }
+        };
+
+        Ok(tensor.into_dyn())
+    }
+}
+
+impl<'a> FromV8<'a> for JsTensorBufferView<'a> {
+    type Error = StdAnyError;
+
+    fn from_v8(
+        scope: &mut deno_core::v8::HandleScope<'a>,
+        value: deno_core::v8::Local<'a, deno_core::v8::Value>,
+    ) -> Result<Self, Self::Error> {
+        let value = v8::Local::<v8::Array>::try_from(value).map_err(AnyError::from)?;
+
+        if value.length() != 3 {
+            return Err(anyhow!(
+                "expected a JS tuple of 3 elements, found {}",
+                value.length()
+            )
+            .into());
+        }
+        let data_type = v8::Local::<v8::String>::try_from(
+            value
+                .get_index(scope, 0)
+                .ok_or(anyhow!("tensor type was expected at tuple.0"))?,
+        )
+        .map_err(AnyError::from)?;
+
+        let data_type = JsTensorElementType::from_v8(scope, data_type.into())?;
+
+        // TODO: Handle 'v8::Array' for string[] tensors,
+        // since string[] tensors are not passed as 'v8::TypedArray'
+        let data_values = v8::Local::<v8::TypedArray>::try_from(
+            value
+                .get_index(scope, 1)
+                .ok_or(anyhow!("tensor data was expected at tuple.1"))?,
+        )
+        .map_err(AnyError::from)?;
+
+        let shape_dims = v8::Local::<v8::Array>::try_from(
+            value
+                .get_index(scope, 2)
+                .ok_or(anyhow!("tensor dims was expected at tuple.2"))?,
+        )
+        .map_err(AnyError::from)?;
+
+        // TODO: Pass dims as 'TypedArray' from Js
+        // Since dims are 'v8::Array' and not 'v8::TypedArray' we can't get the inner ptr to zero copy.
+        let shape_dims = from_v8::<Vec<_>>(scope, shape_dims.into()).map_err(AnyError::from)?;
+
+        Ok(JsTensorBufferView {
+            data_type,
+            data_ptr: data_values.data(),
+            shape: shape_dims,
+            lifetime: PhantomData::default(),
+        })
+    }
+}
+
+// Useful to receive the model inputs as JS sequence
+#[derive(Debug)]
+pub struct JsSessionInputs<'a>(Arc<Vec<JsTensorBufferView<'a>>>);
+
+impl<'a> JsSessionInputs<'a> {
+    pub fn to_ort_session_inputs(
+        self,
+        keys: impl Iterator<Item = String>,
+    ) -> Result<SessionInputs<'a, 'a>, AnyError> {
+        let mut session_inputs = vec![];
+
+        for (key, tensor_view) in keys.zip(self.iter()) {
+            let value = SessionInputValue::from(tensor_view.to_ort_tensor_ref()?);
+
+            session_inputs.push((Cow::from(key), value));
+        }
+
+        Ok(SessionInputs::ValueMap(session_inputs))
+    }
+}
+
+impl<'a> Deref for JsSessionInputs<'a> {
+    type Target = Vec<JsTensorBufferView<'a>>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<'a> FromV8<'a> for JsSessionInputs<'a> {
+    type Error = StdAnyError;
+
+    fn from_v8(
+        scope: &mut deno_core::v8::HandleScope<'a>,
+        value: deno_core::v8::Local<'a, deno_core::v8::Value>,
+    ) -> Result<Self, Self::Error> {
+        let mut sequence = vec![];
+        let value = v8::Local::<v8::Array>::try_from(value).map_err(AnyError::from)?;
+
+        for idx in 0..value.length() {
+            let value = value
+                .get_index(scope, idx)
+                .ok_or(anyhow!("could no get value for index {idx}"))?;
+
+            let tensor = JsTensorBufferView::from_v8(scope, value)?;
+
+            sequence.push(tensor);
+        }
+
+        Ok(Self(Arc::new(sequence)))
+    }
+}
+
+#[derive(Debug)]
+pub struct JsSessionOutputs(pub Vec<JsDynTensorValue>);
+
+impl Deref for JsSessionOutputs {
+    type Target = Vec<JsDynTensorValue>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for JsSessionOutputs {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+// Useful to receive the model inputs as JS sequence
+impl<'a> ToV8<'a> for JsSessionOutputs {
+    type Error = StdAnyError;
+
+    fn to_v8(
+        mut self,
+        scope: &mut v8::HandleScope<'a>,
+    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
+        // We pass as tuples to avoid string allocation
+        // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
+
+        let mut elements = vec![];
+
+        for idx in 0..self.len() {
+            let value = self.remove(idx);
+            let js_value = value.to_v8(scope)?;
+
+            elements.push(js_value);
+        }
+
+        let output_tuple = v8::Array::new_with_elements(scope, &elements);
+
+        Ok(output_tuple.into())
+    }
+}

From bdaff49985dcdcdfb0c0343171639675528fa9e7 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Wed, 25 Sep 2024 18:30:52 +0100
Subject: [PATCH 03/21] stamp: refactoring to use `serde_v8`

- Since `serde_v8` allows zero-copy, we use it to handle the model
inputs and to send back the outputs.
---
 crates/sb_ai/js/onnxruntime/onnx.js |  63 ++--
 crates/sb_ai/onnxruntime/mod.rs     |  33 +-
 crates/sb_ai/onnxruntime/tensor.rs  | 527 ++++++----------------------
 3 files changed, 168 insertions(+), 455 deletions(-)

diff --git a/crates/sb_ai/js/onnxruntime/onnx.js b/crates/sb_ai/js/onnxruntime/onnx.js
index c0b91a7fc..01eac887a 100644
--- a/crates/sb_ai/js/onnxruntime/onnx.js
+++ b/crates/sb_ai/js/onnxruntime/onnx.js
@@ -1,5 +1,20 @@
 const core = globalThis.Deno.core;
 
+const DataTypeMap = Object.freeze({
+    float32: Float32Array,
+    float64: Float64Array,
+    string: Array, // string[]
+    int8: Int8Array,
+    uint8: Uint8Array,
+    int16: Int16Array,
+    uint16: Uint16Array,
+    int32: Int32Array,
+    uint32: Uint32Array,
+    int64: BigInt64Array,
+    uint64: BigUint64Array,
+    bool: Uint8Array,
+});
+
 class Tensor {
   /** @type {DataType} Type of the tensor. */
   type;
@@ -14,25 +29,16 @@ class Tensor {
   size = 0;
 
   constructor(type, data, dims) {
-    this.type = type;
-    this.data = data;
-    this.dims = dims;
-  }
-
-  static isTensorLike(object) {
-    return (
-      Object.hasOwn(object, 'type')
-      && Object.hasOwn(object, 'cpuData')
-      && Object.hasOwn(object, 'dims')
-    )
-  }
-
-  static toTuple(tensorLike) {
-    if (!this.isTensorLike(tensorLike)) {
-      throw Error('The given object is not a valid Tensor like.');
+    if (!Object.hasOwn(DataTypeMap, type)) {
+      throw new Error(`Unsupported type: ${type}`);
     }
 
-    return [tensorLike.type, tensorLike.cpuData, tensorLike.dims]
+    const dataArray = new DataTypeMap[type](data);
+
+    this.type = type;
+    this.data = dataArray;
+    this.dims = dims;
+    this.size = dataArray.length
   }
 }
 
@@ -54,21 +60,18 @@ class InferenceSession {
   }
 
   async run(inputs) {
-    // We pass values as tuples to avoid string allocation
-    // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
-    const tupledTensors = Object.values(inputs).map(tensor => Tensor.toTuple(tensor));
-    const outputTuples = await core.ops.op_sb_ai_ort_run_session(this.sessionId, tupledTensors);
-
-    // Since we got outputs as tuples we need to re-map it to an object
-    const result = {};
-    for (let idx = 0; idx < this.outputNames.length; idx++) {
-      const key = this.outputNames[idx];
-      const [type, data, dims] = outputTuples[idx];
-
-      result[key] = new Tensor(type, data, dims);
+    const outputs = await core.ops.op_sb_ai_ort_run_session(this.sessionId, inputs);
+
+    // Parse to Tensor
+    for(const key in outputs) {
+      if(Object.hasOwn(outputs, key)) {
+        const {type, data, dims} = outputs[key];
+
+        outputs[key] = new Tensor(type, data.buffer, dims);
+      }
     }
 
-    return result;
+    return outputs;
   }
 }
 
diff --git a/crates/sb_ai/onnxruntime/mod.rs b/crates/sb_ai/onnxruntime/mod.rs
index 3450ff766..43adf9613 100644
--- a/crates/sb_ai/onnxruntime/mod.rs
+++ b/crates/sb_ai/onnxruntime/mod.rs
@@ -3,11 +3,13 @@ pub(crate) mod onnx;
 pub(crate) mod session;
 mod tensor;
 
+use std::{borrow::Cow, collections::HashMap};
+
 use anyhow::Result;
 use deno_core::op2;
 
 use model_session::{ModelInfo, ModelSession};
-use tensor::{JsDynTensorValue, JsSessionInputs, JsSessionOutputs};
+use tensor::{JsTensor, ToJsTensor};
 
 #[op2]
 #[to_v8]
@@ -18,30 +20,31 @@ pub fn op_sb_ai_ort_init_session(#[buffer] model_bytes: &[u8]) -> Result<ModelIn
 }
 
 #[op2]
-#[to_v8]
-pub fn op_sb_ai_ort_run_session(
+#[serde]
+pub fn op_sb_ai_ort_run_session<'a>(
     #[string] model_id: String,
-    #[from_v8] input_values: JsSessionInputs,
-) -> Result<JsSessionOutputs> {
+    #[serde] input_values: HashMap<String, JsTensor>,
+) -> Result<HashMap<String, ToJsTensor>> {
     let model = ModelSession::from_id(model_id).unwrap();
     let model_session = model.inner();
 
-    let input_values = input_values.to_ort_session_inputs(model.info().input_names.into_iter())?;
+    let input_values = input_values
+        .into_iter()
+        .map(|(key, value)| value.as_ort_input().map(|value| (Cow::from(key), value)))
+        .collect::<Result<Vec<_>>>()?;
 
     let mut outputs = model_session.run(input_values)?;
-    let mut output_values = vec![];
+    let mut output_values = HashMap::new();
 
-    // We need to `pop` over outputs to get it ownership, since keys are attached to 'model_session' lifetime
-    // and since we are sending tuples to JS, we don't need the model's output keys.
+    // We need to `pop` over outputs to get 'value' ownership, since keys are attached to 'model_session' lifetime
+    // it can't be iterated with `into_iter()`
     for _ in 0..outputs.len() {
-        let (_, ort_value) = outputs.pop_first().unwrap();
+        let (key, value) = outputs.pop_first().unwrap();
 
-        let js_value = JsDynTensorValue(ort_value);
+        let value = ToJsTensor::from_ort_tensor(value)?;
 
-        output_values.push(js_value);
+        output_values.insert(key.to_owned(), value);
     }
 
-    let outputs = JsSessionOutputs(output_values);
-
-    Ok(outputs)
+    Ok(output_values)
 }
diff --git a/crates/sb_ai/onnxruntime/tensor.rs b/crates/sb_ai/onnxruntime/tensor.rs
index 60b88d07f..0e702ef3d 100644
--- a/crates/sb_ai/onnxruntime/tensor.rs
+++ b/crates/sb_ai/onnxruntime/tensor.rs
@@ -1,41 +1,20 @@
-use core::slice;
-use std::{
-    borrow::Cow,
-    ffi::c_void,
-    fmt::Debug,
-    marker::PhantomData,
-    mem::{size_of, MaybeUninit},
-    ops::{Deref, DerefMut},
-    rc::Rc,
-    str::Bytes,
-    sync::Arc,
-};
+use std::{ffi::c_void, fmt::Debug, mem::size_of, rc::Rc};
 
 use anyhow::anyhow;
-use deno_core::{
-    error::{AnyError, StdAnyError},
-    parking_lot::Mutex,
-    serde_v8::{from_v8, to_v8},
-    v8::{self},
-    FromV8, ToV8,
-};
+use deno_core::{error::AnyError, v8, JsBuffer, ToJsBuffer};
 use ort::{
-    AllocationDevice, AllocatorType, DynTensor, DynTensorRefMut, DynValue, DynValueTypeMarker,
-    IntoTensorElementType, MemoryInfo, MemoryType, SessionInputValue, SessionInputs,
-    SessionOutputs, Tensor as OrtTensor, TensorElementType, TensorRefMut, ValueRefMut, ValueType,
+    AllocationDevice, AllocatorType, DynValue, DynValueTypeMarker, IntoTensorElementType,
+    MemoryInfo, MemoryType, SessionInputValue, TensorElementType, TensorRefMut, ValueRefMut,
+    ValueType,
 };
-use tracing_subscriber::filter::targets;
-
-// We expect that a given JS tensor type string be less than 1 byte
-// since the largest string is 'floatNN': 7
-const EXPECTED_TENSOR_TYPE_STR_LENGHT: usize = 8;
+use serde::{Deserialize, Serialize};
 
 // We zero-copy an ORT Tensor to a JS ArrayBuffer like:
 // https://github.com/denoland/deno_core/blob/7258aa325368a8e2c1271a25c1b4d537ed41e9c5/core/runtime/ops_rust_to_v8.rs#L370
 // We could try `Tensor::try_extract_raw_tensor_mut<T>` with `v8::ArrayBuffer::new_backing_store_from_bytes`
 // but it only allows [u8] instead of [T], so we need to get into `unsafe` path.
-macro_rules! v8_typed_array_from {
-    ($array_type:ident; tensor::<$type:ident>($tensor:expr), $scope:expr) => {{
+macro_rules! v8_slice_from {
+    (tensor::<$type:ident>($tensor:expr)) => {{
         let (_, raw_tensor) = $tensor
             .try_extract_raw_tensor_mut::<$type>()
             .map_err(AnyError::from)?;
@@ -62,215 +41,63 @@ macro_rules! v8_typed_array_from {
         }
         .make_shared();
 
-        let buffer = v8::ArrayBuffer::with_backing_store($scope, &buf_store);
+        let buffer_slice =
+            unsafe { deno_core::serde_v8::V8Slice::<u8>::from_parts(buf_store, 0..buffer_len) };
 
-        v8::$array_type::new($scope, buffer, 0, tensor_len)
-            .ok_or(anyhow!("Could not create '$array_type' from tensor."))?
+        buffer_slice
     }};
 }
 
-#[derive(Debug)]
-pub struct JsTensorElementType(TensorElementType);
-
-impl Deref for JsTensorElementType {
-    type Target = TensorElementType;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl TryFrom<&str> for JsTensorElementType {
-    type Error = AnyError;
-
-    fn try_from(value: &str) -> Result<Self, Self::Error> {
-        let tensor_type = match value {
-            "float32" => TensorElementType::Float32,
-            "float64" => TensorElementType::Float64,
-            "string" => TensorElementType::String,
-            "int8" => TensorElementType::Int8,
-            "uint8" => TensorElementType::Uint8,
-            "int16" => TensorElementType::Int16,
-            "uint16" => TensorElementType::Uint16,
-            "int32" => TensorElementType::Int32,
-            "uint32" => TensorElementType::Uint32,
-            "int64" => TensorElementType::Int64,
-            "uint64" => TensorElementType::Uint64,
-            "bool" => TensorElementType::Bool,
-            _ => return Err(anyhow!("value '{value}' is not a valid tensor type.")),
-        };
-
-        Ok(Self(tensor_type))
-    }
-}
-
-impl TryFrom<JsTensorElementType> for &str {
-    type Error = AnyError;
-
-    fn try_from(value: JsTensorElementType) -> Result<Self, Self::Error> {
-        let js_tensor_type = match value.0 {
-            TensorElementType::Float32 => "float32",
-            TensorElementType::Float64 => "float64",
-            TensorElementType::String => "string",
-            TensorElementType::Int8 => "int8",
-            TensorElementType::Uint8 => "uint8",
-            TensorElementType::Int16 => "int16",
-            TensorElementType::Uint16 => "uint16",
-            TensorElementType::Int32 => "int32",
-            TensorElementType::Uint32 => "uint32",
-            TensorElementType::Int64 => "int64",
-            TensorElementType::Uint64 => "uint64",
-            TensorElementType::Bool => "bool",
-            _ => return Err(anyhow!("value '{value:?}' is not a valid tensor type.")),
-        };
-
-        Ok(js_tensor_type)
-    }
-}
-
-impl<'a> FromV8<'a> for JsTensorElementType {
-    type Error = StdAnyError;
-
-    fn from_v8(
-        scope: &mut v8::HandleScope<'a>,
-        value: v8::Local<'a, v8::Value>,
-    ) -> Result<Self, Self::Error> {
-        let value = v8::Local::<v8::String>::try_from(value).map_err(AnyError::from)?;
-
-        // Here we try to zero-copy the given Js string.
-        let mut buffer = [MaybeUninit::<u8>::uninit(); EXPECTED_TENSOR_TYPE_STR_LENGHT];
-        let value_str = value.to_rust_cow_lossy(scope, &mut buffer);
-
-        let tensor_type = match value_str {
-            std::borrow::Cow::Borrowed(value_str) => JsTensorElementType::try_from(value_str)?,
-            std::borrow::Cow::Owned(value_str) => {
-                JsTensorElementType::try_from(value_str.as_str())?
-            }
-        };
-
-        Ok(tensor_type)
-    }
-}
-
-impl<'a> ToV8<'a> for JsTensorElementType {
-    type Error = StdAnyError;
-
-    fn to_v8(
-        self,
-        scope: &mut v8::HandleScope<'a>,
-    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
-        let tensor_type_str = self.try_into()?;
-        let js_tensor_type = v8::String::new(scope, tensor_type_str)
-            .ok_or(anyhow!("could not parse '{tensor_type_str}' to V8 String",))?;
-
-        Ok(js_tensor_type.into())
-    }
-}
-
-#[derive(Debug)]
-pub struct JsDynTensorValue(pub DynValue);
-
-impl Deref for JsDynTensorValue {
-    type Target = DynValue;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl DerefMut for JsDynTensorValue {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.0
-    }
-}
-
-impl<'a> ToV8<'a> for JsDynTensorValue {
-    type Error = StdAnyError;
-
-    fn to_v8(
+// Type alias for TensorElementType
+// https://serde.rs/remote-derive.html
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(remote = "TensorElementType", rename_all = "lowercase")]
+pub enum JsTensorType {
+    /// 32-bit floating point number, equivalent to Rust's `f32`.
+    Float32,
+    /// Unsigned 8-bit integer, equivalent to Rust's `u8`.
+    Uint8,
+    /// Signed 8-bit integer, equivalent to Rust's `i8`.
+    Int8,
+    /// Unsigned 16-bit integer, equivalent to Rust's `u16`.
+    Uint16,
+    /// Signed 16-bit integer, equivalent to Rust's `i16`.
+    Int16,
+    /// Signed 32-bit integer, equivalent to Rust's `i32`.
+    Int32,
+    /// Signed 64-bit integer, equivalent to Rust's `i64`.
+    Int64,
+    /// String, equivalent to Rust's `String`.
+    String,
+    /// Boolean, equivalent to Rust's `bool`.
+    Bool,
+    /// 16-bit floating point number, equivalent to [`half::f16`] (requires the `half` feature).
+    Float16,
+    /// 64-bit floating point number, equivalent to Rust's `f64`. Also known as `double`.
+    Float64,
+    /// Unsigned 32-bit integer, equivalent to Rust's `u32`.
+    Uint32,
+    /// Unsigned 64-bit integer, equivalent to Rust's `u64`.
+    Uint64,
+    /// Brain 16-bit floating point number, equivalent to [`half::bf16`] (requires the `half` feature).
+    Bfloat16,
+}
+
+#[derive(Serialize, Deserialize)]
+struct JsTensorTypeSerdeHelper(#[serde(with = "JsTensorType")] TensorElementType);
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct JsTensor {
+    #[serde(rename = "type", with = "JsTensorType")]
+    data_type: TensorElementType,
+    data: JsBuffer,
+    dims: Vec<i64>,
+}
+
+impl JsTensor {
+    pub fn as_ort_tensor_ref<'a, T: IntoTensorElementType + Debug>(
         mut self,
-        scope: &mut v8::HandleScope<'a>,
-    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
-        let ort_type = self.dtype().map_err(AnyError::from)?;
-
-        let ValueType::Tensor { ty, dimensions } = ort_type else {
-            return Err(anyhow!(
-                "JS only support 'ort::Value' of 'Tensor' type, got '{ort_type:?}'."
-            )
-            .into());
-        };
-
-        let tensor_type = JsTensorElementType(ty);
-
-        // Zero-Copying Data from ORT to a JS ArrayBuffer based on type
-        let tensor_buffer = match *tensor_type {
-            TensorElementType::Float32 => {
-                v8_typed_array_from!(Float32Array; tensor::<f32>(self), scope).into()
-            }
-            TensorElementType::Float64 => {
-                v8_typed_array_from!(Float64Array; tensor::<f64>(self), scope).into()
-            }
-            TensorElementType::String => {
-                // TODO: Handle string[] tensors from 'v8::Array'
-                return Err(anyhow!("Can't zero-copy tensor from it: 'String' does not implement the 'IntoTensorElementType' trait.").into());
-            }
-            TensorElementType::Int8 => {
-                v8_typed_array_from!(Int8Array; tensor::<i8>(self), scope).into()
-            }
-            TensorElementType::Uint8 => {
-                v8_typed_array_from!(Uint8Array; tensor::<i8>(self), scope).into()
-            }
-            TensorElementType::Int16 => {
-                v8_typed_array_from!(Int16Array; tensor::<i16>(self), scope).into()
-            }
-            TensorElementType::Uint16 => {
-                v8_typed_array_from!(Uint16Array; tensor::<u16>(self), scope).into()
-            }
-            TensorElementType::Int32 => {
-                v8_typed_array_from!(Int32Array; tensor::<i32>(self), scope).into()
-            }
-            TensorElementType::Uint32 => {
-                v8_typed_array_from!(Uint32Array; tensor::<u32>(self), scope).into()
-            }
-            TensorElementType::Int64 => {
-                v8_typed_array_from!(BigInt64Array; tensor::<i64>(self), scope).into()
-            }
-            TensorElementType::Uint64 => {
-                v8_typed_array_from!(BigUint64Array; tensor::<u64>(self), scope).into()
-            }
-            TensorElementType::Bool => {
-                v8_typed_array_from!(Uint8Array; tensor::<bool>(self), scope).into()
-            }
-            TensorElementType::Float16 => {
-                return Err(anyhow!("'half::f16' is not supported by JS tensor.").into())
-            }
-            TensorElementType::Bfloat16 => {
-                return Err(anyhow!("'half::bf16' is not supported by JS tensor.").into())
-            }
-        };
-
-        let tensor_type = tensor_type.to_v8(scope).map_err(AnyError::from)?;
-        let tensor_dims = to_v8(scope, dimensions).map_err(AnyError::from)?;
-
-        // We pass tensor as tuples to avoid string allocation
-        // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
-        let tensor =
-            v8::Array::new_with_elements(scope, &[tensor_type, tensor_buffer, tensor_dims]);
-
-        Ok(tensor.into())
-    }
-}
-
-#[derive(Debug)]
-pub struct JsTensorBufferView<'v> {
-    data_type: JsTensorElementType,
-    data_ptr: *mut c_void,
-    shape: Vec<i64>,
-    lifetime: PhantomData<&'v ()>,
-}
-
-impl<'v> JsTensorBufferView<'v> {
-    pub fn to_ort_tensor_ref(&self) -> anyhow::Result<ValueRefMut<'v, DynValueTypeMarker>> {
+    ) -> anyhow::Result<ValueRefMut<'a, DynValueTypeMarker>> {
         // Same impl. as the Tensor::from_array()
         // https://github.com/pykeio/ort/blob/abd527b6a1df8f566c729a9c4398bdfd185d652f/src/value/impl_tensor/create.rs#L170
         let memory_info = MemoryInfo::new(
@@ -281,47 +108,33 @@ impl<'v> JsTensorBufferView<'v> {
         )?;
 
         // TODO: Try to zero-copy shape also
-        let shape = self.shape.to_owned();
+        let shape = self.dims.to_owned();
 
         // Zero-Copying Data to an ORT Tensor based on JS type
-        let tensor = match *self.data_type {
-            TensorElementType::Float32 => unsafe {
-                TensorRefMut::<f32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Float64 => unsafe {
-                TensorRefMut::<f64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
+        let tensor = unsafe {
+            TensorRefMut::<T>::from_raw(memory_info, self.data.as_mut_ptr() as *mut c_void, shape)
+        }?;
+
+        Ok(tensor.into_dyn())
+    }
+
+    pub fn as_ort_input<'a>(self) -> anyhow::Result<SessionInputValue<'a>> {
+        let input_value = match self.data_type {
+            TensorElementType::Float32 => self.as_ort_tensor_ref::<f32>()?.into(),
+            TensorElementType::Float64 => self.as_ort_tensor_ref::<f64>()?.into(),
             TensorElementType::String => {
                 // TODO: Handle string[] tensors from 'v8::Array'
-                return Err(anyhow!("Can't zero-copy tensor from it: 'String' does not implement the 'IntoTensorElementType' trait."));
-            }
-            TensorElementType::Int8 => unsafe {
-                TensorRefMut::<i8>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Uint8 => unsafe {
-                TensorRefMut::<u8>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Int16 => unsafe {
-                TensorRefMut::<i16>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Uint16 => unsafe {
-                TensorRefMut::<u16>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Int32 => unsafe {
-                TensorRefMut::<i32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Uint32 => unsafe {
-                TensorRefMut::<u32>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Int64 => unsafe {
-                TensorRefMut::<i64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Uint64 => unsafe {
-                TensorRefMut::<u64>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
-            TensorElementType::Bool => unsafe {
-                TensorRefMut::<bool>::from_raw(memory_info, self.data_ptr, shape)?.into_dyn()
-            },
+                return Err(anyhow!("Can't extract tensor from it: 'String' does not implement the 'IntoTensorElementType' trait."));
+            }
+            TensorElementType::Int8 => self.as_ort_tensor_ref::<i8>()?.into(),
+            TensorElementType::Uint8 => self.as_ort_tensor_ref::<u8>()?.into(),
+            TensorElementType::Int16 => self.as_ort_tensor_ref::<i16>()?.into(),
+            TensorElementType::Uint16 => self.as_ort_tensor_ref::<u16>()?.into(),
+            TensorElementType::Int32 => self.as_ort_tensor_ref::<i32>()?.into(),
+            TensorElementType::Uint32 => self.as_ort_tensor_ref::<u32>()?.into(),
+            TensorElementType::Int64 => self.as_ort_tensor_ref::<i64>()?.into(),
+            TensorElementType::Uint64 => self.as_ort_tensor_ref::<u64>()?.into(),
+            TensorElementType::Bool => self.as_ort_tensor_ref::<bool>()?.into(),
             TensorElementType::Float16 => {
                 return Err(anyhow!("'half::f16' is not supported by JS tensor."))
             }
@@ -330,156 +143,50 @@ impl<'v> JsTensorBufferView<'v> {
             }
         };
 
-        Ok(tensor.into_dyn())
+        Ok(input_value)
     }
 }
 
-impl<'a> FromV8<'a> for JsTensorBufferView<'a> {
-    type Error = StdAnyError;
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ToJsTensor {
+    #[serde(rename = "type", with = "JsTensorType")]
+    data_type: TensorElementType,
+    data: ToJsBuffer,
+    dims: Vec<i64>,
+}
 
-    fn from_v8(
-        scope: &mut deno_core::v8::HandleScope<'a>,
-        value: deno_core::v8::Local<'a, deno_core::v8::Value>,
-    ) -> Result<Self, Self::Error> {
-        let value = v8::Local::<v8::Array>::try_from(value).map_err(AnyError::from)?;
+impl ToJsTensor {
+    pub fn from_ort_tensor(mut value: DynValue) -> anyhow::Result<Self> {
+        let ort_type = value.dtype().map_err(AnyError::from)?;
 
-        if value.length() != 3 {
+        let ValueType::Tensor { ty, dimensions } = ort_type else {
             return Err(anyhow!(
-                "expected a JS tuple of 3 elements, found {}",
-                value.length()
+                "JS only support 'ort::Value' of 'Tensor' type, got '{ort_type:?}'."
             )
             .into());
-        }
-        let data_type = v8::Local::<v8::String>::try_from(
-            value
-                .get_index(scope, 0)
-                .ok_or(anyhow!("tensor type was expected at tuple.0"))?,
-        )
-        .map_err(AnyError::from)?;
-
-        let data_type = JsTensorElementType::from_v8(scope, data_type.into())?;
-
-        // TODO: Handle 'v8::Array' for string[] tensors,
-        // since string[] tensors are not passed as 'v8::TypedArray'
-        let data_values = v8::Local::<v8::TypedArray>::try_from(
-            value
-                .get_index(scope, 1)
-                .ok_or(anyhow!("tensor data was expected at tuple.1"))?,
-        )
-        .map_err(AnyError::from)?;
-
-        let shape_dims = v8::Local::<v8::Array>::try_from(
-            value
-                .get_index(scope, 2)
-                .ok_or(anyhow!("tensor dims was expected at tuple.2"))?,
-        )
-        .map_err(AnyError::from)?;
+        };
 
-        // TODO: Pass dims as 'TypedArray' from Js
-        // Since dims are 'v8::Array' and not 'v8::TypedArray' we can't get the inner ptr to zero copy.
-        let shape_dims = from_v8::<Vec<_>>(scope, shape_dims.into()).map_err(AnyError::from)?;
+        let buffer_slice = match ty {
+            TensorElementType::Float32 => v8_slice_from!(tensor::<f32>(value)),
+            TensorElementType::Float64 => v8_slice_from!(tensor::<f64>(value)),
+            TensorElementType::Int8 => v8_slice_from!(tensor::<i8>(value)),
+            TensorElementType::Uint8 => v8_slice_from!(tensor::<u8>(value)),
+            TensorElementType::Int16 => v8_slice_from!(tensor::<i16>(value)),
+            TensorElementType::Uint16 => v8_slice_from!(tensor::<u16>(value)),
+            TensorElementType::Int32 => v8_slice_from!(tensor::<i32>(value)),
+            TensorElementType::Uint32 => v8_slice_from!(tensor::<u32>(value)),
+            TensorElementType::Int64 => v8_slice_from!(tensor::<i64>(value)),
+            TensorElementType::Uint64 => v8_slice_from!(tensor::<u64>(value)),
+            TensorElementType::Bool => v8_slice_from!(tensor::<bool>(value)),
+            TensorElementType::String => return Err(anyhow!("'String' is not supported by JS tensor.")),
+            TensorElementType::Float16 => return Err(anyhow!("'half::f16' is not supported by JS tensor.")),
+            TensorElementType::Bfloat16 => return Err(anyhow!("'half::bf16' is not supported by JS tensor.")),
+        };
 
-        Ok(JsTensorBufferView {
-            data_type,
-            data_ptr: data_values.data(),
-            shape: shape_dims,
-            lifetime: PhantomData::default(),
+        Ok(Self {
+            data_type: ty,
+            data: ToJsBuffer::from(buffer_slice.to_boxed_slice()),
+            dims: dimensions,
         })
     }
 }
-
-// Useful to receive the model inputs as JS sequence
-#[derive(Debug)]
-pub struct JsSessionInputs<'a>(Arc<Vec<JsTensorBufferView<'a>>>);
-
-impl<'a> JsSessionInputs<'a> {
-    pub fn to_ort_session_inputs(
-        self,
-        keys: impl Iterator<Item = String>,
-    ) -> Result<SessionInputs<'a, 'a>, AnyError> {
-        let mut session_inputs = vec![];
-
-        for (key, tensor_view) in keys.zip(self.iter()) {
-            let value = SessionInputValue::from(tensor_view.to_ort_tensor_ref()?);
-
-            session_inputs.push((Cow::from(key), value));
-        }
-
-        Ok(SessionInputs::ValueMap(session_inputs))
-    }
-}
-
-impl<'a> Deref for JsSessionInputs<'a> {
-    type Target = Vec<JsTensorBufferView<'a>>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl<'a> FromV8<'a> for JsSessionInputs<'a> {
-    type Error = StdAnyError;
-
-    fn from_v8(
-        scope: &mut deno_core::v8::HandleScope<'a>,
-        value: deno_core::v8::Local<'a, deno_core::v8::Value>,
-    ) -> Result<Self, Self::Error> {
-        let mut sequence = vec![];
-        let value = v8::Local::<v8::Array>::try_from(value).map_err(AnyError::from)?;
-
-        for idx in 0..value.length() {
-            let value = value
-                .get_index(scope, idx)
-                .ok_or(anyhow!("could no get value for index {idx}"))?;
-
-            let tensor = JsTensorBufferView::from_v8(scope, value)?;
-
-            sequence.push(tensor);
-        }
-
-        Ok(Self(Arc::new(sequence)))
-    }
-}
-
-#[derive(Debug)]
-pub struct JsSessionOutputs(pub Vec<JsDynTensorValue>);
-
-impl Deref for JsSessionOutputs {
-    type Target = Vec<JsDynTensorValue>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.0
-    }
-}
-
-impl DerefMut for JsSessionOutputs {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.0
-    }
-}
-
-// Useful to receive the model inputs as JS sequence
-impl<'a> ToV8<'a> for JsSessionOutputs {
-    type Error = StdAnyError;
-
-    fn to_v8(
-        mut self,
-        scope: &mut v8::HandleScope<'a>,
-    ) -> Result<v8::Local<'a, v8::Value>, Self::Error> {
-        // We pass as tuples to avoid string allocation
-        // https://docs.rs/deno_core/latest/deno_core/convert/trait.ToV8.html#structs
-
-        let mut elements = vec![];
-
-        for idx in 0..self.len() {
-            let value = self.remove(idx);
-            let js_value = value.to_v8(scope)?;
-
-            elements.push(js_value);
-        }
-
-        let output_tuple = v8::Array::new_with_elements(scope, &elements);
-
-        Ok(output_tuple.into())
-    }
-}

From 00786251db9615a6d58c745bbb39c0f2ba506464 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Tue, 1 Oct 2024 17:46:59 +0100
Subject: [PATCH 04/21] fix(ai): seq2seq models causing null pointer error

- Solved the "`GetMutableData` should not be a null pointer" error while
executing seq2seq models.
- Ref.: https://github.com/pykeio/ort/issues/185
---
 crates/sb_ai/onnxruntime/tensor.rs | 64 +++++++++++++++++-------------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/crates/sb_ai/onnxruntime/tensor.rs b/crates/sb_ai/onnxruntime/tensor.rs
index 0e702ef3d..91e03fa06 100644
--- a/crates/sb_ai/onnxruntime/tensor.rs
+++ b/crates/sb_ai/onnxruntime/tensor.rs
@@ -15,36 +15,46 @@ use serde::{Deserialize, Serialize};
 // but it only allows [u8] instead of [T], so we need to get into `unsafe` path.
 macro_rules! v8_slice_from {
     (tensor::<$type:ident>($tensor:expr)) => {{
-        let (_, raw_tensor) = $tensor
-            .try_extract_raw_tensor_mut::<$type>()
-            .map_err(AnyError::from)?;
-
-        let tensor_ptr = raw_tensor.as_ptr();
-        let tensor_len = raw_tensor.len();
-        let tensor_rc = Rc::into_raw(Rc::new(raw_tensor)) as *const c_void;
+        // Short-circuit empty tensors (any zero-sized dim) to avoid `null pointer` errors
+        // https://github.com/pykeio/ort/issues/185
+        let is_empty = $tensor.shape()?.contains(&0);
+        if is_empty {
+            let buf_store = v8::ArrayBuffer::new_backing_store_from_vec(vec![]).make_shared();
+            let buffer_slice =
+                unsafe { deno_core::serde_v8::V8Slice::<u8>::from_parts(buf_store, 0..0) };
+
+            buffer_slice
+        } else {
+            let (_, raw_tensor) = $tensor
+                .try_extract_raw_tensor_mut::<$type>()
+                .map_err(AnyError::from)?;
+
+            let tensor_ptr = raw_tensor.as_ptr();
+            let tensor_len = raw_tensor.len();
+            let tensor_rc = Rc::into_raw(Rc::new(raw_tensor)) as *const c_void;
+
+            let buffer_len = tensor_len * size_of::<$type>();
+
+            extern "C" fn drop_tensor(_ptr: *mut c_void, _len: usize, data: *mut c_void) {
+                // SAFETY: `data` is the `Rc<&mut [$type]>` leaked via `Rc::into_raw` above
+                unsafe { drop(Rc::from_raw(data.cast::<&mut [$type]>())) }
+            }
 
-        let buffer_len = tensor_len * size_of::<$type>();
+            let buf_store = unsafe {
+                v8::ArrayBuffer::new_backing_store_from_ptr(
+                    tensor_ptr as _,
+                    buffer_len,
+                    drop_tensor,
+                    tensor_rc as _,
+                )
+            }
+            .make_shared();
 
-        extern "C" fn drop_tensor(_ptr: *mut c_void, _len: usize, data: *mut c_void) {
-            // SAFETY: We know that data is a raw Rc from above
-            unsafe { drop(Rc::from_raw(data.cast::<$type>())) }
-        }
+            let buffer_slice =
+                unsafe { deno_core::serde_v8::V8Slice::<u8>::from_parts(buf_store, 0..buffer_len) };
 
-        // Zero-Copying using ptr
-        let buf_store = unsafe {
-            v8::ArrayBuffer::new_backing_store_from_ptr(
-                tensor_ptr as _,
-                buffer_len,
-                drop_tensor,
-                tensor_rc as _,
-            )
+            buffer_slice
         }
-        .make_shared();
-
-        let buffer_slice =
-            unsafe { deno_core::serde_v8::V8Slice::<u8>::from_parts(buf_store, 0..buffer_len) };
-
-        buffer_slice
     }};
 }
 
@@ -152,7 +162,7 @@ pub struct ToJsTensor {
     #[serde(rename = "type", with = "JsTensorType")]
     data_type: TensorElementType,
     data: ToJsBuffer,
-    dims: Vec<i64>,
+    pub dims: Vec<i64>,
 }
 
 impl ToJsTensor {

From 754c750028a49dc84dd90832c72e8401be76a2fd Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Wed, 16 Oct 2024 19:51:21 +0100
Subject: [PATCH 05/21] test(sb_ai): implementing tests for ort backend

- Applying integration tests over all NLP pipelines with ort backend
---
 .../ai-ort-rust-backend/main/index.ts         |  57 ++++++
 .../feature-extraction/index.ts               |  32 +++
 .../transformers-js/fill-mask/index.ts        |  23 +++
 .../question-answering/index.ts               |  23 +++
 .../transformers-js/summarization/index.ts    |  28 +++
 .../text-classification/index.ts              |  33 +++
 .../transformers-js/text-generation/index.ts  |  31 +++
 .../text2text-generation/index.ts             |  27 +++
 .../token-classification/index.ts             |  47 +++++
 .../transformers-js/translation/index.ts      |  29 +++
 .../zero-shot-classification/index.ts         |  27 +++
 crates/base/tests/integration_tests.rs        | 191 ++++++++++++++++++
 crates/sb_ai/js/onnxruntime/onnx.js           |   3 -
 crates/sb_ai/onnxruntime/mod.rs               |   1 +
 examples/ort-rust-backend/index.ts            |   5 +-
 15 files changed, 550 insertions(+), 7 deletions(-)
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/main/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/feature-extraction/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/fill-mask/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/question-answering/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/summarization/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-classification/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-generation/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/text2text-generation/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/token-classification/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/translation/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-classification/index.ts

diff --git a/crates/base/test_cases/ai-ort-rust-backend/main/index.ts b/crates/base/test_cases/ai-ort-rust-backend/main/index.ts
new file mode 100644
index 000000000..9919cd2bf
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/main/index.ts
@@ -0,0 +1,57 @@
+import * as path from "jsr:@std/path";
+
+Deno.serve(async (req: Request) => {
+  console.log(req.url);
+  const url = new URL(req.url);
+  const { pathname } = url;
+  const service_name = pathname;
+
+  if (!service_name || service_name === "") {
+    const error = { msg: "missing function name in request" }
+    return new Response(
+      JSON.stringify(error),
+      { status: 400, headers: { "Content-Type": "application/json" } },
+    )
+  }
+
+  const servicePath = path.join("test_cases/ai-ort-rust-backend", pathname);
+
+  const createWorker = async () => {
+    const memoryLimitMb = 750;
+    const workerTimeoutMs = 10 * 60 * 1000;
+    const cpuTimeSoftLimitMs = 10 * 60 * 1000;
+    const cpuTimeHardLimitMs = 10 * 60 * 1000;
+    const noModuleCache = false;
+    const importMapPath = null;
+    const envVarsObj = Deno.env.toObject();
+    const envVars = Object.keys(envVarsObj).map(k => [k, envVarsObj[k]]);
+
+    return await EdgeRuntime.userWorkers.create({
+      servicePath,
+      memoryLimitMb,
+      workerTimeoutMs,
+      cpuTimeSoftLimitMs,
+      cpuTimeHardLimitMs,
+      noModuleCache,
+      importMapPath,
+      envVars
+    });
+  }
+
+  const callWorker = async () => {
+    try {
+      const worker = await createWorker();
+      return await worker.fetch(req);
+    } catch (e) {
+      console.error(e);
+
+      const error = { msg: e.toString() }
+      return new Response(
+        JSON.stringify(error),
+        { status: 500, headers: { "Content-Type": "application/json" } },
+      );
+    }
+  }
+
+  return await callWorker();
+})
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/feature-extraction/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/feature-extraction/index.ts
new file mode 100644
index 000000000..b035fe630
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/feature-extraction/index.ts
@@ -0,0 +1,32 @@
+import { assertEquals, assertAlmostEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('feature-extraction', 'supabase/gte-small', { device: 'auto' }); // 384 dims model
+
+Deno.serve(async () => {
+  const input = [
+    'This framework generates embeddings for each input sentence',
+    'Sentences are passed as a list of string.',
+    'The quick brown fox jumps over the lazy dog.',
+  ];
+
+  const output = await pipe(input, { pooling: 'mean', normalize: true });
+
+  assertEquals(output.size, 3 * 384);
+  assertEquals(output.dims.length, 2);
+
+  // Comparing first 3 predictions
+  [-0.050660304725170135, -0.006694655399769545, 0.003071750048547983]
+    .map((expected, idx) => {
+      assertAlmostEquals(output.data[idx], expected);
+    });
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/fill-mask/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/fill-mask/index.ts
new file mode 100644
index 000000000..65c734871
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/fill-mask/index.ts
@@ -0,0 +1,23 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('fill-mask', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = '[MASK] is the capital of England.';
+
+  const output = await pipe(input);
+
+  assertEquals(output[0].token_str, 'london');
+  assertEquals(output[0].sequence, 'london is the capital of england.');
+  assertAlmostEquals(output[0].score, 0.3513388931751251);
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/question-answering/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/question-answering/index.ts
new file mode 100644
index 000000000..7d0cfae4f
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/question-answering/index.ts
@@ -0,0 +1,23 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('question-answering', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = 'Who was Jim Henson?';
+  const context = 'Jim Henson was a nice puppet.';
+
+  const output = await pipe(input, context);
+
+  assertEquals(output.answer, 'a nice puppet');
+  assertAlmostEquals(output.score, 0.7828674695785575);
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/summarization/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/summarization/index.ts
new file mode 100644
index 000000000..33df86a73
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/summarization/index.ts
@@ -0,0 +1,28 @@
+import { assertGreater, assertLess } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('summarization', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = [
+    `The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.`,
+    `The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain "Amazonas" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.`,
+  ];
+
+  const output = await pipe(input);
+
+  assertLess(output[0].summary_text.length, input[0].length / 2);
+  assertGreater(output[0].summary_text.length, 50);
+
+  assertLess(output[1].summary_text.length, input[1].length / 2); // each summary vs. its own source text
+  assertGreater(output[1].summary_text.length, 50);
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-classification/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-classification/index.ts
new file mode 100644
index 000000000..b9d0527d9
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-classification/index.ts
@@ -0,0 +1,33 @@
+
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('text-classification', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = [
+    'I love supabase',
+    'I hated the movie',
+  ];
+
+  const output = await pipe(input);
+
+  assertEquals(output.length, 2);
+
+  [
+    { label: 'POSITIVE', score: 0.9987488985061646 },
+    { label: 'NEGATIVE', score: 0.9996954202651978 },
+  ].map((expected, idx) => {
+    assertEquals(output[idx].label, expected.label);
+    assertAlmostEquals(output[idx].score, expected.score);
+  });
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-generation/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-generation/index.ts
new file mode 100644
index 000000000..df714a688
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text-generation/index.ts
@@ -0,0 +1,31 @@
+import { assertGreater } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+// The pipeline currently cannot resolve the default model name for this task by itself,
+// so we need to pass it explicitly
+const pipe = await pipeline(
+  'text-generation',
+  'Xenova/gpt2',
+  { device: 'auto', model_file_name: 'decoder_model_merged_quantized' },
+);
+
+Deno.serve(async () => {
+  const input = 'Once upon a time, there was a';
+
+  const output = await pipe(input, {
+    max_new_tokens: 10,
+    top_k: 0,
+    do_sample: false,
+  });
+
+  assertGreater(output[0].generated_text.length, input.length);
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text2text-generation/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text2text-generation/index.ts
new file mode 100644
index 000000000..72d476da9
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/text2text-generation/index.ts
@@ -0,0 +1,27 @@
+import { assertGreater, assertStringIncludes } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('text2text-generation', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = 'Premise:  At my age you will probably have learnt one lesson. ' +
+    "Hypothesis:  It's not certain how many lessons you'll learn by your thirties. " +
+    'Does the premise entail the hypothesis?';
+
+  const output = await pipe(input, {
+    top_k: 0,
+    do_sample: false,
+  });
+
+  assertGreater(output[0].generated_text.length, 0);
+  assertStringIncludes(output[0].generated_text, 'no');
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/token-classification/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/token-classification/index.ts
new file mode 100644
index 000000000..cb096a81e
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/token-classification/index.ts
@@ -0,0 +1,47 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('token-classification', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = "My name is Kalleby and I'm from Brazil.";
+
+  const output = await pipe(input);
+
+  assertEquals(output.length, 3);
+
+  [
+    {
+      entity: 'B-PER',
+      score: 0.9930744171142578,
+      index: 4,
+      word: 'Kalle',
+    },
+    {
+      entity: 'I-PER',
+      score: 0.9974944591522217,
+      index: 5,
+      word: '##by',
+    },
+    {
+      entity: 'B-LOC',
+      score: 0.9998322129249573,
+      index: 11,
+      word: 'Brazil',
+    },
+  ].map((expected, idx) => {
+    assertEquals(output[idx].entity, expected.entity);
+    assertAlmostEquals(output[idx].score, expected.score);
+    assertEquals(output[idx].index, expected.index);
+    assertEquals(output[idx].word, expected.word);
+  });
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/translation/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/translation/index.ts
new file mode 100644
index 000000000..5905fa862
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/translation/index.ts
@@ -0,0 +1,29 @@
+import { assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('translation', 'Xenova/opus-mt-en-de', { device: 'auto' });
+
+Deno.serve(async () => {
+  const input = [
+    'Hello, how are you?',
+    'My name is Maria.',
+  ];
+
+  const output = await pipe(input);
+
+  const expected = [
+    { translation_text: "Hallo, wie geht's?" },
+    { translation_text: 'Mein Name ist Maria.' },
+  ];
+
+  assertEquals(output, expected);
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-classification/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-classification/index.ts
new file mode 100644
index 000000000..fb0d67e59
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-classification/index.ts
@@ -0,0 +1,27 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  env,
+  pipeline,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('zero-shot-classification', null, { device: 'auto' });
+
+Deno.serve(async () => {
+  const sequences_to_classify = 'I love making pizza';
+  const candidate_labels = ['travel', 'cooking', 'dancing'];
+
+  const output = await pipe(sequences_to_classify, candidate_labels);
+
+  assertEquals(output.labels, ['cooking', 'travel', 'dancing']);
+
+  [0.9991624362472264, 0.0004726026797654259, 0.0003649610730082667]
+    .map((expected, idx) => {
+      assertAlmostEquals(output.scores[idx], expected);
+    });
+
+  return new Response();
+});
diff --git a/crates/base/tests/integration_tests.rs b/crates/base/tests/integration_tests.rs
index 2aeafcf50..27cdcbc96 100644
--- a/crates/base/tests/integration_tests.rs
+++ b/crates/base/tests/integration_tests.rs
@@ -2830,6 +2830,197 @@ async fn test_tmp_fs_should_not_be_available_in_import_stmt() {
     );
 }
 
+// -- sb_ai: ORT @huggingface/transformers
+#[tokio::test]
+#[serial]
+async fn test_ort_feature_extraction() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/feature-extraction",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_fill_mask() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/fill-mask",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_question_answering() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/question-answering",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_summarization() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/summarization",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_text_classification() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/text-classification",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_text_generation() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/text-generation",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_text2text_generation() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/text2text-generation",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_token_classification() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/token-classification",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_translation() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/translation",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_zero_shot_classification() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/zero-shot-classification",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
 #[derive(Deserialize)]
 struct ErrorResponsePayload {
     msg: String,
diff --git a/crates/sb_ai/js/onnxruntime/onnx.js b/crates/sb_ai/js/onnxruntime/onnx.js
index 01eac887a..ca1efeea8 100644
--- a/crates/sb_ai/js/onnxruntime/onnx.js
+++ b/crates/sb_ai/js/onnxruntime/onnx.js
@@ -81,9 +81,6 @@ const onnxruntime = {
   },
   Tensor,
   env: {
-    wasm: {
-      proxy: false
-    }
   }
 };
 
diff --git a/crates/sb_ai/onnxruntime/mod.rs b/crates/sb_ai/onnxruntime/mod.rs
index 43adf9613..e00fd658f 100644
--- a/crates/sb_ai/onnxruntime/mod.rs
+++ b/crates/sb_ai/onnxruntime/mod.rs
@@ -28,6 +28,7 @@ pub fn op_sb_ai_ort_run_session<'a>(
     let model = ModelSession::from_id(model_id).unwrap();
     let model_session = model.inner();
 
+    // println!("{model_session:?}");
     let input_values = input_values
         .into_iter()
         .map(|(key, value)| value.as_ort_input().map(|value| (Cow::from(key), value)))
diff --git a/examples/ort-rust-backend/index.ts b/examples/ort-rust-backend/index.ts
index 126deb259..4794722c3 100644
--- a/examples/ort-rust-backend/index.ts
+++ b/examples/ort-rust-backend/index.ts
@@ -1,7 +1,4 @@
-import {
-  env,
-  pipeline,
-} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.21/dist/transformers.min.js';
+import { env, pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
 
 // Ensure we do not use browser cache
 env.useBrowserCache = false;

From 9f78148efc0d6976a7ad269e4d1685b50ffefdec Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Thu, 31 Oct 2024 23:21:22 +0000
Subject: [PATCH 06/21] stamp(sb_ai): add example for generating image embeddings

---
 examples/generate-image-embeddings/index.ts | 82 +++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 examples/generate-image-embeddings/index.ts

diff --git a/examples/generate-image-embeddings/index.ts b/examples/generate-image-embeddings/index.ts
new file mode 100644
index 000000000..bee3d9968
--- /dev/null
+++ b/examples/generate-image-embeddings/index.ts
@@ -0,0 +1,82 @@
+import {
+  Gravity,
+  ImageMagick,
+  initializeImageMagick,
+  MagickColors,
+  MagickFormat,
+  MagickGeometry,
+} from 'npm:@imagemagick/magick-wasm@0.0.30';
+
+import {
+  env,
+  pipeline,
+  RawImage,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+const wasmBytes = await Deno.readFile(
+  new URL(
+    'magick.wasm',
+    import.meta.resolve('npm:@imagemagick/magick-wasm@0.0.30'),
+  ),
+);
+
+await initializeImageMagick(
+  wasmBytes,
+);
+
+// Configure the environment BEFORE creating the pipeline: do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+// May need to increase the worker memory limit
+const pipe = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32', {
+  device: 'auto',
+});
+
+export async function fetchImage(url: string) {
+  const imageRes = await fetch(new URL(url));
+  // Fail fast on HTTP errors instead of handing an error page to the image decoder
+  if (!imageRes.ok) throw new Error(`Failed to fetch image (HTTP ${imageRes.status}): ${url}`);
+
+  return new Uint8Array(await imageRes.arrayBuffer());
+}
+
+Deno.serve(async (request) => {
+  const { image_url } = await request.json();
+  const imageFile = await fetchImage(image_url);
+
+  const image = ImageMagick.read(imageFile, (img) => {
+    const { width, height } = pipe.processor.feature_extractor.crop_size;
+
+    // We need to resize to fit model dims
+    // https://legacy.imagemagick.org/Usage/resize/#space_fill
+    img.resize(new MagickGeometry(width, height));
+    img.extent(new MagickGeometry(width, height), Gravity.Center, MagickColors.Transparent);
+
+    return img
+      .write(
+        MagickFormat.Rgba,
+        (buffer) => ({
+          buffer,
+          width: img.width,
+          height: img.height,
+          channels: img.channels.length,
+        }),
+      );
+  });
+
+  const imageInput = new RawImage(
+    image.buffer,
+    image.width,
+    image.height,
+    image.channels,
+  );
+
+  // Disable pre-processor transformations
+  pipe.processor.feature_extractor.do_resize = false;
+  pipe.processor.feature_extractor.do_center_crop = false;
+
+  const output = await pipe(imageInput);
+
+  return Response.json({ output });
+});

From 116a99610fb3a61ed506edd1baf4a04448468e9f Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Sat, 2 Nov 2024 15:00:23 +0000
Subject: [PATCH 07/21] test(sb_ai): implementing computer vision tests for ort
 backend

- Add integration tests for the VISION pipelines running on the ort backend
---
 .../image-classification/index.ts             | 94 ++++++++++++++++++
 .../image-feature-extraction/index.ts         | 95 ++++++++++++++++++
 .../zero-shot-image-classification/index.ts   | 99 +++++++++++++++++++
 crates/base/tests/integration_tests.rs        | 77 +++++++++++++--
 4 files changed, 355 insertions(+), 10 deletions(-)
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-classification/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-feature-extraction/index.ts
 create mode 100644 crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-image-classification/index.ts

diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-classification/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-classification/index.ts
new file mode 100644
index 000000000..f6f72fd19
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-classification/index.ts
@@ -0,0 +1,94 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  Gravity,
+  ImageMagick,
+  initializeImageMagick,
+  MagickColors,
+  MagickFormat,
+  MagickGeometry,
+} from 'npm:@imagemagick/magick-wasm@0.0.30';
+
+import {
+  env,
+  pipeline,
+  RawImage,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+const wasmBytes = await Deno.readFile(
+  new URL(
+    'magick.wasm',
+    import.meta.resolve('npm:@imagemagick/magick-wasm@0.0.30'),
+  ),
+);
+
+await initializeImageMagick(
+  wasmBytes,
+);
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('image-classification', 'Xenova/vit-base-patch16-224', {
+  device: 'auto',
+});
+
+const preprocessor = (img) => {
+  const { width, height } = pipe.processor.feature_extractor.size;
+
+  // We need to resize to fit model dims
+  // https://legacy.imagemagick.org/Usage/resize/#space_fill
+  img.resize(new MagickGeometry(width, height));
+  img.extent(new MagickGeometry(width, height), Gravity.Center, MagickColors.Transparent);
+
+  return img
+    .write(
+      MagickFormat.Rgba,
+      (buffer) => ({
+        buffer,
+        width: img.width,
+        height: img.height,
+        channels: img.channels.length,
+      }),
+    );
+};
+
+export async function fetchImage(url: string) {
+  const imageRes = await fetch(new URL(url));
+  const imageBlob = await imageRes.blob();
+  const buffer = await imageBlob.arrayBuffer();
+
+  return new Uint8Array(buffer);
+}
+
+Deno.serve(async () => {
+  const imageFile = await fetchImage(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cropped_bee.jpg',
+  );
+
+  // Disable default pre-processor transformations
+  pipe.processor.feature_extractor.do_resize = false;
+  pipe.processor.feature_extractor.do_center_crop = false;
+
+  const image = ImageMagick.read(imageFile, preprocessor);
+
+  const imageInput = new RawImage(
+    image.buffer,
+    image.width,
+    image.height,
+    image.channels,
+  );
+
+  const output = await pipe(imageInput);
+
+  // Comparing first 2 predictions
+  [
+    { label: 'bee', score: 0.9869289994239807 },
+    { label: 'fly', score: 0.005201293155550957 },
+  ].forEach((expected, idx) => {
+    assertEquals(output[idx].label, expected.label);
+    assertAlmostEquals(output[idx].score, expected.score);
+  });
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-feature-extraction/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-feature-extraction/index.ts
new file mode 100644
index 000000000..ae3fe67b3
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/image-feature-extraction/index.ts
@@ -0,0 +1,95 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  Gravity,
+  ImageMagick,
+  initializeImageMagick,
+  MagickColors,
+  MagickFormat,
+  MagickGeometry,
+} from 'npm:@imagemagick/magick-wasm@0.0.30';
+
+import {
+  env,
+  pipeline,
+  RawImage,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+const wasmBytes = await Deno.readFile(
+  new URL(
+    'magick.wasm',
+    import.meta.resolve('npm:@imagemagick/magick-wasm@0.0.30'),
+  ),
+);
+
+await initializeImageMagick(
+  wasmBytes,
+);
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32', {
+  device: 'auto',
+});
+
+const preprocessor = (img) => {
+  const { width, height } = pipe.processor.feature_extractor.crop_size;
+
+  // We need to resize to fit model dims
+  // https://legacy.imagemagick.org/Usage/resize/#space_fill
+  img.resize(new MagickGeometry(width, height));
+  img.extent(new MagickGeometry(width, height), Gravity.Center, MagickColors.Transparent);
+
+  return img
+    .write(
+      MagickFormat.Rgba,
+      (buffer) => ({
+        buffer,
+        width: img.width,
+        height: img.height,
+        channels: img.channels.length,
+      }),
+    );
+};
+
+export async function fetchImage(url: string) {
+  const imageRes = await fetch(new URL(url));
+  const imageBlob = await imageRes.blob();
+  const buffer = await imageBlob.arrayBuffer();
+
+  return new Uint8Array(buffer);
+}
+
+Deno.serve(async () => {
+  const imageFile = await fetchImage(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png',
+  );
+
+  // Disable default pre-processor transformations
+  pipe.processor.feature_extractor.do_resize = false;
+  pipe.processor.feature_extractor.do_center_crop = false;
+
+  const image = ImageMagick.read(imageFile, preprocessor);
+
+  const imageInput = new RawImage(
+    image.buffer,
+    image.width,
+    image.height,
+    image.channels,
+  );
+
+  const output = await pipe(imageInput);
+
+  assertEquals(output.size, 512);
+  assertEquals(output.dims.length, 2);
+
+  // Comparing the first 3 values of the embedding
+  [0.06320247054100037, 0.01292148977518081, -0.14722692966461182]
+    .forEach((expected, idx) => {
+      assertAlmostEquals(output.data[idx], expected);
+    });
+
+
+  return new Response();
+});
diff --git a/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-image-classification/index.ts b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-image-classification/index.ts
new file mode 100644
index 000000000..0d3e57ae6
--- /dev/null
+++ b/crates/base/test_cases/ai-ort-rust-backend/transformers-js/zero-shot-image-classification/index.ts
@@ -0,0 +1,99 @@
+import { assertAlmostEquals, assertEquals } from 'jsr:@std/assert';
+import {
+  Gravity,
+  ImageMagick,
+  initializeImageMagick,
+  MagickColors,
+  MagickFormat,
+  MagickGeometry,
+} from 'npm:@imagemagick/magick-wasm@0.0.30';
+
+import {
+  env,
+  pipeline,
+  RawImage,
+} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1';
+
+const wasmBytes = await Deno.readFile(
+  new URL(
+    'magick.wasm',
+    import.meta.resolve('npm:@imagemagick/magick-wasm@0.0.30'),
+  ),
+);
+
+await initializeImageMagick(
+  wasmBytes,
+);
+
+// Ensure we do not use browser cache
+env.useBrowserCache = false;
+env.allowLocalModels = false;
+
+const pipe = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32', {
+  device: 'auto',
+});
+
+const preprocessor = (img) => {
+  const { shortest_edge } = pipe.processor.feature_extractor.size;
+
+  // We need to resize to fit model dims
+  // https://legacy.imagemagick.org/Usage/resize/#space_fill
+  img.resize(new MagickGeometry(shortest_edge));
+  img.extent(
+    new MagickGeometry(shortest_edge),
+    Gravity.Center,
+    MagickColors.Transparent,
+  );
+
+  return img
+    .write(
+      MagickFormat.Rgba,
+      (buffer) => ({
+        buffer,
+        width: img.width,
+        height: img.height,
+        channels: img.channels.length,
+      }),
+    );
+};
+
+export async function fetchImage(url: string) {
+  const imageRes = await fetch(new URL(url));
+  const imageBlob = await imageRes.blob();
+  const buffer = await imageBlob.arrayBuffer();
+
+  return new Uint8Array(buffer);
+}
+
+Deno.serve(async () => {
+  const imageFile = await fetchImage(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png',
+  );
+
+  // Disable default pre-processor transformations
+  pipe.processor.feature_extractor.do_resize = false;
+  pipe.processor.feature_extractor.do_center_crop = false;
+
+  const image = ImageMagick.read(imageFile, preprocessor);
+
+  const imageInput = new RawImage(
+    image.buffer,
+    image.width,
+    image.height,
+    image.channels,
+  );
+
+  const output = await pipe(imageInput, ['bird', 'dog', 'cat', 'airplane']);
+
+  [
+    { score: 0.9822331666946411, label: 'cat' },
+    { score: 0.012786678038537502, label: 'dog' },
+    { score: 0.003186287358403206, label: 'airplane' },
+    { score: 0.0017938446253538132, label: 'bird' },
+  ].forEach((expected, idx) => {
+    assertEquals(output[idx].label, expected.label);
+    assertAlmostEquals(output[idx].score, expected.score);
+  });
+
+  return new Response();
+});
diff --git a/crates/base/tests/integration_tests.rs b/crates/base/tests/integration_tests.rs
index 27cdcbc96..5bbe56d43 100644
--- a/crates/base/tests/integration_tests.rs
+++ b/crates/base/tests/integration_tests.rs
@@ -2833,7 +2833,7 @@ async fn test_tmp_fs_should_not_be_available_in_import_stmt() {
 // -- sb_ai: ORT @huggingface/transformers
 #[tokio::test]
 #[serial]
-async fn test_ort_feature_extraction() {
+async fn test_ort_nlp_feature_extraction() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2852,7 +2852,7 @@ async fn test_ort_feature_extraction() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_fill_mask() {
+async fn test_ort_nlp_fill_mask() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2871,7 +2871,7 @@ async fn test_ort_fill_mask() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_question_answering() {
+async fn test_ort_nlp_question_answering() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2890,7 +2890,7 @@ async fn test_ort_question_answering() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_summarization() {
+async fn test_ort_nlp_summarization() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2909,7 +2909,7 @@ async fn test_ort_summarization() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_text_classification() {
+async fn test_ort_nlp_text_classification() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2928,7 +2928,7 @@ async fn test_ort_text_classification() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_text_generation() {
+async fn test_ort_nlp_text_generation() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2947,7 +2947,7 @@ async fn test_ort_text_generation() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_text2text_generation() {
+async fn test_ort_nlp_text2text_generation() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2966,7 +2966,7 @@ async fn test_ort_text2text_generation() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_token_classification() {
+async fn test_ort_nlp_token_classification() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -2985,7 +2985,7 @@ async fn test_ort_token_classification() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_translation() {
+async fn test_ort_nlp_translation() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -3004,7 +3004,7 @@ async fn test_ort_translation() {
 
 #[tokio::test]
 #[serial]
-async fn test_ort_zero_shot_classification() {
+async fn test_ort_nlp_zero_shot_classification() {
     integration_test!(
         "./test_cases/ai-ort-rust-backend/main",
         NON_SECURE_PORT,
@@ -3021,6 +3021,63 @@ async fn test_ort_zero_shot_classification() {
     );
 }
 
+#[tokio::test]
+#[serial]
+async fn test_ort_vision_image_feature_extraction() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/image-feature-extraction",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_vision_image_classification() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/image-classification",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn test_ort_vision_zero_shot_image_classification() {
+    integration_test!(
+        "./test_cases/ai-ort-rust-backend/main",
+        NON_SECURE_PORT,
+        "transformers-js/zero-shot-image-classification",
+        None,
+        None,
+        None,
+        None,
+        (|resp| async {
+            let res = resp.unwrap();
+            assert_eq!(res.status().as_u16(), StatusCode::OK);
+        }),
+        TerminationToken::new()
+    );
+}
+
 #[derive(Deserialize)]
 struct ErrorResponsePayload {
     msg: String,

From 2d5d4620eb7df92b68828007ff0fd9af30b62fd0 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Sat, 2 Nov 2024 16:36:10 +0000
Subject: [PATCH 08/21] stamp: clippy

---
 crates/sb_ai/onnxruntime/mod.rs     |  8 ++++++--
 crates/sb_ai/onnxruntime/session.rs |  2 +-
 crates/sb_ai/onnxruntime/tensor.rs  | 29 ++++++++++++++---------------
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/crates/sb_ai/onnxruntime/mod.rs b/crates/sb_ai/onnxruntime/mod.rs
index e00fd658f..0fcd3fc15 100644
--- a/crates/sb_ai/onnxruntime/mod.rs
+++ b/crates/sb_ai/onnxruntime/mod.rs
@@ -21,7 +21,7 @@ pub fn op_sb_ai_ort_init_session(#[buffer] model_bytes: &[u8]) -> Result<ModelIn
 
 #[op2]
 #[serde]
-pub fn op_sb_ai_ort_run_session<'a>(
+pub fn op_sb_ai_ort_run_session(
     #[string] model_id: String,
     #[serde] input_values: HashMap<String, JsTensor>,
 ) -> Result<HashMap<String, ToJsTensor>> {
@@ -31,7 +31,11 @@ pub fn op_sb_ai_ort_run_session<'a>(
     // println!("{model_session:?}");
     let input_values = input_values
         .into_iter()
-        .map(|(key, value)| value.as_ort_input().map(|value| (Cow::from(key), value)))
+        .map(|(key, value)| {
+            value
+                .extract_ort_input()
+                .map(|value| (Cow::from(key), value))
+        })
         .collect::<Result<Vec<_>>>()?;
 
     let mut outputs = model_session.run(input_values)?;
diff --git a/crates/sb_ai/onnxruntime/session.rs b/crates/sb_ai/onnxruntime/session.rs
index 5b372c8d9..ba01afe27 100644
--- a/crates/sb_ai/onnxruntime/session.rs
+++ b/crates/sb_ai/onnxruntime/session.rs
@@ -135,7 +135,7 @@ pub(crate) fn load_session_from_bytes(model_bytes: &[u8]) -> Result<(String, Arc
 pub(crate) fn get_session(session_id: &String) -> Option<Arc<Session>> {
     let sessions = SESSIONS.lock().unwrap();
 
-    sessions.get(session_id).map(|session| session.clone())
+    sessions.get(session_id).cloned()
 }
 
 pub fn cleanup() -> Result<usize, AnyError> {
diff --git a/crates/sb_ai/onnxruntime/tensor.rs b/crates/sb_ai/onnxruntime/tensor.rs
index 91e03fa06..0f848b074 100644
--- a/crates/sb_ai/onnxruntime/tensor.rs
+++ b/crates/sb_ai/onnxruntime/tensor.rs
@@ -105,7 +105,7 @@ pub struct JsTensor {
 }
 
 impl JsTensor {
-    pub fn as_ort_tensor_ref<'a, T: IntoTensorElementType + Debug>(
+    pub fn extract_ort_tensor_ref<'a, T: IntoTensorElementType + Debug>(
         mut self,
     ) -> anyhow::Result<ValueRefMut<'a, DynValueTypeMarker>> {
         // Same impl. as the Tensor::from_array()
@@ -128,23 +128,23 @@ impl JsTensor {
         Ok(tensor.into_dyn())
     }
 
-    pub fn as_ort_input<'a>(self) -> anyhow::Result<SessionInputValue<'a>> {
+    pub fn extract_ort_input<'a>(self) -> anyhow::Result<SessionInputValue<'a>> {
         let input_value = match self.data_type {
-            TensorElementType::Float32 => self.as_ort_tensor_ref::<f32>()?.into(),
-            TensorElementType::Float64 => self.as_ort_tensor_ref::<f64>()?.into(),
+            TensorElementType::Float32 => self.extract_ort_tensor_ref::<f32>()?.into(),
+            TensorElementType::Float64 => self.extract_ort_tensor_ref::<f64>()?.into(),
             TensorElementType::String => {
                 // TODO: Handle string[] tensors from 'v8::Array'
                 return Err(anyhow!("Can't extract tensor from it: 'String' does not implement the 'IntoTensorElementType' trait."));
             }
-            TensorElementType::Int8 => self.as_ort_tensor_ref::<i8>()?.into(),
-            TensorElementType::Uint8 => self.as_ort_tensor_ref::<u8>()?.into(),
-            TensorElementType::Int16 => self.as_ort_tensor_ref::<i16>()?.into(),
-            TensorElementType::Uint16 => self.as_ort_tensor_ref::<u16>()?.into(),
-            TensorElementType::Int32 => self.as_ort_tensor_ref::<i32>()?.into(),
-            TensorElementType::Uint32 => self.as_ort_tensor_ref::<u32>()?.into(),
-            TensorElementType::Int64 => self.as_ort_tensor_ref::<i64>()?.into(),
-            TensorElementType::Uint64 => self.as_ort_tensor_ref::<u64>()?.into(),
-            TensorElementType::Bool => self.as_ort_tensor_ref::<bool>()?.into(),
+            TensorElementType::Int8 => self.extract_ort_tensor_ref::<i8>()?.into(),
+            TensorElementType::Uint8 => self.extract_ort_tensor_ref::<u8>()?.into(),
+            TensorElementType::Int16 => self.extract_ort_tensor_ref::<i16>()?.into(),
+            TensorElementType::Uint16 => self.extract_ort_tensor_ref::<u16>()?.into(),
+            TensorElementType::Int32 => self.extract_ort_tensor_ref::<i32>()?.into(),
+            TensorElementType::Uint32 => self.extract_ort_tensor_ref::<u32>()?.into(),
+            TensorElementType::Int64 => self.extract_ort_tensor_ref::<i64>()?.into(),
+            TensorElementType::Uint64 => self.extract_ort_tensor_ref::<u64>()?.into(),
+            TensorElementType::Bool => self.extract_ort_tensor_ref::<bool>()?.into(),
             TensorElementType::Float16 => {
                 return Err(anyhow!("'half::f16' is not supported by JS tensor."))
             }
@@ -172,8 +172,7 @@ impl ToJsTensor {
         let ValueType::Tensor { ty, dimensions } = ort_type else {
             return Err(anyhow!(
                 "JS only support 'ort::Value' of 'Tensor' type, got '{ort_type:?}'."
-            )
-            .into());
+            ));
         };
 
         let buffer_slice = match ty {

From 4765a3f773eb55aa721ebd40d326aa6056029839 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 07:53:39 +0000
Subject: [PATCH 09/21] fix(ci): share common env vars from a dotenv file

---
 .devcontainer/devcontainer.json |  8 +++-----
 .env                            |  5 +++++
 .github/workflows/ci.yml        |  6 ++++++
 .github/workflows/release.yml   | 12 ++++++++++--
 Dockerfile                      |  6 +++---
 scripts/install_onnx.sh         |  2 +-
 scripts/run.sh                  |  7 ++++---
 scripts/run_dind.sh             | 20 +++++++++-----------
 8 files changed, 41 insertions(+), 25 deletions(-)
 create mode 100644 .env

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 05d024879..269452501 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,11 +2,7 @@
   "name": "Rust",
   "build": {
     "dockerfile": "Dockerfile",
-    "context": "..",
-    "args": {
-      "ONNXRUNTIME_VERSION": "1.19.2",
-      "DENO_VERSION": "1.45.2"
-    }
+    "context": ".."
   },
   "containerEnv": {
     "PATH": "${localEnv:PATH}:/deno/bin"
@@ -16,6 +12,8 @@
     "ghcr.io/jungaretti/features/make:1": {}
   },
   "runArgs": [
+    "--env-file",
+    "../.env",
     "--rm",
     "--privileged",
     "--security-opt",
diff --git a/.env b/.env
new file mode 100644
index 000000000..6d06ccca0
--- /dev/null
+++ b/.env
@@ -0,0 +1,5 @@
+GIT_V_TAG=0.1.0
+ONNXRUNTIME_VERSION=1.19.2
+DENO_VERSION=1.45.2
+EDGE_RUNTIME_PORT=9998
+AI_INFERENCE_API_HOST=http://localhost:11434
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d58c0f270..2e4f3ab4d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,6 +12,7 @@ env:
   CARGO_NET_RETRY: 10
   CARGO_TERM_COLOR: always
   RUSTUP_MAX_RETRIES: 10
+  ORT_DYLIB_PATH: /usr/local/bin/libonnxruntime.so
 
 jobs:
   cargo-fmt:
@@ -49,4 +50,9 @@ jobs:
       - uses: actions/checkout@v4
       - run: rustup show
       - uses: Swatinem/rust-cache@v2
+      - uses: cardinalby/export-env-action@v2
+        with:
+          envFile: ".env"
+      - name: Install ONNX Runtime Library
+        run: ./scripts/install_onnx.sh ${{ env.ONNXRUNTIME_VERSION }} x64 /usr/local/bin/libonnxruntime.so
       - run: ./scripts/test.sh
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 45ffeee73..6f50eed21 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -52,6 +52,9 @@ jobs:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PASSWORD }}
 
+      - uses: cardinalby/export-env-action@v2
+        with:
+          envFile: ".env"
       - id: build
         uses: docker/build-push-action@v3
         with:
@@ -61,7 +64,8 @@ jobs:
           cache-from: type=gha
           cache-to: type=gha,mode=max
           build-args: |
-            GIT_V_VERSION=${{ needs.release.outputs.version }}
+            GIT_V_TAG=${{ needs.release.outputs.version }}
+            ONNXRUNTIME_VERSION=${{ env.ONNXRUNTIME_VERSION }}
 
   publish_arm:
     needs:
@@ -95,6 +99,9 @@ jobs:
             image=moby/buildkit:master
             network=host
 
+      - uses: cardinalby/export-env-action@v2
+        with:
+          envFile: ".env"
       - id: build
         uses: docker/build-push-action@v3
         with:
@@ -104,7 +111,8 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           no-cache: true
           build-args: |
-            GIT_V_VERSION=${{ needs.release.outputs.version }}
+            GIT_V_TAG=${{ needs.release.outputs.version }}
+            ONNXRUNTIME_VERSION=${{ env.ONNXRUNTIME_VERSION }}
 
   merge_manifest:
     needs: [release, publish_x86, publish_arm]
diff --git a/Dockerfile b/Dockerfile
index 1938ad0d1..e58b52564 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,8 +3,8 @@
 FROM rust:1.79.0-bookworm as builder
 
 ARG TARGETPLATFORM
-ARG GIT_V_VERSION
-ARG ONNXRUNTIME_VERSION=1.19.2
+ARG GIT_V_TAG
+ARG ONNXRUNTIME_VERSION
 ARG PROFILE=release
 ARG FEATURES
 
@@ -15,7 +15,7 @@ WORKDIR /usr/src/edge-runtime
 COPY . .
 
 RUN --mount=type=cache,target=/usr/local/cargo/registry,id=${TARGETPLATFORM} --mount=type=cache,target=/usr/src/edge-runtime/target,id=${TARGETPLATFORM} \
-    GIT_V_TAG=${GIT_V_VERSION} cargo build --profile ${PROFILE} --features "${FEATURES}" && \
+    GIT_V_TAG=${GIT_V_TAG} cargo build --profile ${PROFILE} --features "${FEATURES}" && \
     mv /usr/src/edge-runtime/target/${PROFILE}/edge-runtime /root
 
 RUN objcopy --compress-debug-sections \
diff --git a/scripts/install_onnx.sh b/scripts/install_onnx.sh
index 601e1cf73..2649bdc80 100755
--- a/scripts/install_onnx.sh
+++ b/scripts/install_onnx.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-ONNX_VERSION=${1:-1.19.2}
+ONNX_VERSION=$1
 TARGETPLATFORM=$2
 SAVE_PATH=${3:-"./onnx-runtime"}
 
diff --git a/scripts/run.sh b/scripts/run.sh
index 9a7920e7b..a5bcf27c9 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -1,8 +1,9 @@
 #!/usr/bin/env bash
 
+export $(grep -v '^#' "$(dirname "$0")/../.env" | xargs)
+
 # --features cli/tracing
-GIT_V_TAG=0.1.1 cargo build --features cli/tracing && \
-AI_INFERENCE_API_HOST=http://localhost:11434 \
-EDGE_RUNTIME_PORT=9998 RUST_BACKTRACE=full ./target/debug/edge-runtime "$@" start \
+cargo build --features cli/tracing && \
+RUST_BACKTRACE=full ./target/debug/edge-runtime "$@" start \
     --main-service ./examples/main \
     --event-worker ./examples/event-manager
\ No newline at end of file
diff --git a/scripts/run_dind.sh b/scripts/run_dind.sh
index a7dc93fc9..0fe67835b 100755
--- a/scripts/run_dind.sh
+++ b/scripts/run_dind.sh
@@ -1,8 +1,5 @@
 #!/usr/bin/env bash
 
-GIT_V_TAG=0.1.1
-EDGE_RUNTIME_PORT=9998
-ONNXRUNTIME_VERSION=1.19.2
 FEATURES=cli/tracing
 RUST_BACKTRACE=full
 
@@ -11,15 +8,16 @@ PROFILE=${1:-dind}
 SCRIPT=$(readlink -f "$0")
 SCRIPTPATH=$(dirname "$SCRIPT")
 
-cd "$SCRIPTPATH" &&
-  docker build \
-    -t edge_runtime \
-    --build-arg GIT_V_TAG=$GIT_V_TAG \
-    --build-arg ONNXRUNTIME_VERSION=$ONNXRUNTIME_VERSION \
-    --build-arg PROFILE=$PROFILE \
-    --build-arg FEATURES=$FEATURES \
-    "$SCRIPTPATH/.."
+cd "$SCRIPTPATH"
 
+export $(grep -v '^#' ../.env | xargs)
+docker build \
+  -t edge_runtime \
+  --build-arg GIT_V_TAG=$GIT_V_TAG \
+  --build-arg ONNXRUNTIME_VERSION=$ONNXRUNTIME_VERSION \
+  --build-arg PROFILE=$PROFILE \
+  --build-arg FEATURES=$FEATURES \
+  "$SCRIPTPATH/.."
 docker run \
   --privileged \
   --rm \

From 57d30657f5a17704f8bc342d5e8e0affd809ef0b Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Thu, 7 Nov 2024 09:55:17 +0000
Subject: [PATCH 10/21] fix(ci): update `ORT_DYLIB_PATH`

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2e4f3ab4d..5a5a207fc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ env:
   CARGO_NET_RETRY: 10
   CARGO_TERM_COLOR: always
   RUSTUP_MAX_RETRIES: 10
-  ORT_DYLIB_PATH: /usr/local/bin/libonnxruntime.so
+  ORT_DYLIB_PATH: /usr/local/bin/onnxruntime/lib/libonnxruntime.so
 
 jobs:
   cargo-fmt:
@@ -54,5 +54,5 @@ jobs:
         with:
           envFile: ".env"
       - name: Install ONNX Runtime Library
-        run: ./scripts/install_onnx.sh ${{ env.ONNXRUNTIME_VERSION }} x64 /usr/local/bin/libonnxruntime.so
+        run: ./scripts/install_onnx.sh ${{ env.ONNXRUNTIME_VERSION }} x64 /usr/local/bin/onnxruntime
       - run: ./scripts/test.sh

From 6bef30caa30525929c5b161c0dbd0d04e46d5dd0 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 20:34:22 +0900
Subject: [PATCH 11/21] fix(ci): share common env vars from a dotenv file

---
 .devcontainer/Dockerfile     | 18 ++++--------------
 .devcontainer/install.sh     | 20 ++++++++++++++++++++
 .env                         |  1 -
 .github/workflows/ci.yml     |  4 ++--
 Dockerfile                   |  9 +++------
 scripts/run.sh               |  6 +++++-
 scripts/run_dind.sh          | 10 ++++------
 scripts/test_indefinitely.sh |  2 +-
 8 files changed, 39 insertions(+), 31 deletions(-)
 create mode 100755 .devcontainer/install.sh

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 8f00a59fe..4b297e485 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,28 +1,18 @@
 FROM mcr.microsoft.com/devcontainers/rust:dev-1-bookworm
 
 ARG TARGETPLATFORM
-ARG ONNXRUNTIME_VERSION
-ARG DENO_VERSION
 
 RUN apt-get update && apt-get install -y build-essential cmake libclang-dev lldb \
     nodejs npm hyperfine
 
+COPY .env /tmp/.env
+COPY .devcontainer/install.sh /tmp/install.sh
 COPY scripts/install_onnx.sh /tmp/install_onnx.sh
 COPY scripts/download_models.sh /tmp/download_models.sh
 
 WORKDIR /tmp
-RUN ./install_onnx.sh $ONNXRUNTIME_VERSION $TARGETPLATFORM /usr/local/bin/libonnxruntime.so
-RUN ./download_models.sh
-RUN mkdir -p /etc/sb_ai && cp -r /tmp/models /etc/sb_ai/models
 
-ENV ORT_DYLIB_PATH=/usr/local/bin/libonnxruntime.so
 ENV SB_AI_MODELS_DIR=/etc/sb_ai/models
-
-# Ollama
-RUN curl -fsSL https://ollama.com/install.sh | sh
-
-# Deno
 ENV DENO_INSTALL=/deno
-RUN mkdir -p /deno \
-    && curl -fsSL https://deno.land/install.sh | bash -s -- v$DENO_VERSION \
-    && chown -R vscode /deno
+
+RUN /tmp/install.sh $TARGETPLATFORM
diff --git a/.devcontainer/install.sh b/.devcontainer/install.sh
new file mode 100755
index 000000000..440793570
--- /dev/null
+++ b/.devcontainer/install.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+set -eo pipefail # pipefail: a failed download in `curl ... | sh` must abort the build
+
+TARGETPLATFORM=$1
+
+export $(grep -v '^#' /tmp/.env | xargs)
+
+# ONNX Runtime
+/tmp/install_onnx.sh "$ONNXRUNTIME_VERSION" "$TARGETPLATFORM" /tmp/onnxruntime
+mv /tmp/onnxruntime/lib/libonnxruntime.so* /usr/lib
+/tmp/download_models.sh
+mkdir -p /etc/sb_ai && cp -r /tmp/models /etc/sb_ai/models
+
+# Ollama
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Deno
+mkdir -p /deno
+curl -fsSL https://deno.land/install.sh | bash -s -- "v$DENO_VERSION"
+chown -R vscode /deno
diff --git a/.env b/.env
index 6d06ccca0..ffb5ba486 100644
--- a/.env
+++ b/.env
@@ -1,4 +1,3 @@
-GIT_V_TAG=0.1.0
 ONNXRUNTIME_VERSION=1.19.2
 DENO_VERSION=1.45.2
 EDGE_RUNTIME_PORT=9998
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5a5a207fc..b18b20db1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ env:
   CARGO_NET_RETRY: 10
   CARGO_TERM_COLOR: always
   RUSTUP_MAX_RETRIES: 10
-  ORT_DYLIB_PATH: /usr/local/bin/onnxruntime/lib/libonnxruntime.so
+  ORT_DYLIB_PATH: /tmp/onnxruntime/lib/libonnxruntime.so
 
 jobs:
   cargo-fmt:
@@ -54,5 +54,5 @@ jobs:
         with:
           envFile: ".env"
       - name: Install ONNX Runtime Library
-        run: ./scripts/install_onnx.sh ${{ env.ONNXRUNTIME_VERSION }} x64 /usr/local/bin/onnxruntime
+        run: ./scripts/install_onnx.sh ${{ env.ONNXRUNTIME_VERSION }} x64 /tmp/onnxruntime
       - run: ./scripts/test.sh
diff --git a/Dockerfile b/Dockerfile
index e58b52564..f2962b709 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,8 +3,8 @@
 FROM rust:1.79.0-bookworm as builder
 
 ARG TARGETPLATFORM
-ARG GIT_V_TAG
 ARG ONNXRUNTIME_VERSION
+ARG GIT_V_TAG
 ARG PROFILE=release
 ARG FEATURES
 
@@ -36,8 +36,6 @@ RUN apt-get remove -y perl && apt-get autoremove -y
 COPY --from=builder /root/edge-runtime /usr/local/bin/edge-runtime
 COPY --from=builder /root/edge-runtime.debug /usr/local/bin/edge-runtime.debug
 
-ENV ORT_DYLIB_PATH=/usr/local/bin/onnxruntime/lib/libonnxruntime.so
-
 
 # ONNX Runtime provider
 # Application runtime with ONNX
@@ -60,10 +58,9 @@ FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 as edge-runtime-cuda
 
 COPY --from=edge-runtime-base /usr/local/bin/edge-runtime /usr/local/bin/edge-runtime
 COPY --from=builder /root/edge-runtime.debug /usr/local/bin/edge-runtime.debug
-COPY --from=ort-cuda /root/onnxruntime /usr/local/bin/onnxruntime
+COPY --from=ort-cuda /root/onnxruntime/lib/libonnxruntime.so* /usr/lib/
 COPY --from=preload-models /usr/src/edge-runtime/models /etc/sb_ai/models
 
-ENV ORT_DYLIB_PATH=/usr/local/bin/onnxruntime/lib/libonnxruntime.so
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 
@@ -72,7 +69,7 @@ ENTRYPOINT ["edge-runtime"]
 
 # Base
 FROM edge-runtime-base as edge-runtime
-COPY --from=ort /root/onnxruntime /usr/local/bin/onnxruntime
+COPY --from=ort /root/onnxruntime/lib/libonnxruntime.so* /usr/lib/
 COPY --from=preload-models /usr/src/edge-runtime/models /etc/sb_ai/models
 
 ENTRYPOINT ["edge-runtime"]
diff --git a/scripts/run.sh b/scripts/run.sh
index a5bcf27c9..0093f0f43 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -1,6 +1,10 @@
 #!/usr/bin/env bash
+set -e
 
-export $(grep -v '^#' ../.env | xargs)
+SCRIPT=$(readlink -f "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+
+export $(grep -v '^#' "$SCRIPTPATH/../.env" | xargs)
 
 # --features cli/tracing
 cargo build --features cli/tracing && \
diff --git a/scripts/run_dind.sh b/scripts/run_dind.sh
index 0fe67835b..0f772b84b 100755
--- a/scripts/run_dind.sh
+++ b/scripts/run_dind.sh
@@ -1,23 +1,23 @@
 #!/usr/bin/env bash
+set -e
 
 FEATURES=cli/tracing
 RUST_BACKTRACE=full
 
-PWD=$(pwd)
 PROFILE=${1:-dind}
 SCRIPT=$(readlink -f "$0")
 SCRIPTPATH=$(dirname "$SCRIPT")
 
-cd "$SCRIPTPATH"
+export $(grep -v '^#' "$SCRIPTPATH/../.env" | xargs)
 
 docker build \
   -t edge_runtime \
-  --env-file ../.env \
+  --build-arg GIT_V_TAG=$GIT_V_TAG \
+  --build-arg ONNXRUNTIME_VERSION=$ONNXRUNTIME_VERSION \
   --build-arg PROFILE=$PROFILE \
   --build-arg FEATURES=$FEATURES \
   "$SCRIPTPATH/.."
 
-export $(grep -v '^#' ../.env | xargs)
 docker run \
   --privileged \
   --rm \
@@ -34,5 +34,3 @@ docker run \
   --main-service ./examples/main \
   --event-worker ./examples/event-manager \
   --static "./examples/**/*.bin"
-
-cd $PWD
diff --git a/scripts/test_indefinitely.sh b/scripts/test_indefinitely.sh
index dcdeefa66..45dd598a9 100755
--- a/scripts/test_indefinitely.sh
+++ b/scripts/test_indefinitely.sh
@@ -14,4 +14,4 @@ while true; do
     fi
 
     echo "Tests passed. Running again..."
-done
\ No newline at end of file
+done

From c7ba875eb5dc71fd87f07f2d9d89fbca1bba1f2e Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:35:59 +0000
Subject: [PATCH 12/21] chore(sb_ai): update dependencies

---
 crates/sb_ai/Cargo.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/sb_ai/Cargo.toml b/crates/sb_ai/Cargo.toml
index d48320977..2ebef630e 100644
--- a/crates/sb_ai/Cargo.toml
+++ b/crates/sb_ai/Cargo.toml
@@ -38,7 +38,8 @@ rand = "0.8"
 convert_case = "0.6"
 tokenizers = { version = ">=0.13.4", default-features = false, features = [ "onig" ] }
 
-ort = { git = "https://github.com/pykeio/ort", default-features = false, features = [
+ort-sys = "=2.0.0-rc.2"
+ort = { version = "=2.0.0-rc.2", default-features = false, features = [
   "ndarray",
   "half",
   "load-dynamic",

From 4f620fcf20c424f125709257ed284d47720e353a Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:36:13 +0000
Subject: [PATCH 13/21] chore(event_worker): add a dependency

---
 crates/event_worker/Cargo.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/event_worker/Cargo.toml b/crates/event_worker/Cargo.toml
index 25e364ba4..1f74419e8 100644
--- a/crates/event_worker/Cargo.toml
+++ b/crates/event_worker/Cargo.toml
@@ -19,4 +19,5 @@ uuid.workspace = true
 serde.workspace = true
 anyhow.workspace = true
 tokio.workspace = true
-log.workspace = true
\ No newline at end of file
+log.workspace = true
+tracing.workspace = true
\ No newline at end of file

From c6c067c3b602cf94a9410ab859bbdeba03544643 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:36:46 +0000
Subject: [PATCH 14/21] chore(event_worker): install a tracing macro

---
 crates/event_worker/js_interceptors.rs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/crates/event_worker/js_interceptors.rs b/crates/event_worker/js_interceptors.rs
index 35bd33ff4..5d44e5fed 100644
--- a/crates/event_worker/js_interceptors.rs
+++ b/crates/event_worker/js_interceptors.rs
@@ -5,6 +5,7 @@ use deno_core::op2;
 use deno_core::OpState;
 use log::error;
 use tokio::sync::mpsc;
+use tracing::trace;
 
 #[op2(fast)]
 fn op_user_worker_log(
@@ -24,15 +25,16 @@ fn op_user_worker_log(
             .unwrap_or(&EventMetadata::default())
             .clone();
 
-        let metadata = EventMetadata { ..event_metadata };
-
-        tx.send(WorkerEventWithMetadata {
+        let metadata = WorkerEventWithMetadata {
             event: WorkerEvents::Log(LogEvent {
                 msg: msg.to_string(),
                 level,
             }),
-            metadata,
-        })?;
+            metadata: EventMetadata { ..event_metadata },
+        };
+
+        trace!(?metadata);
+        tx.send(metadata)?;
     } else {
         error!("[{:?}] {}", level, msg.to_string());
     }

From 4e8871588397498ef2855be4423a5b62c960a7da Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:37:13 +0000
Subject: [PATCH 15/21] chore(base): update `Cargo.toml`

---
 crates/base/Cargo.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/base/Cargo.toml b/crates/base/Cargo.toml
index f3b2ffd69..33641f3b3 100644
--- a/crates/base/Cargo.toml
+++ b/crates/base/Cargo.toml
@@ -74,6 +74,7 @@ notify.workspace = true
 pin-project.workspace = true
 rustls-pemfile.workspace = true
 tracing.workspace = true
+tracing-subscriber = { workspace = true, optional = true, features = ["env-filter", "tracing-log"] }
 
 reqwest_v011 = { package = "reqwest", version = "0.11", features = ["stream", "json", "multipart"] }
 tls-listener = { version = "0.10", features = ["rustls"] }
@@ -129,4 +130,5 @@ tokio.workspace = true
 url.workspace = true
 
 [features]
+tracing = ["dep:tracing-subscriber"]
 termination-signal-ext = []
\ No newline at end of file

From f4bc4bbcda791268bfcb2bd901e85143f57a1663 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:37:59 +0000
Subject: [PATCH 16/21] chore(base): trace `malloced_mb` more precisely

---
 crates/base/src/deno_runtime.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/crates/base/src/deno_runtime.rs b/crates/base/src/deno_runtime.rs
index 2fb311847..29887e2a6 100644
--- a/crates/base/src/deno_runtime.rs
+++ b/crates/base/src/deno_runtime.rs
@@ -190,6 +190,7 @@ impl MemCheck {
             }
         }
 
+        trace!(malloced_mb = bytes_to_display(total_bytes as u64));
         total_bytes
     }
 }
@@ -965,11 +966,9 @@ where
 
             if is_user_worker {
                 let mem_state = mem_check_state.as_ref().unwrap();
-                let total_malloced_bytes = mem_state.check(js_runtime.v8_isolate().as_mut());
 
+                mem_state.check(js_runtime.v8_isolate().as_mut());
                 mem_state.waker.register(waker);
-
-                trace!(malloced_mb = bytes_to_display(total_malloced_bytes as u64));
             }
 
             // NOTE(Nyannyacha): If tasks are empty or V8 is not evaluating the

From 9cd4943fbd5a196e560c60b90d08ca164c088310 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:38:33 +0000
Subject: [PATCH 17/21] chore: update an integration test case script

---
 crates/base/test_cases/ai-ort-rust-backend/main/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/base/test_cases/ai-ort-rust-backend/main/index.ts b/crates/base/test_cases/ai-ort-rust-backend/main/index.ts
index 9919cd2bf..f570af0f4 100644
--- a/crates/base/test_cases/ai-ort-rust-backend/main/index.ts
+++ b/crates/base/test_cases/ai-ort-rust-backend/main/index.ts
@@ -17,7 +17,7 @@ Deno.serve(async (req: Request) => {
   const servicePath = path.join("test_cases/ai-ort-rust-backend", pathname);
 
   const createWorker = async () => {
-    const memoryLimitMb = 750;
+    const memoryLimitMb = 1500;
     const workerTimeoutMs = 10 * 60 * 1000;
     const cpuTimeSoftLimitMs = 10 * 60 * 1000;
     const cpuTimeHardLimitMs = 10 * 60 * 1000;

From 79ac8f43020df818d3d836ba1ef6a7d14cdbd79c Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:39:33 +0000
Subject: [PATCH 18/21] chore: install tracing subscriber when `base/tracing`
 feature is enabled

---
 crates/base/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/base/src/lib.rs b/crates/base/src/lib.rs
index bbd29a557..2f25b8a8b 100644
--- a/crates/base/src/lib.rs
+++ b/crates/base/src/lib.rs
@@ -15,5 +15,5 @@ pub use inspector_server::InspectorOption;
 pub use sb_core::cache::CacheSetting;
 pub use sb_graph::DecoratorType;
 
-#[cfg(test)]
+#[cfg(any(test, feature = "tracing"))]
 mod tracing_subscriber;

From 9f26703b24eaa73c67be9d8c947f73de8f7a4868 Mon Sep 17 00:00:00 2001
From: Nyannyacha <meow@nnc.gg>
Date: Thu, 7 Nov 2024 13:39:42 +0000
Subject: [PATCH 19/21] chore: update `Cargo.lock`

---
 Cargo.lock | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d8804e9c0..f72ea89a2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1526,7 +1526,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b28bfe653d79bd16c77f659305b195b82bb5ce0c0eb2a4846b82ddbd77586813"
 dependencies = [
  "bitflags 2.5.0",
- "libloading 0.7.4",
+ "libloading 0.8.1",
  "winapi",
 ]
 
@@ -2540,6 +2540,7 @@ dependencies = [
  "log",
  "serde",
  "tokio",
+ "tracing",
  "uuid",
 ]
 
@@ -4515,7 +4516,8 @@ dependencies = [
 [[package]]
 name = "ort"
 version = "2.0.0-rc.2"
-source = "git+https://github.com/pykeio/ort#467d127c5877b099e1d0f605d38b74d221b6121c"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bc80894094c6a875bfac64415ed456fa661081a278a035e22be661305c87e14"
 dependencies = [
  "half",
  "js-sys",
@@ -4530,7 +4532,8 @@ dependencies = [
 [[package]]
 name = "ort-sys"
 version = "2.0.0-rc.2"
-source = "git+https://github.com/pykeio/ort#467d127c5877b099e1d0f605d38b74d221b6121c"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d9c1373fc813d3f024d394f621f4c6dde0734c79b1c17113c3bb5bf0084bbe"
 
 [[package]]
 name = "os_pipe"
@@ -5576,6 +5579,7 @@ dependencies = [
  "ndarray-linalg",
  "once_cell",
  "ort",
+ "ort-sys",
  "rand",
  "reqwest 0.12.4",
  "scopeguard",
@@ -7838,7 +7842,7 @@ dependencies = [
  "js-sys",
  "khronos-egl",
  "libc",
- "libloading 0.7.4",
+ "libloading 0.8.1",
  "log",
  "metal",
  "naga",

From ff43c6ff98dcb9de4cba896bc25ee16e10d3ab48 Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Sat, 9 Nov 2024 16:30:25 +0000
Subject: [PATCH 20/21] stamp: add `docker build` script with shared envs

---
 scripts/build_docker.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100755 scripts/build_docker.sh

diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh
new file mode 100755
index 000000000..eb57f8dc2
--- /dev/null
+++ b/scripts/build_docker.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -e
+
+SCRIPT=$(readlink -f "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+
+export $(grep -v '^#' "$SCRIPTPATH/../.env" | xargs)
+
+docker build \
+  --build-arg "GIT_V_TAG=$GIT_V_TAG" \
+  --build-arg "ONNXRUNTIME_VERSION=$ONNXRUNTIME_VERSION" \
+  "$@" \
+  "$SCRIPTPATH/.."

From a9931df61714678ff1c37e06a3390523dac3b61d Mon Sep 17 00:00:00 2001
From: kallebysantos <kalleby_santos@hotmail.com>
Date: Sat, 9 Nov 2024 17:13:28 +0000
Subject: [PATCH 21/21] fix(devcontainer): shared `.env` file path

---
 .devcontainer/devcontainer.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 269452501..9ce392e79 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -13,7 +13,7 @@
   },
   "runArgs": [
     "--env-file",
-    "../.env",
+    ".env",
     "--rm",
     "--privileged",
     "--security-opt",