This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

BLOOM Refactor #141

Closed · wants to merge 25 commits into main from dfo/model/bloom

Commits (25)
- d84aa7f: Create a Model trait (danforbes, Apr 15, 2023)
- e0713a1: Bloom model (danforbes, Apr 15, 2023)
- 6bfda75: cargo fmt (danforbes, Apr 16, 2023)
- 73f59c3: Rename llama-rs to llm-base (danforbes, Apr 16, 2023)
- e670c25: Clippy (danforbes, Apr 16, 2023)
- c4b4176: Remove redundant associated Model type from Model trait (danforbes, Apr 16, 2023)
- 1cf305f: Remove associated Layer type from Model trait (danforbes, Apr 16, 2023)
- 0d4dde9: cargo fmt (danforbes, Apr 16, 2023)
- 849c28d: Docs (danforbes, Apr 16, 2023)
- 54ad890: Tests and examples (danforbes, Apr 16, 2023)
- 4ba7c1c: Layers are private (danforbes, Apr 16, 2023)
- dcf85ff: Merge branch 'main' of github.com:rustformers/llama-rs into dfo/model… (philpax, Apr 22, 2023)
- 43ecac1: Merge branch 'main' into dfo/model/bloom (philpax, Apr 25, 2023)
- 440bd69: Fix build (philpax, Apr 25, 2023)
- 5658484: refactor: introduce llm(-cli) (philpax, Apr 25, 2023)
- bcf5627: Fix model name in LLaMA inference example (danforbes, Apr 26, 2023)
- 5ac4b79: feat: wire up both bloom/llama to CLI (philpax, Apr 26, 2023)
- 1601240: Merge branch 'dfo/model/bloom' of github.com:danforbes/llama-rs into … (philpax, Apr 26, 2023)
- 1761512: Add example for testing BLOOM inference (danforbes, Apr 26, 2023)
- 8d2d9c6: cargo fmt (danforbes, Apr 26, 2023)
- 813bdd1: Add launch.json for debugging loading and inference (danforbes, Apr 26, 2023)
- c608b4b: Merge branch 'main' into dfo/model/bloom (danforbes, Apr 27, 2023)
- e19418c: Check tensor dimensions when loading (danforbes, Apr 27, 2023)
- e35f93b: `Model` -> `KnownModel`, `ErasedModel` -> `Model` (danforbes, Apr 27, 2023; see the trait sketch after this list)
- 288df7f: Merge branch 'main' into dfo/model/bloom (danforbes, Apr 29, 2023)
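The rename in e35f93b reflects the PR's central design: each architecture implements a concrete trait, while an object-safe, type-erased trait lets callers pick the architecture at runtime. Below is a minimal, self-contained sketch of that pattern; only the trait names come from the commit messages, and the method set and signatures are illustrative assumptions, not the PR's exact API.

```rust
/// Stand-in for the inference-session state kept in `llm-base`;
/// illustrative only.
pub struct InferenceSession {
    pub tokens: Vec<i32>,
}

/// Implemented by each concrete architecture (e.g. LLaMA, BLOOM),
/// which knows its own hyperparameter type at compile time.
pub trait KnownModel {
    type Hyperparameters;

    fn start_session(&self) -> InferenceSession;
    fn evaluate(&self, session: &mut InferenceSession, input: &[i32]);
}

/// Object-safe, type-erased counterpart: no associated types, so code
/// that selects the architecture at runtime can hold a `Box<dyn Model>`.
pub trait Model {
    fn start_session(&self) -> InferenceSession;
    fn evaluate(&self, session: &mut InferenceSession, input: &[i32]);
}

/// Blanket impl: every `KnownModel` is automatically usable as a `Model`.
impl<M: KnownModel> Model for M {
    fn start_session(&self) -> InferenceSession {
        KnownModel::start_session(self)
    }

    fn evaluate(&self, session: &mut InferenceSession, input: &[i32]) {
        KnownModel::evaluate(self, session, input)
    }
}
```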
Files changed
.vscode/launch.json (new file: 44 additions, 0 deletions)

```jsonc
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug example 'llama_inference'",
            "cargo": {
                "args": [
                    "build",
                    "--example=llama_inference",
                    "--package=llama"
                ],
                "filter": {
                    "name": "llama_inference",
                    "kind": "example"
                }
            },
            "args": ["${env:HOME}/.ggml-models/gpt4all-7b.bin"],
            "cwd": "${workspaceFolder}"
        },
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug example 'bloom_inference'",
            "cargo": {
                "args": [
                    "build",
                    "--example=bloom_inference",
                    "--package=bloom"
                ],
                "filter": {
                    "name": "bloom_inference",
                    "kind": "example"
                }
            },
            "args": ["${env:HOME}/.ggml-models/bloom-7b.bin"],
            "cwd": "${workspaceFolder}"
        }
    ]
}
```
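Outside the debugger, the two configurations correspond to ordinary Cargo invocations; a sketch, reusing the placeholder model paths from the config above:

```shell
# Build the two debug targets.
cargo build --example=llama_inference --package=llama
cargo build --example=bloom_inference --package=bloom

# Or run them directly, passing the model path as the first argument.
cargo run --example=llama_inference --package=llama -- ~/.ggml-models/gpt4all-7b.bin
cargo run --example=bloom_inference --package=bloom -- ~/.ggml-models/bloom-7b.bin
```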
Cargo.lock (56 additions, 17 deletions): generated file, diff not rendered.

Cargo.toml (12 additions, 3 deletions)

```diff
@@ -1,10 +1,16 @@
 [workspace]
 members = [
+    # Crates
     "ggml-sys",
-    "ggml",
     "ggml-format",
-    "llama-rs",
-    "llama-cli",
+    "ggml",
+    "llm-base",
+    "llama",
+    "bloom",
+    "llm",
+    "llm-cli",
+
+    # Tools
     "generate-ggml-bindings"
 ]
 resolver = "2"
@@ -13,4 +19,7 @@ resolver = "2"
 version = "0.1.0"

 [workspace.dependencies]
+bytemuck = "1.13.1"
+log = "0.4"
 rand = "0.8.5"
+serde = { version = "1.0", features = ["derive"] }
```
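With versions hoisted into `[workspace.dependencies]`, each member crate opts in with `workspace = true` instead of repeating the version pin; `bloom/Cargo.toml` below uses exactly this pattern:

```toml
# In a member crate's Cargo.toml: inherit the version pinned by the workspace.
[dependencies]
bytemuck = { workspace = true }
```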
README.md (22 additions, 12 deletions)

````diff
@@ -30,10 +30,11 @@ performance as the original code.
 Make sure you have a Rust 1.65.0 or above and C toolchain[^1] set up.

-`llama-rs` is a Rust library, while `llama-cli` is a CLI application that wraps
-`llama-rs` and offers basic inference capabilities.
+`llm-base`, `bloom`, and `llama` are Rust libraries, while `bloom-cli` and
+`llama-cli` are CLI applications that wrap `bloom` and `llama`, respectively,
+and offer basic inference capabilities.

-The following instructions explain how to build `llama-cli`.
+The following instructions explain how to build the CLI applications.

 **NOTE**: For best results, make sure to build and run in release mode.
 Debug builds are going to be very slow.
@@ -43,33 +44,34 @@ Debug builds are going to be very slow.
 Run

 ```shell
-cargo install --git https://github.com/rustformers/llama-rs llama-cli
+cargo install --git https://github.com/rustformers/llama-rs bloom-cli llama-cli
 ```

-to install `llama-cli` to your Cargo `bin` directory, which `rustup` is likely to
-have added to your `PATH`.
+to install `bloom-cli` and `llama-cli` to your Cargo `bin` directory, which
+`rustup` is likely to have added to your `PATH`.

-It can then be run through `llama-cli`.
+The CLI applications can then be run through `bloom-cli` and `llama-cli`, respectively.

 ### Building from repository

 Clone the repository, and then build it through

 ```shell
-cargo build --release --bin llama-cli
+cargo build --release
 ```

-The resulting binary will be at `target/release/llama-cli[.exe]`.
+The resulting binaries will be at `target/release/bloom-cli[.exe]` and
+`target/release/llama-cli[.exe]`, respectively.

-It can also be run directly through Cargo, using
+They can also be run directly through Cargo, using

 ```shell
-cargo run --release --bin llama-cli -- <ARGS>
+cargo run --release --bin {bloom,llama}-cli -- <ARGS>
 ```

 This is useful for development.

-### Getting the weights
+### Getting LLaMA weights

 In order to run the inference code in `llama-rs`, a copy of the model's weights
 are required.
@@ -107,6 +109,14 @@ cargo run -p llama-cli quantize /path/to/your/models/7B/ggml-model-f16.bin /path
 > The [llama.cpp repository](https://github.com/ggerganov/llama.cpp) has
 > additional information on how to obtain and run specific models.

+### BLOOM
+
+The open-source [BLOOM](https://bigscience.huggingface.co/blog/bloom) model is
+also supported.
+[More information](https://huggingface.co/docs/transformers/model_doc/bloom)
+about BLOOM is available on HuggingFace, as are some
+[quantized models](https://huggingface.co/models?search=bloom%20ggml).
+
 _Support for other open source models is currently planned. For models where
 weights can be legally distributed, this section will be updated with scripts to
 make the install process as user-friendly as possible. Due to the model's legal
````
bloom/Cargo.toml (new file: 15 additions, 0 deletions)

```toml
[package]
name = "bloom"
version = { workspace = true }
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ggml = { path = "../ggml" }
llm-base = { path = "../llm-base" }

bytemuck = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
```
bloom/examples/bloom_inference.rs (new file: 33 additions, 0 deletions)

```rust
use std::{convert::Infallible, env::args, io::Write};

use llm_base::{snapshot, LoadError};

extern crate bloom;

fn main() -> Result<(), LoadError> {
    let args: Vec<String> = args().collect();

    // Load the BLOOM model from the path given as the first CLI argument.
    let bloom = bloom::Bloom::load(&args[1], true, 32, |_| {})?;

    // Set up an inference session with default parameters; the helper's name
    // suggests it restores a session snapshot if one exists.
    let (mut session, _) = snapshot::read_or_create_session(
        &bloom,
        Default::default(),
        Default::default(),
        Default::default(),
    );

    // Feed the prompt, generate up to 32 tokens, and print each token as it
    // is produced.
    let _ = session.inference_with_prompt::<Infallible>(
        &bloom,
        &Default::default(),
        "The best kind of wine is ",
        Some(32),
        &mut rand::thread_rng(),
        |t| {
            print!("{t}");
            std::io::stdout().flush().unwrap();

            Ok(())
        },
    );

    println!();
    Ok(())
}
```
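Assuming a quantized BLOOM model on disk (the path below is the placeholder used in `launch.json`), the example can be run with:

```shell
cargo run --release --example bloom_inference --package bloom -- ~/.ggml-models/bloom-7b.bin
```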