diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f6052dc93c2b..4eaebbb5e8cf 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -45,13 +45,6 @@ jobs: # the login password doesn't matter, but the keyring must be unlocked for the tests to work run: gnome-keyring-daemon --components=secrets --daemonize --unlock <<< 'foobar' - - name: Install UV - run: | - curl -LsSf https://astral.sh/uv/install.sh | sh - - - name: Run download_tokenizer_files.py - run: uv run download_tokenizer_files.py - - name: Set up Rust uses: actions-rs/toolchain@v1 with: diff --git a/.github/workflows/desktop-app-release.yaml b/.github/workflows/desktop-app-release.yaml index c58ebf6b818d..c4e87066e576 100644 --- a/.github/workflows/desktop-app-release.yaml +++ b/.github/workflows/desktop-app-release.yaml @@ -44,14 +44,6 @@ jobs: restore-keys: | ${{ runner.os }}-cargo-build- - # Install UV and download tokenizer files - - name: Install UV - run: | - curl -LsSf https://astral.sh/uv/install.sh | sh - - - name: Run download_tokenizer_files.py - run: uv run download_tokenizer_files.py - # Build Rust Binary - name: Build Release Binary run: cargo build --release @@ -68,7 +60,7 @@ jobs: working-directory: ui/desktop env: CERTIFICATE_OSX_APPLICATION: ${{ secrets.CERTIFICATE_OSX_APPLICATION }} - CERTIFICATE_PASSWORD: ${{ secrets.CERTIFICATE_PASSWORD }} + CERTIFICATE_PASSWORD: ${{ secrets.CERTIFICATE_PASSWORD }} - name: Set up Node.js uses: actions/setup-node@v2 @@ -91,4 +83,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: Goose.zip - path: ui/desktop/out/Goose-darwin-arm64/Goose.zip + path: ui/desktop/out/Goose-darwin-arm64/Goose.zip diff --git a/.gitignore b/.gitignore index cee516b0b299..63a20930b708 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,7 @@ Cargo.lock # UI ./ui/desktop/node_modules ./ui/desktop/out + +# Hermit +/.hermit/ +/bin/ diff --git a/README.md b/README.md index af185d8b9c25..666e789658ee 100644 --- a/README.md +++ 
b/README.md @@ -18,7 +18,6 @@ This is the branch for goose 1.0 WIP: which is a port over from python to rust + ## Building ```sh -./download_tokenizer_files.sh cargo build ``` diff --git a/crates/goose/Cargo.toml b/crates/goose/Cargo.toml index 55bf58c3f10a..4b68fd44301c 100644 --- a/crates/goose/Cargo.toml +++ b/crates/goose/Cargo.toml @@ -7,6 +7,10 @@ license.workspace = true repository.workspace = true description.workspace = true +[build-dependencies] +tokio = { version = "1.36", features = ["full"] } +reqwest = { version = "0.11", features = ["json"] } + [dependencies] mcp-core = { path = "../mcp-core" } dirs = "5.0.1" @@ -44,7 +48,11 @@ libc = "=0.2.167" lazy_static = "1.5" kill_tree = "0.2.4" -keyring = { version = "3.6.1", features = ["apple-native", "windows-native", "sync-secret-service"] } +keyring = { version = "3.6.1", features = [ + "apple-native", + "windows-native", + "sync-secret-service", +] } shellexpand = "3.1.0" rust_decimal = "1.36.0" rust_decimal_macros = "1.36.0" diff --git a/crates/goose/build.rs b/crates/goose/build.rs new file mode 100644 index 000000000000..421ad3df5821 --- /dev/null +++ b/crates/goose/build.rs @@ -0,0 +1,58 @@ +use std::error::Error; +use std::fs; +use std::path::Path; + +const BASE_DIR: &str = "../../tokenizer_files"; +const MODELS: &[&str] = &[ + "Xenova/claude-tokenizer", + "Xenova/gemma-2-tokenizer", + "Xenova/gpt-4o", + "Qwen/Qwen2.5-Coder-32B-Instruct", +]; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn Error>> { + // Create base directory + fs::create_dir_all(BASE_DIR)?; + println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed={}", BASE_DIR); + + for model in MODELS { + download_tokenizer(model).await?; + } + + Ok(()) +} + +async fn download_tokenizer(repo_id: &str) -> Result<(), Box<dyn Error>> { + let dir_name = repo_id.replace('/', "--"); + let download_dir = format!("{}/{}", BASE_DIR, dir_name); + let file_url = format!( + "https://huggingface.co/{}/resolve/main/tokenizer.json", + repo_id + ); 
let file_path = format!("{}/tokenizer.json", download_dir); + + // Create directory if it doesn't exist + fs::create_dir_all(&download_dir)?; + + // Check if file already exists + if Path::new(&file_path).exists() { + println!("Tokenizer for {} already exists, skipping...", repo_id); + return Ok(()); + } + + println!("Downloading tokenizer for {}...", repo_id); + + // Download the file + let response = reqwest::get(&file_url).await?; + if !response.status().is_success() { + return Err(format!("Failed to download tokenizer for {}", repo_id).into()); + } + + let content = response.bytes().await?; + fs::write(&file_path, content)?; + + println!("Downloaded {} to {}", repo_id, file_path); + Ok(()) +} diff --git a/download_tokenizer_files.py b/download_tokenizer_files.py deleted file mode 100644 index 7c205d45495c..000000000000 --- a/download_tokenizer_files.py +++ /dev/null @@ -1,23 +0,0 @@ -# /// script -# dependencies = [ -# "huggingface_hub" -# ] -# /// - -# Run: `uv run download_tokenizer_files.py` - -from huggingface_hub import hf_hub_download -from pathlib import Path - -BASE_DIR = Path("tokenizer_files") -BASE_DIR.mkdir(parents=True, exist_ok=True) - -for repo_id in [ - "Xenova/gpt-4o", - "Xenova/claude-tokenizer", - "Qwen/Qwen2.5-Coder-32B-Instruct", - "Xenova/gemma-2-tokenizer", -]: - download_dir = BASE_DIR / repo_id.replace("/", "--") - _path = hf_hub_download(repo_id, filename="tokenizer.json", local_dir=download_dir) - print(f"Downloaded {repo_id} to {_path}") diff --git a/download_tokenizers.sh b/download_tokenizers.sh deleted file mode 100755 index b46618d2d01b..000000000000 --- a/download_tokenizers.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# Create base directory for tokenizer files -BASE_DIR="tokenizer_files" -mkdir -p "$BASE_DIR" - -# Function to download a tokenizer file -download_tokenizer() { - local repo_id="$1" - local dir_name="${repo_id//\/--}" # Replace / with -- for directory name - local download_dir="$BASE_DIR/${repo_id//\//--}" # 
Replace / with -- for directory name, matching Python's replace("/", "--") - local file_url="https://huggingface.co/$repo_id/resolve/main/tokenizer.json" - - mkdir -p "$download_dir" - - # Only download if the file doesn't exist - if [ ! -f "$download_dir/tokenizer.json" ]; then - echo "Downloading tokenizer for $repo_id..." - curl -L "$file_url" -o "$download_dir/tokenizer.json" - if [ $? -eq 0 ]; then - echo "Downloaded $repo_id to $download_dir/tokenizer.json" - else - echo "Failed to download $repo_id tokenizer" - return 1 - fi - else - echo "Tokenizer for $repo_id already exists, skipping..." - fi -} - -# Download tokenizers for each model -download_tokenizer "Xenova/gpt-4o" -download_tokenizer "Xenova/claude-tokenizer" -download_tokenizer "Qwen/Qwen2.5-Coder-32B-Instruct" -download_tokenizer "Xenova/gemma-2-tokenizer" diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 000000000000..02cb8fcb5372 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "stable" +profile = "default"