diff --git a/Cargo.lock b/Cargo.lock index 0715155a..59165c59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -72,6 +72,17 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -121,7 +132,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.13", + "syn", "which", ] @@ -137,6 +148,12 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" +[[package]] +name = "bytesize" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38fcc2979eff34a4b84e1cf9a1e3da42a7d44b3b690a40cdcb23e3d556cfb2e5" + [[package]] name = "cc" version = "1.0.79" @@ -205,7 +222,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] @@ -238,6 +255,17 @@ dependencies = [ "owo-colors", ] +[[package]] +name = "colored" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" +dependencies = [ + "atty", + "lazy_static", + "winapi", +] + [[package]] name = "concolor-override" version = "1.0.0" @@ -492,6 +520,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.2.6" @@ -642,6 +679,7 @@ name = "llm-cli" version = "0.1.0" dependencies = [ "bincode", + "bytesize", "clap", "color-eyre", "env_logger", @@ -650,7 +688,7 @@ dependencies = [ "num_cpus", "rand", "rustyline", - "spinners", + "spinoff", "zstd", ] @@ -705,12 +743,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - [[package]] name = "memchr" version = "2.5.0" @@ -818,6 +850,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7924d1d0ad836f665c9065e26d016c673ece3993f30d340068b16f282afc1156" +[[package]] +name = "paste" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -843,7 +881,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "syn 2.0.13", + "syn", ] [[package]] @@ -1014,12 +1052,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "rustversion" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" - [[package]] name = "rustyline" version = "11.0.0" @@ -1081,7 +1113,7 @@ checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] @@ -1108,14 +1140,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] -name = "spinners" -version = "4.1.0" +name = "spinoff" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08615eea740067d9899969bc2891c68a19c315cb1f66640af9a9ecb91b13bcab" +checksum = "fee259f96b31e7a18657d11741fe30d63f98e07de70e7a19d2b705ab9b331cdc" dependencies = [ - "lazy_static", - "maplit", - "strum", + "colored", + "once_cell", + "paste", ] [[package]] @@ -1136,39 +1168,6 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.13" @@ -1206,7 +1205,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] diff --git a/binaries/llm-cli/Cargo.toml b/binaries/llm-cli/Cargo.toml index d52084f3..683cffbb 100644 --- a/binaries/llm-cli/Cargo.toml +++ b/binaries/llm-cli/Cargo.toml @@ -14,10 +14,11 @@ log = { workspace = true } rand = { workspace = true } bincode = "1.3.3" +bytesize = "1.1" env_logger = "0.10.0" num_cpus = "1.15.0" rustyline = "11.0.0" -spinners = "4.1.0" +spinoff = { version = "0.7.0", default-features = false, features = ["dots2"] } clap = { version = "4.1.8", features = ["derive"] } color-eyre = { version = "0.6.2", default-features = false } diff --git a/binaries/llm-cli/src/cli_args.rs b/binaries/llm-cli/src/cli_args.rs index 47c1581f..275d08b9 100644 --- a/binaries/llm-cli/src/cli_args.rs +++ b/binaries/llm-cli/src/cli_args.rs @@ -311,58 +311,61 @@ impl ModelLoad { ..Default::default() }; + let mut sp = Some(spinoff::Spinner::new( + spinoff::spinners::Dots2, + "Loading model...", + None, + )); let now = std::time::Instant::now(); - let model = llm::load::( - &self.model_path, - !self.no_mmap, - params, - load_progress_handler_log, - ) - .wrap_err("Could not load model")?; - - log::info!( - "Model fully loaded! Elapsed: {}ms", - now.elapsed().as_millis() - ); + let model = + llm::load::( + &self.model_path, + !self.no_mmap, + params, + move |progress| match progress { + LoadProgress::HyperparametersLoaded => { + if let Some(sp) = sp.as_mut() { + sp.update_text("Loaded hyperparameters") + }; + } + LoadProgress::ContextSize { bytes } => log::debug!( + "ggml ctx size = {}", + bytesize::to_string(bytes as u64, false) + ), + LoadProgress::TensorLoaded { + current_tensor, + tensor_count, + .. + } => { + if let Some(sp) = sp.as_mut() { + sp.update_text(format!( + "Loaded tensor {}/{}", + current_tensor + 1, + tensor_count + )); + }; + } + LoadProgress::Loaded { + file_size, + tensor_count, + } => { + if let Some(sp) = sp.take() { + sp.success(&format!( + "Loaded {tensor_count} tensors ({}) after {}ms", + bytesize::to_string(file_size, false), + now.elapsed().as_millis() + )); + }; + } + }, + ) + .wrap_err("Could not load model")?; Ok(Box::new(model)) } } -pub(crate) fn load_progress_handler_log(progress: LoadProgress) { - match progress { - LoadProgress::HyperparametersLoaded => { - log::debug!("Loaded hyperparameters") - } - LoadProgress::ContextSize { bytes } => log::info!( - "ggml ctx size = {:.2} MB\n", - bytes as f64 / (1024.0 * 1024.0) - ), - LoadProgress::TensorLoaded { - current_tensor, - tensor_count, - .. - } => { - let current_tensor = current_tensor + 1; - if current_tensor % 8 == 0 { - log::info!("Loaded tensor {current_tensor}/{tensor_count}"); - } - } - LoadProgress::Loaded { - byte_size, - tensor_count, - } => { - log::info!("Loading of model complete"); - log::info!( - "Model size = {:.2} MB / num tensors = {}", - byte_size as f64 / 1024.0 / 1024.0, - tensor_count - ); - } - } -} - #[derive(Parser, Debug)] pub struct PromptFile { /// A file to read the prompt from. diff --git a/binaries/llm-cli/src/main.rs b/binaries/llm-cli/src/main.rs index e29c9db2..132c643b 100644 --- a/binaries/llm-cli/src/main.rs +++ b/binaries/llm-cli/src/main.rs @@ -96,8 +96,9 @@ fn infer(args: &cli_args::Infer) -> Result<()> { fn info(args: &cli_args::Info) -> Result<()> { let file = File::open(&args.model_path)?; let mut reader = BufReader::new(&file); - let mut loader: llm::Loader = - llm::Loader::new(cli_args::load_progress_handler_log); + let mut loader: llm::Loader = llm::Loader::new(|_| { + // We purposely do not print progress here, as we are only interested in the metadata + }); llm::ggml_format::load(&mut reader, &mut loader)?; @@ -192,7 +193,7 @@ fn interactive( .map(|pf| process_prompt(pf, &line)) .unwrap_or(line); - let mut sp = spinners::Spinner::new(spinners::Spinners::Dots2, "".to_string()); + let sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "".to_string(), None); if let Err(InferenceError::ContextFull) = session.feed_prompt::( model.as_ref(), &inference_params, @@ -201,7 +202,7 @@ fn interactive( ) { log::error!("Prompt exceeds context window length.") }; - sp.stop(); + sp.clear(); let res = session.infer_with_params::( model.as_ref(), diff --git a/crates/llm-base/src/loader.rs b/crates/llm-base/src/loader.rs index 831bd0cc..f30e1ae8 100644 --- a/crates/llm-base/src/loader.rs +++ b/crates/llm-base/src/loader.rs @@ -102,7 +102,7 @@ pub enum LoadProgress { /// A model part has finished fully loading. Loaded { /// The number of bytes in the part. - byte_size: usize, + file_size: u64, /// The number of tensors in the part. tensor_count: usize, }, @@ -331,11 +331,14 @@ pub fn load( (load_progress_callback)(LoadProgress::ContextSize { bytes: ctx_size }); let context = Context::init(ctx_size, !use_mmap); - let mmap = if use_mmap { + let (mmap, file_size) = { let file = File::open(path)?; - Some(unsafe { Mmap::map(&file)? }) - } else { - None + let mmap = if use_mmap { + Some(unsafe { Mmap::map(&file)? }) + } else { + None + }; + (mmap, file.metadata()?.len()) }; struct MmapCompatibleLoader<'a> { @@ -434,7 +437,7 @@ pub fn load( let model = KnownModel::new(hyperparameters, params, vocabulary, tl)?; (load_progress_callback)(LoadProgress::Loaded { - byte_size: 0, + file_size, tensor_count: tensors_len, }); @@ -527,7 +530,7 @@ pub fn load_progress_callback_stdout(progress: LoadProgress) { } } LoadProgress::Loaded { - byte_size, + file_size: byte_size, tensor_count, } => { println!("Loading of model complete"); diff --git a/crates/models/llama/src/old_loader.rs b/crates/models/llama/src/old_loader.rs index a9639862..7fbbd38f 100644 --- a/crates/models/llama/src/old_loader.rs +++ b/crates/models/llama/src/old_loader.rs @@ -230,7 +230,7 @@ fn load_weights_ggmf_or_unversioned( // Skip metadata part_reader.seek(SeekFrom::Start(file_offset))?; - let mut total_size = 0; + let mut total_size = 0u64; let mut n_tensors = 0; // Load weights @@ -279,7 +279,7 @@ fn load_weights_ggmf_or_unversioned( part_reader.seek(SeekFrom::Current(tensor.nbytes() as i64))?; } - total_size += tensor.nbytes(); + total_size += tensor.nbytes() as u64; } else { if (nelements * bpe) / ggml::blck_size(tensor.get_type()) != tensor.nbytes() / n_parts @@ -328,7 +328,7 @@ fn load_weights_ggmf_or_unversioned( } } - total_size += tensor.nbytes() / n_parts; + total_size += (tensor.nbytes() / n_parts) as u64; } n_tensors += 1; @@ -339,7 +339,7 @@ fn load_weights_ggmf_or_unversioned( } load_progress_callback(LoadProgress::Loaded { - byte_size: total_size, + file_size: total_size, tensor_count: n_tensors.try_into()?, }); } @@ -534,7 +534,7 @@ fn load_weights_ggjt( } load_progress_callback(LoadProgress::Loaded { - byte_size: total_loaded_bytes as usize, + file_size: total_loaded_bytes, tensor_count: loop_i, });