Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mistralrs-core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
text_models_inputs_processor::PagedAttentionMeta,
AdapterInstruction, CacheBackendMetadata, CacheInstruction, NormalCache,
},
prefix_cacher_v2::PrefixCacheManagerV2,
prefix_cacher::PrefixCacheManagerV2,
request::{DetokenizationRequest, NormalRequest, TokenizationRequest},
response::CompletionChoice,
scheduler::{Scheduler, SchedulerOutput},
Expand Down
1 change: 0 additions & 1 deletion mistralrs-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ mod diffusion_models;
pub mod distributed;
mod pipeline;
mod prefix_cacher;
mod prefix_cacher_v2;
mod request;
mod response;
mod sampler;
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/amoe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
amoe::{AnyMoeConfig, AnyMoeTrainingInputRow, AnyMoeTrainingInputs, AnyMoeTrainingResult},
device_map::DeviceMapper,
get_mut_arcmutex,
prefix_cacher_v2::PrefixCacheManagerV2,
prefix_cacher::PrefixCacheManagerV2,
sampler::Sampler,
sequence::{SeqStepType, Sequence, SequenceGroup, SequenceRecognizer},
utils::progress::NiceProgressBar,
Expand Down
6 changes: 5 additions & 1 deletion mistralrs-core/src/pipeline/cache_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,11 @@ impl RotatingCache {
}

/// Sets `self.current_seq_len` from `len`.
///
/// Read literally, these lines first store `len % self.max_seq_len`
/// unconditionally and then store the same value again when
/// `len < self.max_seq_len`; the `else` branch does nothing.
///
/// NOTE(review): this listing looks like a rendered diff hunk whose `+`/`-`
/// markers were stripped (the file header above it reports 5 additions and
/// 1 deletion), so the unconditional assignment is presumably the removed
/// line and the guarded form its replacement. Under that reading, a `len` at
/// or beyond `max_seq_len` leaves `current_seq_len` untouched. Confirm
/// against the real file before relying on either behavior.
///
/// # Panics
///
/// The unguarded `%` panics if `self.max_seq_len` is zero.
pub fn set_len(&mut self, len: usize) {
// Unconditional store of the remainder; runs regardless of the guard below.
self.current_seq_len = len % self.max_seq_len;
if len < self.max_seq_len {
// `len` is below `max_seq_len` here, so the remainder equals `len` itself.
self.current_seq_len = len % self.max_seq_len;
} else {
// No change.
}
}

pub fn append(&mut self, src: &Tensor) -> Result<Tensor> {
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/diffusion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::device_map::DeviceMapper;
use crate::diffusion_models::processor::{DiffusionProcessor, ModelInputs};
use crate::paged_attention::AttentionImplementation;
use crate::pipeline::ChatTemplate;
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
use crate::sequence::Sequence;
use crate::utils::varbuilder_utils::DeviceForLoadTensor;
use crate::utils::{tokens::get_token, varbuilder_utils::from_mmaped_safetensors};
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/ggml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::pipeline::get_chat_template;
use crate::pipeline::inputs_processor::DEFAULT_PROMPT_CHUNK_SIZE;
use crate::pipeline::sampling::sample_and_add_toks;
use crate::pipeline::{ChatTemplate, LocalModelPaths};
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
use crate::sequence::Sequence;
use crate::utils::debug::DeviceRepr;
use crate::utils::model_config as ModelConfig;
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/gguf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::pipeline::inputs_processor::DEFAULT_PROMPT_CHUNK_SIZE;
use crate::pipeline::loaders::DeviceMappedModelLoader;
use crate::pipeline::sampling::sample_and_add_toks;
use crate::pipeline::ChatTemplate;
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
use crate::sequence::Sequence;
use crate::utils::gguf_metadata::{ContentConfig, GgufDeviceMapLoaderInner};
use crate::utils::model_config as ModelConfig;
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub use super::diffusion_models::DiffusionGenerationParams;
use crate::amoe::{AnyMoeConfig, AnyMoeExpertType, AnyMoeTrainingInputs, AnyMoeTrainingResult};
use crate::device_map::DeviceMapper;
use crate::paged_attention::{CacheConfig, CacheEngine, ModelConfigLike};
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
pub use amoe::{AnyMoeLoader, AnyMoePipeline};
use chat_template::ChatTemplate;
pub use diffusion::{DiffusionLoader, DiffusionLoaderBuilder, DiffusionSpecificConfig};
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/normal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::pipeline::isq::UqffFullSer;
use crate::pipeline::sampling::sample_and_add_toks;
use crate::pipeline::text_models_inputs_processor::make_prompt_chunk;
use crate::pipeline::{ChatTemplate, LocalModelPaths};
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
use crate::sequence::Sequence;
use crate::utils::tokenizer::get_tokenizer;
use crate::utils::varbuilder_utils::DeviceForLoadTensor;
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/sampling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use candle_core::{DType, Device, Result, Tensor};
use rand_isaac::Isaac64Rng;

use crate::{
prefix_cacher_v2::PrefixCacheManagerV2,
prefix_cacher::PrefixCacheManagerV2,
sampler::Logprobs,
sequence::{Sequence, SequenceRecognizer, StopReason},
tools::parse_text_tools,
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/speculative.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
},
AdapterInstruction,
},
prefix_cacher_v2::PrefixCacheManagerV2,
prefix_cacher::PrefixCacheManagerV2,
sequence::{Sequence, SequenceRecognizer},
DeviceMapSetting, Loader, ModelKind, PagedAttentionConfig, Pipeline, TokenSource, TryIntoDType,
};
Expand Down
2 changes: 1 addition & 1 deletion mistralrs-core/src/pipeline/vision.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::pipeline::llg::build_tok_env;
use crate::pipeline::sampling::sample_and_add_toks;
use crate::pipeline::text_models_inputs_processor::make_prompt_chunk;
use crate::pipeline::{get_chat_template, ChatTemplate, IsqOrganization, LocalModelPaths};
use crate::prefix_cacher_v2::PrefixCacheManagerV2;
use crate::prefix_cacher::PrefixCacheManagerV2;
use crate::sequence::Sequence;
use crate::utils::tokenizer::get_tokenizer;
use crate::utils::varbuilder_utils::DeviceForLoadTensor;
Expand Down
Loading
Loading