Commit bec57df

Merge branch 'huggingface:main' into main

akshayballal95 committed Aug 30, 2024
2 parents e531ce7 + c02b7c3, commit bec57df
Showing 115 changed files with 9,427 additions and 253 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python.yml
@@ -18,9 +18,9 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest] # For now, only test on Linux
-    steps:
+    steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
 
     - name: Install Rust
       uses: actions-rs/toolchain@v1
@@ -65,4 +65,4 @@ jobs:
       working-directory: ./candle-pyo3
       run: |
         source .env/bin/activate
-        python -m pytest -s -v tests
+        python -m pytest -s -v tests

(The visually identical -/+ pairs above differ only in trailing whitespace.)
12 changes: 6 additions & 6 deletions .github/workflows/rust-ci.yml
@@ -1,6 +1,6 @@
-on:
+on:
   push:
-    branches:
+    branches:
     - main
   pull_request:
 
@@ -15,7 +15,7 @@ jobs:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         rust: [stable]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal
@@ -34,7 +34,7 @@ jobs:
         os: [ubuntu-latest, windows-latest, macOS-latest]
         rust: [stable]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
@@ -49,7 +49,7 @@ jobs:
     name: Rustfmt
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal
@@ -65,7 +65,7 @@ jobs:
     name: Clippy
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: actions-rs/toolchain@v1
         with:
           profile: minimal

(The `on:` and `branches:` -/+ pairs in the first hunk differ only in trailing whitespace.)
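Note: both workflow files above bump actions/checkout from v2 to v4. checkout@v2 runs on an end-of-life Node.js runtime that GitHub Actions has deprecated; v4 targets the currently supported Node 20 runtime and is a drop-in replacement for these jobs.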
3 changes: 3 additions & 0 deletions .gitignore
@@ -40,3 +40,6 @@ candle-wasm-examples/*/package-lock.json
 candle-wasm-examples/**/config*.json
 .DS_Store
 .idea/*
+__pycache__
+out.safetensors
+out.wav
22 changes: 11 additions & 11 deletions Cargo.toml
@@ -20,7 +20,7 @@ exclude = [
 resolver = "2"
 
 [workspace.package]
-version = "0.6.0"
+version = "0.6.1"
 edition = "2021"
 description = "Minimalist ML framework."
 repository = "https://github.com/huggingface/candle"
@@ -33,23 +33,23 @@ ab_glyph = "0.2.23"
 accelerate-src = { version = "0.3.2" }
 anyhow = { version = "1", features = ["backtrace"] }
 byteorder = "1.4.3"
-candle = { path = "./candle-core", package = "candle-core", version = "0.6.0" }
-candle-datasets = { path = "./candle-datasets", version = "0.6.0" }
-candle-flash-attn = { path = "./candle-flash-attn", version = "0.6.0" }
-candle-kernels = { path = "./candle-kernels", version = "0.6.0" }
-candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.6.0" }
-candle-nn = { path = "./candle-nn", version = "0.6.0" }
-candle-onnx = { path = "./candle-onnx", version = "0.6.0" }
-candle-transformers = { path = "./candle-transformers", version = "0.6.0" }
+candle = { path = "./candle-core", package = "candle-core", version = "0.6.1" }
+candle-datasets = { path = "./candle-datasets", version = "0.6.1" }
+candle-flash-attn = { path = "./candle-flash-attn", version = "0.6.1" }
+candle-kernels = { path = "./candle-kernels", version = "0.6.1" }
+candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.6.1" }
+candle-nn = { path = "./candle-nn", version = "0.6.1" }
+candle-onnx = { path = "./candle-onnx", version = "0.6.1" }
+candle-transformers = { path = "./candle-transformers", version = "0.6.1" }
 clap = { version = "4.2.4", features = ["derive"] }
 criterion = { version = "0.5.1", default-features=false }
-cudarc = { version = "=0.11.6", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
+cudarc = { version = "0.12.0", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
 fancy-regex = "0.13.0"
 gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
 hf-hub = "0.3.0"
 half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"] }
 hound = "3.5.1"
-image = { version = "0.25.0", default-features = false, features = ["jpeg", "png"] }
+image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
 imageproc = { version = "0.24.0", default-features = false }
 intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
 libc = { version = "0.2.147" }
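Two requirement syntaxes are worth distinguishing in the cudarc line: `version = "=0.11.6"` is an exact pin (Cargo resolves only 0.11.6), while the new `version = "0.12.0"` is Cargo's default caret requirement, accepting any semver-compatible 0.12.x release. Dropping the pin therefore also lets consumers pick up future 0.12 patch releases automatically.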
11 changes: 8 additions & 3 deletions README.md
@@ -63,7 +63,9 @@ We also provide some command line based examples using state of the art models
 - [LLaMA v1, v2, and v3](./candle-examples/examples/llama/): general LLM, includes
   the SOLAR-10.7B variant.
 - [Falcon](./candle-examples/examples/falcon/): general LLM.
-- [Gemma](./candle-examples/examples/gemma/): 2b and 7b general LLMs from Google Deepmind.
+- [Codegeex4](./candle-examples/examples/codegeex4-9b/): code completion, code interpreter, web search, function calling, repository-level tasks.
+- [GLM4](./candle-examples/examples/glm4/): open multilingual multimodal chat LMs by THUDM.
+- [Gemma v1 and v2](./candle-examples/examples/gemma/): 2b and 7b+/9b general LLMs from Google Deepmind.
 - [RecurrentGemma](./candle-examples/examples/recurrent-gemma/): 2b and 7b
   Griffin based models from Google that mix attention with a RNN like state.
 - [Phi-1, Phi-1.5, Phi-2, and Phi-3](./candle-examples/examples/phi/): 1.3b,
@@ -118,6 +120,8 @@ We also provide some command line based examples using state of the art models
   model using residual vector quantization.
 - [MetaVoice](./candle-examples/examples/metavoice/): foundational model for
   text-to-speech.
+- [Parler-TTS](./candle-examples/examples/parler-tts/): large text-to-speech
+  model.
 - [T5](./candle-examples/examples/t5), [Bert](./candle-examples/examples/bert/),
   [JinaBert](./candle-examples/examples/jina-bert/) : useful for sentence embeddings.
 - [DINOv2](./candle-examples/examples/dinov2/): computer vision model trained
@@ -206,7 +210,7 @@ If you have an addition to this list, please submit a pull request.
 - StarCoder, StarCoder2.
 - Phi 1, 1.5, 2, and 3.
 - Mamba, Minimal Mamba
-- Gemma 2b and 7b.
+- Gemma v1 2b and 7b+, v2 2b and 9b.
 - Mistral 7b v0.1.
 - Mixtral 8x7b v0.1.
 - StableLM-3B-4E1T, StableLM-2-1.6B, Stable-Code-3B.
@@ -234,9 +238,10 @@ If you have an addition to this list, please submit a pull request.
 - Whisper, multi-lingual speech-to-text.
 - EnCodec, audio compression model.
 - MetaVoice-1B, text-to-speech model.
+- Parler-TTS, text-to-speech model.
 - Computer Vision Models.
   - DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
-    ConvNeXTv2, MobileOne, EfficientVit (MSRA), MobileNetv4.
+    ConvNeXTv2, MobileOne, EfficientVit (MSRA), MobileNetv4, Hiera, FastViT.
   - yolo-v3, yolo-v8.
   - Segment-Anything Model (SAM).
   - SegFormer.
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/affine.rs
@@ -12,7 +12,7 @@ fn run_affine_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name:
     let m = 1024;
     let k = 1024;
 
-    let tensor = Tensor::zeros((b, m, k), dtype, &device).unwrap();
+    let tensor = Tensor::zeros((b, m, k), dtype, device).unwrap();
 
     let flops = b * m * k * dtype.size_in_bytes();
 
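The `&device` → `device` fix here, and the matching edits in the qmatmul, unary, and where_cond benchmarks below, silence clippy's `needless_borrow` lint: these helpers already receive `device: &Device`, so `&device` creates a `&&Device` that the compiler immediately auto-dereferences. A minimal sketch of the pattern, using a stand-in type rather than candle's actual `Device`:

    struct Device;

    // Stands in for an API like `Tensor::zeros(..., device: &Device)`.
    fn takes_ref(_d: &Device) {}

    fn caller(device: &Device) {
        takes_ref(&device); // needless_borrow: `&device` is `&&Device`
        takes_ref(device); // preferred: pass the reference through
    }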
4 changes: 2 additions & 2 deletions candle-core/benches/benchmarks/qmatmul.rs
@@ -7,7 +7,7 @@ use criterion::{black_box, criterion_group, Criterion, Throughput};
 use std::time::Instant;
 
 fn run(matmul: &QMatMul, x: &Tensor) {
-    matmul.forward(&x).unwrap();
+    matmul.forward(x).unwrap();
 }
 
 fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
@@ -50,7 +50,7 @@ fn run_bench(c: &mut Criterion, device: &Device, dtype: GgmlDType) {
 fn criterion_benchmark(c: &mut Criterion) {
     let handler = BenchDeviceHandler::new().unwrap();
     for device in handler.devices {
-        for dtype in vec![
+        for dtype in [
             GgmlDType::F32,
             GgmlDType::F16,
             GgmlDType::Q4_0,
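The `vec![...]` → `[...]` change is a related cleanup (clippy's `useless_vec`): arrays implement `IntoIterator` by value under the 2021 edition, so the loop can iterate the `GgmlDType` list directly with no heap allocation. A tiny illustration of the two forms:

    fn sum_values() -> u32 {
        // Iterates the array by value; no allocation.
        let mut total = 0;
        for x in [1u32, 2, 3] {
            total += x;
        }
        // `for x in vec![1u32, 2, 3] { ... }` computes the same total
        // but allocates first, which clippy flags as useless_vec.
        total
    }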
2 changes: 1 addition & 1 deletion candle-core/benches/benchmarks/unary.rs
@@ -12,7 +12,7 @@ fn run_unary_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &
     let m = 1024;
     let k = 1024;
 
-    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, &device)
+    let tensor = Tensor::arange(0.0f32, (b * m * k) as f32, device)
         .unwrap()
         .to_dtype(dtype)
         .unwrap()
6 changes: 3 additions & 3 deletions candle-core/benches/benchmarks/where_cond.rs
@@ -25,9 +25,9 @@ const SIZE: usize = B * M * K;
 const DATA: [u8; SIZE] = create_cond_arr::<SIZE>();
 
 fn run_where_cond_benchmark(c: &mut Criterion, device: &Device, dtype: DType, name: &str) {
-    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), &device).unwrap();
-    let on_true = Tensor::ones((B, M, K), dtype, &device).unwrap();
-    let on_false = Tensor::zeros((B, M, K), dtype, &device).unwrap();
+    let tensor = Tensor::from_slice(DATA.as_slice(), (B, M, K), device).unwrap();
+    let on_true = Tensor::ones((B, M, K), dtype, device).unwrap();
+    let on_false = Tensor::zeros((B, M, K), dtype, device).unwrap();
 
     let elements = B * M * K;
     // E.g. 2 f32 tensors + 1 u8 tensor
9 changes: 7 additions & 2 deletions candle-core/src/backprop.rs
@@ -623,9 +623,9 @@ impl Tensor {
             }
             Op::Unary(arg, UnaryOp::Silu) => {
                 let sum_grad = grads.or_insert(arg)?;
-                // d/dx silu = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
+                // d/dx silu = sigmoid(x) * (1 + x * (1 - sigmoid(x))) = sigmoid(x) * (1 - node) + node
                 let sigmoid_arg = (arg.neg()?.exp()? + 1.)?.recip()?;
-                let silu_grad = (&sigmoid_arg * (1. + (arg * (1. - &sigmoid_arg)?)?)?)?;
+                let silu_grad = &sigmoid_arg * (1. - *node) + *node;
                 *sum_grad = sum_grad.add(&(&grad * silu_grad)?)?
             }
             Op::Elu(arg, alpha) => {
@@ -756,4 +756,9 @@ impl GradStore {
         };
         Ok(grad)
     }
+
+    /// Get the tensor ids of the stored gradient tensors
+    pub fn get_ids(&self) -> impl Iterator<Item = &TensorId> {
+        self.0.keys()
+    }
 }
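For readers checking the rewritten gradient: writing σ for the sigmoid, the cached forward output is node = silu(x) = x·σ(x), and the two forms in the updated comment agree:

    \frac{d}{dx}\,\mathrm{silu}(x) = \sigma(x) + x\,\sigma(x)\bigl(1 - \sigma(x)\bigr) = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr)

    \sigma(x)\,(1 - \mathrm{node}) + \mathrm{node} = \sigma(x) - x\,\sigma(x)^2 + x\,\sigma(x) = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr)

The new `silu_grad` therefore reuses the already-computed forward value `node` instead of rebuilding `x * (1 - sigmoid(x))` from `arg`, replacing one elementwise expression with a cheaper one.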
4 changes: 2 additions & 2 deletions candle-core/src/cuda_backend/cudnn.rs
@@ -1,6 +1,6 @@
 use crate::WithDType;
 use cudarc;
-use cudarc::cudnn::safe::{Conv2dForward, Cudnn};
+use cudarc::cudnn::safe::{ConvForward, Cudnn};
 use cudarc::driver::{CudaSlice, CudaView, DeviceRepr, ValidAsZeroBits};
 use std::cell::RefCell;
 use std::collections::HashMap;
@@ -87,7 +87,7 @@ pub(crate) fn launch_conv2d<
         cudarc::cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
         [params.b_size as i32, params.c_out as i32, h_out, w_out],
     )?;
-    let conv2d = Conv2dForward {
+    let conv2d = ConvForward {
         conv: &conv,
         x: &x,
         w: &w,
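The `Conv2dForward` → `ConvForward` rename tracks the cudarc bump from `=0.11.6` to `0.12.0` in Cargo.toml above: `cudarc::cudnn::safe` now exposes the forward-convolution builder as `ConvForward`, so this appears to be an API-tracking change rather than a behavioral one.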
2 changes: 2 additions & 0 deletions candle-core/src/cuda_backend/mod.rs
@@ -174,6 +174,7 @@ impl Map1 for Im2Col1D {
     }
 }
 
+#[allow(unused)]
 struct Im2Col {
     h_k: usize,
     w_k: usize,
@@ -183,6 +184,7 @@ struct Im2Col {
 }
 
 impl Im2Col {
+    #[allow(unused)]
     fn hw_out(&self, h: usize, w: usize) -> (usize, usize) {
         let h_out = (h + 2 * self.padding - self.dilation * (self.h_k - 1) - 1) / self.stride + 1;
         let w_out = (w + 2 * self.padding - self.dilation * (self.w_k - 1) - 1) / self.stride + 1;
16 changes: 16 additions & 0 deletions candle-core/src/device.rs
@@ -171,6 +171,22 @@ impl Device {
         matches!(self, Self::Metal(_))
     }
 
+    pub fn supports_bf16(&self) -> bool {
+        match self {
+            Self::Cuda(_) => true,
+            Self::Metal(_) | Self::Cpu => false,
+        }
+    }
+
+    /// Return `BF16` for devices that support it, otherwise default to `F32`.
+    pub fn bf16_default_to_f32(&self) -> DType {
+        if self.supports_bf16() {
+            DType::BF16
+        } else {
+            DType::F32
+        }
+    }
+
     pub fn cuda_if_available(ordinal: usize) -> Result<Self> {
         if crate::utils::cuda_is_available() {
             Self::new_cuda(ordinal)
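A minimal sketch of how the new helpers compose with the existing `cuda_if_available` constructor shown above (the wrapper function is illustrative, not part of this diff):

    use candle_core::{DType, Device, Result};

    fn pick_device_and_dtype() -> Result<(Device, DType)> {
        // Falls back to the CPU device when no CUDA device is present.
        let device = Device::cuda_if_available(0)?;
        // BF16 on CUDA, F32 on CPU/Metal, per `supports_bf16` above.
        let dtype = device.bf16_default_to_f32();
        assert!(matches!(dtype, DType::BF16 | DType::F32));
        Ok((device, dtype))
    }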
(Diff truncated: the remaining changed files did not load.)
