Skip to content

Commit

Permalink
Add: Rust binary distances
Browse files Browse the repository at this point in the history
Closes #84
  • Loading branch information
ashvardanian committed Mar 4, 2024
1 parent 4f1eba1 commit 960af05
Show file tree
Hide file tree
Showing 6 changed files with 482 additions and 125 deletions.
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ python -m cibuildwheel --platform windows
## Rust

```sh
cargo test
cargo bench
cargo test
cargo bench
open ./target/criterion/report/index.html
```

Expand Down
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ repository = "https://github.com/ashvardanian/SimSIMD"
documentation = "https://docs.rs/simsimd"
homepage = "https://ashvardanian.com/posts/simsimd-faster-scipy"
keywords = ["simd", "search", "linear-algebra", "vector"]
categories = ["mathematics", "hardware-support", "no-std", "wasm", "external-ffi-bindings"]
categories = [
"mathematics",
"hardware-support",
"no-std",
"wasm",
"external-ffi-bindings",
]

[lib]
name = "simsimd"
Expand All @@ -21,6 +27,7 @@ path = "rust/lib.rs"
[build-dependencies]
cc = "1.0.83"


[[bench]]
name = "cosine"
harness = false
Expand All @@ -34,3 +41,4 @@ path = "rust/benches/sqeuclidean.rs"
[dev-dependencies]
criterion = { version = "0.5.1" }
rand = { version = "0.8.5" }
half = { version = "2.4.0" }
69 changes: 61 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,20 +185,73 @@ To install, add the following to your `Cargo.toml`:
simsimd = "..."
```

To use it:
Before using the SimSIMD library, ensure you have imported the necessary traits and types into your Rust source file.
The library provides several traits for different distance/similarity kinds - `SpatialSimilarity`, `BinarySimilarity`, and `ProbabilitySimilarity`.

```rust
use simsimd::{cosine, sqeuclidean};
use simsimd::SpatialSimilarity;

fn main() {
let vector_a = vec![1.0, 2.0, 3.0];
let vector_b = vec![4.0, 5.0, 6.0];
let vector_a: Vec<f32> = vec![1.0, 2.0, 3.0];
let vector_b: Vec<f32> = vec![4.0, 5.0, 6.0];

let distance = cosine(&vector_a, &vector_b);
println!("Cosine Distance: {}", distance);
// Compute the cosine similarity between vector_a and vector_b
let cosine_similarity = f32::cosine(&vector_a, &vector_b)
.expect("Vectors must be of the same length");

let distance = sqeuclidean(&vector_a, &vector_b);
println!("Squared Euclidean Distance: {}", distance);
println!("Cosine Similarity: {}", cosine_similarity);

// Compute the squared Euclidean distance between vector_a and vector_b
let sq_euclidean_distance = f32::sqeuclidean(&vector_a, &vector_b)
.expect("Vectors must be of the same length");

println!("Squared Euclidean Distance: {}", sq_euclidean_distance);
}
```

Similarly, one can compute bit-level distance functions between slices of unsigned integers:

```rust
use simsimd::BinarySimilarity;

/// Computes the Hamming and Jaccard distances between two bit-packed vectors.
fn main() {
    // Typed as slices so they can be passed to the trait functions directly;
    // taking `&vector_a` of an array reference would yield `&&[u8; 3]`, which
    // does not coerce to `&[u8]`.
    let vector_a: &[u8] = &[0b11110000, 0b00001111, 0b10101010];
    let vector_b: &[u8] = &[0b11110000, 0b00001111, 0b01010101];

    // Compute the Hamming distance between vector_a and vector_b
    let hamming_distance = u8::hamming(vector_a, vector_b)
        .expect("Vectors must be of the same length");

    println!("Hamming Distance: {}", hamming_distance);

    // Compute the Jaccard distance between vector_a and vector_b
    let jaccard_distance = u8::jaccard(vector_a, vector_b)
        .expect("Vectors must be of the same length");

    println!("Jaccard Distance: {}", jaccard_distance);
}
```

Rust has no native support for half-precision floating-point numbers, but SimSIMD provides an `f16` type for this purpose.
It carries no functionality of its own and is a `transparent` wrapper around `u16`, so it can be used together with `half` or any other half-precision library.

```rust
use simsimd::SpatialSimilarity;
use simsimd::f16 as SimF16;
use half::f16 as HalfF16;

fn main() {
let vector_a: Vec<HalfF16> = ...
let vector_b: Vec<HalfF16> = ...

let buffer_a: &[SimF16] = unsafe { std::slice::from_raw_parts(a_half.as_ptr() as *const SimF16, a_half.len()) };
let buffer_b: &[SimF16] = unsafe { std::slice::from_raw_parts(b_half.as_ptr() as *const SimF16, b_half.len()) };

// Compute the cosine similarity between vector_a and vector_b
let cosine_similarity = SimF16::cosine(&vector_a, &vector_b)
.expect("Vectors must be of the same length");

println!("Cosine Similarity: {}", cosine_similarity);
}
```

Expand Down
87 changes: 32 additions & 55 deletions rust/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,62 +7,39 @@ simsimd_capability_t cached_capabilities(void) {
return static_capabilities;
}

simsimd_f32_t cosine_i8(simsimd_i8_t const* a, simsimd_i8_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_cosine_k, simsimd_datatype_i8_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
#define SIMSIMD_METRIC_DECLARATION(name, type) \
simsimd_f32_t name##_##type(simsimd_##type##_t const* a, simsimd_##type##_t const* b, simsimd_size_t d) { \
static simsimd_metric_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##type##_k, cached_capabilities(), \
simsimd_cap_any_k, &metric, &used_capability); \
} \
return metric(a, b, d, d); \
}
return metric(a, b, d, d);
}

simsimd_f32_t cosine_f32(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_cosine_k, simsimd_datatype_f32_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
}
return metric(a, b, d, d);
}
// Spatial distances
SIMSIMD_METRIC_DECLARATION(cosine, i8)
SIMSIMD_METRIC_DECLARATION(cosine, f16)
SIMSIMD_METRIC_DECLARATION(cosine, f32)
SIMSIMD_METRIC_DECLARATION(cosine, f64)
SIMSIMD_METRIC_DECLARATION(inner, i8)
SIMSIMD_METRIC_DECLARATION(inner, f16)
SIMSIMD_METRIC_DECLARATION(inner, f32)
SIMSIMD_METRIC_DECLARATION(inner, f64)
SIMSIMD_METRIC_DECLARATION(sqeuclidean, i8)
SIMSIMD_METRIC_DECLARATION(sqeuclidean, f16)
SIMSIMD_METRIC_DECLARATION(sqeuclidean, f32)
SIMSIMD_METRIC_DECLARATION(sqeuclidean, f64)

simsimd_f32_t inner_i8(simsimd_i8_t const* a, simsimd_i8_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_inner_k, simsimd_datatype_i8_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
}
return metric(a, b, d, d);
}
// Binary distances
SIMSIMD_METRIC_DECLARATION(hamming, b8)
SIMSIMD_METRIC_DECLARATION(jaccard, b8)

simsimd_f32_t inner_f32(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_inner_k, simsimd_datatype_f32_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
}
return metric(a, b, d, d);
}

simsimd_f32_t sqeuclidean_i8(simsimd_i8_t const* a, simsimd_i8_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_sqeuclidean_k, simsimd_datatype_i8_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
}
return metric(a, b, d, d);
}

simsimd_f32_t sqeuclidean_f32(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t d) {
static simsimd_metric_punned_t metric = 0;
if (metric == 0) {
simsimd_capability_t used_capability;
simsimd_find_metric_punned(simsimd_metric_sqeuclidean_k, simsimd_datatype_f32_k, cached_capabilities(),
simsimd_cap_any_k, &metric, &used_capability);
}
return metric(a, b, d, d);
}
// Probability distributions
SIMSIMD_METRIC_DECLARATION(kl, f16)
SIMSIMD_METRIC_DECLARATION(kl, f32)
SIMSIMD_METRIC_DECLARATION(kl, f64)
SIMSIMD_METRIC_DECLARATION(js, f16)
SIMSIMD_METRIC_DECLARATION(js, f32)
SIMSIMD_METRIC_DECLARATION(js, f64)
Loading

0 comments on commit 960af05

Please sign in to comment.