Skip to content

Commit

Permalink
Add the code for vello_cpu (#830)
Browse files Browse the repository at this point in the history
Doesn't include the tests yet which will come after this PR. But I
verified that the tests pass using this setup.
  • Loading branch information
LaurenzV authored Mar 5, 2025
1 parent c5f91d7 commit 1b6904e
Show file tree
Hide file tree
Showing 8 changed files with 449 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ thiserror = "2.0.11"
# The below crates are experimental!
vello_api = { path = "sparse_strips/vello_api" }
vello_common = { path = "sparse_strips/vello_common" }
vello_cpu = { path = "sparse_strips/vello_cpu" }

# NOTE: Make sure to keep this in sync with the version badge in README.md and vello/README.md
wgpu = { version = "24.0.1" }
Expand Down
3 changes: 3 additions & 0 deletions sparse_strips/vello_cpu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ repository.workspace = true
publish = false

[dependencies]
kurbo = { workspace = true }
peniko = { workspace = true }
vello_common = { workspace = true }

[lints]
workspace = true
176 changes: 176 additions & 0 deletions sparse_strips/vello_cpu/src/fine.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2025 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Fine rasterization runs the commands in each wide tile to determine the final RGBA value
//! of each pixel and pack it into the pixmap.
use crate::util::ColorExt;
use vello_common::coarse::{Cmd, WIDE_TILE_WIDTH};
use vello_common::paint::Paint;
use vello_common::strip::STRIP_HEIGHT;

/// Number of `u8` components stored per pixel (RGBA).
pub(crate) const COLOR_COMPONENTS: usize = 4;
/// Number of `u8` components in a single pixel column of a strip, i.e. `STRIP_HEIGHT` pixels.
pub(crate) const STRIP_HEIGHT_COMPONENTS: usize = STRIP_HEIGHT * COLOR_COMPONENTS;
/// Number of `u8` components needed to hold one wide tile: `WIDE_TILE_WIDTH` columns of
/// `STRIP_HEIGHT` pixels each.
pub(crate) const SCRATCH_BUF_SIZE: usize = WIDE_TILE_WIDTH * STRIP_HEIGHT * COLOR_COMPONENTS;

/// Scratch storage holding the pixels of a single wide tile.
pub(crate) type ScratchBuf = [u8; SCRATCH_BUF_SIZE];

/// State for fine rasterization of one wide tile at a time.
///
/// Commands are run into `scratch`, whose contents are then packed into `out_buf`.
pub(crate) struct Fine<'a> {
/// Width of the output in pixels; used by `pack` as the row stride of `out_buf`.
pub(crate) width: usize,
/// Height of the output in pixels; used by `pack` to skip rows outside the output.
pub(crate) height: usize,
/// Destination buffer that `pack` writes the scratch contents into.
pub(crate) out_buf: &'a mut [u8],
/// Pixels of the wide tile currently being processed. `pack` reads it column by column:
/// all `STRIP_HEIGHT` pixels of column 0 come first, then column 1, and so on.
pub(crate) scratch: ScratchBuf,
}

impl<'a> Fine<'a> {
    /// Create a fine rasterizer that writes into `out_buf`, an output of
    /// `width` x `height` pixels.
    ///
    /// The scratch buffer starts out zeroed.
    pub(crate) fn new(width: usize, height: usize, out_buf: &'a mut [u8]) -> Self {
        Self {
            width,
            height,
            out_buf,
            scratch: [0; SCRATCH_BUF_SIZE],
        }
    }

    /// Overwrite every pixel of the scratch buffer with `premul_color`.
    pub(crate) fn clear(&mut self, premul_color: [u8; 4]) {
        let first = premul_color[0];

        // When all four components are equal the buffer is just one repeated byte,
        // so a plain fill (memset) is enough.
        if premul_color.iter().all(|&component| component == first) {
            self.scratch.fill(first);
            return;
        }

        for pixel in self.scratch.chunks_exact_mut(COLOR_COMPONENTS) {
            pixel.copy_from_slice(&premul_color);
        }
    }

    /// Pack the scratch buffer into the output buffer at wide tile position (`x`, `y`).
    pub(crate) fn pack(&mut self, x: usize, y: usize) {
        pack(self.out_buf, &self.scratch, self.width, self.height, x, y);
    }

    /// Run a single command against the scratch buffer.
    ///
    /// `alphas` is the full alpha buffer; an `AlphaFill` command reads from it starting
    /// at the command's `alpha_ix`.
    pub(crate) fn run_cmd(&mut self, cmd: &Cmd, alphas: &[u32]) {
        match cmd {
            Cmd::Fill(fill_cmd) => self.fill(
                fill_cmd.x as usize,
                fill_cmd.width as usize,
                &fill_cmd.paint,
            ),
            Cmd::AlphaFill(strip_cmd) => {
                let coverage = &alphas[strip_cmd.alpha_ix..];
                self.strip(
                    strip_cmd.x as usize,
                    strip_cmd.width as usize,
                    coverage,
                    &strip_cmd.paint,
                );
            }
        }
    }

    /// Fill `width` full-height pixel columns of the scratch buffer, starting at
    /// column `x`, with `paint`.
    ///
    /// Only solid paints are implemented.
    pub(crate) fn fill(&mut self, x: usize, width: usize, paint: &Paint) {
        match paint {
            Paint::Solid(solid) => {
                let rgba = solid.premultiply().to_rgba8_fast();

                let start = x * STRIP_HEIGHT_COMPONENTS;
                let len = STRIP_HEIGHT_COMPONENTS * width;
                let columns = &mut self.scratch[start..][..len];

                if rgba[3] == 255 {
                    // A fully opaque color replaces the background, so no blending is
                    // needed and the color can simply be copied into every pixel.
                    for pixel in columns.chunks_exact_mut(COLOR_COMPONENTS) {
                        pixel.copy_from_slice(&rgba);
                    }
                } else {
                    fill::src_over(columns, &rgba);
                }
            }
            _ => unimplemented!(),
        }
    }

    /// Blend `paint` into `width` pixel columns of the scratch buffer, starting at
    /// column `x`, using the per-column masks in `alphas`.
    ///
    /// Only solid paints are implemented.
    pub(crate) fn strip(&mut self, x: usize, width: usize, alphas: &[u32], paint: &Paint) {
        debug_assert!(
            alphas.len() >= width,
            "alpha buffer doesn't contain sufficient elements"
        );

        match paint {
            Paint::Solid(solid) => {
                let rgba = solid.premultiply().to_rgba8_fast();

                let start = x * STRIP_HEIGHT_COMPONENTS;
                let len = STRIP_HEIGHT_COMPONENTS * width;
                let columns = &mut self.scratch[start..][..len];

                strip::src_over(columns, &rgba, alphas);
            }
            _ => unimplemented!(),
        }
    }
}

/// Copy the part of a wide tile that lies inside the output from `scratch` to `out_buf`.
///
/// `x` and `y` give the tile position in units of `WIDE_TILE_WIDTH` and `STRIP_HEIGHT`
/// pixels respectively; `width` and `height` are the output dimensions in pixels.
/// `scratch` is read column by column, while `out_buf` is written row by row with a
/// stride of `width` pixels.
fn pack(out_buf: &mut [u8], scratch: &ScratchBuf, width: usize, height: usize, x: usize, y: usize) {
    let tile_origin = (y * STRIP_HEIGHT * width + x * WIDE_TILE_WIDTH) * COLOR_COMPONENTS;

    // Only touch rows that actually lie inside the output.
    let visible_rows = (height - y * STRIP_HEIGHT).min(STRIP_HEIGHT);

    for row in 0..visible_rows {
        let row_start = tile_origin + row * width * COLOR_COMPONENTS;

        // Only touch columns that actually lie inside the output.
        let visible_cols = (width - x * WIDE_TILE_WIDTH).min(WIDE_TILE_WIDTH);
        let row_len = visible_cols * COLOR_COMPONENTS;
        // Slicing the destination row to its exact length up front lets the compiler see
        // that the writes below are in bounds, which saves checks in the inner loop.
        let dest_row = &mut out_buf[row_start..][..row_len];

        for (col, dest_px) in dest_row.chunks_exact_mut(COLOR_COMPONENTS).enumerate() {
            let src_start = (col * STRIP_HEIGHT + row) * COLOR_COMPONENTS;
            dest_px.copy_from_slice(&scratch[src_start..][..COLOR_COMPONENTS]);
        }
    }
}

pub(crate) mod fill {
    // See https://www.w3.org/TR/compositing-1/#porterduffcompositingoperators for the
    // formulas.

    use crate::fine::{COLOR_COMPONENTS, STRIP_HEIGHT_COMPONENTS};
    use crate::util::scalar::div_255;

    /// Composite the color `src_c` over every pixel in `target` (source-over).
    ///
    /// `target` is processed in whole columns of `STRIP_HEIGHT_COMPONENTS` bytes; each
    /// component becomes `src + div_255(bg * (255 - src_alpha))`.
    pub(crate) fn src_over(target: &mut [u8], src_c: &[u8; COLOR_COMPONENTS]) {
        let inv_src_a = 255 - src_c[3] as u16;

        for column in target.chunks_exact_mut(STRIP_HEIGHT_COMPONENTS) {
            for pixel in column.chunks_exact_mut(COLOR_COMPONENTS) {
                for (bg, &src) in pixel.iter_mut().zip(src_c) {
                    *bg = src + div_255(*bg as u16 * inv_src_a) as u8;
                }
            }
        }
    }
}

pub(crate) mod strip {
    use crate::fine::{COLOR_COMPONENTS, STRIP_HEIGHT_COMPONENTS};
    use crate::util::scalar::div_255;

    /// Composite the color `src_c` over `target` (source-over), modulated by `alphas`.
    ///
    /// Each element of `alphas` covers one pixel column of `target` and packs one 8-bit
    /// mask value per row, with row 0 in the least significant byte. Columns are
    /// processed until either `target` or `alphas` runs out.
    pub(crate) fn src_over(target: &mut [u8], src_c: &[u8; COLOR_COMPONENTS], alphas: &[u32]) {
        let src_a = src_c[3] as u16;

        let columns = target.chunks_exact_mut(STRIP_HEIGHT_COMPONENTS);
        for (column, &packed) in columns.zip(alphas) {
            // A column holds exactly one pixel per strip row, so the chunk index is the
            // row index within the strip.
            for (row, pixel) in column.chunks_exact_mut(COLOR_COMPONENTS).enumerate() {
                let mask_a = ((packed >> (row * 8)) & 0xff) as u16;
                let inv_src_a_mask_a = 255 - div_255(mask_a * src_a);

                for (bg, &src) in pixel.iter_mut().zip(src_c) {
                    let weighted_bg = *bg as u16 * inv_src_a_mask_a;
                    let weighted_src = src as u16 * mask_a;
                    *bg = div_255(weighted_bg + weighted_src) as u8;
                }
            }
        }
    }
}
14 changes: 14 additions & 0 deletions sparse_strips/vello_cpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,17 @@
//! This crate implements a CPU-based renderer, optimized for SIMD and multithreaded execution.
//! It is optimized for CPU-bound workloads and serves as a standalone renderer for systems
//! without GPU acceleration.
#![expect(
clippy::cast_possible_truncation,
reason = "We cast u16s to u8 in various places where we know for sure that it's < 256"
)]

mod pixmap;
mod render;

mod fine;
mod util;

pub use pixmap::Pixmap;
pub use render::RenderContext;
39 changes: 39 additions & 0 deletions sparse_strips/vello_cpu/src/pixmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright 2025 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! A simple pixmap type.
/// A pixmap backed by u8.
#[derive(Debug)]
pub struct Pixmap {
    /// Width of the pixmap in pixels.
    pub(crate) width: u16,
    /// Height of the pixmap in pixels.
    pub(crate) height: u16,
    /// Pixel data, four `u8` components (RGBA) per pixel.
    pub(crate) buf: Vec<u8>,
}

impl Pixmap {
    /// Create a new pixmap with the given width and height in pixels.
    ///
    /// All components are initialized to zero.
    pub fn new(width: u16, height: u16) -> Self {
        let buf = vec![0; width as usize * height as usize * 4];
        Self { width, height, buf }
    }

    /// Returns the underlying data as premultiplied RGBA8.
    pub fn data(&self) -> &[u8] {
        &self.buf
    }

    /// Convert from premultiplied to separate alpha.
    ///
    /// Fully transparent pixels are left untouched, since their color cannot be
    /// recovered.
    ///
    /// Not fast, but useful for saving to PNG etc.
    pub fn unpremultiply(&mut self) {
        for rgba in self.buf.chunks_exact_mut(4) {
            let alpha = rgba[3];
            // Test the alpha byte itself rather than the scale factor: `255.0 / 0.0` is
            // infinity (never zero), and scaling a zero channel by infinity yields NaN,
            // which the clamp below would turn into 255.
            if alpha == 0 {
                continue;
            }

            let scale = 255.0 / f32::from(alpha);
            for channel in &mut rgba[..3] {
                *channel = (f32::from(*channel) * scale).round().min(255.0) as u8;
            }
        }
    }
}
Loading

0 comments on commit 1b6904e

Please sign in to comment.