Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 25 additions & 26 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,35 +97,34 @@ jobs:
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

# TODO: investigate slight precision issues in some operations for test-backend-ops on the WebGPU backend.
#ggml-ci-nvidia-webgpu:
# runs-on: [self-hosted, Linux, NVIDIA]
ggml-ci-nvidia-webgpu:
runs-on: [self-hosted, Linux, NVIDIA]

# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

# - name: Dawn Dependency
# id: dawn-depends
# run: |
# DAWN_VERSION="v20260317.182325"
# DAWN_OWNER="google"
# DAWN_REPO="dawn"
# DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
# echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
# curl -L -o artifact.tar.gz \
# "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
# mkdir dawn
# tar -xvf artifact.tar.gz -C dawn --strip-components=1
- name: Dawn Dependency
id: dawn-depends
run: |
DAWN_VERSION="v20260317.182325"
DAWN_OWNER="google"
DAWN_REPO="dawn"
DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
curl -L -o artifact.tar.gz \
"https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
mkdir dawn
tar -xvf artifact.tar.gz -C dawn --strip-components=1

# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_WEBGPU=1 \
# GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
# GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
- name: Test
id: ggml-ci
run: |
GG_BUILD_WEBGPU=1 \
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
Expand Down
31 changes: 29 additions & 2 deletions tests/test-backend-ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,11 @@ struct test_case {
}

virtual double max_nmse_err(ggml_backend_t backend) {
GGML_UNUSED(backend);
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(ggml_backend_get_device(backend));
// See https://github.com/ggml-org/llama.cpp/pull/22976 for explanation.
if (contains_f16 && strcmp(ggml_backend_reg_name(reg), "WebGPU") == 0) {
return std::max(max_nmse_err(), 1e-6);
}
Comment on lines +1131 to +1135
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may want to reference this PR in the change; otherwise, future maintainers will wonder why WebGPU has a special case.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Can I get a re-approval?

return max_nmse_err();
}

Expand Down Expand Up @@ -1205,6 +1209,18 @@ struct test_case {
std::vector<ggml_tensor *> sentinels;

std::string current_op_name;
bool contains_f16 = false;

// Records in contains_f16 whether any tensor allocated in ctx has type GGML_TYPE_F16.
// The flag is consulted by max_nmse_err() so the WebGPU backend can relax error
// thresholds on ops involving f16 tensors.
void check_for_f16_tensor(ggml_context * ctx) {
    // Walk the context's tensor list and stop at the first F16 tensor, if any.
    ggml_tensor * cur = ggml_get_first_tensor(ctx);
    while (cur != nullptr && cur->type != GGML_TYPE_F16) {
        cur = ggml_get_next_tensor(ctx, cur);
    }
    // A non-null cursor means the walk stopped early on an F16 tensor.
    contains_f16 = (cur != nullptr);
}

void add_sentinel(ggml_context * ctx) {
if (mode == MODE_PERF || mode == MODE_GRAD || mode == MODE_SUPPORT) {
Expand Down Expand Up @@ -1298,6 +1314,7 @@ struct test_case {

ggml_tensor * out = build_graph(ctx);
current_op_name = op_desc(out);
check_for_f16_tensor(ctx);

if (!matches_filter(out, op_names_filter)) {
//printf(" %s: skipping\n", op_desc(out).c_str());
Expand Down Expand Up @@ -1973,9 +1990,19 @@ struct test_unary : public test_case {
}

void initialize_tensors(ggml_context * ctx) override {
float min = -150.f;
float max = 150.f;

// Keep FP16 exp/expm1 inputs in-range so all backends stay finite instead of
// disagreeing on whether overflow saturates to max-F16 or produces +inf.
if (type == GGML_TYPE_F16 && (op == GGML_UNARY_OP_EXP || op == GGML_UNARY_OP_EXPM1)) {
min = -10.f;
max = 10.f;
}

for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
// test extended range of values to check for NaNs in GELU
init_tensor_uniform(t, -150.f, 150.f);
init_tensor_uniform(t, min, max);
}
}

Expand Down
Loading