Skip to content

Commit afb6416

Browse files
authored
[WebGPU] Handle device OOM in createBuffer (#17005)
1 parent f044eef commit afb6416

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

web/src/runtime.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ export class Instance implements Disposable {
10141014
private asyncifyHandler: AsyncifyHandler;
10151015
private initProgressCallback: Array<InitProgressCallback> = [];
10161016
private rng: LinearCongruentialGenerator;
1017+
private deviceLostIsError = true; // whether device.lost is due to actual error or dispose()
10171018

10181019
/**
10191020
* Internal function(registered by the runtime)
@@ -1107,11 +1108,14 @@ export class Instance implements Disposable {
11071108
}
11081109

11091110
/**
 * Release the resources held by this instance.
 *
 * Clears `deviceLostIsError` first so that the `device.lost` handler
 * registered in `initWebGPU` does not report an intentional teardown as an
 * error and does not call `dispose()` a second time.
 */
dispose(): void {
  // From here on, a lost device is expected rather than an error.
  this.deviceLostIsError = false;
  // order matters
  // ctx release goes back into lib.
  this.ctx.dispose();
  this.lib.dispose();
  // Do NOT set deviceLostIsError back to true here. Promise callbacks such as
  // the `device.lost.then(...)` handler only run after this method has
  // returned, so restoring the flag synchronously would make that handler see
  // `true` and treat this teardown as a device error.
  // NOTE(review): assumes the device is destroyed as part of the dispose calls
  // above (not visible from this method) -- confirm.
}
1118+
11151119
/**
11161120
* Obtain the runtime information in readable format.
11171121
*/
@@ -2094,6 +2098,17 @@ export class Instance implements Disposable {
20942098
* @param device The given GPU device.
20952099
*/
20962100
initWebGPU(device: GPUDevice): void {
2101+
device.addEventListener("uncapturederror", (event) => {
2102+
console.error("A WebGPU error was not captured: ", event);
2103+
});
2104+
2105+
device.lost.then((info: any) => {
2106+
if (this.deviceLostIsError) {
2107+
console.error("Device lost, calling Instance.dispose(). Please initialize again. ", info);
2108+
this.dispose();
2109+
}
2110+
});
2111+
20972112
const webGPUContext = new WebGPUContext(
20982113
this.memory, device
20992114
);

web/src/webgpu.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,29 @@ export async function detectGPUDevice(): Promise<GPUDeviceDetectOutput | undefin
120120
}
121121
}
122122

123+
/**
 * Create a GPU buffer with `createBuffer()` while watching for device errors.
 *
 * Error scopes for "out-of-memory", "validation" and "internal" errors are
 * pushed around the `createBuffer()` call. The scopes are inspected
 * asynchronously: if any of them captured an error, the device is destroyed
 * (at most once per call) and the error is logged.
 *
 * @param device The GPUDevice used to create a buffer.
 * @param descriptor The GPUBufferDescriptor passed to `createBuffer()`.
 * @returns The buffer created by `createBuffer()`. It is returned immediately,
 *   before the error scopes have been resolved.
 *
 * @note We treat any error occurred at `createBuffer()` fatal and expect the user to handle
 * `device.destroy()` with `device.lost.then()`.
 */
function tryCreateBuffer(device: GPUDevice, descriptor: GPUBufferDescriptor) {
  // One error scope per filter; every scope pushed here is popped below.
  const filters = ["out-of-memory", "validation", "internal"] as const;
  for (const filter of filters) {
    device.pushErrorScope(filter);
  }

  const buffer = device.createBuffer(descriptor);

  // Several scopes may report an error for the same call; destroy only once.
  let destroyed = false;
  const onScopeResolved = (error: unknown): void => {
    if (!error) {
      return;
    }
    if (!destroyed) {
      destroyed = true;
      device.destroy();
    }
    console.error(error);
  };
  // popErrorScope() may reject (e.g. when the device is already lost); log it
  // instead of leaving an unhandled promise rejection.
  const onScopeRejected = (reason: unknown): void => {
    console.error("Failed to pop WebGPU error scope: ", reason);
  };

  for (let i = 0; i < filters.length; i++) {
    device.popErrorScope().then(onScopeResolved, onScopeRejected);
  }

  return buffer;
}
145+
123146
const canvasRenderWGSL = `
124147
@group(0) @binding(0) var my_sampler : sampler;
125148
@group(0) @binding(1) var my_texture : texture_2d<f32>;
@@ -504,7 +527,7 @@ export class WebGPUContext {
504527

505528
if (buffer == undefined) {
506529
// create uniform buffer
507-
buffer = this.device.createBuffer({
530+
buffer = tryCreateBuffer(this.device, {
508531
size: allocSize,
509532
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
510533
});
@@ -779,7 +802,7 @@ export class WebGPUContext {
779802
if (nbytes == 0) {
780803
nbytes = 1;
781804
}
782-
const buffer = this.device.createBuffer({
805+
const buffer = tryCreateBuffer(this.device, {
783806
size: nbytes,
784807
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
785808
});
@@ -833,7 +856,7 @@ export class WebGPUContext {
833856
nbytes: number
834857
): void {
835858
// Perhaps it would be more useful to reuse a staging buffer?
836-
const gpuTemp = this.device.createBuffer({
859+
const gpuTemp = tryCreateBuffer(this.device, {
837860
size: nbytes,
838861
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
839862
});

0 commit comments

Comments
 (0)