Fixed correctness for both depthToSpace and the new packed matmul

Migrating this to the new repo...
microsoft · May 11, 2021 · 4c5d979 · 4c5d979
1 parent d2dc883
commit 4c5d979
Show file tree

Hide file tree

Showing 7 changed files with 302 additions and 41 deletions.
diff --git a/benchmark/super_resolution_model_zoo/karma.conf.js b/benchmark/super_resolution_model_zoo/karma.conf.js
@@ -59,6 +59,7 @@ module.exports = function(config) {
       printMatches: false,
       // To enable pack, run 'PACK=1 npm run test'
       usePackedGlTexture: config.usePackedGlTexture==1 ? true : false,
+      runIteration: config.runIteration ? config.runIteration : 10,
       profile: config.profile
     },
     browsers: ['ChromeTest', 'ChromeDebug', 'Edge', 'Safari'],

diff --git a/benchmark/super_resolution_model_zoo/package.json b/benchmark/super_resolution_model_zoo/package.json
@@ -6,8 +6,8 @@
   "scripts": {
     "build": "webpack --config ./webpack.conf.js --mode production",
     "build-debug": "webpack --config ./webpack.conf.js --mode development",
-    "test": "karma start --browsers ChromeTest --single-run --usePackedGlTexture=$PACK",
-    "profile": "karma start --browsers ChromeTest --single-run --profile --usePackedGlTexture=$PACK",
+    "test": "karma start --browsers ChromeTest --single-run --usePackedGlTexture=$PACK --runIteration=$RUNCOUNT",
+    "profile": "karma start --browsers ChromeTest --single-run --profile --usePackedGlTexture=$PACK --runIteration=$RUNCOUNT",
     "test-debug": "karma start --browsers ChromeDebug",
     "test-edge": "karma start --browsers Edge --single-run",
     "test-safari": "karma start --browsers Safari --single-run"

diff --git a/benchmark/super_resolution_model_zoo/src/index.js b/benchmark/super_resolution_model_zoo/src/index.js
@@ -99,9 +99,9 @@ async function runBenchmark(benchmarkData, backend, imageSize) {
     const imageLoader = new ImageLoader(imageSize, imageSize);
     const durations = [];
     for(const input of benchmarkData.inputs) {
-        console.log(`Running ${input.name}`)
+        console.log(`Running ${input.name} for ${runIteration} iterations.`)
         const imageData = await imageLoader.getImageData(input.url);
-        for(let i = 0 ; i < 10; i++) {
+        for(let i = 0 ; i < runIteration; i++) {
           const outputData = await impl.runModel(imageData.data);
           durations.push(impl.duration);
         }
@@ -125,6 +125,7 @@ class TensorFlowResnetBenchmark {
         this.imageSize = imageSize;
         tf.disposeVariables();
         tf.env().set('WEBGL_PACK', pack_texture);
+
         console.log(`Pack mode enabled: ${tf.env().getBool('WEBGL_PACK')}`);
         if(backend) {
             console.log(`Setting the backend to ${backend}`);
@@ -261,6 +262,7 @@ const results = [];
 const browser = __karma__.config.browser[0];
 const profile = __karma__.config.profile;
 const pack_texture = __karma__.config.usePackedGlTexture;
+const runIteration = __karma__.config.runIteration;
 
 console.log(`browser: ${browser}`)
 describe('super resolution Tests', ()=> {

diff --git a/lib/backends/webgl/ops/conv-pack.ts b/lib/backends/webgl/ops/conv-pack.ts
@@ -15,7 +15,7 @@ import {WebGLReshapePacked} from './reshape-packed';
 export class WebGLConvPacked extends Conv {
   protected artifacts: Artifact[];
   protected programInfo: ProgramInfo[];
-
+  protected outputShape: number[];
   run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] {
     const programManager = inferenceHandler.session.programManager;
     const xshape = inputs[0].dims.slice();
@@ -33,8 +33,8 @@ export class WebGLConvPacked extends Conv {
         `autpPad:${this.autoPad}, dilations:${this.dilations}, group:${this.group}, kernelShape:${
             this.kernelShape}, pads:${this.pads}, strides:${this.strides}`);
 
-    const outputShape = WebGLConv.calcOutputShape(xshape, kshape, this.dilations, this.pads, this.strides);
-    const im2col = new WebGLIm2ColPacked(outputShape, kshape, this.dilations, this.pads, this.strides);
+    this.outputShape = WebGLConv.calcOutputShape(xshape, kshape, this.dilations, this.pads, this.strides);
+    const im2col = new WebGLIm2ColPacked(this.outputShape, kshape, this.dilations, this.pads, this.strides);
     const matmul = new WebGLMatMulPacked();
     const reshape = new WebGLReshapePacked();
     // shape for kernel reshape
@@ -76,11 +76,10 @@ export class WebGLConvPacked extends Conv {
     inferenceHandler.checkAndUpdateTextureForm(this.artifacts[2], runDataMatmul);
     programManager.run(this.artifacts[2], runDataMatmul);
     const matmulOutput = runDataMatmul.outputTextureData.tensor;
-
     // reshape output
     const outputShapeTensor = new Tensor(
-        [outputShape.length], 'int32', undefined, undefined,
-        new Int32Array([outputShape[0], outputShape[1], outputShape[2], outputShape[3]]));
+        [this.outputShape.length], 'int32', undefined, undefined,
+        new Int32Array([this.outputShape[0], this.outputShape[1], this.outputShape[2], this.outputShape[3]]));
 
     assert(this.artifacts.length > 2, () => 'expect at least 3 artifacts created');
     if (this.artifacts.length === 3) {

diff --git a/lib/backends/webgl/ops/matmul-pack.ts b/lib/backends/webgl/ops/matmul-pack.ts
@@ -4,51 +4,62 @@
 import {MatMul} from '../../../ops/matmul';
 import {Tensor} from '../../../tensor';
 import {BroadcastUtil} from '../../../util';
+import {getGlsl} from '../glsl-source';
 import {WebGLInferenceHandler} from '../inference-handler';
 import {ProgramInfo, RunData, WebGLOperator} from '../types';
+import {getCoordsDataType} from '../utils';
 
 export class WebGLMatMulPacked extends MatMul implements WebGLOperator {
   run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] {
     return inferenceHandler.run(this, inputs);
   }
   createProgramInfo(handler: WebGLInferenceHandler, inputs: Tensor[]): ProgramInfo {
     const hasBias = inputs.length > 2;
-    const processBias = hasBias ? `value += vec4(getBias(a[0]*2).xx, getBias(a[0]*2).yy);` : ``;
+    const processBias = hasBias ? 'result += getBiasAtOutCoords();' : '';
     const aShape = inputs[0].dims;
     const bShape = inputs[1].dims;
     const outputShape = BroadcastUtil.calcShape(aShape, bShape, true);
 
     if (!outputShape) {
       throw new Error('Can\'t use matmul on the given tensors');
     }
-    const rank = outputShape.length;
+
+    const sharedDim = aShape[aShape.length - 1];
+    const sharedDimIndex = Math.ceil(sharedDim / 2);
+
     const aRank = aShape.length;
     const bRank = bShape.length;
-    const sharedDim = aShape[aShape.length - 1];
-    // TODO:fix broadcasting
+
+    const glsl = getGlsl(handler.session.backend.glContext.version);
+    const coordsDataType = getCoordsDataType(outputShape.length);
+    const allGlChannels = ['x', 'y', 'z', 'w', 'u', 'v'];
+
     const shaderSource = `
-      vec4 process(int indices[${rank}]) {
-          int a[${aRank}];
-          int b[${bRank}];
-          bcastMatmulIndices_A(indices, a);
-          bcastMatmulIndices_B(indices, b);
+    void main() {
+      ${coordsDataType} rc = getOutputCoords();
+
+      vec4 result = vec4(0);
+
+      for (int i = 0; i < ${sharedDimIndex}; i++) {
+        vec4 a = getA(${getA(allGlChannels, aRank)});
+        vec4 b = getB(${getB(allGlChannels, bRank)});
+
+        result += (a.rrbb * b.rgrg);
+        result += (a.ggaa * b.baba);
+      }
+
+      ${processBias}
+
+      ${glsl.output} = result;
+    }`;
 
-          vec4 value;
-          for (int k=0; k<((${sharedDim}+1)/2); ++k) {
-              a[${aRank - 1}] = k;
-              b[${bRank - 2}] = k;
-              value += ${getA(aRank)}.rrbb * ${getB(bRank)}.rgrg;
-              value += ${getA(aRank)}.ggaa * ${getB(bRank)}.baba;
-          }
-          ${processBias}
-          return value;
-      }`;
     return {
       inputLayouts: inputs.map((t, i) => handler.getOrCreateTextureLayout(t, 4, true, inputs[i].dims, true)),
       outputLayout:
           handler.createTextureLayoutFromShape(outputShape, 4, outputShape, {isPacked: true, reverseWH: true}),
       samplers: hasBias ? ['A', 'B', 'Bias'] : ['A', 'B'],
       shaderSource,
+      hasMain: true,
       expectPackedInputs: true,
       expectPackedOutputs: true,
     };
@@ -64,22 +75,22 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator {
   }
 }
 
-function getA(outputRank: number): string {
-  let res = 'getA(';
-  for (let i = 0; i < outputRank - 2; i++) {
-    res += `a[${i}], `;
+function getA(allGlChannels: string[], rank: number): string {
+  let res = '';
+  for (let i = 0; i < rank - 2; i++) {
+    res += `rc.${allGlChannels[i]}, `;
   }
-  res += `a[${outputRank - 2}]*2, ` +
-      'k*2)';
+  res += `rc.${allGlChannels[rank - 2]}, ` +
+      'i<<1';
   return res;
 }
 
-function getB(outputRank: number): string {
-  let res = 'getB(';
-  for (let i = 0; i < outputRank - 2; i++) {
-    res += `b[${i}], `;
+function getB(allGlChannels: string[], rank: number): string {
+  let res = '';
+  for (let i = 0; i < rank - 2; i++) {
+    res += `rc.${allGlChannels[i]}, `;
   }
-  res += 'k*2, ' +
-      `b[${outputRank - 1}]*2)`;
+  res += 'i<<1, ' +
+      `rc.${allGlChannels[rank - 1]}`;
   return res;
 }