Skip to content

Commit 2a1cc09

Browse files
authored
[ETHOSN] Per-channel int8 quantization for conv2d (#10131)
1 parent 8133048 commit 2a1cc09

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

tests/python/contrib/test_ethosn/infrastructure.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -316,12 +316,18 @@ def get_conv2d(var, shape, dtype):
316316
def get_conv2d_qnn_params(
317317
dtype, input_zp, input_sc, kernel_zp, kernel_sc, kernel_h, kernel_w, channels
318318
):
319+
kernel_sc = (
320+
kernel_sc.numpy() if isinstance(kernel_sc, tvm.runtime.ndarray.NDArray) else [kernel_sc]
321+
)
319322
dtype_min = np.iinfo(dtype).min
320323
dtype_max = np.iinfo(dtype).max
324+
321325
input_max = input_sc * (dtype_max - input_zp)
322326
input_min = input_sc * (dtype_min - input_zp)
323-
kernel_max = kernel_sc * (dtype_max - kernel_zp)
324-
kernel_min = kernel_sc * (dtype_min - kernel_zp)
327+
328+
kernel_max = max(kernel_sc) * (dtype_max - kernel_zp)
329+
kernel_min = min(kernel_sc) * (dtype_min - kernel_zp)
330+
325331
output_limits = [
326332
kernel_max * kernel_h * kernel_w * channels * input_max,
327333
kernel_min * kernel_h * kernel_w * channels * input_max,
@@ -330,6 +336,7 @@ def get_conv2d_qnn_params(
330336
]
331337
output_max = max(output_limits)
332338
output_min = min(output_limits)
339+
333340
output_sc = (output_max - output_min) / (dtype_max - dtype_min)
334341
output_zp = int(dtype_min - (output_min / output_sc))
335342
return output_zp, output_sc

tests/python/contrib/test_ethosn/test_conv2d.py

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,13 @@ def _get_model(
106106
)
107107
biasc = relay.const(b, "int32")
108108
bias = relay.nn.bias_add(conv, biasc, axis=3)
109+
if isinstance(kernel_sc, tvm.runtime.ndarray.NDArray):
110+
req_input_sc = [sc * input_sc for sc in kernel_sc.numpy()]
111+
else:
112+
req_input_sc = input_sc * kernel_sc
109113
req = relay.qnn.op.requantize(
110114
bias,
111-
relay.const(input_sc * kernel_sc, "float32"), # input zero scale
115+
relay.const(req_input_sc, "float32"), # input zero scale
112116
relay.const(0, "int32"), # input zero point
113117
relay.const(output_sc, "float32"), # output zero scale
114118
relay.const(output_zp, "int32"), # output zero point
@@ -123,22 +127,25 @@ def _get_model(
123127
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
124128
def test_conv2d(dtype, depthwise):
125129
trials = [
126-
[(1, 17, 20, 26), 4, 3, 1, "attr", (2, 2), (1, 1)],
127-
[(1, 30, 27, 30), 5, 5, 3, "none", (1, 1), (1, 1)],
128-
[(1, 14, 28, 11), 6, 2, 2, "op", (2, 2), (1, 1)],
129-
[(1, 9, 20, 30), 7, 1, 5, "none", (1, 1), (1, 1)],
130-
[(1, 21, 21, 22), 8, 5, 1, "attr", (2, 2), (1, 1)],
131-
[(1, 21, 25, 29), 9, 2, 5, "op", (1, 1), (1, 1)],
132-
[(1, 31, 28, 15), 10, 1, 2, "attr", (2, 2), (1, 1)],
133-
[(1, 21, 21, 8), 11, 3, 3, "none", (1, 1), (1, 1)],
134-
[(1, 5, 11, 6), 12, 5, 2, "op", (2, 2), (1, 1)],
135-
[(1, 12, 7, 18), 13, 1, 3, "op", (1, 1), (1, 1)],
136-
[(1, 24, 6, 26), 14, 3, 5, "none", (2, 2), (1, 1)],
137-
[(1, 19, 24, 16), 15, 2, 1, "attr", (1, 1), (1, 1)],
130+
[(1, 17, 20, 26), 4, 3, 1, "attr", (2, 2), (1, 1), False],
131+
[(1, 30, 27, 30), 5, 5, 3, "none", (1, 1), (1, 1), False],
132+
[(1, 30, 27, 30), 5, 5, 3, "none", (1, 1), (1, 1), dtype == "int8"],
133+
[(1, 14, 28, 11), 6, 2, 2, "op", (2, 2), (1, 1), False],
134+
[(1, 9, 20, 30), 7, 1, 5, "none", (1, 1), (1, 1), False],
135+
[(1, 21, 21, 22), 8, 5, 1, "attr", (2, 2), (1, 1), False],
136+
[(1, 21, 21, 22), 8, 5, 1, "attr", (2, 2), (1, 1), dtype == "int8"],
137+
[(1, 21, 25, 29), 9, 2, 5, "op", (1, 1), (1, 1), False],
138+
[(1, 21, 25, 29), 9, 2, 5, "op", (1, 1), (1, 1), dtype == "int8"],
139+
[(1, 31, 28, 15), 10, 1, 2, "attr", (2, 2), (1, 1), False],
140+
[(1, 21, 21, 8), 11, 3, 3, "none", (1, 1), (1, 1), False],
141+
[(1, 5, 11, 6), 12, 5, 2, "op", (2, 2), (1, 1), False],
142+
[(1, 12, 7, 18), 13, 1, 3, "op", (1, 1), (1, 1), False],
143+
[(1, 24, 6, 26), 14, 3, 5, "none", (2, 2), (1, 1), False],
144+
[(1, 19, 24, 16), 15, 2, 1, "attr", (1, 1), (1, 1), False],
138145
]
139146

140147
np.random.seed(0)
141-
for shape, out_channels, kernel_h, kernel_w, pad, stride, dilation in trials:
148+
for shape, out_channels, kernel_h, kernel_w, pad, stride, dilation, qnn_per_channel in trials:
142149
if depthwise:
143150
out_channels = shape[3]
144151
groups = out_channels
@@ -162,7 +169,12 @@ def test_conv2d(dtype, depthwise):
162169
}
163170
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
164171
input_sc = np.random.random() * 2
165-
kernel_sc = np.random.random() * 2
172+
if qnn_per_channel:
173+
kernel_sc = tvm.nd.array(
174+
np.random.uniform(low=0, high=2, size=(out_channels,)).astype(np.float32)
175+
)
176+
else:
177+
kernel_sc = np.random.random() * 2
166178
kernel_zp = (
167179
0 if dtype == "int8" else np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
168180
)

0 commit comments

Comments
 (0)