-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[microTVM] Use QNN schedules to give SOTA performance #13752
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cf8536e
476426d
e2ed946
5c67f47
434f4c5
ca7a0e0
24c5029
d9a3659
85344cf
c5507d3
3b33ae3
c69a742
ea8c9f4
06c1c8a
622ece4
2649051
866efc8
bf86d9f
698f6e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,9 +21,55 @@ | |
| regular/depthwise conv2d is supported, but qnn_dense will be added eventually.""" | ||
|
|
||
| from tvm import topi, TVMError | ||
| from .generic import qnn_conv2d_strategy | ||
| from tvm.topi.utils import get_const_tuple | ||
| from ... import op as _op | ||
| from ...op.strategy.generic import is_depthwise_conv2d | ||
| from .generic import ( | ||
| qnn_conv2d_strategy, | ||
| qnn_dense_strategy, | ||
| qnn_dequantize_strategy, | ||
| qnn_quantize_strategy, | ||
| wrap_compute_dequantize, | ||
| wrap_compute_quantize, | ||
| wrap_topi_qnn_dense, | ||
| wrap_topi_schedule, | ||
| ) | ||
|
|
||
|
|
||
| @qnn_quantize_strategy.register("arm_cpu") | ||
| def qnn_quantize_strategy_arm_cpu(_attrs, _inputs, _out_type, _target): | ||
| """qnn.quantize strategy for arm_cpu""" | ||
| strategy = _op.OpStrategy() | ||
| strategy.add_implementation( | ||
| wrap_compute_quantize(topi.hexagon.qnn_quantize), | ||
| wrap_topi_schedule(topi.hexagon.schedule_qnn_quantize), | ||
| name="qnn_quantize.arm_cpu", | ||
| ) | ||
| return strategy | ||
|
|
||
|
|
||
| @qnn_dequantize_strategy.register("arm_cpu") | ||
| def qnn_dequantize_strategy_arm_cpu(_attrs, _inputs, _out_type, _target): | ||
| """qnn.dequantize strategy for arm_cpu""" | ||
| strategy = _op.OpStrategy() | ||
| strategy.add_implementation( | ||
| wrap_compute_dequantize(topi.hexagon.qnn_dequantize), | ||
| wrap_topi_schedule(topi.hexagon.schedule_qnn_dequantize), | ||
| name="qnn_dequantize.arm_cpu", | ||
| ) | ||
| return strategy | ||
|
|
||
|
|
||
| @qnn_dense_strategy.register("arm_cpu") | ||
| def qnn_dense_strategy_arm_cpu(_attrs, _inputs, _out_type, _target): | ||
| """qnn.dense strategy for arm_cpu""" | ||
| strategy = _op.OpStrategy() | ||
| strategy.add_implementation( | ||
| wrap_topi_qnn_dense(topi.hexagon.qnn_dense), | ||
| wrap_topi_schedule(topi.hexagon.schedule_qnn_dense), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As I can see, you reuse the compute/schedule implementations from Hexagon. These schedules are not optimized and have a very naive implementation. Is that acceptable for you?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's fine for the time being. I know @mkatanbaf is working on a Cortex-M schedule for |
||
| name="qnn_dense.arm_cpu", | ||
| ) | ||
| return strategy | ||
|
|
||
|
|
||
| @qnn_conv2d_strategy.register("arm_cpu") | ||
|
|
@@ -59,13 +105,28 @@ def qnn_conv2d_strategy_arm_cpu(attrs, inputs, _out_type, target): | |
| topi.arm_cpu.schedule_qnn_conv2d, | ||
| name="qnn_conv2d.arm_cpu", | ||
| ) | ||
| else: | ||
| raise TVMError("QNN regular Conv2D for Arm Cortex-M DSP got incorrect input layout!") | ||
| elif is_depthwise_conv2d(data.shape, data_layout, kernel.shape, kernel_layout, groups): | ||
| if data_layout == "NCHW" and kernel_layout == "IOHW": | ||
| strategy.add_implementation( | ||
| topi.arm_cpu.qnn_depthwise_conv2d, | ||
| topi.arm_cpu.schedule_qnn_depthwise_conv2d, | ||
| name="qnn_depthwise_conv2d.arm_cpu", | ||
| ) | ||
| height, width = data.shape[2:] | ||
| y_stride, x_stride = get_const_tuple(attrs.strides) | ||
| if height * width * y_stride % 2 == 0: | ||
| strategy.add_implementation( | ||
| topi.arm_cpu.qnn_depthwise_conv2d, | ||
| topi.arm_cpu.schedule_qnn_depthwise_conv2d, | ||
| name="qnn_depthwise_conv2d.arm_cpu", | ||
| ) | ||
| elif y_stride == x_stride == 1: | ||
| strategy.add_implementation( | ||
| topi.arm_cpu.qnn_unrolled_depthwise_conv2d, | ||
| topi.arm_cpu.schedule_qnn_unrolled_depthwise_conv2d, | ||
| name="qnn_unrolled_depthwise_conv2d.arm_cpu", | ||
| ) | ||
| else: | ||
| raise TVMError("No QNN depthwise Conv2D Cortex-M schedule supports these params!") | ||
| else: | ||
| raise TVMError("QNN depthwise Conv2D for Arm Cortex-M DSP got incorrect input layout!") | ||
| else: | ||
| raise TVMError("No Arm Cortex-M DSP strategy exists for generic group qnn.conv2d") | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.