|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

""" Compute and schedule for adaptive_avg_pool1d slice op

Following are a few notes and assumptions made by the implementation:

Assumptions:
1) The input is in NCW layout. Distilbert is the only model that calls
   nn.adaptive_avg_pool1d, and the only layout it uses is 'NCW'.
2) The op takes output_size as an argument and
   only handles the specialized case where output_size is 1.
   The argument output_size is used as the value of output_width.
3) Both input and output dtypes are uint8/int8, and
   quantization parameters are provided to the op.
4) Input is assumed to always be a multiple of the fixed chunk 32c64w.

Notes:
1) If the input width is used as the output width, there can be two cases:
   a. If the quantization parameters of input and output are the same,
      it can return the input as output, so the op will be a no-op.
   b. If the quantization parameters of input and output are different,
      it will essentially be a requantize op.
2) If output_size is a value besides 1 or input_width,
   adaptive_avg_pool1d may use a dynamic stride and kernel for each output element.
   When this case occurs, the kernel won't be known at compile time. We want to use
   the generic implementation nn.adaptive_avg_pool1d() for this case.
"""
43 | | - |
44 | | -from tvm import te |
45 | | -from tvm import tir |
46 | | -from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate |
47 | | - |
48 | | - |
def adaptive_avg_pool1d(
    data: te.Tensor,
    output_size: list,
    odtype: str,
    input_zero_point: int,
    input_scale: float,
    output_zero_point: int,
    output_scale: float,
):
    """Compute for quantized adaptive_avg_pool1d specialized to output_size == 1.

    Averages the entire input width into a single output element per (n, c)
    position, folding the averaging divisor and the input/output quantization
    parameters into one fixed-point multiply-shift.

    Parameters
    ----------
    data : te.Tensor
        Input tensor, assumed to be in NCW layout with uint8/int8 dtype.
    output_size : list
        Requested output width; only [1] is supported here.
    odtype : str
        Output dtype, either "uint8" or "int8".
    input_zero_point, input_scale, output_zero_point, output_scale
        Quantization parameters of the input and output tensors.

    Returns
    -------
    te.Tensor
        Averaged and requantized output of shape (N, C, 1).

    Raises
    ------
    RuntimeError
        If output_size[0] is not 1 or odtype is not uint8/int8.
    """
    batch, channels, inw = data.shape

    out_width = output_size[0]

    # Only the fully-collapsing case is specialized: the kernel then spans the
    # whole input width and is known at compile time (see module docstring).
    if out_width != 1:
        raise RuntimeError(f"Unsupported output_size, {out_width}")
    kw_r = inw

    oshape = (batch, channels, out_width)

    # Widen the accumulator so the reduction over the width cannot overflow.
    if odtype == "uint8":
        temp_dtype = "uint32"
    elif odtype == "int8":
        temp_dtype = "int32"
    else:
        raise RuntimeError(f"Unsupported output dtype, {odtype}")

    # Fold the averaging divisor (kernel width) and the requantization scale
    # into a single fixed-point multiplier:
    #   out = saturate((sum * scale_fixed_point + corr) >> rsh)
    scale_with_area = input_scale / (output_scale * int(kw_r))
    scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16")
    # Correction term folds both zero points into one addend applied pre-shift.
    corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point

    rw_r = te.reduce_axis((0, kw_r), name="rw_r")

    # Widened sum over the full input width (w is always 0 since out_width == 1).
    sum_compute = te.compute(
        oshape,
        lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]),
        name="sum",
    )

    # Fixed-point requantized average, saturated and cast to the output dtype.
    avg_compute = te.compute(
        oshape,
        lambda n, c, w: saturate(
            ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype
        ).astype(odtype),
        name="adaptive_avg_1d",
    )
    return avg_compute
99 | | - |
100 | | - |
def stir_schedule_ncw_32c64w(outs, ins, input_layout: str):
    """Schedule for input layout ncw-32c64w and output layout ncw"""
    prim_func = te.create_prim_func([ins, outs])
    sch = tir.Schedule(prim_func)

    # The output is NxCx1 while only the input is a multiple of the fixed
    # 32c64w chunk, so transform_layout is applied to the input buffer alone.
    sch.transform_layout(
        sch.get_block("sum"),
        buffer=("read", 0),
        index_map=get_layout_transform_fn(input_layout),
    )

    return sch
114 | | - |
115 | | - |
def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str):
    """STIR based schedule"""
    # Guard first: only the 'ncw' output layout has a specialized schedule.
    if output_layout != "ncw":
        raise RuntimeError(f"Unexpected layout '{output_layout}'")
    return stir_schedule_ncw_32c64w(outs, ins, input_layout)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

""" Compute and schedule for adaptive_avg_pool1d slice op

Following are a few notes and assumptions made by the implementation:

Assumptions:
1) The input is in NCW layout. Distilbert is the only model that calls
   nn.adaptive_avg_pool1d, and the only layout it uses is 'NCW'.
2) The op takes output_size as an argument and
   only handles the specialized case where output_size is 1.
   The argument output_size is used as the value of output_width.
3) Both input and output dtypes are uint8/int8, and
   quantization parameters are provided to the op.
4) Input is assumed to always be a multiple of the fixed chunk 32c64w.

Notes:
1) If the input width is used as the output width, there can be two cases:
   a. If the quantization parameters of input and output are the same,
      it can return the input as output, so the op will be a no-op.
   b. If the quantization parameters of input and output are different,
      it will essentially be a requantize op.
2) If output_size is a value besides 1 or input_width,
   adaptive_avg_pool1d may use a dynamic stride and kernel for each output element.
   When this case occurs, the kernel won't be known at compile time. We want to use
   the generic implementation nn.adaptive_avg_pool1d() for this case.
"""
| 43 | + |
| 44 | +from tvm import te |
| 45 | +from tvm import tir |
| 46 | +from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate |
| 47 | + |
| 48 | + |
def adaptive_avg_pool1d(
    data: te.Tensor,
    output_size: list,
    odtype: str,
    input_zero_point: int,
    input_scale: float,
    output_zero_point: int,
    output_scale: float,
):
    """Compute for quantized adaptive_avg_pool1d specialized to output_size == 1.

    Averages the entire input width into a single output element per (n, c)
    position, folding the averaging divisor and the input/output quantization
    parameters into one fixed-point multiply-shift.

    Parameters
    ----------
    data : te.Tensor
        Input tensor, assumed to be in NCW layout with uint8/int8 dtype.
    output_size : list
        Requested output width; only [1] is supported here.
    odtype : str
        Output dtype, either "uint8" or "int8".
    input_zero_point, input_scale, output_zero_point, output_scale
        Quantization parameters of the input and output tensors.

    Returns
    -------
    te.Tensor
        Averaged and requantized output of shape (N, C, 1).

    Raises
    ------
    RuntimeError
        If output_size[0] is not 1 or odtype is not uint8/int8.
    """
    batch, channels, inw = data.shape

    out_width = output_size[0]

    # Only the fully-collapsing case is specialized: the kernel then spans the
    # whole input width and is known at compile time (see module docstring).
    if out_width != 1:
        raise RuntimeError(f"Unsupported output_size, {out_width}")
    kw_r = inw

    oshape = (batch, channels, out_width)

    # Widen the accumulator so the reduction over the width cannot overflow.
    if odtype == "uint8":
        temp_dtype = "uint32"
    elif odtype == "int8":
        temp_dtype = "int32"
    else:
        raise RuntimeError(f"Unsupported output dtype, {odtype}")

    # Fold the averaging divisor (kernel width) and the requantization scale
    # into a single fixed-point multiplier:
    #   out = saturate((sum * scale_fixed_point + corr) >> rsh)
    scale_with_area = input_scale / (output_scale * int(kw_r))
    scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16")
    # Correction term folds both zero points into one addend applied pre-shift.
    corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point

    rw_r = te.reduce_axis((0, kw_r), name="rw_r")

    # Widened sum over the full input width (w is always 0 since out_width == 1).
    sum_compute = te.compute(
        oshape,
        lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]),
        name="sum",
    )

    # Fixed-point requantized average, saturated and cast to the output dtype.
    avg_compute = te.compute(
        oshape,
        lambda n, c, w: saturate(
            ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype
        ).astype(odtype),
        name="adaptive_avg_1d",
    )
    return avg_compute
| 99 | + |
| 100 | + |
def stir_schedule_ncw_32c64w(outs, ins, input_layout: str):
    """Schedule for input layout ncw-32c64w and output layout ncw"""
    prim_func = te.create_prim_func([ins, outs])
    sch = tir.Schedule(prim_func)

    # The output is NxCx1 while only the input is a multiple of the fixed
    # 32c64w chunk, so transform_layout is applied to the input buffer alone.
    sch.transform_layout(
        sch.get_block("sum"),
        buffer=("read", 0),
        index_map=get_layout_transform_fn(input_layout),
    )

    return sch
| 114 | + |
| 115 | + |
def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str):
    """STIR based schedule"""
    # Guard first: only the 'ncw' output layout has a specialized schedule.
    if output_layout != "ncw":
        raise RuntimeError(f"Unexpected layout '{output_layout}'")
    return stir_schedule_ncw_32c64w(outs, ins, input_layout)
0 commit comments