4 files changed: +23 -11 lines changed

@@ -16,6 +16,7 @@ def define_common_targets():
         compatible_with = ["ovr_config//cpu:xtensa"],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         exported_deps = [
             "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib_common",

@@ -12,16 +12,17 @@

 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))

-using Tensor = executorch::aten::Tensor;
-using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
-using ScalarType = executorch::aten::ScalarType;
-using ::executorch::aten::IntArrayRef;
-
 namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {

+using ::cadence::impl::HiFi::kernels::quantize;
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::torch::executor::KernelRuntimeContext;
+
 // This implements a generic 2d conv kernel that operates on raw pointers.
 // The version handles both quantized and fp32 convolutions.
 // The input is of shape [n x c x h x w]
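Aside: the comment block above states the kernel's raw-pointer contract, but the body is elided by the hunk below. As a hedged illustration of the [n x c x h x w] layout it names, a contiguous NCHW input would be indexed like this (hypothetical helper, not part of the patch):

#include <cstddef>

// Offset math for a contiguous NCHW tensor: the channel stride is h*w and
// the batch stride is c*h*w. Illustrative only.
inline float at_nchw(
    const float* in,
    size_t c, size_t h, size_t w, // tensor dims (n not needed for offsets)
    size_t ni, size_t ci, size_t hi, size_t wi) {
  return in[((ni * c + ci) * h + hi) * w + wi];
}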
@@ -145,7 +146,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
       if (quantized) {
         float val = bias_scale * acc;
         out_plane[_oh * ow + _ow] =
-            kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
+            quantize<OT>(val, inv_out_scale, out_zero_point);
       } else {
         out_plane[_oh * ow + _ow] = acc;
       }
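The net effect of this file's change is namespace hygiene: the aliases move from file scope into cadence::impl::HiFi::native, and the new using-declaration for kernels::quantize lets the call site above drop its kernels:: qualifier. A minimal sketch of the pattern under hypothetical names (lib is not a real ExecuTorch namespace):

#include <cstdint>

namespace lib {
namespace kernels {
// Stand-in for the real quantize kernel; the signature is illustrative.
template <typename T>
T quantize(float x, float inv_scale, int32_t zero_point) {
  return static_cast<T>(x * inv_scale + zero_point);
}
} // namespace kernels

namespace native {
// Scoped to the implementation namespace, so nothing leaks to file scope
// and call sites can use the unqualified name.
using ::lib::kernels::quantize;

int8_t example(float v) {
  return quantize<int8_t>(v, 0.5f, 2);
}
} // namespace native
} // namespace lib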

@@ -7,21 +7,30 @@
  */

 #include <executorch/backends/cadence/reference/kernels/kernels.h>
-#include <math.h>
 #include <algorithm>
+#include <cmath>
 #include <cstring>
 #include <limits>
+
 namespace impl {
 namespace reference {
 namespace kernels {

 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
 T quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
-  float tmp = roundf(x * scale + zero_point);
-  return std::max(std::min(tmp, max_val), min_val);
+  // constexpr float min_val = std::numeric_limits<T>::min();
+  // constexpr float max_val = std::numeric_limits<T>::max();
+  // float tmp = roundf(x * scale + zero_point);
+  // return std::max(std::min(tmp, max_val), min_val);
+  // Match Executorch CPU kernel implementation at
+  // https://fburl.com/code/fxizw6u6
+  int64_t qvalue;
+  qvalue = static_cast<int64_t>(zero_point + std::nearbyint(scale * x));
+
+  qvalue = std::max<int64_t>(qvalue, std::numeric_limits<T>::min());
+  qvalue = std::min<int64_t>(qvalue, std::numeric_limits<T>::max());
+  return static_cast<T>(qvalue);
 }

 // Quantize an fp32 array to an int8_t/uint8_t array
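Two behavioral points in the rewritten reference quantize are easy to miss: std::nearbyint follows the current floating-point rounding mode (round-half-to-even by default), whereas the removed roundf rounded halfway cases away from zero; and clamping now happens on an int64_t rather than on floats, so the final narrowing cast can never wrap. A self-contained sketch of the same pattern, pinned to int8_t for illustration (not the ExecuTorch source):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Same round-then-clamp pattern as the patched kernel, fixed to int8_t.
int8_t quantize_i8(float x, float scale, int32_t zero_point) {
  // std::nearbyint uses the current rounding mode, round-half-to-even by
  // default, so 2.5f rounds to 2; roundf(2.5f) would have produced 3.
  int64_t qvalue =
      static_cast<int64_t>(zero_point + std::nearbyint(scale * x));
  // Clamp in 64-bit integer space so out-of-range values saturate instead
  // of wrapping in the narrowing cast below.
  qvalue = std::max<int64_t>(qvalue, std::numeric_limits<int8_t>::min());
  qvalue = std::min<int64_t>(qvalue, std::numeric_limits<int8_t>::max());
  return static_cast<int8_t>(qvalue);
}

int main() {
  std::printf("%d\n", quantize_i8(2.5f, 1.0f, 0)); // 2 (round-half-to-even)
  std::printf("%d\n", quantize_i8(1e9f, 1.0f, 0)); // 127 (saturated)
  return 0;
}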

@@ -10,6 +10,7 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         platforms = CXX,
     )