Skip to content

Commit

Permalink
all: new blocked layouts for 3D brgemm matmul B matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
akharito committed Jul 7, 2023
1 parent 0abbf22 commit acb8e12
Show file tree
Hide file tree
Showing 15 changed files with 216 additions and 67 deletions.
14 changes: 13 additions & 1 deletion include/oneapi/dnnl/dnnl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2022 Intel Corporation
* Copyright 2016-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -1900,6 +1900,18 @@ struct memory : public handle<dnnl_memory_t> {
BA16a48b4a = dnnl_BA16a48b4a,
BA16a64b4a = dnnl_BA16a64b4a,
decbA16a = dnnl_decbA16a,
aCB16b16c = dnnl_aCB16b16c,
aCB16b32c = dnnl_aCB16b32c,
aCB16b48c = dnnl_aCB16b48c,
aCB16b64c = dnnl_aCB16b64c,
aCB16b16c2b = dnnl_aCB16b16c2b,
aCB16b32c2b = dnnl_aCB16b32c2b,
aCB16b48c2b = dnnl_aCB16b48c2b,
aCB16b64c2b = dnnl_aCB16b64c2b,
aCB16b16c4b = dnnl_aCB16b16c4b,
aCB16b32c4b = dnnl_aCB16b32c4b,
aCB16b48c4b = dnnl_aCB16b48c4b,
aCB16b64c4b = dnnl_aCB16b64c4b,

format_tag_last = dnnl_format_tag_last,

Expand Down
14 changes: 13 additions & 1 deletion include/oneapi/dnnl/dnnl_types.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2022 Intel Corporation
* Copyright 2016-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -723,6 +723,18 @@ typedef enum {
dnnl_aBdfec16b,
dnnl_abdEC64e2c,
dnnl_abdEC64e4c,
dnnl_aCB16b16c,
dnnl_aCB16b32c,
dnnl_aCB16b48c,
dnnl_aCB16b64c,
dnnl_aCB16b16c2b,
dnnl_aCB16b32c2b,
dnnl_aCB16b48c2b,
dnnl_aCB16b64c2b,
dnnl_aCB16b16c4b,
dnnl_aCB16b32c4b,
dnnl_aCB16b48c4b,
dnnl_aCB16b64c4b,

/// Just a sentinel, not real memory format tag. Must be changed after new
/// format tag is added.
Expand Down
14 changes: 13 additions & 1 deletion src/common/c_types_map.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2022 Intel Corporation
* Copyright 2016-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -288,6 +288,18 @@ const format_tag_t BA16a16b4a = dnnl_BA16a16b4a;
const format_tag_t BA16a32b4a = dnnl_BA16a32b4a;
const format_tag_t BA16a48b4a = dnnl_BA16a48b4a;
const format_tag_t BA16a64b4a = dnnl_BA16a64b4a;
// Short aliases for the dnnl_aCB16b{16,32,48,64}c tags and their "2b" / "4b"
// variants, so library code can refer to them without the dnnl_ prefix.
const format_tag_t aCB16b16c = dnnl_aCB16b16c;
const format_tag_t aCB16b32c = dnnl_aCB16b32c;
const format_tag_t aCB16b48c = dnnl_aCB16b48c;
const format_tag_t aCB16b64c = dnnl_aCB16b64c;
const format_tag_t aCB16b16c2b = dnnl_aCB16b16c2b;
const format_tag_t aCB16b32c2b = dnnl_aCB16b32c2b;
const format_tag_t aCB16b48c2b = dnnl_aCB16b48c2b;
const format_tag_t aCB16b64c2b = dnnl_aCB16b64c2b;
const format_tag_t aCB16b16c4b = dnnl_aCB16b16c4b;
const format_tag_t aCB16b32c4b = dnnl_aCB16b32c4b;
const format_tag_t aCB16b48c4b = dnnl_aCB16b48c4b;
const format_tag_t aCB16b64c4b = dnnl_aCB16b64c4b;

const format_tag_t Abc16a = dnnl_Abc16a;
const format_tag_t ABc16a16b = dnnl_ABc16a16b;
Expand Down
14 changes: 13 additions & 1 deletion src/common/dnnl_debug_autogenerated.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2022 Intel Corporation
* Copyright 2018-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -579,6 +579,18 @@ const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
if (v == dnnl_aBdfec16b) return "aBdfec16b";
if (v == dnnl_abdEC64e2c) return "abdEC64e2c";
if (v == dnnl_abdEC64e4c) return "abdEC64e4c";
if (v == dnnl_aCB16b16c) return "aCB16b16c";
if (v == dnnl_aCB16b32c) return "aCB16b32c";
if (v == dnnl_aCB16b48c) return "aCB16b48c";
if (v == dnnl_aCB16b64c) return "aCB16b64c";
if (v == dnnl_aCB16b16c2b) return "aCB16b16c2b";
if (v == dnnl_aCB16b32c2b) return "aCB16b32c2b";
if (v == dnnl_aCB16b48c2b) return "aCB16b48c2b";
if (v == dnnl_aCB16b64c2b) return "aCB16b64c2b";
if (v == dnnl_aCB16b16c4b) return "aCB16b16c4b";
if (v == dnnl_aCB16b32c4b) return "aCB16b32c4b";
if (v == dnnl_aCB16b48c4b) return "aCB16b48c4b";
if (v == dnnl_aCB16b64c4b) return "aCB16b64c4b";
if (v == dnnl_format_tag_last) return "format_tag_last";
if (v == dnnl_x) return "x";
if (v == dnnl_nc) return "nc";
Expand Down
14 changes: 13 additions & 1 deletion src/common/memory_desc_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2022 Intel Corporation
* Copyright 2016-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -195,6 +195,18 @@ status_t memory_desc_wrapper::compute_blocking(
C(BA16a32b4a, {1, 0}, {16, 32, 4}, {0, 1, 0});
C(BA16a48b4a, {1, 0}, {16, 48, 4}, {0, 1, 0});
C(BA16a64b4a, {1, 0}, {16, 64, 4}, {0, 1, 0});
C(aCB16b16c, {0, 2, 1}, {16, 16}, {1, 2});
C(aCB16b32c, {0, 2, 1}, {16, 32}, {1, 2});
C(aCB16b48c, {0, 2, 1}, {16, 48}, {1, 2});
C(aCB16b64c, {0, 2, 1}, {16, 64}, {1, 2});
C(aCB16b16c2b, {0, 2, 1}, {16, 16, 2}, {1, 2, 1});
C(aCB16b32c2b, {0, 2, 1}, {16, 32, 2}, {1, 2, 1});
C(aCB16b48c2b, {0, 2, 1}, {16, 48, 2}, {1, 2, 1});
C(aCB16b64c2b, {0, 2, 1}, {16, 64, 2}, {1, 2, 1});
C(aCB16b16c4b, {0, 2, 1}, {16, 16, 4}, {1, 2, 1});
C(aCB16b32c4b, {0, 2, 1}, {16, 32, 4}, {1, 2, 1});
C(aCB16b48c4b, {0, 2, 1}, {16, 48, 4}, {1, 2, 1});
C(aCB16b64c4b, {0, 2, 1}, {16, 64, 4}, {1, 2, 1});

C(ABc4b16a4b, {0, 1, 2}, {4, 16, 4}, {1, 0, 1});
C(ABc4b32a4b, {0, 1, 2}, {4, 32, 4}, {1, 0, 1});
Expand Down
4 changes: 2 additions & 2 deletions src/common/memory_desc_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2022 Intel Corporation
* Copyright 2016-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -127,7 +127,7 @@ struct memory_desc_wrapper : public c_compatible {
using namespace memory_extra_flags;

auto calculate_size = [=](int cmask, size_t buff_data_size) {
assert(utils::one_of(cmask, 1, 2, 3, 13, 27));
assert(utils::one_of(cmask, 1, 2, 3, 5, 13, 27));
dim_t prod = 1;
for (int d = 0; d < ndims(); ++d)
if (cmask & (1 << d)) { prod *= padded_dims()[d]; }
Expand Down
48 changes: 36 additions & 12 deletions src/common/tag_traits.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2022 Intel Corporation
* Copyright 2018-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -127,6 +127,16 @@ enum class inner_blk_t {
_16c32b4c,
_16c48b4c,
_16c64b4c,
_16b32c,
_16b48c,
_16b64c,
_16b32c2b,
_16b48c2b,
_16b64c2b,
_16b16c4b,
_16b32c4b,
_16b48c4b,
_16b64c4b,
};

/** returns the offset within the block for weights blocked over oc and ic */
Expand All @@ -150,7 +160,9 @@ constexpr int AB_or_BC_blk_off(int x0, int x1) {
ib::_4a8b8a4b, ib::_4b8c8b4c, ib::_16b32a2b, ib::_16b48a2b,
ib::_16b64a2b, ib::_16b32a4b, ib::_16b48a4b, ib::_16b64a4b,
ib::_16c32b2c, ib::_16c48b2c, ib::_16c64b2c, ib::_16c32b4c,
ib::_16c48b4c, ib::_16c64b4c),
ib::_16c48b4c, ib::_16c64b4c, ib::_16b32c, ib::_16b48c,
ib::_16b64c, ib::_16b32c2b, ib::_16b48c2b, ib::_16b64c2b,
ib::_16b16c4b, ib::_16b32c4b, ib::_16b48c4b, ib::_16b64c4b),
"unexpected inner_blk format");

// clang-format off
Expand All @@ -165,17 +177,17 @@ constexpr int AB_or_BC_blk_off(int x0, int x1) {
: (f == ib::_16b16a || f == ib::_16c16b) ? 16 * x1 + x0
: (f == ib::_16a2b || f == ib::_16b2c) ? 2 * x0 + x1
: (f == ib::_16a4b || f == ib::_16b4c) ? 4 * x0 + x1
: (f == ib::_32a32b || f == ib::_16a32b) ? 32 * x0 + x1
: (utils::one_of(f, ib::_32a32b, ib::_16a32b, ib::_16b32c)) ? 32 * x0 + x1
: (utils::one_of(f, ib::_8a16b2a, ib::_8b16c2b, ib::_16a16b2a, ib::_16b16c2b)) ? (x0 / 2) * 32 + x1 * 2 + x0 % 2
: (f == ib::_16a48b) ? x0 * 48 + x1
: (f == ib::_16a64b) ? x0 * 64 + x1
: (f == ib::_16a32b2a) ? (x0 / 2) * 64 + x1 * 2 + x0 % 2
: (f == ib::_16a48b2a) ? (x0 / 2) * 96 + x1 * 2 + x0 % 2
: (f == ib::_16a64b2a) ? (x0 / 2) * 128 + x1 * 2 + x0 % 2
: (f == ib::_16a16b4a) ? (x0 / 4) * 64 + x1 * 4 + x0 % 4
: (f == ib::_16a32b4a) ? (x0 / 4) * 128 + x1 * 4 + x0 % 4
: (f == ib::_16a48b4a) ? (x0 / 4) * 192 + x1 * 4 + x0 % 4
: (f == ib::_16a64b4a) ? (x0 / 4) * 256 + x1 * 4 + x0 % 4
: (utils::one_of(f, ib::_16a48b, ib::_16b48c)) ? x0 * 48 + x1
: (utils::one_of(f, ib::_16a64b, ib::_16b64c)) ? x0 * 64 + x1
: (utils::one_of(f, ib::_16a32b2a, ib::_16b32c2b)) ? (x0 / 2) * 64 + x1 * 2 + x0 % 2
: (utils::one_of(f, ib::_16a48b2a, ib::_16b48c2b)) ? (x0 / 2) * 96 + x1 * 2 + x0 % 2
: (utils::one_of(f, ib::_16a64b2a, ib::_16b64c2b)) ? (x0 / 2) * 128 + x1 * 2 + x0 % 2
: (utils::one_of(f, ib::_16a16b4a, ib::_16b16c4b)) ? (x0 / 4) * 64 + x1 * 4 + x0 % 4
: (utils::one_of(f, ib::_16a32b4a, ib::_16b32c4b)) ? (x0 / 4) * 128 + x1 * 4 + x0 % 4
: (utils::one_of(f, ib::_16a48b4a, ib::_16b48c4b)) ? (x0 / 4) * 192 + x1 * 4 + x0 % 4
: (utils::one_of(f, ib::_16a64b4a, ib::_16b64c4b)) ? (x0 / 4) * 256 + x1 * 4 + x0 % 4
: (f == ib::_4b16a4b || f == ib::_4c16b4c) ? (x1 / 4) * 64 + x0 * 4 + x1 % 4
: (f == ib::_4b32a4b) ? (x1 / 4) * 128 + x0 * 4 + x1 % 4
: (f == ib::_4b64a4b) ? (x1 / 4) * 256 + x0 * 4 + x1 % 4
Expand Down Expand Up @@ -336,6 +348,18 @@ DECL_TRAITS(BA16a16b4a, _AB, _16a16b4a, 2);
DECL_TRAITS(BA16a32b4a, _AB, _16a32b4a, 2);
DECL_TRAITS(BA16a48b4a, _AB, _16a48b4a, 2);
DECL_TRAITS(BA16a64b4a, _AB, _16a64b4a, 2);
// Traits for the aCB16b*c family: block dims _BC, with the inner block layout
// spelled out by the tag suffix.
// NOTE(review): the last DECL_TRAITS argument looks like the tag's number of
// dimensions (2 for the two-letter BA* tags, 3 for Abc*/ABc*). Every aCB* tag
// names three dimensions (a, c, b), so all entries here use 3 -- confirm
// against the macro definition.
DECL_TRAITS(aCB16b16c, _BC, _16b16c, 3);
DECL_TRAITS(aCB16b32c, _BC, _16b32c, 3);
DECL_TRAITS(aCB16b48c, _BC, _16b48c, 3);
DECL_TRAITS(aCB16b64c, _BC, _16b64c, 3);
DECL_TRAITS(aCB16b16c2b, _BC, _16b16c2b, 3);
DECL_TRAITS(aCB16b32c2b, _BC, _16b32c2b, 3);
DECL_TRAITS(aCB16b48c2b, _BC, _16b48c2b, 3);
DECL_TRAITS(aCB16b64c2b, _BC, _16b64c2b, 3);
DECL_TRAITS(aCB16b16c4b, _BC, _16b16c4b, 3);
DECL_TRAITS(aCB16b32c4b, _BC, _16b32c4b, 3);
DECL_TRAITS(aCB16b48c4b, _BC, _16b48c4b, 3);
DECL_TRAITS(aCB16b64c4b, _BC, _16b64c4b, 3);

DECL_TRAITS(Abc16a, _A, _16a, 3);
DECL_TRAITS(ABc16a16b, _AB, _16a16b, 3);
Expand Down
10 changes: 9 additions & 1 deletion src/cpu/reorder/cpu_reorder_comp_bf16_s8.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
* Copyright 2020-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -79,6 +79,14 @@ const impl_list_map_t &comp_bf16_s8_impl_list_map() {
DNNL_NON_X64_ONLY(REG_SR(bf16, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(bf16, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(bf16, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
REG_SR(bf16, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(bf16, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
nullptr,
}},
{{bf16, s8, 4}, {
Expand Down
10 changes: 9 additions & 1 deletion src/cpu/reorder/cpu_reorder_comp_f32_s8.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
* Copyright 2020-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -71,6 +71,14 @@ const impl_list_map_t &comp_f32_s8_impl_list_map() {
DNNL_NON_X64_ONLY(REG_SR(f32, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(f32, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(f32, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
REG_SR(f32, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(f32, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
nullptr,
}},
{{f32, s8, 4}, {
Expand Down
10 changes: 9 additions & 1 deletion src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
* Copyright 2020-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -72,6 +72,14 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
DNNL_NON_X64_ONLY(REG_SR(s8, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(s8, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
DNNL_NON_X64_ONLY(REG_SR(s8, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
REG_SR(s8, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
REG_SR(s8, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
nullptr,
}},
{{s8, s8, 4}, {
Expand Down
Loading

0 comments on commit acb8e12

Please sign in to comment.