diff --git a/python/tvm/script/ir_builder/tir/__init__.py b/python/tvm/script/ir_builder/tir/__init__.py index 0a71af4db7e6..563ac56f7b10 100644 --- a/python/tvm/script/ir_builder/tir/__init__.py +++ b/python/tvm/script/ir_builder/tir/__init__.py @@ -17,3 +17,4 @@ """Package tvm.script.ir_builder.tir""" from .ir import * # pylint: disable=wildcard-import,redefined-builtin from .ir import boolean as bool # pylint: disable=redefined-builtin +from .ir import buffer_decl as Buffer diff --git a/python/tvm/script/parser/tir/entry.py b/python/tvm/script/parser/tir/entry.py index a5c134a8594c..e7ec7cf886d4 100644 --- a/python/tvm/script/parser/tir/entry.py +++ b/python/tvm/script/parser/tir/entry.py @@ -55,7 +55,7 @@ class BufferProxy: def __call__( self, shape, - dtype=None, + dtype="float32", data=None, strides=None, elem_offset=None, @@ -65,8 +65,6 @@ def __call__( buffer_type="", axis_separators=None, ) -> Buffer: - if dtype is None: - raise ValueError("Data type must be specified when constructing buffer") return buffer_decl( shape, dtype=dtype, diff --git a/src/script/printer/tir/buffer.cc b/src/script/printer/tir/buffer.cc index b4429dc9afc9..19f3dc7ef577 100644 --- a/src/script/printer/tir/buffer.cc +++ b/src/script/printer/tir/buffer.cc @@ -209,8 +209,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) // if (!d->IsVarDefined(buffer)) { if (Optional opt_f = FindLowestVarDef(buffer, d)) { ExprDoc lhs = DefineBuffer(buffer, opt_f.value(), d); - ExprDoc rhs = BufferDecl(buffer, "buffer_decl", // TODO(@junrushao): name confusing - {}, p, opt_f.value(), d); + ExprDoc rhs = BufferDecl(buffer, "Buffer", {}, p, opt_f.value(), d); opt_f.value()->stmts.push_back(AssignDoc(lhs, rhs, NullOpt)); } } diff --git a/src/script/printer/tir/ir.cc b/src/script/printer/tir/ir.cc index 76d3680fec81..ce10ff6816d7 100644 --- a/src/script/printer/tir/ir.cc +++ b/src/script/printer/tir/ir.cc @@ -34,8 +34,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) } else if (dtype == DataType::Bool()) { return LiteralDoc::Boolean(imm->value, imm_p->Attr("value")); } else { - return TIR(d, runtime::DLDataType2String(dtype)) // - ->Call({LiteralDoc::Int(imm->value, imm_p->Attr("value"))}); + return TIR(d, DType2Str(dtype))->Call({LiteralDoc::Int(imm->value, imm_p->Attr("value"))}); } }); @@ -45,7 +44,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) if (dtype == d->cfg->float_dtype) { return LiteralDoc::Float(imm->value, imm_p->Attr("value")); } else { - return TIR(d, runtime::DLDataType2String(dtype)) // + return TIR(d, DType2Str(dtype)) ->Call({LiteralDoc::Float(imm->value, imm_p->Attr("value"))}); } }); @@ -61,8 +60,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) .set_dispatch("", [](PrimType ty, ObjectPath p, IRDocsifier d) -> Doc { - std::string dtype = ty->dtype.is_void() ? "void" : runtime::DLDataType2String(ty->dtype); - return TIR(d, dtype); + return TIR(d, DType2Str(ty->dtype)); }); TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) diff --git a/src/script/printer/utils.h b/src/script/printer/utils.h index 5161f1f9a268..cb20eb363ddd 100644 --- a/src/script/printer/utils.h +++ b/src/script/printer/utils.h @@ -65,6 +65,10 @@ inline std::string Docsify(const ObjectRef& obj, const IRDocsifier& d, const Fra return DocToPythonScript(StmtBlockDoc(f->stmts), cfg); } +inline std::string DType2Str(const runtime::DataType& dtype) { + return dtype.is_void() ? "void" : runtime::DLDataType2String(dtype); +} + } // namespace printer } // namespace script } // namespace tvm diff --git a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py index 586b8b380e22..02b5f9f7f122 100644 --- a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py +++ b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py @@ -29,16 +29,16 @@ class AllOperatorsWithWeights: def main() -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer2 = T.buffer_decl([128], "uint8") - buffer3 = T.buffer_decl([32], "uint8") - buffer4 = T.buffer_decl([112], "uint8") - buffer5 = T.buffer_decl([32], "uint8") - buffer6 = T.buffer_decl([112], "uint8") - buffer7 = T.buffer_decl([32], "uint8") - buffer8 = T.buffer_decl([112], "uint8") - buffer9 = T.buffer_decl([32], "uint8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer2 = T.Buffer([128], "uint8") + buffer3 = T.Buffer([32], "uint8") + buffer4 = T.Buffer([112], "uint8") + buffer5 = T.Buffer([32], "uint8") + buffer6 = T.Buffer([112], "uint8") + buffer7 = T.Buffer([32], "uint8") + buffer8 = T.Buffer([112], "uint8") + buffer9 = T.Buffer([32], "uint8") + buffer10 = T.Buffer([2048], "int8") # body p1 = T.decl_buffer([128], "uint8") p2 = T.decl_buffer([112], "uint8") @@ -77,16 +77,16 @@ class ReferenceModule: def main() -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer2 = T.buffer_decl([128], "uint8") - buffer3 = T.buffer_decl([32], "uint8") - buffer4 = T.buffer_decl([112], "uint8") - buffer5 = T.buffer_decl([32], "uint8") - buffer6 = T.buffer_decl([112], "uint8") - buffer7 = T.buffer_decl([32], "uint8") - buffer8 = T.buffer_decl([112], "uint8") - buffer9 = T.buffer_decl([32], "uint8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer2 = T.Buffer([128], "uint8") + buffer3 = T.Buffer([32], "uint8") + buffer4 = T.Buffer([112], "uint8") + buffer5 = T.Buffer([32], "uint8") + buffer6 = T.Buffer([112], "uint8") + buffer7 = T.Buffer([32], "uint8") + buffer8 = T.Buffer([112], "uint8") + buffer9 = T.Buffer([32], "uint8") + buffer10 = T.Buffer([2048], "int8") # body p1 = T.decl_buffer([128], "uint8") p2 = T.decl_buffer([112], "uint8") @@ -123,16 +123,16 @@ class ReferenceModule: def main() -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer2 = T.buffer_decl([128], "uint8") - buffer3 = T.buffer_decl([32], "uint8") - buffer4 = T.buffer_decl([112], "uint8") - buffer5 = T.buffer_decl([32], "uint8") - buffer6 = T.buffer_decl([112], "uint8") - buffer7 = T.buffer_decl([32], "uint8") - buffer8 = T.buffer_decl([112], "uint8") - buffer9 = T.buffer_decl([32], "uint8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer2 = T.Buffer([128], "uint8") + buffer3 = T.Buffer([32], "uint8") + buffer4 = T.Buffer([112], "uint8") + buffer5 = T.Buffer([32], "uint8") + buffer6 = T.Buffer([112], "uint8") + buffer7 = T.Buffer([32], "uint8") + buffer8 = T.Buffer([112], "uint8") + buffer9 = T.Buffer([32], "uint8") + buffer10 = T.Buffer([2048], "int8") # body p1 = T.decl_buffer([128], "uint8") p2 = T.decl_buffer([112], "uint8") @@ -167,8 +167,8 @@ class AllOperatorsWithoutWeights: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([36], "int8") - buffer2 = T.buffer_decl([9], "int8") + buffer1 = T.Buffer([36], "int8") + buffer2 = T.Buffer([9], "int8") # body p1 = T.decl_buffer([96], "int8") T.evaluate(T.call_extern("ethosu_pooling", "int8", 3, 4, 3, 3, 0, 4, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 12, 3, 1, "int8", 3, 2, 3, 3, 0, 2, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 32, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -189,11 +189,11 @@ class OperatorsWithAndWithoutWeights: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([97156], "int8") - buffer2 = T.buffer_decl([80], "uint8") - buffer3 = T.buffer_decl([64], "uint8") - buffer4 = T.buffer_decl([96], "uint8") - buffer5 = T.buffer_decl([32], "uint8") + buffer1 = T.Buffer([97156], "int8") + buffer2 = T.Buffer([80], "uint8") + buffer3 = T.Buffer([64], "uint8") + buffer4 = T.Buffer([96], "uint8") + buffer5 = T.Buffer([32], "uint8") # body p1 = T.decl_buffer([390336], "int8") p2 = T.decl_buffer([80], "uint8") @@ -224,11 +224,11 @@ class ReferenceModule: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([97156], "int8") - buffer2 = T.buffer_decl([80], "uint8") - buffer3 = T.buffer_decl([64], "uint8") - buffer4 = T.buffer_decl([96], "uint8") - buffer5 = T.buffer_decl([32], "uint8") + buffer1 = T.Buffer([97156], "int8") + buffer2 = T.Buffer([80], "uint8") + buffer3 = T.Buffer([64], "uint8") + buffer4 = T.Buffer([96], "uint8") + buffer5 = T.Buffer([32], "uint8") # body p1 = T.decl_buffer([390336], "int8") p2 = T.decl_buffer([80], "uint8") @@ -257,11 +257,11 @@ class ReferenceModule: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([97156], "int8") - buffer2 = T.buffer_decl([80], "uint8") - buffer3 = T.buffer_decl([64], "uint8") - buffer4 = T.buffer_decl([96], "uint8") - buffer5 = T.buffer_decl([32], "uint8") + buffer1 = T.Buffer([97156], "int8") + buffer2 = T.Buffer([80], "uint8") + buffer3 = T.Buffer([64], "uint8") + buffer4 = T.Buffer([96], "uint8") + buffer5 = T.Buffer([32], "uint8") # body p1 = T.decl_buffer([390336], "int8") p2 = T.decl_buffer([80], "uint8") @@ -289,14 +289,14 @@ class CopyToBufferWithLocalScope: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([64], "uint8") - buffer2 = T.buffer_decl([48], "uint8") - buffer3 = T.buffer_decl([48], "uint8") - buffer4 = T.buffer_decl([256], "uint8") - buffer5 = T.buffer_decl([16], "uint8") - buffer6 = T.buffer_decl([48], "uint8") - buffer7 = T.buffer_decl([256], "uint8") - buffer8 = T.buffer_decl([64], "uint8") + buffer1 = T.Buffer([64], "uint8") + buffer2 = T.Buffer([48], "uint8") + buffer3 = T.Buffer([48], "uint8") + buffer4 = T.Buffer([256], "uint8") + buffer5 = T.Buffer([16], "uint8") + buffer6 = T.Buffer([48], "uint8") + buffer7 = T.Buffer([256], "uint8") + buffer8 = T.Buffer([64], "uint8") # body p1 = T.decl_buffer([48], "uint8") p2 = T.decl_buffer([48], "uint8") @@ -330,14 +330,14 @@ class ReferenceModule: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([64], "uint8") - buffer2 = T.buffer_decl([48], "uint8") - buffer3 = T.buffer_decl([48], "uint8") - buffer4 = T.buffer_decl([256], "uint8") - buffer5 = T.buffer_decl([16], "uint8") - buffer6 = T.buffer_decl([48], "uint8") - buffer7 = T.buffer_decl([256], "uint8") - buffer8 = T.buffer_decl([64], "uint8") + buffer1 = T.Buffer([64], "uint8") + buffer2 = T.Buffer([48], "uint8") + buffer3 = T.Buffer([48], "uint8") + buffer4 = T.Buffer([256], "uint8") + buffer5 = T.Buffer([16], "uint8") + buffer6 = T.Buffer([48], "uint8") + buffer7 = T.Buffer([256], "uint8") + buffer8 = T.Buffer([64], "uint8") # body p1 = T.decl_buffer([48], "uint8") p2 = T.decl_buffer([48], "uint8") @@ -406,11 +406,11 @@ class ReferenceModule: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([97156], "int8") - buffer2 = T.buffer_decl([80], "uint8") - buffer3 = T.buffer_decl([64], "uint8") - buffer4 = T.buffer_decl([96], "uint8") - buffer5 = T.buffer_decl([32], "uint8") + buffer1 = T.Buffer([97156], "int8") + buffer2 = T.Buffer([80], "uint8") + buffer3 = T.Buffer([64], "uint8") + buffer4 = T.Buffer([96], "uint8") + buffer5 = T.Buffer([32], "uint8") # body p1 = T.decl_buffer([390336], "int8") p2 = T.decl_buffer([80], "uint8") @@ -439,11 +439,11 @@ class ReferenceModule: @T.prim_func def main() -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([97156], "int8") - buffer2 = T.buffer_decl([80], "uint8") - buffer3 = T.buffer_decl([64], "uint8") - buffer4 = T.buffer_decl([96], "uint8") - buffer5 = T.buffer_decl([32], "uint8") + buffer1 = T.Buffer([97156], "int8") + buffer2 = T.Buffer([80], "uint8") + buffer3 = T.Buffer([64], "uint8") + buffer4 = T.Buffer([96], "uint8") + buffer5 = T.Buffer([32], "uint8") # body p1 = T.decl_buffer([390336], "int8") p2 = T.decl_buffer([80], "uint8") diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py index 0728840ee96b..871c7e29df20 100644 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ b/tests/python/contrib/test_ethosu/test_encode_constants.py @@ -39,19 +39,19 @@ class WeightStreamOnlyU55: def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data) - buffer1 = T.buffer_decl([160], "uint8") - buffer3 = T.buffer_decl([144], "uint8") - buffer5 = T.buffer_decl([144], "uint8") - buffer7 = T.buffer_decl([144], "uint8") - buffer8 = T.buffer_decl([32], "uint8") + placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) + buffer1 = T.Buffer([160], "uint8") + buffer3 = T.Buffer([144], "uint8") + buffer5 = T.Buffer([144], "uint8") + buffer7 = T.Buffer([144], "uint8") + buffer8 = T.Buffer([32], "uint8") # body p1_data = T.allocate([160], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([160], "uint8", data=p1_data) + p1 = T.Buffer([160], "uint8", data=p1_data) p2_data = T.allocate([144], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([144], "uint8", data=p2_data) - buffer9 = T.buffer_decl([144], "uint8", data=p1.data) + p2 = T.Buffer([144], "uint8", data=p2_data) + buffer9 = T.Buffer([144], "uint8", data=p1.data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 160, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 144, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, T.int8(-1), T.int8(-1), 12, p1[128], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -70,18 +70,18 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) - buffer_encoded_1 = T.buffer_decl([192], dtype="uint8") - buffer_encoded_2_1 = T.buffer_decl([192], dtype="uint8") - buffer_encoded_4_1 = T.buffer_decl([208], dtype="uint8") - buffer_encoded_6_1 = T.buffer_decl([192], dtype="uint8") + placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) + buffer_encoded_1 = T.Buffer([192], dtype="uint8") + buffer_encoded_2_1 = T.Buffer([192], dtype="uint8") + buffer_encoded_4_1 = T.Buffer([208], dtype="uint8") + buffer_encoded_6_1 = T.Buffer([192], dtype="uint8") # body p1_data = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([208], "uint8", data=p1_data) + p1 = T.Buffer([208], "uint8", data=p1_data) p2_data = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([192], "uint8", data=p2_data) - p3 = T.buffer_decl([192], dtype="uint8", data=p1.data) + p2 = T.Buffer([192], "uint8", data=p2_data) + p3 = T.Buffer([192], dtype="uint8", data=p1.data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 192, p3[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2_1[0], 192, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 80, p3[80], 80, 12, p3[160], 16, p3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -157,14 +157,14 @@ class RereadWeightsU55: def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([384], "uint8") - placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data) + buffer1 = T.Buffer([384], "uint8") + placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) # body p1_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([384], "uint8", data=p1_data) + p1 = T.Buffer([384], "uint8", data=p1_data) p2_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([384], "uint8", data=p2_data) + p2 = T.Buffer([384], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 304, T.int8(-1), T.int8(-1), 12, p1[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -179,14 +179,14 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) - placeholder_encoded_1 = T.buffer_decl([464], "uint8") + placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) + placeholder_encoded_1 = T.Buffer([464], "uint8") # body p1_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([464], "uint8", data=p1_data) + p1 = T.Buffer([464], "uint8", data=p1_data) p2_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([464], "uint8", data=p2_data) + p2 = T.Buffer([464], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -259,15 +259,15 @@ class DirectReadOnlyU55: def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([592], "uint8") - buffer_1 = T.buffer_decl([160], "uint8") - buffer_2 = T.buffer_decl([160], "uint8") - buffer_3 = T.buffer_decl([80], "uint8") - placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data) + buffer = T.Buffer([592], "uint8") + buffer_1 = T.Buffer([160], "uint8") + buffer_2 = T.Buffer([160], "uint8") + buffer_3 = T.Buffer([80], "uint8") + placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) # body ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 160, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -280,15 +280,15 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder_encoded = T.buffer_decl([608], dtype="uint8") - placeholder_encoded_1 = T.buffer_decl([160], dtype="uint8") - placeholder_encoded_2 = T.buffer_decl([208], dtype="uint8") - placeholder_encoded_3 = T.buffer_decl([96], dtype="uint8") - placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) + placeholder_encoded = T.Buffer([608], dtype="uint8") + placeholder_encoded_1 = T.Buffer([160], dtype="uint8") + placeholder_encoded_2 = T.Buffer([208], dtype="uint8") + placeholder_encoded_3 = T.Buffer([96], dtype="uint8") + placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) # body ethosu_write_2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_2 = T.buffer_decl([4096], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([4096], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded[0], 304, placeholder_encoded[304], 304, 12, placeholder_encoded_1[0], 80, placeholder_encoded_1[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded_2[0], 112, placeholder_encoded_2[112], 96, 12, placeholder_encoded_3[0], 48, placeholder_encoded_3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -357,21 +357,21 @@ class MixedReadU55: def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([112], "uint8") - buffer3 = T.buffer_decl([112], "uint8") - buffer5 = T.buffer_decl([112], "uint8") - buffer7 = T.buffer_decl([112], "uint8") - buffer9 = T.buffer_decl([592], "uint8") - buffer10 = T.buffer_decl([160], "uint8") - ifm = T.buffer_decl([8192], "int8", data=input_ifm.data) - ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data) + buffer1 = T.Buffer([112], "uint8") + buffer3 = T.Buffer([112], "uint8") + buffer5 = T.Buffer([112], "uint8") + buffer7 = T.Buffer([112], "uint8") + buffer9 = T.Buffer([592], "uint8") + buffer10 = T.Buffer([160], "uint8") + ifm = T.Buffer([8192], "int8", data=input_ifm.data) + ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) # body p1_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([112], "uint8", data=p1_data) + p1 = T.Buffer([112], "uint8", data=p1_data) p3_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - p3 = T.buffer_decl([4096], "int8", data=p3_data) + p3 = T.Buffer([4096], "int8", data=p3_data) p2_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([112], "uint8", data=p2_data) + p2 = T.Buffer([112], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 112, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer9[0], 592, T.int8(-1), T.int8(-1), 12, buffer10[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 112, p2[0], dtype="handle")) @@ -391,20 +391,20 @@ def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - ifm = T.buffer_decl([8192], dtype="int8", data=input_ifm.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) - buffer1 = T.buffer_decl([128], dtype="uint8") - buffer2 = T.buffer_decl([128], dtype="uint8") - buffer3 = T.buffer_decl([128], dtype="uint8") - buffer4 = T.buffer_decl([608], dtype="uint8") - buffer5 = T.buffer_decl([160], dtype="uint8") - buffer6 = T.buffer_decl([128], dtype="uint8") + ifm = T.Buffer([8192], dtype="int8", data=input_ifm.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) + buffer1 = T.Buffer([128], dtype="uint8") + buffer2 = T.Buffer([128], dtype="uint8") + buffer3 = T.Buffer([128], dtype="uint8") + buffer4 = T.Buffer([608], dtype="uint8") + buffer5 = T.Buffer([160], dtype="uint8") + buffer6 = T.Buffer([128], dtype="uint8") p1_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([128], "uint8", data=p1_data) + p1 = T.Buffer([128], "uint8", data=p1_data) p2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([4096], "int8", data=p2_data) + p2 = T.Buffer([4096], "int8", data=p2_data) p3_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True}) - p3 = T.buffer_decl([128], "uint8", data=p3_data) + p3 = T.Buffer([128], "uint8", data=p3_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 128, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer4[0], 304, buffer4[304], 304, 12, buffer5[0], 80, buffer5[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p3[0], dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py index 1508aa441c3b..ea1cae50e6eb 100644 --- a/tests/python/contrib/test_ethosu/test_hoist_allocates.py +++ b/tests/python/contrib/test_ethosu/test_hoist_allocates.py @@ -109,27 +109,27 @@ class Module: def main(input_placeholder: T.Buffer[(1, 27, 42, 3), "int8"], input_placeholder_encoded: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_1: T.Buffer[(3, 10), "uint8"], input_placeholder_encoded_2: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_3: T.Buffer[(3, 10), "uint8"], input_ethosu_write: T.Buffer[(1, 27, 42, 3), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([3402], dtype="int8", data=input_placeholder.data) - placeholder_encoded = T.buffer_decl([128], dtype="int8", data=input_placeholder_encoded.data) - placeholder_encoded_1 = T.buffer_decl([32], dtype="uint8", data=input_placeholder_encoded_1.data) - placeholder_encoded_2 = T.buffer_decl([128], dtype="int8", data=input_placeholder_encoded_2.data) - placeholder_encoded_3 = T.buffer_decl([32], dtype="uint8", data=input_placeholder_encoded_3.data) - ethosu_write = T.buffer_decl([3402], dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer([3402], dtype="int8", data=input_placeholder.data) + placeholder_encoded = T.Buffer([128], dtype="int8", data=input_placeholder_encoded.data) + placeholder_encoded_1 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_1.data) + placeholder_encoded_2 = T.Buffer([128], dtype="int8", data=input_placeholder_encoded_2.data) + placeholder_encoded_3 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_3.data) + ethosu_write = T.Buffer([3402], dtype="int8", data=input_ethosu_write.data) # body placeholder_global_data = T.allocate([128], "uint8", "global") - placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data) + placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 128, placeholder_global[0], dtype="handle")) placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) ethosu_write_2_data = T.allocate([18144], "int8", "global") - ethosu_write_2 = T.buffer_decl([18144], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([18144], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, placeholder[0], 0, 0, 0, T.float32(0.0039215646684169769), -128, "NHWC", 126, 3, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, 2, 3, 1, 1, 1, 2, placeholder_global[0], 128, 0, placeholder_d_global[0], 32, 2, 0, 2, 1, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) placeholder_d_global_1_data = T.allocate([128], "uint8", "global") - placeholder_d_global_1 = T.buffer_decl([128], "uint8", data=placeholder_d_global_1_data) + placeholder_d_global_1 = T.Buffer([128], "uint8", data=placeholder_d_global_1_data) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 128, placeholder_d_global_1[0], dtype="handle")) placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data) + placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 32, placeholder_d_global_2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write[0], 0, 0, 0, T.float32(0.23604340851306915), -128, "NHWC", 126, 3, 1, 2, 3, 1, 1, 1, 2, placeholder_d_global_1[0], 128, 0, placeholder_d_global_2[0], 32, 2, 0, 2, 1, "CLIP", -128, 127, "TFL", "NONE", dtype="handle")) # fmt: on @@ -153,20 +153,20 @@ class Module: def main(input_placeholder: T.Buffer[(1, 2, 3, 4), "int8"], T_concat: T.Buffer[(24,), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([24], dtype="int8", data=input_placeholder.data) + placeholder = T.Buffer([24], dtype="int8", data=input_placeholder.data) # body ethosu_write_data = T.allocate([12], "int8", "global") - ethosu_write = T.buffer_decl([12], "int8", data=ethosu_write_data) + ethosu_write = T.Buffer([12], "int8", data=ethosu_write_data) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) ethosu_write_1_data = T.allocate([12], "int8", "global") - ethosu_write_1 = T.buffer_decl([12], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.Buffer([12], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) ethosu_write_2_data = T.allocate([12], "int8", "global") - ethosu_write_2 = T.buffer_decl([12], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([12], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) ethosu_write_3_data = T.allocate([12], "int8", "global") - ethosu_write_3 = T.buffer_decl([12], "int8", data=ethosu_write_3_data) + ethosu_write_3 = T.Buffer([12], "int8", data=ethosu_write_3_data) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle")) # fmt: on @@ -190,35 +190,35 @@ class Module: def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) # body with T.allocate([128], "uint8", "global") as placeholder_global_data: - placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data) + placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle")) placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) with T.allocate([112], "uint8", "global") as placeholder_global_1_data: - placeholder_global_1 = T.buffer_decl([112], "uint8", data=placeholder_global_1_data) + placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 112, placeholder_global_1[0], dtype="handle")) placeholder_d_global_1_data = T.allocate([32], "uint8", "global") - placeholder_d_global_1 = T.buffer_decl([32], "uint8", data=placeholder_d_global_1_data) + placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 32, placeholder_d_global_1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) with T.allocate([112], "uint8", "global") as placeholder_global_2_data: - placeholder_global_2 = T.buffer_decl([112], "uint8", data=placeholder_global_2_data) + placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle")) placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data) + placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) placeholder_global_3_data = T.allocate([112], "uint8", "global") - placeholder_global_3 = T.buffer_decl([112], "uint8", data=placeholder_global_3_data) + placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle")) placeholder_d_global_3_data = T.allocate([32], "uint8", "global") - placeholder_d_global_3 = T.buffer_decl([32], "uint8", data=placeholder_d_global_3_data) + placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) # fmt: on @@ -240,23 +240,23 @@ class Module: def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) # body placeholder_global_data = T.allocate([128], "uint8", "global") - placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data) + placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data) placeholder_global_1_data = T.allocate([112], "uint8", "global") - placeholder_global_1 = T.buffer_decl([112], "uint8", data=placeholder_global_1_data) + placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data) placeholder_global_2_data = T.allocate([112], "uint8", "global") - placeholder_global_2 = T.buffer_decl([112], "uint8", data=placeholder_global_2_data) + placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data) placeholder_d_global_data = T.allocate([32], "uint8", "global") - placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) placeholder_d_global_1_data = T.allocate([32], "uint8", "global") - placeholder_d_global_1 = T.buffer_decl([32], "uint8", data=placeholder_d_global_1_data) + placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data) placeholder_d_global_2_data = T.allocate([32], "uint8", "global") - placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data) + placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data) placeholder_global_3_data = T.allocate([112], "uint8", "global") - placeholder_global_3 = T.buffer_decl([112], "uint8", data=placeholder_global_3_data) + placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -266,7 +266,7 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_writ T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle")) placeholder_d_global_3_data = T.allocate([32], "uint8", "global") - placeholder_d_global_3 = T.buffer_decl([32], "uint8", data=placeholder_d_global_3_data) + placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_merge_constants.py b/tests/python/contrib/test_ethosu/test_merge_constants.py index ed1927b849d6..7465e220787c 100644 --- a/tests/python/contrib/test_ethosu/test_merge_constants.py +++ b/tests/python/contrib/test_ethosu/test_merge_constants.py @@ -41,13 +41,13 @@ class InputModule: def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p1_data = T.allocate([128], "uint8", "global") - p1 = T.buffer_decl([128], "uint8", data=p1_data) + p1 = T.Buffer([128], "uint8", data=p1_data) p4_data = T.allocate([32], "uint8", "global") - p4 = T.buffer_decl([32], "uint8", data=p4_data) + p4 = T.Buffer([32], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -59,11 +59,11 @@ class ReferenceModule: def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p4_data = T.allocate([160], "uint8", "global") - p4 = T.buffer_decl([160], "uint8", data=p4_data) + p4 = T.Buffer([160], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) # fmt: on @@ -86,25 +86,25 @@ class InputModule: def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p1_data = T.allocate([128], "uint8", "global") - p1 = T.buffer_decl([128], "uint8", data=p1_data) + p1 = T.Buffer([128], "uint8", data=p1_data) p2_data = T.allocate([112], "uint8", "global") - p2 = T.buffer_decl([112], "uint8", data=p2_data) + p2 = T.Buffer([112], "uint8", data=p2_data) p3_data = T.allocate([112], "uint8", "global") - p3 = T.buffer_decl([112], "uint8", data=p3_data) + p3 = T.Buffer([112], "uint8", data=p3_data) p4_data = T.allocate([32], "uint8", "global") - p4 = T.buffer_decl([32], "uint8", data=p4_data) + p4 = T.Buffer([32], "uint8", data=p4_data) p5_data = T.allocate([32], "uint8", "global") - p5 = T.buffer_decl([32], "uint8", data=p5_data) + p5 = T.Buffer([32], "uint8", data=p5_data) p6_data = T.allocate([32], "uint8", "global") - p6 = T.buffer_decl([32], "uint8", data=p6_data) + p6 = T.Buffer([32], "uint8", data=p6_data) p7_data = T.allocate([112], "uint8", "global") - p7 = T.buffer_decl([112], "uint8", data=p7_data) + p7 = T.Buffer([112], "uint8", data=p7_data) p8_data = T.allocate([3], "uint8", "global") - p8 = T.buffer_decl([3], "uint8", data=p8_data) + p8 = T.Buffer([3], "uint8", data=p8_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle")) @@ -125,17 +125,17 @@ class ReferenceModule: def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p4_data = T.allocate([160], "uint8", "global") - p4 = T.buffer_decl([160], "uint8", data=p4_data) + p4 = T.Buffer([160], "uint8", data=p4_data) p7_data = T.allocate([144], "uint8", "global") - p7 = T.buffer_decl([144], "uint8", data=p7_data) + p7 = T.Buffer([144], "uint8", data=p7_data) p10_data = T.allocate([144], "uint8", "global") - p10 = T.buffer_decl([144], "uint8", data=p10_data) + p10 = T.Buffer([144], "uint8", data=p10_data) p11_data = T.allocate([144], "uint8", "global") - p11 = T.buffer_decl([144], "uint8", data=p11_data) + p11 = T.Buffer([144], "uint8", data=p11_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 144, p7[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -175,14 +175,14 @@ class InputModule: @T.prim_func def main(buffer2: T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer0 = T.buffer_decl([390336], "int8") - buffer1 = T.buffer_decl([97156], "int8") - buffer6 = T.buffer_decl([390336], "int8") + buffer0 = T.Buffer([390336], "int8") + buffer1 = T.Buffer([97156], "int8") + buffer6 = T.Buffer([390336], "int8") # body p2_data = T.allocate([80], "uint8", "global") - p2 = T.buffer_decl([80], "uint8", data=p2_data) + p2 = T.Buffer([80], "uint8", data=p2_data) p3_data = T.allocate([64], "uint8", "global") - p3 = T.buffer_decl([64], "uint8", data=p3_data) + p3 = T.Buffer([64], "uint8", data=p3_data) T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle")) @@ -194,12 +194,12 @@ class ReferenceModule: @T.prim_func def main(buffer2: T.Buffer[(144,), "uint8"]) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer0 = T.buffer_decl([390336], "int8") - buffer1 = T.buffer_decl([97156], "int8") - buffer6 = T.buffer_decl([390336], "int8") + buffer0 = T.Buffer([390336], "int8") + buffer1 = T.Buffer([97156], "int8") + buffer6 = T.Buffer([390336], "int8") # body p3_data = T.allocate([144], "uint8", "global") - p3 = T.buffer_decl([144], "uint8", data=p3_data) + p3 = T.Buffer([144], "uint8", data=p3_data) T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 144, p3[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, buffer6[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p3[0], 80, 0, p3[80], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -234,17 +234,17 @@ def main(buffer1: T.Buffer[(64,), "uint8"], T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body p1_data = T.allocate([48], "uint8", "global") - p1 = T.buffer_decl([48], "uint8", data=p1_data) + p1 = T.Buffer([48], "uint8", data=p1_data) p2_data = T.allocate([48], "uint8", "global") - p2 = T.buffer_decl([48], "uint8", data=p2_data) + p2 = T.Buffer([48], "uint8", data=p2_data) p3_data = T.allocate([256], "int8", "local") - p3 = T.buffer_decl([256], "int8", data=p3_data, scope="local") + p3 = T.Buffer([256], "int8", data=p3_data, scope="local") p5_data = T.allocate([16], "uint8", "global") - p5 = T.buffer_decl([16], "uint8", data=p5_data) + p5 = T.Buffer([16], "uint8", data=p5_data) p6_data = T.allocate([48], "uint8", "global") - p6 = T.buffer_decl([48], "uint8", data=p6_data) + p6 = T.Buffer([48], "uint8", data=p6_data) p7_data = T.allocate([256], "int8", "local") - p7 = T.buffer_decl([256], "int8", data=p7_data, scope="local") + p7 = T.Buffer([256], "int8", data=p7_data, scope="local") T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 48, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 48, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local @@ -269,13 +269,13 @@ def main(buffer1: T.Buffer[(64,), "uint8"], T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # body p1_data = T.allocate([96], "uint8", "global") - p1 = T.buffer_decl([96], "uint8", data=p1_data) + p1 = T.Buffer([96], "uint8", data=p1_data) p2_data = T.allocate([64], "uint8", "global") - p2 = T.buffer_decl([64], "uint8", data=p2_data) + p2 = T.Buffer([64], "uint8", data=p2_data) p3_data = T.allocate([256], "int8", "local") - p3 = T.buffer_decl([256], "int8", data=p3_data, scope="local") + p3 = T.Buffer([256], "int8", data=p3_data, scope="local") p7_data = T.allocate([256], "int8", "local") - p7 = T.buffer_decl([256], "int8", data=p7_data, scope="local") + p7 = T.Buffer([256], "int8", data=p7_data, scope="local") T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 64, p2[0], dtype="handle")) @@ -312,11 +312,11 @@ class InputModule: def main() -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([20], "int8") - ethosu_write = T.buffer_decl([16], "int8") + placeholder = T.Buffer([20], "int8") + ethosu_write = T.Buffer([16], "int8") # body ethosu_write_4_data = T.allocate([16], "int8", "global") - ethosu_write_4 = T.buffer_decl([16], "int8", data=ethosu_write_4_data) + ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data) T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle")) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -326,11 +326,11 @@ class ReferenceModule: def main() -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([20], "int8") - ethosu_write = T.buffer_decl([16], "int8") + placeholder = T.Buffer([20], "int8") + ethosu_write = T.Buffer([16], "int8") # body ethosu_write_4_data = T.allocate([16], "int8", "global") - ethosu_write_4 = T.buffer_decl([16], "int8", data=ethosu_write_4_data) + ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data) T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle")) T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) # fmt: on @@ -351,13 +351,13 @@ class InputModule: def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p1_data = T.allocate([128], "uint8", "global") - p1 = T.buffer_decl([128], "uint8", data=p1_data) + p1 = T.Buffer([128], "uint8", data=p1_data) p4_data = T.allocate([32], "uint8", "global") - p4 = T.buffer_decl([32], "uint8", data=p4_data) + p4 = T.Buffer([32], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -372,11 +372,11 @@ class ReferenceModule: def main(buffer2: T.Buffer[(160,), "uint8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p5_data = T.allocate([160], "uint8", "global") - p5 = T.buffer_decl([160], "uint8", data=p5_data) + p5 = T.Buffer([160], "uint8", data=p5_data) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5[0], 128, 12, p5[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle")) @@ -403,13 +403,13 @@ def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], buffer1: T.Buffer # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([368], "uint8", "global") - p1 = T.buffer_decl([368], "uint8", data=p1_data) + p1 = T.Buffer([368], "uint8", data=p1_data) p2_data = T.allocate([96], "uint8", "global") - p2 = T.buffer_decl([96], "uint8", data=p2_data) + p2 = T.Buffer([96], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -423,11 +423,11 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(4 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([464], "uint8", "global") - p1 = T.buffer_decl([464], "uint8", data=p1_data) + p1 = T.Buffer([464], "uint8", data=p1_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -453,17 +453,17 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([368], "uint8", "global") - p1 = T.buffer_decl([368], "uint8", data=p1_data) + p1 = T.Buffer([368], "uint8", data=p1_data) p2_data = T.allocate([96], "uint8", "global") - p2 = T.buffer_decl([96], "uint8", data=p2_data) + p2 = T.Buffer([96], "uint8", data=p2_data) p3_data = T.allocate([368], "uint8", "global") - p3 = T.buffer_decl([368], "uint8", data=p3_data) + p3 = T.Buffer([368], "uint8", data=p3_data) p4_data = T.allocate([96], "uint8", "global") - p4 = T.buffer_decl([96], "uint8", data=p4_data) + p4 = T.Buffer([96], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -480,13 +480,13 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(4 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([464], "uint8", "global") - p1 = T.buffer_decl([464], "uint8", data=p1_data) + p1 = T.Buffer([464], "uint8", data=p1_data) p2_data = T.allocate([464], "uint8", "global") - p2 = T.buffer_decl([464], "uint8", data=p2_data) + p2 = T.Buffer([464], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) @@ -519,17 +519,17 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([368], "uint8", "global") - p1 = T.buffer_decl([368], "uint8", data=p1_data) + p1 = T.Buffer([368], "uint8", data=p1_data) p2_data = T.allocate([96], "uint8", "global") - p2 = T.buffer_decl([96], "uint8", data=p2_data) + p2 = T.Buffer([96], "uint8", data=p2_data) p3_data = T.allocate([368], "uint8", "global") - p3 = T.buffer_decl([368], "uint8", data=p3_data) + p3 = T.Buffer([368], "uint8", data=p3_data) p4_data = T.allocate([96], "uint8", "global") - p4 = T.buffer_decl([96], "uint8", data=p4_data) + p4 = T.Buffer([96], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -546,13 +546,13 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(4 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data) - ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data) + ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data) # body p1_data = T.allocate([464], "uint8", "global") - p1 = T.buffer_decl([464], "uint8", data=p1_data) + p1 = T.Buffer([464], "uint8", data=p1_data) p2_data = T.allocate([464], "uint8", "global") - p2 = T.buffer_decl([464], "uint8", data=p2_data) + p2 = T.Buffer([464], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) @@ -585,17 +585,17 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(3 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype='int8', data=input_placeholder.data) - ethosu_write = T.buffer_decl(4096, dtype='int8', data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data) + ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data) # body p1_data = T.allocate([368], "uint8", "global") - p1 = T.buffer_decl([368], "uint8", data=p1_data) + p1 = T.Buffer([368], "uint8", data=p1_data) p2_data = T.allocate([368], "uint8", "global") - p2 = T.buffer_decl([368], "uint8", data=p2_data) + p2 = T.Buffer([368], "uint8", data=p2_data) p3_data = T.allocate([96], "uint8", "global") - p3 = T.buffer_decl([96], "uint8", data=p3_data) + p3 = T.Buffer([96], "uint8", data=p3_data) p4_data = T.allocate([96], "uint8", "global") - p4 = T.buffer_decl([96], "uint8", data=p4_data) + p4 = T.Buffer([96], "uint8", data=p4_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 96, p3[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p3[0], 48, p3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -612,13 +612,13 @@ def main(input_placeholder: T.Buffer[(1,16,16,32), "int8"], buffer1: T.Buffer[(4 # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - placeholder = T.buffer_decl(8192, dtype='int8', data=input_placeholder.data) - ethosu_write = T.buffer_decl(4096, dtype='int8', data=input_ethosu_write.data) + placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data) + ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data) # body p1_data = T.allocate([464], "uint8", "global") - p1 = T.buffer_decl([464], "uint8", data=p1_data) + p1 = T.Buffer([464], "uint8", data=p1_data) p2_data = T.allocate([464], "uint8", "global") - p2 = T.buffer_decl([464], "uint8", data=p2_data) + p2 = T.Buffer([464], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle")) @@ -662,25 +662,25 @@ def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], v4a = T.var("int32") v4b = T.var("int32") v4c = T.var("int32") - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p1_data = T.allocate([128], "uint8", "global") - p1 = T.buffer_decl([128], "uint8", data=p1_data) + p1 = T.Buffer([128], "uint8", data=p1_data) p2_data = T.allocate([112], "uint8", "global") - p2 = T.buffer_decl([112], "uint8", data=p2_data) + p2 = T.Buffer([112], "uint8", data=p2_data) p3_data = T.allocate([112], "uint8", "global") - p3 = T.buffer_decl([112], "uint8", data=p3_data) + p3 = T.Buffer([112], "uint8", data=p3_data) p4_data = T.allocate([32], "uint8", "global") - p4 = T.buffer_decl([32], "uint8", data=p4_data) + p4 = T.Buffer([32], "uint8", data=p4_data) p5_data = T.allocate([32], "uint8", "global") - p5 = T.buffer_decl([32], "uint8", data=p5_data) + p5 = T.Buffer([32], "uint8", data=p5_data) p6_data = T.allocate([32], "uint8", "global") - p6 = T.buffer_decl([32], "uint8", data=p6_data) + p6 = T.Buffer([32], "uint8", data=p6_data) p7_data = T.allocate([112], "uint8", "global") - p7 = T.buffer_decl([112], "uint8", data=p7_data) + p7 = T.Buffer([112], "uint8", data=p7_data) p8_data = T.allocate([3], "uint8", "global") - p8 = T.buffer_decl([3], "uint8", data=p8_data) + p8 = T.Buffer([3], "uint8", data=p8_data) with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 100): T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle")) with T.attr(T.iter_var(v1b, None, "DataPar", ""), "pragma_compute_cycles_hint", 101): @@ -721,17 +721,17 @@ def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], v3c = T.var("int32") v4a = T.var("int32") v4c = T.var("int32") - buffer1 = T.buffer_decl([8192], "int8") - buffer10 = T.buffer_decl([2048], "int8") + buffer1 = T.Buffer([8192], "int8") + buffer10 = T.Buffer([2048], "int8") # body p4_data = T.allocate([160], "uint8", "global") - p4 = T.buffer_decl([160], "uint8", data=p4_data) + p4 = T.Buffer([160], "uint8", data=p4_data) p7_data = T.allocate([144], "uint8", "global") - p7 = T.buffer_decl([144], "uint8", data=p7_data) + p7 = T.Buffer([144], "uint8", data=p7_data) p10_data = T.allocate([144], "uint8", "global") - p10 = T.buffer_decl([144], "uint8", data=p10_data) + p10 = T.Buffer([144], "uint8", data=p10_data) p11_data = T.allocate([144], "uint8", "global") - p11 = T.buffer_decl([144], "uint8", data=p11_data) + p11 = T.Buffer([144], "uint8", data=p11_data) with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 201): T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle")) with T.attr(T.iter_var(v2a, None, "DataPar", ""), "pragma_compute_cycles_hint", 205): diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py index b8ce7f0d60c9..64777aa0fb71 100644 --- a/tests/python/contrib/test_ethosu/test_remove_concatenates.py +++ b/tests/python/contrib/test_ethosu/test_remove_concatenates.py @@ -35,21 +35,21 @@ def main(input_placeholder: T.Buffer[(1,8,12,16), "int8"], input_placeholder_1: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl(1536, dtype="int8", data=input_placeholder.data) - placeholder_1 = T.buffer_decl(1280, dtype="int8", data=input_placeholder_1.data) - T_concat = T.buffer_decl(4096, dtype="int8", data=input_T_concat.data) + placeholder = T.Buffer(1536, dtype="int8", data=input_placeholder.data) + placeholder_1 = T.Buffer(1280, dtype="int8", data=input_placeholder_1.data) + T_concat = T.Buffer(4096, dtype="int8", data=input_T_concat.data) - buffer = T.buffer_decl([2992], "uint8") - buffer_1 = T.buffer_decl([160], "uint8") - buffer_2 = T.buffer_decl([2992], "uint8") - buffer_3 = T.buffer_decl([160], "uint8") - buffer_4 = T.buffer_decl([2992], "uint8") - buffer_5 = T.buffer_decl([160], "uint8") - buffer_6 = T.buffer_decl([2992], "uint8") - buffer_7 = T.buffer_decl([160], "uint8") + buffer = T.Buffer([2992], "uint8") + buffer_1 = T.Buffer([160], "uint8") + buffer_2 = T.Buffer([2992], "uint8") + buffer_3 = T.Buffer([160], "uint8") + buffer_4 = T.Buffer([2992], "uint8") + buffer_5 = T.Buffer([160], "uint8") + buffer_6 = T.Buffer([2992], "uint8") + buffer_7 = T.Buffer([160], "uint8") # body T_concat_1_data = T.allocate([2816], "int8", "global", annotations={"disable_lower_builtin":True}) - T_concat_1 = T.buffer_decl([2816], "int8", data=T_concat_1_data) + T_concat_1 = T.Buffer([2816], "int8", data=T_concat_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, placeholder_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 160, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 352, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat[352], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 16, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_3[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 12, 16, 8, 0, 12, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 192, 16, 1, "int8", 8, 12, 16, 8, 0, 12, T_concat_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer_4[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_5[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py index bdc0447bc718..ffa6d6effd79 100644 --- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py +++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py @@ -370,15 +370,15 @@ class Conv2dDoubleCascade1: def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([304], "uint8") - buffer_1 = T.buffer_decl([80], "uint8") - buffer_2 = T.buffer_decl([320], "uint8") - buffer_3 = T.buffer_decl([160], "uint8") - placeholder_5 = T.buffer_decl([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.buffer_decl([512], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([304], "uint8") + buffer_1 = T.Buffer([80], "uint8") + buffer_2 = T.Buffer([320], "uint8") + buffer_3 = T.Buffer([160], "uint8") + placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([1024], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.buffer_decl([1024], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([1024], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 128, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, buffer[0], 304, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[12], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -392,15 +392,15 @@ class Conv2dDoubleCascade2: def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([80], "uint8") - buffer_1 = T.buffer_decl([320], "uint8") - buffer_2 = T.buffer_decl([1312], "uint8") - buffer_3 = T.buffer_decl([2608], "uint8") - placeholder_5 = T.buffer_decl([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.buffer_decl([512], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([80], "uint8") + buffer_1 = T.Buffer([320], "uint8") + buffer_2 = T.Buffer([1312], "uint8") + buffer_3 = T.Buffer([2608], "uint8") + placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([1536], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.buffer_decl([1536], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([1536], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 8, 8, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 3, 3, 1, 1, 1, 1, buffer_3[0], 2608, T.int8(-1), T.int8(-1), 12, buffer[0], 80, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[48], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -414,16 +414,16 @@ class Conv2dDoubleCascade3: def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 20, 4, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([1744], "uint8") - buffer_1 = T.buffer_decl([80], "uint8") - buffer_2 = T.buffer_decl([320], "uint8") - buffer_3 = T.buffer_decl([880], "uint8") - placeholder_5 = T.buffer_decl([768], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.buffer_decl([640], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([1744], "uint8") + buffer_1 = T.Buffer([80], "uint8") + buffer_2 = T.Buffer([320], "uint8") + buffer_3 = T.Buffer([880], "uint8") + placeholder_5 = T.Buffer([768], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.Buffer([640], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([2560], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.buffer_decl([2560], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer([2560], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 3, 8, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 12, 16, 3, 12, 0, 16, placeholder_5[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 10, 8, 32, 10, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -439,15 +439,15 @@ class Conv2dDoubleCascade4: def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 2, 8, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([1456], "uint8") - buffer_1 = T.buffer_decl([352], "uint8") - buffer_2 = T.buffer_decl([272], "uint8") - buffer_3 = T.buffer_decl([11040], "uint8") - placeholder_5 = T.buffer_decl([1024], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.buffer_decl([2048], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([1456], "uint8") + buffer_1 = T.Buffer([352], "uint8") + buffer_2 = T.Buffer([272], "uint8") + buffer_3 = T.Buffer([11040], "uint8") + placeholder_5 = T.Buffer([1024], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.Buffer([2048], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([2304], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.buffer_decl((2304,), "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.Buffer((2304,), "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 384, 16, 128, "int8", 4, 8, 26, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 256, 16, 128, 3, 3, 1, 1, 1, 1, buffer_3[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_2[0], 272, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[256], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -461,15 +461,15 @@ class Conv2dDoubleCascade5: def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: T.Buffer[(1, 32, 32, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([320], "uint8") - buffer_2 = T.buffer_decl([304], "uint8") - buffer_3 = T.buffer_decl([80], "uint8") - placeholder = T.buffer_decl([192], 'int8', data=input_placeholder.data) - ethosu_write = T.buffer_decl([8192], 'int8', data=input_ethosu_write.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([320], "uint8") + buffer_2 = T.Buffer([304], "uint8") + buffer_3 = T.Buffer([80], "uint8") + placeholder = T.Buffer([192], 'int8', data=input_placeholder.data) + ethosu_write = T.Buffer([8192], 'int8', data=input_ethosu_write.data) # body ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 304, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[96], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) @@ -483,15 +483,15 @@ class Conv2dDoubleCascade6: def main(input_placeholder: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write: T.Buffer[(1, 32, 2, 32, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([1456], "uint8") - buffer_1 = T.buffer_decl([352], "uint8") - buffer_2 = T.buffer_decl([11040], "uint8") - buffer_3 = T.buffer_decl([272], "uint8") - placeholder = T.buffer_decl([1024], 'int8', data=input_placeholder.data) - ethosu_write = T.buffer_decl([32768], 'int8', data=input_ethosu_write.data) + buffer = T.Buffer([1456], "uint8") + buffer_1 = T.Buffer([352], "uint8") + buffer_2 = T.Buffer([11040], "uint8") + buffer_3 = T.Buffer([272], "uint8") + placeholder = T.Buffer([1024], 'int8', data=input_placeholder.data) + ethosu_write = T.Buffer([32768], 'int8', data=input_ethosu_write.data) # body ethosu_write_1_data = T.allocate([12288], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.buffer_decl([12288], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.Buffer([12288], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 3, 8, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 768, 16, 256, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 768, 16, 256, "int8", 32, 32, 26, 32, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 1024, 16, 512, 3, 3, 1, 1, 1, 1, buffer_2[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_3[0], 272, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -647,10 +647,10 @@ class Conv2dInlineCopy1: def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([848], "uint8") - buffer_1 = T.buffer_decl([160], "uint8") - placeholder_3 = T.buffer_decl([960], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([1024], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([848], "uint8") + buffer_1 = T.Buffer([160], "uint8") + placeholder_3 = T.Buffer([960], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([1024], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 4, 8, 0, 8, placeholder_3[120], 0, 0, 0, T.float32(0.5), 10, "NHWC", 96, 8, 1, "int8", 8, 8, 16, 8, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 848, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -662,10 +662,10 @@ class Conv2dInlineCopy2: def main(input_placeholder_3: T.Buffer[(1, 7, 9, 5), "int8"], input_ethosu_write_1: T.Buffer[(1, 3, 5, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([656], "uint8") - placeholder_3 = T.buffer_decl([315], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([240], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([656], "uint8") + placeholder_3 = T.Buffer([315], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([240], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 3, 5, 3, 3, 0, 5, placeholder_3[146], 0, 0, 0, T.float32(0.5), 10, "NHWC", 45, 5, 1, "int8", 3, 5, 16, 3, 0, 5, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 80, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 656, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -706,10 +706,10 @@ class Conv2dInlineReshape1: def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([848], "uint8") - placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([848], "uint8") + placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -722,10 +722,10 @@ class Conv2dInlineReshape2: def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([848], "uint8") - placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([848], "uint8") + placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -738,10 +738,10 @@ class Conv2dInlineReshape3: def main(input_placeholder_3: T.Buffer[(192, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([848], "uint8") - placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([848], "uint8") + placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -754,9 +754,9 @@ class Conv2dInlineReshape4: def main(placeholder_3: T.Buffer[(192,), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([160], "uint8") - buffer_1 = T.buffer_decl([848], "uint8") - ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data) + buffer = T.Buffer([160], "uint8") + buffer_1 = T.Buffer([848], "uint8") + ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py index e23954f4cb67..29e1f9814c81 100644 --- a/tests/python/contrib/test_ethosu/test_replace_copy.py +++ b/tests/python/contrib/test_ethosu/test_replace_copy.py @@ -37,12 +37,12 @@ class ReferenceModule: def main(input_placeholder_3: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 8), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer_1 = T.buffer_decl([384], "uint8") - placeholder_3 = T.buffer_decl([8192], dtype="int8", data=input_placeholder_3.data) - ethosu_write_1 = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write_1.data) + buffer_1 = T.Buffer([384], "uint8") + placeholder_3 = T.Buffer([8192], dtype="int8", data=input_placeholder_3.data) + ethosu_write_1 = T.Buffer([2048], dtype="int8", data=input_ethosu_write_1.data) # body placeholder_global_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_global = T.buffer_decl([384], "uint8", data=placeholder_global_data) + placeholder_global = T.Buffer([384], "uint8", data=placeholder_global_data) T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 384, placeholder_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 304, T.int8(-1), T.int8(-1), 12, placeholder_global[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -81,15 +81,15 @@ class WeightStream: def main(input_placeholder_5: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 16), "int8"]) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.buffer_decl([528], "uint8") - buffer_2 = T.buffer_decl([336], "uint8") - placeholder_5 = T.buffer_decl([8192], dtype="int8", data=input_placeholder_5.data) - ethosu_write_1 = T.buffer_decl([4096], dtype="int8", data=input_ethosu_write_1.data) + buffer = T.Buffer([528], "uint8") + buffer_2 = T.Buffer([336], "uint8") + placeholder_5 = T.Buffer([8192], dtype="int8", data=input_placeholder_5.data) + ethosu_write_1 = T.Buffer([4096], dtype="int8", data=input_ethosu_write_1.data) # body placeholder_d_global_data = T.allocate([528], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global = T.buffer_decl([528], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.Buffer([528], "uint8", data=placeholder_d_global_data) placeholder_d_global_1_data = T.allocate([336], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global_1 = T.buffer_decl([336], "uint8", data=placeholder_d_global_1_data) + placeholder_d_global_1 = T.Buffer([336], "uint8", data=placeholder_d_global_1_data) T.evaluate(T.call_extern("ethosu_copy", buffer[0], 528, placeholder_d_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 336, placeholder_d_global_1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 10, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_d_global[0], 416, T.int8(-1), T.int8(-1), 12, placeholder_d_global[416], 112, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py index 1e9b43b47ada..c6f6bc2c6c61 100644 --- a/tests/python/contrib/test_ethosu/test_scheduler.py +++ b/tests/python/contrib/test_ethosu/test_scheduler.py @@ -182,18 +182,18 @@ class DiamondGraphTir: @T.prim_func def main(input_placeholder: T.Buffer[(1, 56, 56, 96), "int8"], input_ethosu_write: T.Buffer[(1, 56, 56, 24), "int8"]) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.buffer_decl([301056], dtype='int8', data=input_placeholder.data) - ethosu_write = T.buffer_decl([75264], dtype='int8', data=input_ethosu_write.data) - buffer1 = T.buffer_decl([2848], "uint8") - buffer3 = T.buffer_decl([976], "uint8") + placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data) + ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data) + buffer1 = T.Buffer([2848], "uint8") + buffer3 = T.Buffer([976], "uint8") p1_data = T.allocate([2848], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.buffer_decl([2848], "uint8", data=p1_data) + p1 = T.Buffer([2848], "uint8", data=p1_data) p2_data = T.allocate([976], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.buffer_decl([976], "uint8", data=p2_data) + p2 = T.Buffer([976], "uint8", data=p2_data) p5_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p5 = T.buffer_decl([75264], "int8", data=p5_data) + p5 = T.Buffer([75264], "int8", data=p5_data) p6_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p6 = T.buffer_decl([75264], "int8", data=p6_data) + p6 = T.Buffer([75264], "int8", data=p6_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 2848, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 976, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 56, 56, 96, 56, 0, 56, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 5376, 96, 1, "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 1344, 24, 1, 1, 1, 1, 1, 1, 1, p1[0], 2608, T.int8(-1), T.int8(-1), 12, p1[2608], 240, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py index f205bc3b26ca..d68c806f72d9 100644 --- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py +++ b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py @@ -36,8 +36,8 @@ class SingleEthosUConv2D: def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(1024,), "int8"]) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_4 = T.buffer_decl([1], "uint8") - placeholder_5 = T.buffer_decl([1], "uint8") + placeholder_4 = T.Buffer([1], "uint8") + placeholder_5 = T.Buffer([1], "uint8") # body T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 8, 8, 3, 8, 0, 8, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 8, 8, 16, 8, 0, 8, ethosu_conv2d_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_4[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_5[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8")) # fmt: on @@ -51,10 +51,10 @@ class MultiEthosUConv2D: def main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512,), "int8"]) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_9 = T.buffer_decl([1], "uint8") - placeholder_7 = T.buffer_decl([1], "uint8") - placeholder_8 = T.buffer_decl([1], "uint8") - placeholder_5 = T.buffer_decl([1], "uint8") + placeholder_9 = T.Buffer([1], "uint8") + placeholder_7 = T.Buffer([1], "uint8") + placeholder_8 = T.Buffer([1], "uint8") + placeholder_5 = T.Buffer([1], "uint8") # body ethosu_conv2d_2 = T.decl_buffer([1024], "uint8") ethosu_conv2d_3 = T.decl_buffer([2048], "uint8") @@ -73,8 +73,8 @@ class MultiEthosUCopy: def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(2048,), "int8"]) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - placeholder_5 = T.buffer_decl([1], "int32") - placeholder_4 = T.buffer_decl([1], "uint8") + placeholder_5 = T.Buffer([1], "int32") + placeholder_4 = T.Buffer([1], "uint8") # body placeholder_global = T.decl_buffer([256], "uint8") placeholder_d_global = T.decl_buffer([8], "int32") @@ -90,14 +90,14 @@ def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(20 class WeightStreamOnly: @T.prim_func def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: - buffer = T.buffer_decl([1], "uint8") - buffer_1 = T.buffer_decl([1], "uint8") - buffer_2 = T.buffer_decl([1], "uint8") - buffer_3 = T.buffer_decl([1], "uint8") - buffer_4 = T.buffer_decl([1], "uint8") - buffer_5 = T.buffer_decl([1], "uint8") - buffer_6 = T.buffer_decl([1], "uint8") - buffer_7 = T.buffer_decl([1], "uint8") + buffer = T.Buffer([1], "uint8") + buffer_1 = T.Buffer([1], "uint8") + buffer_2 = T.Buffer([1], "uint8") + buffer_3 = T.Buffer([1], "uint8") + buffer_4 = T.Buffer([1], "uint8") + buffer_5 = T.Buffer([1], "uint8") + buffer_6 = T.Buffer([1], "uint8") + buffer_7 = T.Buffer([1], "uint8") # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True, @@ -136,16 +136,16 @@ def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), class MixedRead: @T.prim_func def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None: - buffer = T.buffer_decl([1], "uint8") - buffer_1 = T.buffer_decl([1], "uint8") - buffer_2 = T.buffer_decl([1], "uint8") - buffer_3 = T.buffer_decl([1], "uint8") - buffer_4 = T.buffer_decl([1], "uint8") - buffer_5 = T.buffer_decl([1], "uint8") - buffer_6 = T.buffer_decl([1], "uint8") - buffer_7 = T.buffer_decl([1], "uint8") - buffer_8 = T.buffer_decl([1], "uint8") - buffer_9 = T.buffer_decl([1], "uint8") + buffer = T.Buffer([1], "uint8") + buffer_1 = T.Buffer([1], "uint8") + buffer_2 = T.Buffer([1], "uint8") + buffer_3 = T.Buffer([1], "uint8") + buffer_4 = T.Buffer([1], "uint8") + buffer_5 = T.Buffer([1], "uint8") + buffer_6 = T.Buffer([1], "uint8") + buffer_7 = T.Buffer([1], "uint8") + buffer_8 = T.Buffer([1], "uint8") + buffer_9 = T.Buffer([1], "uint8") # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True, @@ -161,11 +161,11 @@ def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), buffer_9.name: buffer_9}}) # body ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) placeholder_global_data = T.allocate([80], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_global = T.buffer_decl([80], "uint8", data=placeholder_global_data) + placeholder_global = T.Buffer([80], "uint8", data=placeholder_global_data) placeholder_d_global_data = T.allocate([32], "uint8", "global", annotations={"disable_lower_builtin":True}) - placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 80, placeholder_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_3[0], 32, placeholder_d_global[0], dtype="handle")) @@ -673,9 +673,9 @@ def populate_ethosu_copy_calls(stmt): class MixedConstantDatatypes: @T.prim_func def main(placeholder_4: T.Buffer[(2048,), "int8"], ethosu_write_1: T.Buffer[(16,), "int8"]) -> None: - buffer = T.buffer_decl([1], "uint8") - buffer_1 = T.buffer_decl([1], "uint8") - buffer_2 = T.buffer_decl([1], "int16") + buffer = T.Buffer([1], "uint8") + buffer_1 = T.Buffer([1], "uint8") + buffer_2 = T.Buffer([1], "int16") # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True, diff --git a/tests/python/relay/aot/test_pass_aot_lower_main.py b/tests/python/relay/aot/test_pass_aot_lower_main.py index 093305203a94..b523e019299c 100644 --- a/tests/python/relay/aot/test_pass_aot_lower_main.py +++ b/tests/python/relay/aot/test_pass_aot_lower_main.py @@ -180,12 +180,12 @@ def func(a: T.handle, output: T.handle) -> None: T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []}) tmp_read = T.buffer_var("uint8", "") # buffer definition - tmp_read_1 = T.buffer_decl([T.uint64(140)], dtype="uint8", data=tmp_read) + tmp_read_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_read) a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16) output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16) # body tmp_write: T.Ptr[T.uint8] = output_buffer.data - tmp_write_1 = T.buffer_decl([T.uint64(140)], dtype="uint8", data=tmp_write) + tmp_write_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_write) for i in T.serial(140): tmp_write_1[i] = T.let(tmp_read, a_buffer.data, tmp_read_1[i]) # fmt: on diff --git a/tests/python/unittest/test_lower_build.py b/tests/python/unittest/test_lower_build.py index 665697b84be9..4c188d2f834b 100644 --- a/tests/python/unittest/test_lower_build.py +++ b/tests/python/unittest/test_lower_build.py @@ -60,9 +60,9 @@ def main( ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "from_legacy_te_schedule": True, "tir.noalias": True}) - A_flat = T.buffer_decl([16384], data=A.data) - B_flat = T.buffer_decl([16384], data=B.data) - C_flat = T.buffer_decl([16384], data=C.data) + A_flat = T.Buffer([16384], data=A.data) + B_flat = T.Buffer([16384], data=B.data) + C_flat = T.Buffer([16384], data=C.data) # body for x, y in T.grid(128, 128): C_flat[x * 128 + y] = 0.0 @@ -82,9 +82,9 @@ def main( ) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - A_flat = T.buffer_decl([16384], data=A.data) - B_flat = T.buffer_decl([16384], data=B.data) - C_flat = T.buffer_decl([16384], data=C.data) + A_flat = T.Buffer([16384], data=A.data) + B_flat = T.Buffer([16384], data=B.data) + C_flat = T.Buffer([16384], data=C.data) # body for x, y in T.grid(128, 128): C_flat[x * 128 + y] = 0.0 diff --git a/tests/python/unittest/test_tir_renew_defs.py b/tests/python/unittest/test_tir_renew_defs.py index 28b440a608dc..65f81499bdfd 100644 --- a/tests/python/unittest/test_tir_renew_defs.py +++ b/tests/python/unittest/test_tir_renew_defs.py @@ -136,7 +136,7 @@ def test_undefined_buffer(): def access_alloc(): # Buffer A should be remapped A_data = T.allocate([128], "float16", "global") - A = T.buffer_decl(shape=[128], dtype="float16", data=A_data) + A = T.Buffer(shape=[128], dtype="float16", data=A_data) # check if buffer var also get remapped T.evaluate(A.data) for i in range(128): diff --git a/tests/python/unittest/test_tir_schedule_cache_read_write.py b/tests/python/unittest/test_tir_schedule_cache_read_write.py index 6a75057e72ff..bcb214594cb8 100644 --- a/tests/python/unittest/test_tir_schedule_cache_read_write.py +++ b/tests/python/unittest/test_tir_schedule_cache_read_write.py @@ -1011,9 +1011,9 @@ def cache_write_allocate_const( ): B = T.alloc_buffer([128, 128], dtype="float32") const = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) - const_1 = T.buffer_decl([8], dtype="float32", data=const) + const_1 = T.Buffer([8], dtype="float32", data=const) const2 = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) - const_2 = T.buffer_decl([8], dtype="float32", data=const) + const_2 = T.Buffer([8], dtype="float32", data=const) for i, j in T.grid(128, 128): for x in range(8): with T.block("B"): @@ -1037,8 +1037,8 @@ def cache_write_allocate_const_output( A_global = T.alloc_buffer([128, 128], dtype="float32") C_global = T.alloc_buffer([128, 128], dtype="float16") const_2 = T.allocate_const([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) - const_1 = T.buffer_decl([8], dtype="float32", data=const_2) - const_2_1 = T.buffer_decl([8], dtype="float32", data=const_2) + const_1 = T.Buffer([8], dtype="float32", data=const_2) + const_2_1 = T.Buffer([8], dtype="float32", data=const_2) const2 = T.allocate_const([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8]) for ax0, ax1 in T.grid(128, 128): with T.block("A_global"): diff --git a/tests/python/unittest/test_tir_transform_common_subexpr_elim.py b/tests/python/unittest/test_tir_transform_common_subexpr_elim.py index be229a580f01..113d9f047478 100644 --- a/tests/python/unittest/test_tir_transform_common_subexpr_elim.py +++ b/tests/python/unittest/test_tir_transform_common_subexpr_elim.py @@ -349,7 +349,7 @@ def test_no_normalization_without_commoning(): # ------------------------------------------------- @T.prim_func def func_distributivity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32) -> None: - B = T.buffer_decl((50,), "int32") + B = T.Buffer((50,), "int32") B[i1] = x * (y + z) B[i2] = x * y + x * z @@ -358,7 +358,7 @@ def func_distributivity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.i def func_distributivity_expected( i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32 ) -> None: - B = T.buffer_decl((50,), "int32") + B = T.Buffer((50,), "int32") cse_var_1 = T.var("int32") with T.let(cse_var_1, x * y + x * z): B[i1] = cse_var_1 @@ -367,7 +367,7 @@ def func_distributivity_expected( @T.prim_func def func_associativity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32) -> None: - B = T.buffer_decl((50,), "int32") + B = T.Buffer((50,), "int32") B[i1] = (x + y) + z B[i2] = x + (y + z) @@ -376,7 +376,7 @@ def func_associativity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.in def func_associativity_expected( i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32 ) -> None: - B = T.buffer_decl((50,), "int32") + B = T.Buffer((50,), "int32") cse_var_1 = T.var("int32") with T.let(cse_var_1, (x + y) + z): B[i1] = cse_var_1 diff --git a/tests/python/unittest/test_tir_transform_extract_constants.py b/tests/python/unittest/test_tir_transform_extract_constants.py index 5de06e38a557..b3e0aa74f96d 100644 --- a/tests/python/unittest/test_tir_transform_extract_constants.py +++ b/tests/python/unittest/test_tir_transform_extract_constants.py @@ -28,7 +28,7 @@ def constant1(a: T.handle) -> None: A = T.match_buffer(a, (10), "int32") B = T.alloc_buffer((10), "int32") K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K = T.buffer_decl(shape=(10), dtype="int32", data=K_data) + K = T.Buffer(shape=(10), dtype="int32", data=K_data) for x in T.serial(0, 10): B[x] = A[x] + K[x] @@ -37,7 +37,7 @@ def constant2(a: T.handle) -> None: A = T.match_buffer(a, (10), "int32") B = T.alloc_buffer((10), "int32") K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K = T.buffer_decl(shape=(10), dtype="int32", data=K_data) + K = T.Buffer(shape=(10), dtype="int32", data=K_data) for x in T.serial(0, 10): B[x] = A[x] + K[x] @@ -46,7 +46,7 @@ def constant3(a: T.handle) -> None: A = T.match_buffer(a, (10), "int32") B = T.alloc_buffer((10), "int32") K_data = T.allocate_const([1, 2, 3, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K = T.buffer_decl(shape=(10), dtype="int32", data=K_data) + K = T.Buffer(shape=(10), dtype="int32", data=K_data) for x in T.serial(0, 10): B[x] = A[x] + K[x] diff --git a/tests/python/unittest/test_tir_transform_flatten_buffer.py b/tests/python/unittest/test_tir_transform_flatten_buffer.py index 513e04dc2090..12523fbdb2ae 100644 --- a/tests/python/unittest/test_tir_transform_flatten_buffer.py +++ b/tests/python/unittest/test_tir_transform_flatten_buffer.py @@ -41,11 +41,11 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): C[i, j] = B_new[0, j] * 2.0 def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): - A = T.buffer_decl(256, dtype="float32", data=input_A.data) - C = T.buffer_decl(256, dtype="float32", data=input_C.data) + A = T.Buffer(256, dtype="float32", data=input_A.data) + C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): B_new_data = T.allocate([16], "float32", scope="global") - B_new = T.buffer_decl([16], "float32", scope="global", data=B_new_data) + B_new = T.Buffer([16], "float32", scope="global", data=B_new_data) for j in T.serial(0, 16): B_new[j] = A[((i * 16) + j)] + 1.0 for j in T.serial(0, 16): @@ -56,7 +56,7 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare): """2-d buffers are flattened to 1-d Like TestElementwise, but the TIR doesn't have the DeclBuffer - node. The T.buffer_decl declaration applies only during the + node. The T.Buffer declaration applies only during the parsing the TVMScript, and doesn't occur in the TIR itself. In this case, the allocation should be assumed to be targeting flat memory, and should be flattened to a 1-d allocation. @@ -65,18 +65,18 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare): def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for i in T.serial(0, 16): B_new_data = T.allocate([1, 16], "float32", "global") - B_new = T.buffer_decl([1, 16], "float32", data=B_new_data) + B_new = T.Buffer([1, 16], "float32", data=B_new_data) for j in T.serial(0, 16): B_new[0, j] = A[i, j] + 1.0 for j in T.serial(0, 16): C[i, j] = B_new[0, j] * 2.0 def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): - A = T.buffer_decl(256, dtype="float32", data=input_A.data) - C = T.buffer_decl(256, dtype="float32", data=input_C.data) + A = T.Buffer(256, dtype="float32", data=input_A.data) + C = T.Buffer(256, dtype="float32", data=input_C.data) for i in T.serial(0, 16): B_new_data = T.allocate([16], "float32", "global") - B_new = T.buffer_decl(16, "float32", data=B_new_data) + B_new = T.Buffer(16, "float32", data=B_new_data) for j in T.serial(0, 16): B_new[j] = A[((i * 16) + j)] + 1.0 for j in T.serial(0, 16): @@ -101,8 +101,8 @@ def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): C[i0 * 4 + i1 * 2 + i2, j] = B[0, j] * 2.0 def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): - A = T.buffer_decl(256, dtype="float32", data=input_A.data) - C = T.buffer_decl(256, dtype="float32", data=input_C.data) + A = T.Buffer(256, dtype="float32", data=input_A.data) + C = T.Buffer(256, dtype="float32", data=input_C.data) i0 = T.env_thread("blockIdx.x") i1 = T.env_thread("threadIdx.x") @@ -112,7 +112,7 @@ def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), T.launch_thread(i1, 2) T.launch_thread(i2, 2) B_data = T.allocate([16], "float32", scope="local") - B = T.buffer_decl([16], "float32", scope="local", data=B_data) + B = T.Buffer([16], "float32", scope="local", data=B_data) for j in range(0, 16): B[j] = A[i0 * 64 + i1 * 32 + i2 * 16 + j] + 1.0 for j in range(0, 16): @@ -136,12 +136,12 @@ def before(a: T.handle, c: T.handle, n: T.int32, m: T.int32) -> None: def expected(a: T.handle, c: T.handle, n: T.int32, m: T.int32) -> None: input_A = T.match_buffer(a, (n, m), "float32") input_C = T.match_buffer(c, (n, m), "float32") - A = T.buffer_decl(n * m, "float32", data=input_A.data) - C = T.buffer_decl(n * m, "float32", data=input_C.data) + A = T.Buffer(n * m, "float32", data=input_A.data) + C = T.Buffer(n * m, "float32", data=input_C.data) for i in range(0, n): B_data = T.allocate([m], "float32", scope="global") - B = T.buffer_decl([m], "float32", scope="global", data=B_data) + B = T.Buffer([m], "float32", scope="global", data=B_data) for j in range(0, m): B[j] = A[i * m + j] + 1.0 for j in range(0, m): @@ -160,14 +160,14 @@ def before(A: T.Buffer[(4, 32), "float32"], D: T.Buffer[(4, 32), "float32"]): D[i, j] = C[i, j] * 2.0 def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), "float32"]): - A = T.buffer_decl(128, "float32", data=input_A.data) - D = T.buffer_decl(128, "float32", data=input_D.data) + A = T.Buffer(128, "float32", data=input_A.data) + D = T.Buffer(128, "float32", data=input_D.data) for i, j in T.grid(4, 32): B_data = T.allocate([128], "float32", scope="global") - B = T.buffer_decl([128], "float32", scope="global", data=B_data) + B = T.Buffer([128], "float32", scope="global", data=B_data) C_data = T.allocate([128], "float32", scope="global") - C = T.buffer_decl([128], "float32", scope="global", data=C_data) + C = T.Buffer([128], "float32", scope="global", data=C_data) B[i * 32 + j] = A[i * 32 + j] + 1.0 C[i * 32 + j] = A[i * 32 + j] + B[i * 32 + j] D[i * 32 + j] = C[i * 32 + j] * 2.0 @@ -179,18 +179,18 @@ class TestStrided(BaseCompare): def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]): for i0 in T.serial(4): B = T.decl_buffer([4, 17], "float32") - B_1 = T.buffer_decl([4, 16], dtype="float32", data=B.data, strides=[17, 1]) + B_1 = T.Buffer([4, 16], dtype="float32", data=B.data, strides=[17, 1]) for i1, j in T.grid(4, 16): B_1[i1, j] = A[i0 * 4 + i1, j] + 1.0 for i1, j in T.grid(4, 16): C[i0 * 4 + i1, j] = B_1[i1, j] * 2.0 def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]): - A = T.buffer_decl(256, dtype="float32", data=input_A.data) - C = T.buffer_decl(256, dtype="float32", data=input_C.data) + A = T.Buffer(256, dtype="float32", data=input_A.data) + C = T.Buffer(256, dtype="float32", data=input_C.data) for i0 in T.serial(0, 4): B_new_data = T.allocate([68], "float32", scope="global") - B_new = T.buffer_decl([68], "float32", scope="global", data=B_new_data) + B_new = T.Buffer([68], "float32", scope="global", data=B_new_data) for i1 in T.serial(0, 4): for j in T.serial(0, 16): B_new[i1 * 17 + j] = A[i0 * 64 + i1 * 16 + j] + 1.0 @@ -207,8 +207,8 @@ def before(A: T.Buffer[10, "bool"], B: T.Buffer[10, "bool"]) -> None: B[i0] = A[i0] def expected(input_A: T.Buffer[10, "bool"], input_B: T.Buffer[10, "bool"]) -> None: - A = T.buffer_decl(10, dtype="int8", data=input_A.data) - B = T.buffer_decl(10, dtype="int8", data=input_B.data) + A = T.Buffer(10, dtype="int8", data=input_A.data) + B = T.Buffer(10, dtype="int8", data=input_B.data) # body for i0 in T.serial(10): B[i0] = T.cast(T.cast(A[i0], "bool"), "int8") @@ -285,9 +285,7 @@ def before(): def expected(): A_data = T.allocate([30, 1001], dtype="float32", scope="global") - A = T.buffer_decl( - [30, 1001], dtype="float32", scope="global", axis_separators=[1], data=A_data - ) + A = T.Buffer([30, 1001], dtype="float32", scope="global", axis_separators=[1], data=A_data) for i0, i1, i2, i3, i4, i5 in T.grid(2, 3, 5, 7, 11, 13): T.evaluate(A[i0 * 15 + i1 * 5 + i2, i3 * 143 + i4 * 13 + i5]) diff --git a/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py b/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py index d75fb2b03e39..b7bd6cb46fd6 100644 --- a/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py +++ b/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py @@ -207,7 +207,7 @@ def main(A: T.handle, tensor: T.handle) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - tensor_2 = T.buffer_decl([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) + tensor_2 = T.Buffer([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) A_1 = T.match_buffer(A, [1, 12, 14, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) tensor_1 = T.match_buffer(tensor, [1, 8, 8, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) # body @@ -239,7 +239,7 @@ def main(A: T.handle, tensor: T.handle) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) # buffer definition - tensor_2 = T.buffer_decl([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) + tensor_2 = T.Buffer([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) A_1 = T.match_buffer(A, [1, 12, 14, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) tensor_1 = T.match_buffer(tensor, [1, 8, 8, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1) # body diff --git a/tests/python/unittest/test_tir_transform_inject_virtual_thread.py b/tests/python/unittest/test_tir_transform_inject_virtual_thread.py index eb5ed08bb5af..d32714938424 100644 --- a/tests/python/unittest/test_tir_transform_inject_virtual_thread.py +++ b/tests/python/unittest/test_tir_transform_inject_virtual_thread.py @@ -146,13 +146,13 @@ def before_func(): vthread = T.env_thread("vthread") T.launch_thread(vthread, 4) B_data = T.allocate([4], "int32", scope="shared") - B = T.buffer_decl([4], "int32", data=B_data, scope="shared") + B = T.Buffer([4], "int32", data=B_data, scope="shared") B[0:4] = T.broadcast(vthread, 4) @T.prim_func def expected_func(): B_data = T.allocate([16], "int32", scope="shared") - B = T.buffer_decl([16], "int32", data=B_data, scope="shared") + B = T.Buffer([16], "int32", data=B_data, scope="shared") # The indices for B should each be a single Ramp node, and # should not be the sum of a Ramp and Broadcast node. B[T.Mul(0, 4) : T.Mul(0, 4) + 4] = T.broadcast(0, 4) @@ -175,13 +175,13 @@ def before_func(): vthread = T.env_thread("vthread") T.launch_thread(vthread, 4) B_data = T.allocate([4], "int32", "shared") - B = T.buffer_decl([4], "int32", data=B_data, scope="shared") + B = T.Buffer([4], "int32", data=B_data, scope="shared") B[0:4] = T.broadcast(vthread, 4) @T.prim_func def expected_func(): B_data = T.allocate([4], "int32x4", "shared") - B = T.buffer_decl([4], "int32x4", data=B_data, scope="shared") + B = T.Buffer([4], "int32x4", data=B_data, scope="shared") B[T.Mul(0, 4) / 4] = T.broadcast(0, 4) B[T.Mul(1, 4) / 4] = T.broadcast(1, 4) B[T.Mul(2, 4) / 4] = T.broadcast(2, 4) diff --git a/tests/python/unittest/test_tir_transform_loop_partition.py b/tests/python/unittest/test_tir_transform_loop_partition.py index 7dd8e794103e..1a40f52140ee 100644 --- a/tests/python/unittest/test_tir_transform_loop_partition.py +++ b/tests/python/unittest/test_tir_transform_loop_partition.py @@ -583,10 +583,10 @@ def partitioned_concat_3( placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], T_concat: T.Buffer[(1, 128, 28, 28), "int8"], ) -> None: - placeholder_flat = T.buffer_decl([50176], "int8", data=placeholder.data) - placeholder_1_flat = T.buffer_decl([25088], "int8", data=placeholder_1.data) - placeholder_2_flat = T.buffer_decl([25088], "int8", data=placeholder_2.data) - T_concat_flat = T.buffer_decl([100352], "int8", data=T_concat.data) + placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) + placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) + placeholder_2_flat = T.Buffer([25088], "int8", data=placeholder_2.data) + T_concat_flat = T.Buffer([100352], "int8", data=T_concat.data) for i1, i2, i3 in T.grid(64, 28, 28): T_concat_flat[i1 * 784 + i2 * 28 + i3] = placeholder_flat[i1 * 784 + i2 * 28 + i3] for i1, i2, i3 in T.grid(32, 28, 28): @@ -602,10 +602,10 @@ def concat_func_3( placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"], T_concat: T.Buffer[(1, 128, 28, 28), "int8"], ) -> None: - placeholder_flat = T.buffer_decl([50176], "int8", data=placeholder.data) - placeholder_1_flat = T.buffer_decl([25088], "int8", data=placeholder_1.data) - placeholder_2_flat = T.buffer_decl([25088], "int8", data=placeholder_2.data) - T_concat_flat = T.buffer_decl([100352], "int8", data=T_concat.data) + placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data) + placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data) + placeholder_2_flat = T.Buffer([25088], "int8", data=placeholder_2.data) + T_concat_flat = T.Buffer([100352], "int8", data=T_concat.data) for i1 in T.serial(128, annotations={"pragma_loop_partition_hint": 1}): for i2, i3 in T.grid(28, 28): if 96 <= i1: @@ -632,8 +632,8 @@ def test_loop_partition_unroll_hint(): def main( A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] ) -> None: - A = T.buffer_decl(150528, "int8", data=A_arg.data) - B = T.buffer_decl(25088, "int8", data=B_arg.data) + A = T.Buffer(150528, "int8", data=A_arg.data) + B = T.Buffer(25088, "int8", data=B_arg.data) for ax0 in T.serial( 112, annotations={"pragma_loop_partition_hint": True}, @@ -646,8 +646,8 @@ def main( def partitioned_main( A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"] ) -> None: - A = T.buffer_decl(150528, dtype="int8", data=A_arg.data) - B = T.buffer_decl(25088, dtype="int8", data=B_arg.data) + A = T.Buffer(150528, dtype="int8", data=A_arg.data) + B = T.Buffer(25088, dtype="int8", data=B_arg.data) # body for ax1, ax2, ax3 in T.grid(224, 7, 16): if 3 <= ax2 and ax3 < 3: @@ -706,11 +706,11 @@ def main(): @T.prim_func def partitioned_main(): placeholder_0_dm = T.allocate([16384], "int8", "global") - placeholder_0_dm_1 = T.buffer_decl([16384], dtype="int8", data=placeholder_0_dm) + placeholder_0_dm_1 = T.Buffer([16384], dtype="int8", data=placeholder_0_dm) for i3_0 in T.unroll(2): for i2_0 in T.unroll(2): pad_temp = T.allocate([4096], "int8", "global") - pad_temp_1 = T.buffer_decl([4096], dtype="int8", data=pad_temp) + pad_temp_1 = T.Buffer([4096], dtype="int8", data=pad_temp) for ax0, ax1, ax2 in T.grid(16, 16, 16): if 6 <= i2_0 * 4 + ax0 and 6 <= i3_0 * 4 + ax1: pad_temp_1[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[ @@ -718,7 +718,7 @@ def partitioned_main(): ] for i2_0 in T.unroll(2): pad_temp_2 = T.allocate([4096], "int8", "global") - pad_temp_3 = T.buffer_decl([4096], dtype="int8", data=pad_temp_2) + pad_temp_3 = T.Buffer([4096], dtype="int8", data=pad_temp_2) for ax0, ax1, ax2 in T.grid(16, 16, 16): if 6 <= i2_0 * 4 + ax0: pad_temp_3[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[ @@ -727,7 +727,7 @@ def partitioned_main(): for i3_0 in T.unroll(2): for i2_0 in T.unroll(2): pad_temp_4 = T.allocate([4096], "int8", "global") - pad_temp_5 = T.buffer_decl([4096], dtype="int8", data=pad_temp_4) + pad_temp_5 = T.Buffer([4096], dtype="int8", data=pad_temp_4) for ax0, ax1, ax2 in T.grid(16, 16, 16): if 6 <= i2_0 * 4 + ax0 and i3_0 * 4 + ax1 < 14: pad_temp_5[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[ diff --git a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py index 635badb847bd..5cdc272440e7 100644 --- a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py +++ b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py @@ -28,9 +28,9 @@ class Before: def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data) - weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data) - conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data) + inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) + weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data) + conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data) # var definition threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") @@ -59,9 +59,9 @@ class After: def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None: # function attr dict T.func_attr({"global_symbol": "main", "tir.noalias": True}) - inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data) - weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data) - conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data) + inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) + weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data) + conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data) # var definition threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") @@ -93,9 +93,9 @@ def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 51 # var definition threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") - inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data) - weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data) - conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data) + inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data) + weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data) + conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data) # body T.launch_thread(blockIdx_x, 64) conv2d_transpose_nhwc_local = T.decl_buffer([8], "float32", scope="local") diff --git a/tests/python/unittest/test_tir_transform_storage_rewrite.py b/tests/python/unittest/test_tir_transform_storage_rewrite.py index 533a835e0f9c..2ed2e6ec6d71 100644 --- a/tests/python/unittest/test_tir_transform_storage_rewrite.py +++ b/tests/python/unittest/test_tir_transform_storage_rewrite.py @@ -655,7 +655,7 @@ def test_access_in_let_value(): def func(A: T.Buffer[(8,), "float32"]): for i in range(8): B_data = T.allocate((1,), "float32", "global") - B = T.buffer_decl(shape=[1], dtype="float32", data=B_data) + B = T.Buffer(shape=[1], dtype="float32", data=B_data) B[0] = 3.14 x: T.float32 = T.exp(B[0], dtype="float32") A[i] = (x + 1.0) / (x - 1.0) @@ -663,7 +663,7 @@ def func(A: T.Buffer[(8,), "float32"]): @T.prim_func def func_rewritten(A: T.Buffer[(8,), "float32"]) -> None: B_data = T.allocate((1,), "float32", "global") - B = T.buffer_decl(shape=[1], dtype="float32", data=B_data) + B = T.Buffer(shape=[1], dtype="float32", data=B_data) for i in range(8): B[0] = 3.14 x: T.float32 = T.exp(B[0], dtype="float32") @@ -690,12 +690,12 @@ class TestLetBufferRewrite(BaseCompare): def before() -> None: A_data: T.Ptr[T.int32] = T.call_extern("dummy_func", dtype="handle") - A = T.buffer_decl([8], "int32", data=A_data) + A = T.Buffer([8], "int32", data=A_data) A[0:8] = T.broadcast(42, 8) def expected() -> None: A_data: T.Ptr[T.int32x8] = T.call_extern("dummy_func", dtype="handle") - A = T.buffer_decl([1], "int32x8", data=A_data) + A = T.Buffer([1], "int32x8", data=A_data) A[0] = T.broadcast(42, 8) @@ -708,7 +708,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): dtype="float32", scope="global", ) - B = T.buffer_decl( + B = T.Buffer( [16, 16], dtype="float32", axis_separators=[1], @@ -719,7 +719,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): dtype="float32", scope="global", ) - C = T.buffer_decl( + C = T.Buffer( [16, 16], dtype="float32", axis_separators=[1], @@ -741,8 +741,8 @@ def expected(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]) dtype="float32", scope="global", ) - B = T.buffer_decl([16, 16], dtype="float32", axis_separators=[1], data=B_data) - C = T.buffer_decl( + B = T.Buffer([16, 16], dtype="float32", axis_separators=[1], data=B_data) + C = T.Buffer( [16, 16], dtype="float32", axis_separators=[1], @@ -777,7 +777,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): dtype="float32", scope="global", ) - B = T.buffer_decl( + B = T.Buffer( [16, 16], dtype="float32", axis_separators=[1], @@ -788,7 +788,7 @@ def before(A: T.Buffer[(16, 16), "float32"], D: T.Buffer[(16, 16), "float32"]): dtype="float32", scope="global", ) - C = T.buffer_decl( + C = T.Buffer( [20, 20], dtype="float32", axis_separators=[1], diff --git a/tests/python/unittest/test_tir_transform_thread_sync.py b/tests/python/unittest/test_tir_transform_thread_sync.py index b2a0581d6980..b7caf04d659c 100644 --- a/tests/python/unittest/test_tir_transform_thread_sync.py +++ b/tests/python/unittest/test_tir_transform_thread_sync.py @@ -101,7 +101,7 @@ def test_sync_read_thread_id_independent_location(): def func(p0_arg: T.Buffer[(1, 2, 1, 1), "float32"], p1: T.Buffer[2, "float32"]) -> None: threadIdx_x = T.env_thread("threadIdx.x") blockIdx_x = T.env_thread("blockIdx.x") - p0 = T.buffer_decl([2], dtype="float32", data=p0_arg.data) + p0 = T.Buffer([2], dtype="float32", data=p0_arg.data) result_local = T.alloc_buffer([1], dtype="float32", scope="local") temp_shared = T.alloc_buffer([1], dtype="float32", scope="shared") T.launch_thread(blockIdx_x, 8) diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py index 25e895573551..6145c39b876d 100644 --- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py +++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py @@ -92,13 +92,13 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholde T_cast_21 = T.match_buffer(T_cast_20, [289], dtype="uint8", elem_offset=0, align=64, offset_factor=1) # body PaddedInput_7_data = T.allocate([157323], "int16", "global") - PaddedInput_7 = T.buffer_decl(shape=[157323], dtype="int16", data=PaddedInput_7_data) + PaddedInput_7 = T.Buffer(shape=[157323], dtype="int16", data=PaddedInput_7_data) for i0_i1_fused_7 in T.serial(0, 229): for i2_7, i3_7 in T.grid(229, 3): PaddedInput_7[(((i0_i1_fused_7*687) + (i2_7*3)) + i3_7)] = T.if_then_else(((((2 <= i0_i1_fused_7) and (i0_i1_fused_7 < 226)) and (2 <= i2_7)) and (i2_7 < 226)), placeholder_65[((((i0_i1_fused_7*672) + (i2_7*3)) + i3_7) - 1350)], T.int16(0), dtype="int16") for ax0_ax1_fused_ax2_fused_7 in T.serial(0, 12544): Conv2dOutput_7_data = T.allocate([64], "int32", "global") - Conv2dOutput_7 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_7_data) + Conv2dOutput_7 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_7_data) for ff_3 in T.serial(0, 64): Conv2dOutput_7[ff_3] = 0 for ry_2, rx_2, rc_7 in T.grid(7, 7, 3): @@ -114,7 +114,7 @@ def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T_cast_7 = T.match_buffer(T_cast_6, [177], dtype="int16", elem_offset=0, align=64, offset_factor=1) # body tensor_2_data = T.allocate([200704], "uint8", "global") - tensor_2 = T.buffer_decl(shape=[200704], dtype="uint8", data=tensor_2_data) + tensor_2 = T.Buffer(shape=[200704], dtype="uint8", data=tensor_2_data) for ax0_ax1_fused_4 in T.serial(0, 56): for ax2_4 in T.serial(0, 56): for ax3_init in T.serial(0, 64): @@ -163,7 +163,7 @@ def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: fast_memory_6_buffer_var = T.match_buffer(fast_memory_6_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) slow_memory_7_buffer_var = T.match_buffer(slow_memory_7_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - tensor_2_let = T.buffer_decl([200704], dtype="uint8") + tensor_2_let = T.Buffer([200704], dtype="uint8") with T.let(tensor_2_let.data, T.address_of(fast_memory_6_buffer_var[0], dtype="handle")): for ax0_ax1_fused_4, ax2_4 in T.grid(56, 56): for ax3_init in T.serial(0, 64): @@ -193,12 +193,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast(placeholde fast_memory_4_buffer_var = T.match_buffer(fast_memory_4_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16) slow_memory_5_buffer_var = T.match_buffer(slow_memory_5_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - PaddedInput_7_let = T.buffer_decl([157323], "int16") + PaddedInput_7_let = T.Buffer([157323], "int16") with T.let(PaddedInput_7_let.data, T.address_of(slow_memory_5_buffer_var[802816], dtype="handle")): for i0_i1_fused_7, i2_7, i3_7 in T.grid(229, 229, 3): PaddedInput_7_let[i0_i1_fused_7 * 687 + i2_7 * 3 + i3_7] = T.if_then_else(2 <= i0_i1_fused_7 and i0_i1_fused_7 < 226 and 2 <= i2_7 and i2_7 < 226, placeholder_65[i0_i1_fused_7 * 672 + i2_7 * 3 + i3_7 - 1350], T.int16(0), dtype="int16") for ax0_ax1_fused_ax2_fused_7 in T.serial(0, 12544): - Conv2dOutput_7_let = T.buffer_decl([64], "int32") + Conv2dOutput_7_let = T.Buffer([64], "int32") with T.let(Conv2dOutput_7_let.data, T.address_of(fast_memory_4_buffer_var[0], dtype="handle")): for ff_3 in T.serial(0, 64): Conv2dOutput_7_let[ff_3] = 0 @@ -272,12 +272,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(pla T_cast_5 = T.match_buffer(T_cast_4, [215], dtype="int16") # body PaddedInput_1_data = T.allocate([379456], "int16", "global") - PaddedInput_1 = T.buffer_decl(shape=[379456], dtype="int16", data=PaddedInput_1_data) + PaddedInput_1 = T.Buffer(shape=[379456], dtype="int16", data=PaddedInput_1_data) for i0_i1_fused_1, i2_1, i3_1 in T.grid(77, 77, 64): PaddedInput_1[i0_i1_fused_1 * 4928 + i2_1 * 64 + i3_1] = T.if_then_else(1 <= i0_i1_fused_1 and i0_i1_fused_1 < 76 and 1 <= i2_1 and i2_1 < 76, placeholder_13[i0_i1_fused_1 * 4800 + i2_1 * 64 + i3_1 - 4864], T.int16(0), dtype="int16") for ax0_ax1_fused_ax2_fused_1 in T.serial(0, 5625): Conv2dOutput_1_data = T.allocate([64], "int32", "global") - Conv2dOutput_1 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_1_data) + Conv2dOutput_1 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_1_data) for ff_1 in T.serial(0, 64): Conv2dOutput_1[ff_1] = 0 for ry, rx, rc_1 in T.grid(3, 3, 64): @@ -295,12 +295,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_add_1 = T.match_buffer(T_add, [407], dtype="int32") # body PaddedInput_2_data = T.allocate([360000], "int16", "global") - PaddedInput_2 = T.buffer_decl(shape=[360000], dtype="int16", data=PaddedInput_2_data) + PaddedInput_2 = T.Buffer(shape=[360000], dtype="int16", data=PaddedInput_2_data) for i0_i1_fused_2, i2_2, i3_2 in T.grid(75, 75, 64): PaddedInput_2[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] = placeholder_19[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] for ax0_ax1_fused_ax2_fused_2 in T.serial(0, 5625): Conv2dOutput_2_data = T.allocate([64], "int32", "global") - Conv2dOutput_2 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_2_data) + Conv2dOutput_2 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_2_data) for ax3_outer_1 in T.serial(0, 4): for ff_2 in T.serial(0, 64): Conv2dOutput_2[ff_2] = 0 @@ -320,12 +320,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_cast_7 = T.match_buffer(T_cast_6, [407], dtype="uint8") # body PaddedInput_3_data = T.allocate([360000], "int16", "global") - PaddedInput_3 = T.buffer_decl(shape=[360000], dtype="int16", data=PaddedInput_3_data) + PaddedInput_3 = T.Buffer(shape=[360000], dtype="int16", data=PaddedInput_3_data) for i0_i1_fused_3, i2_3, i3_3 in T.grid(75, 75, 64): PaddedInput_3[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] = placeholder_29[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] for ax0_ax1_fused_ax2_fused_3 in T.serial(0, 5625): Conv2dOutput_3_data = T.allocate([64], "int32", "global") - Conv2dOutput_3 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_3_data) + Conv2dOutput_3 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_3_data) for ax3_outer_2 in T.serial(0, 4): for ff_3 in T.serial(0, 64): Conv2dOutput_3[ff_3] = 0 @@ -361,12 +361,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place T_cast_3 = T.match_buffer(T_cast_2, [215], dtype="int16") # body PaddedInput_data = T.allocate([360000], "int16", "global") - PaddedInput = T.buffer_decl([360000], "int16", data=PaddedInput_data) + PaddedInput = T.Buffer([360000], "int16", data=PaddedInput_data) for i0_i1_fused, i2, i3 in T.grid(75, 75, 64): PaddedInput[i0_i1_fused * 4800 + i2 * 64 + i3] = placeholder_7[i0_i1_fused * 4800 + i2 * 64 + i3] for ax0_ax1_fused_ax2_fused in T.serial(0, 5625): Conv2dOutput_data = T.allocate([64], "int32", "global") - Conv2dOutput = T.buffer_decl([64], "int32", data=Conv2dOutput_data) + Conv2dOutput = T.Buffer([64], "int32", data=Conv2dOutput_data) for ff in T.serial(0, 64): Conv2dOutput[ff] = 0 for rc in T.serial(0, 64): @@ -398,12 +398,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_cast_7 = T.match_buffer(T_cast_6, [407], dtype="uint8") global_workspace_5_buffer_var = T.match_buffer(global_workspace_5_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - PaddedInput_3_let = T.buffer_decl([360000], 'int16') + PaddedInput_3_let = T.Buffer([360000], 'int16') with T.let(PaddedInput_3_let.data, T.address_of(global_workspace_5_buffer_var[6480000], dtype="handle")): for i0_i1_fused_3, i2_3, i3_3 in T.grid(75, 75, 64): PaddedInput_3_let[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] = placeholder_29[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] for ax0_ax1_fused_ax2_fused_3 in T.serial(0, 5625): - Conv2dOutput_3_let = T.buffer_decl([64], 'int32') + Conv2dOutput_3_let = T.Buffer([64], 'int32') with T.let(Conv2dOutput_3_let.data, T.address_of(global_workspace_5_buffer_var[7200000], dtype="handle")): for ax3_outer_2 in T.serial(0, 4): for ff_3 in T.serial(0, 64): @@ -421,12 +421,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T_add_1 = T.match_buffer(T_add, [407], dtype="int32") global_workspace_4_buffer_var = T.match_buffer(global_workspace_4_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - PaddedInput_2_let = T.buffer_decl([360000], "int16") + PaddedInput_2_let = T.Buffer([360000], "int16") with T.let(PaddedInput_2_let.data, T.address_of(global_workspace_4_buffer_var[7200000], dtype="handle")): for i0_i1_fused_2, i2_2, i3_2 in T.grid(75, 75, 64): PaddedInput_2_let[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] = placeholder_19[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] for ax0_ax1_fused_ax2_fused_2 in T.serial(0, 5625): - Conv2dOutput_2_let = T.buffer_decl([64], 'int32') + Conv2dOutput_2_let = T.Buffer([64], 'int32') with T.let(Conv2dOutput_2_let.data, T.address_of(global_workspace_4_buffer_var[7920000], dtype="handle")): for ax3_outer_1 in T.serial(0, 4): for ff_2 in T.serial(0, 64): @@ -444,12 +444,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place T_cast_3 = T.match_buffer(T_cast_2, [215], dtype="int16") global_workspace_2_buffer_var = T.match_buffer(global_workspace_2_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - PaddedInput_let = T.buffer_decl([360000], "int16") + PaddedInput_let = T.Buffer([360000], "int16") with T.let(PaddedInput_let.data, T.address_of(global_workspace_2_buffer_var[7200000], dtype="handle")): for i0_i1_fused, i2, i3 in T.grid(75, 75, 64): PaddedInput_let[i0_i1_fused * 4800 + i2 * 64 + i3] = placeholder_7[i0_i1_fused * 4800 + i2 * 64 + i3] for ax0_ax1_fused_ax2_fused in T.serial(0, 5625): - Conv2dOutput_let = T.buffer_decl([64], "int32") + Conv2dOutput_let = T.Buffer([64], "int32") with T.let(Conv2dOutput_let.data, T.address_of(global_workspace_2_buffer_var[7920000], dtype="handle")): for ff in T.serial(0, 64): Conv2dOutput_let[ff] = 0 @@ -466,12 +466,12 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(pla T_cast_5 = T.match_buffer(T_cast_4, [215], dtype="int16") global_workspace_3_buffer_var = T.match_buffer(global_workspace_3_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16) # body - PaddedInput_1_let = T.buffer_decl([379456], "int16") + PaddedInput_1_let = T.Buffer([379456], "int16") with T.let(PaddedInput_1_let.data, T.address_of(global_workspace_3_buffer_var[0], dtype="handle")): for i0_i1_fused_1, i2_1, i3_1 in T.grid(77, 77, 64): PaddedInput_1_let[i0_i1_fused_1 * 4928 + i2_1 * 64 + i3_1] = T.if_then_else(1 <= i0_i1_fused_1 and i0_i1_fused_1 < 76 and 1 <= i2_1 and i2_1 < 76, placeholder_13[i0_i1_fused_1 * 4800 + i2_1 * 64 + i3_1 - 4864], T.int16(0), dtype="int16") for ax0_ax1_fused_ax2_fused_1 in T.serial(0, 5625): - Conv2dOutput_1_let = T.buffer_decl([64], "int32") + Conv2dOutput_1_let = T.Buffer([64], "int32") with T.let(Conv2dOutput_1_let.data, T.address_of(global_workspace_3_buffer_var[7200000], dtype="handle")): for ff_1 in T.serial(0, 64): Conv2dOutput_1_let[ff_1] = 0 @@ -546,7 +546,7 @@ def tensor_intrin_primfunc() -> None: ) ) - dense = T.buffer_decl([10], "int32", data=dense_data) + dense = T.Buffer([10], "int32", data=dense_data) dense[0] = T.q_multiply_shift(dense[0], 1608879842, 31, -7, dtype="int32") @T.prim_func @@ -561,7 +561,7 @@ def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None: global_workspace_1_buffer_var = T.match_buffer( global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16 ) - dense_let = T.buffer_decl([10], "int32") + dense_let = T.Buffer([10], "int32") with T.let(dense_let.data, T.address_of(global_workspace_1_buffer_var[0], dtype="handle")): T.evaluate( T.call_extern( diff --git a/tests/python/unittest/test_tvmscript_ir_builder_tir.py b/tests/python/unittest/test_tvmscript_ir_builder_tir.py index 7d542c7bc7bd..85d2e808b3d8 100644 --- a/tests/python/unittest/test_tvmscript_ir_builder_tir.py +++ b/tests/python/unittest/test_tvmscript_ir_builder_tir.py @@ -53,9 +53,9 @@ def test_ir_builder_tir_primfunc_complete(): with T.prim_func(): T.arg("a", T.handle()) T.arg("b", T.var("int64")) - T.arg("c", T.buffer_decl((128, 128), "float32")) + T.arg("c", T.Buffer((128, 128), "float32")) d = T.arg("d", T.handle()) - e = T.arg("e", T.buffer_decl((1024,), "int8")) + e = T.arg("e", T.Buffer((1024,), "int8")) T.func_attr({"key": "value"}) T.func_ret(tvm.ir.PrimType("int64")) buffer_d = T.match_buffer(d, (64, 64), "int64") @@ -120,10 +120,10 @@ def test_ir_builder_tir_block_base(): def test_ir_builder_tir_block_complete(): with IRBuilder() as ib: a = T.var("int64", "a") - b = T.buffer_decl((128, 128), "float32") - c = T.buffer_decl((128, 128), "float32") + b = T.Buffer((128, 128), "float32") + c = T.Buffer((128, 128), "float32") d = T.var("int32", "d") - e = T.buffer_decl((128, 128), "float32") + e = T.Buffer((128, 128), "float32") f = T.var("int32", "f") with T.block("block"): T.where(a > 1) @@ -298,7 +298,7 @@ def test_ir_builder_tir_let(): def test_ir_builder_tir_realize(): - buffer_a = T.buffer_decl((128, 128), "float32") + buffer_a = T.Buffer((128, 128), "float32") with IRBuilder() as ib: with T.realize(buffer_a[0:128, 0:128], "test_storage_scope", True): T.evaluate(0) @@ -417,7 +417,7 @@ def test_ir_builder_tir_if_then_else(): def test_ir_builder_tir_buffer_store(): - buffer_a = T.buffer_decl((10, 10), "float32") + buffer_a = T.Buffer((10, 10), "float32") i = T.var("int32", "x") with IRBuilder() as ib: T.buffer_store(buffer_a, 0.1, [0, i]) @@ -434,7 +434,7 @@ def test_ir_builder_tir_buffer_store(): def test_ir_builder_tir_prefetch(): with IRBuilder() as ib: - buffer_a = T.buffer_decl((128, 128), "float32") + buffer_a = T.Buffer((128, 128), "float32") T.prefetch(buffer_a, []) # the prefetch generated by IRBuilder @@ -469,7 +469,7 @@ def test_ir_builder_tir_decl_buffer(): ir_actual = ib.get() # the expected decl_buffer - buffer = T.buffer_decl((128, 128), "float32") + buffer = T.Buffer((128, 128), "float32") ir_expected = tir.Allocate( buffer.data, "float32", diff --git a/tests/python/unittest/test_tvmscript_printer_tir.py b/tests/python/unittest/test_tvmscript_printer_tir.py index 71da86bff763..ec69c54396c3 100644 --- a/tests/python/unittest/test_tvmscript_printer_tir.py +++ b/tests/python/unittest/test_tvmscript_printer_tir.py @@ -166,7 +166,7 @@ def test_match_buffer_region(): _assert_print( obj, """ -src = T.buffer_decl((128, 128)) +src = T.Buffer((128, 128)) tgt = T.match_buffer(src[64:128, 64:128], (64, 64)) """, ) @@ -176,7 +176,7 @@ def test_buffer(): a = tir.decl_buffer((128, 128), "float16", name="A") _assert_print( a, - """A = T.buffer_decl((128, 128), "float16") + """A = T.Buffer((128, 128), "float16") A""", ) @@ -193,7 +193,7 @@ def test_buffer_region(): _assert_print( obj, """ -src = T.buffer_decl((128, 128)) +src = T.Buffer((128, 128)) src[64:128, 64:128] """, ) @@ -205,7 +205,7 @@ def test_buffer_load(): _assert_print( obj, """ -A = T.buffer_decl((128, 128), "float16") +A = T.Buffer((128, 128), "float16") A[128, 128] """, ) @@ -219,7 +219,7 @@ def test_buffer_store(): _assert_print( obj, """ -A = T.buffer_decl((128, 128), "float16") +A = T.Buffer((128, 128), "float16") A[128, 128] = A[128, 128] + T.float16(1) """, ) @@ -380,7 +380,7 @@ def test_prefetch(): _assert_print( obj, """ -A = T.buffer_decl((128, 128), "float16") +A = T.Buffer((128, 128), "float16") T.prefetch(A, [T.Range(0, 64), T.Range(0, 64)]) """, ) @@ -439,7 +439,7 @@ def test_buffer_realize(): _assert_print( obj, """ -A = T.buffer_decl((128, 128)) +A = T.Buffer((128, 128)) with T.realize(A[0:128, 0:128], "test_storage_scope"): T.evaluate(0) """, diff --git a/tests/python/unittest/test_tvmscript_roundtrip.py b/tests/python/unittest/test_tvmscript_roundtrip.py index 0a6a2a26380c..4300c4bbade9 100644 --- a/tests/python/unittest/test_tvmscript_roundtrip.py +++ b/tests/python/unittest/test_tvmscript_roundtrip.py @@ -34,8 +34,8 @@ def mmult(A: T.handle, B: T.handle, C: T.handle) -> None: # function attr dict T.func_attr({"global_symbol": "mmult", "tir.noalias": True}) # buffer definition - C_global = T.buffer_decl([1024, 1024], elem_offset=0, align=64, offset_factor=1) - packedB = T.buffer_decl([32, 1024, 32], elem_offset=0, align=64, offset_factor=1) + C_global = T.Buffer([1024, 1024], elem_offset=0, align=64, offset_factor=1) + packedB = T.Buffer([32, 1024, 32], elem_offset=0, align=64, offset_factor=1) A_1 = T.match_buffer(A, [1024, 1024], elem_offset=0, align=64, offset_factor=1) B_1 = T.match_buffer(B, [1024, 1024], elem_offset=0, align=64, offset_factor=1) C_1 = T.match_buffer(C, [1024, 1024], elem_offset=0, align=64, offset_factor=1) @@ -95,15 +95,13 @@ def mmult(A: T.handle, B: T.handle, C: T.handle) -> None: C_1 = T.match_buffer(C, [16384], elem_offset=0, align=64, offset_factor=1) # body packedB_data = T.allocate([32768], "float32", "global") - packedB = T.buffer_decl( - shape=[32768], dtype="float32", scope="global", data=packedB_data - ) + packedB = T.Buffer(shape=[32768], dtype="float32", scope="global", data=packedB_data) for x in T.parallel(0, 32): for y in T.serial(0, 1024): packedB[T.ramp(((x * 32768) + (y * 32)), 1, 32)] = B_1[y, T.ramp(x * 32, 1, 32)] for x_outer in T.parallel(0, 32): C_global_data = T.allocate([1024], "float32", "global") - C_global = T.buffer_decl( + C_global = T.Buffer( shape=[1024], dtype="float32", scope="global", data=C_global_data ) for y_outer in T.serial(0, 32): @@ -196,8 +194,8 @@ def mmult( # buffer definition buf_type_ids = T.match_buffer(arg_type_ids, [3], dtype="int32") - packedB = T.buffer_decl([32768], dtype="float32") - C_global = T.buffer_decl([1024], dtype="float32") + packedB = T.Buffer([32768], dtype="float32") + C_global = T.Buffer([1024], dtype="float32") # var definition # C_global = T.buffer_var("float32", "global") # packedB = T.buffer_var("float32", "global") @@ -212,29 +210,29 @@ def mmult( A_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle") T.attr(A_data, "storage_alignment", 128) - A = T.buffer_decl([1024 * 1024], dtype="int32", data=A_data) + A = T.Buffer([1024 * 1024], dtype="int32", data=A_data) buf0_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 2, dtype="handle") - buf0_shape = T.buffer_decl([2], dtype="int32", data=buf0_shape_data) + buf0_shape = T.Buffer([2], dtype="int32", data=buf0_shape_data) buf0_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 3, dtype="handle") - buf0_strides = T.buffer_decl([2], dtype="int32", data=buf0_strides_data) + buf0_strides = T.Buffer([2], dtype="int32", data=buf0_strides_data) dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32") B_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle") T.attr(B_data, "storage_alignment", 128) - B = T.buffer_decl([1024 * 1024], dtype="int32", data=B_data) + B = T.Buffer([1024 * 1024], dtype="int32", data=B_data) buf1_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 2, dtype="handle") - buf1_shape = T.buffer_decl([2], dtype="int32", data=buf1_shape_data) + buf1_shape = T.Buffer([2], dtype="int32", data=buf1_shape_data) buf1_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 3, dtype="handle") - buf1_strides = T.buffer_decl([2], dtype="int32", data=buf1_strides_data) + buf1_strides = T.Buffer([2], dtype="int32", data=buf1_strides_data) C_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 1, dtype="handle") T.attr(C_data, "storage_alignment", 128) - C = T.buffer_decl([1024 * 1024], dtype="int32", data=C_data) + C = T.Buffer([1024 * 1024], dtype="int32", data=C_data) buf2_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 2, dtype="handle") - buf2_shape = T.buffer_decl([2], dtype="int32", data=buf2_shape_data) + buf2_shape = T.Buffer([2], dtype="int32", data=buf2_shape_data) buf2_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 3, dtype="handle") - buf2_strides = T.buffer_decl([2], dtype="int32", data=buf2_strides_data) + buf2_strides = T.Buffer([2], dtype="int32", data=buf2_strides_data) assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or ( arg0_code == 4 @@ -489,42 +487,34 @@ def func(A: T.handle, W: T.handle, Conv: T.handle) -> None: ty = T.env_thread("threadIdx.y") tz = T.env_thread("threadIdx.z") # buffer definition - Apad_shared = T.buffer_decl( + Apad_shared = T.Buffer( [16, 16, 16, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1 ) - Apad_shared_wmma_matrix_a = T.buffer_decl( + Apad_shared_wmma_matrix_a = T.Buffer( [16, 16, 16, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1 ) - BA = T.buffer_decl( - [16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256 - ) - BB = T.buffer_decl( - [16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256 - ) - BC = T.buffer_decl([16, 16], scope="wmma.accumulator", align=32, offset_factor=256) - Conv_wmma_accumulator = T.buffer_decl( + BA = T.Buffer([16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256) + BB = T.Buffer([16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256) + BC = T.Buffer([16, 16], scope="wmma.accumulator", align=32, offset_factor=256) + Conv_wmma_accumulator = T.Buffer( [16, 14, 14, 32, 16, 16], elem_offset=0, align=64, offset_factor=1 ) - W_shared = T.buffer_decl( + W_shared = T.Buffer( [3, 3, 16, 32, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1 ) - W_shared_wmma_matrix_b = T.buffer_decl( + W_shared_wmma_matrix_b = T.Buffer( [3, 3, 16, 32, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1 ) - buffer = T.buffer_decl( - [16, 16], dtype="float16", scope="shared", align=32, offset_factor=256 - ) - buffer_1 = T.buffer_decl( + buffer = T.Buffer([16, 16], dtype="float16", scope="shared", align=32, offset_factor=256) + buffer_1 = T.Buffer( [16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256 ) - buffer_2 = T.buffer_decl( - [16, 16], dtype="float16", scope="shared", align=32, offset_factor=256 - ) - buffer_3 = T.buffer_decl( + buffer_2 = T.Buffer([16, 16], dtype="float16", scope="shared", align=32, offset_factor=256) + buffer_3 = T.Buffer( [16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256 ) - buffer_4 = T.buffer_decl([16, 16], scope="wmma.accumulator", align=32, offset_factor=256) - buffer_5 = T.buffer_decl([16, 16], align=32, offset_factor=256) + buffer_4 = T.Buffer([16, 16], scope="wmma.accumulator", align=32, offset_factor=256) + buffer_5 = T.Buffer([16, 16], align=32, offset_factor=256) A_1 = T.match_buffer( A, [16, 14, 14, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1 ) @@ -949,9 +939,9 @@ def func( # function attr dict T.func_attr({"global_symbol": "default_function", "tir.noalias": True}) # body - A_1 = T.buffer_decl([12845056], dtype="float16", data=A.data) - W_1 = T.buffer_decl([1179648], dtype="float16", data=W.data) - Conv_1 = T.buffer_decl([25690112], data=Conv.data) + A_1 = T.Buffer([12845056], dtype="float16", data=A.data) + W_1 = T.Buffer([1179648], dtype="float16", data=W.data) + Conv_1 = T.Buffer([25690112], data=Conv.data) bx = T.env_thread("blockIdx.x") by = T.env_thread("blockIdx.y") bz = T.env_thread("blockIdx.z") @@ -960,21 +950,21 @@ def func( tz = T.env_thread("threadIdx.z") T.launch_thread(bz, 196) Conv_wmma_accumulator_data = T.allocate([2048], "float32", "wmma.accumulator") - Conv_wmma_accumulator = T.buffer_decl( + Conv_wmma_accumulator = T.Buffer( shape=[2048], dtype="float32", scope="wmma.accumulator", data=Conv_wmma_accumulator_data ) Apad_shared_data = T.allocate([12288], "float16", "shared") - Apad_shared = T.buffer_decl( + Apad_shared = T.Buffer( shape=[12288], dtype="float16", scope="shared", data=Apad_shared_data ) W_shared_data = T.allocate([12288], "float16", "shared") - W_shared = T.buffer_decl(shape=[12288], dtype="float16", scope="shared", data=W_shared_data) + W_shared = T.Buffer(shape=[12288], dtype="float16", scope="shared", data=W_shared_data) Apad_shared_wmma_matrix_a_data = T.allocate([512], "float16", "wmma.matrix_a") - Apad_shared_wmma_matrix_a = T.buffer_decl( + Apad_shared_wmma_matrix_a = T.Buffer( shape=[512], dtype="float16", scope="wmma.matrix_a", data=Apad_shared_wmma_matrix_a_data ) W_shared_wmma_matrix_b_data = T.allocate([1024], "float16", "wmma.matrix_b") - W_shared_wmma_matrix_b = T.buffer_decl( + W_shared_wmma_matrix_b = T.Buffer( shape=[1024], dtype="float16", scope="wmma.matrix_b", data=W_shared_wmma_matrix_b_data ) T.launch_thread(bx, 2) @@ -2253,7 +2243,7 @@ def opt_conv_tensorcore_mod_host( ) # body stack_tcode_data: T.Ptr[T.int32] = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle") - stack_tcode = T.buffer_decl([9], "int32", data=stack_tcode_data) + stack_tcode = T.Buffer([9], "int32", data=stack_tcode_data) stack_value: T.handle = T.tvm_stack_alloca("arg_value", 10, dtype="handle") assert num_args == 3, "default_function: num_args should be 3" arg0: T.handle = T.tvm_struct_get(args, 0, 12, dtype="handle") @@ -2266,25 +2256,25 @@ def opt_conv_tensorcore_mod_host( A: T.handle = T.tvm_struct_get(arg0, 0, 1, dtype="handle") T.attr(A, "storage_alignment", 128) arg0_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 2, dtype="handle") - arg0_shape = T.buffer_decl([6], "int64", data=arg0_shape_data) + arg0_shape = T.Buffer([6], "int64", data=arg0_shape_data) arg0_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 3, dtype="handle") - arg0_strides = T.buffer_decl([6], "int64", data=arg0_strides_data) + arg0_strides = T.Buffer([6], "int64", data=arg0_strides_data) dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32") W: T.handle = T.tvm_struct_get(arg1, 0, 1, dtype="handle") T.attr(W, "storage_alignment", 128) arg1_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 2, dtype="handle") - arg1_shape = T.buffer_decl([6], "int64", data=arg1_shape_data) + arg1_shape = T.Buffer([6], "int64", data=arg1_shape_data) arg1_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 3, dtype="handle") - arg1_strides = T.buffer_decl([6], "int64", data=arg1_strides_data) + arg1_strides = T.Buffer([6], "int64", data=arg1_strides_data) Conv: T.handle = T.tvm_struct_get(arg2, 0, 1, dtype="handle") T.attr(Conv, "storage_alignment", 128) arg2_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 2, dtype="handle") - arg2_shape = T.buffer_decl([6], "int64", data=arg2_shape_data) + arg2_shape = T.Buffer([6], "int64", data=arg2_shape_data) arg2_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 3, dtype="handle") - arg2_strides = T.buffer_decl([6], "int64", data=arg2_strides_data) + arg2_strides = T.Buffer([6], "int64", data=arg2_strides_data) assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or ( arg0_code == 4 @@ -2499,7 +2489,7 @@ def vthread_func(a: T.handle, c: T.handle) -> None: T.launch_thread(i1, 2) T.launch_thread(i2, 2) B_data = T.allocate([16], "float32", "local") - B = T.buffer_decl(shape=[16], dtype="float32", scope="local", data=B_data) + B = T.Buffer(shape=[16], dtype="float32", scope="local", data=B_data) for j in range(16): B[j] = A[i0 * 64 + i1 * 32 + i2 * 16 + j] + T.float32(1) for j in range(16): @@ -2813,12 +2803,12 @@ def B(a: T.handle, c: T.handle) -> None: B = T.alloc_buffer((10), "int32") K1_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K1 = T.buffer_decl(shape=[10], dtype="int32", data=K1_data) + K1 = T.Buffer(shape=[10], dtype="int32", data=K1_data) for x in T.serial(0, 10): B[x] = A[x] + K1[x] K2_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K2 = T.buffer_decl(shape=[10], dtype="int32", data=K2_data) + K2 = T.Buffer(shape=[10], dtype="int32", data=K2_data) for x in T.serial(0, 10): B[x] = B[x] + K2[x] @@ -2835,7 +2825,7 @@ def constant(a: T.handle, c: T.handle) -> None: C = T.match_buffer(c, (10), "int32") B = T.alloc_buffer((10), "int32") K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10]) - K = T.buffer_decl(shape=[10], dtype="int32", data=K_data) + K = T.Buffer(shape=[10], dtype="int32", data=K_data) for x in T.serial(0, 10): B[x] = A[x] + K[x] @@ -2980,7 +2970,7 @@ def primfunc_with_allocate_annotations(placeholder_28: T.handle, T_cast_6: T.han T_cast_7 = T.match_buffer(T_cast_6, [200704], dtype="int16", elem_offset=0, align=64, offset_factor=1) # body tensor_2_data = T.allocate([200704], "uint8", "global", annotations={"attr1_key": "attr1_value"}) - tensor_2 = T.buffer_decl(shape=[200704], dtype="uint8", scope="global", data=tensor_2_data) + tensor_2 = T.Buffer(shape=[200704], dtype="uint8", scope="global", data=tensor_2_data) for ax0_ax1_fused_4 in T.serial(0, 56): for ax2_4 in T.serial(0, 56): for ax3_init in T.serial(0, 64): @@ -3007,7 +2997,7 @@ def comm_reducer_single_reduce_group(a: T.handle, b: T.handle) -> None: for i in T.serial(0, 128): T.launch_thread(threadIdx_x, 128) reduce_temp0_data = T.allocate([1], "float32", "local") - reduce_temp0 = T.buffer_decl(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data) + reduce_temp0 = T.Buffer(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data) with T.attr(T.comm_reducer(lambda x, y: x + y, [T.float32(0)]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle")): T.evaluate(T.tvm_thread_allreduce(T.uint32(1), A[i * 128 + threadIdx_x], True, reduce_temp0.data, threadIdx_x, dtype="handle")) @@ -3023,7 +3013,7 @@ def comm_reducer_multiple_reduce_groups(a: T.handle, b: T.handle) -> None: for i in T.serial(0, 128): T.launch_thread(threadIdx_x, 128) reduce_temp0_data = T.allocate([1], "float32", "local") - reduce_temp0 = T.buffer_decl(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data) + reduce_temp0 = T.Buffer(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data) with T.attr(T.comm_reducer(lambda x0, x1, y0, y1: (T.Select((x1 >= y1), x0, y0), T.Select((x1 >= y1), x1, y1)), [T.int32(-1), T.min_value("float32")]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle")): T.evaluate(T.tvm_thread_allreduce(T.uint32(1), A[i * 128 + threadIdx_x], True, reduce_temp0.data, threadIdx_x, dtype="handle")) @@ -3033,10 +3023,10 @@ def comm_reducer_multiple_reduce_groups(a: T.handle, b: T.handle) -> None: def multiple_commreducer(): @T.prim_func def multiple_commreducer() -> None: - normal_reduce_temp0 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local") - normal_reduce_temp1 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local") - reduce_temp0 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local") - reduce_temp1 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local") + normal_reduce_temp0 = T.Buffer([1], dtype="float32", strides=[1], scope="local") + normal_reduce_temp1 = T.Buffer([1], dtype="float32", strides=[1], scope="local") + reduce_temp0 = T.Buffer([1], dtype="float32", strides=[1], scope="local") + reduce_temp1 = T.Buffer([1], dtype="float32", strides=[1], scope="local") for ax0_1 in T.thread_binding(0, 32, thread="threadIdx.x"): with T.block("T_softmax_maxelem_cross_thread_reduction"): T.attr(T.comm_reducer(lambda x, y: T.max(x, y), [T.min_value("float32")]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle")) @@ -3163,7 +3153,7 @@ def func_T_ptr_let_statement( ) -> None: # The T.Ptr declaration in the parameter list should parse # correctly, and should be usable as the data pointer in a buffer. - arg_type_ids = T.buffer_decl([2], dtype="int32", data=arg_type_ids_handle) + arg_type_ids = T.Buffer([2], dtype="int32", data=arg_type_ids_handle) arg0: T.handle = T.tvm_struct_get(args, 0, 12, dtype="handle") arg1: T.handle = T.tvm_struct_get(args, 1, 12, dtype="handle") @@ -3177,9 +3167,9 @@ def func_T_ptr_let_statement( # this function. It should only be defined after the data pointer # has been defined, and should not be hoisted into the header of # the function as other buffer_decl statements can be. - A = T.buffer_decl([1024], dtype="float32", data=A_data) + A = T.Buffer([1024], dtype="float32", data=A_data) B_data: T.Ptr[T.float32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle") - B = T.buffer_decl([1024], dtype="float32", data=B_data) + B = T.Buffer([1024], dtype="float32", data=B_data) B[0] = A[0] @@ -3190,7 +3180,7 @@ def func_T_ptr_allocate(): @T.prim_func def func_T_ptr_allocate() -> None: A_data = T.allocate([1024], "float32", "global") - A = T.buffer_decl(shape=[1024], dtype="float32", scope="global", data=A_data) + A = T.Buffer(shape=[1024], dtype="float32", scope="global", data=A_data) A[0] = 0.0 return func_T_ptr_allocate @@ -3282,9 +3272,9 @@ def pointer_type(): @T.prim_func def func_with_ptr_type_annotations(x: T.Ptr[T.int32], y: T.Ptr[T.int32, "shared"]): xx_data = T.allocate([16], "int32", "global") - xx = T.buffer_decl(shape=[16], dtype="int32", scope="global", data=xx_data) + xx = T.Buffer(shape=[16], dtype="int32", scope="global", data=xx_data) yy_data = T.allocate([16], "int32", "shared") - yy = T.buffer_decl(shape=[16], dtype="int32", scope="shared", data=yy_data) + yy = T.Buffer(shape=[16], dtype="int32", scope="shared", data=yy_data) a: T.Ptr[T.int32] = T.address_of(xx[0], dtype="handle") b: T.Ptr[T.int32, "shared"] = T.address_of(yy[0], dtype="handle") T.evaluate(T.call_extern("copy", a, b, dtype="")) diff --git a/tests/python/unittest/test_tvmscript_syntax_sugar.py b/tests/python/unittest/test_tvmscript_syntax_sugar.py index 02b18e7e7c44..35f9e6c2e635 100644 --- a/tests/python/unittest/test_tvmscript_syntax_sugar.py +++ b/tests/python/unittest/test_tvmscript_syntax_sugar.py @@ -152,18 +152,6 @@ def func_with_sugar(A: T.Buffer[16, "float32"]): assert_structural_equal(func_no_sugar, func_with_sugar) -# match buffer failed case -def test_match_buffer_no_kwargs_failed(): - with pytest.raises(ValueError) as e: - - @T.prim_func - def elementwise_buffer_no_kwargs_failed( - a: T.Buffer[(128, 128, 128, 128)], - b: T.Buffer[(128, 128, 128, 128)], - ) -> None: - pass - - # dynamic shape gemm @T.prim_func def gemm_dyn_shape(a: T.handle, b: T.handle, c: T.handle):