apache
diff --git a/‎apps/android_rpc/app/src/main/jni/tvm_runtime.h‎
Lines changed: 2 additions & 2 deletions b/‎apps/android_rpc/app/src/main/jni/tvm_runtime.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎apps/android_rpc/tests/android_rpc_test.py‎
Lines changed: 2 additions & 2 deletions b/‎apps/android_rpc/tests/android_rpc_test.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎apps/hexagon_launcher/launcher_core.h‎
Lines changed: 1 addition & 1 deletion b/‎apps/hexagon_launcher/launcher_core.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎apps/hexagon_launcher/launcher_hexagon.cc‎
Lines changed: 6 additions & 6 deletions b/‎apps/hexagon_launcher/launcher_hexagon.cc‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎apps/ios_rpc/tests/ios_rpc_test.py‎
Lines changed: 2 additions & 2 deletions b/‎apps/ios_rpc/tests/ios_rpc_test.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/arch/index.rst‎
Lines changed: 3 additions & 3 deletions b/‎docs/arch/index.rst‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/deep_dive/tensor_ir/tutorials/tir_creation.py‎
Lines changed: 3 additions & 3 deletions b/‎docs/deep_dive/tensor_ir/tutorials/tir_creation.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/deep_dive/tensor_ir/tutorials/tir_transformation.py‎
Lines changed: 3 additions & 3 deletions b/‎docs/deep_dive/tensor_ir/tutorials/tir_transformation.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/get_started/tutorials/ir_module.py‎
Lines changed: 3 additions & 3 deletions b/‎docs/get_started/tutorials/ir_module.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/get_started/tutorials/quick_start.py‎
Lines changed: 8 additions & 8 deletions b/‎docs/get_started/tutorials/quick_start.py‎
Lines changed: 8 additions & 8 deletions
@@ -43,16 +43,15 @@
 #include "../ffi/src/ffi/extra/module.cc"
 #include "../ffi/src/ffi/extra/testing.cc"
 #include "../ffi/src/ffi/function.cc"
-#include "../ffi/src/ffi/ndarray.cc"
 #include "../ffi/src/ffi/object.cc"
+#include "../ffi/src/ffi/tensor.cc"
 #include "../ffi/src/ffi/traceback.cc"
 #include "../src/runtime/cpu_device_api.cc"
 #include "../src/runtime/device_api.cc"
 #include "../src/runtime/file_utils.cc"
 #include "../src/runtime/logging.cc"
 #include "../src/runtime/memory/memory_manager.cc"
 #include "../src/runtime/minrpc/minrpc_logger.cc"
-#include "../src/runtime/ndarray.cc"
 #include "../src/runtime/profiling.cc"
 #include "../src/runtime/registry.cc"
 #include "../src/runtime/rpc/rpc_channel.cc"
@@ -63,6 +62,7 @@
 #include "../src/runtime/rpc/rpc_server_env.cc"
 #include "../src/runtime/rpc/rpc_session.cc"
 #include "../src/runtime/rpc/rpc_socket_impl.cc"
+#include "../src/runtime/tensor.cc"
 #include "../src/runtime/thread_pool.cc"
 #include "../src/runtime/threading_backend.cc"
 #include "../src/runtime/workspace_pool.cc"
 
@@ -72,8 +72,8 @@ def test_rpc_module():
         dev = remote.cl(0)
         remote.upload(path_dso_cl)
         f1 = remote.load_module("dev_lib_cl.so")
-        a = tvm.nd.array(a_np, dev)
-        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+        a = tvm.runtime.tensor(a_np, dev)
+        b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
         time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
         cost = time_f(a, b).mean
         print("%g secs/op\n" % cost)
 
@@ -25,7 +25,7 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/data_type.h>
 #include <tvm/runtime/module.h>
-#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/tensor.h>
 
 #include <string>
 #include <vector>
 
@@ -137,7 +137,7 @@ AEEResult __QAIC_HEADER(launcher_rpc_set_input)(remote_handle64 handle, int inpu
   };
   DLManagedTensor managed{tensor, /*manager_ctx*/ nullptr, /*deleter*/ nullptr};
 
-  auto input = tvm::runtime::NDArray::FromDLPack(&managed);
+  auto input = tvm::runtime::Tensor::FromDLPack(&managed);
 
   tvm::ffi::Function set_input = get_module_func(TheModel->model_executor, "set_input");
   set_input(input_idx, input);
@@ -172,17 +172,17 @@ AEEResult __QAIC_HEADER(launcher_rpc_get_output)(remote_handle64 handle, int out
   }
 
   tvm::ffi::Function get_output = get_module_func(TheModel->model_executor, "get_output");
-  tvm::runtime::NDArray output = get_output(output_idx);
+  tvm::runtime::Tensor output = get_output(output_idx);
 
   std::vector<int64_t> shape_vec{output->shape, output->shape + output->ndim};
 
-  auto* container = new tvm::runtime::NDArray::Container(
-      static_cast<void*>(output_value), shape_vec, output->dtype, Model::external());
+  auto* container = new tvm::runtime::Tensor::Container(static_cast<void*>(output_value), shape_vec,
+                                                        output->dtype, Model::external());
   container->SetDeleter([](tvm::Object* container) {
-    delete static_cast<tvm::runtime::NDArray::Container*>(container);
+    delete static_cast<tvm::runtime::Tensor::Container*>(container);
   });
 
-  tvm::runtime::NDArray host_output(tvm::runtime::GetObjectPtr<tvm::runtime::Object>(container));
+  tvm::runtime::Tensor host_output(tvm::runtime::GetObjectPtr<tvm::runtime::Object>(container));
 
   if (meta_size != 0) {
     auto* meta = reinterpret_cast<tensor_meta*>(output_meta);
 
@@ -72,8 +72,8 @@ def test_rpc_module(host, port, key, mode):
     dev = remote.metal(0)
     f1 = remote.load_module("dev_lib.dylib")
     a_np = np.random.uniform(size=1024).astype(A.dtype)
-    a = tvm.nd.array(a_np, dev)
-    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
+    a = tvm.runtime.tensor(a_np, dev)
+    b = tvm.runtime.tensor(np.zeros(1024, dtype=A.dtype), dev)
     time_f = f1.time_evaluator(f1.entry_name, dev, number=10)
     cost = time_f(a, b).mean
     print("Metal: %g secs/op" % cost)
 
@@ -133,7 +133,7 @@ The main goal of TVM's runtime is to provide a minimal API for loading and execu
     import tvm
     # Example runtime execution program in python, with type annotated
     mod: tvm.runtime.Module = tvm.runtime.load_module("compiled_artifact.so")
-    arr: tvm.runtime.NDArray = tvm.nd.array([1, 2, 3], device=tvm.cuda(0))
+    arr: tvm.runtime.Tensor = tvm.runtime.tensor([1, 2, 3], device=tvm.cuda(0))
     fun: tvm.runtime.PackedFunc = mod["addone"]
     fun(arr)
     print(arr.numpy())
@@ -142,7 +142,7 @@ The main goal of TVM's runtime is to provide a minimal API for loading and execu
 :py:class:`tvm.runtime.Module` encapsulates the result of compilation. A runtime.Module contains a GetFunction method to obtain PackedFuncs by name.
 
 :py:class:`tvm.runtime.PackedFunc` is a type-erased function interface for both the generated functions. A runtime.PackedFunc can take arguments and return values with the
-following types: POD types(int, float), string, runtime.PackedFunc, runtime.Module, runtime.NDArray, and other sub-classes of runtime.Object.
+following types: POD types(int, float), string, runtime.PackedFunc, runtime.Module, runtime.Tensor, and other sub-classes of runtime.Object.
 
 :py:class:`tvm.runtime.Module` and :py:class:`tvm.runtime.PackedFunc` are powerful mechanisms to modularize the runtime. For example, to get the above `addone` function on CUDA, we can use LLVM to generate the host-side code to compute the launching parameters(e.g. size of the thread groups) and then call into another PackedFunc from a CUDAModule that is backed by the CUDA driver API. The same mechanism can be used for OpenCL kernels.
 
@@ -155,7 +155,7 @@ The above example only deals with a simple `addone` function. The code snippet b
    factory: tvm.runtime.Module = tvm.runtime.load_module("resnet18.so")
    # Create a stateful graph execution module for resnet18 on cuda(0)
    gmod: tvm.runtime.Module = factory["resnet18"](tvm.cuda(0))
-   data: tvm.runtime.NDArray = get_input_data()
+   data: tvm.runtime.Tensor = get_input_data()
    # set input
    gmod["set_input"](0, data)
    # execute the model
 
@@ -204,9 +204,9 @@ def mm_relu(a: T.handle, b: T.handle, c: T.handle):
 
 
 def evaluate_dynamic_shape(lib: tvm.runtime.Module, m: int, n: int, k: int):
-    A = tvm.nd.array(np.random.uniform(size=(m, k)).astype("float32"))
-    B = tvm.nd.array(np.random.uniform(size=(k, n)).astype("float32"))
-    C = tvm.nd.array(np.zeros((m, n), dtype="float32"))
+    A = tvm.runtime.tensor(np.random.uniform(size=(m, k)).astype("float32"))
+    B = tvm.runtime.tensor(np.random.uniform(size=(k, n)).astype("float32"))
+    C = tvm.runtime.tensor(np.zeros((m, n), dtype="float32"))
     lib(A, B, C)
     return C.numpy()
 
 
@@ -72,9 +72,9 @@ def main(
 b_np = np.random.uniform(size=(128, 128)).astype("float32")
 c_np = a_np @ b_np
 
-a_nd = tvm.nd.array(a_np)
-b_nd = tvm.nd.array(b_np)
-c_nd = tvm.nd.array(np.zeros((128, 128), dtype="float32"))
+a_nd = tvm.runtime.tensor(a_np)
+b_nd = tvm.runtime.tensor(b_np)
+c_nd = tvm.runtime.tensor(np.zeros((128, 128), dtype="float32"))
 
 
 def evaluate(mod: tvm.IRModule):
 
@@ -237,7 +237,7 @@ def main(
 vm = relax.VirtualMachine(exec, dev)
 
 raw_data = np.random.rand(1, 784).astype("float32")
-data = tvm.nd.array(raw_data, dev)
+data = tvm.runtime.tensor(raw_data, dev)
 cpu_out = vm["main"](data, *params_from_torch["main"]).numpy()
 print(cpu_out)
 
@@ -267,8 +267,8 @@ def main(
 dev = tvm.device("cuda", 0)
 vm = relax.VirtualMachine(exec, dev)
 # Need to allocate data and params on GPU device
-data = tvm.nd.array(raw_data, dev)
-gpu_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
+data = tvm.runtime.tensor(raw_data, dev)
+gpu_params = [tvm.runtime.tensor(p, dev) for p in params_from_torch["main"]]
 gpu_out = vm["main"](data, *gpu_params).numpy()
 print(gpu_out)
 
 
@@ -141,9 +141,9 @@ def forward(self, x):
 device = tvm.cpu()
 vm = relax.VirtualMachine(ex, device)
 data = np.random.rand(1, 784).astype("float32")
-tvm_data = tvm.nd.array(data, device=device)
+tvm_data = tvm.runtime.tensor(data, device=device)
 params = [np.random.rand(*param.shape).astype("float32") for _, param in param_spec]
-params = [tvm.nd.array(param, device=device) for param in params]
+params = [tvm.runtime.tensor(param, device=device) for param in params]
 print(vm["forward"](tvm_data, *params).numpy())
 
 ################################################################################
@@ -158,14 +158,14 @@ def forward(self, x):
 #       prefill_logits = vm["prefill"](inputs, weight, kv_cache)
 #       decoded_logits = vm["decode"](inputs, weight, kv_cache)
 #
-# - TVM runtime comes with native data structures, such as NDArray, can also have zero
+# - TVM runtime comes with native data structures, such as Tensor, which can also have zero
 #   copy exchange with existing ecosystem (DLPack exchange with PyTorch)
 #
 #   .. code-block:: Python
 #
-#       # Convert PyTorch tensor to TVM NDArray
-#       x_tvm = tvm.nd.from_dlpack(x_torch.to_dlpack())
-#       # Convert TVM NDArray to PyTorch tensor
+#       # Convert PyTorch tensor to TVM Tensor
+#       x_tvm = tvm.runtime.from_dlpack(x_torch.to_dlpack())
+#       # Convert TVM Tensor to PyTorch tensor
 #       x_torch = torch.from_dlpack(x_tvm.to_dlpack())
 #
 # - TVM runtime works in non-python environments, so it works on settings such as mobile
@@ -175,14 +175,14 @@ def forward(self, x):
 #       // C++ snippet
 #       runtime::Module vm = ex.GetFunction("load_executable")();
 #       vm.GetFunction("init")(...);
-#       NDArray out = vm.GetFunction("prefill")(data, weight, kv_cache);
+#       Tensor out = vm.GetFunction("prefill")(data, weight, kv_cache);
 #
 #   .. code-block:: Java
 #
 #       // Java snippet
 #       Module vm = ex.getFunction("load_executable").invoke();
 #       vm.getFunction("init").pushArg(...).invoke;
-#       NDArray out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
+#       Tensor out = vm.getFunction("prefill").pushArg(data).pushArg(weight).pushArg(kv_cache).invoke();
 #
 
 ################################################################################