microsoft
diff --git a/ark/api/model.cpp b/ark/api/model.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/ark/api/model_graph.cpp b/ark/api/model_graph.cpp
Lines changed: 21 additions & 1 deletion
diff --git a/ark/api/model_test.cpp b/ark/api/model_test.cpp
Lines changed: 54 additions & 12 deletions
diff --git a/ark/api/planner.cpp b/ark/api/planner.cpp
Lines changed: 3 additions & 1 deletion
diff --git a/ark/include/ark/model.hpp b/ark/include/ark/model.hpp
Lines changed: 40 additions & 26 deletions
@@ -9,9 +9,9 @@
 
 namespace ark {
 
+// Returns a compressed copy of this model. The copy is made first and
+// compress_nodes(merge_nodes) is applied to the copy only, so `*this` is left
+// untouched (the method is const). `merge_nodes` is forwarded as-is.
-Model Model::compress() const {
+Model Model::compress(bool merge_nodes) const {
     Model model(*this);
-    model.compress_nodes();
+    model.compress_nodes(merge_nodes);
     return model;
 }
 
 
@@ -33,10 +33,30 @@ int ModelGraph::rank() const { return impl_->rank(); }
 
 int ModelGraph::world_size() const { return impl_->world_size(); }
 
-void ModelGraph::compress_nodes() { impl_->compress_nodes(); }
+// Compresses the graph in place by delegating to the implementation object,
+// passing `merge_nodes` through unchanged.
+void ModelGraph::compress_nodes(bool merge_nodes) {
+    impl_->compress_nodes(merge_nodes);
+}
 
 bool ModelGraph::compressed() const { return impl_->compressed(); }
 
 bool ModelGraph::verify() const { return impl_->verify(); }
 
+// Creates a ContextManager for the (`key`, `value`) pair by delegating to the
+// implementation's context_manager(). The returned object pops the context
+// stack for its key when it is destroyed, so callers are expected to keep it
+// alive for as long as the context should apply.
+// NOTE(review): presumably context_manager() pushes `value` onto the stack for
+// `key` -- it is not shown here; confirm against the implementation.
+ModelGraph::ContextManager ModelGraph::context(const std::string& key,
+                                               const std::string& value) {
+    return impl_->context_manager(key, value);
+}
+
+// Stores the given context map, the key this manager is responsible for, and
+// a shared handle to the per-key context stacks. The constructor itself does
+// not modify the stacks; it only records what the destructor needs in order to
+// pop the stack for `key`.
+ModelGraph::ContextManager::ContextManager(
+    const std::map<std::string, std::string>& context, const std::string& key,
+    std::shared_ptr<std::map<std::string, std::vector<std::string>>>
+        context_stacks)
+    : context_(context), key_(key), context_stacks_(context_stacks) {}
+
+// Pops the top entry of the context stack registered under `key_`
+// (presumably the entry pushed when this manager was created -- the push is
+// not in this file; confirm against the implementation).
+//
+// This destructor deliberately never reports errors via ERR(...): destructors
+// are implicitly noexcept, so assuming ERR throws (TODO confirm), raising from
+// here would call std::terminate. A missing stack handle, a missing key, or
+// an already-empty stack is treated as "nothing to undo"; the empty check
+// also avoids pop_back() on an empty vector, which is undefined behavior.
+ModelGraph::ContextManager::~ContextManager() {
+    if (!context_stacks_) return;
+    auto it = context_stacks_->find(key_);
+    if (it == context_stacks_->end() || it->second.empty()) return;
+    it->second.pop_back();
+}
+
 }  // namespace ark
@@ -36,7 +36,7 @@ ark::unittest::State test_model_basics() {
     //   (AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     UNITTEST_TRUE(compressed.compressed());
     UNITTEST_EQ(compressed.nodes().size(), 1);
@@ -70,7 +70,7 @@ ark::unittest::State test_model_basics() {
     //   (AddOp,AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     UNITTEST_EQ(compressed.nodes().size(), 1);
 
@@ -104,7 +104,7 @@ ark::unittest::State test_model_basics() {
     //   (AddOp,AddOp,ReluOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     UNITTEST_EQ(compressed.nodes().size(), 1);
 
@@ -143,7 +143,7 @@ ark::unittest::State test_model_basics() {
     //   (AddOp,AddOp,ReluOp,AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
 
     auto nodes = compressed.nodes();
@@ -190,7 +190,7 @@ ark::unittest::State test_model_basics() {
     //                      (AddOp,) --+--> (AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
 
     nodes = compressed.nodes();
@@ -250,7 +250,7 @@ ark::unittest::State test_model_basics() {
     //                                      (AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
 
     nodes = compressed.nodes();
@@ -312,7 +312,7 @@ ark::unittest::State test_model_basics() {
     //                                      (AddOp,)
     //
 
-    compressed = model.compress();
+    compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
 
     nodes = compressed.nodes();
@@ -353,7 +353,7 @@ ark::unittest::State test_model_dependent_inputs() {
     ark::Tensor x4 = m.mul(x2, x3);
     ark::Tensor y = m.add(x0, x4);
 
-    auto compressed = m.compress();
+    auto compressed = m.compress(true);
     auto nodes = compressed.nodes();
     UNITTEST_EQ(nodes.size(), 4);
     auto nodes_iter = nodes.begin();
@@ -399,7 +399,7 @@ ark::unittest::State test_model_noop() {
 
     UNITTEST_TRUE(model.verify());
 
-    auto compressed = model.compress();
+    auto compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     UNITTEST_EQ(compressed.nodes().size(), 0);
     return ark::unittest::SUCCESS;
@@ -425,7 +425,7 @@ ark::unittest::State test_model_identity() {
     ark::Tensor t4 = model.relu(t3);
     UNITTEST_TRUE(model.verify());
 
-    auto compressed = model.compress();
+    auto compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     auto nodes = compressed.nodes();
     UNITTEST_EQ(nodes.size(), 3);
@@ -478,7 +478,7 @@ ark::unittest::State test_model_sharding() {
     ark::Tensor t5 = model.relu(t4);
     UNITTEST_TRUE(model.verify());
 
-    auto compressed = model.compress();
+    auto compressed = model.compress(true);
     UNITTEST_TRUE(compressed.verify());
     auto nodes = compressed.nodes();
     UNITTEST_EQ(nodes.size(), 4);
@@ -526,7 +526,7 @@ ark::unittest::State test_model_cumulate() {
 
     UNITTEST_TRUE(model.verify());
 
-    auto compressed = model.compress();
+    auto compressed = model.compress(true);
     auto nodes = compressed.nodes();
     UNITTEST_EQ(nodes.size(), 5);
 
@@ -538,12 +538,54 @@ ark::unittest::State test_model_cumulate() {
     return ark::unittest::SUCCESS;
 }
 
+// Checks that operators created while ContextManager objects are alive carry
+// the expected key/value context, and that contexts stop applying once the
+// managers go out of scope.
+ark::unittest::State test_model_context() {
+    ark::Model model;
+    ark::Tensor t0 = model.tensor({1}, ark::FP32);
+    ark::Tensor t1 = model.tensor({1}, ark::FP32);
+    // Created outside of any context.
+    ark::Tensor t2 = model.add(t0, t1);
+
+    ark::Tensor t3;
+    ark::Tensor t4;
+    ark::Tensor t5;
+    {
+        // relu is created with only lev0 active.
+        ark::Model::ContextManager cm0_1 = model.context("lev0", "1");
+        t3 = model.relu(t2);
+
+        // sqrt is created with both lev0 and lev1 active; both managers are
+        // destroyed at the closing brace of this scope.
+        ark::Model::ContextManager cm1_1 = model.context("lev1", "2");
+        t4 = model.sqrt(t3);
+    }
+    {
+        // A fresh lev0 value in a new scope; lev1 is no longer active.
+        ark::Model::ContextManager cm0_2 = model.context("lev0", "3");
+        t5 = model.exp(t2);
+    }
+
+    UNITTEST_TRUE(model.verify());
+
+    // Compress without merging nodes; four nodes are expected below.
+    auto compressed = model.compress(false);
+    UNITTEST_TRUE(compressed.verify());
+
+    auto nodes = compressed.nodes();
+    UNITTEST_EQ(nodes.size(), 4);
+
+    // Presumably nodes[0..3] correspond to add, relu, sqrt, exp in creation
+    // order -- the expected contexts below match that order.
+    UNITTEST_EQ(nodes[0]->context.size(), 0);
+    UNITTEST_EQ(nodes[1]->context.size(), 1);
+    UNITTEST_EQ(nodes[1]->context.at("lev0"), "1");
+    UNITTEST_EQ(nodes[2]->context.size(), 2);
+    UNITTEST_EQ(nodes[2]->context.at("lev0"), "1");
+    UNITTEST_EQ(nodes[2]->context.at("lev1"), "2");
+    UNITTEST_EQ(nodes[3]->context.size(), 1);
+    UNITTEST_EQ(nodes[3]->context.at("lev0"), "3");
+
+    return ark::unittest::SUCCESS;
+}
+
+// Test entry point: passes each model test function to the UNITTEST macro in
+// turn (presumably running it and reporting failures -- macro not shown here).
 int main() {
     UNITTEST(test_model_basics);
     UNITTEST(test_model_dependent_inputs);
     UNITTEST(test_model_noop);
     UNITTEST(test_model_identity);
     UNITTEST(test_model_sharding);
     UNITTEST(test_model_cumulate);
+    UNITTEST(test_model_context);
     return 0;
 }
@@ -69,7 +69,9 @@ std::string DefaultPlanner::Impl::plan(bool pretty) const {
             task_info["Id"] = next_node_id++;
 
             Json config;
-            if (!config_rules_.empty()) {
+            if (!op->config().empty()) {
+                config = op->config();
+            } else if (!config_rules_.empty()) {
                 const std::string op_str = op->serialize().dump();
                 for (auto &rule : config_rules_) {
                     auto config_str = rule(op_str, gpu_info.arch->name());
 
@@ -26,7 +26,9 @@ class Model : public ModelGraph {
 
     Model &operator=(const Model &other) = default;
 
-    Model compress() const;
+    Model compress(bool merge_nodes = false) const;
+
+    using ContextManager = ModelGraph::ContextManager;
 
     int unique_tag();
 
@@ -87,23 +89,29 @@ class Model : public ModelGraph {
     // result in `output`.
     // Currently, only reduction along the last dimension is supported.
     Tensor reduce_sum(Tensor input, int axis, bool keepdims = true,
-                      Tensor output = NullTensor, const std::string &name = "");
+                      Tensor output = NullTensor,
+                      const std::string &config = "",
+                      const std::string &name = "");
     Tensor reduce_mean(Tensor input, int axis, bool keepdims = true,
                        Tensor output = NullTensor,
+                       const std::string &config = "",
                        const std::string &name = "");
     Tensor reduce_max(Tensor input, int axis, bool keepdims = true,
-                      Tensor output = NullTensor, const std::string &name = "");
+                      Tensor output = NullTensor,
+                      const std::string &config = "",
+                      const std::string &name = "");
 
     // Transposes the `input` tensor according to the given `permutation`.
     // For example, transpose(input, {0, 1 ,3, 2}) will swap the last two
     // dimensions of the input tensor. Currently, only 4D tensors are supported.
     Tensor transpose(Tensor input, const std::vector<int64_t> &permutation,
-                     Tensor output = NullTensor, const std::string &name = "");
+                     Tensor output = NullTensor, const std::string &config = "",
+                     const std::string &name = "");
     // Performs matrix multiplication between the `input` tensor and another
     // `other` tensor, storing the result in `output`.
     Tensor matmul(Tensor input, Tensor other, Tensor output = NullTensor,
                   bool trans_input = false, bool trans_other = false,
-                  const std::string &name = "");
+                  const std::string &config = "", const std::string &name = "");
     // Implements the 'im2col' method for 2D convolution layers, which takes an
     // `input` tensor and reshapes it to a 2D matrix by extracting image patches
     // from the input tensor based on the provided parameters.
@@ -120,72 +128,76 @@ class Model : public ModelGraph {
                     Tensor output = NullTensor, const std::string &name = "");
     // Calculates the exponential of the `input` tensor, element-wise.
     Tensor exp(Tensor input, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     // Calculates the square root of the `input` tensor, element-wise.
     Tensor sqrt(Tensor input, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     // Calculates the reverse square root of the `input` tensor, element-wise.
     Tensor rsqrt(Tensor input, Tensor output = NullTensor,
-                 const std::string &name = "");
+                 const std::string &config = "", const std::string &name = "");
     // ReLU activation
     Tensor relu(Tensor input, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     // Copy the `input` tensor to `output` tensor
     Tensor copy(Tensor input, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     Tensor copy(float val, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     // Applies the Gaussian Error Linear Unit (GELU) activation function to the
     // `input` tensor, element-wise. GELU is a smooth approximation of the
     // rectifier function and is widely used in deep learning models.
     Tensor gelu(Tensor input, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     // Sigmoid activation
     Tensor sigmoid(Tensor input, Tensor output = NullTensor,
+                   const std::string &config = "",
                    const std::string &name = "");
     // Performs rotary position embedding (RoPE) on the `input` tensor
     Tensor rope(Tensor input, Tensor other, Tensor output = NullTensor,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
 
     // Performs an element-wise addition operator between the `input` tensor
     // and the `other` tensor
     Tensor add(Tensor input, Tensor other, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     Tensor add(Tensor input, float value, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     // Performs an element-wise subtraction operator between the `input` tensor
     // and the `other` tensor
     Tensor sub(Tensor input, Tensor other, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     Tensor sub(Tensor input, float value, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     // Performs an element-wise multiplication operator between the `input`
     // tensor and the `other` tensor,
     Tensor mul(Tensor input, Tensor other, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     Tensor mul(Tensor input, float value, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     // Performs an element-wise division operator between the `input`
     // tensor and the `other` tensor,
     Tensor div(Tensor input, Tensor other, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
     Tensor div(Tensor input, float value, Tensor output = NullTensor,
-               const std::string &name = "");
+               const std::string &config = "", const std::string &name = "");
 
     Tensor send(Tensor input, int remote_rank, int tag,
-                Tensor output = NullTensor, const std::string &name = "");
+                Tensor output = NullTensor, const std::string &config = "",
+                const std::string &name = "");
     // Blocks the execution until the corresponding 'send' operator with the
     // specified `id` is completed.
-    Tensor send_done(Tensor input, const std::string &name = "");
+    Tensor send_done(Tensor input, const std::string &config = "",
+                     const std::string &name = "");
     // Receives a tensor from a source rank (@p src_rank), identified by the
     // `id` parameter. Blocks the execution until the corresponding 'recv'
     // operator is completed.
     Tensor recv(Tensor output, int remote_rank, int tag,
-                const std::string &name = "");
+                const std::string &config = "", const std::string &name = "");
     //
     Tensor put_packet(Tensor input, Tensor local_tmp_buf, Tensor recv_buf,
                       int id, int rank, int dst_rank, size_t dst_offset,
-                      int flag, const std::string &name = "");
+                      int flag, const std::string &config = "",
+                      const std::string &name = "");
     // Performs an all-reduce operator across all ranks, aggregating the input
     // tensors. Takes the `input` tensor, the current GPU's rank, and the
     // total number of ranks `rank_num`.
@@ -200,10 +212,12 @@ class Model : public ModelGraph {
                                    const std::string &name = "");
     /// Embedding layer.
     Tensor embedding(Tensor input, Tensor weight, Tensor output = NullTensor,
+                     const std::string &config = "",
                      const std::string &name = "");
     /// Tensor type casting.
     Tensor cast(Tensor input, const DataType &data_type,
-                Tensor output = NullTensor, const std::string &name = "");
+                Tensor output = NullTensor, const std::string &config = "",
+                const std::string &name = "");
 
     // sync across multi devices
     Tensor device_sync(Tensor input, int npeers, const std::string &name = "");
Original file line number	Diff line number	Diff line change
`@@ -9,9 +9,9 @@`
`9`	`9`
`10`	`10`	`namespace ark {`
`11`	`11`
`12`		`-Model Model::compress() const {`
	`12`	`+Model Model::compress(bool merge_nodes) const {`
`13`	`13`	`Model model(*this);`
`14`		`- model.compress_nodes();`
	`14`	`+ model.compress_nodes(merge_nodes);`
`15`	`15`	`return model;`
`16`	`16`	`}`
`17`	`17`