updating flatten/unflatten functions

liangel-02 · liangel-02 · commit 1539983d6bee · 2025-11-03T15:18:28.000-08:00
diff --git a/test/prototype/safetensors/test_safetensors_support.py b/test/prototype/safetensors/test_safetensors_support.py
@@ -1,4 +1,5 @@
 import json
+import re
 import tempfile
 import unittest
 
@@ -38,6 +39,19 @@ def load_data(file_path: str, device: str):
     return loaded_tensors, metadata
 
 
+def check_saved_tensor_names_format(state_dict, metadata):
+    """Assert each flattened key reverts to a name in metadata["tensor_names"]."""
+    # tensor_names is a JSON string; decode it so membership is exact, not substring.
+    original_tensor_names = set(json.loads(metadata["tensor_names"]))
+    for key in state_dict:
+        m = re.match(r"^(.*)\._([^_]+)_.+", key)
+        # Plain tensors are saved under their own name, so unmatched keys stay as-is.
+        reverted_key = f"{m.group(1)}.{m.group(2)}" if m else key
+        assert reverted_key in original_tensor_names, (
+            f"Reverted key {reverted_key} not found in original state_dict keys"
+        )
+
+
 @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
 @unittest.skipIf(not is_sm_at_least_89(), "Need sm89+")
 class TestSafeTensors(TestCase):
@@ -66,6 +80,9 @@ def test_safetensors(self, config, act_pre_scale=False):
 
         with tempfile.NamedTemporaryFile() as f:
             tensors_data_dict, metadata = flatten_tensor_state_dict(model.state_dict())
+
+            check_saved_tensor_names_format(tensors_data_dict, metadata)
+
             save_file(tensors_data_dict, f.name, metadata=metadata)
             tensors_data_dict, metadata = load_data(file_path=f.name, device="cuda")
             reconstructed_dict = unflatten_tensor_state_dict(
diff --git a/torchao/prototype/safetensors/safetensors_support.py b/torchao/prototype/safetensors/safetensors_support.py
@@ -24,9 +24,9 @@ def unflatten_tensor_state_dict(
 
     For example, given a previously flattened tensors_data_dict and metadata:
     tensors_data_dict = {
-        '0.weight:qdata': torch.Tensor(...),
-        '0.weight:scale': torch.Tensor(...),
-        '0.bias:_data': torch.Tensor(...),
+        '0._weight_qdata': torch.Tensor(...),
+        '0._weight_scale': torch.Tensor(...),
+        '0.bias': torch.Tensor(...),
     }
     metadata = {
         '0.weight': {
@@ -53,7 +53,7 @@ def unflatten_tensor_state_dict(
     }
 
     Args:
-        tensors_data_dict: a dictionary from "tensor_name:tensor_data_attribute_name" to flattened torch.Tensor data for tensor subclass instance
+        tensors_data_dict: a dictionary from "{module_prefix}._{param_name}_{tensor_data_attribute_name}" (for tensor subclass instances) or from the plain "tensor_name" (for regular torch.Tensor) to flattened torch.Tensor data
         metadata: a dictionary from "tensor_name" to another dictionary that contains type and attributes for tensor subclass instance
 
     Returns:
@@ -68,23 +68,28 @@ def unflatten_tensor_state_dict(
     result = {}
 
     for tensor_name in tensor_names:
+        prefix = f"{tensor_name.rsplit('.', 1)[0]}._{tensor_name.rsplit('.', 1)[1]}_"
         tensor_tensors = {}
         for key, value in combined_data.items():
-            if key.startswith(f"{tensor_name}:"):
+            if key.startswith(prefix):
                 # Remove the prefix
-                tensor_tensors[key[len(tensor_name) + 1 :]] = value
+                tensor_tensors[key[len(prefix) :]] = value
 
         tensor_metadata = json.loads(metadata.get(tensor_name))
         tensor_type = tensor_metadata.get("_type")
 
         if tensor_type in ALLOWED_TENSORS_SUBCLASSES:
+            if not tensor_tensors:
+                # we allow the option of loading in state_dict info for a single tensor
+                # if tensor state dict info is not loaded in yet, we wait for it to be provided
+                # in a future call
+                continue
             tensor_metadata["_data"].update(tensor_tensors)
             result[tensor_name] = object_from_dict(tensor_metadata)
         elif tensor_type == torch.Tensor.__name__:
-            result[tensor_name] = tensor_tensors["_data"]
+            result[tensor_name] = tensors_data_dict[tensor_name]
         else:
             raise ValueError(f"Unsupported tensor type: {tensor_type}")
-
     return result
 
 
@@ -108,9 +113,9 @@ def flatten_tensor_state_dict(
 
     We flatten this to:
     tensors_data = {
-        '0.weight:qdata': torch.Tensor(...),
-        '0.weight:scale': torch.Tensor(...),
-        '0.bias:_data': torch.Tensor(...),
+        '0._weight_qdata': torch.Tensor(...),
+        '0._weight_scale': torch.Tensor(...),
+        '0.bias': torch.Tensor(...),
     }
     metadata = {
         '0.weight': {
@@ -152,22 +157,23 @@ def flatten_tensor_state_dict(
                     tensor_dict[tensor_data_name] = getattr(tensor, tensor_data_name)
 
             tensor_metadata = json.dumps(tensor, cls=TensorSubclassAttributeJSONEncoder)
+
+            # Clone tensors to avoid memory sharing issues
+            tensors_dict_to_save = {
+                f"{tensor_name.rsplit('.', 1)[0]}._{tensor_name.rsplit('.', 1)[1]}_{key}": (
+                    value.detach().clone() if isinstance(value, torch.Tensor) else value
+                )
+                for key, value in tensor_dict.items()
+            }
+
         elif type(tensor) is torch.Tensor:
-            tensor_dict = {"_data": tensor}
             tensor_metadata = json.dumps({"_type": torch.Tensor.__name__})
+            tensors_dict_to_save = {tensor_name: tensor}
         else:
             raise ValueError(f"Unsupported tensor type: {type(tensor)}")
 
-        # Clone tensors to avoid memory sharing issues
-        prefixed_tensors_dict = {
-            f"{tensor_name}:{key}": (
-                value.detach().clone() if isinstance(value, torch.Tensor) else value
-            )
-            for key, value in tensor_dict.items()
-        }
-
         metadata[tensor_name] = tensor_metadata
-        tensors_data_dict.update(prefixed_tensors_dict)
+        tensors_data_dict.update(tensors_dict_to_save)
 
     metadata["tensor_names"] = json.dumps(list(tensors_dict.keys()))
     return tensors_data_dict, metadata