[subclasses] Use __slots__ for micro optim of flatten/unflatten
ghstack-source-id: 78700dffdc8daf49e43a80e68e2a7888e681503c
Pull Request resolved: #1211
IvanKobzarev committed Nov 1, 2024
1 parent 2761917 commit f7f4337
Showing 1 changed file with 16 additions and 0 deletions.
torchao/float8/fsdp_utils.py (16 additions, 0 deletions)
@@ -128,6 +128,9 @@ def precompute_float8_dynamic_scale_for_fsdp(module: nn.Module) -> None:
 # | TP compute with torch.mm(input, weight)
 
 class WeightWithDynamicFloat8CastTensor(torch.Tensor):
+
+    __slots__ = "_tensor", "_precomputed_scale", "_linear_mm_config"
+
     @staticmethod
     def __new__(
         cls,
@@ -258,6 +261,16 @@ def fsdp_post_all_gather(
 
 
 class WeightWithDelayedFloat8CastTensor(torch.Tensor):
+
+    __slots__ = [
+        "_tensor",
+        "_amax_buffer",
+        "_amax_history_buffer",
+        "_scale_buffer",
+        "_linear_mm_config",
+        "is_amax_initialized"
+    ]
+
     @staticmethod
     def __new__(
         cls,
@@ -439,6 +452,9 @@ def fsdp_post_all_gather(
 
 
 class WeightWithStaticFloat8CastTensor(torch.Tensor):
+
+    __slots__ = "_tensor", "_static_scale", "_linear_mm_config"
+
     @staticmethod
     def __new__(
         cls,
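Background on the change itself: declaring __slots__ turns the listed attribute names into fixed slot descriptors, so the attribute reads and writes that the subclasses perform on every flatten/unflatten (e.g. during FSDP all-gather) skip a dictionary lookup. The sketch below is a minimal, self-contained illustration using plain Python classes with made-up names (FlattenTargetWithDict, FlattenTargetWithSlots, flatten), not the torchao subclasses in the diff; it only demonstrates the access-cost difference this commit is micro-optimizing.

import timeit

class FlattenTargetWithDict:
    # Baseline: attributes live in a per-instance __dict__.
    def __init__(self, tensor, scale, mm_config):
        self._tensor = tensor
        self._scale = scale
        self._mm_config = mm_config

class FlattenTargetWithSlots:
    # __slots__ pins the attributes to fixed descriptors, avoiding the
    # __dict__ lookup on each read and write.
    __slots__ = "_tensor", "_scale", "_mm_config"

    def __init__(self, tensor, scale, mm_config):
        self._tensor = tensor
        self._scale = scale
        self._mm_config = mm_config

def flatten(obj):
    # Stand-in for a flatten-style hot path: read every wrapped attribute.
    return obj._tensor, obj._scale, obj._mm_config

d = FlattenTargetWithDict(1, 2.0, "cfg")
s = FlattenTargetWithSlots(1, 2.0, "cfg")
print("dict  :", timeit.timeit(lambda: flatten(d), number=1_000_000))
print("slots :", timeit.timeit(lambda: flatten(s), number=1_000_000))

On CPython the slotted version typically shaves a small constant cost off each attribute access. Since torch.Tensor itself allows arbitrary instance attributes, these subclasses likely still carry a __dict__; the win comes from routing the named attributes through slot descriptors, while the behavior of the wrapped tensors is unchanged.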
