Fix TableWriter not using ids

ibro45 · ibro45 · commit 5e94eb8ddd71 · 2024-10-17T12:47:22.000-04:00
diff --git a/lighter/callbacks/writer/table.py b/lighter/callbacks/writer/table.py
@@ -19,17 +19,17 @@ class LighterTableWriter(LighterBaseWriter):
         path (Path): CSV filepath.
         writer (Union[str, Callable]): Name of the writer function registered in `self.writers` or a custom writer function.
             Available writers: "tensor". A custom writer function must take a single argument: `tensor`, and return the record
-            to be saved in the CSV file. The tensor will be a single tensor without the batch dimension.
+            to be saved in the CSV file under the 'pred' column. The tensor will be a single tensor without the batch dimension.
     """
 
     def __init__(self, path: Union[str, Path], writer: Union[str, Callable]) -> None:
         super().__init__(path, writer)
-        self.csv_records = {}
+        self.csv_records = []
 
     @property
     def writers(self) -> Dict[str, Callable]:
         return {
-            "tensor": lambda tensor: tensor.tolist(),
+            "tensor": lambda tensor: tensor.item() if tensor.numel() == 1 else tensor.tolist(),
         }
 
     def write(self, tensor: Any, id: Union[int, str]) -> None:
@@ -40,9 +40,7 @@ def write(self, tensor: Any, id: Union[int, str]) -> None:
             tensor (Any): Tensor, without the batch dimension, to be recorded.
             id (Union[int, str]): Identifier, used as the key for the record.
         """
-        column = "pred"
-        record = self.writer(tensor)
-        self.csv_records.setdefault(id, {})[column] = record
+        self.csv_records.append({"id": id, "pred": self.writer(tensor)})
 
     def on_predict_epoch_end(self, trainer: Trainer, pl_module: LighterSystem) -> None:
         """
@@ -52,19 +50,18 @@ def on_predict_epoch_end(self, trainer: Trainer, pl_module: LighterSystem) -> No
         If training was done in a distributed setting, it gathers predictions from all processes
         and then saves them from the rank 0 process.
         """
-        # Sort the records by ID and convert the dictionary to a list
-        self.csv_records = [self.csv_records[id] for id in sorted(self.csv_records)]
-
         # If in distributed data parallel mode, gather records from all processes to rank 0.
         if trainer.world_size > 1:
-            # Create a list to hold the records from each process. Used on rank 0 only.
             gather_csv_records = [None] * trainer.world_size if trainer.is_global_zero else None
-            # Each process sends its records to rank 0, which stores them in the `gather_csv_records`.
             torch.distributed.gather_object(self.csv_records, gather_csv_records, dst=0)
-            # Concatenate the gathered records
             if trainer.is_global_zero:
                 self.csv_records = list(itertools.chain(*gather_csv_records))
 
         # Save the records to a CSV file
         if trainer.is_global_zero:
-            pd.DataFrame(self.csv_records).to_csv(self.path)
+            df = pd.DataFrame(self.csv_records)
+            df = df.sort_values("id").set_index("id")
+            df.to_csv(self.path)
+
+        # Clear the records after saving
+        self.csv_records = []