Remove openx_dataset_name handling from push_dataset_to_hub and clean up formatting in openx_rlds_format.py

michel-aractingi · michel-aractingi · commit b1e9c01dc93c · 2024-11-13T08:52:03.000Z
diff --git a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py b/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py
@@ -55,16 +55,19 @@
 
 np.set_printoptions(precision=2)
 
+
 def tf_to_torch(data):
     return torch.from_numpy(data.numpy())
 
+
 def tf_img_convert(img):
     if img.dtype == tf.string:
         img = tf.io.decode_image(img, expand_animations=False, dtype=tf.uint8)
     elif img.dtype != tf.uint8:
         raise ValueError(f"Unsupported image dtype: found with dtype {img.dtype}")
     return img.numpy()
 
+
 def _broadcast_metadata_rlds(i: tf.Tensor, traj: dict) -> dict:
     """
     In the RLDS format, each trajectory has some top-level metadata that is explicitly separated out, and a "steps"
@@ -130,7 +133,7 @@ def load_from_raw(
     # search for 'image' keys in the observations
     image_keys = []
     state_keys = []
-    observation_info = dataset_info.features['steps']['observation'] 
+    observation_info = dataset_info.features["steps"]["observation"]
     for key in observation_info:
         # check whether the key is for an image or a vector observation
         if len(observation_info[key].shape) == 3:
@@ -254,7 +257,7 @@ def load_from_raw(
 
 def to_hf_dataset(data_dict, video) -> Dataset:
     features = {}
-        
+
     for key in data_dict:
         # check if vector state obs
         if key.startswith("observation.") and "observation.images." not in key:
@@ -272,7 +275,7 @@ def to_hf_dataset(data_dict, video) -> Dataset:
     features["action"] = Sequence(
         length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)
     )
-        
+
     features["is_terminal"] = Value(dtype="bool", id=None)
     features["is_first"] = Value(dtype="bool", id=None)
     features["discount"] = Value(dtype="float32", id=None)
@@ -297,7 +300,6 @@ def from_raw_to_lerobot_format(
     episodes: list[int] | None = None,
     encoding: dict | None = None,
 ):
-    
     data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding)
     hf_dataset = to_hf_dataset(data_dict, video)
     episode_data_index = calculate_episode_data_index(hf_dataset)
diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py
@@ -200,24 +200,14 @@ def push_dataset_to_hub(
     # convert dataset from original raw format to LeRobot format
     from_raw_to_lerobot_format = get_from_raw_to_lerobot_format_fn(raw_format)
 
-    fmt_kwgs = {
-        "raw_dir": raw_dir,
-        "videos_dir": videos_dir,
-        "fps": fps,
-        "video": video,
-        "episodes": episodes,
-        "encoding": encoding,
-    }
-
-    if "openx_rlds." in raw_format:
-        # Support for official OXE dataset name inside `raw_format`.
-        # For instance, `raw_format="oxe_rlds"` uses the default formating (TODO what does that mean?),
-        # and `raw_format="oxe_rlds.bridge_orig"` uses the brdige_orig formating
-        _, openx_dataset_name = raw_format.split(".")
-        print(f"Converting dataset [{openx_dataset_name}] from 'openx_rlds' to LeRobot format.")
-        fmt_kwgs["openx_dataset_name"] = openx_dataset_name
-
-    hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(**fmt_kwgs)
+    hf_dataset, episode_data_index, info = from_raw_to_lerobot_format(
+        raw_dir,
+        videos_dir,
+        fps,
+        video,
+        episodes,
+        encoding,
+    )
 
     lerobot_dataset = LeRobotDataset.from_preloaded(
         repo_id=repo_id,