From 84ccbce4ad25307f5cb1601f36cefab57008bacd Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 02:05:52 +0000 Subject: [PATCH 01/15] feat(data): add camera selector to annotation workspace and fix AV1 frame extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - expose cameras list and setCameraName from media-sources hook with selection preserved across episodes - render CameraSelector alongside ViewerDisplayControls in playback card - prefer imageio-ffmpeg static binary so frame thumbnails work for AV1 videos 🎥 - Generated by Copilot --- .../dataset_service/lerobot_handler.py | 20 +++++- .../AnnotationWorkspaceContent.tsx | 3 + .../AnnotationWorkspacePlaybackCard.tsx | 64 +++++++++++-------- .../useAnnotationWorkspaceMediaSources.ts | 26 ++++++-- .../useAnnotationWorkspaceShell.ts | 5 +- 5 files changed, 83 insertions(+), 35 deletions(-) diff --git a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py index 658f2b5a..c9987716 100644 --- a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py +++ b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py @@ -241,17 +241,33 @@ def get_frame_image( return self._extract_frame_cv2(str(video_path), frame_idx) + @staticmethod + def _resolve_ffmpeg() -> str | None: + """Locate a usable ffmpeg binary. + + Prefers the imageio-ffmpeg static binary (ships with libdav1d for AV1 + and libx264; not affected by host libGL breakage) over a system ffmpeg + on PATH. 
+ """ + try: + import imageio_ffmpeg + + return imageio_ffmpeg.get_ffmpeg_exe() + except Exception: + return shutil.which("ffmpeg") + @staticmethod def _extract_frame_ffmpeg(video_path: str, frame_idx: int, fps: float) -> bytes | None: """Extract a single frame as JPEG using ffmpeg.""" - if shutil.which("ffmpeg") is None: + ffmpeg = LeRobotFormatHandler._resolve_ffmpeg() + if ffmpeg is None: return None seek_time = frame_idx / fps try: proc = subprocess.run( [ - "ffmpeg", + ffmpeg, "-ss", f"{seek_time:.6f}", "-i", diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx index b17dfcca..d7a914fe 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx @@ -38,6 +38,9 @@ export function AnnotationWorkspaceContent({ shell }: AnnotationWorkspaceContent totalFrames={shell.totalFrames} resizeOutput={shell.globalTransform?.resize ?? 
null} frameImageUrl={shell.frameImageUrl} + cameras={shell.cameras} + selectedCamera={shell.cameraName} + onSelectCamera={shell.setCameraName} isPlaying={shell.isPlaying} onTogglePlayback={shell.togglePlayback} onStepFrame={shell.playback.stepFrame} diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx index 6fdb27b2..aaaa7b2a 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx @@ -13,6 +13,7 @@ import { SpeedControl } from '@/components/playback/SpeedControl' import { Button } from '@/components/ui/button' import { Card, CardContent } from '@/components/ui/card' import { ViewerDisplayControls } from '@/components/viewer-display' +import { CameraSelector } from '@/components/episode-viewer' interface AnnotationWorkspacePlaybackCardProps { compact?: boolean @@ -28,6 +29,9 @@ interface AnnotationWorkspacePlaybackCardProps { totalFrames: number resizeOutput: { width: number; height: number } | null frameImageUrl: string | null + cameras: string[] + selectedCamera: string | null + onSelectCamera: (camera: string) => void isPlaying: boolean onTogglePlayback: () => void onStepFrame: (delta: number) => void @@ -58,6 +62,9 @@ export function AnnotationWorkspacePlaybackCard({ totalFrames, resizeOutput, frameImageUrl, + cameras, + selectedCamera, + onSelectCamera, isPlaying, onTogglePlayback, onStepFrame, @@ -117,7 +124,14 @@ export function AnnotationWorkspacePlaybackCard({ return ( - +
+ + +
diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts index 252ec2d6..4a85df21 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts +++ b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts @@ -48,16 +48,26 @@ export function useAnnotationWorkspaceMediaSources({ ], ) - const cameraName = useMemo(() => { - const cameras = currentEpisode?.cameras ?? [] - if (cameras.length > 0) { - return cameras[0] + const cameras = useMemo(() => { + const fromEpisode = currentEpisode?.cameras ?? [] + if (fromEpisode.length > 0) { + return fromEpisode } - - const videoKeys = Object.keys(currentEpisode?.videoUrls ?? {}) - return videoKeys.length > 0 ? videoKeys[0] : null + return Object.keys(currentEpisode?.videoUrls ?? {}) }, [currentEpisode?.cameras, currentEpisode?.videoUrls]) + const [cameraName, setCameraName] = useState(null) + + // Reset selected camera when the camera list changes (e.g., new episode/dataset). + // Preserves selection if the same camera is still available. + useEffect(() => { + if (cameras.length === 0) { + setCameraName(null) + return + } + setCameraName((prev) => (prev && cameras.includes(prev) ? 
prev : cameras[0])) + }, [cameras]) + const videoSrc = useMemo(() => { if (!currentEpisode?.videoUrls || !cameraName) { return null @@ -179,7 +189,9 @@ export function useAnnotationWorkspaceMediaSources({ return { canvasRef, + cameras, cameraName, + setCameraName, displayFilter, frameImageUrl, interpolatedImageUrl, diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts index ce8ea493..25e9ac5f 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts +++ b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts @@ -45,7 +45,7 @@ export function useAnnotationWorkspaceShell({ const seekVideoFrameRef = useRef( (frame: number, _range: [number, number] | null, _constrainToRange = true) => frame, ) - const resumePlaybackRef = useRef((_: number) => {}) + const resumePlaybackRef = useRef((_: number) => { }) const currentDataset = useDatasetStore((state) => state.currentDataset) const currentEpisode = useEpisodeStore((state) => state.currentEpisode) @@ -310,5 +310,8 @@ export function useAnnotationWorkspaceShell({ videoRef: media.videoRef, videoSrc: media.videoSrc, canvasRef: media.canvasRef, + cameras: media.cameras, + cameraName: media.cameraName, + setCameraName: media.setCameraName, } } From 4e6f0548061d09a53bc704016364df8ff9405383 Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 04:07:24 +0000 Subject: [PATCH 02/15] fix(data): resolve PR validation failures for multi-camera dataviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add libdav to cspell dictionary (libdav1d reference in lerobot_handler docstring) - Sort imports in AnnotationWorkspacePlaybackCard for simple-import-sort - Update ffmpeg extraction tests to mock _resolve_ffmpeg directly 🤖 - 
Generated by Copilot --- .cspell/general-technical.txt | 1 + .../backend/tests/test_lerobot_handler.py | 24 +++++++++++-------- .../AnnotationWorkspacePlaybackCard.tsx | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.cspell/general-technical.txt b/.cspell/general-technical.txt index a2dd58b2..8554d412 100644 --- a/.cspell/general-technical.txt +++ b/.cspell/general-technical.txt @@ -1926,6 +1926,7 @@ incluster instancetype instancetypes jtzh +libdav libgl libglib lnkmpfy diff --git a/data-management/viewer/backend/tests/test_lerobot_handler.py b/data-management/viewer/backend/tests/test_lerobot_handler.py index 043172ef..df675cb4 100644 --- a/data-management/viewer/backend/tests/test_lerobot_handler.py +++ b/data-management/viewer/backend/tests/test_lerobot_handler.py @@ -149,16 +149,18 @@ class TestFfmpegExtraction: """Test ffmpeg-based frame extraction.""" FAKE_JPEG = b"\xff\xd8\xff\xe0fake-jpeg-data" + FFMPEG_PATH = "/usr/bin/ffmpeg" def test_successful_extraction(self, monkeypatch): """Verify _extract_frame_ffmpeg returns stdout bytes on success.""" - import shutil import subprocess as sp - monkeypatch.setattr(shutil, "which", lambda cmd: "/usr/bin/ffmpeg") + monkeypatch.setattr( + LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) + ) def mock_run(cmd, *, capture_output=False, timeout=None): - assert cmd[0] == "ffmpeg" + assert cmd[0] == self.FFMPEG_PATH assert "-ss" in cmd return sp.CompletedProcess(cmd, returncode=0, stdout=self.FAKE_JPEG, stderr=b"") @@ -168,17 +170,18 @@ def mock_run(cmd, *, capture_output=False, timeout=None): assert result == self.FAKE_JPEG def test_returns_none_when_ffmpeg_missing(self, monkeypatch): - import shutil - - monkeypatch.setattr(shutil, "which", lambda cmd: None) + monkeypatch.setattr( + LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: None) + ) result = LeRobotFormatHandler._extract_frame_ffmpeg("/tmp/video.mp4", 0, 30.0) assert result is None def 
test_returns_none_on_nonzero_exit(self, monkeypatch): - import shutil import subprocess as sp - monkeypatch.setattr(shutil, "which", lambda cmd: "/usr/bin/ffmpeg") + monkeypatch.setattr( + LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) + ) monkeypatch.setattr( sp, "run", @@ -190,10 +193,11 @@ def test_returns_none_on_nonzero_exit(self, monkeypatch): def test_seek_time_calculation(self, monkeypatch): """Verify frame_idx / fps produces correct -ss argument.""" - import shutil import subprocess as sp - monkeypatch.setattr(shutil, "which", lambda cmd: "/usr/bin/ffmpeg") + monkeypatch.setattr( + LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) + ) captured_cmd = [] diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx index aaaa7b2a..9020fdc9 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx @@ -8,12 +8,12 @@ import { useState, } from 'react' +import { CameraSelector } from '@/components/episode-viewer' import { PlaybackControlStrip } from '@/components/playback/PlaybackControlStrip' import { SpeedControl } from '@/components/playback/SpeedControl' import { Button } from '@/components/ui/button' import { Card, CardContent } from '@/components/ui/card' import { ViewerDisplayControls } from '@/components/viewer-display' -import { CameraSelector } from '@/components/episode-viewer' interface AnnotationWorkspacePlaybackCardProps { compact?: boolean From ae7d49c2c53f281917a99e3c8e303ee7a2676132 Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 04:13:41 +0000 Subject: [PATCH 03/15] fix(data): apply ruff format and add camera props to playback card test 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Format ffmpeg test monkeypatch calls onto single lines for ruff - Add cameras/selectedCamera/onSelectCamera to test fixtures for new AnnotationWorkspacePlaybackCard required props 🤖 - Generated by Copilot --- .../viewer/backend/tests/test_lerobot_handler.py | 16 ++++------------ .../annotation-workspace-playback-card.test.tsx | 9 +++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/data-management/viewer/backend/tests/test_lerobot_handler.py b/data-management/viewer/backend/tests/test_lerobot_handler.py index df675cb4..54a861e9 100644 --- a/data-management/viewer/backend/tests/test_lerobot_handler.py +++ b/data-management/viewer/backend/tests/test_lerobot_handler.py @@ -155,9 +155,7 @@ def test_successful_extraction(self, monkeypatch): """Verify _extract_frame_ffmpeg returns stdout bytes on success.""" import subprocess as sp - monkeypatch.setattr( - LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) - ) + monkeypatch.setattr(LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH)) def mock_run(cmd, *, capture_output=False, timeout=None): assert cmd[0] == self.FFMPEG_PATH @@ -170,18 +168,14 @@ def mock_run(cmd, *, capture_output=False, timeout=None): assert result == self.FAKE_JPEG def test_returns_none_when_ffmpeg_missing(self, monkeypatch): - monkeypatch.setattr( - LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: None) - ) + monkeypatch.setattr(LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: None)) result = LeRobotFormatHandler._extract_frame_ffmpeg("/tmp/video.mp4", 0, 30.0) assert result is None def test_returns_none_on_nonzero_exit(self, monkeypatch): import subprocess as sp - monkeypatch.setattr( - LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) - ) + monkeypatch.setattr(LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: 
self.FFMPEG_PATH)) monkeypatch.setattr( sp, "run", @@ -195,9 +189,7 @@ def test_seek_time_calculation(self, monkeypatch): """Verify frame_idx / fps produces correct -ss argument.""" import subprocess as sp - monkeypatch.setattr( - LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH) - ) + monkeypatch.setattr(LeRobotFormatHandler, "_resolve_ffmpeg", staticmethod(lambda: self.FFMPEG_PATH)) captured_cmd = [] diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/__tests__/annotation-workspace-playback-card.test.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/__tests__/annotation-workspace-playback-card.test.tsx index 486e610c..8b4b84a7 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/__tests__/annotation-workspace-playback-card.test.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/__tests__/annotation-workspace-playback-card.test.tsx @@ -18,6 +18,9 @@ function renderPlaybackCard(overrides: Record = {}) { totalFrames: 100, resizeOutput: null, frameImageUrl: '/api/datasets/test/episodes/0/frames/0?camera=wrist', + cameras: ['wrist'], + selectedCamera: 'wrist', + onSelectCamera: vi.fn(), isPlaying: false, onTogglePlayback: vi.fn(), onStepFrame: vi.fn(), @@ -91,6 +94,9 @@ describe('AnnotationWorkspacePlaybackCard', () => { totalFrames={100} resizeOutput={null} frameImageUrl="/api/datasets/test/episodes/0/frames/0?camera=wrist" + cameras={['wrist']} + selectedCamera="wrist" + onSelectCamera={vi.fn()} isPlaying={false} onTogglePlayback={vi.fn()} onStepFrame={vi.fn()} @@ -128,6 +134,9 @@ describe('AnnotationWorkspacePlaybackCard', () => { totalFrames={80} resizeOutput={null} frameImageUrl="/api/datasets/test/episodes/1/frames/0?camera=wrist" + cameras={['wrist']} + selectedCamera="wrist" + onSelectCamera={vi.fn()} isPlaying={false} onTogglePlayback={vi.fn()} onStepFrame={vi.fn()} From d9d670ba85ad5bf2e1277062f464b371adf9b2bc Mon Sep 17 
00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 05:28:50 +0000 Subject: [PATCH 04/15] fix(data): apply prettier and resolve frontend test failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Apply prettier to AnnotationWorkspacePlaybackCard and useAnnotationWorkspaceShell - Add CameraSelector to episode-viewer mock so AnnotationWorkspace tests render - Derive cameraName synchronously via useMemo with separate override state so initial render no longer produces a transient null videoSrc that triggers the frame-only autoplay path twice 🤖 - Generated by Copilot --- .../annotationWorkspaceTestSupport.tsx | 20 ++++++++ .../AnnotationWorkspacePlaybackCard.tsx | 48 +++++++++---------- .../useAnnotationWorkspaceMediaSources.ts | 29 +++++++---- .../useAnnotationWorkspaceShell.ts | 2 +- 4 files changed, 65 insertions(+), 34 deletions(-) diff --git a/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx b/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx index 1d0e4216..34850655 100644 --- a/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx +++ b/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx @@ -115,6 +115,26 @@ vi.mock('@/components/annotation-panel', () => ({ })) vi.mock('@/components/episode-viewer', () => ({ + CameraSelector: (props: Record) => { + const cameraProps = props as { + cameras?: string[] + selectedCamera?: string + onSelectCamera?: (camera: string) => void + } + return ( + + ) + }, TrajectoryPlot: (props: Record) => { const plotProps = props as { selectedRange?: [number, number] | null diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx 
index 9020fdc9..f10273c5 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspacePlaybackCard.tsx @@ -210,31 +210,31 @@ export function AnnotationWorkspacePlaybackCard({ controls={ compact ? renderCompactControls({ - isPlaying, - onTogglePlayback, - onStepFrame, - playbackSpeed, - onSetPlaybackSpeed, - autoPlay, - onSetAutoPlay, - autoLoop, - onSetAutoLoop, - playbackRangeStart, - onSetFrameWithinPlaybackRange, - }) + isPlaying, + onTogglePlayback, + onStepFrame, + playbackSpeed, + onSetPlaybackSpeed, + autoPlay, + onSetAutoPlay, + autoLoop, + onSetAutoLoop, + playbackRangeStart, + onSetFrameWithinPlaybackRange, + }) : renderDefaultControls({ - isPlaying, - onTogglePlayback, - onStepFrame, - playbackSpeed, - onSetPlaybackSpeed, - autoPlay, - onSetAutoPlay, - autoLoop, - onSetAutoLoop, - playbackRangeStart, - onSetFrameWithinPlaybackRange, - }) + isPlaying, + onTogglePlayback, + onStepFrame, + playbackSpeed, + onSetPlaybackSpeed, + autoPlay, + onSetAutoPlay, + autoLoop, + onSetAutoLoop, + playbackRangeStart, + onSetFrameWithinPlaybackRange, + }) } slider={
diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts index 4a85df21..fd8bce91 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts +++ b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceMediaSources.ts @@ -56,17 +56,28 @@ export function useAnnotationWorkspaceMediaSources({ return Object.keys(currentEpisode?.videoUrls ?? {}) }, [currentEpisode?.cameras, currentEpisode?.videoUrls]) - const [cameraName, setCameraName] = useState(null) + // User-selected camera override; null means "follow the default (cameras[0])". + // Tracking the override (rather than the resolved camera) keeps the resolved + // cameraName synchronous on first render, avoiding a transient null that would + // briefly produce an empty videoSrc and disrupt autoplay sequencing. + const [cameraOverride, setCameraOverride] = useState(null) - // Reset selected camera when the camera list changes (e.g., new episode/dataset). - // Preserves selection if the same camera is still available. - useEffect(() => { + const cameraName = useMemo(() => { if (cameras.length === 0) { - setCameraName(null) - return + return null + } + if (cameraOverride && cameras.includes(cameraOverride)) { + return cameraOverride + } + return cameras[0] + }, [cameras, cameraOverride]) + + // Drop a stale override when the camera list no longer contains it. + useEffect(() => { + if (cameraOverride && !cameras.includes(cameraOverride)) { + setCameraOverride(null) } - setCameraName((prev) => (prev && cameras.includes(prev) ? 
prev : cameras[0])) - }, [cameras]) + }, [cameras, cameraOverride]) const videoSrc = useMemo(() => { if (!currentEpisode?.videoUrls || !cameraName) { @@ -191,7 +202,7 @@ export function useAnnotationWorkspaceMediaSources({ canvasRef, cameras, cameraName, - setCameraName, + setCameraName: setCameraOverride, displayFilter, frameImageUrl, interpolatedImageUrl, diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts index 25e9ac5f..d2b93103 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts +++ b/data-management/viewer/frontend/src/components/annotation-workspace/useAnnotationWorkspaceShell.ts @@ -45,7 +45,7 @@ export function useAnnotationWorkspaceShell({ const seekVideoFrameRef = useRef( (frame: number, _range: [number, number] | null, _constrainToRange = true) => frame, ) - const resumePlaybackRef = useRef((_: number) => { }) + const resumePlaybackRef = useRef((_: number) => {}) const currentDataset = useDatasetStore((state) => state.currentDataset) const currentEpisode = useEpisodeStore((state) => state.currentEpisode) From 34eb00e81008645d598158a3a8cfb75da5a693c9 Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 05:55:46 +0000 Subject: [PATCH 05/15] fix(data): support LeRobot v2.x dataset layout in dataviewer loader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - format paths with both v2.x (episode_chunk/episode_index) and v3 placeholders - derive chunk index from episode_index // chunks_size for v2.x layouts - read episode lengths from meta/episodes.jsonl when present - add tests covering v2.x path resolution and episode metadata 🛠️ - Generated by Copilot --- .../src/api/services/lerobot_loader.py | 94 ++++++++++++++++--- .../backend/tests/test_lerobot_loader.py | 74 +++++++++++++++ 2 
files changed, 156 insertions(+), 12 deletions(-) diff --git a/data-management/viewer/backend/src/api/services/lerobot_loader.py b/data-management/viewer/backend/src/api/services/lerobot_loader.py index 8caec507..628d32e9 100644 --- a/data-management/viewer/backend/src/api/services/lerobot_loader.py +++ b/data-management/viewer/backend/src/api/services/lerobot_loader.py @@ -179,12 +179,23 @@ def get_dataset_info(self) -> LeRobotDatasetInfo: """ return self._load_info() + def _is_v2_layout(self, info: LeRobotDatasetInfo) -> bool: + """Return True if the dataset uses the v2.x one-episode-per-file layout. + + Detected from the data_path template (which contains the + ``{episode_index}`` placeholder in v2.x) rather than codebase_version, + so locally repacked datasets work regardless of version string. + """ + return "{episode_index" in info.data_path + def _find_episode_location(self, episode_index: int) -> tuple[int, int]: """ Find the chunk and file indices for an episode. - In LeRobot format, episodes are usually stored one per chunk/file, - where chunk_index == episode_index and file_index == 0. + v2.x layout: one parquet per episode named by ``episode_index``, + grouped into ``chunk-{episode_index // chunks_size:03d}`` directories. + v3 layout: many episodes per ``file-{file_index:03d}.parquet``; chunk + and file indices must be discovered by scanning. Returns: Tuple of (chunk_index, file_index). 
@@ -194,12 +205,19 @@ def _find_episode_location(self, episode_index: int) -> tuple[int, int]: info = self._load_info() - # Standard layout: one episode per chunk + if self._is_v2_layout(info): + chunks_size = max(info.chunks_size, 1) + chunk_idx = episode_index // chunks_size + file_idx = 0 + self._episode_index_cache[episode_index] = (chunk_idx, file_idx) + return chunk_idx, file_idx + + # v3 layout: assume one episode per chunk, then verify on disk chunk_idx = episode_index file_idx = 0 # Verify the parquet file exists - data_path = self._format_path(info.data_path, chunk_idx, file_idx) + data_path = self._format_path(info.data_path, chunk_idx, file_idx, episode_index=episode_index) full_path = self.base_path / data_path if not full_path.exists(): @@ -229,9 +247,36 @@ def _find_episode_location(self, episode_index: int) -> tuple[int, int]: self._episode_index_cache[episode_index] = (chunk_idx, file_idx) return chunk_idx, file_idx - def _format_path(self, template: str, chunk_index: int, file_index: int, video_key: str = "") -> str: - """Format a path template with indices.""" - return template.format(chunk_index=chunk_index, file_index=file_index, video_key=video_key) + def _format_path( + self, + template: str, + chunk_index: int, + file_index: int, + video_key: str = "", + *, + episode_index: int | None = None, + ) -> str: + """Format a path template, supporting v2.x and v3 placeholders. + + v3 templates use ``{chunk_index}`` and ``{file_index}``; v2.x templates + use ``{episode_chunk}`` and ``{episode_index}``. All known placeholders + are supplied so unused ones are ignored by ``str.format``. 
+ """ + info = self._info + chunks_size = max(info.chunks_size, 1) if info is not None else 1 + if episode_index is None: + episode_chunk = chunk_index + ep_idx = 0 + else: + episode_chunk = episode_index // chunks_size + ep_idx = episode_index + return template.format( + chunk_index=chunk_index, + file_index=file_index, + video_key=video_key, + episode_chunk=episode_chunk, + episode_index=ep_idx, + ) def list_episodes(self) -> list[int]: """ @@ -261,9 +306,30 @@ def list_episodes_with_meta(self) -> dict[int, dict[str, Any]]: info = self._load_info() cameras = [k for k, v in info.features.items() if v.get("dtype") == "video"] meta_episodes_dir = self.base_path / "meta" / "episodes" + meta_episodes_jsonl = self.base_path / "meta" / "episodes.jsonl" result: dict[int, dict[str, Any]] = {} - if meta_episodes_dir.exists(): + # v2.x layout: meta/episodes.jsonl with one JSON object per line + if meta_episodes_jsonl.exists(): + try: + with open(meta_episodes_jsonl) as f: + for line in f: + line = line.strip() + if not line: + continue + row = json.loads(line) + idx = int(row.get("episode_index", len(result))) + result[idx] = { + "length": int(row.get("length", 0)), + "task_index": int(row.get("task_index", 0)), + "cameras": cameras, + "fps": info.fps, + "robot_type": info.robot_type, + } + except (json.JSONDecodeError, OSError) as e: + logger.warning("Failed to read %s: %s", meta_episodes_jsonl, e) + + if not result and meta_episodes_dir.exists(): for chunk_dir in sorted(meta_episodes_dir.iterdir()): if not chunk_dir.is_dir() or not chunk_dir.name.startswith("chunk-"): continue @@ -317,7 +383,7 @@ def load_episode(self, episode_index: int) -> LeRobotEpisodeData: chunk_idx, file_idx = self._find_episode_location(episode_index) # Load parquet data - data_path = self._format_path(info.data_path, chunk_idx, file_idx) + data_path = self._format_path(info.data_path, chunk_idx, file_idx, episode_index=episode_index) full_path = self.base_path / data_path try: @@ -376,7 +442,9 @@ 
def load_episode(self, episode_index: int) -> LeRobotEpisodeData: for feature_name, feature_info in info.features.items(): if feature_info.get("dtype") == "video": video_key = feature_name - video_rel_path = self._format_path(info.video_path, chunk_idx, file_idx, video_key) + video_rel_path = self._format_path( + info.video_path, chunk_idx, file_idx, video_key, episode_index=episode_index + ) video_full_path = self.base_path / video_rel_path if video_full_path.exists(): video_paths[video_key] = video_full_path @@ -428,7 +496,7 @@ def get_episode_info(self, episode_index: int) -> dict[str, Any]: info = self._load_info() chunk_idx, file_idx = self._find_episode_location(episode_index) - data_path = self._format_path(info.data_path, chunk_idx, file_idx) + data_path = self._format_path(info.data_path, chunk_idx, file_idx, episode_index=episode_index) full_path = self.base_path / data_path try: @@ -472,7 +540,9 @@ def get_video_path(self, episode_index: int, camera_key: str) -> Path | None: info = self._load_info() chunk_idx, file_idx = self._find_episode_location(episode_index) - video_rel_path = self._format_path(info.video_path, chunk_idx, file_idx, camera_key) + video_rel_path = self._format_path( + info.video_path, chunk_idx, file_idx, camera_key, episode_index=episode_index + ) video_full_path = self.base_path / video_rel_path if video_full_path.exists(): diff --git a/data-management/viewer/backend/tests/test_lerobot_loader.py b/data-management/viewer/backend/tests/test_lerobot_loader.py index 24bda19b..0a246629 100644 --- a/data-management/viewer/backend/tests/test_lerobot_loader.py +++ b/data-management/viewer/backend/tests/test_lerobot_loader.py @@ -206,3 +206,77 @@ def test_get_video_path_missing_camera(self, loader): def test_get_cameras(self, loader): cameras = loader.get_cameras() assert cameras == ["observation.images.il-camera"] + + +class TestV2EpisodeLayout: + """Validate v2.x layout (episode-per-parquet, episodes.jsonl) handling.""" + + @pytest.fixture 
+ def v2_dataset(self, tmp_path): + import json + + import pyarrow as pa + import pyarrow.parquet as pq + + root = tmp_path / "v2-dataset" + (root / "meta").mkdir(parents=True) + (root / "data" / "chunk-000").mkdir(parents=True) + (root / "videos" / "chunk-000" / "observation.images.front").mkdir(parents=True) + + info = { + "codebase_version": "v2.1", + "robot_type": "so101_follower", + "total_episodes": 2, + "total_frames": 5, + "total_tasks": 1, + "total_chunks": 1, + "chunks_size": 1000, + "fps": 30, + "splits": {"train": "0:2"}, + "data_path": "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet", + "video_path": "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4", + "features": { + "observation.state": {"dtype": "float32", "shape": [6]}, + "action": {"dtype": "float32", "shape": [6]}, + "observation.images.front": {"dtype": "video", "shape": [480, 640, 3]}, + }, + } + (root / "meta" / "info.json").write_text(json.dumps(info)) + + with (root / "meta" / "episodes.jsonl").open("w") as f: + f.write(json.dumps({"episode_index": 0, "tasks": ["t"], "length": 3}) + "\n") + f.write(json.dumps({"episode_index": 1, "tasks": ["t"], "length": 2}) + "\n") + + for ep_idx, length in [(0, 3), (1, 2)]: + table = pa.table( + { + "frame_index": list(range(length)), + "timestamp": [i / 30.0 for i in range(length)], + "episode_index": [ep_idx] * length, + "task_index": [0] * length, + "observation.state": [[0.0] * 6 for _ in range(length)], + "action": [[0.0] * 6 for _ in range(length)], + } + ) + pq.write_table(table, root / "data" / "chunk-000" / f"episode_{ep_idx:06d}.parquet") + (root / "videos" / "chunk-000" / "observation.images.front" / f"episode_{ep_idx:06d}.mp4").write_bytes(b"") + + return root + + def test_episode_lengths_from_jsonl(self, v2_dataset): + loader = LeRobotLoader(v2_dataset) + meta = loader.list_episodes_with_meta() + assert meta[0]["length"] == 3 + assert meta[1]["length"] == 2 + assert meta[0]["cameras"] == 
["observation.images.front"] + + def test_load_episode_resolves_v2_paths(self, v2_dataset): + loader = LeRobotLoader(v2_dataset) + ep = loader.load_episode(1) + assert ep.length == 2 + assert "observation.images.front" in ep.video_paths + + def test_get_video_path_v2(self, v2_dataset): + loader = LeRobotLoader(v2_dataset) + path = loader.get_video_path(0, "observation.images.front") + assert path is not None and path.name == "episode_000000.mp4" From 006d670fb962f06d09688140fe5813ac9a28ac81 Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 06:15:04 +0000 Subject: [PATCH 06/15] feat(data): add language instruction annotations and task loading to dataviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add LanguageInstructionAnnotation model, store actions, and panel widget - load task descriptions from meta/tasks.jsonl and tasks.parquet - preserve dataset feature keys when transforming snake_case API responses - support v2.x LeRobot layout in loader and read episodes.jsonl lengths - cover v2.x layout and jsonl metadata with new loader tests 🏷️ - Generated by Copilot --- .../backend/src/api/models/annotations.py | 32 ++++++++++++ .../dataset_service/lerobot_handler.py | 6 ++- .../src/api/services/lerobot_loader.py | 38 ++++++++++++++ .../src/components/annotation-panel/index.ts | 1 + .../AnnotationWorkspaceContent.tsx | 4 +- .../AnnotationWorkspaceTrajectoryTab.tsx | 3 ++ .../viewer/frontend/src/lib/api-client.ts | 22 +++++++- .../frontend/src/stores/annotation-store.ts | 52 +++++++++++++++++++ .../viewer/frontend/src/types/annotations.ts | 34 ++++++++++++ 9 files changed, 187 insertions(+), 5 deletions(-) diff --git a/data-management/viewer/backend/src/api/models/annotations.py b/data-management/viewer/backend/src/api/models/annotations.py index fa9d196b..48ef456b 100644 --- a/data-management/viewer/backend/src/api/models/annotations.py +++ b/data-management/viewer/backend/src/api/models/annotations.py @@
-200,6 +200,37 @@ class AnomalyAnnotation(SanitizedModel): anomalies: list[Anomaly] = Field(default_factory=list) +# ============================================================================ +# Language Instruction Types (VLA) +# ============================================================================ + + +class InstructionSource(StrEnum): + """Source of the language instruction annotation.""" + + HUMAN = "human" + TEMPLATE = "template" + LLM_GENERATED = "llm-generated" + RETROACTIVE = "retroactive" + + +class LanguageInstructionAnnotation(SanitizedModel): + """Natural language instruction for VLA-conditioned training. + + Stores the primary task instruction plus optional paraphrases and + subtask decomposition used for data augmentation and hierarchical + policy conditioning. + """ + + instruction: str = Field(min_length=1, max_length=1000) + source: InstructionSource + language: str = Field(default="en", max_length=10) + paraphrases: list[str] = Field(default_factory=list) + subtask_instructions: list[str] = Field(default_factory=list) + + model_config: ClassVar = {"use_enum_values": True} + + # ============================================================================ # Combined Episode Annotation Types # ============================================================================ @@ -214,6 +245,7 @@ class EpisodeAnnotation(SanitizedModel): trajectory_quality: TrajectoryQualityAnnotation data_quality: DataQualityAnnotation anomalies: AnomalyAnnotation + language_instruction: LanguageInstructionAnnotation | None = None notes: str | None = None diff --git a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py index c9987716..b66092f2 100644 --- a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py +++ b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py @@ -13,7 +13,7 @@ import 
subprocess from pathlib import Path -from ...models.datasources import DatasetInfo, EpisodeData, EpisodeMeta, FeatureSchema, TrajectoryPoint +from ...models.datasources import DatasetInfo, EpisodeData, EpisodeMeta, FeatureSchema, TaskInfo, TrajectoryPoint from .base import build_trajectory logger = logging.getLogger(__name__) @@ -112,7 +112,9 @@ def discover(self, dataset_id: str, dataset_path: Path) -> DatasetInfo | None: total_episodes=lr_info.total_episodes, fps=lr_info.fps, features=features, - tasks=[], + tasks=[ + TaskInfo(task_index=idx, description=desc) for idx, desc in sorted(loader.get_tasks().items()) + ], ) except Exception as e: logger.warning( diff --git a/data-management/viewer/backend/src/api/services/lerobot_loader.py b/data-management/viewer/backend/src/api/services/lerobot_loader.py index 628d32e9..808262f4 100644 --- a/data-management/viewer/backend/src/api/services/lerobot_loader.py +++ b/data-management/viewer/backend/src/api/services/lerobot_loader.py @@ -563,6 +563,44 @@ def get_cameras(self) -> list[str]: cameras.append(feature_name) return cameras + def get_tasks(self) -> dict[int, str]: + """Load task descriptions keyed by task_index. + + Supports v2.x (``meta/tasks.jsonl`` with ``{task_index, task}`` rows) + and v3 (``meta/tasks.parquet`` with ``task_index`` and ``task`` + columns). Returns an empty dict when no task metadata is found. 
+ """ + tasks_jsonl = self.base_path / "meta" / "tasks.jsonl" + if tasks_jsonl.exists(): + result: dict[int, str] = {} + try: + with open(tasks_jsonl) as f: + for line in f: + line = line.strip() + if not line: + continue + row = json.loads(line) + idx = int(row.get("task_index", len(result))) + result[idx] = str(row.get("task", "")) + return result + except (json.JSONDecodeError, OSError) as e: + logger.warning("Failed to read %s: %s", tasks_jsonl, e) + + tasks_parquet = self.base_path / "meta" / "tasks.parquet" + if tasks_parquet.exists(): + try: + table = pq.read_table(tasks_parquet) + cols = table.column_names + if "task_index" in cols and "task" in cols: + return { + int(table.column("task_index")[i].as_py()): str(table.column("task")[i].as_py()) + for i in range(table.num_rows) + } + except Exception as e: + logger.warning("Failed to read %s: %s", tasks_parquet, e) + + return {} + def is_lerobot_dataset(path: str | Path) -> bool: """ diff --git a/data-management/viewer/frontend/src/components/annotation-panel/index.ts b/data-management/viewer/frontend/src/components/annotation-panel/index.ts index 841bb25a..b77d7867 100644 --- a/data-management/viewer/frontend/src/components/annotation-panel/index.ts +++ b/data-management/viewer/frontend/src/components/annotation-panel/index.ts @@ -12,6 +12,7 @@ export { FlagToggle } from './FlagToggle' export { IssueList } from './IssueList' export { LabelFilter } from './LabelFilter' export { LabelPanel } from './LabelPanel' +export { LanguageInstructionWidget } from './LanguageInstructionWidget' export { StarRating } from './StarRating' export { TaskCompletenessWidget } from './TaskCompletenessWidget' export { TrajectoryQualityWidget } from './TrajectoryQualityWidget' diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx index d7a914fe..1e356604 100644 --- 
a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceContent.tsx @@ -1,4 +1,4 @@ -import { LabelPanel } from '@/components/annotation-panel' +import { LabelPanel, LanguageInstructionWidget } from '@/components/annotation-panel' import { AnnotationWorkspaceDiagnosticsPanel } from '@/components/annotation-workspace/AnnotationWorkspaceDiagnosticsPanel' import { AnnotationWorkspaceEditToolsPanel } from '@/components/annotation-workspace/AnnotationWorkspaceEditToolsPanel' import { AnnotationWorkspacePlaybackCard } from '@/components/annotation-workspace/AnnotationWorkspacePlaybackCard' @@ -71,6 +71,7 @@ export function AnnotationWorkspaceContent({ shell }: AnnotationWorkspaceContent ) const trajectoryLabelPanel = + const trajectoryLanguageInstructionPanel = const trajectoryEditToolsPanel = (
{labelPanel} +
{languageInstructionPanel}
{editToolsPanel}
diff --git a/data-management/viewer/frontend/src/lib/api-client.ts b/data-management/viewer/frontend/src/lib/api-client.ts index 978a0653..30ca2621 100644 --- a/data-management/viewer/frontend/src/lib/api-client.ts +++ b/data-management/viewer/frontend/src/lib/api-client.ts @@ -84,6 +84,22 @@ export function transformKeys(obj: unknown): T { return obj as T } +/** + * Apply transformKeys to a dataset payload while preserving the original + * `features` map keys (camera/feature names like ``observation.images.front`` + * must not be camelCased). + */ +function preserveDatasetFeatureKeys(raw: Record): DatasetInfo { + const originalFeatures = raw.features as Record | undefined + const dataset = transformKeys(raw) + if (originalFeatures) { + dataset.features = Object.fromEntries( + Object.entries(originalFeatures).map(([key, value]) => [key, transformKeys(value)]), + ) as DatasetInfo['features'] + } + return dataset +} + /** * Custom error class for API errors. */ @@ -131,7 +147,8 @@ export async function fetchDatasets(): Promise { const response = await fetch(`${API_BASE}/datasets`, { headers: await requestHeaders(), }) - return handleResponse(response) + const raw = await handleResponse>>(response) + return raw.map(preserveDatasetFeatureKeys) } /** @@ -141,7 +158,8 @@ export async function fetchDataset(datasetId: string): Promise { const response = await fetch(`${API_BASE}/datasets/${datasetId}`, { headers: await requestHeaders(), }) - return handleResponse(response) + const raw = await handleResponse>(response) + return preserveDatasetFeatureKeys(raw) } /** diff --git a/data-management/viewer/frontend/src/stores/annotation-store.ts b/data-management/viewer/frontend/src/stores/annotation-store.ts index fec85c84..2c3d9bee 100644 --- a/data-management/viewer/frontend/src/stores/annotation-store.ts +++ b/data-management/viewer/frontend/src/stores/annotation-store.ts @@ -10,6 +10,7 @@ import type { Anomaly, DataQualityAnnotation, EpisodeAnnotation, + 
LanguageInstructionAnnotation, TaskCompletenessAnnotation, TrajectoryQualityAnnotation, } from '@/types' @@ -52,6 +53,10 @@ interface AnnotationActions { toggleAnomalyVerified: (id: string) => void /** Update notes */ updateNotes: (notes: string) => void + /** Update language instruction annotation */ + updateLanguageInstruction: (update: Partial) => void + /** Clear language instruction */ + clearLanguageInstruction: () => void /** Set saving state */ setSaving: (isSaving: boolean) => void /** Set error state */ @@ -319,6 +324,53 @@ export const useAnnotationStore = create()( ) }, + updateLanguageInstruction: (update) => { + const { currentAnnotation } = get() + if (!currentAnnotation) return + + const existing = currentAnnotation.languageInstruction ?? { + instruction: '', + source: 'human' as const, + language: 'en', + paraphrases: [], + subtaskInstructions: [], + } + + set( + { + currentAnnotation: { + ...currentAnnotation, + timestamp: new Date().toISOString(), + languageInstruction: { + ...existing, + ...update, + }, + }, + isDirty: true, + }, + false, + 'updateLanguageInstruction', + ) + }, + + clearLanguageInstruction: () => { + const { currentAnnotation } = get() + if (!currentAnnotation) return + + const { languageInstruction: _, ...rest } = currentAnnotation + set( + { + currentAnnotation: { + ...rest, + timestamp: new Date().toISOString(), + }, + isDirty: true, + }, + false, + 'clearLanguageInstruction', + ) + }, + setSaving: (isSaving) => { set({ isSaving }, false, 'setSaving') }, diff --git a/data-management/viewer/frontend/src/types/annotations.ts b/data-management/viewer/frontend/src/types/annotations.ts index 850c34df..39e768c1 100644 --- a/data-management/viewer/frontend/src/types/annotations.ts +++ b/data-management/viewer/frontend/src/types/annotations.ts @@ -154,6 +154,27 @@ export interface AnomalyAnnotation { anomalies: Anomaly[] } +// ============================================================================ +// Language Instruction Types 
(VLA) +// ============================================================================ + +/** Source of the language instruction */ +export type InstructionSource = 'human' | 'template' | 'llm-generated' | 'retroactive' + +/** Language instruction annotation for VLA-conditioned training */ +export interface LanguageInstructionAnnotation { + /** Primary task instruction */ + instruction: string + /** How this instruction was produced */ + source: InstructionSource + /** ISO 639-1 language code */ + language: string + /** Alternative phrasings for data augmentation */ + paraphrases: string[] + /** Ordered subtask decomposition */ + subtaskInstructions: string[] +} + // ============================================================================ // Combined Episode Annotation Types // ============================================================================ @@ -172,6 +193,8 @@ export interface EpisodeAnnotation { dataQuality: DataQualityAnnotation /** Anomaly annotations */ anomalies: AnomalyAnnotation + /** Language instruction for VLA training */ + languageInstruction?: LanguageInstructionAnnotation /** Free-form notes about the episode */ notes?: string } @@ -243,6 +266,17 @@ export function createDefaultAnomalyAnnotation(): AnomalyAnnotation { } } +/** Create a default language instruction annotation */ +export function createDefaultLanguageInstruction(): LanguageInstructionAnnotation { + return { + instruction: '', + source: 'human', + language: 'en', + paraphrases: [], + subtaskInstructions: [], + } +} + /** Create a complete default episode annotation */ export function createDefaultEpisodeAnnotation(annotatorId: string): EpisodeAnnotation { return { From 8056c0d57dbfd7113a81123037acf72af61c9c19 Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 06:22:47 +0000 Subject: [PATCH 07/15] refactor(data): compact trajectory tab layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - move trajectory plot and 
subtask timeline above the subtasks pane - shrink trajectory plot to a fixed 180px height - collapse the labels panel back to a single full-height column 🎨 - Generated by Copilot --- .../dataset_service/lerobot_handler.py | 4 +- .../AnnotationWorkspaceTrajectoryTab.tsx | 94 +++++++++---------- 2 files changed, 44 insertions(+), 54 deletions(-) diff --git a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py index b66092f2..52a7fda6 100644 --- a/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py +++ b/data-management/viewer/backend/src/api/services/dataset_service/lerobot_handler.py @@ -112,9 +112,7 @@ def discover(self, dataset_id: str, dataset_path: Path) -> DatasetInfo | None: total_episodes=lr_info.total_episodes, fps=lr_info.fps, features=features, - tasks=[ - TaskInfo(task_index=idx, description=desc) for idx, desc in sorted(loader.get_tasks().items()) - ], + tasks=[TaskInfo(task_index=idx, description=desc) for idx, desc in sorted(loader.get_tasks().items())], ) except Exception as e: logger.warning( diff --git a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceTrajectoryTab.tsx b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceTrajectoryTab.tsx index 7e4da851..07097914 100644 --- a/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceTrajectoryTab.tsx +++ b/data-management/viewer/frontend/src/components/annotation-workspace/AnnotationWorkspaceTrajectoryTab.tsx @@ -45,20 +45,59 @@ export function AnnotationWorkspaceTrajectoryTab({
-
+
{playbackCard} + + + {(selectedRange || selectedSubtaskId) && ( +
+ +
+ )} + +
+
+

Subtask Timeline

+ +
+ onSubtaskSelectionChange(segment.id)} + /> +
+
+
{subtaskListCard}
@@ -68,53 +107,6 @@ export function AnnotationWorkspaceTrajectoryTab({
- - - {(selectedRange || selectedSubtaskId) && ( -
- -
- )} - -
-
-
-

Subtask Timeline

-

- Compare subtask ranges directly against trajectory changes on the same frame - timeline. -

-
-
-
- -
- onSubtaskSelectionChange(segment.id)} - /> -
-
-
) From 73a4369eaf9c32432da041eb33b50fafb45a635f Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 06:29:39 +0000 Subject: [PATCH 08/15] fix(data): estimate joint velocities from positions when missing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - compute finite-difference velocities in build_trajectory when not provided - guards against zero or non-monotonic timestamps with a small dt floor 📈 - Generated by Copilot --- .../src/api/services/dataset_service/base.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/data-management/viewer/backend/src/api/services/dataset_service/base.py b/data-management/viewer/backend/src/api/services/dataset_service/base.py index 2ff26cfd..84b50ca0 100644 --- a/data-management/viewer/backend/src/api/services/dataset_service/base.py +++ b/data-management/viewer/backend/src/api/services/dataset_service/base.py @@ -33,9 +33,24 @@ def build_trajectory( Convert raw numpy arrays into a list of TrajectoryPoint models. Works for both LeRobot and HDF5 data by accepting optional arrays - with sensible defaults (zeros) for missing fields. + with sensible defaults (zeros) for missing fields. When ``joint_velocities`` + is not provided, it is estimated via finite differences of + ``joint_positions`` over ``timestamps`` so the velocity view in the UI is + populated for datasets that only record positions. """ num_joints = joint_positions.shape[1] if joint_positions.ndim > 1 else 6 + + if joint_velocities is None and joint_positions.ndim == 2 and length > 1: + # Backwards/forwards differences along the time axis. Guard against + # zero or non-monotonic timestamps by clamping dt to a small positive + # value.
+ dt = np.diff(timestamps) + dt = np.where(dt > 1e-6, dt, 1e-6) + diffs = np.diff(joint_positions, axis=0) / dt[:, None] + joint_velocities = np.empty_like(joint_positions) + joint_velocities[:-1] = diffs + joint_velocities[-1] = diffs[-1] + points: list[TrajectoryPoint] = [] for i in range(length): From 31b5d9c431aaa852398fe3faa0a872455eadec9e Mon Sep 17 00:00:00 2001 From: Ali Zaidi Date: Wed, 29 Apr 2026 06:39:36 +0000 Subject: [PATCH 09/15] fix(data): add missing LanguageInstructionWidget and update trajectory layout tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Track the LanguageInstructionWidget component that was referenced from the annotation-panel barrel but never committed - Mock LanguageInstructionWidget in the workspace test support module - Update trajectory tab layout tests to match the compact single-column labels panel and graph-inside-playback-group structure 🤖 - Generated by Copilot --- .../AnnotationWorkspacePlayback.test.tsx | 16 +- .../annotationWorkspaceTestSupport.tsx | 1 + .../LanguageInstructionWidget.tsx | 323 ++++++++++++++++++ 3 files changed, 333 insertions(+), 7 deletions(-) create mode 100644 data-management/viewer/frontend/src/components/annotation-panel/LanguageInstructionWidget.tsx diff --git a/data-management/viewer/frontend/src/components/__tests__/AnnotationWorkspacePlayback.test.tsx b/data-management/viewer/frontend/src/components/__tests__/AnnotationWorkspacePlayback.test.tsx index 8d31c9b4..77b85c95 100644 --- a/data-management/viewer/frontend/src/components/__tests__/AnnotationWorkspacePlayback.test.tsx +++ b/data-management/viewer/frontend/src/components/__tests__/AnnotationWorkspacePlayback.test.tsx @@ -102,7 +102,6 @@ describe('AnnotationWorkspace playback and trajectory tab flows', () => { const labelsPanel = screen.getByTestId('trajectory-labels-panel') expect(trajectoryLayout.className).toContain('lg:grid-cols-3') -
expect(labelsPanel.className).toContain('lg:row-span-2') expect(labelsPanel).not.toContainElement(screen.getByTestId('trajectory-compact-media-frame')) expect(labelsPanel).not.toContainElement(screen.getByText('Subtask List')) }) @@ -158,11 +157,12 @@ describe('AnnotationWorkspace playback and trajectory tab flows', () => { ctrlKey: false, }) - expect( - screen - .getByTestId('trajectory-playback-group-panel') - .compareDocumentPosition(screen.getByTestId('trajectory-graph-panel')), - ).toBe(Node.DOCUMENT_POSITION_FOLLOWING) + const playbackGroupPanel = screen.getByTestId('trajectory-playback-group-panel') + const graphPanel = screen.getByTestId('trajectory-graph-panel') + + // Graph now lives inside the playback group panel (compact layout) and + // appears after the playback controls within that container. + expect(playbackGroupPanel).toContainElement(graphPanel) }) it('keeps the outer trajectory tab panel free of its own vertical scrollbar', () => { @@ -187,8 +187,10 @@ describe('AnnotationWorkspace playback and trajectory tab flows', () => { }) const graphPanel = screen.getByTestId('trajectory-graph-panel') + const playbackGroupPanel = screen.getByTestId('trajectory-playback-group-panel') - expect(graphPanel.className).toContain('overflow-y-auto') + // Graph panel is contained in the scrollable playback group panel. 
+ expect(playbackGroupPanel.className).toContain('overflow-y-auto') expect(graphPanel).toContainElement(screen.getByText('Trajectory Plot')) }) diff --git a/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx b/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx index 34850655..97928c8c 100644 --- a/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx +++ b/data-management/viewer/frontend/src/components/__tests__/support/annotationWorkspaceTestSupport.tsx @@ -112,6 +112,7 @@ vi.mock('@/components/annotation-panel', () => ({
), + LanguageInstructionWidget: () =>
Language Instructions
, })) vi.mock('@/components/episode-viewer', () => ({ diff --git a/data-management/viewer/frontend/src/components/annotation-panel/LanguageInstructionWidget.tsx b/data-management/viewer/frontend/src/components/annotation-panel/LanguageInstructionWidget.tsx new file mode 100644 index 00000000..145046a8 --- /dev/null +++ b/data-management/viewer/frontend/src/components/annotation-panel/LanguageInstructionWidget.tsx @@ -0,0 +1,323 @@ +/** + * Language instruction annotation widget for VLA training. + * + * Captures natural language task descriptions, paraphrases for data + * augmentation, and subtask decompositions for hierarchical policy + * conditioning. + */ + +import { Plus, Trash2 } from 'lucide-react' +import { useCallback, useMemo, useState } from 'react' + +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { Textarea } from '@/components/ui/textarea' +import { useEpisodeAnnotations } from '@/hooks/use-annotations' +import { cn } from '@/lib/utils' +import { useAnnotationStore } from '@/stores' +import { useDatasetStore } from '@/stores/dataset-store' +import { useEpisodeStore } from '@/stores/episode-store' +import type { InstructionSource } from '@/types' + +import { FormSection } from './FormSection' + +const SOURCE_OPTIONS: { value: InstructionSource; label: string }[] = [ + { value: 'human', label: 'Human' }, + { value: 'template', label: 'Template' }, + { value: 'llm-generated', label: 'LLM Generated' }, + { value: 'retroactive', label: 'Retroactive' }, +] + +export function LanguageInstructionWidget() { + useEpisodeAnnotations('default') + + const currentAnnotation = useAnnotationStore((state) => state.currentAnnotation) + const updateLanguageInstruction = 
useAnnotationStore((state) => state.updateLanguageInstruction) + const clearLanguageInstruction = useAnnotationStore((state) => state.clearLanguageInstruction) + + const currentDataset = useDatasetStore((state) => state.currentDataset) + const currentEpisode = useEpisodeStore((state) => state.currentEpisode) + + const datasetTaskDescription = useMemo(() => { + const taskIndex = currentEpisode?.meta.taskIndex + if (taskIndex == null || !currentDataset?.tasks.length) return undefined + return currentDataset.tasks.find((t) => t.taskIndex === taskIndex)?.description + }, [currentDataset?.tasks, currentEpisode?.meta.taskIndex]) + + const langInst = currentAnnotation?.languageInstruction + + const [paraphraseInput, setParaphraseInput] = useState('') + const [subtaskInput, setSubtaskInput] = useState('') + + const handleAddParaphrase = useCallback(() => { + const trimmed = paraphraseInput.trim() + if (!trimmed || !langInst) return + updateLanguageInstruction({ + paraphrases: [...langInst.paraphrases, trimmed], + }) + setParaphraseInput('') + }, [paraphraseInput, langInst, updateLanguageInstruction]) + + const handleRemoveParaphrase = useCallback( + (index: number) => { + if (!langInst) return + updateLanguageInstruction({ + paraphrases: langInst.paraphrases.filter((_, i) => i !== index), + }) + }, + [langInst, updateLanguageInstruction], + ) + + const handleAddSubtask = useCallback(() => { + const trimmed = subtaskInput.trim() + if (!trimmed || !langInst) return + updateLanguageInstruction({ + subtaskInstructions: [...langInst.subtaskInstructions, trimmed], + }) + setSubtaskInput('') + }, [subtaskInput, langInst, updateLanguageInstruction]) + + const handleRemoveSubtask = useCallback( + (index: number) => { + if (!langInst) return + updateLanguageInstruction({ + subtaskInstructions: langInst.subtaskInstructions.filter((_, i) => i !== index), + }) + }, + [langInst, updateLanguageInstruction], + ) + + if (!currentAnnotation) { + return ( + + + Language Instruction + + +

No episode selected

+
+
+ ) + } + + if (!langInst) { + return ( + + + Language Instruction + + + {datasetTaskDescription ? ( + <> +

+ Dataset task description: +

+

{datasetTaskDescription}

+ + + ) : ( + <> +

+ Add a natural language task description for VLA training. +

+ + + )} +
+
+ ) + } + + const hasInstruction = langInst.instruction.trim().length > 0 + + return ( + + + + Language Instruction + {hasInstruction && ( + + {langInst.source} + + )} + + + + +