Skip to content

Commit eae4b6f

Browse files
committed
feat: enhance dynamo_check.py with color constants, disk space, and terse mode
- Replace direct escape sequences with Colors constants for better maintainability
- Add disk space checking in thorough mode, with a warning for <10% free space
- Add --terse mode showing only essential info (OS, User, GPU, Framework, Dynamo) and errors
- Integrate disk space info inline with directory entries instead of a separate section

Signed-off-by: Keiven Chang <[email protected]>
1 parent e0a00b7 commit eae4b6f

File tree

1 file changed

+133
-29
lines changed

1 file changed

+133
-29
lines changed

deploy/dynamo_check.py

Lines changed: 133 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
- Development tools (Cargo/Rust, Maturin, Python)
1313
- LLM frameworks (vllm, sglang, tensorrt_llm)
1414
- Dynamo runtime and framework components
15-
- File permissions (directory-level by default, detailed with --thorough-check)
15+
- File system (permissions and disk space, detailed with --thorough-check)
1616
- Installation status and component availability
1717
1818
The output uses status indicators:
@@ -22,8 +22,8 @@
2222
- ❓ Component not found (for optional items)
2323
2424
By default, the tool runs quickly by checking only directory permissions and skipping
25-
size calculations. Use --thorough-check for detailed file-level permission analysis
26-
and directory size information.
25+
size calculations. Use --thorough-check for detailed file-level permission analysis,
26+
directory size information, and disk space checking.
2727
2828
Exit codes:
2929
- 0: All critical components are present
@@ -35,7 +35,7 @@
3535
├─ OS Ubuntu 24.04.1 LTS (Noble Numbat) (Linux 6.11.0-28-generic x86_64), Memory=26.7/125.5 GiB, Cores=32
3636
├─ User info: user=ubuntu, uid=1000, gid=1000
3737
├─ ✅ NVIDIA GPU NVIDIA RTX 6000 Ada Generation, driver 570.133.07, CUDA 12.8, Power=26.14/300.00 W, Memory=289/49140 MiB
38-
├─ File Permissions
38+
├─ File System
3939
│ ├─ ✅ Dynamo workspace ($HOME/dynamo) writable
4040
│ ├─ ✅ Dynamo .git directory writable
4141
│ ├─ ✅ Rustup home ($HOME/.rustup) writable
@@ -96,6 +96,14 @@
9696
from typing import Any, Dict, List, Optional, Tuple
9797

9898

99+
# ANSI color constants
100+
class Colors:
    """Named ANSI escape sequences used to colorize terminal output."""

    # Clears any active color/attribute so the text that follows renders normally.
    RESET = "\033[0m"

    # 256-color foreground selector, palette index 196 (bright red).
    BRIGHT_RED = "\033[38;5;196m"
105+
106+
99107
class NodeStatus(Enum):
100108
"""Status of a tree node"""
101109

@@ -270,8 +278,14 @@ def _format_timestamp_pdt(self, timestamp: float) -> str:
270278
class SystemInfo(NodeInfo):
271279
"""Root node for system information"""
272280

273-
def __init__(self, hostname: Optional[str] = None, thorough_check: bool = False):
281+
def __init__(
282+
self,
283+
hostname: Optional[str] = None,
284+
thorough_check: bool = False,
285+
terse: bool = False,
286+
):
274287
self.thorough_check = thorough_check
288+
self.terse = terse
275289
if hostname is None:
276290
hostname = platform.node()
277291

@@ -290,35 +304,37 @@ def __init__(self, hostname: Optional[str] = None, thorough_check: bool = False)
290304
self._suppress_planner_warnings()
291305

292306
# Collect and add all system information
293-
# Add OS info
307+
# Always show: OS, User, GPU, Framework, Dynamo
294308
self.add_child(OSInfo())
295-
296-
# Add user info
297309
self.add_child(UserInfo())
298310

299-
# Add GPU info
311+
# Add GPU info (always show, even if not found)
300312
gpu_info = GPUInfo()
301-
# Always add GPU info so we can see errors like "nvidia-smi not found"
302313
self.add_child(gpu_info)
303314

304-
# Add file permissions check
305-
self.add_child(FilePermissionsInfo(thorough_check=self.thorough_check))
306-
307-
# Add Cargo (always show, even if not found)
308-
self.add_child(CargoInfo(thorough_check=self.thorough_check))
309-
310-
# Add Maturin (Python-Rust build tool)
311-
self.add_child(MaturinInfo())
312-
313-
# Add Python info
314-
self.add_child(PythonInfo())
315-
316315
# Add Framework info (vllm, sglang, tensorrt_llm)
317316
self.add_child(FrameworkInfo())
318317

319318
# Add Dynamo workspace info (always show, even if not found)
320319
self.add_child(DynamoInfo(thorough_check=self.thorough_check))
321320

321+
# In terse mode, only add other components if they have errors
322+
if not self.terse:
323+
# Add file permissions check
324+
self.add_child(FilePermissionsInfo(thorough_check=self.thorough_check))
325+
326+
# Add Cargo (always show, even if not found)
327+
self.add_child(CargoInfo(thorough_check=self.thorough_check))
328+
329+
# Add Maturin (Python-Rust build tool)
330+
self.add_child(MaturinInfo())
331+
332+
# Add Python info
333+
self.add_child(PythonInfo())
334+
else:
335+
# In terse mode, only add components that have errors
336+
self._add_error_only_components()
337+
322338
def _get_ip_address(self) -> Optional[str]:
323339
"""Get the primary IP address of the system."""
324340
try:
@@ -352,6 +368,21 @@ def _suppress_planner_warnings(self) -> None:
352368
defaults_logger = logging.getLogger("defaults._get_default_prometheus_endpoint")
353369
defaults_logger.setLevel(logging.ERROR)
354370

371+
def _add_error_only_components(self) -> None:
    """Attach the non-essential checks that report an error (terse mode).

    Builds the file system, Cargo, Maturin and Python checks and adds as
    children only those whose ``status`` is ``NodeStatus.ERROR``; every
    other check is discarded so the terse report stays short.
    """
    # All checks are constructed eagerly, before any of them is attached.
    candidates = [
        FilePermissionsInfo(thorough_check=self.thorough_check),
        CargoInfo(thorough_check=self.thorough_check),
        MaturinInfo(),
        PythonInfo(),
    ]

    for component in candidates:
        # Only the component's own status is consulted here.
        if component.status == NodeStatus.ERROR:
            self.add_child(component)
385+
355386

356387
class UserInfo(NodeInfo):
357388
"""User information"""
@@ -379,7 +410,13 @@ def __init__(self):
379410
gid = os.getgid()
380411

381412
desc = f"user={username}, uid={uid}, gid={gid}"
382-
super().__init__(label="User info", desc=desc, status=NodeStatus.INFO)
413+
414+
# Add warning if running as root
415+
status = NodeStatus.WARNING if uid == 0 else NodeStatus.INFO
416+
if uid == 0:
417+
desc += " ⚠️"
418+
419+
super().__init__(label="User info", desc=desc, status=status)
383420

384421

385422
class OSInfo(NodeInfo):
@@ -663,19 +700,22 @@ def _get_power_memory_string(
663700

664701

665702
class FilePermissionsInfo(NodeInfo):
666-
"""File permissions check for development environment directories
703+
"""File system check for development environment directories
667704
668705
Checks writability of critical directories needed for:
669706
- Dynamo development (top-level dynamo directory)
670707
- Rust development (Cargo target directory + all files, RUSTUP_HOME, CARGO_HOME)
671708
- Python development (site-packages)
672709
710+
In thorough mode, also checks disk space for the dynamo working directory
711+
and shows a warning if less than 10% free space is available.
712+
673713
In fast mode, skips recursive file checking in Cargo target directory
674714
for improved performance on large target directories.
675715
"""
676716

677717
def __init__(self, thorough_check: bool = False):
678-
super().__init__(label="File Permissions", status=NodeStatus.INFO)
718+
super().__init__(label="File System", status=NodeStatus.INFO)
679719
self.thorough_check = thorough_check
680720

681721
# Check top-level dynamo directory
@@ -780,11 +820,20 @@ def _check_permissions_unified(
780820
except Exception:
781821
desc_text = "writable (owned by unknown)"
782822

823+
# Add disk space info in thorough mode
824+
status = NodeStatus.OK # Default status
825+
if self.thorough_check:
826+
disk_space, disk_warning = self._format_disk_space(selected_path)
827+
desc_text += disk_space
828+
# Override status if disk space is low
829+
if disk_warning:
830+
status = disk_warning
831+
783832
results.append(
784833
NodeInfo(
785834
label=f"{label_prefix} ({self._replace_home_with_var(selected_path)}){warning_symbol}",
786835
desc=desc_text,
787-
status=NodeStatus.OK,
836+
status=status,
788837
)
789838
)
790839
else:
@@ -834,6 +883,14 @@ def _check_permissions_unified(
834883
"writable", "writable (owned by unknown)"
835884
)
836885

886+
# Add disk space info in thorough mode
887+
if self.thorough_check:
888+
disk_space, disk_warning = self._format_disk_space(selected_path)
889+
desc += disk_space
890+
# Override status if disk space is low
891+
if disk_warning:
892+
status = disk_warning
893+
837894
results.append(
838895
NodeInfo(
839896
label=f"{label_prefix} ({self._replace_home_with_var(selected_path)}){warning_symbol}",
@@ -1127,6 +1184,44 @@ def _check_rust_toolchain_permissions(self):
11271184
for result in cargo_results:
11281185
self.add_child(result)
11291186

1187+
def _format_disk_space(self, path: str) -> Tuple[str, Optional[NodeStatus]]:
1188+
"""Format disk space information for a given path
1189+
1190+
Returns:
1191+
Tuple of (formatted_string, warning_status_if_low_space)
1192+
"""
1193+
try:
1194+
# Get disk usage statistics
1195+
statvfs = os.statvfs(path)
1196+
1197+
# Calculate sizes in bytes
1198+
total_bytes = statvfs.f_frsize * statvfs.f_blocks
1199+
free_bytes = statvfs.f_frsize * statvfs.f_bavail
1200+
used_bytes = total_bytes - free_bytes
1201+
1202+
# Convert to human readable format
1203+
def format_bytes(bytes_val):
1204+
"""Convert bytes to human readable format"""
1205+
for unit in ["B", "KB", "MB", "GB", "TB"]:
1206+
if bytes_val < 1024.0:
1207+
return f"{bytes_val:.1f} {unit}"
1208+
bytes_val /= 1024.0
1209+
return f"{bytes_val:.1f} PB"
1210+
1211+
# Calculate percentage used
1212+
percent_used = (used_bytes / total_bytes) * 100
1213+
percent_free = 100 - percent_used
1214+
1215+
formatted_string = f", {format_bytes(used_bytes)}/{format_bytes(total_bytes)} ({percent_used:.1f}% used)"
1216+
1217+
# Return warning status if less than 10% free space
1218+
warning_status = NodeStatus.WARNING if percent_free < 10 else None
1219+
1220+
return formatted_string, warning_status
1221+
1222+
except Exception:
1223+
return "", None
1224+
11301225

11311226
class CargoInfo(NodeInfo):
11321227
"""Cargo tool information"""
@@ -1660,7 +1755,7 @@ def __init__(self, pythonpath: str):
16601755
# Check if path exists and is accessible
16611756
if not os.path.exists(p) or not os.access(p, os.R_OK):
16621757
display_paths.append(
1663-
f"\033[38;5;196m{display_path}\033[0m"
1758+
f"{Colors.BRIGHT_RED}{display_path}{Colors.RESET}"
16641759
) # Bright red path
16651760
has_invalid_paths = True
16661761
else:
@@ -2236,12 +2331,21 @@ def main():
22362331
parser.add_argument(
22372332
"--thorough-check",
22382333
action="store_true",
2239-
help="Enable thorough checking (file permissions, directory sizes, etc.)",
2334+
help="Enable thorough checking (file permissions, directory sizes, disk space, etc.)",
2335+
)
2336+
parser.add_argument(
2337+
"--terse",
2338+
action="store_true",
2339+
help="Show only essential information (OS, User, GPU, Framework, Dynamo) and errors",
22402340
)
22412341
args = parser.parse_args()
22422342

2343+
# Validate mutual exclusion
2344+
if args.thorough_check and args.terse:
2345+
parser.error("--thorough-check and --terse cannot be used together")
2346+
22432347
# Simply create a SystemInfo instance - it collects everything in its constructor
2244-
tree = SystemInfo(thorough_check=args.thorough_check)
2348+
tree = SystemInfo(thorough_check=args.thorough_check, terse=args.terse)
22452349
tree.print_tree()
22462350

22472351
# Check if there are framework component errors and show PYTHONPATH recommendation

0 commit comments

Comments
 (0)