diff --git a/docs/design-docs/logger.md b/docs/design-docs/logger.md index 1c45529540..86f2db6258 100644 --- a/docs/design-docs/logger.md +++ b/docs/design-docs/logger.md @@ -1,6 +1,6 @@ # Logger -The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB, Tensorboard, and MLflow. +The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB, Tensorboard, MLflow, and SwanLab. ## Requirements @@ -10,12 +10,13 @@ The logger is designed to track key training metrics (including distributed metr * WandB * Tensorboard * MLflow + * SwanLab ## Overall Design Since there is a single controller, the single process running the main training loop will gather the metrics and do the logging. -To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger `, {py:class}`WandbLogger `, and {py:class}`MLflowLogger ` will implement: +To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger `, {py:class}`WandbLogger `, {py:class}`MLflowLogger `, and {py:class}`SwanlabLogger ` will implement: ```python class LoggerInterface(ABC): @@ -35,7 +36,7 @@ class LoggerInterface(ABC): A {py:class}`Logger ` wrapper class will also implement {py:class}`LoggerInterface ` and maintain a list of loggers to which it delegates writing logs. This will be the main class the user uses in the training loop. 
Usage example: ```python -# Initialize logger with wandb, tensorboard, and mlflow enabled +# Initialize logger with wandb, tensorboard, mlflow, and swanlab enabled logging_config = { "wandb_enabled": True, "tensorboard_enabled": False, @@ -45,6 +46,10 @@ logging_config = { "project": "grpo-dev", "name": "grpo-dev-logging", }, + "swanlab": { + "project": "nemo-rl", + "name": "grpo-dev-logging", + }, "tensorboard": { "log_dir": "logs", }, @@ -74,6 +79,13 @@ The logger supports three main logging backends: - Includes built-in hyperparameter logging - Offers rich visualization and collaboration features +### SwanLab +- Training visualization (Android, iOS, WeChat official account, and web) +- Automatic logging +- Hyperparameter recording +- Experiment comparison +- Multi-user collaboration + ### Tensorboard - Local file-based logging - Standard TensorBoard visualization @@ -121,6 +133,7 @@ The logger supports pretty-formatted logging of validation samples to help visua ```python logger: wandb_enabled: false + swanlab_enabled: false tensorboard_enabled: false mlflow_enabled: false num_val_samples_to_print: 10 @@ -140,7 +153,7 @@ When enabled, the pretty logging will generate formatted text similar to: ## GPU Metric Logging -NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard, WandB, and/or MLflow. +NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard, WandB, MLflow, and/or SwanLab. 
This approach allows us to offer the same GPU metric tracking on all loggers and simplifies the implementation greatly. @@ -149,6 +162,7 @@ This feature is enabled with the `monitor_gpus` configuration parameter. The fre ```python logger: wandb_enabled: false + swanlab_enabled: false tensorboard_enabled: false mlflow_enabled: false monitor_gpus: true @@ -162,8 +176,8 @@ While it is feasible to monitor using remote workers, the implementation require * Logs sent back to the driver do not introduce significant overhead. * Metrics remain clear and interpretable, avoiding issues like double counting caused by colocated workers. * Workers can gracefully flush their logs in case of failure. -* Logging behaves consistently across TensorBoard, WandB, and MLflow. +* Logging behaves consistently across TensorBoard, WandB, MLflow, and SwanLab. * Workers that spawn other workers accurately report the total resource usage of any grandchild workers. Due to these complexities, we opted for a simpler approach: collecting metrics exposed by the Ray metrics server from the driver. 
-::: \ No newline at end of file +::: diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 4524338e4f..84e24e49bf 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -155,6 +155,7 @@ logger: wandb_enabled: false # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/grpo-deepscaler-1.5b-8K.yaml index 576494b9b9..fdf87ec2d2 100644 --- a/examples/configs/grpo-deepscaler-1.5b-8K.yaml +++ b/examples/configs/grpo-deepscaler-1.5b-8K.yaml @@ -129,6 +129,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index b9be32bdda..88ec997350 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -138,6 +138,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index cf6ba44d75..b9f9739a75 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -157,6 +157,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + 
swanlab_enabled: false # Disable SwanLab logging monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/grpo_sliding_puzzle.yaml b/examples/configs/grpo_sliding_puzzle.yaml index aeb6b48da4..754be91664 100644 --- a/examples/configs/grpo_sliding_puzzle.yaml +++ b/examples/configs/grpo_sliding_puzzle.yaml @@ -53,6 +53,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml index e7eaef706a..e69ac2ca86 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml @@ -79,6 +79,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml index 4906550001..b7acfbcfbd 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml @@ -79,6 +79,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git 
a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml index 789f4fcbdf..7b4c22c5c2 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml @@ -112,6 +112,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 7d480f58a3..01f36d72c4 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -112,6 +112,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml index 8863fad45f..3221626692 100644 --- a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml @@ -78,6 +78,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + swanlab_enabled: false # Disable SwanLab logging mlflow_enabled: false monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml 
b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index cd63c3ba79..54b50cd9cd 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -108,6 +108,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + swanlab_enabled: false # Disable SwanLab logging mlflow_enabled: false monitor_gpus: true wandb: diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml index bdd0361cf8..022f613c76 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index ae1eee46c8..a9dda593fd 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index 4571128a9a..f29a560579 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -110,6 
+110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml index 33ecfae6a4..0a3bba5bd1 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml index ea862ee9d3..2309a2a414 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml index 0961b8f2c7..cd602a280b 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml 
b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index 978832bad0..1e02ff36a6 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -110,6 +110,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml index 19fbd99562..2aee79d498 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml @@ -59,6 +59,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml index 8f37dc440e..3e2a69f5a4 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml @@ -59,6 +59,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml index ec72f8f454..57d3736a13 100644 --- 
a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml @@ -103,6 +103,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml index 685990ab98..b20b901c3a 100644 --- a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml @@ -59,6 +59,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml index 2ebadb9670..9e27c789ca 100644 --- a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml +++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml @@ -59,6 +59,7 @@ logger: wandb_enabled: true tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index a592321cfe..cc065e6f72 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -138,6 +138,7 @@ logger: wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable 
class SwanlabLogger(LoggerInterface):
    """SwanLab logger backend.

    Wraps the run object returned by ``swanlab.init`` and forwards metric,
    hyperparameter and plot logging calls to it.
    """

    def __init__(self, cfg: SwanlabConfig, log_dir: Optional[str] = None):
        """Start a SwanLab run.

        Args:
            cfg: Keyword arguments forwarded unchanged to ``swanlab.init``
            log_dir: Directory handed to ``swanlab.init`` as ``logdir``
        """
        self.run = swanlab.init(**cfg, logdir=log_dir)
        print(
            f"Initialized SwanlabLogger for project {cfg.get('project')}, run {cfg.get('name')} (with offline logdir={log_dir})"
        )

    def define_metric(
        self,
        name: str,
        step_metric: Optional[str] = None,
    ) -> None:
        """Define a metric with custom step metric.

        Args:
            name: Name of the metric or pattern (e.g. 'ray/*')
            step_metric: Optional name of the step metric to use
        """
        # NOTE(review): this call shape mirrors the wandb run API; confirm that
        # the object returned by swanlab.init actually exposes define_metric.
        self.run.define_metric(name, step_metric=step_metric)

    def log_metrics(
        self,
        metrics: dict[str, Any],
        step: int,
        prefix: Optional[str] = "",
        step_metric: Optional[str] = None,
    ) -> None:
        """Log metrics to swanlab.

        Args:
            metrics: Dict of metrics to log
            step: Global step value
            prefix: Optional prefix for metric names
            step_metric: Optional name of a field in metrics to use as step instead
                         of the provided step value
        """
        if prefix:
            # Every key is prefixed except one that is exactly equal to
            # step_metric, which is kept under its own name.
            metrics = {
                f"{prefix}/{k}" if k != step_metric else k: v
                for k, v in metrics.items()
            }

        # If step_metric is provided, use the corresponding value from metrics as step
        if step_metric and step_metric in metrics:
            # commit=False so the step does not get incremented
            # NOTE(review): `commit` is a wandb keyword; verify that the
            # SwanLab run's log() accepts it before relying on this branch.
            self.run.log(metrics, commit=False)
        else:
            self.run.log(metrics, step=step)

    def log_hyperparams(self, params: Mapping[str, Any]) -> None:
        """Log hyperparameters to swanlab.

        Args:
            params: Dict of hyperparameters to log
        """
        self.run.config.update(params)

    def log_plot(self, figure: plt.Figure, step: int, name: str) -> None:
        """Log a plot to swanlab.

        Args:
            figure: Matplotlib figure to log
            step: Global step value
            name: Key under which the figure is logged
        """
        # NOTE(review): the figure object is passed to log() as-is; confirm the
        # backend accepts a raw matplotlib figure as a logged value.
        self.run.log({name: figure}, step=step)
class TestSwanlabLogger:
    """Test the SwanlabLogger class."""

    @pytest.fixture
    def temp_dir(self):
        """Create a temporary directory for logs and remove it afterwards."""
        temp_dir = tempfile.mkdtemp()
        yield temp_dir
        shutil.rmtree(temp_dir)

    @patch("nemo_rl.utils.logger.swanlab")
    def test_init_custom_config(self, mock_swanlab, temp_dir):
        """Test initialization of SwanlabLogger with custom config."""
        cfg = {
            "project": "custom-project",
            "name": "custom-run",
            "entity": "custom-entity",
            "group": "custom-group",
            "tags": ["tag1", "tag2"],
        }
        SwanlabLogger(cfg, log_dir=temp_dir)

        # The constructor always forwards log_dir as `logdir`, and
        # assert_called_once_with compares keyword arguments exactly, so the
        # expected call must include it.
        mock_swanlab.init.assert_called_once_with(
            project="custom-project",
            name="custom-run",
            entity="custom-entity",
            group="custom-group",
            tags=["tag1", "tag2"],
            logdir=temp_dir,
        )

    @patch("nemo_rl.utils.logger.swanlab")
    def test_log_metrics(self, mock_swanlab):
        """Test logging metrics to SwanlabLogger."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        metrics = {"loss": 0.5, "accuracy": 0.8}
        step = 10
        logger.log_metrics(metrics, step)

        # Check that log was called with metrics and step
        mock_run = mock_swanlab.init.return_value
        mock_run.log.assert_called_once_with(metrics, step=step)

    @patch("nemo_rl.utils.logger.swanlab")
    def test_log_metrics_with_prefix(self, mock_swanlab):
        """Test logging metrics with a prefix to SwanlabLogger."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        metrics = {"loss": 0.5, "accuracy": 0.8}
        step = 10
        prefix = "train"
        logger.log_metrics(metrics, step, prefix)

        # Check that log was called with prefixed metrics and step
        mock_run = mock_swanlab.init.return_value
        expected_metrics = {"train/loss": 0.5, "train/accuracy": 0.8}
        mock_run.log.assert_called_once_with(expected_metrics, step=step)

    @patch("nemo_rl.utils.logger.swanlab")
    def test_log_metrics_with_step_metric(self, mock_swanlab):
        """Test logging metrics with a step metric to SwanlabLogger."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        # Define step metric
        step_metric = "iteration"

        # Include the step metric in the metrics
        metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15}
        step = 10  # This should be ignored when step_metric is provided

        logger.log_metrics(metrics, step, step_metric=step_metric)

        # Check that log was called with metrics and commit=False
        # When using step_metric, step should be ignored and commit=False should be used
        mock_run = mock_swanlab.init.return_value
        mock_run.log.assert_called_once_with(metrics, commit=False)

    @patch("nemo_rl.utils.logger.swanlab")
    def test_log_metrics_with_prefix_and_step_metric(self, mock_swanlab):
        """Test logging metrics with both prefix and step metric."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        # Define prefix and step metric
        prefix = "train"
        step_metric = "train/iteration"

        # Include the step metric in the metrics
        metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15}
        step = 10  # This should be ignored when step_metric is provided

        logger.log_metrics(metrics, step, prefix=prefix, step_metric=step_metric)

        # Check that log was called with prefixed metrics and commit=False
        # The step_metric key gets prefixed based on the current implementation
        mock_run = mock_swanlab.init.return_value
        expected_metrics = {
            "train/loss": 0.5,
            "train/accuracy": 0.8,
            "train/iteration": 15,
        }
        mock_run.log.assert_called_once_with(expected_metrics, commit=False)

    @patch("nemo_rl.utils.logger.swanlab")
    def test_define_metric(self, mock_swanlab):
        """Test defining a metric with a custom step metric."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        # Define metric pattern and step metric
        logger.define_metric("ray/*", step_metric="ray/ray_step")

        # Check that define_metric was called
        mock_run = mock_swanlab.init.return_value
        mock_run.define_metric.assert_called_once_with(
            "ray/*", step_metric="ray/ray_step"
        )

    @patch("nemo_rl.utils.logger.swanlab")
    def test_log_hyperparams(self, mock_swanlab):
        """Test logging hyperparameters to SwanlabLogger."""
        cfg = {}
        logger = SwanlabLogger(cfg)

        params = {"lr": 0.001, "batch_size": 32, "model": {"hidden_size": 128}}
        logger.log_hyperparams(params)

        # Check that config.update was called with params
        mock_run = mock_swanlab.init.return_value
        mock_run.config.update.assert_called_once_with(params)
@patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + def test_init_swanlab_only(self, mock_tb_logger, mock_swanlab_logger, mock_wandb_logger, temp_dir): + """Test initialization with only SwanlabLogger enabled.""" + cfg = { + "wandb_enabled": False, + "tensorboard_enabled": False, + "mlflow_enabled": False, + "swanlab_enabled": True, + "monitor_gpus": False, + "swanlab": {"project": "test-project"}, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + assert len(logger.loggers) == 1 + mock_swanlab_logger.assert_called_once() + swanlab_cfg = mock_swanlab_logger.call_args[0][0] + assert swanlab_cfg == {"project": "test-project"} + mock_tb_logger.assert_not_called() + @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): @@ -1093,6 +1256,7 @@ def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir "wandb_enabled": False, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "tensorboard": {"log_dir": "test_logs"}, "log_dir": temp_dir, @@ -1113,6 +1277,7 @@ def test_init_both_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1137,6 +1302,7 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1164,6 +1330,7 @@ def test_log_hyperparams(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled":
False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1193,6 +1360,7 @@ def test_init_with_gpu_monitoring( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -1238,6 +1406,7 @@ def test_log_metrics_with_prefix_and_step_metric( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1276,6 +1445,7 @@ def test_log_plot_token_mult_prob_error( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1330,6 +1500,7 @@ def test_init_mlflow_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": False, "tensorboard_enabled": False, "mlflow_enabled": True, + "swanlab_enabled": False, "monitor_gpus": False, "mlflow": { "experiment_name": "test-experiment", @@ -1347,16 +1518,24 @@ def test_init_mlflow_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_init_all_loggers( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test initialization with all loggers enabled.""" cfg = { "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, + "swanlab_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, 
"mlflow": { "experiment_name": "test-experiment", @@ -1367,24 +1546,33 @@ def test_init_all_loggers( } logger = Logger(cfg) - assert len(logger.loggers) == 3 + assert len(logger.loggers) == 4 mock_wandb_logger.assert_called_once() mock_tb_logger.assert_called_once() mock_mlflow_logger.assert_called_once() + mock_swanlab_logger.assert_called_once() @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_log_metrics_with_mlflow( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test logging metrics to all enabled loggers including MLflow.""" cfg = { "wandb_enabled": True, + "swanlab_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, "mlflow": { "experiment_name": "test-experiment", @@ -1399,6 +1587,7 @@ def test_log_metrics_with_mlflow( mock_wandb_instance = mock_wandb_logger.return_value mock_tb_instance = mock_tb_logger.return_value mock_mlflow_instance = mock_mlflow_logger.return_value + mock_swanlab_instance = mock_swanlab_logger.return_value metrics = {"loss": 0.5, "accuracy": 0.8} step = 10 @@ -1406,6 +1595,9 @@ def test_log_metrics_with_mlflow( # Check that log_metrics was called on all loggers mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_swanlab_instance.log_metrics.assert_called_once_with( + metrics, step, "", None + ) mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) mock_mlflow_instance.log_metrics.assert_called_once_with( metrics, step, "", None @@ -1414,16 +1606,24 @@ def test_log_metrics_with_mlflow( @patch("nemo_rl.utils.logger.WandbLogger") 
@patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_log_hyperparams_with_mlflow( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test logging hyperparameters to all enabled loggers including MLflow.""" cfg = { "wandb_enabled": True, + "swanlab_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, "mlflow": {"experiment_name": "test-experiment"}, "log_dir": temp_dir, @@ -1434,6 +1634,7 @@ def test_log_hyperparams_with_mlflow( mock_wandb_instance = mock_wandb_logger.return_value mock_tb_instance = mock_tb_logger.return_value mock_mlflow_instance = mock_mlflow_logger.return_value + mock_swanlab_instance = mock_swanlab_logger.return_value params = {"lr": 0.001, "batch_size": 32} logger.log_hyperparams(params) @@ -1442,6 +1643,7 @@ def test_log_hyperparams_with_mlflow( mock_wandb_instance.log_hyperparams.assert_called_once_with(params) mock_tb_instance.log_hyperparams.assert_called_once_with(params) mock_mlflow_instance.log_hyperparams.assert_called_once_with(params) + mock_swanlab_instance.log_hyperparams.assert_called_once_with(params) def test_print_message_log_samples(capsys): diff --git a/uv.lock b/uv.lock index 5a5c083e29..2b18c8641b 100644 --- a/uv.lock +++ b/uv.lock @@ -384,6 +384,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/4d/1392562369b1139e741b30d624f09fe7091d17dd5579fae5732f044b12bb/blobfile-3.0.0-py3-none-any.whl", hash = "sha256:48ecc3307e622804bd8fe13bf6f40e6463c4439eba7a1f9ad49fd78aa63cc658", size = 75413, upload-time = "2024-08-27T00:02:51.518Z" }, ] +[[package]] +name = "boto3" +version = "1.39.17" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/45/8321081d478881779551692a185bbe19574a2ac87294f5096c549837292c/boto3-1.39.17-py3-none-any.whl", hash = "sha256:6af9f7d6db7b5e72d6869ae22ebad1b0c6602591af2ef5d914b331a055953df5", size = 139901, upload-time = "2025-07-30T19:27:04.001Z" }, +] + +[[package]] +name = "botocore" +version = "1.39.17" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/9d/fc3cfb3305c355dde52870434917304c40dfb642c332c9edbe646939a3bc/botocore-1.39.17.tar.gz", hash = "sha256:1a1f0b29dab5d1b10d16f14423c16ac0a3043272f579e9ab0d757753ee9a7d2b", size = 14250697, upload-time = "2025-07-30T19:26:55.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/40/d16536e0db30c35c14cfd7f2227fccb59f7b999b501ed410bddb9e1492cf/botocore-1.39.17-py3-none-any.whl", hash = "sha256:41db169e919f821b3ef684794c5e67dd7bb1f5ab905d33729b1d8c27fafe8004", size = 13908201, upload-time = "2025-07-30T19:26:50.726Z" }, +] + [[package]] name = "braceexpand" version = "0.1.7" @@ -1724,6 +1751,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213, upload-time = "2025-05-18T19:04:41.894Z" }, ] +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", 
size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, +] + [[package]] name = "joblib" version = "1.5.1" @@ -2468,6 +2504,7 @@ dependencies = [ { name = "ray", extra = ["default"] }, { name = "rich" }, { name = "setuptools" }, + { name = "swanlab" }, { name = "tensorboard" }, { name = "tiktoken" }, { name = "torch" }, @@ -2549,6 +2586,7 @@ requires-dist = [ { name = "ray", extras = ["default"], specifier = "==2.46.0" }, { name = "rich" }, { name = "setuptools" }, + { name = "swanlab" }, { name = "tensorboard" }, { name = "tiktoken" }, { name = "torch", specifier = "==2.7.0", index = "https://download.pytorch.org/whl/cu128" }, @@ -3441,6 +3479,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/e3/54cd906d377e1766299df14710ded125e195d5c685c8f1bafecec073e9c6/pre_commit-3.6.0-py2.py3-none-any.whl", hash = "sha256:c255039ef399049a5544b6ce13d135caba8f2c28c3b4033277a788f434308376", size = 204021, upload-time = "2023-12-09T21:25:28.932Z" }, ] +[[package]] +name = "prettytable" +version = "3.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/b1/85e18ac92afd08c533603e3393977b6bc1443043115a47bb094f3b98f94f/prettytable-3.16.0.tar.gz", hash = "sha256:3c64b31719d961bf69c9a7e03d0c1e477320906a98da63952bc6698d6164ff57", size = 66276, upload-time = "2025-03-24T19:39:04.008Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/c7/5613524e606ea1688b3bdbf48aa64bafb6d0a4ac3750274c43b6158a390f/prettytable-3.16.0-py3-none-any.whl", hash = "sha256:b5eccfabb82222f5aa46b798ff02a8452cf530a352c31bddfa29be41242863aa", size = 33863, upload-time = 
"2025-03-24T19:39:02.359Z" }, +] + [[package]] name = "prometheus-client" version = "0.22.1" @@ -3794,6 +3844,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" }, ] +[[package]] +name = "pyecharts" +version = "2.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "prettytable" }, + { name = "simplejson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/68/c86a3960ab5d97709237e91a5136ae02079c3c39c83192ca16f1a25f8b1a/pyecharts-2.0.8.tar.gz", hash = "sha256:908dbd939862dd3c76bb53697bdb41d3cdd0b5ba48ca69a76a6085d0aa27dbdf", size = 165148, upload-time = "2025-01-24T03:10:07.179Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/18/383622b338e4f6948ba1b75a8155d748ce097ead08a4163ca763f0ad510e/pyecharts-2.0.8-py3-none-any.whl", hash = "sha256:8b711ba139f39f89bc1b2a869d7adda89dc74c910d158a1f9063109fe66bc985", size = 153686, upload-time = "2025-01-24T03:10:03.737Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -3823,6 +3887,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141, upload-time = "2022-01-07T22:06:01.861Z" }, ] +[[package]] +name = "pynvml" +version = "12.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-ml-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/6f/6b5880ed0239e85b9a39aed103b65b2ef81425beef9f45e5c035bf008330/pynvml-12.0.0.tar.gz", hash = "sha256:299ce2451a6a17e6822d6faee750103e25b415f06f59abb8db65d30f794166f5", 
size = 33636, upload-time = "2024-12-02T15:04:36.631Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/df/f7cf07a65a96dd11d71f346f9c2863accdd4784da83af7181b067d556cbc/pynvml-12.0.0-py3-none-any.whl", hash = "sha256:fdff84b62a27dbe98e08e1a647eb77342bef1aebe0878bcd15e99a83fcbecb9e", size = 26560, upload-time = "2024-12-02T15:04:35.047Z" }, +] + [[package]] name = "pyparsing" version = "3.2.3" @@ -4332,6 +4408,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/d8/de873d1c1b020d668d8ec9855d390764cb90cf8f6486c0983da52be8b7b7/ruff-0.9.9-py3-none-win_arm64.whl", hash = "sha256:3ac78f127517209fe6d96ab00f3ba97cafe38718b23b1db3e96d8b2d39e37ddf", size = 10435860, upload-time = "2025-02-28T10:16:39.481Z" }, ] +[[package]] +name = "s3transfer" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/05/d52bf1e65044b4e5e27d4e63e8d1579dbdec54fce685908ae09bc3720030/s3transfer-0.13.1.tar.gz", hash = "sha256:c3fdba22ba1bd367922f27ec8032d6a1cf5f10c934fb5d68cf60fd5a23d936cf", size = 150589, upload-time = "2025-07-18T19:22:42.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/4f/d073e09df851cfa251ef7840007d04db3293a0482ce607d2b993926089be/s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724", size = 85308, upload-time = "2025-07-18T19:22:40.947Z" }, +] + [[package]] name = "safetensors" version = "0.5.3" @@ -4511,6 +4599,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "simplejson" +version = "3.20.1" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/af/92/51b417685abd96b31308b61b9acce7ec50d8e1de8fbc39a7fd4962c60689/simplejson-3.20.1.tar.gz", hash = "sha256:e64139b4ec4f1f24c142ff7dcafe55a22b811a74d86d66560c8815687143037d", size = 85591, upload-time = "2025-02-15T05:18:53.15Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/eb/34c16a1ac9ba265d024dc977ad84e1659d931c0a700967c3e59a98ed7514/simplejson-3.20.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f31c4a3a7ab18467ee73a27f3e59158255d1520f3aad74315edde7a940f1be23", size = 93100, upload-time = "2025-02-15T05:16:38.801Z" }, + { url = "https://files.pythonhosted.org/packages/41/fc/2c2c007d135894971e6814e7c0806936e5bade28f8db4dd7e2a58b50debd/simplejson-3.20.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:884e6183d16b725e113b83a6fc0230152ab6627d4d36cb05c89c2c5bccfa7bc6", size = 75464, upload-time = "2025-02-15T05:16:40.905Z" }, + { url = "https://files.pythonhosted.org/packages/0f/05/2b5ecb33b776c34bb5cace5de5d7669f9b60e3ca13c113037b2ca86edfbd/simplejson-3.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03d7a426e416fe0d3337115f04164cd9427eb4256e843a6b8751cacf70abc832", size = 75112, upload-time = "2025-02-15T05:16:42.246Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/1f3609a2792f06cd4b71030485f78e91eb09cfd57bebf3116bf2980a8bac/simplejson-3.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:000602141d0bddfcff60ea6a6e97d5e10c9db6b17fd2d6c66199fa481b6214bb", size = 150182, upload-time = "2025-02-15T05:16:43.557Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b0/053fbda38b8b602a77a4f7829def1b4f316cd8deb5440a6d3ee90790d2a4/simplejson-3.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af8377a8af78226e82e3a4349efdde59ffa421ae88be67e18cef915e4023a595", size = 158363, upload-time = "2025-02-15T05:16:45.748Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/4b/2eb84ae867539a80822e92f9be4a7200dffba609275faf99b24141839110/simplejson-3.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c7de4c88ab2fbcb8781a3b982ef883696736134e20b1210bca43fb42ff1acf", size = 148415, upload-time = "2025-02-15T05:16:47.861Z" }, + { url = "https://files.pythonhosted.org/packages/e0/bd/400b0bd372a5666addf2540c7358bfc3841b9ce5cdbc5cc4ad2f61627ad8/simplejson-3.20.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:455a882ff3f97d810709f7b620007d4e0aca8da71d06fc5c18ba11daf1c4df49", size = 152213, upload-time = "2025-02-15T05:16:49.25Z" }, + { url = "https://files.pythonhosted.org/packages/50/12/143f447bf6a827ee9472693768dc1a5eb96154f8feb140a88ce6973a3cfa/simplejson-3.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc0f523ce923e7f38eb67804bc80e0a028c76d7868500aa3f59225574b5d0453", size = 150048, upload-time = "2025-02-15T05:16:51.5Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ea/dd9b3e8e8ed710a66f24a22c16a907c9b539b6f5f45fd8586bd5c231444e/simplejson-3.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76461ec929282dde4a08061071a47281ad939d0202dc4e63cdd135844e162fbc", size = 151668, upload-time = "2025-02-15T05:16:53Z" }, + { url = "https://files.pythonhosted.org/packages/99/af/ee52a8045426a0c5b89d755a5a70cc821815ef3c333b56fbcad33c4435c0/simplejson-3.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19c2da8c043607bde4d4ef3a6b633e668a7d2e3d56f40a476a74c5ea71949f", size = 158840, upload-time = "2025-02-15T05:16:54.851Z" }, + { url = "https://files.pythonhosted.org/packages/68/db/ab32869acea6b5de7d75fa0dac07a112ded795d41eaa7e66c7813b17be95/simplejson-3.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2578bedaedf6294415197b267d4ef678fea336dd78ee2a6d2f4b028e9d07be3", size = 154212, upload-time = "2025-02-15T05:16:56.318Z" }, + { 
url = "https://files.pythonhosted.org/packages/fa/7a/e3132d454977d75a3bf9a6d541d730f76462ebf42a96fea2621498166f41/simplejson-3.20.1-cp312-cp312-win32.whl", hash = "sha256:339f407373325a36b7fd744b688ba5bae0666b5d340ec6d98aebc3014bf3d8ea", size = 74101, upload-time = "2025-02-15T05:16:57.746Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5d/4e243e937fa3560107c69f6f7c2eed8589163f5ed14324e864871daa2dd9/simplejson-3.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:627d4486a1ea7edf1f66bb044ace1ce6b4c1698acd1b05353c97ba4864ea2e17", size = 75736, upload-time = "2025-02-15T05:16:59.017Z" }, + { url = "https://files.pythonhosted.org/packages/c4/03/0f453a27877cb5a5fff16a975925f4119102cc8552f52536b9a98ef0431e/simplejson-3.20.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:71e849e7ceb2178344998cbe5ade101f1b329460243c79c27fbfc51c0447a7c3", size = 93109, upload-time = "2025-02-15T05:17:00.377Z" }, + { url = "https://files.pythonhosted.org/packages/74/1f/a729f4026850cabeaff23e134646c3f455e86925d2533463420635ae54de/simplejson-3.20.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b63fdbab29dc3868d6f009a59797cefaba315fd43cd32ddd998ee1da28e50e29", size = 75475, upload-time = "2025-02-15T05:17:02.544Z" }, + { url = "https://files.pythonhosted.org/packages/e2/14/50a2713fee8ff1f8d655b1a14f4a0f1c0c7246768a1b3b3d12964a4ed5aa/simplejson-3.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1190f9a3ce644fd50ec277ac4a98c0517f532cfebdcc4bd975c0979a9f05e1fb", size = 75112, upload-time = "2025-02-15T05:17:03.875Z" }, + { url = "https://files.pythonhosted.org/packages/45/86/ea9835abb646755140e2d482edc9bc1e91997ed19a59fd77ae4c6a0facea/simplejson-3.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1336ba7bcb722ad487cd265701ff0583c0bb6de638364ca947bb84ecc0015d1", size = 150245, upload-time = "2025-02-15T05:17:06.899Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/b4/53084809faede45da829fe571c65fbda8479d2a5b9c633f46b74124d56f5/simplejson-3.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e975aac6a5acd8b510eba58d5591e10a03e3d16c1cf8a8624ca177491f7230f0", size = 158465, upload-time = "2025-02-15T05:17:08.707Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7d/d56579468d1660b3841e1f21c14490d103e33cf911886b22652d6e9683ec/simplejson-3.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a6dd11ee282937ad749da6f3b8d87952ad585b26e5edfa10da3ae2536c73078", size = 148514, upload-time = "2025-02-15T05:17:11.323Z" }, + { url = "https://files.pythonhosted.org/packages/19/e3/874b1cca3d3897b486d3afdccc475eb3a09815bf1015b01cf7fcb52a55f0/simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab980fcc446ab87ea0879edad41a5c28f2d86020014eb035cf5161e8de4474c6", size = 152262, upload-time = "2025-02-15T05:17:13.543Z" }, + { url = "https://files.pythonhosted.org/packages/32/84/f0fdb3625292d945c2bd13a814584603aebdb38cfbe5fe9be6b46fe598c4/simplejson-3.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f5aee2a4cb6b146bd17333ac623610f069f34e8f31d2f4f0c1a2186e50c594f0", size = 150164, upload-time = "2025-02-15T05:17:15.021Z" }, + { url = "https://files.pythonhosted.org/packages/95/51/6d625247224f01eaaeabace9aec75ac5603a42f8ebcce02c486fbda8b428/simplejson-3.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:652d8eecbb9a3b6461b21ec7cf11fd0acbab144e45e600c817ecf18e4580b99e", size = 151795, upload-time = "2025-02-15T05:17:16.542Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d9/bb921df6b35be8412f519e58e86d1060fddf3ad401b783e4862e0a74c4c1/simplejson-3.20.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c09948f1a486a89251ee3a67c9f8c969b379f6ffff1a6064b41fea3bce0a112", size = 159027, upload-time = 
"2025-02-15T05:17:18.083Z" }, + { url = "https://files.pythonhosted.org/packages/03/c5/5950605e4ad023a6621cf4c931b29fd3d2a9c1f36be937230bfc83d7271d/simplejson-3.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cbbd7b215ad4fc6f058b5dd4c26ee5c59f72e031dfda3ac183d7968a99e4ca3a", size = 154380, upload-time = "2025-02-15T05:17:20.334Z" }, + { url = "https://files.pythonhosted.org/packages/66/ad/b74149557c5ec1e4e4d55758bda426f5d2ec0123cd01a53ae63b8de51fa3/simplejson-3.20.1-cp313-cp313-win32.whl", hash = "sha256:ae81e482476eaa088ef9d0120ae5345de924f23962c0c1e20abbdff597631f87", size = 74102, upload-time = "2025-02-15T05:17:22.475Z" }, + { url = "https://files.pythonhosted.org/packages/db/a9/25282fdd24493e1022f30b7f5cdf804255c007218b2bfaa655bd7ad34b2d/simplejson-3.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:1b9fd15853b90aec3b1739f4471efbf1ac05066a2c7041bf8db821bb73cd2ddc", size = 75736, upload-time = "2025-02-15T05:17:24.122Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/00f02a0a921556dd5a6db1ef2926a1bc7a8bbbfb1c49cfed68a275b8ab2b/simplejson-3.20.1-py3-none-any.whl", hash = "sha256:8a6c1bbac39fa4a79f83cbf1df6ccd8ff7069582a9fd8db1e52cea073bc2c697", size = 57121, upload-time = "2025-02-15T05:18:51.243Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -4742,6 +4865,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" }, ] +[[package]] +name = "swankit" +version = "0.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/b8/025612465e22f77df120401055f366a66877fd26f52d718ea558f65814bd/swankit-0.2.4.tar.gz", hash = "sha256:ed89b7d23351f4038930da78d34195604d76c2285c31586ecf1a4cb9fad9c33d", 
size = 21641, upload-time = "2025-06-13T17:48:03.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/c7/7cc8d6bc562ce96d751a7655421eae09ba795cd557ed4791d63a72bd8f9a/swankit-0.2.4-py3-none-any.whl", hash = "sha256:8e6d3a50451ed1f708f375b839964c5815fe7152a39ed7588921d67c1ba469f8", size = 23853, upload-time = "2025-06-13T17:48:02.996Z" }, +] + +[[package]] +name = "swanlab" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boto3" }, + { name = "botocore" }, + { name = "click" }, + { name = "psutil" }, + { name = "pydantic" }, + { name = "pyecharts" }, + { name = "pynvml" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "swankit" }, + { name = "urllib3" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e2/ef/ca99701801abd5fbb8b792be5af16016045ca5f2f1f034d1aebd73094c7d/swanlab-0.6.4.tar.gz", hash = "sha256:1e9278238cce43f1eeca7caf1e8488f1550f3b143b1e783402808dd0a5856f8a", size = 377309, upload-time = "2025-06-18T07:36:48.763Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/23/a4316595242bb58421d56b8485c862f021f2451a548ad5a3fb16ae11f8a4/swanlab-0.6.4-py3-none-any.whl", hash = "sha256:75f56f5d50f4e445cdeff26bcb4c4343a13b9095d7313ba57cff0e8905306518", size = 261909, upload-time = "2025-06-18T07:36:47.372Z" }, +] + [[package]] name = "sympy" version = "1.14.0"