Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/content/docs/harbor/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ agent:
override_timeout_sec: 1200 # Time (seconds) given for a single Trial to run
kwargs:
max_turns: 32 # Max agent iterations per trial
store_all_messages: true # Required for SkyRL to extract training data
collect_rollout_details: true # Required for SkyRL to extract training data
temperature: 1.0 # Sampling temperature (higher = more exploration)
enable_summarize: false # Context summarization when nearing token limits
model_info:
Expand All @@ -221,7 +221,7 @@ agent:
```

<Callout type="info">
`store_all_messages: true` is **required** for training. Without it, SkyRL cannot extract the chat history needed to compute loss masks and train the model.
  `collect_rollout_details: true` is **required** for training; it records fields such as `prompt_token_ids`, `completion_token_ids`, and `logprobs`, which SkyRL needs to train the model.
</Callout>

### Key Knobs for RL Training
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Fully-async entrypoint for training on Harbor tasks.

Reuses HarborExp's generator/dataset overrides and swaps in
``FullyAsyncRayPPOTrainer``. This is the moral equivalent of
``examples/train/fully_async/main_fully_async.py`` for harbor.
"""

import asyncio
import sys

import ray
import yaml

from skyrl.train.fully_async_trainer import FullyAsyncRayPPOTrainer
from skyrl.train.utils import validate_cfg
from skyrl.train.utils.utils import initialize_ray

from .main_harbor import HARBOR_DEFAULT_CONFIG, HarborExp, HarborSkyRLConfig, _deep_merge


class HarborFullyAsyncExp(HarborExp):
    """Harbor experiment variant that trains with the fully-async PPO trainer.

    Identical to ``HarborExp`` except that ``get_trainer`` returns a
    ``FullyAsyncRayPPOTrainer`` and ``run`` drives its async ``train()``
    coroutine to completion.
    """

    def get_trainer(
        self,
        cfg,
        tracker,
        tokenizer,
        train_dataset,
        eval_dataset,
        inference_engine_client,
        generator,
        colocate_pg,
    ):
        # Same wiring as the base experiment, but with the fully-async trainer.
        trainer_kwargs = {
            "cfg": cfg,
            "tracker": tracker,
            "tokenizer": tokenizer,
            "train_dataset": train_dataset,
            "eval_dataset": eval_dataset,
            "inference_engine_client": inference_engine_client,
            "generator": generator,
            "colocate_pg": colocate_pg,
        }
        return FullyAsyncRayPPOTrainer(**trainer_kwargs)

    def run(self):
        # _setup_trainer() executes synchronously (argument evaluation happens
        # before asyncio.run starts the event loop), exactly as in the original.
        asyncio.run(self._setup_trainer().train())


@ray.remote(num_cpus=1)
def skyrl_entrypoint(cfg):
    """Ray task that builds the fully-async Harbor experiment and runs training."""
    HarborFullyAsyncExp(cfg).run()


def main() -> None:
    """CLI entrypoint: parse overrides, merge Harbor defaults, validate, launch.

    Raises:
        ValueError: if ``trainer.algorithm.max_seq_len`` is unset, since Harbor
            training needs an explicit cap for response truncation.
    """
    cfg = HarborSkyRLConfig.from_cli_overrides(sys.argv[1:])

    # Layer the user-supplied trial config on top of the packaged defaults.
    with open(HARBOR_DEFAULT_CONFIG) as f:
        base_trial_cfg = yaml.safe_load(f)
    cfg.harbor_trial_config = _deep_merge(base_trial_cfg, cfg.harbor_trial_config)

    validate_cfg(cfg)
    if cfg.trainer.algorithm.max_seq_len is None:
        raise ValueError(
            "trainer.algorithm.max_seq_len must be explicitly set for Harbor training; "
            "it is required to truncate responses to the maximum allowed length."
        )

    initialize_ray(cfg)
    # Block until the remote training task finishes (or propagates an error).
    ray.get(skyrl_entrypoint.remote(cfg))


# Script entrypoint guard: run training only when invoked directly.
if __name__ == "__main__":
    main()
Loading
Loading