Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/content/docs/harbor/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ agent:
override_timeout_sec: 1200 # Time (seconds) given for a single Trial to run
kwargs:
max_turns: 32 # Max agent iterations per trial
store_all_messages: true # Required for SkyRL to extract training data
collect_rollout_details: true # Required for SkyRL to extract training data
temperature: 1.0 # Sampling temperature (higher = more exploration)
enable_summarize: false # Context summarization when nearing token limits
model_info:
Expand All @@ -221,7 +221,7 @@ agent:
```

<Callout type="info">
`store_all_messages: true` is **required** for training. Without it, SkyRL cannot extract the chat history needed to compute loss masks and train the model.
  `collect_rollout_details: true` is **required** for training; it records fields such as `prompt_token_ids`, `completion_token_ids`, and `logprobs`, which SkyRL needs to train the model.
</Callout>

### Key Knobs for RL Training
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Fully-async entrypoint for training on Harbor tasks.

Reuses HarborExp's generator/dataset overrides and swaps in
``FullyAsyncRayPPOTrainer``. This is the moral equivalent of
``examples/train/fully_async/main_fully_async.py`` for harbor.
"""

import asyncio
import sys

import ray
import yaml

from skyrl.train.fully_async_trainer import FullyAsyncRayPPOTrainer
from skyrl.train.utils import validate_cfg
from skyrl.train.utils.utils import initialize_ray

from .main_harbor import HARBOR_DEFAULT_CONFIG, HarborExp, HarborSkyRLConfig, _deep_merge


class HarborFullyAsyncExp(HarborExp):
    """Harbor experiment variant that trains with the fully-async PPO trainer.

    Identical to ``HarborExp`` except that ``get_trainer`` returns a
    ``FullyAsyncRayPPOTrainer`` and ``run`` drives its async ``train()``
    coroutine to completion.
    """

    def get_trainer(
        self,
        cfg,
        tracker,
        tokenizer,
        train_dataset,
        eval_dataset,
        inference_engine_client,
        generator,
        colocate_pg,
    ):
        # Same wiring as the base experiment, but with the fully-async trainer.
        trainer_kwargs = {
            "cfg": cfg,
            "tracker": tracker,
            "tokenizer": tokenizer,
            "train_dataset": train_dataset,
            "eval_dataset": eval_dataset,
            "inference_engine_client": inference_engine_client,
            "generator": generator,
            "colocate_pg": colocate_pg,
        }
        return FullyAsyncRayPPOTrainer(**trainer_kwargs)

    def run(self):
        # _setup_trainer() executes synchronously (argument evaluation happens
        # before asyncio.run starts the event loop), exactly as in the original.
        asyncio.run(self._setup_trainer().train())


@ray.remote(num_cpus=1)
def skyrl_entrypoint(cfg):
    """Ray task that builds the fully-async Harbor experiment and runs training."""
    HarborFullyAsyncExp(cfg).run()


def main() -> None:
    """CLI entrypoint: parse overrides, merge Harbor defaults, validate, launch.

    Raises:
        ValueError: if ``trainer.algorithm.max_seq_len`` is unset, since Harbor
            training needs an explicit cap for response truncation.
    """
    cfg = HarborSkyRLConfig.from_cli_overrides(sys.argv[1:])

    # Layer the user-supplied trial config on top of the packaged defaults.
    with open(HARBOR_DEFAULT_CONFIG) as f:
        base_trial_cfg = yaml.safe_load(f)
    cfg.harbor_trial_config = _deep_merge(base_trial_cfg, cfg.harbor_trial_config)

    validate_cfg(cfg)
    if cfg.trainer.algorithm.max_seq_len is None:
        raise ValueError(
            "trainer.algorithm.max_seq_len must be explicitly set for Harbor training; "
            "it is required to truncate responses to the maximum allowed length."
        )

    initialize_ray(cfg)
    # Block until the remote training task finishes (or propagates an error).
    ray.get(skyrl_entrypoint.remote(cfg))


# Script entrypoint guard: run training only when invoked directly.
if __name__ == "__main__":
    main()
Loading
Loading