Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
434d2f5
PullRequest: 353 [Lite] Add gradient checkpointing to FSDPEngine
nuzant Jul 14, 2025
d8038b2
PullRequest: 354 [lite] GRPO pre-commit: minor changes in FSDP engine
garrett4wade Jul 14, 2025
724628e
PullRequest: 355 [Lite] GRPO pre-commit 2: Refactor RemoteSGLangEngin…
garrett4wade Jul 14, 2025
8a15551
PullRequest: 357 [lite] GRPO pre-commit 3: Fix typos and experiment u…
garrett4wade Jul 14, 2025
3f95968
PullRequest: 358 [lite] Support GRPO training locally with the GSM8k …
garrett4wade Jul 15, 2025
c75dcaf
merge
garrett4wade Jul 16, 2025
b2bd639
PullRequest: 368 [lite] Refactor train engine after merging contribut…
garrett4wade Jul 16, 2025
b56f599
PullRequest: 371 [lite] [fix] fix misc bugs in GRPO implementation
garrett4wade Jul 16, 2025
ddabd9c
PullRequest: 370 [lite] Add Slurm Launcher and Ray Launcher
nuzant Jul 21, 2025
2f1b679
PullRequest: 392 [lite] Fix several bugs regarding RL learning and ad…
garrett4wade Jul 21, 2025
9c4da33
Merge branch 'lite' of https://github.com/inclusionAI/AReaL into lite
garrett4wade Jul 21, 2025
ab5db3f
.
garrett4wade Jul 21, 2025
4dd4a22
.
garrett4wade Jul 21, 2025
21bd032
PullRequest: 408 [Feature] Bump SGLang version to v0.4.9.post2
garrett4wade Jul 24, 2025
06d9370
PullRequest: 422 [lite] Fix tests and scripts after updating sgl to 0…
garrett4wade Jul 24, 2025
d6dbd90
PullRequest: 423 [lite] Remove the boba example for github release.
garrett4wade Jul 24, 2025
041121d
merge
garrett4wade Jul 24, 2025
be51586
Merge branch 'lite' of https://github.com/inclusionAI/AReaL into lite
garrett4wade Jul 24, 2025
b4b2018
Merge branch 'lite' of https://code.alipay.com/inclusionAI/AReaL into…
garrett4wade Jul 24, 2025
209aec6
update readme
garrett4wade Jul 24, 2025
8750616
Merge branch 'lite' of https://github.com/inclusionAI/AReaL into lite
garrett4wade Jul 24, 2025
86ee687
PullRequest: 431 [Fix] Fix environment of lite
garrett4wade Jul 25, 2025
04c8da4
PullRequest: 440 [FIX] fix update weight from disk
fishcrap Jul 25, 2025
7998055
PullRequest: 442 [lite] Refactor `RemoteSGLangEngine` into two parts:…
nuzant Jul 29, 2025
e825b99
PullRequest: 456 [lite] [Bug] Use `ProcessPoolExecutor` to calculate …
nuzant Jul 31, 2025
7a77fab
PullRequest: 460 [lite][fix] add a warning when reward computation ti…
garrett4wade Jul 31, 2025
7d1c852
PullRequest: 465 [lite][fix] Fix issues raised by tsao
garrett4wade Jul 31, 2025
58395f4
Merge branch 'lite' of https://code.alipay.com/inclusionAI/AReaL into…
garrett4wade Aug 1, 2025
8fb7564
PullRequest: 463 [FEAT] add config converter
fishcrap Aug 1, 2025
a962eb6
PullRequest: 466 [lite] Add seeding in training scripts and pad to ma…
garrett4wade Aug 1, 2025
c9cac38
PullRequest: 467 [lite][fix] remove logging functionality in stats_lo…
garrett4wade Aug 1, 2025
dd44bb7
Merge branch 'lite' of https://code.alipay.com/inclusionAI/AReaL into…
garrett4wade Aug 1, 2025
b4e7de5
PullRequest: 468 [fix] Fix the legacy realhf experiment in the sglang…
garrett4wade Aug 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/customization/agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ Finally, let's complete the implementation by collecting trajectories in the
class MultiTurnWorkflow(RolloutWorkflow):
# ... previous methods ...

async def arun_episode(self, engine: InferenceEngine, data):
async def arun_episode(self, engine: InferenceEngine, data) -> TensorDict:
# ... episode logic above ...

while reward == 0 and t < self.max_turns:
Expand Down
7 changes: 6 additions & 1 deletion functioncall/math/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@


def math_verify(
id2info, generateds: List, query_ids: List, batch_size=10, timeout=1000
id2info,
generateds: List,
query_ids: List,
batch_size=10,
timeout=1000,
max_workers=None,
) -> List:
assert len(generateds) == len(query_ids), (
len(generateds),
Expand Down
3 changes: 2 additions & 1 deletion realhf/api/core/data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ def load_hf_processor_and_tokenizer(
model_name_or_path: str,
fast_tokenizer=True,
padding_side: Optional[str] = None,
) -> Tuple[transformers.AutoProcessor, transformers.PreTrainedTokenizerFast]:
) -> Tuple["transformers.ProcessorMixin", transformers.PreTrainedTokenizerFast]:
"""Load a tokenizer and processor from Hugging Face."""
# NOTE: use the raw type annotation will trigger cuda initialization
tokenizer = load_hf_tokenizer(model_name_or_path, fast_tokenizer, padding_side)
try:
processor = transformers.AutoProcessor.from_pretrained(
Expand Down
28 changes: 18 additions & 10 deletions realhf/impl/model/backend/sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,28 @@ async def _do_generate(
)

most_recent_timestamps[output_idx] = timestamp
output.output_ids = [data[SGLANG_TOKEN_OUTPUT_IDENTIFIER]]
finish_reason = data["meta_info"]["finish_reason"]
if req.return_logprob:
output.output_logprobs = [
[
x[0]
for x in data["meta_info"]["output_token_logprobs"]
]
]
meta_info = data["meta_info"]
finish_reason = meta_info["finish_reason"]
assert finish_reason["type"] in [
"length",
"stop",
"abort",
], finish_reason
output.no_eos = [finish_reason["type"] == "length"]

if meta_info.get("output_token_logprobs"):
output.output_ids = [
[x[1] for x in meta_info["output_token_logprobs"]]
]
if req.return_logprob:
output.output_logprobs = [
[x[0] for x in meta_info["output_token_logprobs"]]
]
else:
output.output_ids = [[]]
if req.return_logprob:
output.output_logprobs = [[]]

output.no_eos = [finish_reason["type"] in ["length", "abort"]]
output.latency = latency

output_idx += 1
Expand Down