Skip to content
Open
Show file tree
Hide file tree
Changes from 65 commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
cac78d7
Add environment selector
rafapi Nov 4, 2025
df1d846
Fix env launcher
rafapi Nov 4, 2025
9735130
Adapt domains to env registry
rafapi Nov 4, 2025
bb5e5ca
Adapt domain configs
rafapi Nov 4, 2025
a1a02bf
Collect env info
rafapi Nov 4, 2025
e4d0bc4
Remove unrelated files
rafapi Nov 6, 2025
da43cbc
Remove backup
rafapi Nov 6, 2025
5b18001
Remove duplicates
rafapi Nov 6, 2025
9af7329
Restore
rafapi Nov 6, 2025
599b510
add domains
rafapi Nov 8, 2025
32eb5b8
add coding
rafapi Nov 8, 2025
efaec65
Add remaining loaders
rafapi Nov 8, 2025
1220f6d
Domain mix tracking metrics
rafapi Nov 16, 2025
158b2ea
update domain rollouts
rafapi Nov 16, 2025
fe8e728
refresh async llm flow
rafapi Nov 16, 2025
6f9c5cc
sync coding init
rafapi Nov 16, 2025
455ed42
expand coding dataset
rafapi Nov 16, 2025
795e490
remove legacy executor
rafapi Nov 16, 2025
68072b1
revise coding rollouts
rafapi Nov 16, 2025
981cb74
adjust multidomain loader
rafapi Nov 16, 2025
f7e6946
refresh preprocess pipeline
rafapi Nov 16, 2025
73ca9d1
enhance utils helpers
rafapi Nov 16, 2025
38ff188
add multi domain config
rafapi Nov 16, 2025
2c5ebfd
introduce domain sampling
rafapi Nov 16, 2025
40cf648
add coding sandbox test
rafapi Nov 16, 2025
2c74b77
implement verifier api
rafapi Nov 16, 2025
cdfe57b
add symbolic init
rafapi Nov 16, 2025
a3c4106
add symbolic dataset
rafapi Nov 16, 2025
7eef15e
add symbolic rollouts
rafapi Nov 16, 2025
cc091ac
remove deleted domains
rafapi Nov 17, 2025
7664773
remove symbolic
rafapi Nov 17, 2025
120ba7b
restore env replica compatibility and uniqueness
rafapi Nov 17, 2025
f8d147e
fix
rafapi Nov 17, 2025
62ad5fb
up test len
rafapi Nov 17, 2025
2d22d5e
per domain logging
rafapi Nov 18, 2025
52fcb56
add domain to rollout data
rafapi Nov 18, 2025
9da8f04
keep existing env_replicas value
rafapi Nov 18, 2025
3368e69
restore template
rafapi Nov 18, 2025
2831252
add default to rep per actor
rafapi Nov 18, 2025
cb916c1
weight replicas by domain mix
rafapi Nov 18, 2025
a6ad805
add easy mix configs
rafapi Nov 18, 2025
2243897
remove bloated coding rewards
rafapi Nov 18, 2025
0cbc542
use existing reward structure
rafapi Nov 18, 2025
6888e05
Merge branch 'main' into multi-env
rafapi Nov 18, 2025
4adcd81
fix naming
rafapi Nov 19, 2025
a5a6e44
fix cache data composition
rafapi Nov 19, 2025
dca8a43
remove tapeagents imports
rafapi Nov 19, 2025
84b6587
remove tapeagents imports
rafapi Nov 19, 2025
8bbca61
add fn_calling
rafapi Nov 19, 2025
1937578
coding conf
rafapi Nov 19, 2025
69b5154
main mix config
rafapi Nov 19, 2025
fd2fc3b
fix finish reason detection
rafapi Nov 19, 2025
b139560
include fn_calling loader
rafapi Nov 19, 2025
8b5c159
add domain_mix placeholder
rafapi Nov 19, 2025
b876adb
add fn_calling
rafapi Nov 19, 2025
904c80e
fix path
rafapi Nov 19, 2025
e4017d9
change mix
rafapi Nov 19, 2025
d7935d0
Fix strings
rafapi Nov 20, 2025
8e2e7b3
ensure we are passing an empty call type
rafapi Nov 20, 2025
5e10988
fix imports
rafapi Nov 20, 2025
dac01c1
return
rafapi Nov 22, 2025
eb3bacf
return too
rafapi Nov 22, 2025
f5093bf
return more
rafapi Nov 22, 2025
27a2a6d
extract ability list
rafapi Nov 22, 2025
5aea032
normalise prompt
rafapi Nov 22, 2025
02f9294
add missing math-code mix config
rafapi Nov 24, 2025
f6c128c
declare zero weight domains
rafapi Nov 24, 2025
60169b2
use hydra object conversion
rafapi Nov 24, 2025
a3de18e
remove empty lines
rafapi Nov 24, 2025
ba360fa
fix end of line
rafapi Nov 24, 2025
f405321
remove tapeagents import
rafapi Nov 24, 2025
bba110d
remove duplicate code
rafapi Nov 24, 2025
8f60aa2
Merge branch 'main' into multi-env
rafapi Nov 26, 2025
67ffd60
fix imports
rafapi Nov 26, 2025
7ea8744
remove redundant object conversion
rafapi Nov 26, 2025
182ee6e
init dataset placeholders
rafapi Nov 28, 2025
6bc73eb
flatten and convert to python object
rafapi Nov 28, 2025
63c3035
fix per domain system prompt
rafapi Nov 28, 2025
fa9a9bc
add sys prompt for coding
rafapi Nov 28, 2025
5e9b037
only spawn environments present in domain mix
rafapi Nov 28, 2025
2677ad9
adaptive sampling
rafapi Nov 29, 2025
48afae1
track domains
rafapi Nov 29, 2025
00fd6cb
track domains
rafapi Nov 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions conf/actor/web.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ llm_max_rollouts: 128
rollout_workers: 1
rollout_policy: pipelinerl.domains.deep_research.tapeagents_rollouts.generate_rollout

environment:
_target_: tapeagents.mcp.MCPEnvironment
config_path: conf/mcp/web.json
environments:
- key: mcp
mode: embedded
_target_: tapeagents.mcp.MCPEnvironment
config_path: conf/mcp/web.json
environment_key: mcp

llm:
_target_: tapeagents.llms.LiteLLM
Expand Down Expand Up @@ -105,4 +108,4 @@ only_tasks: #[] # list of (level, task_num)
- [1, 4]
- [1, 5]
- [1, 6]
- [1, 7]
- [1, 7]
3 changes: 2 additions & 1 deletion conf/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defaults:
- finetune: actor_critic
- rewards: pure_success
- streams: files
- domain_mix: null
- _self_

seed: 42
Expand All @@ -18,6 +19,7 @@ actor:
result_queue_size: 64
throughput_window_size: 50
shared_memory_entry_size: 10000000
domain_mix: null
environment: null
preprocess:
input: actor
Expand Down Expand Up @@ -135,4 +137,3 @@ wandb:
wandb_dir: null
# Comma-separated list of keywords to tag the run.
tags: []

50 changes: 50 additions & 0 deletions conf/coding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Coding-domain run configuration layered on top of `base`.
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing. The `coding_*` keys sit under `actor` because the `environments`
# entry below reads them as `${actor.coding_*}`; confirm that the dataset and
# environment keys are meant to be top-level.
defaults:
  - base
  - _self_

actor:
  rollout_policy: pipelinerl.domains.coding.generate_coding_rollout
  system_prompt: ""
  task_template: |-
    {task}
  task_prompt: ""
  ensure_boxed_answers: false

  # Sandbox limits; forwarded to the coding environment via interpolation.
  coding_time_limit_s: 15.0
  coding_per_test_timeout_s: 10.0
  coding_memory_limit_bytes: 1073741824  # 1 GiB (2**30)
  coding_compile_timeout_s: 10.0
  # Read from CODING_SANDBOX_URL when set, otherwise the default URL below.
  coding_sandbox_url: ${oc.env:CODING_SANDBOX_URL, "http://sandbox:8080/run_code"}

dataset_loader: pipelinerl.domains.coding.dataset.load_problems
dataset_loader_params:
  dataset_id: ServiceNow-AI/mixed-training-text-datasets
  dataset_config: 80k-if-math-coding-fncalling-stem
  # Fractions for the three splits; they sum to 1.0.
  split_ratios:
    train: 0.9
    validation: 0.05
    test: 0.05
  allowed_call_types:
    - assert
    - std
  max_examples_per_split: 2048
  trust_remote_code: true
  # Read from CODING_HF_TOKEN when set, otherwise null.
  huggingface_token: ${oc.env:CODING_HF_TOKEN, null}

train_dataset_names:
  - coding@train

test_dataset_names:
  - coding@validation

environments:
  - key: coding
    mode: remote
    _target_: pipelinerl.domains.coding.CodingSandboxEnvironment
    sandbox_url: ${actor.coding_sandbox_url}
    compile_timeout_s: ${actor.coding_compile_timeout_s}
    run_timeout_s: ${actor.coding_per_test_timeout_s}
    request_timeout_s: ${actor.coding_time_limit_s}
    memory_limit_bytes: ${actor.coding_memory_limit_bytes}

# Matches the `key` of the single entry in `environments` above.
environment_key: coding
30 changes: 30 additions & 0 deletions conf/debug/multi_domain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Debug configuration for a small multi-domain run (math, guessing, coding).
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm nesting against the repository.
defaults:
  - base
  # NOTE(review): conf/math_code.yaml selects this group with the absolute form
  # `/domain_rollouts@domain_rollouts: base`; confirm this relative entry
  # resolves to the same config from inside `conf/debug/`.
  - domain_rollouts: base
  - override rewards: success_and_format
  - _self_

actor:
  rollout_policy: pipelinerl.domains.dispatcher.generate_multidomain_rollout
  llm_max_rollouts: 2
  rollout_workers: 1
  # Per-domain rollout callables, taken from the `domain_rollouts` group.
  domain_rollouts:
    math: ${domain_rollouts.math}
    guessing: ${domain_rollouts.guessing}
    coding: ${domain_rollouts.coding}

# NOTE(review): the other multi-domain configs in this change use
# `pipelinerl.domains.multidomain.loader.load_datasets`; confirm that
# `load_problems` is the intended entry point here.
dataset_loader: pipelinerl.domains.multidomain.load_problems
train_dataset_names:
  - math_debug
  - guessing_debug
  - coding_debug
test_dataset_names:
  - math_debug
  - coding_debug

# No single environment or environment key is selected in this config.
environment: null
environment_key: null

world:
  env_replicas_per_actor: 0
  environment_mode: embedded
12 changes: 12 additions & 0 deletions conf/domain_mix/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Domain mix presets

Hydra group `domain_mix` stores reusable presets for `actor.domain_mix`.

Usage examples:

```
python main.py --config-name multi_domain/base +domain_mix=math_coding_70_30
python main.py --config-name multi_domain/base +domain_mix=balanced
```

Override or extend these presets by creating new files under `conf/domain_mix/`.
9 changes: 9 additions & 0 deletions conf/domain_mix/balanced.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# @package actor.domain_mix

# Balanced preset: every listed domain gets the same weight (1.0).
# The package header above places these keys under `actor.domain_mix`.
# NOTE(review): whether the consumer normalizes the weights is not visible
# here — confirm before relying on absolute values.
math: 1.0
guessing: 1.0
counting: 1.0
chartqa: 1.0
miniwob: 1.0
coding: 1.0
fn_calling: 1.0
4 changes: 4 additions & 0 deletions conf/domain_mix/coding_heavy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# @package actor.domain_mix

# Coding-heavy preset: 30% math, 70% coding (weights sum to 1.0).
# The package header above places these keys under `actor.domain_mix`.
math: 0.3
coding: 0.7
5 changes: 5 additions & 0 deletions conf/domain_mix/main_mix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# @package actor.domain_mix

# Main preset: 40% math, 30% coding, 30% fn_calling (weights sum to 1.0).
# The package header above places these keys under `actor.domain_mix`.
math: 0.4
coding: 0.3
fn_calling: 0.3
4 changes: 4 additions & 0 deletions conf/domain_mix/math_coding_70_30.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# @package actor.domain_mix

# Math-heavy preset: 70% math, 30% coding (weights sum to 1.0).
# The package header above places these keys under `actor.domain_mix`.
math: 0.7
coding: 0.3
8 changes: 8 additions & 0 deletions conf/domain_rollouts/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Mapping between domain identifiers and rollout callables.
# Each value is a dotted Python path; other configs in this change reference
# these entries through `${domain_rollouts.<domain>}` interpolations.
math: pipelinerl.domains.math.generate_math_rollout
guessing: pipelinerl.domains.guessing.generate_guessing_rollout
counting: pipelinerl.domains.counting.generate_counting_rollout
# Unlike the other entries, the miniwob callable is under a `rollouts` submodule.
miniwob: pipelinerl.domains.miniwob.rollouts.generate_miniwob_rollout
chartqa: pipelinerl.domains.chartqa.generate_chartqa_rollout
coding: pipelinerl.domains.coding.generate_coding_rollout
fn_calling: pipelinerl.domains.fn_calling.generate_fn_calling_rollout
36 changes: 36 additions & 0 deletions conf/fn_calling.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Function-calling-domain run configuration layered on top of `base`.
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm that the dataset and environment keys are meant to be
# top-level rather than nested under `actor`.
defaults:
  - base
  - _self_

actor:
  rollout_policy: pipelinerl.domains.fn_calling.generate_fn_calling_rollout
  system_prompt: ""
  task_template: "{task}"
  task_prompt: ""
  ensure_boxed_answers: false

dataset_loader: pipelinerl.domains.fn_calling.dataset.load_problems
dataset_loader_params:
  dataset_id: ServiceNow-AI/mixed-training-text-datasets
  dataset_config: 80k-if-math-coding-fncalling-stem
  # Fractions for the three splits; they sum to 1.0.
  split_ratios:
    train: 0.9
    validation: 0.05
    test: 0.05
  # Explicit empty list (not null).
  allowed_call_types: []
  max_examples_per_split: 2048
  trust_remote_code: true
  # Read from CODING_HF_TOKEN when set, otherwise null.
  huggingface_token: ${oc.env:CODING_HF_TOKEN, null}

train_dataset_names:
  - fn_calling@train

test_dataset_names:
  - fn_calling@validation

environments:
  - key: fn_calling
    mode: remote
    _target_: pipelinerl.domains.fn_calling.AgenticToolsEnvironment

# Matches the `key` of the single entry in `environments` above.
environment_key: fn_calling
13 changes: 8 additions & 5 deletions conf/math.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@ defaults:
actor:
rollout_policy: pipelinerl.domains.math.generate_math_rollout
system_prompt: Please reason step by step, and put your final answer within \boxed{}.
task_template: |-
{task}
environment:
_target_: pipelinerl.domains.math.MathEnvironment
task_template: "{task}"
task_prompt: ""
environments:
- key: math
mode: remote
_target_: pipelinerl.domains.math.MathEnvironment
environment_key: math
dataset_loader: pipelinerl.domains.math.load_datasets
train_dataset_names:
- open_reasoner_zero_57k
- open_reasoner_zero_extended_72k
test_dataset_names:
- aime_2024
- amc_2023
- math_500
- math_500
58 changes: 58 additions & 0 deletions conf/math_code.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Math + coding multi-domain configuration (defaults, actor and loader part).
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm nesting against the repository.
defaults:
  - base
  # Absolute group path, mounted at the top-level `domain_rollouts` package.
  - /domain_rollouts@domain_rollouts: base
  - domain_mix: math_coding_70_30
  - _self_

actor:
  rollout_policy: pipelinerl.domains.dispatcher.generate_multidomain_rollout
  system_prompt: ""
  task_template: |-
    {task}
  task_prompt: ""
  ensure_boxed_answers: false
  # Only the two domains used by this mix are mapped to rollout callables.
  domain_rollouts:
    math: ${domain_rollouts.math}
    coding: ${domain_rollouts.coding}
  # Sandbox limits for the coding domain.
  coding_time_limit_s: 15.0
  coding_per_test_timeout_s: 10.0
  coding_memory_limit_bytes: 1073741824  # 1 GiB (2**30)
  coding_compile_timeout_s: 10.0
  # Read from CODING_SANDBOX_URL when set, otherwise the default URL below.
  coding_sandbox_url: ${oc.env:CODING_SANDBOX_URL, "http://sandbox:8080/run_code"}

dataset_loader: pipelinerl.domains.multidomain.loader.load_datasets
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have multiple dataloaders at once so we load different datasets for different domains in the same exp?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, it's a proportional concatenation

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So how can we define multiple dataset_loader functions in a single config?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in your config you could do (if you wanted to do math, coding and agentic_fn_calling):

defaults:
  - base
  - multi_domain: base        # inits multidomain loader and dispatcher
  - domain_mix: main_mix      # Or inline the mix as shown below

actor:
  domain_mix:
    math: 0.4
    coding: 0.3
    fn_calling: 0.3

train_dataset_names:
  - math::open_reasoner_zero_57k
  - coding::coding@train
  - fn_calling::fn_calling@train

test_dataset_names:
  - math::math_500
  - coding::coding@validation
  - fn_calling::fn_calling@validation

# Loader parameters, environments and world settings for the math + coding mix.
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm nesting against the repository.
dataset_loader_params:
  # Parameters keyed by domain; only `coding` is configured here.
  per_domain_params:
    coding:
      dataset_id: ServiceNow-AI/mixed-training-text-datasets
      dataset_config: 80k-if-math-coding-fncalling-stem
      # Fractions for the three splits; they sum to 1.0.
      split_ratios:
        train: 0.9
        validation: 0.05
        test: 0.05
      allowed_call_types:
        - assert
        - std
      max_examples_per_split: 2048
      trust_remote_code: true
      # Read from CODING_HF_TOKEN when set, otherwise null.
      huggingface_token: ${oc.env:CODING_HF_TOKEN, null}

environments:
  - key: math
    mode: remote
    replicas_per_actor: ${world.env_replicas_per_actor}
    _target_: pipelinerl.domains.math.MathEnvironment
  - key: coding
    mode: remote
    replicas_per_actor: ${world.env_replicas_per_actor}
    _target_: pipelinerl.domains.coding.CodingSandboxEnvironment
    sandbox_url: ${actor.coding_sandbox_url}
    compile_timeout_s: ${actor.coding_compile_timeout_s}
    run_timeout_s: ${actor.coding_per_test_timeout_s}
    request_timeout_s: ${actor.coding_time_limit_s}
    memory_limit_bytes: ${actor.coding_memory_limit_bytes}

# No single environment key is selected in this config.
environment_key: null

world:
  # Referenced by every `replicas_per_actor` entry above.
  env_replicas_per_actor: 1
79 changes: 79 additions & 0 deletions conf/multi_domain/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# @package _global_
# (The package header must stay on the first line of this file.)
#
# Shared multi-domain base: dispatcher rollout policy, per-domain rollout
# mapping, multi-domain dataset loader and the remote environments.
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm nesting against the repository.
defaults:
  # Absolute group path, mounted at the top-level `domain_rollouts` package.
  - /domain_rollouts@domain_rollouts: base
  - domain_mix: null

actor:
  rollout_policy: pipelinerl.domains.dispatcher.generate_multidomain_rollout
  system_prompt: ""
  task_template: |-
    {task}
  task_prompt: ""
  ensure_boxed_answers: false
  domain_mix: null
  # Every domain from the `domain_rollouts` group is mapped here.
  domain_rollouts:
    math: ${domain_rollouts.math}
    guessing: ${domain_rollouts.guessing}
    counting: ${domain_rollouts.counting}
    chartqa: ${domain_rollouts.chartqa}
    miniwob: ${domain_rollouts.miniwob}
    coding: ${domain_rollouts.coding}
    fn_calling: ${domain_rollouts.fn_calling}
  # Sandbox limits for the coding domain.
  coding_time_limit_s: 15.0
  coding_per_test_timeout_s: 10.0
  coding_memory_limit_bytes: 1073741824  # 1 GiB (2**30)
  coding_compile_timeout_s: 10.0
  # Read from CODING_SANDBOX_URL when set, otherwise the default URL below.
  coding_sandbox_url: ${oc.env:CODING_SANDBOX_URL, "http://sandbox:8080/run_code"}

dataset_loader: pipelinerl.domains.multidomain.loader.load_datasets
dataset_loader_params:
  # Parameters keyed by domain. Both entries use the same dataset id/config
  # and differ only in `allowed_call_types`.
  per_domain_params:
    coding:
      dataset_id: ServiceNow-AI/mixed-training-text-datasets
      dataset_config: 80k-if-math-coding-fncalling-stem
      split_ratios:
        train: 0.9
        validation: 0.05
        test: 0.05
      allowed_call_types:
        - assert
        - std
      max_examples_per_split: 2048
      trust_remote_code: true
      huggingface_token: ${oc.env:CODING_HF_TOKEN, null}
    fn_calling:
      dataset_id: ServiceNow-AI/mixed-training-text-datasets
      dataset_config: 80k-if-math-coding-fncalling-stem
      split_ratios:
        train: 0.9
        validation: 0.05
        test: 0.05
      # Explicit empty list (not null).
      allowed_call_types: []
      max_examples_per_split: 2048
      trust_remote_code: true
      # Uses the same CODING_HF_TOKEN variable as the coding entry.
      huggingface_token: ${oc.env:CODING_HF_TOKEN, null}

environments:
  - key: math
    mode: remote
    replicas_per_actor: ${world.env_replicas_per_actor}
    _target_: pipelinerl.domains.math.MathEnvironment
  - key: coding
    mode: remote
    replicas_per_actor: ${world.env_replicas_per_actor}
    _target_: pipelinerl.domains.coding.CodingSandboxEnvironment
    sandbox_url: ${actor.coding_sandbox_url}
    compile_timeout_s: ${actor.coding_compile_timeout_s}
    run_timeout_s: ${actor.coding_per_test_timeout_s}
    request_timeout_s: ${actor.coding_time_limit_s}
    memory_limit_bytes: ${actor.coding_memory_limit_bytes}
  - key: fn_calling
    mode: remote
    replicas_per_actor: ${world.env_replicas_per_actor}
    _target_: pipelinerl.domains.fn_calling.AgenticToolsEnvironment
    max_workers: 4

# No single environment key is selected in this config.
environment_key: null

world:
  # Referenced by every `replicas_per_actor` entry above.
  env_replicas_per_actor: 1
10 changes: 10 additions & 0 deletions conf/multi_domain/main_mix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Multi-domain run using the `main_mix` domain-mix preset on top of `base`.
# NOTE(review): indentation was reconstructed from a whitespace-stripped
# listing — confirm nesting against the repository.
defaults:
  - base
  - domain_mix: main_mix
  - _self_

actor:
  # Rollout callables for math, coding and fn_calling.
  domain_rollouts:
    math: ${domain_rollouts.math}
    coding: ${domain_rollouts.coding}
    fn_calling: ${domain_rollouts.fn_calling}
2 changes: 1 addition & 1 deletion conf/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ defaults:
finetune:
seq_length: 4000
gradient_accumulation_passes: 6
max_train_steps: 1
max_train_steps: 100
train_batch_size: 4
attempts: 4
llm:
Expand Down
Loading