Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[submodule "examples/swe-agent/nemo-gym"]
path = examples/swe-agent/nemo-gym
url = https://github.com/yueming-yuan/Gym
branch = slime-swe-agent
[submodule "examples/swe-agent/mini-swe-agent"]
path = examples/swe-agent/mini-swe-agent
url = https://github.com/yueming-yuan/nv-mini-swe-agent
branch = slime-swe-agent
12 changes: 8 additions & 4 deletions examples/formal_math/single_round/run_minimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@
)

wandb_args = (
"--use-wandb "
"--wandb-project slime-formal-math-run-minimal "
"--wandb-group demo "
"--wandb-key ${WANDB_API_KEY} "
(
"--use-wandb "
"--wandb-project slime-formal-math-run-minimal "
"--wandb-group demo "
f"--wandb-key '{wandb_api_key}' "
)
if (wandb_api_key := os.environ.get("WANDB_API_KEY"))
else ""
)

train_args = (
Expand Down
12 changes: 8 additions & 4 deletions examples/geo3k_vlm_multi_turn/run_geo3k_vlm_multi_turn.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ def execute():
ckpt_args = f"--hf-checkpoint /root/models/{MODEL_NAME} "

wandb_args = (
"--use-wandb "
"--wandb-project slime-dev "
"--wandb-group geo3k_vlm_multi_turn "
"--wandb-key ${WANDB_API_KEY} "
(
"--use-wandb "
"--wandb-project slime-dev "
"--wandb-group geo3k_vlm_multi_turn "
f"--wandb-key '{wandb_api_key}' "
)
if (wandb_api_key := os.environ.get("WANDB_API_KEY"))
else ""
)

rollout_args = (
Expand Down
130 changes: 130 additions & 0 deletions examples/swe-agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
### Introduction

This is an example of SWE-agent training. It uses NVIDIA's Nemo-Gym as the Gym environment implementation, SWE-Gym as the training data, and SWE-bench for evaluation.

The implementation of this example lives partly in the submodules below:
- Nemo-Gym: https://github.com/yueming-yuan/Gym/tree/slime-swe-agent
- mini-swe-agent: https://github.com/yueming-yuan/nv-mini-swe-agent/tree/slime-swe-agent


### Prepare environment
#### Update submodules
```bash
git submodule update --init --recursive .
```
#### Docker settings
```bash
# 1. create a docker network
docker network create swe-net

# 2. create environment docker
docker run -itd \
--name swe_env \
--shm-size 16g \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /mnt/data:/data \
-v /home/sglang-rl/<your_name>:/workspace \
--ipc=host \
--ulimit nofile=65536:65536 \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
--network swe-net \
ubuntu:latest \
/bin/bash

# 3. create slime docker
docker run -itd \
--shm-size 32g \
--gpus all \
-v /mnt/data/cache/huggingface:/root/.cache/huggingface \
-v /mnt/data:/data \
-v /home/sglang-rl/<your_name>:/workspace \
--ipc=host \
--ulimit nofile=65536:65536 \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
--privileged \
--network swe-net \
--name slime_<your_name> \
slimerl/slime:latest \
/bin/zsh

# 4. install utils in environment docker
docker exec -it swe_env /bin/bash
apt update && apt install -y zsh curl git python3 python3-pip docker.io
```
Note: `-v /var/run/docker.sock:/var/run/docker.sock` is required for Docker-in-Docker SWE environment execution; `--network swe-net` enables communication between the training and environment containers.

#### Installation

In **environment docker**, install Gym
```bash
git clone https://github.com/yueming-yuan/Gym
cd Gym

curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
uv venv --python 3.12 && source .venv/bin/activate
uv sync --extra dev --group docs

# configure env.yaml
echo "policy_base_url: https://api.openai.com/v1
policy_api_key: your-openai-api-key
policy_model_name: gpt-4.1-2025-04-14
default_host: 0.0.0.0" > env.yaml
```
Note: set the host IP to `0.0.0.0` to enable communication between the Docker containers.

then set up for SWE-agent server:
```bash
cd responses_api_agents/mini_swe_agent
uv pip install -r requirements.txt
```
Now you should be able to run the SWE-agent server.

For **slime docker** setup, please follow the standard setup process.

### Preparing data
In **slime docker**, download the **SWE-Gym** data from Hugging Face and convert it to Slime's prompt data format with this script.
```
cd slime/examples/swe-agent
python download_and_process_data.py --input SWE-Gym/SWE-Gym --output /root/swe_train.jsonl
```

### Running train
1. In environment docker, launch the agent server
```bash
cd Gym
source .venv/bin/activate
cd responses_api_agents/mini_swe_agent
./start_server.sh
```


2. In slime docker,
(1) export `SWE_AGENT_GYM_URL` as the URL (host and port) of the second server you started in Gym inside the environment docker, i.e. the one whose `server_type` is `responses_api_agents`. `swe_env` is the environment docker's name; replace it if you changed the name.
(minor TODO: modify the port selections to avoid setting this every time.) (2) launch the training.
```bash
export SWE_AGENT_GYM_URL="http://swe_env:<port_of_responses_api_agents>"
bash examples/swe-agent/run-qwen3-4b-instruct.sh
```


### Troubleshooting
1. The first run of each SWE environment can be slow, and you may need to wait before generation starts, because each SWE-Gym task has its own Docker image and `docker pull` takes time.
2. The environment may also be slow during evaluation. The evaluation timeout is 10 minutes by default. If the server appears stuck at `[EVAL]<instance> Running eval`, you may simply need to wait for it.

## Metrics
```
agent/turns_mean, agent/turns_sum - Turn counts
agent/tool_calls_mean, agent/tool_calls_sum - Tool call counts
agent/total_time_mean/max/min - Total time statistics
agent/model_query_time_sum_mean - Avg total model time per rollout
agent/env_execution_time_sum_mean - Avg total env time per rollout
agent/eval_time_mean - Avg evaluation time
agent/overhead_time_mean - Avg overhead time
agent/time_per_turn - Avg time per turn
agent/model_query_time_avg - Avg model query time per turn
agent/env_execution_time_avg - Avg env execution time per turn
agent/model_time_ratio, agent/env_time_ratio - Time ratios
```
85 changes: 85 additions & 0 deletions examples/swe-agent/download_and_process_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Download and process data to Slime format."""

import argparse
import json
import tempfile
from pathlib import Path
from datasets import load_dataset


def convert_to_slime_format(input_path: str, output_path: str, limit: int = None, split: str = "train"):
    """Convert a JSONL file of task instances to Slime's prompt data format.

    Every non-blank input line is parsed as one JSON object. For each one, a
    line of the form ``{"prompt": ..., "metadata": ...}`` is written, where
    ``prompt`` is the instance's ``problem_statement`` (empty string when the
    key is absent) and ``metadata`` is a shallow copy of the instance with
    ``subset`` set to ``"gym"`` and ``split`` set to *split*.

    Args:
        input_path: Path to input JSONL file
        output_path: Path to output JSONL file in Slime format (overwritten)
        limit: Optional limit on number of samples; ``None`` or ``0`` means
            no limit
        split: Dataset split name (used in metadata)

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    count = 0
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_path, encoding="utf-8") as fin, open(output_path, "w", encoding="utf-8") as fout:
        for line in fin:
            if limit and count >= limit:
                break

            # Tolerate blank lines (e.g. a trailing newline in a hand-edited
            # file) instead of crashing in json.loads; they are not counted.
            if not line.strip():
                continue

            instance = json.loads(line)

            # Add subset and split to metadata for Gym API
            metadata = dict(instance)
            metadata["subset"] = "gym"
            metadata["split"] = split

            slime_sample = {
                "prompt": instance.get("problem_statement", ""),
                "metadata": metadata,
            }

            fout.write(json.dumps(slime_sample) + "\n")
            count += 1

    print(f"Converted {count} samples: {input_path} -> {output_path}")


def main():
    """Command-line entry point: produce a Slime-format JSONL file.

    If ``--input`` names an existing file ending in ``.jsonl`` it is converted
    directly. Otherwise ``--input`` is passed to ``load_dataset`` together
    with ``--split``, the result is dumped to a temporary JSONL file, and that
    file is converted and then removed.
    """
    cli = argparse.ArgumentParser(description="Download HuggingFace dataset and convert to Slime format")
    cli.add_argument("--input", type=str, required=True, help="HuggingFace dataset path or local JSONL file")
    cli.add_argument("--output", type=str, required=True, help="Output JSONL file path")
    cli.add_argument(
        "--split", type=str, default="train", help="Dataset split (default: train, only for HF datasets)"
    )
    cli.add_argument("--limit", type=int, help="Limit number of samples")
    opts = cli.parse_args()

    source = Path(opts.input)
    is_local_jsonl = source.exists() and source.suffix == ".jsonl"

    if not is_local_jsonl:
        print(f"Loading HuggingFace dataset: {opts.input} (split={opts.split})")
        dataset = load_dataset(opts.input, split=opts.split)

        # The limit is applied on the dataset itself, so the conversion
        # call below is made without a limit.
        if opts.limit:
            keep = min(opts.limit, len(dataset))
            dataset = dataset.select(range(keep))

        scratch = None
        try:
            # Only reserve a file name here; the handle is closed before the
            # dataset is written to that path.
            with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as handle:
                scratch = handle.name

            print(f"Downloading to temporary file: {scratch}")
            dataset.to_json(scratch)

            print(f"Converting to Slime format: {opts.output}")
            convert_to_slime_format(scratch, opts.output, split=opts.split)
        finally:
            # Remove the temporary file whether or not conversion succeeded.
            if scratch and Path(scratch).exists():
                Path(scratch).unlink()
    else:
        print(f"Processing local file: {opts.input}")
        convert_to_slime_format(opts.input, opts.output, opts.limit, opts.split)

    print("Done.")


if __name__ == "__main__":
main()
Loading