Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[submodule "examples/swe-agent/nemo-gym"]
path = examples/swe-agent/nemo-gym
url = https://github.com/yueming-yuan/Gym
branch = slime-swe-agent
[submodule "examples/swe-agent/mini-swe-agent"]
path = examples/swe-agent/mini-swe-agent
url = https://github.com/yueming-yuan/nv-mini-swe-agent
branch = slime-swe-agent
12 changes: 8 additions & 4 deletions examples/formal_math/single_round/run_minimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,14 @@
)

wandb_args = (
"--use-wandb "
"--wandb-project slime-formal-math-run-minimal "
"--wandb-group demo "
"--wandb-key ${WANDB_API_KEY} "
(
"--use-wandb "
"--wandb-project slime-formal-math-run-minimal "
"--wandb-group demo "
f"--wandb-key '{wandb_api_key}' "
)
if (wandb_api_key := os.environ.get("WANDB_API_KEY"))
else ""
)

train_args = (
Expand Down
12 changes: 8 additions & 4 deletions examples/geo3k_vlm_multi_turn/run_geo3k_vlm_multi_turn.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ def execute():
ckpt_args = f"--hf-checkpoint /root/models/{MODEL_NAME} "

wandb_args = (
"--use-wandb "
"--wandb-project slime-dev "
"--wandb-group geo3k_vlm_multi_turn "
"--wandb-key ${WANDB_API_KEY} "
(
"--use-wandb "
"--wandb-project slime-dev "
"--wandb-group geo3k_vlm_multi_turn "
f"--wandb-key '{wandb_api_key}' "
)
if (wandb_api_key := os.environ.get("WANDB_API_KEY"))
else ""
)

rollout_args = (
Expand Down
130 changes: 130 additions & 0 deletions examples/swe-agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
### Introduction

This example demonstrates SWE-agent training. It uses NVIDIA's Nemo-Gym as the Gym environment implementation, SWE-Gym as the training data, and SWE-bench for evaluation.

Part of this example's implementation lives in the following submodules:
- Nemo-Gym: https://github.com/yueming-yuan/Gym/tree/slime-swe-agent
- mini-swe-agent: https://github.com/yueming-yuan/nv-mini-swe-agent/tree/slime-swe-agent


### Prepare environment
#### Update submodules
```bash
git submodule update --init --recursive .
```
#### Docker settings
```bash
# 1. create a docker network
docker network create swe-net

# 2. create environment docker
docker run -itd \
--name swe_env \
--shm-size 16g \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /mnt/data:/data \
-v /home/sglang-rl/<your_name>:/workspace \
--ipc=host \
--ulimit nofile=65536:65536 \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
--network swe-net \
ubuntu:latest \
/bin/bash

# 3. create slime docker
docker run -itd \
--shm-size 32g \
--gpus all \
-v /mnt/data/cache/huggingface:/root/.cache/huggingface \
-v /mnt/data:/data \
-v /home/sglang-rl/<your_name>:/workspace \
--ipc=host \
--ulimit nofile=65536:65536 \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
--privileged \
--network swe-net \
--name slime_<your_name> \
slimerl/slime:latest \
/bin/zsh

# 4. install utils in environment docker
docker exec -it swe_env /bin/bash
apt update && apt install -y zsh curl git python3 python3-pip docker.io
```
Note: `-v /var/run/docker.sock:/var/run/docker.sock` is required for Docker-in-Docker SWE environment execution; `--network swe-net` enables communication between the training and environment containers.

#### Installation

In **environment docker**, install Gym
```bash
git clone https://github.com/yueming-yuan/Gym
cd Gym

curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
uv venv --python 3.12 && source .venv/bin/activate
uv sync --extra dev --group docs

# configure env.yaml
echo "policy_base_url: https://api.openai.com/v1
policy_api_key: your-openai-api-key
policy_model_name: gpt-4.1-2025-04-14
default_host: 0.0.0.0" > env.yaml
```
Note: set the host IP to `0.0.0.0` to enable communication between the Docker containers.

Then set up the SWE-agent server:
```bash
cd responses_api_agents/mini_swe_agent
uv pip install -r requirements.txt
```
Now you should be able to run the SWE-agent server.

For **slime docker** setup, please follow the standard setup process.

### Preparing data
In the **slime docker**, download the **SWE-Gym** data from Hugging Face and convert it to Slime's prompt data format with this script:
```
cd slime/examples/swe-agent
python download_and_process_data.py --input SWE-Gym/SWE-Gym --output /root/swe_train.jsonl
```

### Running train
1. In environment docker, launch the agent server
```bash
cd Gym
source .venv/bin/activate
cd responses_api_agents/mini_swe_agent
./start_server.sh
```


2. In slime docker,
(1) Export `SWE_AGENT_GYM_URL` as the URL (host and port) of the second server you started in Gym in the environment docker, i.e. the one whose `server_type` is `responses_api_agents`. `swe_env` is the environment docker's name; replace it if you changed the name.
(Minor TODO: modify the port selection to avoid setting this every time.) (2) Launch the training.
```bash
export SWE_AGENT_GYM_URL="http://swe_env:<port_of_responses_api_agents>"
bash examples/swe-agent/run-qwen3-4b-instruct.sh
```


### Troubleshooting
1. The first run of each SWE environment can be slow, and you may need to wait before generation starts, because each SWE-Gym task has its own Docker image and `docker pull` takes time.
2. Sometimes the environment may also be slow at evaluation. The timeout of evaluation is 10 minutes by default. If the server is stuck at `[EVAL]<instance> Running eval`, you may need to wait for it.

## Metrics
```
agent/turns_mean, agent/turns_sum - Turn counts
agent/tool_calls_mean, agent/tool_calls_sum - Tool call counts
agent/total_time_mean/max/min - Total time statistics
agent/model_query_time_sum_mean - Avg total model time per rollout
agent/env_execution_time_sum_mean - Avg total env time per rollout
agent/eval_time_mean - Avg evaluation time
agent/overhead_time_mean - Avg overhead time
agent/time_per_turn - Avg time per turn
agent/model_query_time_avg - Avg model query time per turn
agent/env_execution_time_avg - Avg env execution time per turn
agent/model_time_ratio, agent/env_time_ratio - Time ratios
```
85 changes: 85 additions & 0 deletions examples/swe-agent/download_and_process_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Download and process data to Slime format."""

import argparse
import json
import tempfile
from pathlib import Path
from datasets import load_dataset


def convert_to_slime_format(input_path: str, output_path: str, limit: "int | None" = None, split: str = "train"):
    """Convert a JSONL file of task instances to Slime prompt format.

    Every non-blank input line is parsed as a JSON object and written to the
    output as one JSON line of the form ``{"prompt": ..., "metadata": ...}``.
    The prompt is the instance's ``problem_statement`` (empty string when the
    key is absent). The metadata is a copy of the whole instance with the
    ``subset`` and ``split`` keys added.

    Args:
        input_path: Path to input JSONL file.
        output_path: Path to output JSONL file in Slime format.
        limit: Optional cap on the number of samples written. ``None`` or
            ``0`` means no limit.
        split: Dataset split name (stored in the metadata).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    count = 0
    # Pin the encoding: JSONL is UTF-8 by convention, and relying on the
    # locale default breaks non-ASCII problem statements on some platforms.
    with open(input_path, encoding="utf-8") as fin, open(output_path, "w", encoding="utf-8") as fout:
        for line in fin:
            if limit and count >= limit:
                break

            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing in json.loads.
            if not line.strip():
                continue

            instance = json.loads(line)

            # Add subset and split to metadata for Gym API
            metadata = {**instance, "subset": "gym", "split": split}

            slime_sample = {
                "prompt": instance.get("problem_statement", ""),
                "metadata": metadata,
            }

            fout.write(json.dumps(slime_sample) + "\n")
            count += 1

    print(f"Converted {count} samples: {input_path} -> {output_path}")


def main():
    """Command-line entry point: produce a Slime-format JSONL file.

    ``--input`` is treated as a local file when it names an existing path
    ending in ``.jsonl``; otherwise it is passed to ``load_dataset`` as a
    HuggingFace dataset identifier. In the latter case the (optionally
    truncated) dataset is dumped to a temporary JSONL file, converted, and
    the temporary file is removed afterwards.
    """
    cli = argparse.ArgumentParser(description="Download HuggingFace dataset and convert to Slime format")
    cli.add_argument("--input", type=str, required=True, help="HuggingFace dataset path or local JSONL file")
    cli.add_argument("--output", type=str, required=True, help="Output JSONL file path")
    cli.add_argument(
        "--split", type=str, default="train", help="Dataset split (default: train, only for HF datasets)"
    )
    cli.add_argument("--limit", type=int, help="Limit number of samples")
    opts = cli.parse_args()

    source = Path(opts.input)
    is_local_jsonl = source.exists() and source.suffix == ".jsonl"

    if not is_local_jsonl:
        print(f"Loading HuggingFace dataset: {opts.input} (split={opts.split})")
        dataset = load_dataset(opts.input, split=opts.split)

        # The limit is applied here, so the conversion below runs unlimited.
        if opts.limit:
            keep = min(opts.limit, len(dataset))
            dataset = dataset.select(range(keep))

        scratch = None
        try:
            # Only reserve a file name; the handle is closed before the dump.
            with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as handle:
                scratch = handle.name

            print(f"Downloading to temporary file: {scratch}")
            dataset.to_json(scratch)

            print(f"Converting to Slime format: {opts.output}")
            convert_to_slime_format(scratch, opts.output, split=opts.split)
        finally:
            # Clean up the temporary dump whether or not conversion succeeded.
            if scratch:
                leftover = Path(scratch)
                if leftover.exists():
                    leftover.unlink()
    else:
        print(f"Processing local file: {opts.input}")
        convert_to_slime_format(opts.input, opts.output, opts.limit, opts.split)

    print("Done.")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading
Loading