Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions data/webshop/webshop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import json
from datasets import load_dataset

# Export the WebShop subset of AgentGym/AgentEval as a JSON list of
# {"item_id": ..., "conversations": []} records.

# Load the full AgentEval dataset (test split).
ds = load_dataset("AgentGym/AgentEval", split="test")

# Keep only the entries whose item_id starts with "webshop_".
webshop_ds = ds.filter(lambda x: x["item_id"].startswith("webshop_"))

# Preview the result.
print(webshop_ds)

output_file = "webshop_inference.json"

# Only item_id is used, so read that single column instead of decoding every
# full row of the filtered dataset.
data = [
    {"item_id": item_id, "conversations": []}
    for item_id in webshop_ds["item_id"]
]

# Explicit encoding keeps the output independent of the platform default.
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2)
19 changes: 19 additions & 0 deletions openmanus_rl/agentgym/agentenv/examples/basic/base_eval_webshop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Runs base_eval_template.py for the WebShop task against an environment
# server. base_eval_template.py is referenced by a relative path, so run this
# script from the directory that contains it.

# Evaluation args
model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
inference_file="/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json"
output_file="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output/qwen2.5-3b-webshop.log"
task_name="webshop"
seed="42"

# environment parameters
max_round="6"
env_server_base="http://127.0.0.1:36001"

# Make sure the destination directory of the output file exists up front,
# rather than relying on the evaluation script to create it.
mkdir -p "$(dirname "${output_file}")"

python -u base_eval_template.py \
    --model_path "${model_path}" \
    --inference_file "${inference_file}" \
    --output_file "${output_file}" \
    --task_name "${task_name}" \
    --seed "${seed}" \
    --max_round "${max_round}" \
    --env_server_base "${env_server_base}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Distributed WebShop evaluation: launches ../../utils/distributed_eval_task.py
# through `accelerate launch`. The script path is relative, so run this from
# the directory this file lives in.

exp_name="eval_webshop"  # not referenced below
inference_file='/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json' # Path to the trainset file which contains idxs for the task.

num_processes='8'
main_process_port='8877'
weight_decay="0"  # not referenced below

### Default variables
task_name="webshop" # change this to evaluate on a different task
output_dir="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output"

# agent model
#model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
model_path="/data1/models/Qwen/Qwen2.5-3B"
eval_batch_size="1"
num_workers="8"
seed="42"
do_sample="False"
temperature="1.0"

max_round="6"
env_server_base="http://127.0.0.1:36001" # Set this to the base url of the EnvServer.
timeout="2400"
data_len="200" # value forwarded to --data_len


#########
mkdir -p "${output_dir}"
# You need to modify this as your agentgym/agentenv absolute path.
# ${PYTHONPATH:+:...} appends the previous value only when it is non-empty, so
# an unset PYTHONPATH does not leave a trailing ":" (an empty search entry).
export PYTHONPATH="/home/user/muxin/OpenManus-RL/openmanus_rl/agentgym/agentenv${PYTHONPATH:+:${PYTHONPATH}}"

accelerate launch \
    --num_processes="${num_processes}" \
    --main_process_port="${main_process_port}" \
    ../../utils/distributed_eval_task.py \
    --model_path "${model_path}" \
    --output_file "${output_dir}/inference.jsonl" \
    --inference_file "${inference_file}" \
    --task_name "${task_name}" \
    --eval_batch_size "${eval_batch_size}" \
    --num_workers "${num_workers}" \
    --seed "${seed}" \
    --do_sample "${do_sample}" \
    --temperature "${temperature}" \
    --max_round "${max_round}" \
    --env_server_base "${env_server_base}" \
    --data_len "${data_len}" \
    --timeout "${timeout}"
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ vllm<=0.6.3
wandb
IPython
matplotlib
omegaconf
omegaconf
96 changes: 96 additions & 0 deletions scripts/offline_rollout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash
# Offline rollout launcher: parses the options below, assembles the
# traj_generation/rollout_eval.py command line, runs it and mirrors its
# standard output into a timestamped file under ./logs.
#
# Options: --config FILE  --model_path PATH  --output_dir DIR
#          --task_names "NAME [NAME...]"  --data_len N  --timeout SECONDS
#          --do_sample True|False  --temperature T  --seed N  --debug
# Exits non-zero on a bad option, a missing config, or a failed rollout.

CONFIG_FILE="" # fulfill the config yaml file here
MODEL_PATH=""
OUTPUT_DIR=""
TASK_NAMES=""
DATA_LEN=200
TIMEOUT=2400
DO_SAMPLE="False"
TEMPERATURE=1.0
SEED=42
DEBUG=false

# Abort when an option that takes a value is the last argument. Without this
# check `shift 2` fails without shifting anything and the parse loop below
# would spin forever.
#   $1 - option name, $2 - number of arguments still to be parsed
require_value() {
    if [[ $2 -lt 2 ]]; then
        echo "Missing value for option: $1"
        exit 1
    fi
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --config)
            require_value "$1" "$#"
            CONFIG_FILE="$2"
            shift 2
            ;;
        --model_path)
            require_value "$1" "$#"
            MODEL_PATH="$2"
            shift 2
            ;;
        --output_dir)
            require_value "$1" "$#"
            OUTPUT_DIR="$2"
            shift 2
            ;;
        --task_names)
            require_value "$1" "$#"
            TASK_NAMES="$2"
            shift 2
            ;;
        --data_len)
            require_value "$1" "$#"
            DATA_LEN="$2"
            shift 2
            ;;
        --timeout)
            require_value "$1" "$#"
            TIMEOUT="$2"
            shift 2
            ;;
        --do_sample)
            require_value "$1" "$#"
            DO_SAMPLE="$2"
            shift 2
            ;;
        --temperature)
            require_value "$1" "$#"
            TEMPERATURE="$2"
            shift 2
            ;;
        --seed)
            require_value "$1" "$#"
            SEED="$2"
            shift 2
            ;;
        --debug)
            DEBUG=true
            shift
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

# --config is always passed to the rollout script, so refuse to build a
# command line with an empty value for it.
if [ -z "$CONFIG_FILE" ]; then
    echo "No config file given: pass --config <file> or set CONFIG_FILE in this script."
    exit 1
fi

# Build the command as an array so values containing spaces or shell
# metacharacters reach python as single, literal arguments (no eval).
CMD=(python traj_generation/rollout_eval.py --config "$CONFIG_FILE")

if [ -n "$MODEL_PATH" ]; then
    CMD+=(--model_path "$MODEL_PATH")
fi

if [ -n "$OUTPUT_DIR" ]; then
    CMD+=(--output_dir "$OUTPUT_DIR")
fi

if [ -n "$TASK_NAMES" ]; then
    # Split on whitespace on purpose: several task names given in one quoted
    # argument are forwarded as separate words, as the unquoted expansion did.
    read -r -a TASK_NAME_LIST <<< "$TASK_NAMES"
    CMD+=(--task_names "${TASK_NAME_LIST[@]}")
fi

CMD+=(--data_len "$DATA_LEN" --timeout "$TIMEOUT" --do_sample "$DO_SAMPLE" --temperature "$TEMPERATURE" --seed "$SEED")

if [ "$DEBUG" = true ]; then
    CMD+=(--debug)
fi

# Create log directory
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_DIR="./logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/offline_rollout_$TIMESTAMP.log"

# Print the command
echo "Running: ${CMD[*]}"
echo "Logging to: $LOG_FILE"

# Execute with logging. PIPESTATUS[0] is the exit code of the rollout itself;
# the pipeline's own status would be tee's and hide a failure.
"${CMD[@]}" | tee "$LOG_FILE"
STATUS=${PIPESTATUS[0]}

if [ "$STATUS" -ne 0 ]; then
    echo "Evaluation failed with exit code $STATUS. See $LOG_FILE" >&2
    exit "$STATUS"
fi

echo "Evaluation complete! Results saved to the output directory."
30 changes: 30 additions & 0 deletions scripts/run_sft.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Launches verl.trainer.fsdp_sft_trainer through torchrun on a single node.
#
# Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]
#   nproc_per_node  value passed to torchrun --nproc_per_node
#   save_path       value assigned to trainer.default_local_dir
#   other_configs   extra key=value overrides forwarded to the trainer verbatim
set -x

if [ "$#" -lt 2 ]; then
    echo "Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]"
    exit 1
fi

nproc_per_node=$1
save_path=$2

# Shift the arguments so "$@" refers to the rest
shift 2

# Expansions are quoted so a save path or an override containing spaces or
# glob characters is passed through as one unmodified argument.
# 'trainer.logger=[console]' is the exact word the shell produced from the
# previous unquoted trainer.logger=['console'], now protected from pathname
# expansion of the [console] bracket pattern.
torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
    -m verl.trainer.fsdp_sft_trainer \
    data.train_files=OpenManus-RL/data/train_split.parquet \
    data.val_files=OpenManus-RL/data/test_split.parquet \
    data.multiturn.enable=true \
    data.multiturn.messages_key=prompt \
    data.micro_batch_size=4 \
    model.partial_pretrain=/data1/models/Qwen/Qwen3-4B \
    trainer.default_local_dir="$save_path" \
    trainer.project_name=multiturn-sft \
    trainer.experiment_name=multiturn-sft-qwen-3-4b \
    'trainer.logger=[console]' \
    trainer.total_training_steps=1 \
    trainer.default_hdfs_dir=null "$@" \
    ulysses_sequence_parallel_size=2 \
    use_remove_padding=true
Loading
Loading