OpenManus · Kunlun-Zhu · May 11, 2025 · May 2, 2025 · May 7, 2025 · May 8, 2025
diff --git a/data/webshop/webshop.py b/data/webshop/webshop.py
@@ -0,0 +1,34 @@
+"""Build the WebShop inference file from the AgentEval benchmark.
+
+Loads the ``test`` split of ``AgentGym/AgentEval``, keeps the entries whose
+``item_id`` starts with ``webshop_``, prints the filtered dataset, and writes
+one ``{"item_id": ..., "conversations": []}`` record per entry to
+``webshop_inference.json`` in the current working directory.
+"""
+import json
+from datasets import load_dataset
+
+
+def _is_webshop(example):
+    """Return True when the example's ``item_id`` has the WebShop prefix."""
+    return example["item_id"].startswith("webshop_")
+
+
+def _blank_record(item_id):
+    """Return an inference record for ``item_id`` with no conversation yet."""
+    return {"item_id": item_id, "conversations": []}
+
+
+# Fetch the complete AgentEval test split, then narrow it to WebShop tasks.
+ds = load_dataset("AgentGym/AgentEval", split="test")
+webshop_ds = ds.filter(_is_webshop)
+
+# Print the filtered dataset as a quick sanity check.
+print(webshop_ds)
+
+output_file = "webshop_inference.json"
+
+data = [_blank_record(row["item_id"]) for row in webshop_ds]
+
+with open(output_file, "w") as f:
+    f.write(json.dumps(data, indent=2))
diff --git a/openmanus_rl/agentgym/agentenv/examples/basic/base_eval_webshop.sh b/openmanus_rl/agentgym/agentenv/examples/basic/base_eval_webshop.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Evaluate one model on the WebShop task with base_eval_template.py.
+# Run it from this directory: base_eval_template.py is given as a relative
+# path. The absolute paths below are machine-specific; adjust before use.
+
+# Evaluation args
+model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
+inference_file="/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json"
+# NOTE(review): the log name says qwen2.5-3b although model_path points at a
+# Qwen3-3b-sft checkpoint - confirm the intended file name.
+output_file="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output/qwen2.5-3b-webshop.log"
+task_name="webshop"
+seed="42"
+
+# environment parameters
+max_round="6"
+env_server_base="http://127.0.0.1:36001"
+
+# Create the output directory up front so the run does not depend on it
+# already existing when the results are written.
+mkdir -p "$(dirname "${output_file}")"
+
+python -u base_eval_template.py \
+        --model_path "${model_path}" \
+        --inference_file "${inference_file}" \
+        --output_file "${output_file}" \
+        --task_name "${task_name}" \
+        --seed "${seed}" \
+        --max_round "${max_round}" \
+        --env_server_base "${env_server_base}"
diff --git a/openmanus_rl/agentgym/agentenv/examples/distributed_eval_scripts/distributed_eval_webshop.sh b/openmanus_rl/agentgym/agentenv/examples/distributed_eval_scripts/distributed_eval_webshop.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Distributed WebShop evaluation: runs ../../utils/distributed_eval_task.py
+# through "accelerate launch". Run it from this directory, because the task
+# script is given as a relative path. Absolute paths are machine-specific.
+
+exp_name="eval_webshop" # label only; not referenced below
+inference_file='/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json' # Path to the inference file which lists the item ids for the task.
+
+num_processes='8'
+main_process_port='8877'
+
+### Default variables
+task_name="webshop" # change this to evaluate on a different task
+output_dir="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output"
+
+# agent model
+#model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
+model_path="/data1/models/Qwen/Qwen2.5-3B"
+eval_batch_size="1"
+num_workers="8"
+seed="42"
+do_sample="False"
+temperature="1.0"
+data_len="200" # value passed to --data_len
+
+max_round="6"
+env_server_base="http://127.0.0.1:36001" # Set this to the base url of the EnvServer.
+timeout="2400"
+
+
+#########
+mkdir -p "${output_dir}"
+# Change the first entry to the absolute path of your agentgym/agentenv checkout.
+export PYTHONPATH="/home/user/muxin/OpenManus-RL/openmanus_rl/agentgym/agentenv:${PYTHONPATH}"
+
+accelerate launch \
+        --num_processes="${num_processes}" \
+        --main_process_port="${main_process_port}" \
+    ../../utils/distributed_eval_task.py \
+        --model_path "${model_path}" \
+        --output_file "${output_dir}/inference.jsonl" \
+        --inference_file "${inference_file}" \
+        --task_name "${task_name}" \
+        --eval_batch_size "${eval_batch_size}" \
+        --num_workers "${num_workers}" \
+        --seed "${seed}" \
+        --do_sample "${do_sample}" \
+        --temperature "${temperature}" \
+        --max_round "${max_round}" \
+        --env_server_base "${env_server_base}" \
+        --data_len "${data_len}" \
+        --timeout "${timeout}"
diff --git a/requirements.txt b/requirements.txt
@@ -14,4 +14,4 @@ vllm<=0.6.3
 wandb
 IPython
 matplotlib
-omegaconf
+omegaconf
diff --git a/scripts/offline_rollout.sh b/scripts/offline_rollout.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# Run an offline rollout evaluation with traj_generation/rollout_eval.py and
+# mirror its standard output to ./logs/offline_rollout_<timestamp>.log.
+#
+# Usage:
+#   offline_rollout.sh --config <yaml> [--model_path <path>] [--output_dir <dir>]
+#       [--task_names <names>] [--data_len <n>] [--timeout <n>]
+#       [--do_sample <True|False>] [--temperature <t>] [--seed <n>] [--debug]
+
+CONFIG_FILE="" # fill in the config YAML path here, or pass --config
+MODEL_PATH=""
+OUTPUT_DIR=""
+TASK_NAMES=""
+DATA_LEN=200
+TIMEOUT=2400
+DO_SAMPLE="False"
+TEMPERATURE=1.0
+SEED=42
+DEBUG=false
+
+# Abort when a value-taking option is the last argument. Without this check
+# "shift 2" fails without shifting anything and the parsing loop never ends.
+require_value() {
+  if [[ $# -lt 2 ]]; then
+    echo "Missing value for option: $1" >&2
+    exit 1
+  fi
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --config)
+      require_value "$@"
+      CONFIG_FILE="$2"
+      shift 2
+      ;;
+    --model_path)
+      require_value "$@"
+      MODEL_PATH="$2"
+      shift 2
+      ;;
+    --output_dir)
+      require_value "$@"
+      OUTPUT_DIR="$2"
+      shift 2
+      ;;
+    --task_names)
+      require_value "$@"
+      TASK_NAMES="$2"
+      shift 2
+      ;;
+    --data_len)
+      require_value "$@"
+      DATA_LEN="$2"
+      shift 2
+      ;;
+    --timeout)
+      require_value "$@"
+      TIMEOUT="$2"
+      shift 2
+      ;;
+    --do_sample)
+      require_value "$@"
+      DO_SAMPLE="$2"
+      shift 2
+      ;;
+    --temperature)
+      require_value "$@"
+      TEMPERATURE="$2"
+      shift 2
+      ;;
+    --seed)
+      require_value "$@"
+      SEED="$2"
+      shift 2
+      ;;
+    --debug)
+      DEBUG=true
+      shift
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+# rollout_eval.py is always invoked with --config, so a config is required.
+if [[ -z "$CONFIG_FILE" ]]; then
+  echo "Error: no config file given. Pass --config <yaml> or set CONFIG_FILE in this script." >&2
+  exit 1
+fi
+
+# Build the command as an array so values containing spaces or shell
+# metacharacters reach the Python script as single, unmodified arguments.
+CMD=(python traj_generation/rollout_eval.py --config "$CONFIG_FILE")
+
+if [[ -n "$MODEL_PATH" ]]; then
+  CMD+=(--model_path "$MODEL_PATH")
+fi
+
+if [[ -n "$OUTPUT_DIR" ]]; then
+  CMD+=(--output_dir "$OUTPUT_DIR")
+fi
+
+if [[ -n "$TASK_NAMES" ]]; then
+  # Split on whitespace so several task names are still passed as separate
+  # arguments, as they were when the command was a plain string.
+  read -r -a TASK_NAME_LIST <<< "$TASK_NAMES"
+  CMD+=(--task_names "${TASK_NAME_LIST[@]}")
+fi
+
+CMD+=(--data_len "$DATA_LEN" --timeout "$TIMEOUT" --do_sample "$DO_SAMPLE" --temperature "$TEMPERATURE" --seed "$SEED")
+
+if [[ "$DEBUG" = true ]]; then
+  CMD+=(--debug)
+fi
+
+# Create log directory
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+LOG_DIR="./logs"
+mkdir -p "$LOG_DIR"
+LOG_FILE="$LOG_DIR/offline_rollout_$TIMESTAMP.log"
+
+# Print the command
+echo "Running: ${CMD[*]}"
+echo "Logging to: $LOG_FILE"
+
+# Execute with logging. PIPESTATUS[0] is the exit code of the Python process;
+# the pipeline's own status would be tee's and hide a failed evaluation.
+"${CMD[@]}" | tee "$LOG_FILE"
+STATUS=${PIPESTATUS[0]}
+
+if [[ $STATUS -ne 0 ]]; then
+  echo "Evaluation failed with exit code $STATUS. Standard output was logged to $LOG_FILE." >&2
+  exit "$STATUS"
+fi
+
+echo "Evaluation complete! Results saved to the output directory."
diff --git a/scripts/run_sft.sh b/scripts/run_sft.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Launch single-node multi-turn SFT with verl.trainer.fsdp_sft_trainer via torchrun.
+#
+# Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]
+# Any further arguments are forwarded to the trainer command line unchanged.
+# The data paths are relative, so the working directory matters.
+set -x
+
+if [ "$#" -lt 2 ]; then
+    echo "Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]"
+    exit 1
+fi
+
+nproc_per_node=$1
+save_path=$2
+
+# Shift the arguments so "$@" refers to the rest
+shift 2
+
+# "$@" and the variables are quoted so that a save path or an extra override
+# containing spaces or glob characters is passed through as one argument.
+# The logger override is quoted for the same reason; the value the trainer
+# receives, [console], is unchanged.
+# NOTE(review): total_training_steps=1 presumably limits the run to a single
+# step, which looks like a smoke-test setting - confirm before real training.
+torchrun --standalone --nnodes=1 --nproc_per_node="$nproc_per_node" \
+     -m verl.trainer.fsdp_sft_trainer \
+    data.train_files=OpenManus-RL/data/train_split.parquet \
+    data.val_files=OpenManus-RL/data/test_split.parquet \
+    data.multiturn.enable=true \
+    data.multiturn.messages_key=prompt \
+    data.micro_batch_size=4 \
+    model.partial_pretrain=/data1/models/Qwen/Qwen3-4B \
+    trainer.default_local_dir="$save_path" \
+    trainer.project_name=multiturn-sft \
+    trainer.experiment_name=multiturn-sft-qwen-3-4b \
+    "trainer.logger=[console]" \
+    trainer.total_training_steps=1 \
+    trainer.default_hdfs_dir=null "$@" \
+    ulysses_sequence_parallel_size=2 \
+    use_remove_padding=true