Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Binary file added data/babyai/test.parquet
Binary file not shown.
Binary file added data/babyai/train.parquet
Binary file not shown.
Binary file added data/babyai/val.parquet
Binary file not shown.
195 changes: 141 additions & 54 deletions data/generate_train_agentgym_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,31 @@
import os
import pandas as pd
from collections import defaultdict
import numpy as np

# Environment ID mapping: each canonical environment name lists the item_id
# substrings (prefixes) that identify it. Legacy/typo variants seen in the
# original dataset are included (e.g. "alworld", "lmrlgum_wordle").
ENV_ID_MAPPING = {
    "alfworld": ["alfworld", "alworld"],  # "alworld" is a typo variant in the original code
    "babyai": ["babyai"],
    "maze": ["lmrlgym_maze", "maze"],
    "wordle": ["lmrlgum_wordle", "wordle"],
    "sciworld": ["sciworld"],
    "sqlgym": ["sqlgym"],
    "textcraft": ["textcraft"],
    "movie": ["movie"],
    "todo": ["todo"],
    "weather": ["weather"],
    "webshop": ["webshop"],
    "webarena": ["webarena"],
}

# Canonical environments to extract, derived from the mapping so the two
# constants can never drift apart (dicts preserve insertion order).
ENVIRONMENTS = list(ENV_ID_MAPPING)

def make_prefix(question, environment):
"""
Create instruction prefix for the OpenManus agent.
Expand Down Expand Up @@ -105,74 +122,148 @@ def process_group_data(group_name, group_samples):

return processed_data

def group_samples_by_environment(data, env_mapping):
    """
    Group dataset samples by environment, keyed on substrings of item_id.

    Args:
        data: Iterable of samples; each sample is a mapping with an 'item_id' key.
        env_mapping: Dict mapping a canonical environment name to the list of
            item_id substrings (prefixes) that identify it. Iteration order of
            the dict defines match precedence: the first environment with a
            matching prefix claims the sample.

    Returns:
        defaultdict(list) mapping environment name -> list of matching samples.
        Samples that match no environment are reported with a warning and
        excluded from the result.
    """
    env_groups = defaultdict(list)

    for sample in data:
        item_id = sample['item_id']

        # First environment whose prefix occurs anywhere in item_id wins.
        # NOTE(review): this is substring (not anchored-prefix) matching —
        # confirm no environment's token can appear inside another's item_ids.
        matched = False
        for env_name, prefixes in env_mapping.items():
            if any(prefix in item_id for prefix in prefixes):
                env_groups[env_name].append(sample)
                matched = True
                break

        # Unmatched samples are dropped after a warning.
        if not matched:
            print(f"Warning: Could not match sample with item_id '{item_id}' to any environment")

    return env_groups

def split_data(samples, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, random_seed=42):
    """
    Split data into train, validation, and test sets.

    Args:
        samples: List of data samples.
        train_ratio: Fraction of samples for training (default 0.8).
        val_ratio: Fraction of samples for validation (default 0.1).
        test_ratio: Fraction of samples for testing (default 0.1).
        random_seed: Seed for the shuffle, for reproducibility.

    Returns:
        Dict with 'train', 'validation', 'test' keys mapping to sample lists.
        The three lists are disjoint and together cover all of `samples`.

    Raises:
        ValueError: If the three ratios do not sum to 1.
    """
    # Validate with an exception rather than `assert` (asserts are stripped
    # when Python runs with -O).
    if abs(train_ratio + val_ratio + test_ratio - 1.0) >= 1e-10:
        raise ValueError("Ratios must sum to 1")

    # Use a local Generator instead of np.random.seed() so seeding here does
    # not clobber the process-wide global RNG state as a side effect.
    rng = np.random.default_rng(random_seed)
    indices = rng.permutation(len(samples))

    # int() truncation can round val down; the open-ended tail slice for
    # 'test' guarantees every sample still lands in exactly one split.
    n_train = int(len(samples) * train_ratio)
    n_val = int(len(samples) * val_ratio)

    return {
        'train': [samples[i] for i in indices[:n_train]],
        'validation': [samples[i] for i in indices[n_train:n_train + n_val]],
        'test': [samples[i] for i in indices[n_train + n_val:]],
    }

def save_environment_data(env_groups, output_base_dir):
    """
    Save per-environment data as train/val/test parquet files.

    For each environment, samples are processed with process_group_data(),
    split into train/validation/test, and written to
    <output_base_dir>/<env_name>/{train,val,test}.parquet.

    Args:
        env_groups: Dict mapping environment name -> list of raw samples.
        output_base_dir: Base directory; one subdirectory per environment is
            created beneath it.
    """
    # Ensure base output directory exists
    os.makedirs(output_base_dir, exist_ok=True)

    for env_name, samples in env_groups.items():
        if not samples:
            print(f"Warning: No samples found for environment '{env_name}'. Skipping.")
            continue

        print(f"Processing environment: {env_name} with {len(samples)} samples")

        # Create environment subdirectory
        env_dir = os.path.join(output_base_dir, env_name)
        os.makedirs(env_dir, exist_ok=True)

        processed_samples = process_group_data(env_name, samples)
        n = len(processed_samples)

        if n < 3:
            # Too few samples for a real split: train on everything and reuse
            # the first sample as both val and test. Note it then also appears
            # in train, so evaluation on such environments is not meaningful.
            print(f"Warning: Only {n} samples for {env_name}, using all for train")
            splits = {
                'train': processed_samples,
                'validation': processed_samples[:1],
                'test': processed_samples[:1],
            }
        elif n < 10:
            # Fix: the previous ratio-based adjustment could still yield an
            # EMPTY val/test split because int(n * ratio) truncates (e.g.
            # n=7 -> int(7 * (1/7)) == 0 under float rounding). Reserve exactly
            # one sample each for val and test instead.
            rng = np.random.default_rng(42)
            shuffled = [processed_samples[i] for i in rng.permutation(n)]
            splits = {
                'train': shuffled[:-2],
                'validation': shuffled[-2:-1],
                'test': shuffled[-1:],
            }
            print(f"Adjusted split for small dataset: train={n - 2}, val=1, test=1")
        else:
            splits = split_data(
                processed_samples,
                train_ratio=0.8,
                val_ratio=0.1,
                test_ratio=0.1,
            )

        # Save each split
        for split_name, split_samples in splits.items():
            if not split_samples:
                print(f"Warning: No samples in {split_name} split for {env_name}")
                continue

            df = pd.DataFrame(split_samples)

            # 'validation' is written as val.parquet to match consumer naming.
            filename = "val.parquet" if split_name == 'validation' else f"{split_name}.parquet"
            output_file = os.path.join(env_dir, filename)

            df.to_parquet(output_file)
            print(f"Saved {len(split_samples)} samples to {output_file}")

def main():
"""
Expand All @@ -181,23 +272,19 @@ def main():
# Load the dataset
print("Loading dataset...")
dataset = load_dataset("AgentGym/AgentTraj-L")
train_data = dataset['train']
data = dataset['train']

# Group samples by environment
# Group samples by environment using the ID mapping
print("Grouping samples by environment...")
env_groups = group_samples_by_environment(train_data, ENVIRONMENTS)
env_groups = group_samples_by_environment(data, ENV_ID_MAPPING)

# Print group statistics
for env, samples in env_groups.items():
print(f"Environment: {env}, Number of samples: {len(samples)}")

# Save grouped data
print("Saving environment data...")
save_environment_data(
env_groups,
output_dir='output_env_groups',
txt_output_dir='output_txt_files'
)
# Save environment data to appropriate directories
print("Saving environment data with train/val/test splits...")
save_environment_data(env_groups, output_base_dir='./')

print("Processing complete!")

Expand Down
Binary file added data/maze/test.parquet
Binary file not shown.
Binary file added data/maze/train.parquet
Binary file not shown.
Binary file added data/maze/val.parquet
Binary file not shown.
Binary file added data/movie/test.parquet
Binary file not shown.
Binary file added data/movie/train.parquet
Binary file not shown.
Binary file added data/movie/val.parquet
Binary file not shown.
Binary file added data/sciworld/test.parquet
Binary file not shown.
Binary file added data/sciworld/train.parquet
Binary file not shown.
Binary file added data/sciworld/val.parquet
Binary file not shown.
Binary file added data/sqlgym/test.parquet
Binary file not shown.
Binary file added data/sqlgym/train.parquet
Binary file not shown.
Binary file added data/sqlgym/val.parquet
Binary file not shown.
Binary file added data/textcraft/test.parquet
Binary file not shown.
Binary file added data/textcraft/train.parquet
Binary file not shown.
Binary file added data/textcraft/val.parquet
Binary file not shown.
Binary file added data/todo/test.parquet
Binary file not shown.
Binary file added data/todo/train.parquet
Binary file not shown.
Binary file added data/todo/val.parquet
Binary file not shown.
Binary file added data/weather/test.parquet
Binary file not shown.
Binary file added data/weather/train.parquet
Binary file not shown.
Binary file added data/weather/val.parquet
Binary file not shown.
Binary file modified data/webshop/test.parquet
Binary file not shown.
Binary file modified data/webshop/train.parquet
Binary file not shown.
Binary file added data/webshop/val.parquet
Binary file not shown.
Binary file added data/webshop_old/test.parquet
Binary file not shown.
Binary file added data/webshop_old/train.parquet
Binary file not shown.
File renamed without changes.
Binary file added data/wordle/test.parquet
Binary file not shown.
Binary file added data/wordle/train.parquet
Binary file not shown.
Binary file added data/wordle/val.parquet
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Metadata-Version: 2.1
Name: agentenv_sciworld
Version: 0.0.1
Author-email: zsxmwjz <[email protected]>
License: MIT
Requires-Python: ~=3.8
Description-Content-Type: text/markdown
Requires-Dist: fastapi
Requires-Dist: uvicorn
Requires-Dist: scienceworld

# Agent Environments - SciWorld

## Setup
Before running: you must have Java 1.8+ installed on your system (it ships with most Linux distributions).
``` sh
conda create --name agentenv-sciworld python=3.8
conda activate agentenv-sciworld
pip install -e .
```

## Launch

``` sh
sciworld --host 0.0.0.0 --port 36001
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
README.md
pyproject.toml
agentenv_sciworld/__init__.py
agentenv_sciworld/environment.py
agentenv_sciworld/launch.py
agentenv_sciworld/model.py
agentenv_sciworld/server.py
agentenv_sciworld.egg-info/PKG-INFO
agentenv_sciworld.egg-info/SOURCES.txt
agentenv_sciworld.egg-info/dependency_links.txt
agentenv_sciworld.egg-info/entry_points.txt
agentenv_sciworld.egg-info/requires.txt
agentenv_sciworld.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[console_scripts]
sciworld = agentenv_sciworld:launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fastapi
uvicorn
scienceworld
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
agentenv_sciworld
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
    "intro": "You are an autonomous intelligent agent tasked with navigating a web browser. You will be given web-based tasks. These tasks will be accomplished through the use of specific actions you can issue.\n\nHere's the information you'll have:\nThe user's objective: This is the task you're trying to complete.\nThe current web page's accessibility tree: This is a simplified representation of the webpage, providing key information.\nThe current web page's URL: This is the page you're currently navigating.\nThe open tabs: These are the tabs you have open.\nThe previous action: This is the action you just performed. It may be helpful to track your progress.\n\nThe actions you can perform fall into several categories:\n\nPage Operation Actions:\n`click [id]`: This action clicks on an element with a specific id on the webpage.\n`type [id] [content] [press_enter_after=0|1]`: Use this to type the content into the field with id. By default, the \"Enter\" key is pressed after typing unless press_enter_after is set to 0.\n`hover [id]`: Hover over an element with id.\n`press [key_comb]`: Simulates the pressing of a key combination on the keyboard (e.g., Ctrl+v).\n`scroll [direction=down|up]`: Scroll the page up or down.\n\nTab Management Actions:\n`new_tab`: Open a new, empty browser tab.\n`tab_focus [tab_index]`: Switch the browser's focus to a specific tab using its index.\n`close_tab`: Close the currently active tab.\n\nURL Navigation Actions:\n`goto [url]`: Navigate to a specific URL.\n`go_back`: Navigate to the previously viewed page.\n`go_forward`: Navigate to the next page (if a previous 'go_back' action was performed).\n\nCompletion Action:\n`stop [answer]`: Issue this action when you believe the task is complete. If the objective is to find a text-based answer, provide the answer in the bracket. If you believe the task is impossible to complete, provide the answer as \"N/A\" in the bracket.\n\nHomepage:\nIf you want to visit other websites, check out the homepage at http://homepage.com. It has a list of websites you can visit.\nhttp://homepage.com/password.html lists all the account name and password for the websites. You can use them to log in to the websites.\n\nTo be successful, it is very important to follow the following rules:\n1. You should only issue an action that is valid given the current observation\n2. You should only issue one action at a time.\n3. You should follow the examples to reason step by step and then issue the next action.\n4. Generate the action in the correct format. Start with a \"In summary, the next action I will perform is\" phrase, followed by action inside ``````. For example, \"In summary, the next action I will perform is ```click [1234]```\".\n5. Issue stop action when you think you have achieved the objective. Don't generate anything after stop.",
"examples": [
[
"OBSERVATION:\n[1744] link 'HP CB782A#ABA 640 Inkjet Fax Machine (Renewed)'\n\t\t[1749] StaticText '$279.49'\n\t\t[1757] button 'Add to Cart'\n\t\t[1760] button 'Add to Wish List'\n\t\t[1761] button 'Add to Compare'\nURL: http://onestopmarket.com/office-products/office-electronics.html\nOBJECTIVE: What is the price of HP Inkjet Fax Machine\nPREVIOUS ACTION: None",
"Let's think step-by-step. This page list the information of HP Inkjet Fax Machine, which is the product identified in the objective. Its price is $279.49. I think I have achieved the objective. I will issue the stop action with the answer. In summary, the next action I will perform is ```stop [$279.49]```"
],
[
"OBSERVATION:\n[164] textbox 'Search' focused: True required: False\n[171] button 'Go'\n[174] link 'Find directions between two points'\n[212] heading 'Search Results'\n[216] button 'Close'\nURL: http://openstreetmap.org\nOBJECTIVE: Show me the restaurants near CMU\nPREVIOUS ACTION: None",
"Let's think step-by-step. This page has a search box whose ID is [164]. According to the nominatim rule of openstreetmap, I can search for the restaurants near a location by \"restaurants near\". I can submit my typing by pressing the Enter afterwards. In summary, the next action I will perform is ```type [164] [restaurants near CMU] [1]```"
]
],
"template": "OBSERVATION:\n{observation}\nURL: {url}\nOBJECTIVE: {objective}\nPREVIOUS ACTION: {previous_action}",
"meta_data": {
"observation": "accessibility_tree",
"action_type": "id_accessibility_tree",
"keywords": [
"url",
"objective",
"observation",
"previous_action"
],
"prompt_constructor": "CoTPromptConstructor",
"answer_phrase": "In summary, the next action I will perform is",
"action_splitter": "```"
}
}
Loading
Loading