Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions resources_servers/xlam_fc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# XlamFc Resources Server

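A resources server for function calling, built on the [Salesforce xlam-function-calling-60k dataset](https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k). The verifier gives a reward of 1.0 only when the model's function calls pair one-to-one with the expected calls (same function name, and every expected argument present with an equal value); otherwise the reward is 0.0.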


Log in to Hugging Face, then generate the dataset files:

```bash
huggingface-cli login
python resources_servers/xlam_fc/generate_dataset.py
```

Start the servers:

```bash
config_paths="responses_api_models/vllm_model/configs/vllm_model.yaml,\
resources_servers/xlam_fc/configs/xlam_fc.yaml"
ng_run "+config_paths=[$config_paths]"
```

Collect rollouts (limited here to 10 examples):

```bash
ng_collect_rollouts \
+agent_name=xlam_fc_simple_agent \
+input_jsonl_fpath=resources_servers/xlam_fc/data/train.jsonl \
+output_jsonl_fpath=results/xlam_fc_trajectory_collection.jsonl \
+limit=10
```

## Licensing
- Code: Apache 2.0
- Dataset: https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k
125 changes: 125 additions & 0 deletions resources_servers/xlam_fc/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import Any, Dict, List

from pydantic import Field

from nemo_gym.base_resources_server import (
BaseResourcesServerConfig,
BaseVerifyRequest,
BaseVerifyResponse,
SimpleResourcesServer,
)


class XlamFcResourcesServerConfig(BaseResourcesServerConfig):
    """Configuration for the xLAM function-calling resources server.

    Declares no fields of its own; everything comes from the base config.
    """

    pass


class XlamFcVerifyRequest(BaseVerifyRequest):
    """Verify request extended with the function calls the model is expected to make."""

    # Expected function calls; the verifier reads each entry's "name" and
    # "arguments" keys. Defaults to an empty list.
    expected_answers: List[Dict[str, Any]] = Field(default_factory=list)


class XlamFcVerifyResponse(BaseVerifyResponse):
    """Verify response extended with function-call matching statistics."""

    # Number of expected function calls in the request.
    num_expected: int = 0
    # Number of function calls extracted from the model response.
    num_predicted: int = 0
    # Number of expected calls that were paired with a predicted call.
    num_correct: int = 0
    # The extracted function calls, each with "name" and "arguments" keys.
    predicted_calls: List[Dict[str, Any]] = Field(default_factory=list)


class XlamFcResourcesServer(SimpleResourcesServer):
    """Resources server that scores function-calling rollouts against expected calls.

    A rollout earns a reward of 1.0 only when the predicted function calls and
    the expected calls can be paired one-to-one; otherwise the reward is 0.0.
    """

    config: XlamFcResourcesServerConfig

    @staticmethod
    def _normalize_arguments(arguments: Any) -> Dict[str, Any]:
        """Coerce function-call arguments into a dict.

        Args:
            arguments: Either a dict of arguments or a JSON string encoding one.

        Returns:
            The arguments as a dict. Anything that cannot be interpreted as a
            JSON object (invalid JSON, a JSON array/scalar/null, or an
            unsupported Python type) yields an empty dict.
        """
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except json.JSONDecodeError:
                return {}
        # Valid JSON is not necessarily an object ("null", "[1]", "3", ...).
        # Returning such a value would break the dict operations in callers.
        return arguments if isinstance(arguments, dict) else {}

    @staticmethod
    def _function_calls_match(predicted: Dict[str, Any], expected: Dict[str, Any]) -> bool:
        """Return whether a predicted call satisfies an expected call.

        The names must be equal, and every expected argument must be present in
        the predicted call with an equal value. Arguments that appear only in
        the predicted call are ignored.

        Args:
            predicted: Predicted call with "name" and "arguments" keys.
            expected: Expected call with "name" and "arguments" keys.

        Returns:
            True if the predicted call satisfies the expected call.
        """
        if predicted.get("name") != expected.get("name"):
            return False

        predicted_args = XlamFcResourcesServer._normalize_arguments(predicted.get("arguments", {}))
        expected_args = XlamFcResourcesServer._normalize_arguments(expected.get("arguments", {}))

        return all(
            key in predicted_args and predicted_args[key] == expected_value
            for key, expected_value in expected_args.items()
        )

    def _extract_function_calls_from_response(self, body: BaseVerifyRequest) -> List[Dict[str, Any]]:
        """Collect the function calls found in the model response.

        Args:
            body: Verify request whose ``response.output`` items are scanned.

        Returns:
            One ``{"name", "arguments"}`` dict per output item of type
            "function_call", in response order, with arguments normalized to a
            dict.
        """
        return [
            {
                "name": output_item.name,
                "arguments": self._normalize_arguments(output_item.arguments),
            }
            for output_item in body.response.output
            if output_item.type == "function_call"
        ]

    def _calculate_reward(
        self, predicted_calls: List[Dict[str, Any]], expected_answers: List[Dict[str, Any]]
    ) -> tuple[float, int]:
        """Score predicted calls against expected calls.

        Args:
            predicted_calls: Calls extracted from the model response.
            expected_answers: Calls the model was expected to make.

        Returns:
            A ``(reward, num_correct)`` tuple. ``num_correct`` is the largest
            number of expected calls that can each be paired with a distinct
            matching predicted call. ``reward`` is 1.0 when every expected call
            is paired and there are no surplus predicted calls, else 0.0. With
            no expected calls, the reward is 1.0 only if nothing was predicted.
        """
        if not expected_answers:
            return (0.0, 0) if predicted_calls else (1.0, 0)

        # candidates[e] lists the predicted calls that satisfy expected call e.
        candidates = [
            [
                predicted_idx
                for predicted_idx, predicted_call in enumerate(predicted_calls)
                if self._function_calls_match(predicted_call, expected_call)
            ]
            for expected_call in expected_answers
        ]

        # Matching is a subset test, so one predicted call may satisfy several
        # expected calls. A greedy first-fit pairing can therefore consume a
        # predicted call that a later expected call needed, making the score
        # depend on ordering. Augmenting-path bipartite matching avoids that.
        assigned_to: Dict[int, int] = {}  # predicted index -> expected index

        def try_assign(expected_idx: int, visited: set) -> bool:
            for predicted_idx in candidates[expected_idx]:
                if predicted_idx in visited:
                    continue
                visited.add(predicted_idx)
                holder = assigned_to.get(predicted_idx)
                # Take a free predicted call, or re-home its current holder.
                if holder is None or try_assign(holder, visited):
                    assigned_to[predicted_idx] = expected_idx
                    return True
            return False

        num_correct = 0
        for expected_idx in range(len(expected_answers)):
            if try_assign(expected_idx, set()):
                num_correct += 1

        reward = 1.0 if num_correct == len(expected_answers) == len(predicted_calls) else 0.0
        return reward, num_correct

    async def verify(self, body: XlamFcVerifyRequest) -> XlamFcVerifyResponse:
        """Verify a rollout and report the reward with matching statistics.

        Args:
            body: Verify request carrying the model response and expected calls.

        Returns:
            A response that echoes the request fields and adds the reward, the
            expected/predicted/correct counts, and the extracted calls.
        """
        predicted_calls = self._extract_function_calls_from_response(body)

        reward, num_correct = self._calculate_reward(predicted_calls, body.expected_answers)

        return XlamFcVerifyResponse(
            **body.model_dump(),
            reward=reward,
            num_expected=len(body.expected_answers),
            num_predicted=len(predicted_calls),
            num_correct=num_correct,
            predicted_calls=predicted_calls,
        )


# Run the web server when this module is executed as a script.
if __name__ == "__main__":
    XlamFcResourcesServer.run_webserver()
36 changes: 36 additions & 0 deletions resources_servers/xlam_fc/configs/xlam_fc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
xlam_fc:
resources_servers:
xlam_fc:
entrypoint: app.py
domain: agent
verified: false
xlam_fc_simple_agent:
responses_api_agents:
simple_agent:
entrypoint: app.py
resources_server:
type: resources_servers
name: xlam_fc
model_server:
type: responses_api_models
name: policy_model
datasets:
- name: example
type: example
jsonl_fpath: resources_servers/xlam_fc/data/example.jsonl
- name: train
type: train
jsonl_fpath: resources_servers/xlam_fc/data/train.jsonl
gitlab_identifier:
dataset_name: xlam_function_calling_60k
version: 0.0.1
artifact_fpath: train.jsonl
license: Apache 2.0
- name: valid
type: validation
jsonl_fpath: resources_servers/xlam_fc/data/valid.jsonl
gitlab_identifier:
dataset_name: xlam_function_calling_60k
version: 0.0.1
artifact_fpath: valid.jsonl
license: Apache 2.0
5 changes: 5 additions & 0 deletions resources_servers/xlam_fc/data/example.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"id": 0, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "Where can I find live giveaways for beta access and games?"}], "tools": [{"type": "function", "name": "live_giveaways_by_type", "description": "Retrieve live giveaways from the GamerPower API based on the specified type.", "parameters": {"type": "object", "properties": {"type": {"type": "string", "description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "default": "game"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "live_giveaways_by_type", "arguments": {"type": "beta"}}, {"name": "live_giveaways_by_type", "arguments": {"type": "game"}}]}
{"id": 1, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "I need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for 'ethereum'?"}], "tools": [{"type": "function", "name": "peers", "description": "Retrieves a list of company peers given a stock symbol.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol for the company.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "web_chain_details", "description": "python", "parameters": {"type": "object", "properties": {"chain_slug": {"type": "string", "description": "The slug identifier for the blockchain (e.g., 'ethereum' for Ethereum mainnet).", "default": "ethereum"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "web_chain_details", "arguments": {"chain_slug": "ethereum"}}]}
{"id": 2, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "What is the T3MA for 'ETH/BTC' using a 1h interval and a time period of 14?"}], "tools": [{"type": "function", "name": "t3ma", "description": "Fetches the Triple Exponential Moving Average (T3MA) for a given financial instrument.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "Instrument symbol, which can be any equity, index, ETF, forex, or cryptocurrency (e.g., 'AAPL', 'EUR/USD', 'ETH/BTC').", "default": "AAPL"}, "interval": {"type": "string", "description": "Interval between two consecutive points in the time series. Supported intervals include '1min', '5min', '15min', '30min', '45min', '1h', '2h', '4h', '1day', '1week', and '1month'.", "default": "1min"}, "format": {"type": "string", "description": "Format of the response data, either 'CSV' or 'JSON'. Default is 'json'.", "default": "json"}, "v_factor": {"type": "string", "description": "Volume factor used in the calculation of the T3MA.", "default": 0.7}, "series_type": {"type": "string", "description": "Type of series to use in the calculation. Supported values are 'open', 'high', 'low', and 'close'. Default is 'close'.", "default": "close"}, "outputsize": {"type": "string", "description": "Number of data points to return. Default is 30.", "default": 30}, "time_period": {"type": "string", "description": "Number of periods over which to calculate the T3MA. 
Default is 9.", "default": 9}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "stock_v2_get_profile", "description": "Retrieves the company profile information for a given performance ID using the RapidAPI Morning Star service.", "parameters": {"type": "object", "properties": {"performanceid": {"type": "string", "description": "The performance ID of the stock, obtained from endpoints such as /auto-complete, /get-summary, or /get-movers.", "default": "0P0000OQN8"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "t3ma", "arguments": {"symbol": "ETH/BTC", "interval": "1h", "time_period": 14}}]}
{"id": 3, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "List titles originally aired on networks '1' and '8', released after 2010, sorted by release date in descending order."}], "tools": [{"type": "function", "name": "get_animes", "description": "Retrieves a list of animes based on specified search criteria and filters from the RapidAPI Anime API.", "parameters": {"type": "object", "properties": {"year_greater": {"type": "string", "description": "Find animes released after the specified year.", "default": ""}, "media_type": {"type": "string", "description": "Filter by media type (e.g., music, tv, ona, ova, movie, special).", "default": ""}, "studio": {"type": "string", "description": "Filter by studio name.", "default": ""}, "year_less": {"type": "string", "description": "Find animes released before the specified year.", "default": ""}, "nsfw": {"type": "string", "description": "Include NSFW content if set.", "default": ""}, "status": {"type": "string", "description": "Filter by anime status (e.g., currently_airing, finished_airing, not_yet_aired).", "default": ""}, "limit": {"type": "string", "description": "Limit the number of results.", "default": ""}, "q": {"type": "string", "description": "Search for animes by title in English or Japanese.", "default": ""}, "genre": {"type": "string", "description": "Filter by genre.", "default": ""}, "sort": {"type": "string", "description": "Specify sort order, True for ascending and False for descending.", "default": ""}, "offset": {"type": "string", "description": "Number of results to skip.", "default": ""}, "season": {"type": "string", "description": "Filter by season.", "default": ""}, "fields": {"type": 
"string", "description": "Specify the fields to return (e.g., id, title, main_picture, etc.).", "default": ""}, "year_equal": {"type": "string", "description": "Filter by animes released in the specified year.", "default": ""}, "source": {"type": "string", "description": "Filter by source material (e.g., manga, visual_novel, novel, etc.).", "default": ""}, "order": {"type": "string", "description": "Order results by a specific field.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "list_titles", "description": "Fetches a listing of titles that match specified parameters from the Watchmode API.", "parameters": {"type": "object", "properties": {"genres": {"type": "string", "description": "Filter results to only include certain genre(s). Pass in a single genre ID or multiple comma-separated IDs. Default is '4,9'.", "default": "4,9"}, "limit": {"type": "integer", "description": "Set how many titles to return per page. Default and maximum is 250.", "default": "250"}, "source_ids": {"type": "string", "description": "Filter the results to titles available on specific sources by passing individual IDs or multiple comma-separated IDs. Default is '23,206'. Note: Only a single region can be set if this is populated.", "default": "23,206"}, "source_types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "sub,free"}, "types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "movie,tv_series"}, "regions": {"type": "string", "description": "Filter results to only include sources active in specific regions. 
Currently supported regions: US, GB, CA, AU. Default is 'US'. Note: Only a single region can be set if source_ids or source_types are populated.", "default": "US"}, "sort_by": {"type": "string", "description": "Sort order of results. Possible values include: relevance_desc, relevance_asc, popularity_desc, popularity_asc, release_date_desc, release_date_asc, title_desc, title_asc. Default is 'relevance_desc'.", "default": "relevance_desc"}, "page": {"type": "integer", "description": "Set the page of results to return. Default is 1.", "default": "1"}, "network_ids": {"type": "string", "description": "Filter results to titles that originally aired on specific TV networks by passing individual IDs or multiple comma-separated IDs. Default is '1,8,12'.", "default": "1,8,12"}, "release_date_start": {"type": "integer", "description": "Filter results to only include titles released on or after a specific date. Format: YYYYMMDD. Default is 20010101.", "default": "20010101"}, "release_date_end": {"type": "integer", "description": "Filter results to only include titles released on or before a specific date. Format: YYYYMMDD. Default is 20201211.", "default": "20201211"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "list_titles", "arguments": {"network_ids": "1,8", "release_date_start": 20110101, "sort_by": "release_date_desc"}}]}
{"id": 4, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "Fetch the competitor standings for the recently concluded stage 98765."}], "tools": [{"type": "function", "name": "stagecompetitorstandings", "description": "Retrieve the competitor standings for a specific Motorsport stage using the given stage ID.", "parameters": {"type": "object", "properties": {"is_id": {"type": "integer", "description": "The ID of the stage for which to retrieve competitor's standings.", "default": 203968}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "stagecompetitorstandings", "arguments": {"is_id": 98765}}]}
49 changes: 49 additions & 0 deletions resources_servers/xlam_fc/data/example_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"name": "example",
"type": "example",
"jsonl_fpath": "resources_servers/xlam_fc/data/example.jsonl",
"num_repeats": 1,
"gitlab_identifier": null,
"license": null,
"Number of examples": 5,
"Number of tools": {
"Total # non-null values": 5,
"Average": 1.6,
"Min": 1.0,
"Max": 2.0,
"Median": 1.6,
"Standard deviation": 0.548
},
"Json-dumped number of words (proxy for token count)": {
"Total # non-null values": 5,
"Average": 259.0,
"Min": 106.0,
"Max": 648.0,
"Median": 143.0,
"Standard deviation": 230.19
},
"Number of turns": {
"Total # non-null values": 5,
"Average": 1.0,
"Min": 1.0,
"Max": 1.0,
"Median": 1.0,
"Standard deviation": 0.0
},
"Temperature": {
"Total # non-null values": 0,
"Average": 0.0,
"Min": 0.0,
"Max": 0.0,
"Median": 0.0,
"Standard deviation": 0.0
},
"id": {
"Total # non-null values": 5,
"Average": 2.0,
"Min": 0.0,
"Max": 4.0,
"Median": 2.0,
"Standard deviation": 1.58
}
}
Loading