diff --git a/resources_servers/xlam_fc/README.md b/resources_servers/xlam_fc/README.md new file mode 100644 index 000000000..06978e971 --- /dev/null +++ b/resources_servers/xlam_fc/README.md @@ -0,0 +1,27 @@ +# XlamFc Resources Server + +Function calling using the [Salesforce xlam-function-calling-60k dataset](https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k). + + +```bash +huggingface-cli login +python resources_servers/xlam_fc/generate_dataset.py +``` + +```bash +config_paths="responses_api_models/vllm_model/configs/vllm_model.yaml,\ +resources_servers/xlam_fc/configs/xlam_fc.yaml" +ng_run "+config_paths=[$config_paths]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=xlam_fc_simple_agent \ + +input_jsonl_fpath=resources_servers/xlam_fc/data/train.jsonl \ + +output_jsonl_fpath=results/xlam_fc_trajectory_collection.jsonl \ + +limit=10 +``` + +## Licensing +Code: Apache 2.0 +Dataset: https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k diff --git a/resources_servers/xlam_fc/app.py b/resources_servers/xlam_fc/app.py new file mode 100644 index 000000000..f7d9a792f --- /dev/null +++ b/resources_servers/xlam_fc/app.py @@ -0,0 +1,125 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import json
+from typing import Any, Dict, List
+
+from pydantic import Field
+
+from nemo_gym.base_resources_server import (
+    BaseResourcesServerConfig,
+    BaseVerifyRequest,
+    BaseVerifyResponse,
+    SimpleResourcesServer,
+)
+
+
+class XlamFcResourcesServerConfig(BaseResourcesServerConfig):
+    """Configuration for the xLAM function-calling resources server (adds no fields)."""
+
+
+class XlamFcVerifyRequest(BaseVerifyRequest):
+    """Verify request carrying the ground-truth function calls for one example."""
+
+    # Each entry is a dict with a "name" and an "arguments" mapping.
+    expected_answers: List[Dict[str, Any]] = Field(default_factory=list)
+
+
+class XlamFcVerifyResponse(BaseVerifyResponse):
+    """Verify response extended with the matching statistics for one example."""
+
+    num_expected: int = 0
+    num_predicted: int = 0
+    num_correct: int = 0
+    predicted_calls: List[Dict[str, Any]] = Field(default_factory=list)
+
+
+class XlamFcResourcesServer(SimpleResourcesServer):
+    """Grades a model response by comparing its function calls with the expected ones."""
+
+    config: XlamFcResourcesServerConfig
+
+    @staticmethod
+    def _normalize_arguments(arguments: Any) -> Dict[str, Any]:
+        """Coerce function-call arguments into a dict.
+
+        Args:
+            arguments: A JSON-encoded string, an already decoded dict, or any other value.
+
+        Returns:
+            The argument mapping, or an empty dict when the input is neither a dict
+            nor a string that decodes to a JSON object.
+        """
+        if isinstance(arguments, str):
+            try:
+                arguments = json.loads(arguments)
+            except json.JSONDecodeError:
+                return {}
+        # Valid JSON is not necessarily an object ("null", "[1]", "3"). Callers run dict
+        # operations on the result, so every non-dict value counts as "no arguments"
+        # instead of surfacing later as a TypeError/AttributeError.
+        return arguments if isinstance(arguments, dict) else {}
+
+    @staticmethod
+    def _function_calls_match(predicted: Dict[str, Any], expected: Dict[str, Any]) -> bool:
+        """Return True when ``predicted`` satisfies ``expected``.
+
+        The names must be equal and every expected argument must appear in the
+        prediction with an equal value. Extra predicted arguments are ignored.
+        """
+        if predicted.get("name") != expected.get("name"):
+            return False
+
+        predicted_args = XlamFcResourcesServer._normalize_arguments(predicted.get("arguments", {}))
+        expected_args = XlamFcResourcesServer._normalize_arguments(expected.get("arguments", {}))
+
+        return all(
+            key in predicted_args and predicted_args[key] == expected_value
+            for key, expected_value in expected_args.items()
+        )
+
+    def _extract_function_calls_from_response(self, body: BaseVerifyRequest) -> List[Dict[str, Any]]:
+        """Collect the ``function_call`` output items of the response, in output order."""
+        return [
+            {
+                "name": output_item.name,
+                "arguments": self._normalize_arguments(output_item.arguments),
+            }
+            for output_item in body.response.output
+            if output_item.type == "function_call"
+        ]
+
+    def _calculate_reward(
+        self, predicted_calls: List[Dict[str, Any]], expected_answers: List[Dict[str, Any]]
+    ) -> tuple[float, int]:
+        """Score the predicted calls against the expected calls.
+
+        Args:
+            predicted_calls: Calls extracted from the model response.
+            expected_answers: Ground-truth calls for the example.
+
+        Returns:
+            ``(reward, num_correct)``. ``num_correct`` is the size of a maximum
+            one-to-one matching between expected and predicted calls. ``reward`` is
+            1.0 only when every expected call is matched and no additional call was
+            predicted; with no expected calls it is 1.0 only if nothing was predicted.
+        """
+        if not expected_answers:
+            return (1.0, 0) if not predicted_calls else (0.0, 0)
+
+        # For every expected call, the indices of the predicted calls that satisfy it.
+        candidates = [
+            [
+                index
+                for index, predicted_call in enumerate(predicted_calls)
+                if self._function_calls_match(predicted_call, expected_call)
+            ]
+            for expected_call in expected_answers
+        ]
+
+        # Matching is by argument subset, so one predicted call may satisfy several
+        # expected calls. A greedy first-fit can hand a prediction to a loose expected
+        # call although a stricter one needs it, making the score depend on ordering.
+        # Augmenting paths (Kuhn's algorithm) yield a maximum matching instead.
+        owner: Dict[int, int] = {}  # predicted index -> expected index holding it
+
+        def _try_assign(expected_index: int, visited: set) -> bool:
+            for predicted_index in candidates[expected_index]:
+                if predicted_index in visited:
+                    continue
+                visited.add(predicted_index)
+                # Take a free prediction, or re-route its current owner elsewhere.
+                if predicted_index not in owner or _try_assign(owner[predicted_index], visited):
+                    owner[predicted_index] = expected_index
+                    return True
+            return False
+
+        num_correct = sum(_try_assign(index, set()) for index in range(len(expected_answers)))
+
+        reward = 1.0 if num_correct == len(expected_answers) == len(predicted_calls) else 0.0
+        return reward, num_correct
+
+    async def verify(self, body: XlamFcVerifyRequest) -> XlamFcVerifyResponse:
+        """Extract the predicted function calls from ``body`` and grade them."""
+        predicted_calls = self._extract_function_calls_from_response(body)
+
+        reward, num_correct = self._calculate_reward(predicted_calls, body.expected_answers)
+
+        return XlamFcVerifyResponse(
+            **body.model_dump(),
+            reward=reward,
+            num_expected=len(body.expected_answers),
+            num_predicted=len(predicted_calls),
+            num_correct=num_correct,
+            predicted_calls=predicted_calls,
+        )
+
+
+if __name__ == "__main__":
+    XlamFcResourcesServer.run_webserver()
diff --git a/resources_servers/xlam_fc/configs/xlam_fc.yaml b/resources_servers/xlam_fc/configs/xlam_fc.yaml new file mode 100644 index 000000000..a2cea8d69 --- /dev/null +++ b/resources_servers/xlam_fc/configs/xlam_fc.yaml @@ -0,0 +1,36 @@ +xlam_fc: + resources_servers: + xlam_fc: + entrypoint: app.py + domain: agent + verified: false +xlam_fc_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: xlam_fc + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: example + type: example + jsonl_fpath: resources_servers/xlam_fc/data/example.jsonl + - name: train + type: train + jsonl_fpath: resources_servers/xlam_fc/data/train.jsonl + gitlab_identifier: + dataset_name: xlam_function_calling_60k + version: 0.0.1 + artifact_fpath: train.jsonl + license: Apache 
2.0 + - name: valid + type: validation + jsonl_fpath: resources_servers/xlam_fc/data/valid.jsonl + gitlab_identifier: + dataset_name: xlam_function_calling_60k + version: 0.0.1 + artifact_fpath: valid.jsonl + license: Apache 2.0 diff --git a/resources_servers/xlam_fc/data/example.jsonl b/resources_servers/xlam_fc/data/example.jsonl new file mode 100644 index 000000000..8e5bea3c3 --- /dev/null +++ b/resources_servers/xlam_fc/data/example.jsonl @@ -0,0 +1,5 @@ +{"id": 0, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "Where can I find live giveaways for beta access and games?"}], "tools": [{"type": "function", "name": "live_giveaways_by_type", "description": "Retrieve live giveaways from the GamerPower API based on the specified type.", "parameters": {"type": "object", "properties": {"type": {"type": "string", "description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "default": "game"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "live_giveaways_by_type", "arguments": {"type": "beta"}}, {"name": "live_giveaways_by_type", "arguments": {"type": "game"}}]} +{"id": 1, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "I need to understand the details of the Ethereum blockchain for my cryptocurrency project. 
Can you fetch the details for 'ethereum'?"}], "tools": [{"type": "function", "name": "peers", "description": "Retrieves a list of company peers given a stock symbol.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol for the company.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "web_chain_details", "description": "python", "parameters": {"type": "object", "properties": {"chain_slug": {"type": "string", "description": "The slug identifier for the blockchain (e.g., 'ethereum' for Ethereum mainnet).", "default": "ethereum"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "web_chain_details", "arguments": {"chain_slug": "ethereum"}}]} +{"id": 2, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "What is the T3MA for 'ETH/BTC' using a 1h interval and a time period of 14?"}], "tools": [{"type": "function", "name": "t3ma", "description": "Fetches the Triple Exponential Moving Average (T3MA) for a given financial instrument.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "Instrument symbol, which can be any equity, index, ETF, forex, or cryptocurrency (e.g., 'AAPL', 'EUR/USD', 'ETH/BTC').", "default": "AAPL"}, "interval": {"type": "string", "description": "Interval between two consecutive points in the time series. 
Supported intervals include '1min', '5min', '15min', '30min', '45min', '1h', '2h', '4h', '1day', '1week', and '1month'.", "default": "1min"}, "format": {"type": "string", "description": "Format of the response data, either 'CSV' or 'JSON'. Default is 'json'.", "default": "json"}, "v_factor": {"type": "string", "description": "Volume factor used in the calculation of the T3MA.", "default": 0.7}, "series_type": {"type": "string", "description": "Type of series to use in the calculation. Supported values are 'open', 'high', 'low', and 'close'. Default is 'close'.", "default": "close"}, "outputsize": {"type": "string", "description": "Number of data points to return. Default is 30.", "default": 30}, "time_period": {"type": "string", "description": "Number of periods over which to calculate the T3MA. Default is 9.", "default": 9}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "stock_v2_get_profile", "description": "Retrieves the company profile information for a given performance ID using the RapidAPI Morning Star service.", "parameters": {"type": "object", "properties": {"performanceid": {"type": "string", "description": "The performance ID of the stock, obtained from endpoints such as /auto-complete, /get-summary, or /get-movers.", "default": "0P0000OQN8"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "t3ma", "arguments": {"symbol": "ETH/BTC", "interval": "1h", "time_period": 14}}]} +{"id": 3, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. 
You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "List titles originally aired on networks '1' and '8', released after 2010, sorted by release date in descending order."}], "tools": [{"type": "function", "name": "get_animes", "description": "Retrieves a list of animes based on specified search criteria and filters from the RapidAPI Anime API.", "parameters": {"type": "object", "properties": {"year_greater": {"type": "string", "description": "Find animes released after the specified year.", "default": ""}, "media_type": {"type": "string", "description": "Filter by media type (e.g., music, tv, ona, ova, movie, special).", "default": ""}, "studio": {"type": "string", "description": "Filter by studio name.", "default": ""}, "year_less": {"type": "string", "description": "Find animes released before the specified year.", "default": ""}, "nsfw": {"type": "string", "description": "Include NSFW content if set.", "default": ""}, "status": {"type": "string", "description": "Filter by anime status (e.g., currently_airing, finished_airing, not_yet_aired).", "default": ""}, "limit": {"type": "string", "description": "Limit the number of results.", "default": ""}, "q": {"type": "string", "description": "Search for animes by title in English or Japanese.", "default": ""}, "genre": {"type": "string", "description": "Filter by genre.", "default": ""}, "sort": {"type": "string", "description": "Specify sort order, True for ascending and False for descending.", "default": ""}, "offset": {"type": "string", "description": "Number of results to skip.", "default": ""}, "season": {"type": "string", "description": "Filter by season.", "default": ""}, "fields": {"type": "string", "description": "Specify the fields to return (e.g., id, title, main_picture, etc.).", "default": ""}, "year_equal": {"type": "string", "description": "Filter by animes released in the specified year.", "default": ""}, "source": {"type": "string", 
"description": "Filter by source material (e.g., manga, visual_novel, novel, etc.).", "default": ""}, "order": {"type": "string", "description": "Order results by a specific field.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false}, {"type": "function", "name": "list_titles", "description": "Fetches a listing of titles that match specified parameters from the Watchmode API.", "parameters": {"type": "object", "properties": {"genres": {"type": "string", "description": "Filter results to only include certain genre(s). Pass in a single genre ID or multiple comma-separated IDs. Default is '4,9'.", "default": "4,9"}, "limit": {"type": "integer", "description": "Set how many titles to return per page. Default and maximum is 250.", "default": "250"}, "source_ids": {"type": "string", "description": "Filter the results to titles available on specific sources by passing individual IDs or multiple comma-separated IDs. Default is '23,206'. Note: Only a single region can be set if this is populated.", "default": "23,206"}, "source_types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "sub,free"}, "types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "movie,tv_series"}, "regions": {"type": "string", "description": "Filter results to only include sources active in specific regions. Currently supported regions: US, GB, CA, AU. Default is 'US'. Note: Only a single region can be set if source_ids or source_types are populated.", "default": "US"}, "sort_by": {"type": "string", "description": "Sort order of results. 
Possible values include: relevance_desc, relevance_asc, popularity_desc, popularity_asc, release_date_desc, release_date_asc, title_desc, title_asc. Default is 'relevance_desc'.", "default": "relevance_desc"}, "page": {"type": "integer", "description": "Set the page of results to return. Default is 1.", "default": "1"}, "network_ids": {"type": "string", "description": "Filter results to titles that originally aired on specific TV networks by passing individual IDs or multiple comma-separated IDs. Default is '1,8,12'.", "default": "1,8,12"}, "release_date_start": {"type": "integer", "description": "Filter results to only include titles released on or after a specific date. Format: YYYYMMDD. Default is 20010101.", "default": "20010101"}, "release_date_end": {"type": "integer", "description": "Filter results to only include titles released on or before a specific date. Format: YYYYMMDD. Default is 20201211.", "default": "20201211"}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "list_titles", "arguments": {"network_ids": "1,8", "release_date_start": 20110101, "sort_by": "release_date_desc"}}]} +{"id": 4, "responses_create_params": {"input": [{"role": "system", "content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. 
You can call multiple functions if needed to fully address the user's query."}, {"role": "user", "content": "Fetch the competitor standings for the recently concluded stage 98765."}], "tools": [{"type": "function", "name": "stagecompetitorstandings", "description": "Retrieve the competitor standings for a specific Motorsport stage using the given stage ID.", "parameters": {"type": "object", "properties": {"is_id": {"type": "integer", "description": "The ID of the stage for which to retrieve competitor's standings.", "default": 203968}}, "required": [], "additionalProperties": false}, "strict": false}]}, "expected_answers": [{"name": "stagecompetitorstandings", "arguments": {"is_id": 98765}}]} diff --git a/resources_servers/xlam_fc/data/example_metrics.json b/resources_servers/xlam_fc/data/example_metrics.json new file mode 100644 index 000000000..2e87353b6 --- /dev/null +++ b/resources_servers/xlam_fc/data/example_metrics.json @@ -0,0 +1,49 @@ +{ + "name": "example", + "type": "example", + "jsonl_fpath": "resources_servers/xlam_fc/data/example.jsonl", + "num_repeats": 1, + "gitlab_identifier": null, + "license": null, + "Number of examples": 5, + "Number of tools": { + "Total # non-null values": 5, + "Average": 1.6, + "Min": 1.0, + "Max": 2.0, + "Median": 1.6, + "Standard deviation": 0.548 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 5, + "Average": 259.0, + "Min": 106.0, + "Max": 648.0, + "Median": 143.0, + "Standard deviation": 230.19 + }, + "Number of turns": { + "Total # non-null values": 5, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Median": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Median": 0.0, + "Standard deviation": 0.0 + }, + "id": { + "Total # non-null values": 5, + "Average": 2.0, + "Min": 0.0, + "Max": 4.0, + "Median": 2.0, + "Standard deviation": 1.58 + } +} \ No newline at end of file diff --git 
a/resources_servers/xlam_fc/data/example_rollouts.jsonl b/resources_servers/xlam_fc/data/example_rollouts.jsonl new file mode 100644 index 000000000..73f230605 --- /dev/null +++ b/resources_servers/xlam_fc/data/example_rollouts.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query.", "role": "system", "type": "message"}, {"content": "I need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for 'ethereum'?", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "peers", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol for the company.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves a list of company peers given a stock symbol."}, {"name": "web_chain_details", "parameters": {"type": "object", "properties": {"chain_slug": {"type": "string", "description": "The slug identifier for the blockchain (e.g., 'ethereum' for Ethereum mainnet).", "default": "ethereum"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "python"}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_9ae4c3613cb34531a95b27b1e17d2a5c", "created_at": 1762553384.0, "error": null, 
"incomplete_details": null, "instructions": null, "metadata": null, "model": "Qwen/Qwen3-30B-A3B", "object": "response", "output": [{"id": "rs_9b5c29c493434b0880e701e80a2dab60", "summary": [{"text": "\nOkay, the user is asking about the details of the Ethereum blockchain for their cryptocurrency project. They mentioned fetching details for 'ethereum'. Let me check the available tools.\n\nLooking at the tools provided, there's a function called web_chain_details. Its description says it retrieves details for a blockchain using a slug identifier, with a default of 'ethereum'. The parameters require a chain_slug, which the user has specified as 'ethereum'. \n\nSince the user explicitly mentioned 'ethereum', I should call the web_chain_details function with chain_slug set to 'ethereum'. The other function, peers, is for stock symbols and company peers, which doesn't seem relevant here. \n\nSo, the correct tool to use is web_chain_details with the chain_slug parameter. No need to call multiple functions here. Just make sure the arguments are correctly formatted in JSON within the tool_call tags.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_c59dd1783a894df7b0c920c11a73c83c", "content": [{"annotations": [], "text": "\n\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"chain_slug\": \"ethereum\"}", "call_id": "chatcmpl-tool-30a72148da6d4d1ebb858f6c096f996b", "name": "web_chain_details", "type": "function_call", "id": "chatcmpl-tool-30a72148da6d4d1ebb858f6c096f996b", "status": "completed"}, {"call_id": "chatcmpl-tool-30a72148da6d4d1ebb858f6c096f996b", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"id": "rs_6a079e7082f74131bb40faeafdaa8baa", "summary": [{"text": "\nOkay, the user asked for details about the Ethereum blockchain using the web_chain_details function. 
The response came back as \"Not Found\". Hmm, maybe the function isn't implemented correctly or the endpoint is wrong. Let me check the function's description again. Oh, the function's description says it's a Python function, but maybe it's not properly connected to the actual API. Since the user needs Ethereum details, perhaps I should try another approach. Wait, the peers function is for stock symbols, which isn't relevant here. Maybe the user needs info on Ethereum's structure, consensus mechanism, or smart contracts. Since the tool isn't working, I should inform the user about the error and offer alternative ways to get the information, like suggesting official resources or explaining key aspects of Ethereum manually.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_d135a2a6cd1c4460a5ddf58160722cfb", "content": [{"annotations": [], "text": "\n\nIt seems there was an issue retrieving the Ethereum blockchain details through the available tool. The function `web_chain_details` returned a \"Not Found\" error, which might indicate a technical limitation or misconfiguration. \n\nSince this is critical for your cryptocurrency project, here\u2019s what we can do:\n\n1. **Alternative Approach**: I can manually explain key details about Ethereum, such as its consensus mechanism (Proof-of-Work to Proof-of-Stake transition), smart contract capabilities, gas fees, or its role in DeFi and NFTs.\n2. **Official Resources**: Recommend checking Ethereum's official documentation ([ethereum.org](https://ethereum.org)) or blockchain explorers like [Etherscan](https://etherscan.io/).\n3. **Technical Specs**: If you need specific data (e.g., block time, transaction throughput), I can provide general insights or guide you to where to find it.\n\nWould you like me to proceed with any of these options? 
Let me know!", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [{"name": "peers", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "The stock symbol for the company.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves a list of company peers given a stock symbol."}, {"name": "web_chain_details", "parameters": {"type": "object", "properties": {"chain_slug": {"type": "string", "description": "The slug identifier for the blockchain (e.g., 'ethereum' for Ethereum mainnet).", "default": "ethereum"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "python"}], "top_p": null, "background": null, "max_output_tokens": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "truncation": null, "usage": null, "user": null, "max_tool_calls": null, "top_logprobs": null}, "reward": 1.0, "num_expected": 1, "num_predicted": 1, "num_correct": 1, "predicted_calls": [{"name": "web_chain_details", "arguments": {"chain_slug": "ethereum"}}]} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. 
You can call multiple functions if needed to fully address the user's query.", "role": "system", "type": "message"}, {"content": "Fetch the competitor standings for the recently concluded stage 98765.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "stagecompetitorstandings", "parameters": {"type": "object", "properties": {"is_id": {"type": "integer", "description": "The ID of the stage for which to retrieve competitor's standings.", "default": 203968}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieve the competitor standings for a specific Motorsport stage using the given stage ID."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_cb21180d35f842819982ab1012b0cc7d", "created_at": 1762553384.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "Qwen/Qwen3-30B-A3B", "object": "response", "output": [{"id": "rs_fb0ffb29cfe04892825e48024de28fe2", "summary": [{"text": "\nOkay, the user is asking for the competitor standings of stage 98765. Let me check the tools available. There's a function called stagecompetitorstandings that takes an is_id parameter, which is an integer. The default is 203968, but the user specified 98765. So I need to call that function with is_id set to 98765. I'll make sure the JSON is correctly formatted with the stage ID. No other parameters are needed since the function only requires is_id. 
Let me structure the tool call accordingly.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_cada7eebea7245828046025962d61e13", "content": [{"annotations": [], "text": "\n\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"is_id\": 98765}", "call_id": "chatcmpl-tool-a57dc8116cba41b79d8c28ab736e13b9", "name": "stagecompetitorstandings", "type": "function_call", "id": "chatcmpl-tool-a57dc8116cba41b79d8c28ab736e13b9", "status": "completed"}, {"call_id": "chatcmpl-tool-a57dc8116cba41b79d8c28ab736e13b9", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"id": "rs_bff93855a249468eabf61b9a35fa9dd7", "summary": [{"text": "\nOkay, the user asked for the competitor standings of stage 98765, and the system returned a \"Not Found\" error. Hmm, maybe the stage ID is incorrect or the data isn't available. Let me check if there's a default ID in the function parameters. The function 'stagecompetitorstandings' has a default ID of 203968. Perhaps the user mentioned a different stage, but the default is still valid. Wait, the user specifically said \"recently concluded stage 98765,\" so maybe that ID is wrong. But the function might not have data for that ID. Should I try the default ID instead? Let me see. The function allows using the default if no ID is provided, but the user provided 98765. Since the response was not found, maybe the correct approach is to inform the user that the specific stage ID isn't available and suggest checking the ID or using the default. Alternatively, maybe there was a typo. But according to the tools, the function can only take the provided ID. So the answer should be that the data isn't found for 98765 and maybe mention the default ID exists. Wait, the function's required parameters don't include 'is_id' since it's not in the required list. 
So maybe the user can try without the ID, but the function has a default. However, the user specified 98765. Since the tool response was not found, I should tell the user that the data isn't available for that stage ID and perhaps check if they meant the default one. But the assistant should stick to the function's capabilities. So the final answer is to state that the competitor standings for stage 98765 could not be found.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_f1aa565c1bb04e14b9330d271fb8cee8", "content": [{"annotations": [], "text": "\n\nThe competitor standings for stage 98765 could not be found. This may indicate that the stage ID is incorrect, the data is not yet available, or there are no recorded standings for this specific stage. If you believe this is an error, please verify the stage ID or try again later. \n\nIf you'd like, I can retrieve the standings for the default stage ID (203968) instead. Let me know!", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [{"name": "stagecompetitorstandings", "parameters": {"type": "object", "properties": {"is_id": {"type": "integer", "description": "The ID of the stage for which to retrieve competitor's standings.", "default": 203968}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieve the competitor standings for a specific Motorsport stage using the given stage ID."}], "top_p": null, "background": null, "max_output_tokens": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "truncation": null, "usage": null, "user": null, "max_tool_calls": null, "top_logprobs": null}, "reward": 1.0, "num_expected": 1, "num_predicted": 1, "num_correct": 1, "predicted_calls": [{"name": "stagecompetitorstandings", 
"arguments": {"is_id": 98765}}]} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query.", "role": "system", "type": "message"}, {"content": "Where can I find live giveaways for beta access and games?", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "live_giveaways_by_type", "parameters": {"type": "object", "properties": {"type": {"type": "string", "description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "default": "game"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieve live giveaways from the GamerPower API based on the specified type."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_289fb86842ea492cb13aa96def13c8d5", "created_at": 1762553384.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "Qwen/Qwen3-30B-A3B", "object": "response", "output": [{"id": "rs_42b52c30739d4399ad3e4790ede186d1", "summary": [{"text": "\nOkay, the user is asking where to find live giveaways for beta access and games. Let me check the tools provided. There's a function called live_giveaways_by_type that retrieves live giveaways based on the type. The parameters allow specifying the type, with a default of \"game\". 
The user mentioned both \"beta access\" and \"games\", so I need to handle both types.\n\nWait, the function's parameters have a \"type\" field which can be \"game\", \"loot\", or \"beta\". The user is asking for \"beta access\" and \"games\". So \"beta\" would cover the beta access part, and \"game\" for the games. But the function might need to be called separately for each type. However, the function's parameters don't mention multiple types. So maybe I need to make two separate calls: one with type \"beta\" and another with type \"game\". \n\nAlternatively, does the function accept an array of types? The parameters say \"type\" is a string, so probably not. So the correct approach is to call the function twice, once for each type. But the user might be expecting both in one go. Wait, the function's description says \"based on the specified type\", so maybe each call is for a single type. Therefore, I should generate two tool calls: one with type \"beta\" and another with \"game\". That way, the user gets both categories they're interested in.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_f6a381bbf0cb46b4aa84bb1674a73fa9", "content": [{"annotations": [], "text": "\n\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"type\": \"beta\"}", "call_id": "chatcmpl-tool-cd62ffcc3b58426bb5194edb59d4a752", "name": "live_giveaways_by_type", "type": "function_call", "id": "chatcmpl-tool-cd62ffcc3b58426bb5194edb59d4a752", "status": "completed"}, {"arguments": "{\"type\": \"game\"}", "call_id": "chatcmpl-tool-21c7909276c04b2aa2cc2003669f3983", "name": "live_giveaways_by_type", "type": "function_call", "id": "chatcmpl-tool-21c7909276c04b2aa2cc2003669f3983", "status": "completed"}, {"call_id": "chatcmpl-tool-cd62ffcc3b58426bb5194edb59d4a752", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"call_id": 
"chatcmpl-tool-21c7909276c04b2aa2cc2003669f3983", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"id": "rs_4a605f9590074be7a33ff91a68878cf3", "summary": [{"text": "\nOkay, the user asked where they can find live giveaways for beta access and games. I called the function live_giveaways_by_type twice, once with \"beta\" and once with \"game\". But both times, the response was {\"detail\":\"Not Found\"}. Hmm, that's a problem. The function is supposed to retrieve live giveaways from the GamerPower API, but it's not finding any. Maybe the API doesn't have any current giveaways for those types. Or maybe there's an error in the function's implementation. Since the user is asking for places to find these giveaways, and the function isn't returning any data, I should inform them that there are no current giveaways available through this method. I should also suggest checking the GamerPower website directly or other platforms that might list giveaways. Let me make sure to mention that the API isn't returning any results and offer alternative suggestions.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_78b9fcd3c7c44a08895ce9da46c28d15", "content": [{"annotations": [], "text": "\n\nIt seems there are no current live giveaways for \"beta\" access or \"game\" listings through the GamerPower API at the moment. The response returned a \"Not Found\" status, which might indicate either:\n\n1. **No active giveaways** for these categories right now.\n2. 
**API limitations** or temporary data unavailability.\n\n### Alternative Suggestions:\n- Check [GamerPower's official website](https://www.gamerpower.com/) directly for real-time giveaways.\n- Explore platforms like [GiveawayCats](https://www.giveawaycats.com/) or [FreeGameDaily](https://www.freegamedaily.com/) for beta and game giveaways.\n- Follow gaming communities (e.g., Reddit's r/giveaway or r/freepcgames) for user-shared opportunities.\n\nLet me know if you'd like help searching elsewhere!", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [{"name": "live_giveaways_by_type", "parameters": {"type": "object", "properties": {"type": {"type": "string", "description": "The type of giveaways to retrieve (e.g., game, loot, beta).", "default": "game"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieve live giveaways from the GamerPower API based on the specified type."}], "top_p": null, "background": null, "max_output_tokens": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "truncation": null, "usage": null, "user": null, "max_tool_calls": null, "top_logprobs": null}, "reward": 1.0, "num_expected": 2, "num_predicted": 2, "num_correct": 2, "predicted_calls": [{"name": "live_giveaways_by_type", "arguments": {"type": "beta"}}, {"name": "live_giveaways_by_type", "arguments": {"type": "game"}}]} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. 
You can call multiple functions if needed to fully address the user's query.", "role": "system", "type": "message"}, {"content": "List titles originally aired on networks '1' and '8', released after 2010, sorted by release date in descending order.", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "get_animes", "parameters": {"type": "object", "properties": {"year_greater": {"type": "string", "description": "Find animes released after the specified year.", "default": ""}, "media_type": {"type": "string", "description": "Filter by media type (e.g., music, tv, ona, ova, movie, special).", "default": ""}, "studio": {"type": "string", "description": "Filter by studio name.", "default": ""}, "year_less": {"type": "string", "description": "Find animes released before the specified year.", "default": ""}, "nsfw": {"type": "string", "description": "Include NSFW content if set.", "default": ""}, "status": {"type": "string", "description": "Filter by anime status (e.g., currently_airing, finished_airing, not_yet_aired).", "default": ""}, "limit": {"type": "string", "description": "Limit the number of results.", "default": ""}, "q": {"type": "string", "description": "Search for animes by title in English or Japanese.", "default": ""}, "genre": {"type": "string", "description": "Filter by genre.", "default": ""}, "sort": {"type": "string", "description": "Specify sort order, True for ascending and False for descending.", "default": ""}, "offset": {"type": "string", "description": "Number of results to skip.", "default": ""}, "season": {"type": "string", "description": "Filter by season.", "default": ""}, "fields": {"type": "string", "description": "Specify the fields to return 
(e.g., id, title, main_picture, etc.).", "default": ""}, "year_equal": {"type": "string", "description": "Filter by animes released in the specified year.", "default": ""}, "source": {"type": "string", "description": "Filter by source material (e.g., manga, visual_novel, novel, etc.).", "default": ""}, "order": {"type": "string", "description": "Order results by a specific field.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves a list of animes based on specified search criteria and filters from the RapidAPI Anime API."}, {"name": "list_titles", "parameters": {"type": "object", "properties": {"genres": {"type": "string", "description": "Filter results to only include certain genre(s). Pass in a single genre ID or multiple comma-separated IDs. Default is '4,9'.", "default": "4,9"}, "limit": {"type": "integer", "description": "Set how many titles to return per page. Default and maximum is 250.", "default": "250"}, "source_ids": {"type": "string", "description": "Filter the results to titles available on specific sources by passing individual IDs or multiple comma-separated IDs. Default is '23,206'. Note: Only a single region can be set if this is populated.", "default": "23,206"}, "source_types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "sub,free"}, "types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "movie,tv_series"}, "regions": {"type": "string", "description": "Filter results to only include sources active in specific regions. Currently supported regions: US, GB, CA, AU. Default is 'US'. 
Note: Only a single region can be set if source_ids or source_types are populated.", "default": "US"}, "sort_by": {"type": "string", "description": "Sort order of results. Possible values include: relevance_desc, relevance_asc, popularity_desc, popularity_asc, release_date_desc, release_date_asc, title_desc, title_asc. Default is 'relevance_desc'.", "default": "relevance_desc"}, "page": {"type": "integer", "description": "Set the page of results to return. Default is 1.", "default": "1"}, "network_ids": {"type": "string", "description": "Filter results to titles that originally aired on specific TV networks by passing individual IDs or multiple comma-separated IDs. Default is '1,8,12'.", "default": "1,8,12"}, "release_date_start": {"type": "integer", "description": "Filter results to only include titles released on or after a specific date. Format: YYYYMMDD. Default is 20010101.", "default": "20010101"}, "release_date_end": {"type": "integer", "description": "Filter results to only include titles released on or before a specific date. Format: YYYYMMDD. Default is 20201211.", "default": "20201211"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Fetches a listing of titles that match specified parameters from the Watchmode API."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_a5b32ce6adb34a64bd5d1373a90dec76", "created_at": 1762553385.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "Qwen/Qwen3-30B-A3B", "object": "response", "output": [{"id": "rs_d6c9621ec08f4881855748ad4d94f76c", "summary": [{"text": "\nOkay, the user wants a list of titles that were originally aired on networks 1 and 8, released after 2010, and sorted by release date in descending order. Let me check the available functions.\n\nLooking at the tools provided, there's the list_titles function. Let me check its parameters. 
The function has parameters like network_ids, release_date_start, and sort_by. \n\nThe user specified networks '1' and '8', so I should set network_ids to \"1,8\". The release date should be after 2010, so release_date_start needs to be 20100101. For sorting, the user wants release date descending, which corresponds to \"release_date_desc\" in the sort_by parameter. \n\nThe default values for other parameters like genres, source_ids, etc., might not be necessary here. The user didn't mention anything about genres or sources, so I can leave those as default. But I need to make sure that the parameters I do set are correctly formatted. \n\nWait, the release_date_start default is 20010101, but the user wants after 2010. So changing that to 20100101. The sort_by parameter should be set to \"release_date_desc\". Also, the types parameter defaults to movie and tv_series, which is probably okay unless the user specified otherwise. \n\nSo putting it all together, the function call should have network_ids=\"1,8\", release_date_start=20100101, and sort_by=\"release_date_desc\". The other parameters can stay as their defaults unless there's a conflict. 
I think that's all.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_e6365fba3824487eb1febf97b6b295d2", "content": [{"annotations": [], "text": "\n\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"network_ids\": \"1,8\", \"release_date_start\": \"20100101\", \"sort_by\": \"release_date_desc\"}", "call_id": "chatcmpl-tool-d59c1648507d4049b482217c51efd7d1", "name": "list_titles", "type": "function_call", "id": "chatcmpl-tool-d59c1648507d4049b482217c51efd7d1", "status": "completed"}, {"call_id": "chatcmpl-tool-d59c1648507d4049b482217c51efd7d1", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"id": "rs_85495a60689e49fb946f32bacfc08059", "summary": [{"text": "\nOkay, the user asked to list titles originally aired on networks '1' and '8', released after 2010, sorted by release date in descending order. I used the list_titles function with network_ids \"1,8\", release_date_start \"20100101\", and sort_by \"release_date_desc\". But the response was {\"detail\":\"Not Found\"}. Hmm, maybe the parameters were incorrect.\n\nWait, looking at the function definitions, the list_titles function has parameters like network_ids, which the user specified as '1' and '8'. The default for network_ids is \"1,8,12\", so maybe the user's input is correct. But the response was not found. Could there be no titles matching those criteria? Or maybe the date format is wrong? The release_date_start expects YYYYMMDD. The user said \"after 2010\", so setting it to 20100101 makes sense. But perhaps the API doesn't have data for those networks after 2010? Alternatively, maybe the function requires other parameters or there's a mistake in the function call.\n\nAlternatively, maybe the user's query is correct, but the API doesn't have any results. But the assistant's job is to call the function correctly. 
The parameters seem right. Maybe the issue is with the API itself, but the assistant can't know that. So the answer should state that no titles were found based on the given criteria.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_2032164cb8fd44b19f77c5c00276e872", "content": [{"annotations": [], "text": "\n\nNo titles were found that match the criteria of being originally aired on networks '1' and '8', released after 2010, and sorted by release date in descending order. The API returned a \"Not Found\" response, indicating no results matched the specified parameters.", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [{"name": "get_animes", "parameters": {"type": "object", "properties": {"year_greater": {"type": "string", "description": "Find animes released after the specified year.", "default": ""}, "media_type": {"type": "string", "description": "Filter by media type (e.g., music, tv, ona, ova, movie, special).", "default": ""}, "studio": {"type": "string", "description": "Filter by studio name.", "default": ""}, "year_less": {"type": "string", "description": "Find animes released before the specified year.", "default": ""}, "nsfw": {"type": "string", "description": "Include NSFW content if set.", "default": ""}, "status": {"type": "string", "description": "Filter by anime status (e.g., currently_airing, finished_airing, not_yet_aired).", "default": ""}, "limit": {"type": "string", "description": "Limit the number of results.", "default": ""}, "q": {"type": "string", "description": "Search for animes by title in English or Japanese.", "default": ""}, "genre": {"type": "string", "description": "Filter by genre.", "default": ""}, "sort": {"type": "string", "description": "Specify sort order, True for ascending and False for descending.", "default": ""}, "offset": {"type": "string", 
"description": "Number of results to skip.", "default": ""}, "season": {"type": "string", "description": "Filter by season.", "default": ""}, "fields": {"type": "string", "description": "Specify the fields to return (e.g., id, title, main_picture, etc.).", "default": ""}, "year_equal": {"type": "string", "description": "Filter by animes released in the specified year.", "default": ""}, "source": {"type": "string", "description": "Filter by source material (e.g., manga, visual_novel, novel, etc.).", "default": ""}, "order": {"type": "string", "description": "Order results by a specific field.", "default": ""}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves a list of animes based on specified search criteria and filters from the RapidAPI Anime API."}, {"name": "list_titles", "parameters": {"type": "object", "properties": {"genres": {"type": "string", "description": "Filter results to only include certain genre(s). Pass in a single genre ID or multiple comma-separated IDs. Default is '4,9'.", "default": "4,9"}, "limit": {"type": "integer", "description": "Set how many titles to return per page. Default and maximum is 250.", "default": "250"}, "source_ids": {"type": "string", "description": "Filter the results to titles available on specific sources by passing individual IDs or multiple comma-separated IDs. Default is '23,206'. Note: Only a single region can be set if this is populated.", "default": "23,206"}, "source_types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. Note: Only a single region can be set if this is populated.", "default": "sub,free"}, "types": {"type": "string", "description": "Filter results to only include titles available on specific types of sources (e.g., subscription, free). Default is 'sub,free'. 
Note: Only a single region can be set if this is populated.", "default": "movie,tv_series"}, "regions": {"type": "string", "description": "Filter results to only include sources active in specific regions. Currently supported regions: US, GB, CA, AU. Default is 'US'. Note: Only a single region can be set if source_ids or source_types are populated.", "default": "US"}, "sort_by": {"type": "string", "description": "Sort order of results. Possible values include: relevance_desc, relevance_asc, popularity_desc, popularity_asc, release_date_desc, release_date_asc, title_desc, title_asc. Default is 'relevance_desc'.", "default": "relevance_desc"}, "page": {"type": "integer", "description": "Set the page of results to return. Default is 1.", "default": "1"}, "network_ids": {"type": "string", "description": "Filter results to titles that originally aired on specific TV networks by passing individual IDs or multiple comma-separated IDs. Default is '1,8,12'.", "default": "1,8,12"}, "release_date_start": {"type": "integer", "description": "Filter results to only include titles released on or after a specific date. Format: YYYYMMDD. Default is 20010101.", "default": "20010101"}, "release_date_end": {"type": "integer", "description": "Filter results to only include titles released on or before a specific date. Format: YYYYMMDD. 
Default is 20201211.", "default": "20201211"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Fetches a listing of titles that match specified parameters from the Watchmode API."}], "top_p": null, "background": null, "max_output_tokens": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "truncation": null, "usage": null, "user": null, "max_tool_calls": null, "top_logprobs": null}, "reward": 0.0, "num_expected": 1, "num_predicted": 1, "num_correct": 0, "predicted_calls": [{"name": "list_titles", "arguments": {"network_ids": "1,8", "release_date_start": "20100101", "sort_by": "release_date_desc"}}]} +{"responses_create_params": {"background": null, "include": null, "input": [{"content": "You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query.", "role": "system", "type": "message"}, {"content": "What is the T3MA for 'ETH/BTC' using a 1h interval and a time period of 14?", "role": "user", "type": "message"}], "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "metadata": null, "model": null, "parallel_tool_calls": true, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "store": null, "temperature": null, "text": null, "tool_choice": "auto", "tools": [{"name": "t3ma", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "Instrument symbol, which can be any equity, index, ETF, forex, or cryptocurrency (e.g., 'AAPL', 'EUR/USD', 'ETH/BTC').", "default": "AAPL"}, "interval": {"type": "string", "description": "Interval between two consecutive points in the time series. 
Supported intervals include '1min', '5min', '15min', '30min', '45min', '1h', '2h', '4h', '1day', '1week', and '1month'.", "default": "1min"}, "format": {"type": "string", "description": "Format of the response data, either 'CSV' or 'JSON'. Default is 'json'.", "default": "json"}, "v_factor": {"type": "string", "description": "Volume factor used in the calculation of the T3MA.", "default": 0.7}, "series_type": {"type": "string", "description": "Type of series to use in the calculation. Supported values are 'open', 'high', 'low', and 'close'. Default is 'close'.", "default": "close"}, "outputsize": {"type": "string", "description": "Number of data points to return. Default is 30.", "default": 30}, "time_period": {"type": "string", "description": "Number of periods over which to calculate the T3MA. Default is 9.", "default": 9}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Fetches the Triple Exponential Moving Average (T3MA) for a given financial instrument."}, {"name": "stock_v2_get_profile", "parameters": {"type": "object", "properties": {"performanceid": {"type": "string", "description": "The performance ID of the stock, obtained from endpoints such as /auto-complete, /get-summary, or /get-movers.", "default": "0P0000OQN8"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves the company profile information for a given performance ID using the RapidAPI Morning Star service."}], "top_logprobs": null, "top_p": null, "truncation": null, "user": null, "stream": null}, "response": {"id": "resp_94bc6f237e074bffbd2478b281e4e25f", "created_at": 1762553385.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": null, "model": "Qwen/Qwen3-30B-A3B", "object": "response", "output": [{"id": "rs_8f91fd92c71443c89098431351f193a7", "summary": [{"text": "\nOkay, the user is asking for the Triple Exponential Moving Average (T3MA) for 'ETH/BTC' 
with a 1-hour interval and a time period of 14. Let me check the available functions.\n\nThere's the t3ma function. The parameters needed are symbol, interval, time_period, and others. The user specified 'ETH/BTC' as the symbol, which is valid. The interval they want is '1h', which is one of the supported options. The time period is 14, which they mentioned. The default for time_period in the function is 9, so I need to adjust that. The other parameters like v_factor, series_type, outputsize have defaults, so unless the user specifies, I can leave them as default. The format isn't mentioned, so default to JSON. \n\nSo the function call should include symbol: 'ETH/BTC', interval: '1h', time_period: 14. The rest can use defaults. I need to make sure the parameters are correctly formatted in JSON within the tool_call tags.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_19f8f879278847f0835cea2dec73ba37", "content": [{"annotations": [], "text": "\n\n", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}, {"arguments": "{\"symbol\": \"ETH/BTC\", \"interval\": \"1h\", \"time_period\": \"14\"}", "call_id": "chatcmpl-tool-2a645ba1ba0041afa421272036eff390", "name": "t3ma", "type": "function_call", "id": "chatcmpl-tool-2a645ba1ba0041afa421272036eff390", "status": "completed"}, {"call_id": "chatcmpl-tool-2a645ba1ba0041afa421272036eff390", "output": "{\"detail\":\"Not Found\"}", "type": "function_call_output", "id": null, "status": null}, {"id": "rs_4d7b9045932441e3a9bc3ae9a7776f29", "summary": [{"text": "\nOkay, the user asked for the T3MA of 'ETH/BTC' with a 1-hour interval and a time period of 14. I called the t3ma function with those parameters. The response came back as \"Not Found\". Hmm, that could mean a few things. Maybe the symbol 'ETH/BTC' isn't valid or isn't supported by the API. Let me check the function's description again. 
The t3ma function's symbol parameter can be any equity, index, ETF, forex, or cryptocurrency. So ETH/BTC should be okay. But perhaps the API doesn't have data for that specific pair. Alternatively, maybe the default parameters are necessary. Wait, the user specified the time period as 14, which is different from the default 9. The function should accept that. But if the API doesn't have data for that symbol and interval, it might return not found. I should inform the user that the request couldn't be fulfilled and maybe suggest checking the symbol or interval. Also, maybe the API requires additional parameters that aren't optional. But according to the function definition, all parameters are optional except the ones required. Wait, the required array is empty, so all are optional. But the user provided symbol, interval, and time_period. Maybe the issue is with the format of the parameters. For example, the interval '1h' is correct, but maybe the API expects a different format. Or the symbol should be in a different format, like ETH-BTC instead of ETH/BTC. Alternatively, maybe the API doesn't support cryptocurrency pairs. But the function's description says it can handle cryptocurrencies. I need to let the user know that the data isn't available with the current parameters. Maybe suggest trying a different symbol or checking the API documentation. But since I can't access external docs, I'll have to explain the possible reasons based on the info given.\n", "type": "summary_text"}], "type": "reasoning", "encrypted_content": null}, {"id": "msg_abde9ddc4dc24a1cab1a3183c0feb0c8", "content": [{"annotations": [], "text": "\n\nThe request for the Triple Exponential Moving Average (T3MA) for `ETH/BTC` with a `1h` interval and a time period of `14` returned a \"Not Found\" error. This could occur due to one of the following reasons:\n\n1. **Symbol Availability**: The `ETH/BTC` trading pair might not be supported or available in the data source.\n2. 
**Data Gaps**: There may be insufficient historical data for the specified parameters.\n3. **API Constraints**: The underlying API might have limitations on cryptocurrency pairs or specific intervals.\n\n### Suggested Actions:\n- Verify the symbol format (e.g., try `ETH-BTC` or check if the API expects a different notation).\n- Test with a different interval (e.g., `1day`) or a commonly supported symbol like `BTC/USD`.\n- Ensure the API has coverage for cryptocurrency pairs.\n\nLet me know if you'd like to try alternative parameters!", "type": "output_text", "logprobs": null}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": true, "temperature": null, "tool_choice": "auto", "tools": [{"name": "t3ma", "parameters": {"type": "object", "properties": {"symbol": {"type": "string", "description": "Instrument symbol, which can be any equity, index, ETF, forex, or cryptocurrency (e.g., 'AAPL', 'EUR/USD', 'ETH/BTC').", "default": "AAPL"}, "interval": {"type": "string", "description": "Interval between two consecutive points in the time series. Supported intervals include '1min', '5min', '15min', '30min', '45min', '1h', '2h', '4h', '1day', '1week', and '1month'.", "default": "1min"}, "format": {"type": "string", "description": "Format of the response data, either 'CSV' or 'JSON'. Default is 'json'.", "default": "json"}, "v_factor": {"type": "string", "description": "Volume factor used in the calculation of the T3MA.", "default": 0.7}, "series_type": {"type": "string", "description": "Type of series to use in the calculation. Supported values are 'open', 'high', 'low', and 'close'. Default is 'close'.", "default": "close"}, "outputsize": {"type": "string", "description": "Number of data points to return. Default is 30.", "default": 30}, "time_period": {"type": "string", "description": "Number of periods over which to calculate the T3MA. 
Default is 9.", "default": 9}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Fetches the Triple Exponential Moving Average (T3MA) for a given financial instrument."}, {"name": "stock_v2_get_profile", "parameters": {"type": "object", "properties": {"performanceid": {"type": "string", "description": "The performance ID of the stock, obtained from endpoints such as /auto-complete, /get-summary, or /get-movers.", "default": "0P0000OQN8"}}, "required": [], "additionalProperties": false}, "strict": false, "type": "function", "description": "Retrieves the company profile information for a given performance ID using the RapidAPI Morning Star service."}], "top_p": null, "background": null, "max_output_tokens": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": null, "text": null, "truncation": null, "usage": null, "user": null, "max_tool_calls": null, "top_logprobs": null}, "reward": 0.0, "num_expected": 1, "num_predicted": 1, "num_correct": 0, "predicted_calls": [{"name": "t3ma", "arguments": {"symbol": "ETH/BTC", "interval": "1h", "time_period": "14"}}]} diff --git a/resources_servers/xlam_fc/generate_dataset.py b/resources_servers/xlam_fc/generate_dataset.py new file mode 100644 index 000000000..19a1824ee --- /dev/null +++ b/resources_servers/xlam_fc/generate_dataset.py @@ -0,0 +1,259 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Build JSONL datasets from the Salesforce xlam-function-calling-60k dataset.

Run without arguments to write ``train.jsonl`` and ``valid.jsonl``; run with
``--example`` to write a small ``example.jsonl``.
"""

import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional


DATASET_NAME = "Salesforce/xlam-function-calling-60k"

SYSTEM_PROMPT = """You are a helpful AI assistant with access to various functions. When you need to use a function to answer a user's request, call the appropriate function with the correct arguments. You can call multiple functions if needed to fully address the user's query."""

# Python-style type names -> JSON-schema type names.
_TYPE_MAPPING = {
    "str": "string",
    "string": "string",
    "int": "integer",
    "integer": "integer",
    "float": "number",
    "number": "number",
    "bool": "boolean",
    "boolean": "boolean",
    "list": "array",
    "array": "array",
    "tuple": "array",
    "set": "array",
    "dict": "object",
    "object": "object",
}

# First identifier of a type expression, e.g. "List" in "List[int]".
_LEADING_IDENTIFIER = re.compile(r"\s*([A-Za-z_][A-Za-z0-9_.]*)")
# Trailing ", optional" marker, e.g. "int, optional".
_OPTIONAL_SUFFIX = re.compile(r",\s*optional\s*$", re.IGNORECASE)


def _base_type_name(param_type: Any) -> str:
    """Return the lower-cased base type of a type string, or "" if there is none.

    Decorations are ignored: ``"int, optional"`` -> ``"int"``,
    ``"List[int]"`` -> ``"list"``, ``"Optional[float]"`` -> ``"float"``,
    ``"typing.Dict[str, Any]"`` -> ``"dict"``.
    """
    if not isinstance(param_type, str):
        return ""

    remaining = param_type
    while True:
        match = _LEADING_IDENTIFIER.match(remaining)
        if match is None:
            return ""
        base = match.group(1).rsplit(".", 1)[-1].lower()
        rest = remaining[match.end():].lstrip()
        # "Optional[X]" says nothing about the value type; look inside it.
        if base == "optional" and rest.startswith("["):
            remaining = rest[1:]
            continue
        return base


def _is_declared_optional(param_type: Any) -> bool:
    """Return True when the type string itself marks the parameter optional."""
    if not isinstance(param_type, str):
        return False
    stripped = param_type.strip()
    return stripped.lower().startswith("optional[") or _OPTIONAL_SUFFIX.search(stripped) is not None


def convert_parameter_type(param_type: str) -> str:
    """Map a dataset parameter type string to a JSON-schema type name.

    Plain names (``"str"``, ``"int"``, ...) map as before. Decorated forms such
    as ``"int, optional"`` or ``"List[int]"`` are reduced to their base type
    first instead of silently falling back to ``"string"``. Unknown or
    non-string inputs return ``"string"``.
    """
    return _TYPE_MAPPING.get(_base_type_name(param_type), "string")


def convert_tool_to_openai_format(tool: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a tool from the dataset format to OpenAI function calling format.

    Dataset format example:
    {
        "name": "tool_name",
        "description": "Tool description",
        "parameters": {
            "param_name": {
                "description": "param description",
                "type": "str",
                "default": "value"
            }
        }
    }

    OpenAI format produced for that example:
    {
        "type": "function",
        "name": "tool_name",
        "description": "Tool description",
        "parameters": {
            "type": "object",
            "properties": {
                "param_name": {
                    "type": "string",
                    "description": "param description",
                    "default": "value"
                }
            },
            "required": [],
            "additionalProperties": False
        },
        "strict": False
    }

    ``default`` and ``enum`` are copied through when present. A parameter is
    listed in ``required`` only when it has no default and its type string does
    not mark it optional. Parameter entries that are not dicts are ignored.

    Raises:
        KeyError: if ``tool`` has no ``"name"`` key.
    """
    properties: Dict[str, Any] = {}
    required: List[str] = []

    parameters = tool.get("parameters")
    if isinstance(parameters, dict):
        for param_name, param_info in parameters.items():
            if not isinstance(param_info, dict):
                continue

            declared_type = param_info.get("type", "string")
            prop = {
                "type": convert_parameter_type(declared_type),
                "description": param_info.get("description", ""),
            }
            if "default" in param_info:
                prop["default"] = param_info["default"]
            if "enum" in param_info:
                prop["enum"] = param_info["enum"]
            properties[param_name] = prop

            if "default" not in param_info and not _is_declared_optional(declared_type):
                required.append(param_name)

    return {
        "type": "function",
        "name": tool["name"],
        "description": tool.get("description", ""),
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
            "additionalProperties": False,
        },
        "strict": False,
    }


def parse_expected_answers(answers_str: str) -> List[Dict[str, Any]]:
    """Parse the JSON-encoded expected calls of one example.

    A single JSON object is wrapped into a one-element list. Returns ``[]``
    (after printing the problem) when the input is not valid JSON, is not a
    string at all, or contains an entry that is not a JSON object; callers
    treat ``[]`` as "skip this example".
    """
    try:
        answers = json.loads(answers_str)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error parsing answers: {answers_str}")
        print(f"Error: {e}")
        return []

    if not isinstance(answers, list):
        answers = [answers]

    # A partially valid answer list would change what counts as correct, so
    # reject the whole example rather than dropping individual entries.
    if not all(isinstance(answer, dict) for answer in answers):
        print(f"Error parsing answers: {answers_str}")
        print("Error: every expected answer must be a JSON object")
        return []

    return answers


def parse_tools(tools_str: str) -> List[Dict[str, Any]]:
    """Parse the JSON-encoded tool list of one example into OpenAI format.

    A single JSON object is wrapped into a one-element list. Returns ``[]``
    (after printing the problem) when the input is not valid JSON, is not a
    string at all, or contains a tool that is not an object with a ``"name"``;
    callers treat ``[]`` as "skip this example".
    """
    try:
        tools = json.loads(tools_str)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error parsing tools: {tools_str}")
        print(f"Error: {e}")
        return []

    if not isinstance(tools, list):
        tools = [tools]

    # One malformed tool would otherwise raise and abort the whole run.
    if not all(isinstance(tool, dict) and "name" in tool for tool in tools):
        print(f"Error parsing tools: {tools_str}")
        print("Error: every tool must be a JSON object with a 'name'")
        return []

    return [convert_tool_to_openai_format(tool) for tool in tools]


def _load_source_dataset():
    """Load the train split of the source dataset from HuggingFace."""
    # Imported here so the pure conversion helpers above can be imported
    # without the HuggingFace `datasets` package installed.
    from datasets import load_dataset

    return load_dataset(DATASET_NAME, split="train")


def _progress(iterable):
    """Wrap ``iterable`` in a tqdm progress bar when tqdm is installed."""
    try:
        from tqdm import tqdm
    except ImportError:
        # The progress bar is cosmetic; fall back to plain iteration.
        return iterable
    return tqdm(iterable)


def _build_record(example: Dict[str, Any], record_id: Optional[Any] = None) -> Optional[Dict[str, Any]]:
    """Build one output record, or return None when the example must be skipped.

    Args:
        example: Row with ``"tools"``, ``"answers"`` and ``"query"`` (and
            ``"id"`` when ``record_id`` is None).
        record_id: Value for the record's ``"id"``; ``example["id"]`` is used
            when this is None.
    """
    tools = parse_tools(example["tools"])
    expected_answers = parse_expected_answers(example["answers"])
    if not tools or not expected_answers:
        return None

    return {
        "id": example["id"] if record_id is None else record_id,
        "responses_create_params": {
            "input": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": example["query"]},
            ],
            "tools": tools,
        },
        "expected_answers": expected_answers,
    }


def generate_dataset(output_dir: str = "resources_servers/xlam_fc/data", valid_size: int = 1000):
    """Download the dataset, split it, and write ``train.jsonl`` / ``valid.jsonl``.

    Args:
        output_dir: Directory for the output files; created if missing.
        valid_size: Value passed as ``test_size`` to ``train_test_split``
            (seed 42); the resulting "test" part becomes the validation set.
    """
    print("Loading dataset from HuggingFace...")
    dataset = _load_source_dataset()
    print(f"Loaded {len(dataset)} examples")

    print(f"Splitting dataset: {valid_size} for validation, rest for training...")
    split_dataset = dataset.train_test_split(test_size=valid_size, seed=42)
    train_dataset = split_dataset["train"]
    valid_dataset = split_dataset["test"]

    print(f"Train: {len(train_dataset)} examples")
    print(f"Valid: {len(valid_dataset)} examples")

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    train_path = output_path / "train.jsonl"
    valid_path = output_path / "valid.jsonl"

    print("\nProcessing training set...")
    train_processed, train_skipped = process_and_write_split(train_dataset, train_path)

    print("\nProcessing validation set...")
    valid_processed, valid_skipped = process_and_write_split(valid_dataset, valid_path)

    print("\nDataset generation complete!")
    print(f"Train - Processed: {train_processed}, Skipped: {train_skipped}")
    print(f"Valid - Processed: {valid_processed}, Skipped: {valid_skipped}")
    print(f"Output directory: {output_dir}")


def process_and_write_split(dataset, output_path: Path) -> tuple[int, int]:
    """Convert every example of ``dataset`` and write one JSON record per line.

    Examples whose tools or expected answers cannot be parsed (or are empty)
    are skipped.

    Args:
        dataset: Iterable of rows with ``"id"``, ``"query"``, ``"tools"`` and
            ``"answers"``.
        output_path: File to (over)write.

    Returns:
        ``(processed_count, skipped_count)``.
    """
    processed_count = 0
    skipped_count = 0

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        for example in _progress(dataset):
            record = _build_record(example)
            if record is None:
                skipped_count += 1
                continue

            f.write(json.dumps(record) + "\n")
            processed_count += 1

    return processed_count, skipped_count


def generate_example_file(output_dir: str = "resources_servers/xlam_fc/data", num_examples: int = 5):
    """Write the first ``num_examples`` usable examples to ``example.jsonl``.

    Record ids are the 0-based position in the output file rather than the
    dataset's own ids. Unparseable examples are skipped and do not count
    towards ``num_examples``.
    """
    dataset = _load_source_dataset()

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    example_path = output_path / "example.jsonl"

    print(f"Generating {num_examples} examples...")
    processed_count = 0
    skipped_count = 0

    with open(example_path, "w", encoding="utf-8") as f:
        for example in dataset:
            if processed_count >= num_examples:
                break

            record = _build_record(example, record_id=processed_count)
            if record is None:
                skipped_count += 1
                continue

            f.write(json.dumps(record) + "\n")
            processed_count += 1

    print(f"Generated {processed_count} examples in {example_path}")
    print(f"Skipped: {skipped_count}")


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "--example":
        generate_example_file()
    else:
        generate_dataset()
b/resources_servers/xlam_fc/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/resources_servers/xlam_fc/tests/test_app.py b/resources_servers/xlam_fc/tests/test_app.py new file mode 100644 index 000000000..d484a410a --- /dev/null +++ b/resources_servers/xlam_fc/tests/test_app.py @@ -0,0 +1,430 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

import json
from unittest.mock import MagicMock

from app import (
    XlamFcResourcesServer,
    XlamFcResourcesServerConfig,
    XlamFcVerifyRequest,
)

from nemo_gym.openai_utils import NeMoGymResponse
from nemo_gym.server_utils import ServerClient


# Prompts and expected answers shared by several tests.
_WEATHER_PROMPT = "What's the weather in San Francisco?"
_GIVEAWAYS_PROMPT = "Where can I find live giveaways?"
_WEATHER_EXPECTED = [{"name": "get_weather", "arguments": {"city": "San Francisco"}}]
_GIVEAWAYS_EXPECTED = [
    {"name": "live_giveaways_by_type", "arguments": {"type": "beta"}},
    {"name": "live_giveaways_by_type", "arguments": {"type": "game"}},
]


def _new_server() -> XlamFcResourcesServer:
    """Create a server with a dummy config and a mocked ServerClient."""
    return XlamFcResourcesServer(
        config=XlamFcResourcesServerConfig(host="0.0.0.0", port=8080, entrypoint="", name=""),
        server_client=MagicMock(spec=ServerClient),
    )


def _call(call_id, name, arguments):
    """Build a function_call output item whose arguments are JSON-encoded."""
    return {
        "call_id": call_id,
        "name": name,
        "arguments": json.dumps(arguments),
        "type": "function_call",
    }


def _message(text):
    """Build a completed assistant text message output item."""
    return {
        "id": "msg_1",
        "content": [
            {
                "annotations": [],
                "text": text,
                "type": "output_text",
            }
        ],
        "role": "assistant",
        "status": "completed",
        "type": "message",
    }


async def _verify(output, prompt, expected_answers):
    """Run server.verify on a response with ``output`` and return the result."""
    response = NeMoGymResponse(
        id="resp_test",
        created_at=0.0,
        model="dummy",
        object="response",
        output=output,
        parallel_tool_calls=True,
        tool_choice="auto",
        tools=[],
    )
    server = _new_server()
    request = XlamFcVerifyRequest(
        responses_create_params={
            "input": [
                {"role": "user", "content": prompt},
            ],
        },
        response=response,
        expected_answers=expected_answers,
    )
    return await server.verify(request)


def _check(result, reward, num_expected, num_predicted, num_correct) -> None:
    """Assert the reward and the three counters of a verify result."""
    assert result.reward == reward
    assert result.num_expected == num_expected
    assert result.num_predicted == num_predicted
    assert result.num_correct == num_correct


class TestApp:
    def test_sanity(self) -> None:
        config = XlamFcResourcesServerConfig(
            host="0.0.0.0",
            port=8080,
            entrypoint="",
            name="",
        )
        XlamFcResourcesServer(config=config, server_client=MagicMock(spec=ServerClient))

    async def test_verify_exact_match_single_call(self) -> None:
        output = [_call("call_1", "get_weather", {"city": "San Francisco"})]
        result = await _verify(output, _WEATHER_PROMPT, _WEATHER_EXPECTED)
        _check(result, 1.0, 1, 1, 1)

    async def test_verify_exact_match_multiple_calls(self) -> None:
        output = [
            _call("call_1", "live_giveaways_by_type", {"type": "beta"}),
            _call("call_2", "live_giveaways_by_type", {"type": "game"}),
        ]
        result = await _verify(output, _GIVEAWAYS_PROMPT, _GIVEAWAYS_EXPECTED)
        _check(result, 1.0, 2, 2, 2)

    async def test_verify_partial_match(self) -> None:
        # Only the "beta" call matches; a partial match still scores 0.
        output = [
            _call("call_1", "live_giveaways_by_type", {"type": "beta"}),
            _call("call_2", "live_giveaways_by_type", {"type": "loot"}),
        ]
        result = await _verify(output, _GIVEAWAYS_PROMPT, _GIVEAWAYS_EXPECTED)
        _check(result, 0.0, 2, 2, 1)

    async def test_verify_no_match(self) -> None:
        output = [_call("call_1", "wrong_function", {"city": "New York"})]
        result = await _verify(output, _WEATHER_PROMPT, _WEATHER_EXPECTED)
        _check(result, 0.0, 1, 1, 0)

    async def test_verify_wrong_arguments(self) -> None:
        output = [_call("call_1", "get_weather", {"city": "New York"})]
        result = await _verify(output, _WEATHER_PROMPT, _WEATHER_EXPECTED)
        _check(result, 0.0, 1, 1, 0)

    async def test_verify_no_predictions(self) -> None:
        output = [_message("I don't know how to help with that.")]
        result = await _verify(output, _WEATHER_PROMPT, _WEATHER_EXPECTED)
        _check(result, 0.0, 1, 0, 0)

    async def test_verify_extra_arguments(self) -> None:
        # Arguments beyond the expected ones do not break the match.
        output = [_call("call_1", "get_weather", {"city": "San Francisco", "units": "celsius"})]
        result = await _verify(output, _WEATHER_PROMPT, _WEATHER_EXPECTED)
        _check(result, 1.0, 1, 1, 1)

    async def test_verify_no_expected_no_predicted(self) -> None:
        output = [_message("Here's some information.")]
        result = await _verify(output, "Tell me something.", [])
        _check(result, 1.0, 0, 0, 0)

    async def test_verify_multiple_arguments(self) -> None:
        arguments = {"symbol": "ETH/BTC", "interval": "1h", "time_period": 14}
        output = [_call("call_1", "t3ma", arguments)]
        expected = [
            {"name": "t3ma", "arguments": {"symbol": "ETH/BTC", "interval": "1h", "time_period": 14}}
        ]
        result = await _verify(output, "Get T3MA for ETH/BTC", expected)
        _check(result, 1.0, 1, 1, 1)