-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from cassiebreviu/main
add intent evaluation
- Loading branch information
Showing
12 changed files
with
466 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{"customerId": "7", "chat_history": [], "question": "what is the temperature rating of my sleeping bag?", "intent": "support"} | ||
{"customerId": "7", "chat_history": [], "question": "what is the temperature rating of the cozynights sleeping bag?", "intent": "support"} | ||
{"customerId": "8", "chat_history": [], "question": "what is the waterproof rating of the tent I bought?", "intent": "support"} | ||
{"customerId": "8", "chat_history": [], "question": "what is the waterproof rating of the TrailMaster X4 Tent's rainfly?", "intent": "support"} | ||
{"customerId": "2", "question": "What is your return or exchange policy?", "chat_history": [], "intent": "support" } | ||
{"customerId": "6", "chat_history": [], "question": "is the jacket I bought machine washable?", "intent": "support"} | ||
{"customerId": "8", "chat_history": [], "question": "I would like to return the tent I bought. It is used but I still want to return it since the roof leaks.", "intent": "support"} | ||
{ "customerId": "4", "question": "tell me about your hiking jackets", "chat_history": [], "intent": "chat"} | ||
{ "customerId": "1", "question": "Do you have any climbing gear?", "chat_history": [], "intent": "chat" } | ||
{ "customerId": "3", "question": "Can you tell me about your selection of tents?", "chat_history": [], "intent": "chat" } | ||
{ "customerId": "6", "question": "Do you have any hiking boots?", "chat_history": [], "intent": "chat" } | ||
{ "customerId": "2", "question": "What gear do you recommend for hiking?", "chat_history": [], "intent": "chat" } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
{"customerId": "7", "chat_history": [], "question": "what is the temperature rating of my sleeping bag?"} | ||
{"customerId": "7", "chat_history": [], "question": "what is the temperature rating of the cozynights sleeping bag?"} | ||
{"customerId": "8", "chat_history": [], "question": "what is the waterproof rating of the tent I bought?"} | ||
{"customerId": "8", "chat_history": [], "question": "what is the waterproof rating of the TrailMaster X4 Tent's rainfly?"} | ||
{"customerId": "2", "question": "What is your return or exchange policy?", "chat_history": [] } | ||
{"customerId": "6", "chat_history": [], "question": "is the jacket I bought machine washable?"} | ||
{"customerId": "8", "chat_history": [], "question": "I would like to return the tent I bought. It is used but I still want to return it since the roof leaks."} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,330 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Local Evaluation - Intent\n", | ||
"\n", | ||
"After you have set up and configured the prompt flow, it's time to evaluate its performance. Here we use the prompt flow SDK to test different questions and see how the intent prompt flow performs, using the intent evaluation prompt flow provided." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from promptflow import PFClient\n", | ||
"pf_client = PFClient()\n", | ||
"\n", | ||
"from dotenv import load_dotenv\n", | ||
"\n", | ||
"from pathlib import Path\n", | ||
"load_dotenv(Path(\"../local.env\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Add a question to test the base prompt flow.\n", | ||
"question = \"How do I wash the jacket I purchased?\"\n", | ||
"customerId = \"4\"\n", | ||
"output = pf_client.test(\n", | ||
"    flow=\"../contoso-intent\",  # Path to the flow directory\n", | ||
"    inputs={  # Inputs to the flow\n", | ||
"        \"chat_history\": [],\n", | ||
"        \"question\": question,\n", | ||
"        \"customerId\": customerId,\n", | ||
"    },\n", | ||
")\n", | ||
"\n", | ||
"# The answer is consumed as an iterable of string pieces; join them into a single string\n", | ||
"# so the result can be displayed and reused in later cells.\n", | ||
"output[\"answer\"] = \"\".join(output[\"answer\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"output" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Evaluate the intent predicted by the prompt flow for the above question against the expected intent." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"test = pf_client.test(\n", | ||
" flow=\"intent_eval\",\n", | ||
" inputs={\n", | ||
" \"question\": question,\n", | ||
" \"prediction\": str(output[\"intent_context\"]),\n", | ||
" \"groundtruth\": \"support\",\n", | ||
" },\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"test" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# AI Studio Azure batch run on an evaluation json dataset\n", | ||
"\n", | ||
"Now in order to test these more thoroughly, we can use the Azure AI Studio to run batches of test data with the evaluation prompt flow on a larger dataset." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import json\n", | ||
"# Import required libraries\n", | ||
"from promptflow.azure import PFClient\n", | ||
"\n", | ||
"# Import required libraries\n", | ||
"from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"try:\n", | ||
"    credential = DefaultAzureCredential()\n", | ||
"    # Check that the credential can actually obtain a token.\n", | ||
"    credential.get_token(\"https://management.azure.com/.default\")\n", | ||
"except Exception:\n", | ||
"    # Fall back to InteractiveBrowserCredential if DefaultAzureCredential does not work.\n", | ||
"    credential = InteractiveBrowserCredential()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Populate the `config.json` file with the subscription_id, resource_group, and workspace_name." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"config_path = \"../config.json\"\n", | ||
"pf_azure_client = PFClient.from_config(credential=credential, path=config_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Add the runtime from the AI Studio that will be used for the cloud batch runs." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Update the runtime to the name of the runtime you created previously\n", | ||
"runtime = \"automatic\"\n", | ||
"# load flow\n", | ||
"flow = \"../contoso-intent\"\n", | ||
"# load data\n", | ||
"data = \"../data/intenttestdata.jsonl\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# get current time stamp for run name\n", | ||
"import datetime\n", | ||
"now = datetime.datetime.now()\n", | ||
"timestamp = now.strftime(\"%Y_%m_%d_%H%M%S\")\n", | ||
"run_name = timestamp+\"_intent_base_run\"\n", | ||
"print(run_name)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Create a base run to use as the variant for the evaluation runs. \n", | ||
"\n", | ||
"_NOTE: If you get \"'An existing connection was forcibly closed by the remote host'\" run the cell again._" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# create base run in Azure Ai Studio\n", | ||
"base_run = pf_azure_client.run(\n", | ||
" flow=flow,\n", | ||
" data=data,\n", | ||
" column_mapping={\n", | ||
" # reference data\n", | ||
" \"customerId\": \"${data.customerId}\",\n", | ||
" \"question\": \"${data.question}\",\n", | ||
" },\n", | ||
" runtime=runtime,\n", | ||
" display_name=run_name,\n", | ||
" name=run_name\n", | ||
")\n", | ||
"print(base_run)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pf_azure_client.stream(base_run)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"details = pf_azure_client.get_details(base_run)\n", | ||
"details.head(10)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Cloud Eval run on Json Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"eval_flow = \"intent_eval/\"\n", | ||
"# Keep the same '<timestamp>_<suffix>' naming scheme as the base run.\n", | ||
"run_name = timestamp+\"_intent_eval_run\"\n", | ||
"print(run_name)\n", | ||
"\n", | ||
"eval_run_variant = pf_azure_client.run(\n", | ||
"    flow=eval_flow,\n", | ||
"    data=data,  # path to the data file\n", | ||
"    run=base_run,  # use run as the variant\n", | ||
"    column_mapping={\n", | ||
"        # reference data\n", | ||
"        \"question\": \"${data.question}\",\n", | ||
"        \"groundtruth\": \"${data.intent}\",\n", | ||
"        # intent predicted by the base run\n", | ||
"        \"prediction\": \"${run.intent_context}\",\n", | ||
"    },\n", | ||
"    runtime=runtime,\n", | ||
"    display_name=run_name,\n", | ||
"    name=run_name\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pf_azure_client.stream(eval_run_variant)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"details = pf_azure_client.get_details(eval_run_variant)\n", | ||
"details.head(10)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"metrics = pf_azure_client.get_metrics(eval_run_variant)\n", | ||
"print(json.dumps(metrics, indent=4))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pf_azure_client.visualize([base_run, eval_run_variant])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.18" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.