diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 11d466de..c071d463 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -23,7 +23,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.11" cache: "pip" - name: Install Dependencies @@ -54,7 +54,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.11" cache: "pip" - name: Install Dependencies @@ -66,7 +66,7 @@ jobs: run: pre-commit run mypy --all-file build-and-test: - name: "Build and Test Python 3.9" + name: "Build and Test Python 3.11" runs-on: ubuntu-latest if: always() timeout-minutes: 20 @@ -76,7 +76,7 @@ jobs: - name: Setup python uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.11" cache: "pip" - name: Install Dependencies diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml index 6505bc66..9dcbde6e 100644 --- a/.github/workflows/build_docs.yaml +++ b/.github/workflows/build_docs.yaml @@ -20,7 +20,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.9.15 + python-version: 3.11 cache: "pip" - name: Installing the Documentation requirements run: | diff --git a/.github/workflows/merge.yaml b/.github/workflows/merge.yaml index e1dce0ff..f215e728 100644 --- a/.github/workflows/merge.yaml +++ b/.github/workflows/merge.yaml @@ -6,7 +6,7 @@ on: jobs: build-and-test: - name: "Build and Test Python 3.9" + name: "Build and Test Python 3.11" runs-on: ubuntu-latest if: always() timeout-minutes: 20 @@ -16,7 +16,7 @@ jobs: - name: Setup python uses: actions/setup-python@v4 with: - python-version: "3.9" + python-version: "3.11" cache: "pip" - name: Install Dependencies diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0cd24892..93a59e68 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: 22.6.0 hooks: - id: black - language_version: python3.9 + language_version: python3.11 - repo: https://github.com/pycqa/flake8 rev: 5.0.4 hooks: diff --git a/README.md b/README.md index c4355630..80218153 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![docs](https://img.shields.io/github/actions/workflow/status/iai-group/UserSimCRS/build_docs.yaml?label=docs&branch=main) ![Tests](https://img.shields.io/github/actions/workflow/status/iai-group/UserSimCRS/merge.yaml?label=Tests&branch=main) ![Coverage Badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/NoB0/cd558f4b76df656b67277f8ae214b7e0/raw/coverage.UserSimCRS.main.json) -![Python version](https://img.shields.io/badge/python-3.9-blue) +![Python version](https://img.shields.io/badge/python-3.11-blue) UserSimCRS is an extensible user simulation toolkit for evaluating conversational recommender systems. @@ -29,7 +29,7 @@ We refer to the [documentation](https://iai-group.github.io/UserSimCRS/main) for ## Installation -The recommended version of Python is 3.9. +UserSimCRS requires Python 3.11+. The easiest way to install UserSimCRS and all of its dependencies is by using pip: ```shell diff --git a/config/default/config_default.yaml b/config/default/config_default.yaml index 328cb574..46ebd699 100644 --- a/config/default/config_default.yaml +++ b/config/default/config_default.yaml @@ -36,6 +36,4 @@ historical_ratings_ratio: 0.8 dialogues: data/datasets/moviebot/annotated_dialogues.json intent_classifier: "cosine" -# If using the DIET classifier the following file needs to be provided. -# rasa_dialogues: data/agents/moviebot/annotated_dialogues_rasa_agent.yml diff --git a/data/datasets/README.md b/data/datasets/README.md index e6491c48..69c242bf 100644 --- a/data/datasets/README.md +++ b/data/datasets/README.md @@ -1,16 +1,10 @@ # Dialogue files -The YAML files required to train the Rasa DIET classifier can be generated from the annotated dialogues saved in the correct format. -The generation of these files can be done with this command: +This folder contains dialogue files used for configuring a user simulator. For each dataset supported, annotated dialogues in JSON format are provided, along with prompts for LLM-based NLU and NLG components. -```shell - cd usersimcrs/utils - python -m annotation_converter_rasa -source PathToAnnotatedDialoguesFile -destination PathToDestinationFolder -``` +The datasets currently supported are: -It creates the following files: - - - `_reformat.yaml`: The original file saved as a yaml file - - `_types_w_examples.yaml`: Slots and example values extracted from the dialogues - - `_rasa_agent.yaml`: Examples of agent utterances for all possible intents/actions that the agent can take - - `_rasa_user.yaml`: Similar to the agent file, but for users + * MovieBot: dialogues related to movie recommendations obtained with IAI MovieBot v1. + * [ReDial](https://redialdata.github.io/website/): dialogues related to movie recommendations obtained with crowdsourcing. + * [IARD](https://github.com/wanlingcai1997/umap_2020_IARD): subset of ReDial dialogues related to movie recommendations. + * [INSPIRED](https://github.com/sweetpeach/Inspired): dialogues related to movie recommendations obtained with crowdsourcing. diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 3d7e27e8..11225070 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -33,7 +33,6 @@ Simulator parameters * `historical_ratings_ratio`: Ratio ([0..1]) of ratings to be used as historical data. * `dialogues`: Path to domain config file. * `intent_classifier`: Intent classifier model to be used. Only supports DialogueKit intent classifiers. - * `rasa_dialogues`: File with Rasa annotated dialogues. Only needed when using a DIET intent classifier. * `debug`: Flag (boolean) to activate debug mode. Configuration example @@ -71,8 +70,6 @@ Below is the default configuration to run simulation with the IAI MovieBot as th dialogues: data/agents/moviebot/annotated_dialogues.json intent_classifier: "cosine" - # If using the DIET classifier the following file needs to be provided. - # rasa_dialogues: data/agents/moviebot/annotated_dialogues_rasa_agent.yml debug: False diff --git a/docs/source/installation.rst b/docs/source/installation.rst index acd81ed5..d1e8cf16 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -1,7 +1,7 @@ Installation ============ -The recommended version of Python is 3.9. +UserSimCRS requires Python 3.11+. The easiest way to install UserSimCRS and all of its dependencies is by using pip: .. code-block:: shell diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c7d6718c..38a89c9f 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -17,5 +17,6 @@ torch==2.0.1 openai==1.30.5 ollama==0.3.1 pandas==2.2.2 -git+https://github.com/iai-group/DialogueKit.git +dialoguekit==0.1.1 +scikit-learn >= 0.24 setuptools<=70.0.0 \ No newline at end of file diff --git a/usersimcrs/core/information_need.py b/usersimcrs/core/information_need.py index deab542b..11636522 100644 --- a/usersimcrs/core/information_need.py +++ b/usersimcrs/core/information_need.py @@ -41,14 +41,14 @@ def generate_random_information_need( target_item.properties.keys() ) num_constraints = random.randint(1, len(informable_slots)) - for slot in random.sample(informable_slots, num_constraints): + for slot in random.sample(list(informable_slots), num_constraints): constraints[slot] = target_item.get_property(slot) requestable_slots = set( domain.get_requestable_slots() ).symmetric_difference(constraints.keys()) num_requests = random.randint(1, len(requestable_slots)) - requests = random.sample(requestable_slots, num_requests) + requests = random.sample(list(requestable_slots), num_requests) return InformationNeed([target_item], constraints, requests) diff --git a/usersimcrs/items/ratings.py b/usersimcrs/items/ratings.py index cd18d28b..88fd347f 100644 --- a/usersimcrs/items/ratings.py +++ b/usersimcrs/items/ratings.py @@ -31,7 +31,7 @@ def user_item_sampler( """ # Determine the number of items to use as historical data for a given user. nb_historical_items = int(historical_ratio * len(item_ratings)) - return random.sample(item_ratings.keys(), nb_historical_items) + return random.sample(list(item_ratings.keys()), nb_historical_items) class Ratings: diff --git a/usersimcrs/run_simulation.py b/usersimcrs/run_simulation.py index 007e78ef..a2710872 100644 --- a/usersimcrs/run_simulation.py +++ b/usersimcrs/run_simulation.py @@ -122,11 +122,6 @@ def parse_args() -> argparse.Namespace: choices=["cosine", "diet"], help="Intent classifier model to be used. Defaults to cosine.", ) - parser.add_argument( - "--rasa_dialogues", - type=str, - help="Path to the Rasa annotated dialogues file.", - ) parser.add_argument( "-d", "--debug", diff --git a/usersimcrs/utils/annotation_converter_rasa.py b/usersimcrs/utils/annotation_converter_rasa.py deleted file mode 100644 index bac93fcd..00000000 --- a/usersimcrs/utils/annotation_converter_rasa.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Generates Rasa NLU files from the JSON formatted annotated dialogues. - -Usage: - $ python -m usersimcrs.utils.annotation_converter_rasa \ - -source PathToAnnotatedDialoguesFile \ - -destination PathToDestinationFolder -""" - -import argparse -import os -import sys - -from dialoguekit.utils.annotation_converter_dialoguekit_to_rasa import ( - AnnotationConverterRasa, -) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-source", type=str, help="Path to the annotated dialogues file." - ) - parser.add_argument( - "-destination", type=str, help="Path to the destination folder." - ) - args = parser.parse_args() - - if not os.path.exists(args.source): - sys.exit("FileNotFound: {file}".format(file=args.source)) - if not os.path.exists(args.destination): - sys.exit("FileNotFound: {file}".format(file=args.destination)) - - converter = AnnotationConverterRasa(args.source, args.destination) - converter.read_original() - converter.run() diff --git a/usersimcrs/utils/simulation_utils.py b/usersimcrs/utils/simulation_utils.py index 926762a4..3ea48278 100644 --- a/usersimcrs/utils/simulation_utils.py +++ b/usersimcrs/utils/simulation_utils.py @@ -1,10 +1,9 @@ """Utility functions to run the simulation.""" import json -from typing import Any, Dict, Set, Tuple, Type +from typing import Any, Dict, Tuple, Type import confuse -import yaml from dialoguekit.core.intent import Intent from dialoguekit.core.utterance import Utterance from dialoguekit.nlg import ConditionalNLG @@ -16,7 +15,6 @@ DisjointDialogueActExtractor, ) from dialoguekit.nlu.intent_classifier import IntentClassifier -from dialoguekit.nlu.models.diet_classifier_rasa import IntentClassifierRasa from dialoguekit.nlu.models.intent_classifier_cosine import ( IntentClassifierCosine, ) @@ -131,7 +129,7 @@ def _get_agenda_based_simulator_config( ratings = Ratings(item_collection) ratings.load_ratings_csv(file_path=config["ratings"].get()) - historical_ratings, ground_truth_ratings = ratings.create_split( + historical_ratings, _ = ratings.create_split( config["historical_ratings_ratio"].get(0.8) ) @@ -200,9 +198,6 @@ def get_NLU(config: confuse.Configuration) -> NLU: return NLU( DisjointDialogueActExtractor(classifier, slot_value_annotators=[]) ) - elif intent_classifier == "diet": - classifier = train_rasa_diet_classifier(config) - return NLU(DisjointDialogueActExtractor(classifier, [classifier])) raise ValueError( "Unsupported intent classifier. Check DialogueKit intent" " classifiers." @@ -242,35 +237,3 @@ def train_cosine_classifier( intent_classifier = IntentClassifierCosine(intents=gt_intents) intent_classifier.train_model(utterances=utterances, labels=gt_intents) return intent_classifier - - -def train_rasa_diet_classifier( - config: confuse.Configuration, -) -> IntentClassifierRasa: - """Trains a DIET classifier on Rasa annotated dialogues for NLU module. - - Args: - config: Configuration generated from YAML configuration file. - - Returns: - A trained Rasa DIET model for intent classification. - """ - # TODO: Move to DialogueKit as util function. - # See: https://github.com/iai-group/UserSimCRS/issues/92 - intent_schema_file = config["intents"].get() - intent_schema = yaml.load(open(intent_schema_file), Loader=yaml.FullLoader) - - agent_intents_str: Set[str] = set() - for v in intent_schema["user_intents"].values(): - intents = v.get("expected_agent_intents", []) or [] - agent_intents_str.update(intents) - # agent_intents_str = intent_schema["agent_elicit_intents"] - # agent_intents_str.extend(intent_schema["agent_set_retrieval"]) - agent_intents = [Intent(intent) for intent in agent_intents_str] - intent_classifier = IntentClassifierRasa( - agent_intents, - config["rasa_dialogues"].get(), - ".rasa", - ) - intent_classifier.train_model() - return intent_classifier