From c3706ee4eed029abc56a466d2a9cc4ef98bffff9 Mon Sep 17 00:00:00 2001 From: seanzhangkx8 <106214464+seanzhangkx8@users.noreply.github.com> Date: Wed, 29 Oct 2025 18:37:56 -0400 Subject: [PATCH] add transforming a single object feature for LLMPromptTransformer --- convokit/genai/example/example.ipynb | 74 ++++++++++++++++++++++++++ convokit/genai/llmprompttransformer.py | 43 +++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/convokit/genai/example/example.ipynb b/convokit/genai/example/example.ipynb index ee67bc34..6fa6fe79 100644 --- a/convokit/genai/example/example.ipynb +++ b/convokit/genai/example/example.ipynb @@ -232,6 +232,80 @@ " sentiment = utt.meta.get(\"gpt_sentiment\", \"Not processed\")\n", " print(f\" {utt.speaker.id}: '{utt.text[:50]}...' -> Sentiment: {sentiment}\")\n" ] + }, + { + "cell_type": "markdown", + "id": "0d9408e2", + "metadata": {}, + "source": [ + "## Testing Prompts on Single Objects\n", + "\n", + "The `transform_single` method allows you to test your prompts on individual objects without processing an entire corpus. This function allows user to test prompt development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f11439f8", + "metadata": {}, + "outputs": [], + "source": [ + "# Test with a single utterance using string input\n", + "test_text = \"I absolutely love this new feature! It's amazing!\"\n", + "result = sentiment_transformer.transform_single(test_text)\n", + "print(f\"Input: {test_text}\")\n", + "print(f\"Sentiment: {result.meta.get('gpt_sentiment', 'Not processed')}\")\n", + "print(f\"Result type: {type(result)}\")\n", + "print()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82858c34", + "metadata": {}, + "outputs": [], + "source": [ + "# Test with an actual utterance object\n", + "from convokit import Utterance, Speaker\n", + "\n", + "test_utterance = Utterance(\n", + " id=\"test_utt\",\n", + " text=\"This is terrible! 
def transform_single(
    self, obj: Union[str, Corpus, Conversation, Speaker, Utterance]
) -> Union[Corpus, Conversation, Speaker, Utterance]:
    """
    Transform a single object (utterance, conversation, speaker, or corpus) with the LLM prompt.

    This method allows users to test their prompt on a single unit without
    processing an entire corpus, which is useful during prompt development.

    :param obj: The object to transform. Can be:
        - A string (converted to an Utterance with a default speaker; only
          supported when this transformer operates at the utterance level)
        - An Utterance, Conversation, Speaker, or Corpus object matching
          this transformer's ``object_level``
    :return: The transformed object, with the LLM response stored in its
        metadata under ``self.metadata_name`` (set to ``None`` when the
        selector rejects the object, mirroring ``transform()``).
    :raises ValueError: If a string is supplied to a non-utterance-level
        transformer, or if the object's type does not match ``object_level``.
    """
    # Convenience path: allow raw text for utterance-level transformers by
    # wrapping it in an Utterance with a placeholder speaker.
    if isinstance(obj, str):
        if self.object_level != "utterance":
            raise ValueError(
                f"Cannot convert string to {self.object_level}. String input is only supported for utterance-level transformation."
            )
        obj = Utterance(text=obj, speaker=Speaker(id="speaker"))

    # Validate the object's type against the transformer's object_level via a
    # dispatch table. This also covers the "corpus" level, which the original
    # elif chain omitted (Corpus input previously skipped validation entirely
    # even though it appears in the accepted Union).
    expected_types = {
        "utterance": Utterance,
        "conversation": Conversation,
        "speaker": Speaker,
        "corpus": Corpus,
    }
    expected = expected_types.get(self.object_level)
    if expected is not None and not isinstance(obj, expected):
        raise ValueError(
            f"Expected {expected.__name__} object for {self.object_level}-level transformation, got {type(obj).__name__}"
        )

    # Objects rejected by the selector are annotated with None rather than
    # processed, consistent with transform()'s behavior on a full corpus.
    if not self.selector(obj):
        obj.add_meta(self.metadata_name, None)
        return obj

    # Delegate the actual LLM call / metadata write to the shared helper used
    # by transform().
    self._process_object(obj)
    return obj