Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving the visual QA #431

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions config/llm_prompts/r18_steps_prompt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
###
You are a professional medic teaching me how to best save another person's life.

Here are frequently asked questions:
Question: What is a chest seal and why is it used?
Answer: A chest seal is a bandage for deep chest wounds. It prevents air from entering the chest cavity to avoid a collapsed lung.
Question: How do I know if a chest seal is needed?
Answer: Use a chest seal if there is a deep chest wound that bubbles blood or if the person has trouble breathing.
Question: What should I do first before applying a chest seal?
Answer: Call 911, ensure the area is safe, put on gloves, and assess the person's wounds.
Question: How do I prepare the wound area for a chest seal?
Answer: Dry the area around the wound with gauze or a clean cloth.
Question: How do I apply a chest seal?
Answer: Remove the backing, place the seal over the wound, and press firmly.
Question: What if the wound has both an entry and exit point?
Answer: Apply a chest seal to both the entry and exit wounds.
Question: What should I do after applying the chest seal?
Answer: Monitor the person's breathing and condition until emergency help arrives. Burp the seal if needed.
Question: Can I use a homemade chest seal if I don't have a store-bought one?
Answer: Yes, use plastic wrap or a clean plastic bag and tape to cover the wound.
Question: What is the best time to apply a chest seal?
Answer: Apply the seal right after the person exhales to minimize trapped air.
Question: What if the chest seal gets clogged with blood?
Answer: Monitor the person. If breathing worsens, burp the seal or replace it.

Here are the instructions of the current task you are trying to teach me:
1) With gloved hand, cover and seal wound site.
2) Open vented chest seal package.
3) Wipe blood and body fluids from wound site.
4) Peel away chest seal backer.
5) Place chest seal with circle of vents over wound site and seal to chest.
{taskactivity}

When you answer my question, follow these rules:
* Use information from the instructions above.
* You should not deviate from the instructions, except when medical instruments are not available.
* If I ask a question not related to medicine, answer with: "Sorry, I can't help you with that".
* You should always respond in a conversational tone.
* DO NOT ANSWER "I'm sorry, I am an AI language model and I cannot see or perceive anything."
* Don't use the phrasing "However,.."
* Don't say "Based on the information you provided,"
* Do not read the entire instructions to me. Just give me one at a time.

###

{question}
Your answer (very short, precise, helpful with empathy):
25 changes: 24 additions & 1 deletion ros/angel_system_nodes/angel_system_nodes/audio/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
from nltk.tokenize import sent_tokenize
import rclpy

from angel_msgs.msg import HeadsetAudioData, DialogueUtterance
from angel_msgs.msg import HeadsetAudioData, DialogueUtterance, SystemTextResponse
from angel_system_nodes.audio import dialogue
from angel_utils import make_default_main


AUDIO_TOPIC = "audio_topic"
UTTERANCES_TOPIC = "utterances_topic"
FEEDBACK_TOPIC = "feedback_topic"
ASR_SERVER_URL = "asr_server_url"
ASR_REQ_SEGMENT_SECONDS_DURATION = "asr_req_segment_duration"
IS_SENTENCE_TOKENIZE = "is_sentence_tokenize"
Expand All @@ -34,6 +35,7 @@ def __init__(self):
parameter_names = [
AUDIO_TOPIC,
UTTERANCES_TOPIC,
FEEDBACK_TOPIC,
ASR_SERVER_URL,
ASR_REQ_SEGMENT_SECONDS_DURATION,
IS_SENTENCE_TOKENIZE,
Expand Down Expand Up @@ -70,6 +72,10 @@ def __init__(self):
self._debug_mode = (
self.get_parameter(DEBUG_MODE).get_parameter_value().bool_value
)
self._feedback_topic = (
self.get_parameter(FEEDBACK_TOPIC).get_parameter_value().string_value
)

self.log.info(
f"Audio topic: "
f"({type(self._audio_topic).__name__}) "
Expand Down Expand Up @@ -108,6 +114,9 @@ def __init__(self):
self._publisher = self.create_publisher(
DialogueUtterance, self._utterances_topic, 1
)
self._feedback_publisher = self.create_publisher(
SystemTextResponse, self._feedback_topic, 1
)

self.audio_stream = []
self.t = threading.Thread()
Expand Down Expand Up @@ -217,8 +226,22 @@ def _publish_text(self, text: str):
published_msg.utterance_text = text
colored_utterance = colored(published_msg.utterance_text, "light_blue")
self.log.info("Publishing message: " + f'"{colored_utterance}"')

if "angela" in text.lower() or "angel" in text.lower() or "angela," in text.lower() or "angel," in text.lower():
self.log.info("Publish thinking feedback")
self.publish_feedback_response()

self._publisher.publish(published_msg)

def publish_feedback_response(self):
    """Publish a "thinking" placeholder on the feedback topic.

    Sent as soon as an utterance addressed to the assistant is detected, so
    the UI can show that a (slow) answer is being generated.
    """
    feedback_msg = SystemTextResponse()
    feedback_msg.header.frame_id = "GPT thinking"
    feedback_msg.header.stamp = self.get_clock().now().to_msg()
    feedback_msg.utterance_text = ""
    feedback_msg.response = "thinking"
    self._feedback_publisher.publish(feedback_msg)

main = make_default_main(ASR)

Expand Down
151 changes: 126 additions & 25 deletions ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,38 @@
import json
import openai
import os
import io
import queue
import base64
import PIL.Image
import numpy as np
from cv_bridge import CvBridge

import requests
from termcolor import colored
from sensor_msgs.msg import Image
import threading

from angel_msgs.msg import DialogueUtterance, SystemTextResponse
from angel_msgs.msg import (
DialogueUtterance,
SystemTextResponse,
TaskUpdate
)
from angel_system_nodes.audio import dialogue
from angel_utils import declare_and_get_parameters
from angel_utils import make_default_main


openai.organization = os.getenv("OPENAI_ORG_ID")
openai.api_key = os.getenv("OPENAI_API_KEY")

BRIDGE = CvBridge()

INPUT_QA_TOPIC = "in_qa_topic"
OUT_QA_TOPIC = "out_qa_topic"
FEW_SHOT_PROMPT = "few_shot_prompt_file"
CHAT_HISTORY_LENGTH = "chat_history_length"

IMAGE_TOPIC = "image_topic"
TASK_STATE_TOPIC = "task_state_topic"

class QuestionAnswerer(dialogue.AbstractDialogueNode):
def __init__(self):
Expand All @@ -35,12 +47,16 @@ def __init__(self):
(OUT_QA_TOPIC,),
(FEW_SHOT_PROMPT,),
(CHAT_HISTORY_LENGTH, -1),
(IMAGE_TOPIC,),
(TASK_STATE_TOPIC,),
],
)
self._in_qa_topic = param_values[INPUT_QA_TOPIC]
self._in_task_state_topic = param_values[TASK_STATE_TOPIC]
self._out_qa_topic = param_values[OUT_QA_TOPIC]
self._chat_history_length = param_values[CHAT_HISTORY_LENGTH]
self.prompt_file = param_values[FEW_SHOT_PROMPT]
self.image_topic = param_values[IMAGE_TOPIC]

self.question_queue = queue.Queue()
self.handler_thread = threading.Thread(target=self.process_question_queue)
Expand All @@ -63,42 +79,83 @@ def __init__(self):
self.openai_org_id = os.getenv("OPENAI_ORG_ID")

# Handle subscription/publication topics.
self.log.info("Creating subscription to utterance topic")
self.subscription = self.create_subscription(
DialogueUtterance,
self._in_qa_topic,
self.question_answer_callback,
1,
)

self.log.info("Creating subscription to feedback generator topic")
self._qa_publisher = self.create_publisher(
SystemTextResponse, self._out_qa_topic, 1
)

publish_msg = SystemTextResponse()
publish_msg.header.frame_id = "GPT Question Answering"
publish_msg.header.stamp = self.get_clock().now().to_msg()
publish_msg.utterance_text = ""
publish_msg.response = "Hello! Ask me anything. Just start with my name."
self._qa_publisher.publish(publish_msg)

# Single slot for latest image message to process detection over.
self.image_msg: Image = ""

self.log.info("Creating subscription to image topic")
# Initialize ROS hooks
self.subscription = self.create_subscription(
Image,
self.image_topic,
self.process_image_callback,
1,
)

self.log.info("Creating subscription to task topic")
# Configure the optional task updates subscription.
self.task_state_subscription = None
self.current_step = None
self.completed_steps = None
if self._in_task_state_topic:
self.task_state_subscription = self.create_subscription(
TaskUpdate,
self._in_task_state_topic,
self._set_task_topic,
1,
)

self._chat_history = None
if self._is_using_chat_history():
self._chat_history = collections.deque([], maxlen=self._chat_history_length)

def _is_using_chat_history(self):
return self._chat_history_length > 0

def get_response(self, msg: DialogueUtterance) -> str:
def _set_task_topic(self, msg: TaskUpdate):
    """Cache the latest task progress from a ``TaskUpdate`` message.

    The cached fields are later folded into the GPT prompt so answers can
    reference which step the user is on.
    """
    # Snapshot only the two fields the prompt builder consumes.
    self.completed_steps = msg.completed_steps
    self.current_step = msg.current_step_id

def get_response(self, msg: DialogueUtterance, optional_fields: str) -> str:
    """Generate an answer for the given utterance via GPT.

    :param msg: incoming dialogue utterance whose text is the question.
    :param optional_fields: task-progress context string appended to the
        prompt (see ``_get_optional_fields_string``).
    :return: the model's answer, an empty string when OpenAI credentials
        are not configured, or a canned apology when prompting fails.
    """
    try:
        if not self.is_openai_ready:
            # No credentials available; return an empty answer.
            return ""
        return self.prompt_gpt(msg.utterance_text, optional_fields)
    except RuntimeError as err:
        self.log.info(err)
        return "I'm sorry. I don't know how to answer your statement."

def process_image_callback(self, image: Image):
    """Cache the newest camera frame as a base64-encoded JPEG string.

    The encoded frame is stored in ``self.image_msg`` and attached to the
    next GPT request so the model can answer visual questions.

    :param image: ROS ``sensor_msgs.msg.Image`` with BGR8 encoding.
    """
    # Decode the ROS image message into a numpy array (BGR channel order).
    frame_bgr = BRIDGE.imgmsg_to_cv2(image, desired_encoding="bgr8")

    # Reverse the channel axis (BGR -> RGB) and wrap it as a PIL image.
    pil_img = PIL.Image.fromarray(frame_bgr[:, :, ::-1], mode="RGB")
    # Downscale by 4x to keep the base64 payload sent to the API small.
    pil_img = pil_img.resize(np.divide(pil_img.size, 4).astype(int))
    jpeg_buffer = io.BytesIO()
    pil_img.save(jpeg_buffer, format="JPEG")
    self.image_msg = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

def question_answer_callback(self, msg):
self.log.debug(f"Received message:\n\n{msg.utterance_text}")
if not self._apply_filter(msg):
Expand All @@ -111,7 +168,11 @@ def process_question_queue(self):
"""
while True:
msg = self.question_queue.get()
response = self.get_response(msg)
# Get the optional fields.
optional_fields = \
self._get_optional_fields_string(self.current_step,self.completed_steps)

response = self.get_response(msg,optional_fields)
self.publish_generated_response(msg, response)

def publish_generated_response(
Expand All @@ -130,15 +191,34 @@ def publish_generated_response(
)
self._qa_publisher.publish(publish_msg)

def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"):
prompt = self.prompt.format(question=question)
self.log.info(f"Prompting OpenAI with\n{prompt}\n")
payload = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.7,
"max_tokens": 128,
def prompt_gpt(self, question, optional_fields: str, model: str = "gpt-4o"):
prompt = self.prompt.format(question=question, taskactivity=optional_fields)
self.log.info(f"Prompting OpenAI with\n{question} with \"{optional_fields}\"\n")

if self.image_msg==None or len(self.image_msg)<=1:
payload = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.0,
"max_tokens": 128,
}
else:
payload = {
"model": model,
"messages": [{"role": "user", "content": [
{ "type": "text",
"text": "Use the image to answer the question."+ prompt},
{"type": "image_url",
"image_url" : {
"url" : "data:image/jpeg;base64,"+self.image_msg
}
}
]}],
"temperature": 0.0,
"max_tokens": 128
}


req = requests.post(
"https://api.openai.com/v1/chat/completions",
json=payload,
Expand All @@ -150,6 +230,27 @@ def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"):
.lstrip()
)

def _get_optional_fields_string(self, current_step: int, completed_steps: list) -> str:
optional_fields_string = ""

if current_step==None:
#non started case
return "I didn't start the recipe yet."
else:
if completed_steps[-1]==True:
#the last step is finished
optional_fields_string += f"I am done with all steps."
elif current_step==0:
#user is at step 1
optional_fields_string += f"I am doing {current_step+1}"
optional_fields_string += f" and I am about to do {current_step+2}"
else:
optional_fields_string += f"I am doing {current_step+1}"
if current_step<=len(completed_steps)-2:
optional_fields_string += f" and I am about to do {current_step+2}"

return optional_fields_string.rstrip("\n")

def prompt_gpt_with_chat_history(self, question, model: str = "gpt-3.5-turbo"):
prompt = self.prompt.format(
chat_history=self._format_chat_history_str(), question=question
Expand Down Expand Up @@ -193,7 +294,7 @@ def _apply_filter(self, msg):
none if the message should be filtered out. Else, return the incoming
msg if it can be included.
"""
if msg.intent == "inquiry":
if "angela" in msg.utterance_text.lower() or "angel" in msg.utterance_text.lower() or "angela," in msg.utterance_text.lower() or "angel," in msg.utterance_text.lower():
return msg
return None

Expand Down
Loading
Loading