Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving the visual QA #431

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions config/llm_prompts/r18_steps_prompt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
###
You are a professional medic teaching me how to best save another person's life.

Here are frequently asked questions:
Question: What is a chest seal and why is it used?
Answer: A chest seal is a bandage for deep chest wounds. It prevents air from entering the chest cavity to avoid a collapsed lung.
Question: How do I know if a chest seal is needed?
Answer: Use a chest seal if there is a deep chest wound that bubbles blood or if the person has trouble breathing.
Question: What should I do first before applying a chest seal?
Answer: Call 911, ensure the area is safe, put on gloves, and assess the person's wounds.
Question: How do I prepare the wound area for a chest seal?
Answer: Dry the area around the wound with gauze or a clean cloth.
Question: How do I apply a chest seal?
Answer: Remove the backing, place the seal over the wound, and press firmly.
Question: What if the wound has both an entry and exit point?
Answer: Apply a chest seal to both the entry and exit wounds.
Question: What should I do after applying the chest seal?
Answer: Monitor the person's breathing and condition until emergency help arrives. Burp the seal if needed.
Question: Can I use a homemade chest seal if I don't have a store-bought one?
Answer: Yes, use plastic wrap or a clean plastic bag and tape to cover the wound.
Question: What is the best time to apply a chest seal?
Answer: Apply the seal right after the person exhales to minimize trapped air.
Question: What if the chest seal gets clogged with blood?
Answer: Monitor the person. If breathing worsens, burp the seal or replace it.

Here are the instructions of the current task you are trying to teach me:
1) With gloved hand, cover and seal wound site.
2) Open vented chest seal package.
3) Wipe blood and body fluids from wound site.
4) Peel away chest seal backer.
5) Place chest seal with circle of vents over wound site and seal to chest.
{taskactivity}

When you answer my question, follow these rules:
* Use information from the instructions above.
* You should not deviate from the instructions, except when medical instruments are not available.
* If I ask a question not related to medicine, answer with: "Sorry, I can't help you with that".
* You should always respond in a conversational tone.
* DO NOT ANSWER "I'm sorry, I am an AI language model and I cannot see or perceive anything."
* Don't use the phrasing "However,.."
* Don't say "Based on the information you provided,"
* Do not read the entire instructions to me. Just give me one at a time.

###

{question}
Your answer (very short, precise, helpful with empathy):
25 changes: 24 additions & 1 deletion ros/angel_system_nodes/angel_system_nodes/audio/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
from nltk.tokenize import sent_tokenize
import rclpy

from angel_msgs.msg import HeadsetAudioData, DialogueUtterance
from angel_msgs.msg import HeadsetAudioData, DialogueUtterance, SystemTextResponse
from angel_system_nodes.audio import dialogue
from angel_utils import make_default_main


AUDIO_TOPIC = "audio_topic"
UTTERANCES_TOPIC = "utterances_topic"
FEEDBACK_TOPIC = "feedback_topic"
ASR_SERVER_URL = "asr_server_url"
ASR_REQ_SEGMENT_SECONDS_DURATION = "asr_req_segment_duration"
IS_SENTENCE_TOKENIZE = "is_sentence_tokenize"
Expand All @@ -34,6 +35,7 @@ def __init__(self):
parameter_names = [
AUDIO_TOPIC,
UTTERANCES_TOPIC,
FEEDBACK_TOPIC,
ASR_SERVER_URL,
ASR_REQ_SEGMENT_SECONDS_DURATION,
IS_SENTENCE_TOKENIZE,
Expand Down Expand Up @@ -70,6 +72,10 @@ def __init__(self):
self._debug_mode = (
self.get_parameter(DEBUG_MODE).get_parameter_value().bool_value
)
self._feedback_topic = (
self.get_parameter(FEEDBACK_TOPIC).get_parameter_value().string_value
)

self.log.info(
f"Audio topic: "
f"({type(self._audio_topic).__name__}) "
Expand Down Expand Up @@ -108,6 +114,9 @@ def __init__(self):
self._publisher = self.create_publisher(
DialogueUtterance, self._utterances_topic, 1
)
self._feedback_publisher = self.create_publisher(
SystemTextResponse, self._feedback_topic, 1
)

self.audio_stream = []
self.t = threading.Thread()
Expand Down Expand Up @@ -217,8 +226,22 @@ def _publish_text(self, text: str):
published_msg.utterance_text = text
colored_utterance = colored(published_msg.utterance_text, "light_blue")
self.log.info("Publishing message: " + f'"{colored_utterance}"')

if "angela" in text.lower() or "angel" in text.lower() or "angela," in text.lower() or "angel," in text.lower():
self.log.info("Publish thinking feedback")
self.publish_feedback_response()

self._publisher.publish(published_msg)

def publish_feedback_response(self):
    """Publish a "thinking" placeholder on the feedback topic.

    Sent as soon as an utterance addressed to the assistant is detected, so
    the UI can show that a (slow) answer is being generated.
    """
    feedback_msg = SystemTextResponse()
    feedback_msg.header.frame_id = "GPT thinking"
    feedback_msg.header.stamp = self.get_clock().now().to_msg()
    feedback_msg.utterance_text = ""
    feedback_msg.response = "thinking"
    self._feedback_publisher.publish(feedback_msg)

main = make_default_main(ASR)

Expand Down
151 changes: 126 additions & 25 deletions ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,38 @@
import json
import openai
import os
import io
import queue
import base64
import PIL.Image
import numpy as np
from cv_bridge import CvBridge

import requests
from termcolor import colored
from sensor_msgs.msg import Image
import threading

from angel_msgs.msg import DialogueUtterance, SystemTextResponse
from angel_msgs.msg import (
DialogueUtterance,
SystemTextResponse,
TaskUpdate
)
from angel_system_nodes.audio import dialogue
from angel_utils import declare_and_get_parameters
from angel_utils import make_default_main


openai.organization = os.getenv("OPENAI_ORG_ID")
openai.api_key = os.getenv("OPENAI_API_KEY")

BRIDGE = CvBridge()

INPUT_QA_TOPIC = "in_qa_topic"
OUT_QA_TOPIC = "out_qa_topic"
FEW_SHOT_PROMPT = "few_shot_prompt_file"
CHAT_HISTORY_LENGTH = "chat_history_length"

IMAGE_TOPIC = "image_topic"
TASK_STATE_TOPIC = "task_state_topic"

class QuestionAnswerer(dialogue.AbstractDialogueNode):
def __init__(self):
Expand All @@ -35,12 +47,16 @@ def __init__(self):
(OUT_QA_TOPIC,),
(FEW_SHOT_PROMPT,),
(CHAT_HISTORY_LENGTH, -1),
(IMAGE_TOPIC,),
(TASK_STATE_TOPIC,),
],
)
self._in_qa_topic = param_values[INPUT_QA_TOPIC]
self._in_task_state_topic = param_values[TASK_STATE_TOPIC]
self._out_qa_topic = param_values[OUT_QA_TOPIC]
self._chat_history_length = param_values[CHAT_HISTORY_LENGTH]
self.prompt_file = param_values[FEW_SHOT_PROMPT]
self.image_topic = param_values[IMAGE_TOPIC]

self.question_queue = queue.Queue()
self.handler_thread = threading.Thread(target=self.process_question_queue)
Expand All @@ -63,42 +79,83 @@ def __init__(self):
self.openai_org_id = os.getenv("OPENAI_ORG_ID")

# Handle subscription/publication topics.
self.log.info("Creating subscription to utterance topic")
self.subscription = self.create_subscription(
DialogueUtterance,
self._in_qa_topic,
self.question_answer_callback,
1,
)

self.log.info("Creating subscription to feedback generator topic")
self._qa_publisher = self.create_publisher(
SystemTextResponse, self._out_qa_topic, 1
)

publish_msg = SystemTextResponse()
publish_msg.header.frame_id = "GPT Question Answering"
publish_msg.header.stamp = self.get_clock().now().to_msg()
publish_msg.utterance_text = ""
publish_msg.response = "Hello! Ask me anything. Just start with my name."
self._qa_publisher.publish(publish_msg)

# Single slot for latest image message to process detection over.
self.image_msg: Image = ""

self.log.info("Creating subscription to image topic")
# Initialize ROS hooks
self.subscription = self.create_subscription(
Image,
self.image_topic,
self.process_image_callback,
1,
)

self.log.info("Creating subscription to task topic")
# Configure the optional task updates subscription.
self.task_state_subscription = None
self.current_step = None
self.completed_steps = None
if self._in_task_state_topic:
self.task_state_subscription = self.create_subscription(
TaskUpdate,
self._in_task_state_topic,
self._set_task_topic,
1,
)

self._chat_history = None
if self._is_using_chat_history():
self._chat_history = collections.deque([], maxlen=self._chat_history_length)

def _is_using_chat_history(self):
return self._chat_history_length > 0

def get_response(self, msg: DialogueUtterance) -> str:
def _set_task_topic(self, msg: TaskUpdate):
    """Cache the latest task progress from a ``TaskUpdate`` message.

    The cached fields are later folded into the GPT prompt so answers can
    reference which step the user is on.
    """
    # Snapshot only the two fields the prompt builder consumes.
    self.completed_steps = msg.completed_steps
    self.current_step = msg.current_step_id

def get_response(self, msg: DialogueUtterance, optional_fields: str) -> str:
    """Generate an answer for the given utterance via GPT.

    :param msg: incoming dialogue utterance whose text is the question.
    :param optional_fields: task-progress context string appended to the
        prompt (see ``_get_optional_fields_string``).
    :return: the model's answer, an empty string when OpenAI credentials
        are not configured, or a canned apology when prompting fails.
    """
    try:
        if not self.is_openai_ready:
            # No credentials available; return an empty answer.
            return ""
        return self.prompt_gpt(msg.utterance_text, optional_fields)
    except RuntimeError as err:
        self.log.info(err)
        return "I'm sorry. I don't know how to answer your statement."

def process_image_callback(self, image: Image):
    """Cache the newest camera frame as a base64-encoded JPEG string.

    The encoded frame is stored in ``self.image_msg`` and attached to the
    next GPT request so the model can answer visual questions.

    :param image: ROS ``sensor_msgs.msg.Image`` with BGR8 encoding.
    """
    # Decode the ROS image message into a numpy array (BGR channel order).
    frame_bgr = BRIDGE.imgmsg_to_cv2(image, desired_encoding="bgr8")

    # Reverse the channel axis (BGR -> RGB) and wrap it as a PIL image.
    pil_img = PIL.Image.fromarray(frame_bgr[:, :, ::-1], mode="RGB")
    # Downscale by 4x to keep the base64 payload sent to the API small.
    pil_img = pil_img.resize(np.divide(pil_img.size, 4).astype(int))
    jpeg_buffer = io.BytesIO()
    pil_img.save(jpeg_buffer, format="JPEG")
    self.image_msg = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

def question_answer_callback(self, msg):
self.log.debug(f"Received message:\n\n{msg.utterance_text}")
if not self._apply_filter(msg):
Expand All @@ -111,7 +168,11 @@ def process_question_queue(self):
"""
while True:
msg = self.question_queue.get()
response = self.get_response(msg)
# Get the optional fields.
optional_fields = \
self._get_optional_fields_string(self.current_step,self.completed_steps)

response = self.get_response(msg,optional_fields)
self.publish_generated_response(msg, response)

def publish_generated_response(
Expand All @@ -130,15 +191,34 @@ def publish_generated_response(
)
self._qa_publisher.publish(publish_msg)

def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"):
prompt = self.prompt.format(question=question)
self.log.info(f"Prompting OpenAI with\n{prompt}\n")
payload = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.7,
"max_tokens": 128,
def prompt_gpt(self, question, optional_fields: str, model: str = "gpt-4o"):
prompt = self.prompt.format(question=question, taskactivity=optional_fields)
self.log.info(f"Prompting OpenAI with\n{question} with \"{optional_fields}\"\n")

if self.image_msg==None or len(self.image_msg)<=1:
payload = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.0,
"max_tokens": 128,
}
else:
payload = {
"model": model,
"messages": [{"role": "user", "content": [
{ "type": "text",
"text": "Use the image to answer the question."+ prompt},
{"type": "image_url",
"image_url" : {
"url" : "data:image/jpeg;base64,"+self.image_msg
}
}
]}],
"temperature": 0.0,
"max_tokens": 128
}


req = requests.post(
"https://api.openai.com/v1/chat/completions",
json=payload,
Expand All @@ -150,6 +230,27 @@ def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"):
.lstrip()
)

def _get_optional_fields_string(self, current_step: int, completed_steps: list) -> str:
optional_fields_string = ""

if current_step==None:
#non started case
return "I didn't start the recipe yet."
else:
if completed_steps[-1]==True:
#the last step is finished
optional_fields_string += f"I am done with all steps."
elif current_step==0:
#user is at step 1
optional_fields_string += f"I am doing {current_step+1}"
optional_fields_string += f" and I am about to do {current_step+2}"
else:
optional_fields_string += f"I am doing {current_step+1}"
if current_step<=len(completed_steps)-2:
optional_fields_string += f" and I am about to do {current_step+2}"

return optional_fields_string.rstrip("\n")

def prompt_gpt_with_chat_history(self, question, model: str = "gpt-3.5-turbo"):
prompt = self.prompt.format(
chat_history=self._format_chat_history_str(), question=question
Expand Down Expand Up @@ -193,7 +294,7 @@ def _apply_filter(self, msg):
none if the message should be filtered out. Else, return the incoming
msg if it can be included.
"""
if msg.intent == "inquiry":
if "angela" in msg.utterance_text.lower() or "angel" in msg.utterance_text.lower() or "angela," in msg.utterance_text.lower() or "angel," in msg.utterance_text.lower():
return msg
return None

Expand Down
Loading
Loading