From 6b2088ea9788fd8ff2dc714c12c642e1fc0ba08d Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Thu, 1 Feb 2024 14:53:48 -0500 Subject: [PATCH 01/19] Update .env to supply innocuous default for XAUTH_FILEPATH Fix build script to *actually* pass forward extra args to docker compose build command. --- angel-docker-build.sh | 6 +++--- docker/.env | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/angel-docker-build.sh b/angel-docker-build.sh index 4180fb1f4..43018d243 100755 --- a/angel-docker-build.sh +++ b/angel-docker-build.sh @@ -19,7 +19,7 @@ Build the PTG ANGEL system docker container images. Options: -h | --help Display this message. - --force Force image building regardless of workspace hygiene.f + -f | --force Force image building regardless of workspace hygiene. " } @@ -32,7 +32,7 @@ do usage exit 0 ;; - --force) + -f|--force) log "Forcing build regardless of workspace hygiene." shift FORCE_BUILD=1 @@ -113,4 +113,4 @@ get_docker_compose_cmd DC_CMD --env-file "$SCRIPT_DIR"/docker/.env \ -f "$SCRIPT_DIR"/docker/docker-compose.yml \ --profile build-only \ - build "$@" + build "${dc_forward_params[@]}" "$@" diff --git a/docker/.env b/docker/.env index 7fb316548..7c260dd65 100644 --- a/docker/.env +++ b/docker/.env @@ -32,3 +32,10 @@ RMW_IMPLEMENTATION=rmw_cyclonedds_cpp # This must specify the network interface for CycloneDDS to use. CYCLONE_DDS_INTERFACE=lo + +# Starting with the docker compose plugin (v2), the whole compose file will be +# validated, even for services not being run. This provides a valid "default" +# path to cause validation to succeed. This variable should be overridden when +# attempting to actually run a service that makes use of this variable. +# Path considered relative to where the docker-compose file is located. +XAUTH_FILEPATH=../.container_xauth/.placeholder From 7866baa61c774dee3366a91cb370ea01edc3d65e Mon Sep 17 00:00:00 2001 From: derkmed Date: Wed, 20 Dec 2023 23:06:58 -0500 Subject: [PATCH 02/19] Add new modular DialogueUtterance msg that allows node reordering --- ros/angel_msgs/CMakeLists.txt | 1 + ros/angel_msgs/msg/DialogueUtterance.msg | 24 +++++ .../Angel/msg/DialogueUtteranceMsg.cs | 99 +++++++++++++++++++ .../Angel/msg/DialogueUtteranceMsg.cs.meta | 11 +++ 4 files changed, 135 insertions(+) create mode 100644 ros/angel_msgs/msg/DialogueUtterance.msg create mode 100644 unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs create mode 100644 unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs.meta diff --git a/ros/angel_msgs/CMakeLists.txt b/ros/angel_msgs/CMakeLists.txt index e5abe06e3..bd9890ce4 100644 --- a/ros/angel_msgs/CMakeLists.txt +++ b/ros/angel_msgs/CMakeLists.txt @@ -28,6 +28,7 @@ set( message_files msg/AruiObject3d.msg msg/AruiUpdate.msg msg/AruiUserNotification.msg + msg/DialogueUtterance.msg msg/EyeGazeData.msg msg/HandJointPose.msg msg/HandJointPosesUpdate.msg diff --git a/ros/angel_msgs/msg/DialogueUtterance.msg b/ros/angel_msgs/msg/DialogueUtterance.msg new file mode 100644 index 000000000..49d3122ee --- /dev/null +++ b/ros/angel_msgs/msg/DialogueUtterance.msg @@ -0,0 +1,24 @@ +# +# Dialogue Utterance with additional information about the environmental state +# and user model. +# + +# The header primarily encapsulates when this message was emitted. +# The time component of this may be utilized as an identifier for this user +# intent and utterance. 
+std_msgs/Header header + +# Speech-to-text of the user utterance we have interpreted +string utterance_text + +# Below are optional fields + +# Canonical user intent that has been interpreted. "Canonical" in this context +# is to mean that this string may be used as an identifier of this type of +# user intent. Should be in the range [0,1] where 1.0 means absolute confidence. +string intent +float64 intent_confidence_score + +# Emotion classification. Should be in the range [0,1] where 1.0 means absolute confidence. +string emotion +float64 emotion_confidence_score diff --git a/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs b/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs new file mode 100644 index 000000000..2e1479edd --- /dev/null +++ b/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs @@ -0,0 +1,99 @@ +//Do not edit! This file was generated by Unity-ROS MessageGeneration. +using System; +using System.Linq; +using System.Collections.Generic; +using System.Text; +using Unity.Robotics.ROSTCPConnector.MessageGeneration; + +namespace RosMessageTypes.Angel +{ + [Serializable] + public class DialogueUtteranceMsg : Message + { + public const string k_RosMessageName = "angel_msgs/DialogueUtterance"; + public override string RosMessageName => k_RosMessageName; + + // + // Dialogue Utterance with additional information about the environmental state + // and user model. + // + // The header primarily encapsulates when this message was emitted. + // The time component of this may be utilized as an identifier for this user + // intent and utterance. + public Std.HeaderMsg header; + // Speech-to-text of the user utterance we have interpreted + public string utterance_text; + // Below are optional fields + // Canonical user intent that has been interpreted. "Canonical" in this context + // is to mean that this string may be used as an identifier of this type of + // user intent. Should be in the range [0,1] where 1.0 means absolute confidence. + public string intent; + public double intent_confidence_score; + // Emotion classification. Should be in the range [0,1] where 1.0 means absolute confidence. 
+ public string emotion; + public double emotion_confidence_score; + + public DialogueUtteranceMsg() + { + this.header = new Std.HeaderMsg(); + this.utterance_text = ""; + this.intent = ""; + this.intent_confidence_score = 0.0; + this.emotion = ""; + this.emotion_confidence_score = 0.0; + } + + public DialogueUtteranceMsg(Std.HeaderMsg header, string utterance_text, string intent, double intent_confidence_score, string emotion, double emotion_confidence_score) + { + this.header = header; + this.utterance_text = utterance_text; + this.intent = intent; + this.intent_confidence_score = intent_confidence_score; + this.emotion = emotion; + this.emotion_confidence_score = emotion_confidence_score; + } + + public static DialogueUtteranceMsg Deserialize(MessageDeserializer deserializer) => new DialogueUtteranceMsg(deserializer); + + private DialogueUtteranceMsg(MessageDeserializer deserializer) + { + this.header = Std.HeaderMsg.Deserialize(deserializer); + deserializer.Read(out this.utterance_text); + deserializer.Read(out this.intent); + deserializer.Read(out this.intent_confidence_score); + deserializer.Read(out this.emotion); + deserializer.Read(out this.emotion_confidence_score); + } + + public override void SerializeTo(MessageSerializer serializer) + { + serializer.Write(this.header); + serializer.Write(this.utterance_text); + serializer.Write(this.intent); + serializer.Write(this.intent_confidence_score); + serializer.Write(this.emotion); + serializer.Write(this.emotion_confidence_score); + } + + public override string ToString() + { + return "DialogueUtteranceMsg: " + + "\nheader: " + header.ToString() + + "\nutterance_text: " + utterance_text.ToString() + + "\nintent: " + intent.ToString() + + "\nintent_confidence_score: " + intent_confidence_score.ToString() + + "\nemotion: " + emotion.ToString() + + "\nemotion_confidence_score: " + emotion_confidence_score.ToString(); + } + +#if UNITY_EDITOR + [UnityEditor.InitializeOnLoadMethod] +#else + [UnityEngine.RuntimeInitializeOnLoadMethod] +#endif + public static void Register() + { + MessageRegistry.Register(k_RosMessageName, Deserialize); + } + } +} \ No newline at end of file diff --git a/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs.meta b/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs.meta new file mode 100644 index 000000000..5d154c5ad --- /dev/null +++ b/unity/ARUI/Assets/RosMessages/Angel/msg/DialogueUtteranceMsg.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 244f6af8d6d7e4c18a6e2d52b444d387 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: \ No newline at end of file From 8422d7dcf9d2adc1061c67bfc37d94c9baeb76ad Mon Sep 17 00:00:00 2001 From: derkmed Date: Wed, 20 Dec 2023 23:23:05 -0500 Subject: [PATCH 03/19] Add Dialogue Utterance processing library --- .../audio/dialogue_utterance_processing.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py new file mode 100644 index 000000000..6e05fd8f9 --- /dev/null +++ b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py @@ -0,0 +1,39 @@ +from angel_msgs.msg import DialogueUtterance + +def get_intent_or(msg: 
DialogueUtterance, + or_value: str = "not available") -> str: + """ + Returns the msg intent classification information. If the value is absent, + the or_value is passed in. + """ + return msg.intent if msg.intent else or_value + +def get_emotion_or(msg: DialogueUtterance, + or_value: str = "not available") -> str: + """ + Returns the msg emotion classification information. If the value is absent, + the or_value is passed in. + """ + return msg.emotion if msg.emotion else or_value + +def copy_dialogue_utterance(msg: DialogueUtterance, + node_name, + copy_time) -> DialogueUtterance: + msg = DialogueUtterance() + msg.header.frame_id = node_name + msg.utterance_text = msg.utterance_text + + # Assign new time for publication. + msg.header.stamp = copy_time + + # Copy over intent classification information if present. + if msg.intent: + msg.intent = msg.intent + msg.intent_confidence_score = msg.intent_confidence_score + + # Copy over intent classification information if present. + if msg.emotion: + msg.emotion = msg.emotion + msg.emotion_confidence_score = msg.emotion_confidence_score + + return msg From 4ee24490e6893f5130bfc7008d598691e00c4a84 Mon Sep 17 00:00:00 2001 From: derkmed Date: Wed, 20 Dec 2023 23:47:11 -0500 Subject: [PATCH 04/19] Migrate audio nodes to rely on DialogueUtterance as input/output messages --- .../angel_system_nodes/audio/asr.py | 25 +++--- .../audio/emotion/base_emotion_detector.py | 79 ++++++++----------- .../audio/intent/base_intent_detector.py | 61 +++++++------- .../audio/question_answerer.py | 50 ++++++------ 4 files changed, 103 insertions(+), 112 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py index 3cf7c4b0c..e432bcd0c 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py @@ -11,7 +11,7 @@ from rclpy.node import Node import simpleaudio as sa -from angel_msgs.msg import HeadsetAudioData, Utterance +from angel_msgs.msg import HeadsetAudioData, DialogueUtterance from angel_utils import make_default_main @@ -106,7 +106,7 @@ def __init__(self): self.subscription = self.create_subscription( HeadsetAudioData, self._audio_topic, self.listener_callback, 1 ) - self._publisher = self.create_publisher(Utterance, self._utterances_topic, 1) + self._publisher = self.create_publisher(Utterance self.audio_stream = [] self.t = threading.Thread() @@ -205,15 +205,20 @@ def asr_server_request_thread(self, audio_data, num_channels, sample_rate): self.log.info("Complete ASR text is:\n" + f'"{response_text}"') if self._is_sentence_tokenize_mode: for sentence in sent_tokenize(response_text): - utterance_msg = Utterance() - utterance_msg.value = sentence - self.log.info("Publishing message: " + f'"{sentence}"') - self._publisher.publish(utterance_msg) + self._publisher.publish( + self._construct_dialogue_utterance(sentence)) else: - utterance_msg = Utterance() - utterance_msg.value = response_text - self.log.info("Publishing message: " + f'"{response_text}"') - self._publisher.publish(utterance_msg) + self._publisher.publish( + self._construct_dialogue_utterance(response_text)) + + def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance: + msg = DialogueUtterance() + msg.header.frame_id = "ASR" + msg.header.stamp = self.get_clock().now().to_msg() + msg.utterance_text = msg_text + self.log.info("Publishing message: " + f'"{msg_text}"') + return msg + main = make_default_main(ASR) diff --git 
a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py index df99a7490..9a4a2aef6 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py @@ -4,14 +4,14 @@ import threading from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer -from angel_msgs.msg import InterpretedAudioUserEmotion, InterpretedAudioUserIntent +from angel_msgs.msg import DialogueUtterance from angel_utils import declare_and_get_parameters from angel_utils import make_default_main +from angel_system_nodes.audio import dialogue_utterance_processing -IN_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic" -IN_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic" -OUT_INTERP_USER_EMOTION_TOPIC = "user_emotion_topic" +IN_TOPIC = "input_topic" +OUT_TOPIC = "user_emotion_topic" # Currently supported emotions. This is tied with the emotions # output to VaderSentiment (https://github.com/cjhutto/vaderSentiment) and @@ -26,8 +26,8 @@ class BaseEmotionDetector(Node): """ - As of Q22023, emotion detection is derived via VaderSentiment - (https://github.com/cjhutto/vaderSentiment). + This is the base emotion detection node that other emotion detection nodes + should inherit from. """ def __init__(self): @@ -38,31 +38,23 @@ def __init__(self): param_values = declare_and_get_parameters( self, [ - (IN_EXPECT_USER_INTENT_TOPIC,), - (IN_INTERP_USER_INTENT_TOPIC,), - (OUT_INTERP_USER_EMOTION_TOPIC,), + (IN_TOPIC,), + (OUT_TOPIC,), ], ) - self._in_expect_uintent_topic = param_values[IN_EXPECT_USER_INTENT_TOPIC] - self._in_interp_uintent_topic = param_values[IN_INTERP_USER_INTENT_TOPIC] - self._out_interp_uemotion_topic = param_values[OUT_INTERP_USER_EMOTION_TOPIC] + self._in_topic = param_values[IN_TOPIC] + self._out_topic = param_values[OUT_TOPIC] # Handle subscription/publication topics. - self.expect_uintent_subscription = self.create_subscription( - InterpretedAudioUserIntent, + self._subscription = self.create_subscription( + DialogueUtterance, self._in_expect_uintent_topic, - self.intent_detection_callback, + self.emotion_detection_callback, 1, ) - self.interp_uintent_subscription = self.create_subscription( - InterpretedAudioUserIntent, - self._in_interp_uintent_topic, - self.intent_detection_callback, - 1, - ) - self._interp_emo_publisher = self.create_publisher( - InterpretedAudioUserEmotion, self._out_interp_uemotion_topic, 1 + self._emo_publisher = self.create_publisher( + DialogueUtterance, self._out_topic, 1 ) self.message_queue = queue.Queue() @@ -95,14 +87,14 @@ def _get_vader_sentiment_analysis(self, utterance: str): ) return (classification, confidence) - def get_inference(self, msg): + def get_inference(self, msg: DialogueUtterance): """ Abstract away the different model inference calls depending on the node's configure model mode. """ return self._get_vader_sentiment_analysis(msg.utterance_text) - def intent_detection_callback(self, msg): + def emotion_detection_callback(self, msg): """ This is the main ROS node listener callback loop that will process all messages received via subscribed topics. 
@@ -119,29 +111,26 @@ def process_message_queue(self): while True: msg = self.message_queue.get() self.log.debug(f'Processing message:\n\n"{msg.utterance_text}"') - classification, confidence_score = self.get_inference(msg) - self.publish_detected_emotion( - msg.utterance_text, classification, confidence_score - ) - - def publish_detected_emotion( - self, utterance: str, classification: str, confidence_score: float - ): + self.process_message(msg) + + def process_message(self, msg: DialogueUtterance): """ Handles message publishing for an utterance with a detected emotion classification. """ - emotion_msg = InterpretedAudioUserEmotion() - emotion_msg.header.frame_id = "Emotion Detection" - emotion_msg.header.stamp = self.get_clock().now().to_msg() - emotion_msg.utterance_text = utterance - emotion_msg.user_emotion = classification - emotion_msg.confidence = confidence_score - self._interp_emo_publisher.publish(emotion_msg) - colored_utterance = colored(utterance, "light_blue") - colored_emotion = colored(classification, "light_green") + classification, confidence_score = self.get_inference(msg) + pub_msg = dialogue_utterance_processing.copy_dialogue_utterance( + msg, node_name="Emotion Detection") + # Overwrite the user emotion with the latest classification information. + pub_msg.emotion = classification + pub_msg.emotion_confidence_score = confidence_score + self.emotion_publication.publish(pub_msg) + + # Log emotion detection information. + colored_utterance = colored(pub_msg.utterance_text, "light_blue") + colored_emotion = colored(pub_msg.emotion, "light_green") self.log.info( f'Publishing {{"{colored_emotion}": {confidence_score}}} ' - + f'to {self._out_interp_uemotion_topic} for:\n>>> "{colored_utterance}"' + + f'to {self._out_topic} for:\n>>> "{colored_utterance}"' ) def _apply_filter(self, msg): @@ -150,10 +139,6 @@ def _apply_filter(self, msg): none if the message should be filtered out. Else, return the incoming msg if it can be included. """ - # if msg.user_intent.lower() == "user inquiry": - # return msg - # else: - # return None return msg diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py index 0651aa512..ff396eac0 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py @@ -3,7 +3,7 @@ from termcolor import colored import threading -from angel_msgs.msg import InterpretedAudioUserIntent, Utterance +from angel_msgs.msg import DialogueUtterance from angel_utils import declare_and_get_parameters from angel_utils import make_default_main @@ -18,7 +18,7 @@ # https://docs.google.com/document/d/1uuvSL5de3LVM9c0tKpRKYazDxckffRHf7IAcabSw9UA . 
INTENT_LABELS = ["next_step", "prev_step", "inquiry", "other"] -UTTERANCES_TOPIC = "utterances_topic" +IN_TOPIC = "utterances_topic" PARAM_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic" PARAM_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic" @@ -32,24 +32,24 @@ def __init__(self): param_values = declare_and_get_parameters( self, [ - (UTTERANCES_TOPIC,), + (IN_TOPIC,), (PARAM_EXPECT_USER_INTENT_TOPIC,), (PARAM_INTERP_USER_INTENT_TOPIC,), ], ) - self._utterances_topic = param_values[UTTERANCES_TOPIC] + self._input_topic = param_values[IN_TOPIC] self._expect_uintent_topic = param_values[PARAM_EXPECT_USER_INTENT_TOPIC] self._interp_uintent_topic = param_values[PARAM_INTERP_USER_INTENT_TOPIC] # Handle subscription/publication topics. self.subscription = self.create_subscription( - Utterance, self._utterances_topic, self.utterance_callback, 1 + DialogueUtterance, self._input_topic, self.utterance_callback, 1 ) self._expected_publisher = self.create_publisher( - InterpretedAudioUserIntent, self._expect_uintent_topic, 1 + DialogueUtterance, self._expect_uintent_topic, 1 ) self._interp_publisher = self.create_publisher( - InterpretedAudioUserIntent, self._interp_uintent_topic, 1 + DialogueUtterance, self._interp_uintent_topic, 1 ) self.utterance_message_queue = queue.Queue() @@ -63,7 +63,7 @@ def utterance_callback(self, msg): This is the main ROS node listener callback loop that will process all messages received via subscribed topics. """ - self.log.debug(f'Received message:\n\n"{msg.value}"') + self.log.debug(f'Received message:\n\n"{msg.utterance_text}"') self.utterance_message_queue.put(msg) def process_utterance_message_queue(self): @@ -72,13 +72,10 @@ def process_utterance_message_queue(self): """ while True: msg = self.utterance_message_queue.get() - self.log.debug(f'Processing message:\n\n"{msg.value}"') - intent, score = self.detect_intents(msg) - if not intent: - continue - self.publish_msg(msg.value, intent, score) + self.log.debug(f'Processing message:\n\n"{msg.utterance_text}"') + self.process_message(msg) - def detect_intents(self, msg): + def process_message(self, msg: DialogueUtterance): """ Keyphrase search for intent detection. This implementation does simple string matching to assign a detected label. When multiple intents are @@ -98,7 +95,8 @@ def _tiebreak_intents(intents, confidences): ) return classification, score - lower_utterance = msg.value.lower() + intent, score = self.detect_intents(msg) + lower_utterance = msg.utterance_text.lower() intents = [] confidences = [] if self._contains_phrase(lower_utterance, NEXT_STEP_KEYPHRASES): @@ -111,35 +109,38 @@ def _tiebreak_intents(intents, confidences): intents.append(INTENT_LABELS[2]) confidences.append(0.5) if not intents: - colored_utterance = colored(msg.value, "light_blue") + colored_utterance = colored(msg.utterance_text, "light_blue") self.log.info(f'No intents detected for:\n>>> "{colored_utterance}":') return None, -1.0 classification, confidence = _tiebreak_intents(intents, confidences) classification = colored(classification, "light_green") - return classification, confidence + + if intent: + self.publish_msg(msg.utterance_text, intent, score) - def publish_msg(self, utterance, intent, score): + def publish_msg(self, msg: DialogueUtterance, intent: str, score: float): """ Handles message publishing for an utterance with a detected intent. 
""" - intent_msg = InterpretedAudioUserIntent() - intent_msg.header.frame_id = "Intent Detection" - intent_msg.header.stamp = self.get_clock().now().to_msg() - intent_msg.utterance_text = utterance - intent_msg.user_intent = intent - intent_msg.confidence = score + pub_msg = self.copy_dialogue_utterance(msg, + node_name="Intent Detection") + # Overwrite the user intent with the latest classification information. + pub_msg.intent = intent + pub_msg.intent_confidence_score = score + + # Decide which intent topic to publish the message to. published_topic = None - if self._contains_phrase(utterance.lower(), OVERRIDE_KEYPHRASES): - intent_msg.confidence = 1.0 - self._expected_publisher.publish(intent_msg) + if self._contains_phrase(pub_msg.utterance_text.lower(), OVERRIDE_KEYPHRASES): + pub_msg.intent_confidence_score = 1.0 + self._expected_publisher.publish(pub_msg) published_topic = PARAM_EXPECT_USER_INTENT_TOPIC else: - self._interp_publisher.publish(intent_msg) + self._interp_publisher.publish(pub_msg) published_topic = PARAM_INTERP_USER_INTENT_TOPIC - colored_utterance = colored(utterance, "light_blue") - colored_intent = colored(intent_msg.user_intent, "light_green") + colored_utterance = colored(pub_msg.utterance_text, "light_blue") + colored_intent = colored(pub_msg.intent, "light_green") self.log.info( f'Publishing {{"{colored_intent}": {score}}} to {published_topic} ' + f'for:\n>>> "{colored_utterance}"' diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py b/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py index 548d286c6..c0963e9ba 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py @@ -7,7 +7,7 @@ from termcolor import colored import threading -from angel_msgs.msg import InterpretedAudioUserEmotion, SystemTextResponse +from angel_msgs.msg import DialogueUtterance, SystemTextResponse from angel_utils import declare_and_get_parameters from angel_utils import make_default_main @@ -15,9 +15,10 @@ openai.organization = os.getenv("OPENAI_ORG_ID") openai.api_key = os.getenv("OPENAI_API_KEY") -IN_EMOTION_TOPIC = "user_emotion_topic" +INPUT_TOPIC = "input_topic" OUT_QA_TOPIC = "system_text_response_topic" FEW_SHOT_PROMPT = "few_shot_prompt_file" +PARAM_TIMEOUT = "timeout" class QuestionAnswerer(Node): @@ -28,14 +29,15 @@ def __init__(self): param_values = declare_and_get_parameters( self, [ - (IN_EMOTION_TOPIC,), + (INPUT_TOPIC,), (OUT_QA_TOPIC,), (FEW_SHOT_PROMPT,), ], ) - self._in_emotion_topic = param_values[IN_EMOTION_TOPIC] + self._input_topic = param_values[INPUT_TOPIC] self._out_qa_topic = param_values[OUT_QA_TOPIC] self.prompt_file = param_values[FEW_SHOT_PROMPT] + self.timeout = param_values[PARAM_TIMEOUT] self.question_queue = queue.Queue() self.handler_thread = threading.Thread(target=self.process_question_queue) @@ -59,8 +61,8 @@ def __init__(self): # Handle subscription/publication topics. self.subscription = self.create_subscription( - InterpretedAudioUserEmotion, - self._in_emotion_topic, + DialogueUtterance, + self._input_topic, self.question_answer_callback, 1, ) @@ -68,28 +70,25 @@ def __init__(self): SystemTextResponse, self._out_qa_topic, 1 ) - def get_response(self, user_utterance: str, user_emotion: str): + def get_response(self, sub_msg: DialogueUtterance): """ - Generate a response to the utterance, enriched with the addition of - the user's detected emotion. Inference calls can be added and revised - here. 
+ Generate a response to the received message. + Inference calls can be added and revised here. """ - return_msg = "" try: if self.is_openai_ready: return_msg = colored( - self.prompt_gpt(user_utterance) + "\n", "light_green" + self.prompt_gpt(sub_msg.utterance_text) + "\n", "light_green" ) except RuntimeError as err: self.log.info(err) colored_apology = colored( "I'm sorry. I don't know how to answer your statement.", "light_red" ) - colored_emotion = colored(user_emotion, "light_red") + colored_emotion = colored(sub_msg.emotion, "light_red") return_msg = ( f"{colored_apology} I understand that you feel {colored_emotion}." ) - return return_msg def question_answer_callback(self, msg): """ @@ -108,22 +107,22 @@ def process_question_queue(self): while True: msg = self.question_queue.get() emotion = msg.user_emotion - response = self.get_response(msg.utterance_text, emotion) - self.publish_generated_response(msg.utterance_text, response) - - def publish_generated_response(self, utterance: str, response: str): - msg = SystemTextResponse() - msg.header.frame_id = "GPT Question Answering" - msg.header.stamp = self.get_clock().now().to_msg() - msg.utterance_text = utterance - msg.response = response - colored_utterance = colored(utterance, "light_blue") + response = self.get_response(msg) + self.publish_generated_response(msg, response) + + def publish_generated_response(self, sub_msg: DialogueUtterance, response: str): + pub_msg = SystemTextResponse() + pub_msg.header.frame_id = "GPT Question Answering" + pub_msg.header.stamp = self.get_clock().now().to_msg() + pub_msg.utterance_text = sub_msg.utterance_text + pub_msg.response = response + colored_utterance = colored(sub_msg.utterance_text, "light_blue") colored_response = colored(response, "light_green") self.log.info( f'Responding to utterance:\n>>> "{colored_utterance}"\n>>> with:\n' + f'>>> "{colored_response}"' ) - self._qa_publisher.publish(msg) + self._qa_publisher.publish(pub_msg) def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"): prompt = self.prompt.format(question) @@ -138,6 +137,7 @@ def prompt_gpt(self, question, model: str = "gpt-3.5-turbo"): "https://api.openai.com/v1/chat/completions", json=payload, headers={"Authorization": "Bearer {}".format(self.openai_api_key)}, + timeout=self.timeout, ) return ( json.loads(req.text)["choices"][0]["message"]["content"] From 278a03c781c05d16136c2938c71d9c259520f207 Mon Sep 17 00:00:00 2001 From: derkmed Date: Wed, 20 Dec 2023 23:53:37 -0500 Subject: [PATCH 05/19] Remove unused legacy intent detection --- .../audio/intent/intent_detector.py | 127 ------------------ 1 file changed, 127 deletions(-) delete mode 100644 ros/angel_system_nodes/angel_system_nodes/audio/intent/intent_detector.py diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/intent_detector.py deleted file mode 100644 index 14d35a330..000000000 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/intent_detector.py +++ /dev/null @@ -1,127 +0,0 @@ -import rclpy -from rclpy.node import Node - -from angel_msgs.msg import InterpretedAudioUserIntent, Utterance -from angel_utils import make_default_main - - -# Please refer to labels defined in -# https://docs.google.com/document/d/1uuvSL5de3LVM9c0tKpRKYazDxckffRHf7IAcabSw9UA . 
-NEXT_STEP_KEYPHRASES = ["skip", "next", "next step"] -PREV_STEP_KEYPHRASES = ["previous", "previous step", "last step", "go back"] -OVERRIDE_KEYPHRASES = ["angel", "angel system"] - -# TODO(derekahmed): Please figure out how to keep this sync-ed with -# config/angel_system_cmds/user_intent_to_sys_cmd_v1.yaml. -LABELS = ["Go to next step", "Go to previous step"] - - -UTTERANCES_TOPIC = "utterances_topic" -PARAM_EXPECT_USER_INTENT_TOPIC = "expect_user_intent_topic" -PARAM_INTERP_USER_INTENT_TOPIC = "interp_user_intent_topic" - - -class IntentDetector(Node): - """ - As of Q12023, intent detection is derived heuristically. This will be shifted - to a model-based approach in the near-future. - """ - - def __init__(self): - super().__init__(self.__class__.__name__) - self.log = self.get_logger() - - parameter_names = [ - UTTERANCES_TOPIC, - PARAM_EXPECT_USER_INTENT_TOPIC, - PARAM_INTERP_USER_INTENT_TOPIC, - ] - set_parameters = self.declare_parameters( - namespace="", - parameters=[(p,) for p in parameter_names], - ) - # Check for not-set parameters - some_not_set = False - for p in set_parameters: - if p.type_ is rclpy.parameter.Parameter.Type.NOT_SET: - some_not_set = True - self.log.error(f"Parameter not set: {p.name}") - if some_not_set: - raise ValueError("Some parameters are not set.") - - self._utterances_topic = self.get_parameter(UTTERANCES_TOPIC).value - self._expect_uintent_topic = self.get_parameter( - PARAM_EXPECT_USER_INTENT_TOPIC - ).value - self._interp_uintent_topic = self.get_parameter( - PARAM_INTERP_USER_INTENT_TOPIC - ).value - self.log.info( - f"Utterances topic: " - f"({type(self._utterances_topic).__name__}) " - f"{self._utterances_topic}" - ) - self.log.info( - f"Expected User Intent topic: " - f"({type(self._expect_uintent_topic).__name__}) " - f"{self._expect_uintent_topic}" - ) - self.log.info( - f"Interpreted User Intent topic: " - f"({type(self._interp_uintent_topic).__name__}) " - f"{self._interp_uintent_topic}" - ) - - # TODO(derekahmed): Add internal queueing to reduce subscriber queue - # size to 1. 
- self.subscription = self.create_subscription( - Utterance, self._utterances_topic, self.listener_callback, 10 - ) - - self._expected_publisher = self.create_publisher( - InterpretedAudioUserIntent, self._expect_uintent_topic, 1 - ) - - self._interp_publisher = self.create_publisher( - InterpretedAudioUserIntent, self._interp_uintent_topic, 1 - ) - - def listener_callback(self, msg): - log = self.get_logger() - intent_msg = InterpretedAudioUserIntent() - intent_msg.utterance_text = msg.value - - lower_utterance = msg.value.lower() - if self.contains_phrase(lower_utterance, NEXT_STEP_KEYPHRASES): - intent_msg.user_intent = LABELS[0] - intent_msg.confidence = 0.5 - elif self.contains_phrase(lower_utterance, PREV_STEP_KEYPHRASES): - intent_msg.user_intent = LABELS[1] - intent_msg.confidence = 0.5 - else: - log.info(f'Detected no intents for "{msg.value}":') - return - - if self.contains_phrase(lower_utterance, OVERRIDE_KEYPHRASES): - intent_msg.confidence = 1.0 - self._expected_publisher.publish(intent_msg) - else: - self._interp_publisher.publish(intent_msg) - - log.info( - f'Detected intents for "{msg.value}":\n' - + f'"{intent_msg.user_intent}": {intent_msg.confidence}' - ) - - def contains_phrase(self, utterance, phrases): - for phrase in phrases: - if phrase in utterance: - return True - return False - - -main = make_default_main(IntentDetector) - - -if __name__ == "__main__": - main() From 5c6332b97e0911bad174db5ff3fa8c8c4ec944ec Mon Sep 17 00:00:00 2001 From: derkmed Date: Wed, 20 Dec 2023 23:53:54 -0500 Subject: [PATCH 06/19] Add timeout parameter to gpt intent detection node --- .../audio/intent/gpt_intent_detector.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py index 8efeedbae..8c5d29a78 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py @@ -5,11 +5,13 @@ import os import rclpy +from angel_msgs.msg import DialogueUtterance from angel_system_nodes.audio.intent.base_intent_detector import ( BaseIntentDetector, INTENT_LABELS, ) -from angel_utils import make_default_main +from angel_utils import declare_and_get_parameters, make_default_main + openai.organization = os.getenv("OPENAI_ORG_ID") @@ -17,18 +19,28 @@ # The following are few shot examples when prompting GPT. FEW_SHOT_EXAMPLES = [ - {"utterance": "Go back to the previous step!", "label": "prev_step."}, - {"utterance": "Next step, please.", "label": "next_step"}, - {"utterance": "How should I wrap this tourniquet?", "label": "inquiry"}, - {"utterance": "The sky is blue", "label": "other"}, + {"utterance": "Go back to the previous step!", "label": "prev_step[eos]"}, + {"utterance": "Next step, please.", "label": "next_step[eos]"}, + {"utterance": "How should I wrap this tourniquet?", "label": "inquiry[eos]"}, + {"utterance": "The sky is blue", "label": "other[eos]"}, + {"utterance": "What is this thing?", "label": "object_clarification[eos]"}, ] +PARAM_TIMEOUT = "timeout" class GptIntentDetector(BaseIntentDetector): def __init__(self): super().__init__() self.log = self.get_logger() + param_values = declare_and_get_parameters( + self, + [ + (PARAM_TIMEOUT, 600), + ], + ) + self.timeout = param_values[PARAM_TIMEOUT] + # This node additionally includes fields for interacting with OpenAI # via LangChain. 
if not os.getenv("OPENAI_API_KEY"): @@ -79,17 +91,16 @@ def _labels_list_str(labels): model_name="gpt-3.5-turbo", openai_api_key=self.openai_api_key, temperature=0.0, - # Only 2 tokens needed for classification (tokens are delimited by use of '_', i.e. - # 'next_step' counts as 2 tokens). - max_tokens=2, + request_timeout=self.timeout, ) return LLMChain(llm=openai_llm, prompt=few_shot_prompt) - def detect_intents(self, msg): + def detect_intents(self, msg: DialogueUtterance): """ Detects the user intent via langchain execution of GPT. """ - return self.chain.run(utterance=msg), 0.5 + intent = self.chain.run(utterance=msg.utterance_text) + return intent.split('[eos]')[0], 0.5 main = make_default_main(GptIntentDetector) From 08f018e39a4388ce79486f61ce43d50be75f998a Mon Sep 17 00:00:00 2001 From: derkmed Date: Thu, 21 Dec 2023 00:06:56 -0500 Subject: [PATCH 07/19] Add copy times to Intent and Emotion Detection nodes --- .../angel_system_nodes/audio/emotion/base_emotion_detector.py | 3 ++- .../angel_system_nodes/audio/intent/base_intent_detector.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py index 9a4a2aef6..a63d9c50c 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py @@ -119,7 +119,8 @@ def process_message(self, msg: DialogueUtterance): """ classification, confidence_score = self.get_inference(msg) pub_msg = dialogue_utterance_processing.copy_dialogue_utterance( - msg, node_name="Emotion Detection") + msg, node_name="Emotion Detection", + copy_time=self.get_clock().now().to_msg()) # Overwrite the user emotion with the latest classification information. pub_msg.emotion = classification pub_msg.emotion_confidence_score = confidence_score diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py index ff396eac0..f2d0b66ee 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py @@ -124,7 +124,8 @@ def publish_msg(self, msg: DialogueUtterance, intent: str, score: float): Handles message publishing for an utterance with a detected intent. """ pub_msg = self.copy_dialogue_utterance(msg, - node_name="Intent Detection") + node_name="Intent Detection", + copy_time=self.get_clock().now().to_msg()) # Overwrite the user intent with the latest classification information. 
pub_msg.intent = intent pub_msg.intent_confidence_score = score From 82a377fbf7f05557b08b142686f241129b3b8225 Mon Sep 17 00:00:00 2001 From: derkmed Date: Thu, 18 Jan 2024 08:40:30 -0500 Subject: [PATCH 08/19] Fix ASR compiler error and clean up code --- .../angel_system_nodes/audio/asr.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py index e432bcd0c..1d765e919 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py @@ -106,7 +106,8 @@ def __init__(self): self.subscription = self.create_subscription( HeadsetAudioData, self._audio_topic, self.listener_callback, 1 ) - self._publisher = self.create_publisher(Utterance + self._publisher = self.create_publisher(DialogueUtterance, + self._utterances_topic, 1) self.audio_stream = [] self.t = threading.Thread() @@ -203,12 +204,16 @@ def asr_server_request_thread(self, audio_data, num_channels, sample_rate): if response: response_text = json.loads(response.text)["text"] self.log.info("Complete ASR text is:\n" + f'"{response_text}"') - if self._is_sentence_tokenize_mode: - for sentence in sent_tokenize(response_text): - self._publisher.publish( + self._publish_response(response_text, + self._is_sentence_tokenize_mode) + + def _publish_response(self, response_text: str, tokenize_sentences: bool): + if tokenize_sentences: + for sentence in sent_tokenize(response_text): + self._publisher.publish( self._construct_dialogue_utterance(sentence)) - else: - self._publisher.publish( + else: + self._publisher.publish( self._construct_dialogue_utterance(response_text)) def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance: From cab2ea9a5a0315f2daaee5983eee65e323d96bde Mon Sep 17 00:00:00 2001 From: derkmed Date: Thu, 18 Jan 2024 08:41:30 -0500 Subject: [PATCH 09/19] Remove unnecessary dialogue utterance library code --- .../audio/dialogue_utterance_processing.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py index 6e05fd8f9..16f6b3a44 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py @@ -1,21 +1,5 @@ from angel_msgs.msg import DialogueUtterance -def get_intent_or(msg: DialogueUtterance, - or_value: str = "not available") -> str: - """ - Returns the msg intent classification information. If the value is absent, - the or_value is passed in. - """ - return msg.intent if msg.intent else or_value - -def get_emotion_or(msg: DialogueUtterance, - or_value: str = "not available") -> str: - """ - Returns the msg emotion classification information. If the value is absent, - the or_value is passed in. 
- """ - return msg.emotion if msg.emotion else or_value - def copy_dialogue_utterance(msg: DialogueUtterance, node_name, copy_time) -> DialogueUtterance: From 9b27c444290d46f89f943d64dc7d10e419ffe13a Mon Sep 17 00:00:00 2001 From: derkmed Date: Thu, 18 Jan 2024 08:45:20 -0500 Subject: [PATCH 10/19] Clean up Emotion Detection Node --- .../angel_system_nodes/audio/emotion/base_emotion_detector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py index a63d9c50c..bb8555a14 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py @@ -53,7 +53,7 @@ def __init__(self): self.emotion_detection_callback, 1, ) - self._emo_publisher = self.create_publisher( + self._publication = self.create_publisher( DialogueUtterance, self._out_topic, 1 ) @@ -94,7 +94,7 @@ def get_inference(self, msg: DialogueUtterance): """ return self._get_vader_sentiment_analysis(msg.utterance_text) - def emotion_detection_callback(self, msg): + def emotion_detection_callback(self, msg: DialogueUtterance): """ This is the main ROS node listener callback loop that will process all messages received via subscribed topics. From 6f383bcf58d6de21a94594b1c40e6dfaef4ecacc Mon Sep 17 00:00:00 2001 From: derkmed Date: Thu, 18 Jan 2024 08:56:42 -0500 Subject: [PATCH 11/19] Fix Intent Detection Compiler Error --- .../audio/intent/base_intent_detector.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py index f2d0b66ee..3ff903d18 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py @@ -95,7 +95,6 @@ def _tiebreak_intents(intents, confidences): ) return classification, score - intent, score = self.detect_intents(msg) lower_utterance = msg.utterance_text.lower() intents = [] confidences = [] @@ -108,37 +107,39 @@ def _tiebreak_intents(intents, confidences): if self._contains_phrase(lower_utterance, QUESTION_KEYPHRASES): intents.append(INTENT_LABELS[2]) confidences.append(0.5) + if not intents: colored_utterance = colored(msg.utterance_text, "light_blue") - self.log.info(f'No intents detected for:\n>>> "{colored_utterance}":') + self.log.info( + f'No intents detected for:\n>>> "{colored_utterance}":') return None, -1.0 + else: + classification, confidence = _tiebreak_intents(intents, confidences) + classification = colored(classification, "light_green") + self.publish_message(msg.utterance_text, classification, confidence) - classification, confidence = _tiebreak_intents(intents, confidences) - classification = colored(classification, "light_green") - - if intent: - self.publish_msg(msg.utterance_text, intent, score) - - def publish_msg(self, msg: DialogueUtterance, intent: str, score: float): + def publish_message(self, msg: DialogueUtterance, intent: str, + score: float): """ Handles message publishing for an utterance with a detected intent. 
""" - pub_msg = self.copy_dialogue_utterance(msg, - node_name="Intent Detection", - copy_time=self.get_clock().now().to_msg()) + pub_msg = self.copy_dialogue_utterance( + msg, node_name="Intent Detection", + copy_time=self.get_clock().now().to_msg()) # Overwrite the user intent with the latest classification information. pub_msg.intent = intent pub_msg.intent_confidence_score = score # Decide which intent topic to publish the message to. published_topic = None - if self._contains_phrase(pub_msg.utterance_text.lower(), OVERRIDE_KEYPHRASES): + if self._contains_phrase(pub_msg.utterance_text.lower(), + OVERRIDE_KEYPHRASES): + published_topic = PARAM_EXPECT_USER_INTENT_TOPIC pub_msg.intent_confidence_score = 1.0 self._expected_publisher.publish(pub_msg) - published_topic = PARAM_EXPECT_USER_INTENT_TOPIC else: - self._interp_publisher.publish(pub_msg) published_topic = PARAM_INTERP_USER_INTENT_TOPIC + self._interp_publisher.publish(pub_msg) colored_utterance = colored(pub_msg.utterance_text, "light_blue") colored_intent = colored(pub_msg.intent, "light_green") From 0275be10a21654d97b607811ab11657711cb6851 Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 11:20:26 -0500 Subject: [PATCH 12/19] Apply code formatting to audio nodes --- .../angel_system_nodes/audio/asr.py | 17 +++++++---------- .../audio/dialogue_utterance_processing.py | 7 ++++--- .../audio/emotion/base_emotion_detector.py | 10 +++++----- .../audio/intent/base_intent_detector.py | 13 +++++-------- .../audio/intent/gpt_intent_detector.py | 4 ++-- 5 files changed, 23 insertions(+), 28 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py index 1d765e919..a24208dea 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/asr.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/asr.py @@ -106,8 +106,9 @@ def __init__(self): self.subscription = self.create_subscription( HeadsetAudioData, self._audio_topic, self.listener_callback, 1 ) - self._publisher = self.create_publisher(DialogueUtterance, - self._utterances_topic, 1) + self._publisher = self.create_publisher( + DialogueUtterance, self._utterances_topic, 1 + ) self.audio_stream = [] self.t = threading.Thread() @@ -204,18 +205,15 @@ def asr_server_request_thread(self, audio_data, num_channels, sample_rate): if response: response_text = json.loads(response.text)["text"] self.log.info("Complete ASR text is:\n" + f'"{response_text}"') - self._publish_response(response_text, - self._is_sentence_tokenize_mode) + self._publish_response(response_text, self._is_sentence_tokenize_mode) def _publish_response(self, response_text: str, tokenize_sentences: bool): if tokenize_sentences: for sentence in sent_tokenize(response_text): - self._publisher.publish( - self._construct_dialogue_utterance(sentence)) + self._publisher.publish(self._construct_dialogue_utterance(sentence)) else: - self._publisher.publish( - self._construct_dialogue_utterance(response_text)) - + self._publisher.publish(self._construct_dialogue_utterance(response_text)) + def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance: msg = DialogueUtterance() msg.header.frame_id = "ASR" @@ -225,7 +223,6 @@ def _construct_dialogue_utterance(self, msg_text: str) -> DialogueUtterance: return msg - main = make_default_main(ASR) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py index 
16f6b3a44..2674d7000 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/dialogue_utterance_processing.py @@ -1,8 +1,9 @@ from angel_msgs.msg import DialogueUtterance -def copy_dialogue_utterance(msg: DialogueUtterance, - node_name, - copy_time) -> DialogueUtterance: + +def copy_dialogue_utterance( + msg: DialogueUtterance, node_name, copy_time +) -> DialogueUtterance: msg = DialogueUtterance() msg.header.frame_id = node_name msg.utterance_text = msg.utterance_text diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py index bb8555a14..f2b98bf89 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py @@ -53,9 +53,7 @@ def __init__(self): self.emotion_detection_callback, 1, ) - self._publication = self.create_publisher( - DialogueUtterance, self._out_topic, 1 - ) + self._publication = self.create_publisher(DialogueUtterance, self._out_topic, 1) self.message_queue = queue.Queue() self.handler_thread = threading.Thread(target=self.process_message_queue) @@ -119,8 +117,10 @@ def process_message(self, msg: DialogueUtterance): """ classification, confidence_score = self.get_inference(msg) pub_msg = dialogue_utterance_processing.copy_dialogue_utterance( - msg, node_name="Emotion Detection", - copy_time=self.get_clock().now().to_msg()) + msg, + node_name="Emotion Detection", + copy_time=self.get_clock().now().to_msg(), + ) # Overwrite the user emotion with the latest classification information. pub_msg.emotion = classification pub_msg.emotion_confidence_score = confidence_score diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py index 3ff903d18..cb1c7f6fc 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/base_intent_detector.py @@ -110,30 +110,27 @@ def _tiebreak_intents(intents, confidences): if not intents: colored_utterance = colored(msg.utterance_text, "light_blue") - self.log.info( - f'No intents detected for:\n>>> "{colored_utterance}":') + self.log.info(f'No intents detected for:\n>>> "{colored_utterance}":') return None, -1.0 else: classification, confidence = _tiebreak_intents(intents, confidences) classification = colored(classification, "light_green") self.publish_message(msg.utterance_text, classification, confidence) - def publish_message(self, msg: DialogueUtterance, intent: str, - score: float): + def publish_message(self, msg: DialogueUtterance, intent: str, score: float): """ Handles message publishing for an utterance with a detected intent. """ pub_msg = self.copy_dialogue_utterance( - msg, node_name="Intent Detection", - copy_time=self.get_clock().now().to_msg()) + msg, node_name="Intent Detection", copy_time=self.get_clock().now().to_msg() + ) # Overwrite the user intent with the latest classification information. pub_msg.intent = intent pub_msg.intent_confidence_score = score # Decide which intent topic to publish the message to. 
published_topic = None - if self._contains_phrase(pub_msg.utterance_text.lower(), - OVERRIDE_KEYPHRASES): + if self._contains_phrase(pub_msg.utterance_text.lower(), OVERRIDE_KEYPHRASES): published_topic = PARAM_EXPECT_USER_INTENT_TOPIC pub_msg.intent_confidence_score = 1.0 self._expected_publisher.publish(pub_msg) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py index 8c5d29a78..47099c2ce 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/intent/gpt_intent_detector.py @@ -13,7 +13,6 @@ from angel_utils import declare_and_get_parameters, make_default_main - openai.organization = os.getenv("OPENAI_ORG_ID") openai.api_key = os.getenv("OPENAI_API_KEY") @@ -28,6 +27,7 @@ PARAM_TIMEOUT = "timeout" + class GptIntentDetector(BaseIntentDetector): def __init__(self): super().__init__() @@ -100,7 +100,7 @@ def detect_intents(self, msg: DialogueUtterance): Detects the user intent via langchain execution of GPT. """ intent = self.chain.run(utterance=msg.utterance_text) - return intent.split('[eos]')[0], 0.5 + return intent.split("[eos]")[0], 0.5 main = make_default_main(GptIntentDetector) From ea4224cc65db0d67d83529e5da4b1f449e4d1e6f Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 11:54:04 -0500 Subject: [PATCH 13/19] Revise ros_bag in Q&A tmux config --- tmux/demos/cooking/eval_vocalized_question_answering.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmux/demos/cooking/eval_vocalized_question_answering.yml b/tmux/demos/cooking/eval_vocalized_question_answering.yml index 3fd3c8dbb..ed10e1ae3 100644 --- a/tmux/demos/cooking/eval_vocalized_question_answering.yml +++ b/tmux/demos/cooking/eval_vocalized_question_answering.yml @@ -60,7 +60,7 @@ tmux_options: -f <%= ENV["ANGEL_WORKSPACE_DIR"] %>/tmux/tmux.conf windows: # - ros_bag_play: ros2 bag play <> - - ros_bag_play: sleep 5; ros2 bag play /angel_workspace/ros_bags/rosbag2_2023_03_01-17_28_00/rosbag2_2023_03_01-17_28_00_0.db3 + - ros_bag_play: sleep 5; ros2 bag play /angel_workspace/ros_bags/rosbag2_2023_07_12-17_51_14_0.db3 - vocal: layout: even-vertical panes: From 7b3409f9841f6f50876ab5ede310b27f4c0ae4de Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:18:13 -0500 Subject: [PATCH 14/19] Fix audio node module setup paths --- .../angel_system_nodes/audio/emotion/__init__.py | 0 .../angel_system_nodes/audio/intent/__init__.py | 0 ros/angel_system_nodes/setup.py | 11 +++++------ 3 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 ros/angel_system_nodes/angel_system_nodes/audio/emotion/__init__.py create mode 100644 ros/angel_system_nodes/angel_system_nodes/audio/intent/__init__.py diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/__init__.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/intent/__init__.py b/ros/angel_system_nodes/angel_system_nodes/audio/intent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ros/angel_system_nodes/setup.py b/ros/angel_system_nodes/setup.py index 81c576c9a..f0f8baed3 100644 --- a/ros/angel_system_nodes/setup.py +++ b/ros/angel_system_nodes/setup.py @@ -20,12 +20,11 @@ entry_points={ "console_scripts": [ "video_listener = angel_system_nodes.video_subscriber:main", - 
"base_intent_detector = angel_system_nodes.base_intent_detector:main", - "gpt_intent_detector = angel_system_nodes.gpt_intent_detector:main", - "base_emotion_detector = angel_system_nodes.base_emotion_detector:main", - "gpt_emotion_detector = angel_system_nodes.gpt_emotion_detector:main", - "question_answerer = angel_system_nodes.question_answerer:main", - "intent_detector = angel_system_nodes.intent_detector:main", + "base_intent_detector = angel_system_nodes.audio.intent.base_intent_detector:main", + "gpt_intent_detector = angel_system_nodes.audio.intent.gpt_intent_detector:main", + "base_emotion_detector = angel_system_nodes.audio.emotion.base_emotion_detector:main", + "gpt_emotion_detector = angel_system_nodes.audio.emotion.gpt_emotion_detector:main", + "question_answerer = angel_system_nodes.audio.question_answerer:main", "spatial_mapper = angel_system_nodes.spatial_mapper:main", "feedback_generator = angel_system_nodes.feedback_generator:main", "annotation_event_monitor = angel_system_nodes.annotation_event_monitor:main", From 87f363f5b237b4e755facebec592a60b0062d849 Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:34:05 -0500 Subject: [PATCH 15/19] Fix emotion detection node subscription --- .../angel_system_nodes/audio/emotion/base_emotion_detector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py index f2b98bf89..e05621f93 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/base_emotion_detector.py @@ -49,7 +49,7 @@ def __init__(self): # Handle subscription/publication topics. self._subscription = self.create_subscription( DialogueUtterance, - self._in_expect_uintent_topic, + self._in_topic, self.emotion_detection_callback, 1, ) From 090ad68ced069d14f12a45a5f61f464e62f9bec0 Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:48:28 -0500 Subject: [PATCH 16/19] Add timeout to emotion detection node --- .../audio/emotion/gpt_emotion_detector.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py index 06668bf91..9436609bf 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py @@ -8,7 +8,7 @@ BaseEmotionDetector, LABEL_MAPPINGS, ) -from angel_utils import make_default_main +from angel_utils import declare_and_get_parameters, make_default_main openai.organization = os.getenv("OPENAI_ORG_ID") openai.api_key = os.getenv("OPENAI_API_KEY") @@ -23,12 +23,21 @@ {"utterance": "We're doing great and I'm learning a lot!", "label": "positive"}, ] +PARAM_TIMEOUT = "timeout" class GptEmotionDetector(BaseEmotionDetector): def __init__(self): super().__init__() self.log = self.get_logger() + param_values = declare_and_get_parameters( + self, + [ + (PARAM_TIMEOUT, 600), + ], + ) + self.timeout = param_values[PARAM_TIMEOUT] + # This node additionally includes fields for interacting with OpenAI # via LangChain. 
if not os.getenv("OPENAI_API_KEY"): From 91a3b306ec3acd8286e5e7ee7684056faa350dbd Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:48:52 -0500 Subject: [PATCH 17/19] Add default timeout to question answerer node --- .../angel_system_nodes/audio/question_answerer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py b/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py index c0963e9ba..956044652 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/question_answerer.py @@ -32,6 +32,7 @@ def __init__(self): (INPUT_TOPIC,), (OUT_QA_TOPIC,), (FEW_SHOT_PROMPT,), + (PARAM_TIMEOUT, 600), ], ) self._input_topic = param_values[INPUT_TOPIC] From b233a4f09e5e88720985940e40f22d098d6ff014 Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:49:28 -0500 Subject: [PATCH 18/19] Fix question answering config to apply latest refactorings and relocations --- tmux/demos/cooking/eval_vocalized_question_answering.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tmux/demos/cooking/eval_vocalized_question_answering.yml b/tmux/demos/cooking/eval_vocalized_question_answering.yml index ed10e1ae3..de0e0a5c8 100644 --- a/tmux/demos/cooking/eval_vocalized_question_answering.yml +++ b/tmux/demos/cooking/eval_vocalized_question_answering.yml @@ -25,7 +25,7 @@ root: <%= ENV["ANGEL_WORKSPACE_DIR"] %> # on_project_start: command on_project_start: | export ROS_NAMESPACE=${ROS_NAMESPACE:-/debug} - export CONFIG_DIR=${ANGEL_WORKSPACE_DIR}/src/angel_system_nodes/configs + export CONFIG_DIR=${ANGEL_WORKSPACE_DIR}/config export NODE_RESOURCES_DIR=${ANGEL_WORKSPACE_DIR}/src/angel_system_nodes/resource # Run on project start, the first time # on_project_first_start: command @@ -94,14 +94,13 @@ windows: panes: - gpt_emotion_detection: ros2 run angel_system_nodes gpt_emotion_detector --ros-args -r __ns:=${ROS_NAMESPACE} - -p expect_user_intent_topic:=expect_user_intent_topic - -p interp_user_intent_topic:=interp_user_intent_topic + -p input_topic:=interp_user_intent_topic -p user_emotion_topic:=gpt_emotion_topic - question_answering: layout: even-vertical panes: - gpt_question_answering: ros2 run angel_system_nodes question_answerer --ros-args -r __ns:=${ROS_NAMESPACE} - -p user_emotion_topic:=gpt_emotion_topic + -p input_topic:=gpt_emotion_topic -p system_text_response_topic:=system_text_response_topic -p few_shot_prompt_file:=${CONFIG_DIR}/llm_prompts/tourniquet_steps_prompt From 3ec232daf00c9e4edd98355bb443e9ee1dff8ece Mon Sep 17 00:00:00 2001 From: derkmed Date: Fri, 2 Feb 2024 12:51:52 -0500 Subject: [PATCH 19/19] Apply code style to Emotion Detection node --- .../angel_system_nodes/audio/emotion/gpt_emotion_detector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py index 9436609bf..f725478df 100644 --- a/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py +++ b/ros/angel_system_nodes/angel_system_nodes/audio/emotion/gpt_emotion_detector.py @@ -25,6 +25,7 @@ PARAM_TIMEOUT = "timeout" + class GptEmotionDetector(BaseEmotionDetector): def __init__(self): super().__init__()
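
The patch series above introduces the DialogueUtterance message and migrates the audio nodes to publish and subscribe to it. As a quick orientation (not part of the patches themselves), below is a minimal sketch of a downstream ROS 2 node consuming the new message and its optional fields; the node name and topic name ("dialogue_utterance_listener", "dialogue_utterances") are illustrative assumptions, not names defined in this repository.

import rclpy
from rclpy.node import Node

from angel_msgs.msg import DialogueUtterance


class DialogueUtteranceListener(Node):
    """Logs incoming DialogueUtterance messages, including the optional fields."""

    def __init__(self):
        super().__init__("dialogue_utterance_listener")
        # Topic name is an assumption; point this at whatever topic the ASR,
        # intent detection, or emotion detection node is configured to publish on.
        self._sub = self.create_subscription(
            DialogueUtterance, "dialogue_utterances", self._callback, 1
        )

    def _callback(self, msg: DialogueUtterance) -> None:
        # intent/emotion are optional: empty strings mean the upstream node did
        # not populate them (e.g. the message came straight from ASR).
        intent = msg.intent if msg.intent else "not available"
        emotion = msg.emotion if msg.emotion else "not available"
        self.get_logger().info(
            f'[{msg.header.frame_id}] "{msg.utterance_text}" '
            f"(intent={intent}, emotion={emotion})"
        )


def main():
    rclpy.init()
    rclpy.spin(DialogueUtteranceListener())
    rclpy.shutdown()


if __name__ == "__main__":
    main()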