diff --git a/hlpr_speech_recognition/data/kps.dic b/hlpr_speech_recognition/data/kps.dic index 0aa3501..7ea2dc8 100644 --- a/hlpr_speech_recognition/data/kps.dic +++ b/hlpr_speech_recognition/data/kps.dic @@ -7,7 +7,6 @@ CAN(2) K AH N CLOSE K L OW S CLOSE(2) K L OW Z END EH N D -EXPERIMENT IH K S P EH R AH M AH N T FINISH F IH N IH SH GO G OW HAND HH AE N D diff --git a/hlpr_speech_recognition/data/kps.map b/hlpr_speech_recognition/data/kps.map index 6fe3a45..99b6ce2 100644 --- a/hlpr_speech_recognition/data/kps.map +++ b/hlpr_speech_recognition/data/kps.map @@ -1,7 +1,7 @@ GREETING:HELLO POLI! HEAR_CHECK:CAN YOU HEAR ME? SMALL_TALK:HOW ARE YOU TODAY? -START_EXP:LET'S BEGIN THE EXPERIMENT +START_EXP:LET'S START OPEN_HAND:OPEN YOUR HAND CLOSE_HAND:CLOSE YOUR HAND START_GC:RELEASE YOUR ARM diff --git a/hlpr_speech_recognition/data/kps.txt b/hlpr_speech_recognition/data/kps.txt index a1851e7..e905345 100644 --- a/hlpr_speech_recognition/data/kps.txt +++ b/hlpr_speech_recognition/data/kps.txt @@ -1,13 +1,13 @@ -HELLO POLI! -CAN YOU HEAR ME? -HOW ARE YOU TODAY? -LET'S BEGIN THE EXPERIMENT -OPEN YOUR HAND -CLOSE YOUR HAND -RELEASE YOUR ARM -HOLD YOUR ARM -START HERE -BEGIN HERE -END HERE -FINISH HERE -GO HERE +HELLO POLI!/1e-35/ +CAN YOU HEAR ME?/1e-40/ +HOW ARE YOU TODAY?/1e-40/ +LET'S START/1e-25/ +OPEN YOUR HAND/1e-30/ +CLOSE YOUR HAND/1e-30/ +RELEASE YOUR ARM/1e-20/ +HOLD YOUR ARM/1e-25/ +START HERE/1e-20/ +BEGIN HERE/1e-10/ +END HERE/1e-20/ +FINISH HERE/1e-20/ +GO HERE/1e-20/ diff --git a/hlpr_speech_recognition/data/kps.yaml b/hlpr_speech_recognition/data/kps.yaml index d4d6062..d5250c2 100644 --- a/hlpr_speech_recognition/data/kps.yaml +++ b/hlpr_speech_recognition/data/kps.yaml @@ -8,7 +8,7 @@ tag: "SMALL_TALK" speech: ["HOW ARE YOU TODAY?"] --- tag: "START_EXP" -speech: ["LET'S BEGIN THE EXPERIMENT"] +speech: ["LET'S START"] --- tag: "OPEN_HAND" speech: ["OPEN YOUR HAND"] @@ -30,3 +30,6 @@ speech: ["END HERE","FINISH HERE"] --- tag: "KEYFRAME" speech: ["GO HERE"] +--- +tag: "UNKNOWN" +speech: ["UNKNOWN"] \ No newline at end of file diff --git a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_gui.py b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_gui.py index fd63e03..06b8f6e 100755 --- a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_gui.py +++ b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_gui.py @@ -54,7 +54,7 @@ class SpeechGui(QtGui.QWidget): def __init__(self): QtGui.QWidget.__init__(self) - + newFont = QtGui.QFont("Times", 24, QtGui.QFont.Bold) # Add a main layout @@ -66,8 +66,8 @@ def __init__(self): # Initialize rosnode rospy.init_node("speech_gui") - - # Default values for speech listeners + + # Default values for speech listeners rospack = rospkg.RosPack() default_pub_topic = 'hlpr_speech_commands' @@ -85,9 +85,9 @@ def __init__(self): self.keywords = rospy.get_param(SpeechListener.KEYWORDS_PARAM, dict()).values() self.commands = [val for sublist in self.keywords for val in sublist] self.commands.sort() - + positions = [(i,j) for i in range(len(self.commands)) for j in range(3)] - + for position, name in zip(positions, self.commands): button = QtGui.QPushButton(name) button.setObjectName('%s' % name) @@ -98,8 +98,8 @@ def __init__(self): mainLayout.addLayout(grid) mainLayout.addStretch() - - # Show the GUI + + # Show the GUI self.adjustSize() self.setWindowTitle("Speech Commands Interface") self.show() @@ -109,7 +109,7 @@ def __init__(self): self.pub = rospy.Publisher(self.recog_topic, StampedString, queue_size=1) 
rospy.loginfo("Finished initializing speech GUI") - + # Button handler after its clicked def handleButton(self): clicked_button = self.sender() diff --git a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_listener.py b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_listener.py index 3da01f4..74713de 100755 --- a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_listener.py +++ b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_listener.py @@ -31,13 +31,13 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# A script to use pocketsphinx's "keyphrase spotting" feature with +# A script to use pocketsphinx's "keyphrase spotting" feature with # python and ros. Note that it # # Authors: Baris Akgun, Priyanka Khante # Edited: Vivian Chu, 8-29-16 - rosparam and multiple yaml files # -# A convenience class to map speech recognition result to commands +# A convenience class to map speech recognition result to commands # while keeping the time stamp. # # Note that currently the mapping is done by hand @@ -53,115 +53,114 @@ class SpeechListener: - COMMAND_TOPIC_PARAM = "/speech/publish_topic" - SERVICE_TOPIC_PARAM = "/speech/service_topic" - KEYWORDS_PARAM = "/speech/keywords" - COMMAND_TYPE = "/speech/command_type" - LEAVE_COMMAND = "/speech/leave_command" - - def __init__(self, commandBuffSize=10, init_node=True): - - if (init_node): - # initialize the ros node - rospy.init_node("speech_listener") - - # Default values for speech listener - rospack = rospkg.RosPack() - default_pub_topic = 'hlpr_speech_commands' - default_yaml_files = [rospack.get_path('hlpr_speech_recognition')+'/data/kps.yaml'] - default_service_topic = 'get_last_speech_cmd' - - # Pull values from rosparam - self.recog_topic = rospy.get_param(SpeechListener.COMMAND_TOPIC_PARAM, default_pub_topic) - self.yaml_files = rospy.get_param("~yaml_list", default_yaml_files) - self.service_topic = rospy.get_param(SpeechListener.SERVICE_TOPIC_PARAM, default_service_topic) - self.msg_type = eval(rospy.get_param(SpeechListener.COMMAND_TYPE, 'StampedString')) # True if message is only str, false includes header - self.leave_command_flag = rospy.get_param(SpeechListener.LEAVE_COMMAND, False) #do we care if we the last command is old - - rospy.Subscriber(self.recog_topic, self.msg_type, self.callback) - - # Converts the yaml files into keywords to store into the dictionary - self.keywords_to_commands = {} - for kps_path in self.yaml_files: - for data in yaml.load_all(file(kps_path,'r')): - self.keywords_to_commands[str(data['tag'])] = data['speech'] - - # Store this on the rosparam server now - rospy.set_param(SpeechListener.KEYWORDS_PARAM, self.keywords_to_commands) - - self._commandBuffSize = commandBuffSize - #self.commandsQueue = deque(maxlen=self._commandBuffSize) - - # Flags for starting/stopping the node - self.spinning = False - self.last_command_fresh = False - self.last_command = None - self.last_ts = None - self.last_string = None - - # Setup service call - s = rospy.Service(self.service_topic, SpeechService, self.get_last_command) - rospy.loginfo("Speech listener initialized") - - # The following function is called each time, for every message - def callback(self, msg): - - if self.msg_type == StampedString: - self.last_string = msg.keyphrase - self.last_ts = msg.stamp - else: - self.last_string = msg.data - - self.last_command = self._map_keyword_to_command(self.last_string) - self.last_command_fresh = True - if self.spinning: - 
rospy.loginfo(rospy.get_caller_id() + ' I heard %s', str(self.last_command)) - - # method to extract command string from msg - def _map_keyword_to_command(self, data): - for (command, keywords) in self.keywords_to_commands.iteritems(): - for word in keywords: - if data.find(word) > -1: - return command - - # This is now made a service call - def get_last_command(self, req=None): - - # Check if we care how "recent" the command was - if not self.leave_command_flag: - - # returns a service request error - if not self.last_command_fresh: - return None - - # The command hasn't been ask for before - self.last_command_fresh = False - if (req): - return {'speech_cmd': self.last_command} - else: - return self.last_command - - def get_last_string(self): - return self.last_string - - def get_last_ts(self): - return self.last_ts - - # clears commands queue - def cleanup(self): - #commandsQueue.clear() - pass - - def spin(self): - self.spinning = True - # if shutdown, need to clean up the commands queue - rospy.on_shutdown(self.cleanup) - rospy.spin() + COMMAND_TOPIC_PARAM = "/speech/publish_topic" + SERVICE_TOPIC_PARAM = "/speech/service_topic" + KEYWORDS_PARAM = "/speech/keywords" + COMMAND_TYPE = "/speech/command_type" + LEAVE_COMMAND = "/speech/leave_command" + + def __init__(self, commandBuffSize=10, init_node=True): + + if (init_node): + # initialize the ros node + rospy.init_node("speech_listener") + + # Default values for speech listener + rospack = rospkg.RosPack() + default_pub_topic = 'hlpr_speech_commands' + default_yaml_files = [rospack.get_path('hlpr_speech_recognition')+'/data/kps.yaml'] + default_service_topic = 'get_last_speech_cmd' + + # Pull values from rosparam + self.recog_topic = rospy.get_param(SpeechListener.COMMAND_TOPIC_PARAM, default_pub_topic) + self.yaml_files = rospy.get_param("~yaml_list", default_yaml_files) + self.service_topic = rospy.get_param(SpeechListener.SERVICE_TOPIC_PARAM, default_service_topic) + self.msg_type = eval(rospy.get_param(SpeechListener.COMMAND_TYPE, 'StampedString')) # True if message is only str, false includes header + self.leave_command_flag = rospy.get_param(SpeechListener.LEAVE_COMMAND, False) # do we care if the last command is old + + rospy.Subscriber(self.recog_topic, self.msg_type, self.callback) + + # Converts the yaml files into keywords to store into the dictionary + self.keywords_to_commands = {} + for kps_path in self.yaml_files: + for data in yaml.load_all(file(kps_path,'r')): + self.keywords_to_commands[str(data['tag'])] = data['speech'] + + # Store this on the rosparam server now + rospy.set_param(SpeechListener.KEYWORDS_PARAM, self.keywords_to_commands) + + self._commandBuffSize = commandBuffSize + #self.commandsQueue = deque(maxlen=self._commandBuffSize) + + # Flags for starting/stopping the node + self.spinning = False + self.last_command_fresh = False + self.last_command = None + self.last_ts = None + self.last_string = None + + # Setup service call + s = rospy.Service(self.service_topic, SpeechService, self.get_last_command) + rospy.loginfo("Speech listener initialized") + + # The following function is called each time, for every message + def callback(self, msg): + + if self.msg_type == StampedString: + self.last_string = msg.keyphrase + self.last_ts = msg.stamp + else: + self.last_string = msg.data + + self.last_command = self._map_keyword_to_command(self.last_string) + self.last_command_fresh = True + if self.spinning: + rospy.loginfo(rospy.get_caller_id() + ' I heard %s', str(self.last_command)) + + # method to extract
command string from msg + def _map_keyword_to_command(self, data): + for (command, keywords) in self.keywords_to_commands.iteritems(): + for word in keywords: + if data.find(word) > -1: + return command + + # This is now made a service call + def get_last_command(self, req=None): + + # Check if we care how "recent" the command was + if not self.leave_command_flag: + + # returns a service request error + if not self.last_command_fresh: + return None + + # The command hasn't been asked for before + self.last_command_fresh = False + if (req): + return {'speech_cmd': self.last_command} + else: + return self.last_command + + def get_last_string(self): + return self.last_string + + def get_last_ts(self): + return self.last_ts + + # clears commands queue + def cleanup(self): + #commandsQueue.clear() + pass + + def spin(self): + self.spinning = True + # if shutdown, need to clean up the commands queue + rospy.on_shutdown(self.cleanup) + rospy.spin() def listener(): - sl = SpeechListener() - sl.spin() + sl = SpeechListener() + sl.spin() if __name__ == '__main__': - listener() - + listener() diff --git a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_recognizer.py b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_recognizer.py index d790ea0..b84046d 100755 --- a/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_recognizer.py +++ b/hlpr_speech_recognition/src/hlpr_speech_recognition/speech_recognizer.py @@ -31,20 +31,26 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# A script to use pocketsphinx's "keyphrase spotting" feature with +# A script to use pocketsphinx's "keyphrase spotting" feature with # python and ros. Note that it # -# Authors: Baris Akgun +# Authors: Baris Akgun # Edited: Vivian Chu, 8-29-16: rosparam config values +import json +import os +import sys +import time +import wave + +import pyaudio +import rospkg import rospy -from std_msgs.msg import String -from hlpr_speech_msgs.msg import StampedString, SpeechCommand -import sys, os +from hlpr_speech_msgs.msg import SpeechCommand, StampedString from pocketsphinx.pocketsphinx import * from sphinxbase.sphinxbase import * -import pyaudio -import rospkg +from std_msgs.msg import String + from .speech_listener import SpeechListener # Global values specific to speech @@ -54,119 +60,164 @@ class SpeechRecognizer(): - def __init__(self): - - # Intialize the node - rospy.init_node("hlpr_speech_recognizer") - - # get an instance of RosPack with the default search paths - rospack = rospkg.RosPack() - - # Default data files for speech dictionaries - default_modeldir = "/usr/local/share/pocketsphinx/model" - default_dict_path = rospack.get_path('hlpr_speech_recognition') + '/data/kps.dic' - default_kps_path = rospack.get_path('hlpr_speech_recognition') + '/data/kps.txt' - default_rec_thresh = 300 #higher reduces false positives but makes it harder to detect - default_pub_topic = 'hlpr_speech_commands' - - # Load model and dictionary values from param server - modeldir = rospy.get_param("~model_dir", default_modeldir) - dict_path = rospy.get_param("~dict_path", default_dict_path) - kps_path = rospy.get_param("~kps_path", default_kps_path) - self.verbose = rospy.get_param("/speech/verbose", True) # default prints out more info - self.str_msg = rospy.get_param(SpeechListener.COMMAND_TYPE, 'StampedString') # True if message is only str, false includes header - self.cmd_pub_topic = rospy.get_param(SpeechListener.COMMAND_TOPIC_PARAM, default_pub_topic) - - # Parameters
for recognition - self.RECOGNITION_THRESHOLD = rospy.get_param("/speech/rec_thresh", default_rec_thresh) - - # Create a decoder with certain model - self.config = Decoder.default_config() - self.config.set_string('-hmm', os.path.join(modeldir, 'en-us/en-us')) - - # Configure the dictionary - not used? - #lm_path = rospack.get_path('hlpr_speech_recognition') + '/data/kps.lm' - #self.config.set_string('-lm', lm_path) - - # Configuration settings for speech detection - self.config.set_string('-dict', dict_path) - self.config.set_string('-kws', kps_path) #A file with keyphrases to spot, one per line - self.config.set_float('-kws_threshold', 1e-2) #Threshold for p(hyp)/p(alternatives) ratio - self.config.set_float('-kws_plp',1e-10 ) #Phone loop probability for keyword spotting - #self.config.set_float('-kws_delay', 1) #Delay to wait for best detection score - - # Check if we dump extra information to null - if not self.verbose: - self.config.set_string('-logfn','/dev/null') - - # Setup the publisher - if self.str_msg == 'String': - self.pub = rospy.Publisher(self.cmd_pub_topic, String, queue_size=1) - else: - self.pub = rospy.Publisher(self.cmd_pub_topic, StampedString, queue_size=1) - - rospy.loginfo("Finished initializing speech recognizer") - - # Start recognizing - self.begin_rec() - - def begin_rec(self): - - p = pyaudio.PyAudio() - stream = p.open(format=pyaudio.paInt16, - channels=N_CHANNELS, - rate=RATE, - input=True, - frames_per_buffer=BUFFER_SIZE) - stream.start_stream() - - # Process audio chunk by chunk. On keyword detected perform action and restart search - decoder = Decoder(self.config) - decoder.start_utt() - - while not rospy.is_shutdown(): - selectedSegment = None - buf = stream.read(BUFFER_SIZE) - if buf: - decoder.process_raw(buf, False, False) - else: - break - if decoder.hyp() != None: - hypothesis = decoder.hyp() - maxProb = 0 - for seg in decoder.seg(): - if seg.prob > maxProb: - selectedSegment = seg - maxProb = seg.prob - if self.verbose: - print ([(seg.word, seg.prob, seg.start_frame, seg.end_frame) for seg in decoder.seg()]) - - if selectedSegment: - if selectedSegment.prob > self.RECOGNITION_THRESHOLD: - if not hypothesis.hypstr == selectedSegment.word: - print "Hypothesis and the selected segment do not match! 
Going with the selected segment" - - print ("Detected keyword: " + selectedSegment.word) - # Get the time stamp for the message - now = rospy.get_rostime() - - if self.str_msg == 'String': - keyphrase = selectedSegment.word - else: - keyphrase = StampedString() - keyphrase.keyphrase = selectedSegment.word - keyphrase.stamp = rospy.get_rostime() - - self.pub.publish(keyphrase) - elif self.verbose: - print "Not confident enough in the detected keyword" + def __init__(self, subnode=False, publishDebug=False): + + if not subnode: + # Initialize the node + rospy.init_node("hlpr_speech_recognizer") + + # Get an instance of RosPack with the default search paths + rospack = rospkg.RosPack() + + # Default data files for speech dictionaries + default_modeldir = "/usr/local/share/pocketsphinx/model" + default_dict_path = rospack.get_path("hlpr_speech_recognition") + "/data/kps.dic" + default_kps_path = rospack.get_path("hlpr_speech_recognition") + "/data/kps.txt" + default_rec_thresh = 300 #higher reduces false positives but makes it harder to detect + default_pub_topic = "hlpr_speech_commands" + + # Load model and dictionary values from param server + modeldir = rospy.get_param("~model_dir", default_modeldir) + dict_path = rospy.get_param("~dict_path", default_dict_path) + kps_path = rospy.get_param("~kps_path", default_kps_path) + self.verbose = rospy.get_param("/speech/verbose", True) # default prints out more info + self.str_msg = rospy.get_param(SpeechListener.COMMAND_TYPE, "StampedString") # True if message is only str, false includes header + self.cmd_pub_topic = rospy.get_param(SpeechListener.COMMAND_TOPIC_PARAM, default_pub_topic) + + # Parameters for recognition + self.RECOGNITION_THRESHOLD = rospy.get_param("/speech/rec_thresh", default_rec_thresh) + + # Create a decoder with certain model + self.config = Decoder.default_config() + self.config.set_string("-hmm", os.path.join(modeldir, "en-us/en-us")) + + # Configure the dictionary - not used?
+ #lm_path = rospack.get_path("hlpr_speech_recognition") + "/data/kps.lm" + #self.config.set_string("-lm", lm_path) + + # Configuration settings for speech detection + self.config.set_string("-dict", dict_path) + self.config.set_string("-kws", kps_path) #A file with keyphrases to spot, one per line + self.config.set_float("-kws_threshold", 1e-2) #Threshold for p(hyp)/p(alternatives) ratio + self.config.set_float("-kws_plp",1e-10 ) #Phone loop probability for keyword spotting + #self.config.set_float("-kws_delay", 1) #Delay to wait for best detection score + + # Check if we dump extra information to null + if not self.verbose: + self.config.set_string("-logfn","/dev/null") + + # Setup the publisher(s) + if self.str_msg == "String": + self.pub = rospy.Publisher(self.cmd_pub_topic, String, queue_size=1) else: - print 'No Selected Segment' - - decoder.end_utt() + self.pub = rospy.Publisher(self.cmd_pub_topic, StampedString, queue_size=1) + + self.publishDebug = publishDebug + if publishDebug: + self.debugPub = rospy.Publisher("{}_debug".format(default_pub_topic), String) + + rospy.loginfo("Finished initializing speech recognizer") + + # Start recognizing + if not subnode: + self.begin_rec() + self.should_stop_recording = False + + def begin_rec(self, file = None): + self.should_stop_recording = False + if file: + # Audio input from file + stream = open(file, "rb") + rospy.loginfo("Loading audio data from {}".format(file)) + else: + # Audio input from microphone + p = pyaudio.PyAudio() + stream = p.open(format=pyaudio.paInt16, + channels=N_CHANNELS, + rate=RATE, + input=True, + frames_per_buffer=BUFFER_SIZE) + stream.start_stream() + rospy.loginfo("Recording live audio") + + # Process audio chunk by chunk. On keyword detected perform action and restart search + decoder = Decoder(self.config) decoder.start_utt() -if __name__ == '__main__': - SpeechRecognizer() - - + while not rospy.is_shutdown() and not self.should_stop_recording: + selectedSegment = None + buf = stream.read(BUFFER_SIZE) + + if buf: + decoder.process_raw(buf, False, False) + else: + break + if decoder.hyp() != None: + hypothesis = decoder.hyp() + maxProb = float("-inf") + for seg in decoder.seg(): + if seg.prob < -1500: + # Discard suggestions with an absolute probability less than -1500 + continue + + thresholdTooLow = False + for seg2 in decoder.seg(): + diff = seg.prob - seg2.prob + if diff != 0 and diff < self.RECOGNITION_THRESHOLD: + thresholdTooLow = True + break + if thresholdTooLow: + continue + + if seg.prob > maxProb: + selectedSegment = seg + maxProb = seg.prob + if self.verbose: + if self.publishDebug: + self.debugPub.publish(json.dumps([ + {"word": seg.word, "probability": seg.prob, "start": seg.start_frame, "end": seg.end_frame} + for seg in decoder.seg() + ])) + print([(seg.word, seg.prob, seg.start_frame, seg.end_frame) for seg in decoder.seg()]) + + if selectedSegment: + if not hypothesis.hypstr == selectedSegment.word: + if self.publishDebug: + self.debugPub.publish(json.dumps({ + "message": "Hypothesis and the selected segment do not match! Going with the selected segment" + })) + print "Hypothesis and the selected segment do not match! 
Going with the selected segment" + + print ("Detected keyword: " + selectedSegment.word) + # Get the time stamp for the message + now = rospy.get_rostime() + + if self.str_msg == "String": + keyphrase = selectedSegment.word + else: + keyphrase = StampedString() + keyphrase.keyphrase = selectedSegment.word + keyphrase.stamp = rospy.get_rostime() + + self.pub.publish(keyphrase) + else: + print "No selected segment or not confident enough in the detected keyword" + + if self.str_msg == "String": + keyphrase = "UNKNOWN" + else: + keyphrase = StampedString() + keyphrase.keyphrase = "UNKNOWN" + keyphrase.stamp = rospy.get_rostime() + + self.pub.publish(keyphrase) + + decoder.end_utt() + decoder.start_utt() + rospy.loginfo("Stopped speech recognition") + + def end_rec(self): + self.should_stop_recording = True + +if __name__ == "__main__": + SpeechRecognizer() diff --git a/rqt_speech_testing/CMakeLists.txt b/rqt_speech_testing/CMakeLists.txt new file mode 100644 index 0000000..f213c17 --- /dev/null +++ b/rqt_speech_testing/CMakeLists.txt @@ -0,0 +1,206 @@ +cmake_minimum_required(VERSION 2.8.3) +project(rqt_speech_testing) + +## Add support for C++11, supported in ROS Kinetic and newer +# add_definitions(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS + rospy + rqt_gui + rqt_gui_py +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a run_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 
+ +## Generate messages in the 'msg' folder +# add_message_files( +# FILES +# Message1.msg +# Message2.msg +# ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +# generate_messages( +# DEPENDENCIES +# std_msgs # Or other packages containing msgs +# ) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a run_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) +## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if you package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES rqt_speech_testing +# CATKIN_DEPENDS rospy rqt_gui rqt_gui_py +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +# include_directories(include) +include_directories( + ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/rqt_speech_testing.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/rqt_speech_testing_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. 
"rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +install(FILES plugin.xml + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +) + +install(DIRECTORY resource + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +) + +install(PROGRAMS scripts/rqt_speech_testing + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +## Mark executables and/or libraries for installation +# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_rqt_speech_testing.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/rqt_speech_testing/data/Begin here.wav b/rqt_speech_testing/data/Begin here.wav new file mode 100644 index 0000000..7cc8155 Binary files /dev/null and b/rqt_speech_testing/data/Begin here.wav differ diff --git a/rqt_speech_testing/data/Can you hear me?.wav b/rqt_speech_testing/data/Can you hear me?.wav new file mode 100644 index 0000000..977cf31 Binary files /dev/null and b/rqt_speech_testing/data/Can you hear me?.wav differ diff --git a/rqt_speech_testing/data/Close your hand.wav b/rqt_speech_testing/data/Close your hand.wav new file mode 100644 index 0000000..b41bfd4 Binary files /dev/null and b/rqt_speech_testing/data/Close your hand.wav differ diff --git a/rqt_speech_testing/data/End here.wav b/rqt_speech_testing/data/End here.wav new file mode 100644 index 0000000..05f2b16 Binary files /dev/null and b/rqt_speech_testing/data/End here.wav differ diff --git a/rqt_speech_testing/data/Finish here.wav b/rqt_speech_testing/data/Finish here.wav new file mode 100644 index 0000000..f00f720 Binary files /dev/null and b/rqt_speech_testing/data/Finish here.wav differ diff --git a/rqt_speech_testing/data/Go here.wav b/rqt_speech_testing/data/Go here.wav new file mode 100644 index 0000000..bfac111 Binary files /dev/null and b/rqt_speech_testing/data/Go here.wav differ diff --git a/rqt_speech_testing/data/Hello Poli!.wav b/rqt_speech_testing/data/Hello Poli!.wav new file mode 100644 index 0000000..b8602ac Binary files 
/dev/null and b/rqt_speech_testing/data/Hello Poli!.wav differ
diff --git a/rqt_speech_testing/data/Hold your arm.wav b/rqt_speech_testing/data/Hold your arm.wav
new file mode 100644
index 0000000..09eef66
Binary files /dev/null and b/rqt_speech_testing/data/Hold your arm.wav differ
diff --git a/rqt_speech_testing/data/How are you today?.wav b/rqt_speech_testing/data/How are you today?.wav
new file mode 100644
index 0000000..fc85c23
Binary files /dev/null and b/rqt_speech_testing/data/How are you today?.wav differ
diff --git a/rqt_speech_testing/data/Let's begin the experiment.wav b/rqt_speech_testing/data/Let's begin the experiment.wav
new file mode 100644
index 0000000..dbbdde4
Binary files /dev/null and b/rqt_speech_testing/data/Let's begin the experiment.wav differ
diff --git a/rqt_speech_testing/data/Open your hand.wav b/rqt_speech_testing/data/Open your hand.wav
new file mode 100644
index 0000000..d1399b9
Binary files /dev/null and b/rqt_speech_testing/data/Open your hand.wav differ
diff --git a/rqt_speech_testing/data/Release your arm.wav b/rqt_speech_testing/data/Release your arm.wav
new file mode 100644
index 0000000..e67fa00
Binary files /dev/null and b/rqt_speech_testing/data/Release your arm.wav differ
diff --git a/rqt_speech_testing/data/Start here.wav b/rqt_speech_testing/data/Start here.wav
new file mode 100644
index 0000000..0239c32
Binary files /dev/null and b/rqt_speech_testing/data/Start here.wav differ
diff --git a/rqt_speech_testing/package.xml b/rqt_speech_testing/package.xml
new file mode 100644
index 0000000..e415e5b
--- /dev/null
+++ b/rqt_speech_testing/package.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<package>
+  <name>rqt_speech_testing</name>
+  <version>0.0.0</version>
+  <description>The rqt_speech_testing package</description>
+
+  <maintainer email="...">Ryan Petschek</maintainer>
+
+  <license>TODO</license>
+
+  <buildtool_depend>catkin</buildtool_depend>
+  <build_depend>rospy</build_depend>
+  <build_depend>rqt_gui</build_depend>
+  <build_depend>rqt_gui_py</build_depend>
+  <run_depend>rospy</run_depend>
+  <run_depend>rqt_gui</run_depend>
+  <run_depend>rqt_gui_py</run_depend>
+
+  <export>
+  </export>
+</package>
diff --git a/rqt_speech_testing/plugin.xml b/rqt_speech_testing/plugin.xml
new file mode 100644
index 0000000..8f0a6a3
--- /dev/null
+++ b/rqt_speech_testing/plugin.xml
@@ -0,0 +1,15 @@
+<library path="src">
+  <class name="SpeechTest" type="rqt_speech_testing.speech_testing.SpeechTest" base_class_type="rqt_gui_py::Plugin">
+    <description>
+      TODO
+    </description>
+    <qtgui>
+      <icon type="theme">applications-other</icon>
+      <label>HLP-R Speech Recognition Testing</label>
+    </qtgui>
+  </class>
+</library>
\ No newline at end of file
diff --git a/rqt_speech_testing/resource/SpeechTest.ui b/rqt_speech_testing/resource/SpeechTest.ui
new file mode 100644
index 0000000..85a65b2
--- /dev/null
+++ b/rqt_speech_testing/resource/SpeechTest.ui
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Speech Test</class>
+ <widget class="QWidget" name="SpeechTest">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>1006</width>
+    <height>488</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Speech Test GUI</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <item>
+    <layout class="QHBoxLayout" name="horizontalLayout">
+     <item>
+      <widget class="QPushButton" name="openFolderButton">
+       <property name="toolTip">
+        <string>Open folder</string>
+       </property>
+       <property name="text">
+        <string/>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLineEdit" name="location">
+       <property name="toolTip">
+        <string>Load an audio file</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QPushButton" name="openLocationButton">
+       <property name="text">
+        <string>Open</string>
+       </property>
+       <property name="icon">
+        <iconset>
+         <normaloff>..</normaloff>..</iconset>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QPushButton" name="recordButton">
+       <property name="text">
+        <string>Record</string>
+       </property>
+       <property name="icon">
+        <iconset>
+         <normaloff>..</normaloff>..</iconset>
+       </property>
+       <property name="checkable">
+        <bool>true</bool>
+       </property>
+       <property name="checked">
+        <bool>false</bool>
+       </property>
+       <property name="toolTip">
+        <string>Record audio</string>
+       </property>
+      </widget>
+     </item>
+    </layout>
+   </item>
+   <item>
+    <widget class="Line" name="line">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+    </widget>
+   </item>
+   <item>
+    <widget class="QTreeWidget" name="outputTree">
+     <column>
+      <property name="text">
+       <string>Info</string>
+      </property>
+     </column>
+     <column>
+      <property name="text">
+       <string>Recognized Text</string>
+      </property>
+     </column>
+    </widget>
+   </item>
+   <item>
+    <layout class="QHBoxLayout" name="horizontalLayout_2">
+     <item>
+      <spacer name="horizontalSpacer">
+       <property name="orientation">
+        <enum>Qt::Horizontal</enum>
+       </property>
+       <property name="sizeHint" stdset="0">
+        <size>
+         <width>40</width>
+         <height>20</height>
+        </size>
+       </property>
+      </spacer>
+     </item>
+     <item>
+      <widget class="QPushButton" name="clearButton">
+       <property name="toolTip">
+        <string>Clear output</string>
+       </property>
+       <property name="text">
+        <string/>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QPushButton" name="exportButton">
+       <property name="toolTip">
+        <string>Export output</string>
+       </property>
+       <property name="text">
+        <string/>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <spacer name="horizontalSpacer_2">
+       <property name="orientation">
+        <enum>Qt::Horizontal</enum>
+       </property>
+       <property name="sizeHint" stdset="0">
+        <size>
+         <width>40</width>
+         <height>20</height>
+        </size>
+       </property>
+      </spacer>
+     </item>
+    </layout>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/rqt_speech_testing/scripts/rqt_speech_testing b/rqt_speech_testing/scripts/rqt_speech_testing
new file mode 100755
index 0000000..b7ac00c
--- /dev/null
+++ b/rqt_speech_testing/scripts/rqt_speech_testing
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import sys
+
+from rqt_gui.main import Main
+
+plugin = "rqt_speech_testing.speech_testing.SpeechTest"
+main = Main(filename=plugin)
+sys.exit(main.main(standalone=plugin))
\ No newline at end of file
diff --git a/rqt_speech_testing/setup.py b/rqt_speech_testing/setup.py
new file mode 100644
index 0000000..ada47a8
--- /dev/null
+++ b/rqt_speech_testing/setup.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+from distutils.core import setup
+from catkin_pkg.python_setup import generate_distutils_setup
+
+d = generate_distutils_setup(
+    packages=["rqt_speech_testing"],
+    package_dir={"": "src"},
+    scripts=['scripts/rqt_speech_testing']
+)
+
+setup(**d)
\ No newline at end of file
diff --git a/rqt_speech_testing/src/rqt_speech_testing/__init__.py b/rqt_speech_testing/src/rqt_speech_testing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rqt_speech_testing/src/rqt_speech_testing/speech_testing.py b/rqt_speech_testing/src/rqt_speech_testing/speech_testing.py
new file mode 100644
index 0000000..d9dff42
--- /dev/null
+++ b/rqt_speech_testing/src/rqt_speech_testing/speech_testing.py
@@ -0,0 +1,48 @@
+import rospy
+import rospkg
+
+from qt_gui.plugin import Plugin
+from python_qt_binding import loadUi
+from python_qt_binding.QtGui import QWidget
+
+from .speech_testing_widget import SpeechTestWidget
+
+class SpeechTest(Plugin):
+
+    def __init__(self, context):
+        super(SpeechTest, self).__init__(context)
+        # Give QObjects reasonable names
+        self.setObjectName('SpeechTest')
+
+        # Process standalone plugin command-line arguments
+        from argparse import ArgumentParser
+        parser = ArgumentParser()
+        # Add argument(s) to the parser.
+        parser.add_argument("-q", "--quiet", action="store_true",
+                            dest="quiet",
+                            help="Put plugin in silent mode")
+        args, unknowns = parser.parse_known_args(context.argv())
+        if not args.quiet:
+            print 'arguments: ', args
+            print 'unknowns: ', unknowns
+
+        self._widget = SpeechTestWidget(context)
+
+    def shutdown_plugin(self):
+        # TODO unregister all publishers here
+        pass
+
+    def save_settings(self, plugin_settings, instance_settings):
+        # TODO save intrinsic configuration, usually using:
+        # instance_settings.set_value(k, v)
+        pass
+
+    def restore_settings(self, plugin_settings, instance_settings):
+        # TODO restore intrinsic configuration, usually using:
+        # v = instance_settings.value(k)
+        pass
+
+    #def trigger_configuration(self):
+        # Comment in to signal that the plugin has a way to configure
+        # This will enable a setting button (gear icon) in each dock widget title bar
+        # Usually used to open a modal configuration dialog
\ No newline at end of file
diff --git a/rqt_speech_testing/src/rqt_speech_testing/speech_testing_widget.py b/rqt_speech_testing/src/rqt_speech_testing/speech_testing_widget.py
new file mode 100644
index 0000000..cda2df3
--- /dev/null
+++ b/rqt_speech_testing/src/rqt_speech_testing/speech_testing_widget.py
@@ -0,0 +1,173 @@
+import json
+import os
+import signal
+import threading
+import time
+
+import rospkg
+import rospy
+from python_qt_binding import loadUi
+from python_qt_binding.QtCore import Qt, Signal, qWarning
+from python_qt_binding.QtGui import (QApplication, QFileDialog, QHeaderView,
+                                     QIcon, QMessageBox, QTreeWidgetItem,
+                                     QWidget)
+
+from hlpr_speech_msgs.msg import SpeechCommand, StampedString
+from hlpr_speech_recognition.speech_recognizer import SpeechRecognizer
+
+
+class SpeechTestWidget(QWidget):
+    """
+    Widget for testing speech recognition
+    Handles all widget callbacks
+    """
+
+    def __init__(self, context):
+        super(SpeechTestWidget, self).__init__()
+        ui_file = os.path.join(rospkg.RosPack().get_path(
+            "rqt_speech_testing"), "resource", "SpeechTest.ui")
+        loadUi(ui_file, self)
+
+        self.setObjectName('SpeechTestUi')
+        self.setWindowTitle(self.windowTitle() + (' (%d)' %
+                                                  context.serial_number()))
+        # Add widget to the user interface
+        context.add_widget(self)
+
+        self.recognizer = SpeechRecognizer(subnode=True, publishDebug=True)
+
+        recog_topic = rospy.get_param("/speech/publish_topic", "hlpr_speech_commands")
+        msg_type = eval(rospy.get_param("/speech/command_type", "StampedString")) # True if message is only str, false includes header
+        rospy.Subscriber(recog_topic, msg_type, self.speechCallback)
+
+        self.currentRootItem = None
+        self.waitingOnResult = False
+
+        # Set icons for buttons because they don't persist from Qt creator
+        self.openLocationButton.setIcon(QIcon.fromTheme("document-open"))
+        self.openFolderButton.setIcon(QIcon.fromTheme("folder"))
+        self.recordButton.setIcon(QIcon.fromTheme("media-record"))
+        self.clearButton.setIcon(QIcon.fromTheme("edit-delete"))
+        self.exportButton.setIcon(QIcon.fromTheme("document-save-as"))
+
+        # Attach event handlers
+        self.openLocationButton.clicked[bool].connect(self.openAudio)
+        self.openFolderButton.clicked[bool].connect(self.openAudioFolder)
+        self.location.returnPressed.connect(self.loadAudio)
+        self.recordButton.toggled.connect(self.recordAudio)
+        self.outputTree.itemDoubleClicked.connect(self.handleDoubleClick)
+        self.clearButton.clicked[bool].connect(lambda: self.outputTree.clear())
+        self.exportButton.clicked[bool].connect(self.export)
+
+        # Set sizing options for tree widget headers
+        self.outputTree.header().setStretchLastSection(False)
+        self.outputTree.header().setResizeMode(0, QHeaderView.Stretch)
+        self.outputTree.header().setResizeMode(1, QHeaderView.ResizeToContents)
+
+    def handleDoubleClick(self, item, index):
+        if not item.parent():
+            root = self.outputTree.invisibleRootItem()
+            root.removeChild(item)
+
+    def openAudioFolder(self):
+        location = QFileDialog.getExistingDirectory(directory=os.path.dirname(self.location.text()))
+        if not location:
+            return
+        self.location.setText(location)
+        self.loadAudio()
+
+    def openAudio(self):
+        location = QFileDialog.getOpenFileName(filter="*.wav;;*", directory=os.path.dirname(self.location.text()))[0]
+        if not location:
+            return
+        self.location.setText(location)
+        self.loadAudio()
+
+    def loadAudio(self):
+        location = self.location.text()
+        if os.path.isdir(location):
+            locations = [os.path.join(location, f) for f in os.listdir(location) if os.path.isfile(os.path.join(location, f)) and f.split(".")[-1] == "wav"]
+        elif os.path.isfile(location):
+            locations = [location]
+        else:
+            return
+
+        if len(locations) == 0 or len(locations[0]) == 0:
+            return
+
+        QApplication.setOverrideCursor(Qt.WaitCursor)
+        for location in sorted(locations):
+            self.currentRootItem = QTreeWidgetItem()
+            self.currentRootItem.setText(0, location)
+            self.outputTree.addTopLevelItem(self.currentRootItem)
+            self.outputTree.scrollToItem(self.currentRootItem)
+            self.currentRootItem.setExpanded(True)
+
+            self.waitingOnResult = True
+            threading.Thread(target=self.recordAudioThread, kwargs={"file": location}).start()
+
+            waiting = 0
+            while self.waitingOnResult:
+                time.sleep(0.1)
+                waiting += 0.1
+                if (waiting > 1):
+                    self.waitingOnResult = False
+                    rospy.loginfo("{} didn't finish recognition before timeout".format(location))
+                    break
+        QApplication.restoreOverrideCursor()
+
+    def recordAudio(self, state):
+        if state:
+            self.currentRootItem = QTreeWidgetItem()
+            self.currentRootItem.setText(0, "Recording")
+            self.outputTree.addTopLevelItem(self.currentRootItem)
+            self.outputTree.scrollToItem(self.currentRootItem)
+            self.currentRootItem.setExpanded(True)
+
+            threading.Thread(target=self.recordAudioThread).start()
+        else:
+            self.recognizer.end_rec()
+
+    def loadAudioThread(self, file):
+        QApplication.setOverrideCursor(Qt.WaitCursor)
+        self.recognizer.begin_rec(file=file)
+        QApplication.restoreOverrideCursor()
+
+    def recordAudioThread(self, file=None):
+        self.recognizer.begin_rec(file=file)
+
+    def speechCallback(self, msg):
+        if msg._type == "hlpr_speech_msgs/StampedString":
+            last_string = msg.keyphrase
+            last_ts = msg.stamp
+        else:
+            last_string = msg.data
+
+        item = QTreeWidgetItem()
+        item.setText(0, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
+        item.setText(1, last_string)
+        self.currentRootItem.addChild(item)
+        self.outputTree.scrollToItem(item)
+        self.waitingOnResult = False
+
+    def export(self):
+        location = QFileDialog.getSaveFileName(filter = "*.json;;*")[0]
+        if not location:
+            return
+        if location.split(".")[-1] != "json":
+            location = location + ".json"
+
+        output = []
+        root = self.outputTree.invisibleRootItem()
+
+        for i in range(root.childCount()):
+            item = root.child(i)
+            data = {"name": item.text(0), "recognizedText": []}
+            for k in range(item.childCount()):
+                subitem = item.child(k)
+                subdata = {"timestamp": subitem.text(0), "text": subitem.text(1)}
+                data["recognizedText"].append(subdata)
+            output.append(data)
+
+        with open(location, "w") as jsonFile:
+            jsonFile.write(json.dumps(output, indent=4))
\ No newline at end of file
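
For reference, a minimal sketch of how the `subnode`/file-based API introduced in this changeset might be driven outside of rqt. It assumes the default `hlpr_speech_commands` topic and `StampedString` command type used throughout the diff; the node name, the .wav path, and the fixed sleep are illustrative placeholders, not part of this changeset.

```python
#!/usr/bin/env python
# Minimal sketch: run the SpeechRecognizer against a pre-recorded .wav clip,
# the same way rqt_speech_testing drives it. Node name, file path, and the
# 2-second wait below are illustrative placeholders.
import threading

import rospy
from hlpr_speech_msgs.msg import StampedString
from hlpr_speech_recognition.speech_recognizer import SpeechRecognizer


def on_keyphrase(msg):
    # Every detection (including the new "UNKNOWN" fallback) arrives here
    rospy.loginfo("heard: %s at %s", msg.keyphrase, msg.stamp)


if __name__ == "__main__":
    # subnode=True skips rospy.init_node() inside the recognizer,
    # so this script owns its own node
    rospy.init_node("speech_file_test")
    recognizer = SpeechRecognizer(subnode=True, publishDebug=True)
    rospy.Subscriber("hlpr_speech_commands", StampedString, on_keyphrase)

    # begin_rec() blocks while it decodes, so run it on a worker thread
    worker = threading.Thread(target=recognizer.begin_rec,
                              kwargs={"file": "data/Start here.wav"})
    worker.start()

    rospy.sleep(2.0)      # give the decoder time to chew through the clip
    recognizer.end_rec()  # sets should_stop_recording; begin_rec() returns
    worker.join()
```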