ROS 语音识别
ROS语音识别一、语音识别包1、安装 安装很简单,直接使用ubuntu命令即可,首先安装依赖库:123$ sudo apt-get install gstreamer0.10-pocketsphinx$ sudo apt-get install ros-indigo-audio-common
一、语音识别包
1、安装
1
2
3
|
$ sudo apt-get install gstreamer0.10-pocketsphinx
$ sudo apt-get install ros-indigo-audio-common
//我安装的是indigo版本的
$ sudo apt-get install libasound2
|
$ sudo apt-get install gstreamer0.10-gconf
然后来安装ROS包:
sudo apt-get install ros-indigo-pocketsphinx
2. 安装完成后我们就可以运行测试了。
首先,插入你的麦克风设备,然后在系统设置里测试麦克风是否有语音输入。
然后,运行包中的测试程序:
1
|
$ roslaunch pocketsphinx robocup.launch
|
我们也可以直接看ROS最后发布的结果消息:
1
|
$ rostopic echo /recognizer/output
|
二、语音库
1、查看语音库
1
2
|
$ roscd rbx1_speech/config
$ more nav_commands.txt
|
这就是需要添加的文本,我们也可以修改其中的某些文本,改成自己需要的。
然后我们要把这个文件在线生成语音信息和库文件,这一步需要登陆网站http://www.speech.cs.cmu.edu/tools/lmtool-new.html,根据网站的提示上传文件,然后在线编译生成库文件。
上传nav_commands.txt文件
把下载的文件都解压放在rbx1_speech包的config文件夹下。我们可以给这些文件改个名字:
1
|
rename
-f
's/593/nav_commands/'
*
|
在rbx1_speech/launch文件夹下看看voice_nav_commands.launch这个文件:
1
2
3
4
5
6
7
8
|
<launch>
<node name=
"recognizer"
pkg=
"pocketsphinx"
type=
"recognizer.py"
output=
"screen"
>
<param name=
"lm"
value=
"$(find rbx1_speech)/config/nav_commands.lm"
/>
<param name=
"dict"
value=
"$(find rbx1_speech)/config/nav_commands.dic"
/>
</node>
</launch>
|
可以看到,这个launch文件在运行recognizer.py节点的时候使用了我们生成的语音识别库和文件参数,这样就可以实用我们自己的语音库来进行语音识别了。
通过之前的命令来测试一下效果如何吧:
$ roslaunch rbx1_speech voice_nav_commands.launch
$ rostopic echo /recognizer/output
三、语音控制
1、机器人控制节点
在pocketsphinx包中本身有一个语音控制发布Twist消息的例程voice_cmd_vel.py,rbx1_speech包对其进行了一些简化修改,在nodes文件夹里可以查看voice_nav.py文件:
#!/usr/bin/env python
"""
voice_nav.py - Version 1.1 2013-12-20
Allows controlling a mobile base using simple speech commands.
Based on the voice_cmd_vel.py script by Michael Ferguson in
the pocketsphinx ROS package.
See http://www.ros.org/wiki/pocketsphinx
"""
import rospy
from geometry_msgs.msg import Twist
from std_msgs.msg import String
from math import copysign
class VoiceNav:
def __init__(self):
rospy.init_node('voice_nav')
rospy.on_shutdown(self.cleanup)
# Set a number of parameters affecting the robot's speed
self.max_speed = rospy.get_param("~max_speed", 0.4)
self.max_angular_speed = rospy.get_param("~max_angular_speed", 1.5)
self.speed = rospy.get_param("~start_speed", 0.1)
self.angular_speed = rospy.get_param("~start_angular_speed", 0.5)
self.linear_increment = rospy.get_param("~linear_increment", 0.05)
self.angular_increment = rospy.get_param("~angular_increment", 0.4)
# We don't have to run the script very fast
self.rate = rospy.get_param("~rate", 5)
r = rospy.Rate(self.rate)
# A flag to determine whether or not voice control is paused
self.paused = False
# Initialize the Twist message we will publish.
self.cmd_vel = Twist()
# Publish the Twist message to the cmd_vel topic
self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5)
# Subscribe to the /recognizer/output topic to receive voice commands.
rospy.Subscriber('/recognizer/output', String, self.speech_callback)
# A mapping from keywords or phrases to commands
self.keywords_to_command = {'stop': ['stop', 'halt', 'abort', 'kill', 'panic', 'off', 'freeze', 'shut down', 'turn off', 'help', 'help me'],
'slower': ['slow down', 'slower'],
'faster': ['speed up', 'faster'],
'forward': ['forward', 'ahead', 'straight'],
'backward': ['back', 'backward', 'back up'],
'rotate left': ['rotate left'],
'rotate right': ['rotate right'],
'turn left': ['turn left'],
'turn right': ['turn right'],
'quarter': ['quarter speed'],
'half': ['half speed'],
'full': ['full speed'],
'pause': ['pause speech'],
'continue': ['continue speech']}
rospy.loginfo("Ready to receive voice commands")
# We have to keep publishing the cmd_vel message if we want the robot to keep moving.
while not rospy.is_shutdown():
self.cmd_vel_pub.publish(self.cmd_vel)
r.sleep()
def get_command(self, data):
# Attempt to match the recognized word or phrase to the
# keywords_to_command dictionary and return the appropriate
# command
for (command, keywords) in self.keywords_to_command.iteritems():
for word in keywords:
if data.find(word) > -1:
return command
def speech_callback(self, msg):
# Get the motion command from the recognized phrase
command = self.get_command(msg.data)
# Log the command to the screen
rospy.loginfo("Command: " + str(command))
# If the user has asked to pause/continue voice control,
# set the flag accordingly
if command == 'pause':
self.paused = True
elif command == 'continue':
self.paused = False
# If voice control is paused, simply return without
# performing any action
if self.paused:
return
# The list of if-then statements should be fairly
# self-explanatory
if command == 'forward':
self.cmd_vel.linear.x = self.speed
self.cmd_vel.angular.z = 0
elif command == 'rotate left':
self.cmd_vel.linear.x = 0
self.cmd_vel.angular.z = self.angular_speed
elif command == 'rotate right':
self.cmd_vel.linear.x = 0
self.cmd_vel.angular.z = -self.angular_speed
elif command == 'turn left':
if self.cmd_vel.linear.x != 0:
self.cmd_vel.angular.z += self.angular_increment
else:
self.cmd_vel.angular.z = self.angular_speed
elif command == 'turn right':
if self.cmd_vel.linear.x != 0:
self.cmd_vel.angular.z -= self.angular_increment
else:
self.cmd_vel.angular.z = -self.angular_speed
elif command == 'backward':
self.cmd_vel.linear.x = -self.speed
self.cmd_vel.angular.z = 0
elif command == 'stop':
# Stop the robot! Publish a Twist message consisting of all zeros.
self.cmd_vel = Twist()
elif command == 'faster':
self.speed += self.linear_increment
self.angular_speed += self.angular_increment
if self.cmd_vel.linear.x != 0:
self.cmd_vel.linear.x += copysign(self.linear_increment, self.cmd_vel.linear.x)
if self.cmd_vel.angular.z != 0:
self.cmd_vel.angular.z += copysign(self.angular_increment, self.cmd_vel.angular.z)
elif command == 'slower':
self.speed -= self.linear_increment
self.angular_speed -= self.angular_increment
if self.cmd_vel.linear.x != 0:
self.cmd_vel.linear.x -= copysign(self.linear_increment, self.cmd_vel.linear.x)
if self.cmd_vel.angular.z != 0:
self.cmd_vel.angular.z -= copysign(self.angular_increment, self.cmd_vel.angular.z)
elif command in ['quarter', 'half', 'full']:
if command == 'quarter':
self.speed = copysign(self.max_speed / 4, self.speed)
elif command == 'half':
self.speed = copysign(self.max_speed / 2, self.speed)
elif command == 'full':
self.speed = copysign(self.max_speed, self.speed)
if self.cmd_vel.linear.x != 0:
self.cmd_vel.linear.x = copysign(self.speed, self.cmd_vel.linear.x)
if self.cmd_vel.angular.z != 0:
self.cmd_vel.angular.z = copysign(self.angular_speed, self.cmd_vel.angular.z)
else:
return
self.cmd_vel.linear.x = min(self.max_speed, max(-self.max_speed, self.cmd_vel.linear.x))
self.cmd_vel.angular.z = min(self.max_angular_speed, max(-self.max_angular_speed, self.cmd_vel.angular.z))
def cleanup(self):
# When shutting down be sure to stop the robot!
twist = Twist()
self.cmd_vel_pub.publish(twist)
rospy.sleep(1)
if __name__=="__main__":
try:
VoiceNav()
rospy.spin()
except rospy.ROSInterruptException:
rospy.loginfo("Voice navigation terminated.")
2、仿真测试
和前面一样,我们在rviz中进行仿真测试。
首先是运行一个机器人模型:
1
|
$ roslaunch rbx1_bringup fake_turtlebot.launch
|
然后打开rviz:
1
|
$ rosrun rviz rviz
-
d `rospack find rbx1_nav`
/
sim.rviz
|
再打开语音识别的节点:
$ roslaunch rbx1_speech voice_nav_commands.laun
最后就是机器人的控制节点了:
$ roslaunch rbx1_speech turtlebot_voice_nav.launch
OK,打开上面这一堆的节点之后,就可以开始了。可以使用的命令如下:
可能是英语太差吧,很难识别;
四、播放语音
运行下面的命令:
1
2
|
$ rosrun sound_play soundplay_node.py
$ rosrun sound_play say.py
"Greetings Humans. Take me to your leader."
|
有没有听见声音!ROS通过识别我们输入的文本,让机器人读了出来。发出这个声音的人叫做kal_diphone,如果不喜欢,我们也可以换一个人来读:
1
2
|
$ sudo apt
-
get install festvox
-
don
$ rosrun sound_play say.py
"Welcome to the future"
voice_don_diphone
|
哈哈,这回换了一个人吧,好吧,这不也是我们的重点。
在rbx1_speech/nodes文件夹中有一个让机器人说话的节点talkback.py:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
#!/usr/bin/env python
"""
talkback.py - Version 1.1 2013-12-20
Use the sound_play client to say back what is heard by the pocketsphinx recognizer.
Created for the Pi Robot Project: http://www.pirobot.org
Copyright (c) 2012 Patrick Goebel. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.5
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details at:
http://www.gnu.org/licenses/gpl.htmlPoint
"""
import
rospy
from
std_msgs.msg
import
String
from
sound_play.libsoundplay
import
SoundClient
import
sys
class
TalkBack:
def
__init__(
self
, script_path):
rospy.init_node(
'talkback'
)
rospy.on_shutdown(
self
.cleanup)
# Set the default TTS voice to use
self
.voice
=
rospy.get_param(
"~voice"
,
"voice_don_diphone"
)
# Set the wave file path if used
self
.wavepath
=
rospy.get_param(
"~wavepath"
, script_path
+
"/../sounds"
)
# Create the sound client object
self
.soundhandle
=
SoundClient()
# Wait a moment to let the client connect to the
# sound_play server
rospy.sleep(
1
)
# Make sure any lingering sound_play processes are stopped.
self
.soundhandle.stopAll()
# Announce that we are ready for input
self
.soundhandle.playWave(
self
.wavepath
+
"/R2D2a.wav"
)
rospy.sleep(
1
)
self
.soundhandle.say(
"Ready"
,
self
.voice)
rospy.loginfo(
"Say one of the navigation commands..."
)
# Subscribe to the recognizer output and set the callback function
rospy.Subscriber(
'/recognizer/output'
, String,
self
.talkback)
def
talkback(
self
, msg):
# Print the recognized words on the screen
rospy.loginfo(msg.data)
# Speak the recognized words in the selected voice
self
.soundhandle.say(msg.data,
self
.voice)
# Uncomment to play one of the built-in sounds
#rospy.sleep(2)
#self.soundhandle.play(5)
# Uncomment to play a wave file
#rospy.sleep(2)
#self.soundhandle.playWave(self.wavepath + "/R2D2a.wav")
def
cleanup(
self
):
self
.soundhandle.stopAll()
rospy.loginfo(
"Shutting down talkback node..."
)
if
__name__
=
=
"__main__"
:
try
:
TalkBack(sys.path[
0
])
rospy.spin()
except
rospy.ROSInterruptException:
rospy.loginfo(
"Talkback node terminated."
)
|
我们来运行看一下效果:
1
|
$ roslaunch rbx1_speech talkback.launch
|
更多推荐
所有评论(0)