ROS语音识别
ROS语音识别一、语音识别包1、安装 安装很简单,直接使用ubuntu命令即可,首先安装依赖库:123$ sudo apt-get install gstreamer0.10-pocketsphinx$ sudo apt-get install ros-indigo-audio-common
一、语音识别包
1、安装
1
2
3
|
$ sudo apt-get install gstreamer0.10-pocketsphinx
$ sudo apt-get install ros-indigo-audio-common
//我安装的是indigo版本的
$ sudo apt-get install libasound2
|
$ sudo apt-get install gstreamer0.10-gconf
然后来安装ROS包:
sudo apt-get install ros-indigo-pocketsphinx
2. 安装完成后我们就可以运行测试了。
首先,插入你的麦克风设备,然后在系统设置里测试麦克风是否有语音输入。
然后,运行包中的测试程序:
1
|
$ roslaunch pocketsphinx robocup.launch
|
我们也可以直接看ROS最后发布的结果消息:
1
|
$ rostopic echo /recognizer/output
|
二、语音库
1、查看语音库
1
2
|
$ roscd rbx1_speech/config
$ more nav_commands.txt
|
这就是需要添加的文本,我们也可以修改其中的某些文本,改成自己需要的。
然后我们要把这个文件在线生成语音信息和库文件,这一步需要登陆网站http://www.speech.cs.cmu.edu/tools/lmtool-new.html,根据网站的提示上传文件,然后在线编译生成库文件。
上传nav_commands.txt文件
把下载的文件都解压放在rbx1_speech包的config文件夹下。我们可以给这些文件改个名字:
1
|
rename
-f
's/593/nav_commands/'
*
|
在rbx1_speech/launch文件夹下看看voice_nav_commands.launch这个文件:
1
2
3
4
5
6
7
8
|
<launch>
<node name=
"recognizer"
pkg=
"pocketsphinx"
type=
"recognizer.py"
output=
"screen"
>
<param name=
"lm"
value=
"$(find rbx1_speech)/config/nav_commands.lm"
/>
<param name=
"dict"
value=
"$(find rbx1_speech)/config/nav_commands.dic"
/>
</node>
</launch>
|
可以看到,这个launch文件在运行recognizer.py节点的时候使用了我们生成的语音识别库和文件参数,这样就可以实用我们自己的语音库来进行语音识别了。
通过之前的命令来测试一下效果如何吧:
$ roslaunch rbx1_speech voice_nav_commands.launch
$ rostopic echo /recognizer/output
三、语音控制
1、机器人控制节点
在pocketsphinx包中本身有一个语音控制发布Twist消息的例程voice_cmd_vel.py,rbx1_speech包对其进行了一些简化修改,在nodes文件夹里可以查看voice_nav.py文件:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
#!/usr/bin/env python
"""
voice_nav.py - Version 1.1 2013-12-20
Allows controlling a mobile base using simple speech commands.
Based on the voice_cmd_vel.py script by Michael Ferguson in
the pocketsphinx ROS package.
See http://www.ros.org/wiki/pocketsphinx
"""
import
rospy
from
geometry_msgs.msg
import
Twist
from
std_msgs.msg
import
String
from
math
import
copysign
class
VoiceNav:
def
__init__(
self
):
rospy.init_node(
'voice_nav'
)
rospy.on_shutdown(
self
.cleanup)
# Set a number of parameters affecting the robot's speed
self
.max_speed
=
rospy.get_param(
"~max_speed"
,
0.4
)
self
.max_angular_speed
=
rospy.get_param(
"~max_angular_speed"
,
1.5
)
self
.speed
=
rospy.get_param(
"~start_speed"
,
0.1
)
self
.angular_speed
=
rospy.get_param(
"~start_angular_speed"
,
0.5
)
self
.linear_increment
=
rospy.get_param(
"~linear_increment"
,
0.05
)
self
.angular_increment
=
rospy.get_param(
"~angular_increment"
,
0.4
)
# We don't have to run the script very fast
self
.rate
=
rospy.get_param(
"~rate"
,
5
)
r
=
rospy.Rate(
self
.rate)
# A flag to determine whether or not voice control is paused
self
.paused
=
False
# Initialize the Twist message we will publish.
self
.cmd_vel
=
Twist()
# Publish the Twist message to the cmd_vel topic
self
.cmd_vel_pub
=
rospy.Publisher(
'cmd_vel'
, Twist, queue_size
=
5
)
# Subscribe to the /recognizer/output topic to receive voice commands.
rospy.Subscriber(
'/recognizer/output'
, String,
self
.speech_callback)
# A mapping from keywords or phrases to commands
self
.keywords_to_command
=
{
'stop'
: [
'stop'
,
'halt'
,
'abort'
,
'kill'
,
'panic'
,
'off'
,
'freeze'
,
'shut down'
,
'turn off'
,
'help'
,
'help me'
],
'slower'
: [
'slow down'
,
'slower'
],
'faster'
: [
'speed up'
,
'faster'
],
'forward'
: [
'forward'
,
'ahead'
,
'straight'
],
'backward'
: [
'back'
,
'backward'
,
'back up'
],
'rotate left'
: [
'rotate left'
],
'rotate right'
: [
'rotate right'
],
'turn left'
: [
'turn left'
],
'turn right'
: [
'turn right'
],
'quarter'
: [
'quarter speed'
],
'half'
: [
'half speed'
],
'full'
: [
'full speed'
],
'pause'
: [
'pause speech'
],
'continue'
: [
'continue speech'
]}
rospy.loginfo(
"Ready to receive voice commands"
)
# We have to keep publishing the cmd_vel message if we want the robot to keep moving.
while
not
rospy.is_shutdown():
self
.cmd_vel_pub.publish(
self
.cmd_vel)
r.sleep()
def
get_command(
self
, data):
# Attempt to match the recognized word or phrase to the
# keywords_to_command dictionary and return the appropriate
# command
for
(command, keywords)
in
self
.keywords_to_command.iteritems():
for
word
in
keywords:
if
data.find(word) >
-
1
:
return
command
def
speech_callback(
self
, msg):
# Get the motion command from the recognized phrase
command
=
self
.get_command(msg.data)
# Log the command to the screen
rospy.loginfo(
"Command: "
+
str
(command))
# If the user has asked to pause/continue voice control,
# set the flag accordingly
if
command
=
=
'pause'
:
self
.paused
=
True
elif
command
=
=
'continue'
:
self
.paused
=
False
# If voice control is paused, simply return without
# performing any action
if
self
.paused:
return
# The list of if-then statements should be fairly
# self-explanatory
if
command
=
=
'forward'
:
self
.cmd_vel.linear.x
=
self
.speed
self
.cmd_vel.angular.z
=
0
elif
command
=
=
'rotate left'
:
self
.cmd_vel.linear.x
=
0
self
.cmd_vel.angular.z
=
self
.angular_speed
elif
command
=
=
'rotate right'
:
self
.cmd_vel.linear.x
=
0
self
.cmd_vel.angular.z
=
-
self
.angular_speed
elif
command
=
=
'turn left'
:
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.angular.z
+
=
self
.angular_increment
else
:
self
.cmd_vel.angular.z
=
self
.angular_speed
elif
command
=
=
'turn right'
:
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.angular.z
-
=
self
.angular_increment
else
:
self
.cmd_vel.angular.z
=
-
self
.angular_speed
elif
command
=
=
'backward'
:
self
.cmd_vel.linear.x
=
-
self
.speed
self
.cmd_vel.angular.z
=
0
elif
command
=
=
'stop'
:
# Stop the robot! Publish a Twist message consisting of all zeros.
self
.cmd_vel
=
Twist()
elif
command
=
=
'faster'
:
self
.speed
+
=
self
.linear_increment
self
.angular_speed
+
=
self
.angular_increment
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
+
=
copysign(
self
.linear_increment,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
+
=
copysign(
self
.angular_increment,
self
.cmd_vel.angular.z)
elif
command
=
=
'slower'
:
self
.speed
-
=
self
.linear_increment
self
.angular_speed
-
=
self
.angular_increment
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
-
=
copysign(
self
.linear_increment,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
-
=
copysign(
self
.angular_increment,
self
.cmd_vel.angular.z)
elif
command
in
[
'quarter'
,
'half'
,
'full'
]:
if
command
=
=
'quarter'
:
self
.speed
=
copysign(
self
.max_speed
/
4
,
self
.speed)
elif
command
=
=
'half'
:
self
.speed
=
copysign(
self
.max_speed
/
2
,
self
.speed)
elif
command
=
=
'full'
:
self
.speed
=
copysign(
self
.max_speed,
self
.speed)
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
=
copysign(
self
.speed,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
=
copysign(
self
.angular_speed,
self
.cmd_vel.angular.z)
else
:
return
self
.cmd_vel.linear.x
=
min
(
self
.max_speed,
max
(
-
self
.max_speed,
self
.cmd_vel.linear.x))
self
.cmd_vel.angular.z
=
min
(
self
.max_angular_speed,
max
(
-
self
.max_angular_speed,
self
.cmd_vel.angular.z))
def
cleanup(
self
):
# When shutting down be sure to stop the robot!
twist
=
Twist()
self
.cmd_vel_pub.publish(twist)
rospy.sleep(
1
)
if
__name__
=
=
"__main__"
:
try
:
VoiceNav()
rospy.spin()
except
rospy.ROSInterruptException:
rospy.loginfo(
"Voice navigation terminated."
)
|
2、仿真测试
和前面一样,我们在rviz中进行仿真测试。
首先是运行一个机器人模型:
1
|
$ roslaunch rbx1_bringup fake_turtlebot.launch
|
然后打开rviz:
1
|
$ rosrun rviz rviz
-
d `rospack find rbx1_nav`
/
sim.rviz
|
最后就是机器人的控制节点了:
1
|
$ roslaunch rbx1_speech voice_nav_commands.launch 再打开语音识别的节点:$ roslaunch rbx1_speech turtlebot_voice_nav.launch 最后就是机器人的控制节点了:
|
OK,打开上面这一堆的节点之后,就可以开始了。可以使用的命令如下:
可能是英语太差吧,很难识别;
四、播放语音
运行下面的命令:
1
2
|
$ rosrun sound_play soundplay_node.py
$ rosrun sound_play say.py
"Greetings Humans. Take me to your leader."
|
有没有听见声音!ROS通过识别我们输入的文本,让机器人读了出来。发出这个声音的人叫做kal_diphone,如果不喜欢,我们也可以换一个人来读:
1
2
|
$ sudo apt
-
get install festvox
-
don
$ rosrun sound_play say.py
"Welcome to the future"
voice_don_diphone
|
哈哈,这回换了一个人吧,好吧,这不也是我们的重点。
在rbx1_speech/nodes文件夹中有一个让机器人说话的节点talkback.py:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
#!/usr/bin/env python
"""
talkback.py - Version 1.1 2013-12-20
Use the sound_play client to say back what is heard by the pocketsphinx recognizer.
Created for the Pi Robot Project: http://www.pirobot.org
Copyright (c) 2012 Patrick Goebel. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.5
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details at:
http://www.gnu.org/licenses/gpl.htmlPoint
"""
import
rospy
from
std_msgs.msg
import
String
from
sound_play.libsoundplay
import
SoundClient
import
sys
class
TalkBack:
def
__init__(
self
, script_path):
rospy.init_node(
'talkback'
)
rospy.on_shutdown(
self
.cleanup)
# Set the default TTS voice to use
self
.voice
=
rospy.get_param(
"~voice"
,
"voice_don_diphone"
)
# Set the wave file path if used
self
.wavepath
=
rospy.get_param(
"~wavepath"
, script_path
+
"/../sounds"
)
# Create the sound client object
self
.soundhandle
=
SoundClient()
# Wait a moment to let the client connect to the
# sound_play server
rospy.sleep(
1
)
# Make sure any lingering sound_play processes are stopped.
self
.soundhandle.stopAll()
# Announce that we are ready for input
self
.soundhandle.playWave(
self
.wavepath
+
"/R2D2a.wav"
)
rospy.sleep(
1
)
self
.soundhandle.say(
"Ready"
,
self
.voice)
rospy.loginfo(
"Say one of the navigation commands..."
)
# Subscribe to the recognizer output and set the callback function
rospy.Subscriber(
'/recognizer/output'
, String,
self
.talkback)
def
talkback(
self
, msg):
# Print the recognized words on the screen
rospy.loginfo(msg.data)
# Speak the recognized words in the selected voice
self
.soundhandle.say(msg.data,
self
.voice)
# Uncomment to play one of the built-in sounds
#rospy.sleep(2)
#self.soundhandle.play(5)
# Uncomment to play a wave file
#rospy.sleep(2)
#self.soundhandle.playWave(self.wavepath + "/R2D2a.wav")
def
cleanup(
self
):
self
.soundhandle.stopAll()
rospy.loginfo(
"Shutting down talkback node..."
)
if
__name__
=
=
"__main__"
:
try
:
TalkBack(sys.path[
0
])
rospy.spin()
except
rospy.ROSInterruptException:
rospy.loginfo(
"Talkback node terminated."
)
|
我们来运行看一下效果:
1
|
$ roslaunch rbx1_speech talkback.launch
|
更多推荐
所有评论(0)