Asterisk - The Open Source Telephony Project GIT-master-7e7a603
res_speech.c
Go to the documentation of this file.
1/*
2 * Asterisk -- An open source telephony toolkit.
3 *
4 * Copyright (C) 2006, Digium, Inc.
5 *
6 * Joshua Colp <jcolp@digium.com>
7 *
8 * See http://www.asterisk.org for more information about
9 * the Asterisk project. Please do not directly contact
10 * any of the maintainers of this project for assistance;
11 * the project provides a web site, mailing lists and IRC
12 * channels for your use.
13 *
14 * This program is free software, distributed under the terms of
15 * the GNU General Public License Version 2. See the LICENSE file
16 * at the top of the source tree.
17 */
18
19/*! \file
20 *
21 * \brief Generic Speech Recognition API
22 *
23 * \author Joshua Colp <jcolp@digium.com>
24 */
25
26/*** MODULEINFO
27 <support_level>core</support_level>
28 ***/
29
30#include "asterisk.h"
31
32#include "asterisk/channel.h"
33#include "asterisk/module.h"
34#include "asterisk/lock.h"
36#include "asterisk/cli.h"
37#include "asterisk/term.h"
38#include "asterisk/speech.h"
40#include "asterisk/translate.h"
41
44
45/*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
46struct ast_speech_engine *ast_speech_find_engine(const char *engine_name)
47{
48 struct ast_speech_engine *engine = NULL;
49
50 /* If no name is specified -- use the default engine */
51 if (ast_strlen_zero(engine_name))
52 return default_engine;
53
55 AST_RWLIST_TRAVERSE(&engines, engine, list) {
56 if (!strcasecmp(engine->name, engine_name)) {
57 break;
58 }
59 }
61
62 return engine;
63}
64
65/*! \brief Activate a loaded (either local or global) grammar */
66int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
67{
68 return (speech->engine->activate ? speech->engine->activate(speech, grammar_name) : -1);
69}
70
71/*! \brief Deactivate a loaded grammar on a speech structure */
72int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
73{
74 return (speech->engine->deactivate ? speech->engine->deactivate(speech, grammar_name) : -1);
75}
76
77/*! \brief Load a local grammar on a speech structure */
78int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
79{
80 return (speech->engine->load ? speech->engine->load(speech, grammar_name, grammar) : -1);
81}
82
83/*! \brief Unload a local grammar from a speech structure */
84int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
85{
86 return (speech->engine->unload ? speech->engine->unload(speech, grammar_name) : -1);
87}
88
89/*! \brief Return the results of a recognition from the speech structure */
91{
92 return (speech->engine->get ? speech->engine->get(speech) : NULL);
93}
94
95/*! \brief Free a list of results */
97{
98 struct ast_speech_result *current_result = result, *prev_result = NULL;
99 int res = 0;
100
101 while (current_result != NULL) {
102 prev_result = current_result;
103 /* Deallocate what we can */
104 if (current_result->text != NULL) {
105 ast_free(current_result->text);
106 current_result->text = NULL;
107 }
108 if (current_result->grammar != NULL) {
109 ast_free(current_result->grammar);
110 current_result->grammar = NULL;
111 }
112 /* Move on and then free ourselves */
113 current_result = AST_LIST_NEXT(current_result, list);
114 ast_free(prev_result);
115 prev_result = NULL;
116 }
117
118 return res;
119}
120
121/*! \brief Start speech recognition on a speech structure */
122void ast_speech_start(struct ast_speech *speech)
123{
124
125 /* Clear any flags that may affect things */
129
130 /* If results are on the structure, free them since we are starting again */
131 if (speech->results) {
133 speech->results = NULL;
134 }
135
136 /* If the engine needs to start stuff up, do it */
137 if (speech->engine->start)
138 speech->engine->start(speech);
139
140 return;
141}
142
143/*! \brief Write in signed linear audio to be recognized */
144int ast_speech_write(struct ast_speech *speech, void *data, int len)
145{
146 /* Make sure the speech engine is ready to accept audio */
147 if (speech->state != AST_SPEECH_STATE_READY)
148 return -1;
149
150 return speech->engine->write(speech, data, len);
151}
152
153/*! \brief Signal to the engine that DTMF was received */
154int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
155{
156 int res = 0;
157
158 if (speech->state != AST_SPEECH_STATE_READY)
159 return -1;
160
161 if (speech->engine->dtmf != NULL) {
162 res = speech->engine->dtmf(speech, dtmf);
163 }
164
165 return res;
166}
167
168/*! \brief Change an engine specific attribute */
169int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
170{
171 return (speech->engine->change ? speech->engine->change(speech, name, value) : -1);
172}
173
174/*! \brief Get an engine specific attribute */
175int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
176{
177 return (speech->engine->get_setting ? speech->engine->get_setting(speech, name, buf, len) : -1);
178}
179
180/*! \brief Create a new speech structure using the engine specified */
181struct ast_speech *ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
182{
183 struct ast_speech_engine *engine = NULL;
184 struct ast_speech *new_speech = NULL;
185 struct ast_format_cap *joint;
186 RAII_VAR(struct ast_format *, best, NULL, ao2_cleanup);
187 RAII_VAR(struct ast_format *, best_translated, NULL, ao2_cleanup);
188
189 /* Try to find the speech recognition engine that was requested */
190 if (!(engine = ast_speech_find_engine(engine_name)))
191 return NULL;
192
194 if (!joint) {
195 return NULL;
196 }
197
198 ast_format_cap_get_compatible(engine->formats, cap, joint);
199 best = ast_format_cap_get_format(joint, 0);
200 ao2_ref(joint, -1);
201
202 if (!best) {
205 } else {
206 /*
207 * If there is no overlap and the engine does not support slin, find the best
208 * format to translate to and set that as the 'best' input format for the engine.
209 * API consumer is responsible for translating to this format.
210 * Safe to cast cap as ast_translator_best_choice does not modify the caps
211 */
212 if (ast_translator_best_choice(engine->formats, (struct ast_format_cap *)cap, &best, &best_translated)) {
213 /* No overlapping formats and no translatable formats */
214 return NULL;
215 }
216 }
217 }
218
219 /* Allocate our own speech structure, and try to allocate a structure from the engine too */
220 if (!(new_speech = ast_calloc(1, sizeof(*new_speech)))) {
221 return NULL;
222 }
223
224 /* Initialize the lock */
225 ast_mutex_init(&new_speech->lock);
226
227 /* Make sure no results are present */
228 new_speech->results = NULL;
229
230 /* Copy over our engine pointer */
231 new_speech->engine = engine;
232
233 /* Can't forget the format audio is going to be in */
234 new_speech->format = ao2_bump(best);
235
236 /* We are not ready to accept audio yet */
238
239 /* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
240 if (engine->create(new_speech, new_speech->format)) {
241 ast_mutex_destroy(&new_speech->lock);
242 ao2_ref(new_speech->format, -1);
243 ast_free(new_speech);
244 return NULL;
245 }
246
247 return new_speech;
248}
249
250/*! \brief Destroy a speech structure */
252{
253 int res = 0;
254
255 /* Call our engine so we are destroyed properly */
256 speech->engine->destroy(speech);
257
258 /* Deinitialize the lock */
259 ast_mutex_destroy(&speech->lock);
260
261 /* If results exist on the speech structure, destroy them */
262 if (speech->results)
264
265 /* If a processing sound is set - free the memory used by it */
266 if (speech->processing_sound)
267 ast_free(speech->processing_sound);
268
269 ao2_ref(speech->format, -1);
270
271 /* Aloha we are done */
272 ast_free(speech);
273
274 return res;
275}
276
277/*! \brief Change state of a speech structure */
279{
280 int res = 0;
281
282 switch (state) {
284 /* The engine heard audio, so they spoke */
286 default:
287 speech->state = state;
288 break;
289 }
290
291 return res;
292}
293
295{
296 switch (type) {
298 return "normal";
300 return "nbest";
301 default:
302 ast_assert(0);
303 return "unknown";
304 }
305}
306
307/*! \brief Change the type of results we want */
309{
310 speech->results_type = results_type;
311
312 return (speech->engine->change_results_type ? speech->engine->change_results_type(speech, results_type) : 0);
313}
314
315/*! \brief Register a speech recognition engine */
317{
318 int res = 0;
319
320 /* Confirm the engine meets the minimum API requirements */
321 if (!engine->create || !engine->write || !engine->destroy) {
322 ast_log(LOG_WARNING, "Speech recognition engine '%s' did not meet minimum API requirements.\n", engine->name);
323 return -1;
324 }
325
326 /* If an engine is already loaded with this name, error out */
327 if (ast_speech_find_engine(engine->name)) {
328 ast_log(LOG_WARNING, "Speech recognition engine '%s' already exists.\n", engine->name);
329 return -1;
330 }
331
332 ast_verb(5, "Registered speech recognition engine '%s'\n", engine->name);
333
334 /* Add to the engine linked list and make default if needed */
336 AST_RWLIST_INSERT_HEAD(&engines, engine, list);
337 if (!default_engine) {
338 default_engine = engine;
339 ast_verb(5, "Made '%s' the default speech recognition engine\n", engine->name);
340 }
342
343 return res;
344}
345
/*!
 * \brief Unregister a speech recognition engine.
 *
 * Thin wrapper around ast_speech_unregister2() that reports only success
 * or failure.
 *
 * \param engine_name Name of the engine to unregister
 *
 * \return 0 when an engine was unregistered, -1 when
 *         ast_speech_unregister2() returned NULL.
 */
int ast_speech_unregister(const char *engine_name)
{
	if (ast_speech_unregister2(engine_name) != NULL) {
		return 0;
	}

	return -1;
}
351
352struct ast_speech_engine *ast_speech_unregister2(const char *engine_name)
353{
354 struct ast_speech_engine *engine = NULL;
355
356 if (ast_strlen_zero(engine_name)) {
357 return NULL;
358 }
359
362 if (!strcasecmp(engine->name, engine_name)) {
363 /* We have our engine... removed it */
365 /* If this was the default engine, we need to pick a new one */
366 if (engine == default_engine) {
368 }
369 ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine_name);
370 /* All went well */
371 break;
372 }
373 }
376
377 return engine;
378}
379
381 int (*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data,
382 void (*on_unregistered)(void *obj))
383{
384 struct ast_speech_engine *engine = NULL;
385
386 if (!should_unregister) {
387 return;
388 }
389
392 if (should_unregister(engine, data)) {
393 /* We have our engine... removed it */
395 /* If this was the default engine, we need to pick a new one */
396 if (engine == default_engine) {
398 }
399 ast_verb(5, "Unregistered speech recognition engine '%s'\n", engine->name);
400 /* All went well */
401 if (on_unregistered) {
402 on_unregistered(engine);
403 }
404 }
405 }
408}
409
/*!
 * \brief Module unload hook.
 *
 * \return Always -1: this module refuses to be unloaded.
 */
static int unload_module(void)
{
	/* We can not be unloaded */
	const int refuse_unload = -1;

	return refuse_unload;
}
415
416static int load_module(void)
417{
419}
420
422 .support_level = AST_MODULE_SUPPORT_CORE,
423 .load = load_module,
425 .load_pri = AST_MODPRI_APP_DEPEND - 1,
Asterisk main include file. File version handling, generic pbx functions.
#define ast_free(a)
Definition: astmm.h:180
#define ast_calloc(num, len)
A wrapper for calloc()
Definition: astmm.h:202
#define ast_log
Definition: astobj2.c:42
#define ao2_cleanup(obj)
Definition: astobj2.h:1934
#define ao2_ref(o, delta)
Reference/unreference an object and return the old refcount.
Definition: astobj2.h:459
#define ao2_bump(obj)
Bump refcount on an AO2 object by one, returning the object.
Definition: astobj2.h:480
enum cc_state state
Definition: ccss.c:393
static PGresult * result
Definition: cel_pgsql.c:84
static const char type[]
Definition: chan_ooh323.c:109
General Asterisk PBX channel definitions.
Standard Command Line Interface.
char buf[BUFSIZE]
Definition: eagi_proxy.c:66
@ AST_FORMAT_CMP_NOT_EQUAL
Definition: format.h:38
Media Format Cache API.
struct ast_format * ast_format_slin
Built-in cached signed linear 8kHz format.
Definition: format_cache.c:41
struct ast_format * ast_format_cap_get_format(const struct ast_format_cap *cap, int position)
Get the format at a specific index.
Definition: format_cap.c:400
int ast_format_cap_get_compatible(const struct ast_format_cap *cap1, const struct ast_format_cap *cap2, struct ast_format_cap *result)
Find the compatible formats between two capabilities structures.
Definition: format_cap.c:628
enum ast_format_cmp_res ast_format_cap_iscompatible_format(const struct ast_format_cap *cap, const struct ast_format *format)
Find if ast_format is within the capabilities of the ast_format_cap object.
Definition: format_cap.c:581
@ AST_FORMAT_CAP_FLAG_DEFAULT
Definition: format_cap.h:38
#define ast_format_cap_alloc(flags)
Allocate a new ast_format_cap structure.
Definition: format_cap.h:49
static const char name[]
Definition: format_mp3.c:68
static int len(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t buflen)
#define ast_verb(level,...)
#define LOG_WARNING
A set of macros to manage forward-linked lists.
#define AST_RWLIST_REMOVE_CURRENT
Definition: linkedlists.h:570
#define AST_RWLIST_RDLOCK(head)
Read locks a list.
Definition: linkedlists.h:78
#define AST_RWLIST_TRAVERSE_SAFE_BEGIN
Definition: linkedlists.h:545
#define AST_RWLIST_WRLOCK(head)
Write locks a list.
Definition: linkedlists.h:52
#define AST_RWLIST_UNLOCK(head)
Attempts to unlock a read/write based list.
Definition: linkedlists.h:151
#define AST_RWLIST_HEAD_STATIC(name, type)
Defines a structure to be used to hold a read/write list of specified type, statically initialized.
Definition: linkedlists.h:333
#define AST_RWLIST_FIRST
Definition: linkedlists.h:423
#define AST_RWLIST_TRAVERSE_SAFE_END
Definition: linkedlists.h:617
#define AST_RWLIST_TRAVERSE
Definition: linkedlists.h:494
#define AST_RWLIST_INSERT_HEAD
Definition: linkedlists.h:718
#define AST_LIST_NEXT(elm, field)
Returns the next entry in the list after the given entry.
Definition: linkedlists.h:439
Asterisk locking-related definitions:
#define ast_mutex_init(pmutex)
Definition: lock.h:186
#define ast_mutex_destroy(a)
Definition: lock.h:188
Asterisk module definitions.
@ AST_MODFLAG_LOAD_ORDER
Definition: module.h:317
@ AST_MODFLAG_GLOBAL_SYMBOLS
Definition: module.h:316
#define AST_MODULE_INFO(keystr, flags_to_set, desc, fields...)
Definition: module.h:543
@ AST_MODPRI_APP_DEPEND
Definition: module.h:328
@ AST_MODULE_SUPPORT_CORE
Definition: module.h:121
#define ASTERISK_GPL_KEY
The text the key() function should return.
Definition: module.h:46
@ AST_MODULE_LOAD_SUCCESS
Definition: module.h:70
struct ast_speech_engine * ast_speech_find_engine(const char *engine_name)
Find a speech recognition engine of specified name, if NULL then use the default one.
Definition: res_speech.c:46
int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
Signal to the engine that DTMF was received.
Definition: res_speech.c:154
int ast_speech_results_free(struct ast_speech_result *result)
Free a list of results.
Definition: res_speech.c:96
int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
Deactivate a loaded grammar on a speech structure.
Definition: res_speech.c:72
const char * ast_speech_results_type_to_string(enum ast_speech_results_type type)
Convert a speech results type to a string.
Definition: res_speech.c:294
int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
Unload a local grammar from a speech structure.
Definition: res_speech.c:84
void ast_speech_start(struct ast_speech *speech)
Start speech recognition on a speech structure.
Definition: res_speech.c:122
int ast_speech_unregister(const char *engine_name)
Unregister a speech recognition engine.
Definition: res_speech.c:347
struct ast_speech_engine * ast_speech_unregister2(const char *engine_name)
Unregister a speech recognition engine.
Definition: res_speech.c:352
int ast_speech_write(struct ast_speech *speech, void *data, int len)
Write in signed linear audio to be recognized.
Definition: res_speech.c:144
int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
Change an engine specific attribute.
Definition: res_speech.c:169
void ast_speech_unregister_engines(int(*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data, void(*on_unregistered)(void *obj))
Unregister all speech recognition engines told to by callback.
Definition: res_speech.c:380
int ast_speech_destroy(struct ast_speech *speech)
Destroy a speech structure.
Definition: res_speech.c:251
static struct ast_speech_engine * default_engine
Definition: res_speech.c:43
int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
Change the type of results we want.
Definition: res_speech.c:308
int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
Load a local grammar on a speech structure.
Definition: res_speech.c:78
int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
Activate a loaded (either local or global) grammar.
Definition: res_speech.c:66
struct ast_speech * ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
Create a new speech structure using the engine specified.
Definition: res_speech.c:181
int ast_speech_register(struct ast_speech_engine *engine)
Register a speech recognition engine.
Definition: res_speech.c:316
int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
Get an engine specific attribute.
Definition: res_speech.c:175
int ast_speech_change_state(struct ast_speech *speech, int state)
Change state of a speech structure.
Definition: res_speech.c:278
static int load_module(void)
Definition: res_speech.c:416
static int unload_module(void)
Definition: res_speech.c:410
struct ast_speech_result * ast_speech_results_get(struct ast_speech *speech)
Return the results of a recognition from the speech structure.
Definition: res_speech.c:90
static int should_unregister(const struct ast_speech_engine *engine, void *data)
#define NULL
Definition: resample.c:96
Generic Speech Recognition API.
@ AST_SPEECH_STATE_READY
Definition: speech.h:40
@ AST_SPEECH_STATE_NOT_READY
Definition: speech.h:39
@ AST_SPEECH_STATE_WAIT
Definition: speech.h:41
ast_speech_results_type
Definition: speech.h:45
@ AST_SPEECH_RESULTS_TYPE_NBEST
Definition: speech.h:47
@ AST_SPEECH_RESULTS_TYPE_NORMAL
Definition: speech.h:46
@ AST_SPEECH_QUIET
Definition: speech.h:32
@ AST_SPEECH_HAVE_RESULTS
Definition: speech.h:34
@ AST_SPEECH_SPOKE
Definition: speech.h:33
static force_inline int attribute_pure ast_strlen_zero(const char *s)
Definition: strings.h:65
Format capabilities structure, holds formats + preference order + etc.
Definition: format_cap.c:54
Definition of a media format.
Definition: format.c:43
struct ast_format_cap * formats
Definition: speech.h:106
int(* deactivate)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:90
int(* start)(struct ast_speech *speech)
Definition: speech.h:96
int(* change_results_type)(struct ast_speech *speech, enum ast_speech_results_type results_type)
Definition: speech.h:102
struct ast_speech_result *(* get)(struct ast_speech *speech)
Definition: speech.h:104
int(* get_setting)(struct ast_speech *speech, const char *name, char *buf, size_t len)
Definition: speech.h:100
int(* destroy)(struct ast_speech *speech)
Definition: speech.h:82
int(* dtmf)(struct ast_speech *speech, const char *dtmf)
Definition: speech.h:94
int(* load)(struct ast_speech *speech, const char *grammar_name, const char *grammar)
Definition: speech.h:84
int(* change)(struct ast_speech *speech, const char *name, const char *value)
Definition: speech.h:98
int(* activate)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:88
char * name
Definition: speech.h:78
int(* create)(struct ast_speech *speech, struct ast_format *format)
Definition: speech.h:80
int(* write)(struct ast_speech *speech, void *data, int len)
Definition: speech.h:92
int(* unload)(struct ast_speech *speech, const char *grammar_name)
Definition: speech.h:86
char * grammar
Definition: speech.h:119
char * processing_sound
Definition: speech.h:60
int state
Definition: speech.h:62
struct ast_format * format
Definition: speech.h:64
struct ast_speech_engine * engine
Definition: speech.h:72
ast_mutex_t lock
Definition: speech.h:56
enum ast_speech_results_type results_type
Definition: speech.h:70
struct ast_speech_result * results
Definition: speech.h:68
int value
Definition: syslog.c:37
Handy terminal functions for vt* terms.
Support for translation of data formats. translate.c.
int ast_translator_best_choice(struct ast_format_cap *dst_cap, struct ast_format_cap *src_cap, struct ast_format **dst_fmt_out, struct ast_format **src_fmt_out)
Chooses the best translation path.
Definition: translate.c:1402
#define RAII_VAR(vartype, varname, initval, dtor)
Declare a variable that will call a destructor function when it goes out of scope.
Definition: utils.h:941
#define ast_assert(a)
Definition: utils.h:739
#define ast_clear_flag(p, flag)
Definition: utils.h:77
#define ast_set_flag(p, flag)
Definition: utils.h:70