s3_decode.h File Reference
header for live mode decoding API
More...
#include <cmd_ln.h>
#include <fe.h>
#include "s3types.h"
#include "sphinx3_export.h"
#include "kb.h"
#include "kbcore.h"
#include "dag.h"
#include "search.h"
Go to the source code of this file.
Classes |
struct | s3_decode_t |
Defines |
#define | S3_DECODE_SUCCESS 0 |
#define | S3_DECODE_ERROR_OUT_OF_MEMORY -0x01 |
#define | S3_DECODE_ERROR_NULL_POINTER -0x02 |
#define | S3_DECODE_ERROR_INVALID_STATE -0x04 |
#define | S3_DECODE_ERROR_INTERNAL -0x08 |
#define | S3_DECODE_STATE_IDLE 0 |
#define | S3_DECODE_STATE_DECODING 1 |
#define | S3_DECODE_STATE_FINISHED 2 |
Functions |
S3DECODER_EXPORT int | s3_decode_init (s3_decode_t *_decode, cmd_ln_t *_config) |
S3DECODER_EXPORT void | s3_decode_close (s3_decode_t *_decode) |
S3DECODER_EXPORT int | s3_decode_begin_utt (s3_decode_t *_decode, char *_uttid) |
S3DECODER_EXPORT void | s3_decode_end_utt (s3_decode_t *_decode) |
S3DECODER_EXPORT int | s3_decode_process (s3_decode_t *_decode, float32 **_frames, int32 _num_frames) |
S3DECODER_EXPORT int | s3_decode_hypothesis (s3_decode_t *_decode, char **_uttid, char **_hyp_str, hyp_t ***_hyp_segs) |
S3DECODER_EXPORT dag_t * | s3_decode_word_graph (s3_decode_t *_decode) |
S3DECODER_EXPORT void | s3_decode_set_lm (s3_decode_t *_decode, const char *lmname) |
S3DECODER_EXPORT void | s3_decode_delete_lm (s3_decode_t *_decode, const char *lmname) |
S3DECODER_EXPORT void | s3_decode_read_lm (s3_decode_t *_decode, const char *lmfile, const char *lmname) |
Variables |
S3DECODER_EXPORT arg_t | S3_DECODE_ARG_DEFS [] |
Detailed Description
header for live mode decoding API
Define Documentation
#define S3_DECODE_ERROR_INTERNAL -0x08 |
#define S3_DECODE_ERROR_INVALID_STATE -0x04 |
#define S3_DECODE_ERROR_NULL_POINTER -0x02 |
#define S3_DECODE_ERROR_OUT_OF_MEMORY -0x01 |
#define S3_DECODE_STATE_DECODING 1 |
#define S3_DECODE_STATE_FINISHED 2 |
#define S3_DECODE_STATE_IDLE 0 |
#define S3_DECODE_SUCCESS 0 |
Function Documentation
S3DECODER_EXPORT int s3_decode_begin_utt |
( |
s3_decode_t * |
_decode, |
|
|
char * |
_uttid | |
|
) |
| | |
Marks the start of the current utterance. An utterance is a session of speech decoding that starts with a call to s3_decode_begin_utt() and ends with a call to s3_decode_end_utt(). During an utterance, speech data is processed with either s3_decode_process_raw() or s3_decode_process_ceps(). Decoding results (hypothesis) can be retrieved any time after the start of an utterance using s3_decode_hypothesis(). All previous results will be clobbered at the start of a new utterance.
At the moment, there is an undocumented time limit to the length of an utterance. (Yitao: there is?)
- Parameters:
-
| _decode | Pointer to the decoder. |
| _uttid | Utterance ID string. If null, a somewhat unique utterance id will be generated instead. |
- Returns:
- 0 for success. -1 for failure.
- See also:
- s3_decode_end_utt
-
s3_decode_process
-
s3_decode_hypothesis
Referenced by main().
S3DECODER_EXPORT void s3_decode_close |
( |
s3_decode_t * |
_decode |
) |
|
Wraps up the Sphinx3 decoder. All internal modules are closed or unloaded. Internal variables are freed and/or set to a finishing state. This function should be called once the user is finished with the Sphinx3 decoder.
- Parameters:
-
| _decode | Pointer to the decoder. |
- See also:
- s3_decode_init
Referenced by main().
S3DECODER_EXPORT void s3_decode_delete_lm |
( |
s3_decode_t * |
_decode, |
|
|
const char * |
lmname | |
|
) |
| | |
S3DECODER_EXPORT void s3_decode_end_utt |
( |
s3_decode_t * |
_decode |
) |
|
Marks the end of the current utterance. The Sphinx3 decoder can no longer process speech data until the start of the next utterance. Any hypothesis retrieved prior to the end of the utterance is called a partial hypothesis. Any hypothesis retrieved after the end of the utterance is called the final hypothesis. See s3_decode_hypothesis() for how to retrieve a hypothesis.
- Parameters:
-
| _decode | Pointer to the decoder |
- See also:
- s3_decode_begin_utt
-
s3_decode_process
-
s3_decode_hypothesis
Referenced by process_thread().
S3DECODER_EXPORT int s3_decode_hypothesis |
( |
s3_decode_t * |
_decode, |
|
|
char ** |
_uttid, |
|
|
char ** |
_hyp_str, |
|
|
hyp_t *** |
_hyp_segs | |
|
) |
| | |
Retrieve partial or final decoding results (hypothesis). Any hypothesis retrieved prior to the end of the utterance is called a partial hypothesis. Any hypothesis retrieved after the end of the utterance is called the final hypothesis. The hypothesis can be returned in a plain READ-ONLY string and/or an array of READ-ONLY word segments. In the plain string result, all filler and end words are filtered out, as well as the pronunciation information. What is left is a very readable string representation of the decoding result. There is no such filtering in the word segment result.
Here is an example of how to use the result returned by s3_decode_hypothesis():
s3_decode_t d;
char *str, *uttid;
hyp_t **segs;
...
s3_decode_hypothesis(&d, &uttid, &str, &segs);
printf("Decoded string: %s\n", str);
for (; *segs; segs++) {
printf("Word-segment id: %i\n", (*segs)->id);
}
- Parameters:
-
| _decode | Pointer to the decoder. |
| _uttid | Pointer to utterance ID string. |
| _hyp_str | Return pointer to a READ-ONLY string. If null, the string is not returned. |
| _hyp_segs | Return pointer to a null-terminated array of word segments. If null, the array is not returned. |
- Returns:
- 0 for success. -1 for failure.
Referenced by main(), and process_thread().
S3DECODER_EXPORT int s3_decode_init |
( |
s3_decode_t * |
_decode, |
|
|
cmd_ln_t * |
_config | |
|
) |
| | |
Initializes a Sphinx3 decoder object (re-entrant). Internal modules, e.g. search algorithms, language model, acoustic model, etc., are read from file and initialized. The decoder internal variables are set to a starting state.
This version of the Sphinx3 decoder assumes the user has externally parsed arguments using cmd_ln_parse_r() or cmd_ln_parse_file_r(). The user is responsible for calling cmd_ln_free_r() when he/she is done with the decoder.
- Parameters:
-
| _decode | Pointer to the decoder. |
| _config | Pointer to the command-line object returned by cmd_ln_parse_r(). |
- Returns:
- 0 for success. -1 for failure.
Referenced by main().
S3DECODER_EXPORT int s3_decode_process |
( |
s3_decode_t * |
_decode, |
|
|
float32 ** |
_frames, |
|
|
int32 |
_num_frames | |
|
) |
| | |
Process a buffer of cepstrum frames for the current utterance. This function must be called during an utterance, that is, between calls to s3_decode_begin_utt() and s3_decode_end_utt().
One common issue with the Sphinx3 decoder is a mismatch between the parameters of the signal processor and those of the acoustic model. Please double check with the acoustic model training scripts and your signal processing front-end to make sure the cepstra are generated consistently.
- Parameters:
-
| _decode | Pointer to the decoder. |
| _frames | Buffer of audio feature frames. |
| _num_frames | Number of frames in the buffer. |
- Returns:
- 0 for success. -1 for failure.
- See also:
- s3_decode_begin_utt
-
s3_decode_end_utt
-
s3_decode_process_ceps
Referenced by process_thread().
S3DECODER_EXPORT void s3_decode_read_lm |
( |
s3_decode_t * |
_decode, |
|
|
const char * |
lmfile, |
|
|
const char * |
lmname | |
|
) |
| | |
Read LM from a file.
- Parameters:
-
| _decode | Pointer to the decoder. |
| lmfile | LM file name. |
| lmname | LM name associated with this file. |
- See also:
- s3_decode_set_lm
S3DECODER_EXPORT void s3_decode_set_lm |
( |
s3_decode_t * |
_decode, |
|
|
const char * |
lmname | |
|
) |
| | |
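Note: the following description, parameters, and return value document s3_decode_word_graph() (which returns dag_t *), not s3_decode_set_lm() (which returns void). Retrieve a word graph of final hypothesis. You must call s3_decode_end_utt() before this. See dag.h and astar.h for information on what to do with this structure.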
- Parameters:
-
| _decode | Pointer to the decoder. |
- Returns:
- A dag_t structure, or NULL on failure. This pointer becomes invalid after a call to s3_decode_begin_utt().
Variable Documentation