dict.h File Reference

Operations on dictionary. More...

#include <hash_table.h>
#include <s3types.h>
#include "mdef.h"
#include "lts.h"

Go to the source code of this file.

Classes

struct  dictword_t
 a structure for one dictionary word. More...
struct  dict_t
 a structure for a dictionary. More...

Defines

#define DICT_INC_SZ   4096
#define dict_size(d)   ((d)->n_word)
#define dict_basewid(d, w)   ((d)->word[w].basewid)
#define dict_wordstr(d, w)   ((d)->word[w].word)
#define dict_nextalt(d, w)   ((d)->word[w].alt)
#define dict_pronlen(d, w)   ((d)->word[w].pronlen)
#define dict_pron(d, w, p)   ((d)->word[w].ciphone[p])
#define dict_filler_start(d)   ((d)->filler_start)
#define dict_filler_end(d)   ((d)->filler_end)
#define dict_startwid(d)   ((d)->startwid)
#define dict_finishwid(d)   ((d)->finishwid)
#define dict_silwid(d)   ((d)->silwid)
#define dict_first_phone(d, w)   ((d)->word[w].ciphone[0])
#define dict_second_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
#define dict_last_phone(d, w)   ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
#define S3_START_WORD   "<s>"
#define S3_FINISH_WORD   "</s>"
#define S3_SILENCE_WORD   "<sil>"
#define S3_UNKNOWN_WORD   "<UNK>"

Functions

S3DECODER_EXPORT dict_t * dict_init (mdef_t *mdef, const char *dictfile, const char *fillerfile, const char comp_sep, int useLTS, int breport)
S3DECODER_EXPORT s3wid_t dict_wordid (dict_t *d, const char *word)
S3DECODER_EXPORT int32 dict_filler_word (dict_t *d, s3wid_t w)
s3wid_t dict_add_word (dict_t *d, char *word, s3cipid_t *p, int32 np)
s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len)
const char * dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
s3wid_t _dict_basewid (dict_t *d, s3wid_t w)
char * _dict_wordstr (dict_t *d, s3wid_t wid)
s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid)
int32 dict_word2basestr (char *word)
S3DECODER_EXPORT void dict_free (dict_t *d)
void dict_report (dict_t *d)

Detailed Description

Operations on dictionary.


Define Documentation

#define dict_basewid ( d, w )     ((d)->word[w].basewid)
#define dict_filler_end ( d )     ((d)->filler_end)
#define dict_filler_start ( d )     ((d)->filler_start)
#define dict_finishwid ( d )     ((d)->finishwid)
#define dict_first_phone ( d, w )     ((d)->word[w].ciphone[0])
#define DICT_INC_SZ   4096
#define dict_last_phone ( d, w )     ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
#define dict_nextalt ( d, w )     ((d)->word[w].alt)
#define dict_pron ( d, w, p )     ((d)->word[w].ciphone[p])

The CI phones of the word w at position p

Referenced by dict2pid_build().

#define dict_pronlen ( d, w )     ((d)->word[w].pronlen)

Referenced by dict2pid_build(), and dict2pid_dump().

#define dict_second_last_phone ( d, w )     ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
#define dict_silwid ( d )     ((d)->silwid)
#define dict_size ( d )     ((d)->n_word)

Packaged macro access to dictionary members

Referenced by dict2pid_build(), and dict2pid_dump().

#define dict_startwid ( d )     ((d)->startwid)
#define dict_wordstr ( d, w )     ((d)->word[w].word)

Referenced by dict2pid_build(), and dict2pid_dump().

#define S3_FINISH_WORD   "</s>"
#define S3_SILENCE_WORD   "<sil>"
#define S3_START_WORD   "<s>"
#define S3_UNKNOWN_WORD   "<UNK>"

Function Documentation

s3wid_t _dict_basewid ( dict_t * d,
s3wid_t  w 
)

Return base word id for given word id w (which may be itself). w must be valid.

s3wid_t _dict_nextalt ( dict_t * d,
s3wid_t  wid 
)

Return the next alternative word id for the given word id, which must be valid. The returned id may be BAD_S3WID if there is none.

char* _dict_wordstr ( dict_t * d,
s3wid_t  wid 
)

Return word string for given word id, which must be valid.

s3wid_t dict_add_word ( dict_t * d,
char *  word,
s3cipid_t * p,
int32  np 
)

Add a word with the given ciphone pronunciation list to the dictionary. Return value: Result word id if successful, BAD_S3WID otherwise

Parameters:
d The dictionary structure
word The word
const char* dict_ciphone_str ( dict_t * d,
s3wid_t  wid,
int32  pos 
)

Return value: CI phone string for the given word, phone position.

Parameters:
d In: Dictionary to look up
wid In: Component word being looked up
pos In: Pronunciation phone position
S3DECODER_EXPORT int32 dict_filler_word ( dict_t * d,
s3wid_t  w 
)

Return 1 if w is a filler word, 0 if not. A filler word is one that was read in from the filler dictionary; however, sentence START and FINISH words are not filler words.

Parameters:
d The dictionary structure
w The word

Referenced by dump_line().

S3DECODER_EXPORT void dict_free ( dict_t * d  ) 

Free memory allocated for the dictionary

S3DECODER_EXPORT dict_t* dict_init ( mdef_t * mdef,
const char *  dictfile,
const char *  fillerfile,
const char  comp_sep,
int  useLTS,
int  breport 
)

Initialize with given main and filler dictionary files. fillerfile can be NULL (but external modules might impose their own requirements). Return ptr to dict_t if successful, NULL otherwise.

Parameters:
mdef For looking up CI phone IDs; NULL if none, in which case CI phones kept internally
dictfile Main dictionary file
fillerfile Filler dictionary file
comp_sep Compound word separator character, or 0 if no compound words
useLTS Whether to use letter-to-sound rules
breport Whether we should report the progress
void dict_report ( dict_t * d  ) 

Report a dictionary structure

Parameters:
d A dictionary structure
s3wid_t dict_wids2compwid ( dict_t * d,
s3wid_t * wid,
int32  len 
)

Look for a compound word that matches the given word-id sequence. Return value: Base ID of compound word if found, else BAD_S3WID.

Parameters:
d In: Dictionary to look up
wid In: Component words to look for
len In: No. of component words
int32 dict_word2basestr ( char *  word  ) 

If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation specification), strip that trailing portion from it. Note that the given string is modified. Return value: If string was modified, the character position at which the original string was truncated; otherwise -1.

S3DECODER_EXPORT s3wid_t dict_wordid ( dict_t * d,
const char *  word 
)

Return word id for given word string if present. Otherwise return BAD_S3WID

Referenced by align_build_sent_hmm(), and word_cand_load().


Generated on 7 Mar 2010 by  doxygen 1.6.1