00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 /* 00035 * fsg_search.h -- Search structures for FSM decoding. 00036 * 00037 * ********************************************** 00038 * CMU ARPA Speech Project 00039 * 00040 * Copyright (c) 2004 Carnegie Mellon University. 00041 * ALL RIGHTS RESERVED. 00042 * ********************************************** 00043 * 00044 * HISTORY 00045 * 00046 * $Log: fsg_search.h,v $ 00047 * Revision 1.2 2006/02/23 05:12:43 arthchan2003 00048 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3 00049 * 00050 * Revision 1.1.2.7 2006/01/16 18:20:46 arthchan2003 00051 * Remove junks in the code, change the reporting from printf to log_hypstr. 00052 * 00053 * Revision 1.1.2.6 2005/07/24 19:34:46 arthchan2003 00054 * Removed search_hyp_t, used srch_hyp_t instead 00055 * 00056 * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003 00057 * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID 00058 * 00059 * Revision 1.1.2.4 2005/07/17 05:44:32 arthchan2003 00060 * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet. 00061 * 00062 * Revision 1.1.2.3 2005/07/13 18:39:48 arthchan2003 00063 * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet. 00064 * 00065 * Revision 1.1.2.2 2005/06/28 07:01:20 arthchan2003 00066 * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed. The number of empty functions in fsg_search is now decreased from 35 to 30. 00067 * 00068 * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003 00069 * Sphinx 2 fsg mainpulation routines. Compiled with faked functions. Currently fended off from users. 00070 * 00071 * Revision 1.2 2004/07/23 23:36:34 egouvea 00072 * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently 00073 * 00074 * Revision 1.6 2004/07/20 13:40:55 rkm 00075 * Added FSG get/set start/final state functions. 00076 * 00077 * Revision 1.5 2004/07/16 19:55:28 rkm 00078 * Added state information to hypothesis. 00079 * 00080 * Revision 1.1 2004/07/16 00:57:12 egouvea 00081 * Added Ravi's implementation of FSG support. 00082 * 00083 * Revision 1.4 2004/07/07 13:56:33 rkm 00084 * Added reporting of (acoustic score - best senone score)/frame 00085 * 00086 * Revision 1.3 2004/06/22 15:36:12 rkm 00087 * Added partial result handling in FSG mode 00088 * 00089 * Revision 1.2 2004/05/27 14:22:57 rkm 00090 * FSG cross-word triphones completed (but for single-phone words) 00091 * 00092 * Revision 1.1.1.1 2004/03/01 14:30:31 rkm 00093 * 00094 * 00095 * Revision 1.6 2004/02/27 16:15:13 rkm 00096 * Added FSG switching 00097 * 00098 * Revision 1.5 2004/02/27 15:05:21 rkm 00099 * *** empty log message *** 00100 * 00101 * Revision 1.4 2004/02/26 14:48:20 rkm 00102 * *** empty log message *** 00103 * 00104 * Revision 1.3 2004/02/26 01:14:48 rkm 00105 * *** empty log message *** 00106 * 00107 * Revision 1.2 2004/02/24 18:13:05 rkm 00108 * Added NULL transition handling 00109 * 00110 * Revision 1.1 2004/02/23 15:53:45 rkm 00111 * Renamed from fst to fsg 00112 * 00113 * Revision 1.1 2004/02/19 21:16:54 rkm 00114 * Added fsg_search.{c,h} 00115 * 00116 * 00117 * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00118 * Started. 00119 */ 00120 00121 00122 #ifndef __S2_FSG_SEARCH_H__ 00123 #define __S2_FSG_SEARCH_H__ 00124 00125 #define HYP_SZ 1024 00126 00127 00128 #include <stdio.h> 00129 00130 #include <cmd_ln.h> 00131 #include <logmath.h> 00132 #include <s3types.h> 00133 #include <glist.h> 00134 #include <word_fsg.h> 00135 #include <fsg_lextree.h> 00136 #include <fsg_history.h> 00137 #include <ascr.h> 00138 #include <search.h> 00139 #include <dict.h> 00140 #include <mdef.h> 00141 #include <tmat.h> 00142 #include <hmm.h> 00143 00144 00145 #ifdef __cplusplus 00146 extern "C" { 00147 #endif 00148 #if 0 00149 /* Fool Emacs. */ 00150 } 00151 #endif 00152 00153 typedef struct fsg_search_s { 00154 glist_t fsglist; /* List of all FSGs loaded */ 00155 00156 word_fsg_t *fsg; /* Currently active FSG; NULL if none. One 00157 must be made active before starting FSG 00158 decoding */ 00159 fsg_lextree_t *lextree; /* Lextree structure for the currently 00160 active FSG */ 00161 fsg_history_t *history; /* For storing the Viterbi search history */ 00162 00163 glist_t pnode_active; /* Those active in this frame */ 00164 glist_t pnode_active_next; /* Those activated for the next frame */ 00165 00166 int32 beam; /* Global threshold */ 00167 int32 pbeam; /* Threshold for phone transition */ 00168 int32 wbeam; /* Threshold for word exit */ 00169 00170 int32 frame; /* Current frame */ 00171 00172 int32 bestscore; /* For beam pruning */ 00173 int32 bpidx_start; /* First history entry index this frame */ 00174 00175 srch_hyp_t *hyp; /* Search hypothesis */ 00176 int32 ascr, lscr; /* Total acoustic and lm score for utt */ 00177 00178 int32 n_hmm_eval; /* Total HMMs evaluated this utt */ 00179 00180 int32 state; /* Whether IDLE or BUSY */ 00181 00182 hmm_context_t *hmmctx; 00183 00184 /*Added by Arthur at 20050627*/ 00185 int32 isUsealtpron; 00186 int32 isUseFiller; 00187 int32 isBacktrace; 00188 int32 isReportAltpron; 00189 char* DumpLatdir; 00190 int32 n_ci_phone; 00191 00192 dict_t *dict; 00193 mdef_t *mdef; 00194 tmat_t *tmat; 00195 ascr_t *am_score_pool; 00196 char* uttid; 00197 int32 *senscale; 00199 FILE* matchfp; 00200 FILE* matchsegfp; 00201 00202 cmd_ln_t *config; 00203 logmath_t *logmath; 00204 } fsg_search_t; 00205 00206 00207 /* Access macros */ 00208 #define fsg_search_frame(s) ((s)->frame) 00209 00210 00211 /* 00212 * Create, initialize and return a search module for the given FSM. 00213 * If no FSG is given (i.e., the argument is NULL), a search structure is 00214 * still created. If an FSG is provided, it is made the currently active 00215 * FSG. 00216 */ 00217 fsg_search_t *fsg_search_init (word_fsg_t *, void *srch_struct); 00218 00219 00223 void fsg_search_free(fsg_search_t *s); 00224 00225 /* 00226 * Lookup the FSG associated with the given name and return it, or NULL if 00227 * no match found. 00228 */ 00229 word_fsg_t *fsg_search_fsgname_to_fsg (fsg_search_t *, char *name); 00230 00231 00232 /* 00233 * Add the given FSG to the collection of FSGs known to this search object. 00234 * The given fsg is simply added to the collection. It is not automatically 00235 * made the currently active one. 00236 * The name of the new FSG must not match any of the existing ones. If so, 00237 * FALSE is returned. If successfully added, TRUE is returned. 00238 */ 00239 int fsg_search_add_fsg (fsg_search_t *, word_fsg_t *); 00240 00241 00242 /* 00243 * Delete the given FSG from the known collection. Free the FSG itself, 00244 * and if it was the currently active FSG, also free the associated search 00245 * structures and leave the current FSG undefined. 00246 */ 00247 int fsg_search_del_fsg (fsg_search_t *, word_fsg_t *); 00248 00249 00250 /* Like fsg_search_del_fsg(), but identifies the FSG by its name */ 00251 int fsg_search_del_fsg_byname (fsg_search_t *, char *name); 00252 00253 00254 /* 00255 * Switch to a new FSG (identified by its string name). Must not be invoked 00256 * when search is busy (ie, in the midst of an utterance. That's an error 00257 * and FALSE is returned. If successful, returns TRUE. 00258 */ 00259 int fsg_search_set_current_fsg (fsg_search_t *, char *); 00260 00261 00262 /* 00263 * Deallocate search structure. 00264 */ 00265 void fsg_search_free (fsg_search_t *); 00266 00267 00268 /* 00269 * Prepare the FSG search structure for beginning decoding of the next 00270 * utterance. 00271 */ 00272 void fsg_search_utt_start (fsg_search_t *); 00273 00274 00275 /* 00276 * Windup and clean the FSG search structure after utterance. Fill in the 00277 * results of search: fsg_search_t.{hyp,ascr,lscr,frame}. (But some fields 00278 * of hyp are left unfilled for now: conf, latden, phone_perp.) 00279 */ 00280 void fsg_search_utt_end (fsg_search_t *); 00281 00282 00283 /* 00284 * Step one frame forward through the Viterbi search. 00285 */ 00286 void fsg_search_frame_fwd (fsg_search_t *); 00287 00288 00289 /* 00290 * Compute the partial or final Viterbi backtrace result. (The result can 00291 * be retrieved using the API functions seach_result or search_get_hyp().) 00292 * If "check_fsg_final_state" is TRUE, the backtrace starts from the best 00293 * history entry ending in the final state (if it exists). Otherwise it 00294 * starts from the best entry, regardless of the terminating state (usually 00295 * used for partial results). 00296 */ 00297 void fsg_search_history_backtrace (fsg_search_t *search, 00298 int check_fsg_final_state); 00299 00300 /* 00301 * Return the start (or final) state of the currently active FSG, if any. 00302 * Otherwise return -1. 00303 */ 00304 int32 fsg_search_get_start_state (fsg_search_t *); 00305 int32 fsg_search_get_final_state (fsg_search_t *); 00306 00307 00308 /* 00309 * Set the start (or final) state of the current active FSG, if any, to the 00310 * given state. This operation can only be done in between utterances, not 00311 * in the midst of one. Return the previous start (or final) state if 00312 * successful. Return -1 if any error. 00313 */ 00314 int32 fsg_search_set_start_state (fsg_search_t *, int32 state); 00315 int32 fsg_search_set_final_state (fsg_search_t *, int32 state); 00316 00317 00318 void fsg_search_sen_active (fsg_search_t *search); 00319 00320 #ifdef __cplusplus 00321 } 00322 #endif 00323 00324 00325 #endif