00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 /* 00035 * fsg_history.h -- FSG Viterbi decode history 00036 * 00037 * ********************************************** 00038 * CMU ARPA Speech Project 00039 * 00040 * Copyright (c) 1999 Carnegie Mellon University. 00041 * ALL RIGHTS RESERVED. 00042 * ********************************************** 00043 * 00044 * HISTORY 00045 * 00046 * $Log$ 00047 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00048 * A Great Reorganzation of header files and executables 00049 * 00050 * Revision 1.2 2006/02/23 05:10:18 arthchan2003 00051 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3 00052 * 00053 * Revision 1.1.2.5 2005/07/24 19:34:46 arthchan2003 00054 * Removed search_hyp_t, used srch_hyp_t instead 00055 * 00056 * Revision 1.1.2.4 2005/07/24 01:34:54 arthchan2003 00057 * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID 00058 * 00059 * Revision 1.1.2.3 2005/07/13 18:39:47 arthchan2003 00060 * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet. 00061 * 00062 * Revision 1.1.2.2 2005/06/28 07:01:20 arthchan2003 00063 * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed. The number of empty functions in fsg_search is now decreased from 35 to 30. 00064 * 00065 * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003 00066 * Sphinx 2 fsg mainpulation routines. Compiled with faked functions. Currently fended off from users. 00067 * 00068 * Revision 1.1 2004/07/16 00:57:12 egouvea 00069 * Added Ravi's implementation of FSG support. 00070 * 00071 * Revision 1.7 2004/07/07 22:30:35 rkm 00072 * *** empty log message *** 00073 * 00074 * Revision 1.6 2004/07/07 13:56:33 rkm 00075 * Added reporting of (acoustic score - best senone score)/frame 00076 * 00077 * Revision 1.5 2004/06/25 14:49:08 rkm 00078 * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state 00079 * 00080 * Revision 1.4 2004/06/23 20:32:16 rkm 00081 * *** empty log message *** 00082 * 00083 * Revision 1.3 2004/05/27 15:16:08 rkm 00084 * *** empty log message *** 00085 * 00086 * 00087 * 25-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00088 * Started, based on S3.3 version. 00089 */ 00090 00091 00092 #ifndef __S2_FSG_HISTORY_H__ 00093 #define __S2_FSG_HISTORY_H__ 00094 00095 00096 #include <stdio.h> 00097 00098 #include <glist.h> 00099 #include <s3types.h> 00100 #include <blkarray_list.h> 00101 #include <fsg_psubtree.h> 00102 #include <word_fsg.h> 00103 #include <search.h> 00104 #include <dict.h> 00105 00106 00107 #ifdef __cplusplus 00108 extern "C" { 00109 #endif 00110 #if 0 00111 /* Fool Emacs. */ 00112 } 00113 #endif 00114 00115 /* 00116 * The Viterbi history structure. This is a tree, with the root at the 00117 * FSG start state, at frame 0, with a null predecessor. 00118 */ 00119 00120 /* 00121 * A single Viterbi history entry 00122 */ 00123 typedef struct fsg_hist_entry_s { 00124 word_fsglink_t *fsglink; /* Link taken result in this entry */ 00125 int32 frame; /* Ending frame for this entry */ 00126 int32 score; /* Total path score at the end of this 00127 transition */ 00128 int32 pred; /* Predecessor entry; -1 if none */ 00129 int32 lc; /* Left context provided by this entry to 00130 succeeding words */ 00131 fsg_pnode_ctxt_t rc; /* Possible right contexts to which this entry 00132 applies */ 00133 } fsg_hist_entry_t; 00134 00135 /* Access macros */ 00136 #define fsg_hist_entry_fsglink(v) ((v)->fsglink) 00137 #define fsg_hist_entry_frame(v) ((v)->frame) 00138 #define fsg_hist_entry_score(v) ((v)->score) 00139 #define fsg_hist_entry_pred(v) ((v)->pred) 00140 #define fsg_hist_entry_lc(v) ((v)->lc) 00141 #define fsg_hist_entry_rc(v) ((v)->rc) 00142 00143 00144 /* 00145 * The entire tree of history entries (fsg_history_t.entries). 00146 * Optimization: In a given frame, there may be several history entries, with 00147 * the same left and right phonetic context, terminating in a particular state. 00148 * Only the best scoring one of these needs to be saved, since everything else 00149 * will be pruned according to the Viterbi algorithm. frame_entries is used 00150 * temporarily in each frame to determine these best scoring entries in that 00151 * frame. Only the ones not pruned are transferred to entries at the end of 00152 * the frame. However, null transitions are a problem since they create 00153 * entries that depend on entries created in the CURRENT frame. Hence, this 00154 * pruning is done in two stages: first for the non-null transitions, and then 00155 * for the null transitions alone. (This solution is sub-optimal, and can be 00156 * improved with a little more work. SMOP.) 00157 * Why is frame_entries a list? Each entry has a unique terminating state, 00158 * and has a unique lc CIphone. But it has a SET of rc CIphones. 00159 * frame_entries[s][lc] is an ordered list of entries created in the current 00160 * frame, terminating in state s, and with left context lc. The list is in 00161 * descending order of path score. When a new entry with (s,lc) arrives, 00162 * its position in the list is determined. Then its rc set is modified by 00163 * subtracting the union of the rc's of all its predecessors (i.e., better 00164 * scoring entries). If the resulting rc set is empty, the entry is discarded. 00165 * Otherwise, it is inserted, and the rc sets of all downstream entries in the 00166 * list are updated by subtracting the new entry's rc. If any of them becomes 00167 * empty, it is also discarded. 00168 * As mentioned earlier, this procedure is applied in two stages, for the 00169 * non-null transitions, and the null transitions, separately. 00170 */ 00171 typedef struct fsg_history_s { 00172 word_fsg_t *fsg; /* The FSG for which this object applies */ 00173 blkarray_list_t *entries; /* A list of history table entries; the root 00174 entry is the first element of the list */ 00175 glist_t **frame_entries; 00176 00177 /*Added by Arthur at 20050627*/ 00178 int32 n_ciphone; 00179 } fsg_history_t; 00180 00181 00182 /* 00183 * One-time intialization: Allocate and return an initially empty history 00184 * module. 00185 */ 00186 fsg_history_t *fsg_history_init (word_fsg_t *fsg,int32 n_ciphone); 00187 00188 void fsg_history_free (fsg_history_t *); 00189 00190 void fsg_history_utt_start (fsg_history_t *); 00191 00192 00193 void fsg_history_utt_end (fsg_history_t *); 00194 00195 00196 /* 00197 * Create a history entry recording the completion of the given FSG 00198 * transition, at the end of the given frame, with the given score, and 00199 * the given predecessor history entry. 00200 * The entry is initially temporary, and may be superseded by another 00201 * with a higher score. The surviving entries must be transferred to 00202 * the main history table, via fsg_history_end_frame(). 00203 */ 00204 void fsg_history_entry_add (fsg_history_t *, 00205 word_fsglink_t *, /* FSG transition */ 00206 int32 frame, 00207 int32 score, 00208 int32 pred, 00209 int32 lc, 00210 fsg_pnode_ctxt_t rc); 00211 00212 /* 00213 * Transfer the surviving history entries for this frame into the permanent 00214 * history table. This function can be called several times during a frame. 00215 * Each time, the entries surviving so far are transferred, and the temporary 00216 * lists cleared. This feature is used to handle the entries due to non-null 00217 * transitions and null transitions separately. 00218 */ 00219 void fsg_history_end_frame (fsg_history_t *); 00220 00221 00222 /* Clear the hitory table */ 00223 void fsg_history_reset (fsg_history_t *); 00224 00225 00226 /* Return the number of valid entries in the given history table */ 00227 int32 fsg_history_n_entries (fsg_history_t *h); 00228 00229 00230 /* 00231 * Viterbi backtrace. 00232 */ 00233 glist_t fsg_history_backtrace (fsg_history_t *); 00234 00235 00236 /* 00237 * Dump the Viterbi history data to the given file (mainly for debugging). 00238 */ 00239 void fsg_history_dump (fsg_history_t *vh, char const *uttid, FILE *fp, dict_t *dict); 00240 00241 00242 /* 00243 * Return a ptr to the history entry for the given ID; NULL if there is no 00244 * such entry. 00245 */ 00246 fsg_hist_entry_t *fsg_history_entry_get(fsg_history_t *, int32 id); 00247 00248 00249 /* 00250 * Switch the FSG associated with the given history module. Should be done 00251 * when the history list is empty. If not empty, the list is cleared. 00252 */ 00253 void fsg_history_set_fsg (fsg_history_t *, word_fsg_t *); 00254 00255 00256 /* Free the given Viterbi search history object */ 00257 void fsg_history_free (fsg_history_t *h); 00258 00259 00260 /* 00261 * Extract and fill out a srch_hyp_t structure from the given history entry 00262 * (index). Caller must allocate hyp. 00263 * Note: Must not be called with index <= 0. 00264 * Note: (hyp->wid < 0) iff the entry corresponds to a null transition. 00265 * Note: hyp->{conf,latden,phone_perp} have dummy values filled in. 00266 * Note: hyp->next is unaffected. 00267 * Return value: -1 if index <= 0 (error; hyp is not filled in); 00268 * the return value is +ve if it is a valid, real (non-dummy) entry. 00269 */ 00270 int32 fsg_history_entry_hyp_extract (fsg_history_t *h, int32 index, 00271 srch_hyp_t *hyp, dict_t *dict); 00272 00273 #ifdef __cplusplus 00274 } 00275 #endif 00276 00277 00278 #endif