00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * lextree.h -- 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.11 2006/02/23 15:08:24 arthchan2003 00053 * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: 00054 * 00055 * 1, Fixed memory leaks. 00056 * 2, Add logic for full triphone expansion. At this point, the 00057 * propagation of scores in word end is still incorrect. So composite 00058 * triphone should still be used by default. 00059 * 3, Removed lextree_copies_hmm_propagate. 00060 * 00061 * Revision 1.10.4.6 2005/11/17 06:28:50 arthchan2003 00062 * Changed the code to used compressed triphones. Not yet correct at this point 00063 * 00064 * Revision 1.10.4.5 2005/10/17 04:53:44 arthchan2003 00065 * Shrub the trees so that the run-time memory could be controlled. 00066 * 00067 * Revision 1.10.4.4 2005/10/07 19:34:31 arthchan2003 00068 * In full cross-word triphones expansion, the previous implementation has several flaws, e.g, 1, it didn't consider the phone beam on cross word triphones. 2, Also, when the cross word triphone phone is used, children of the last phones will be regarded as cross word triphone. So, the last phone should not be evaluated at all. Last implementation has not safe-guaded that. 3, The rescoring for language model is not done correctly. What we still need to do: a, test the algorithm in more databases. b, implement some speed up schemes. 00069 * 00070 * Revision 1.10.4.2 2005/09/25 19:23:55 arthchan2003 00071 * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code. 00072 * 00073 * Revision 1.10.4.1 2005/06/27 05:37:05 arthchan2003 00074 * Incorporated several fixes to the search. 1, If a tree is empty, it will be removed and put back to the pool of tree, so number of trees will not be always increasing. 2, In the previous search, the answer is always "STOP P I T G S B U R G H </s>"and filler words never occurred in the search. The reason is very simple, fillers was not properly propagated in the search at all <**exculamation**> This version fixed this problem. The current search will give <sil> P I T T S B U R G H </sil> </s> to me. This I think it looks much better now. 00075 * 00076 * Revision 1.10 2005/06/21 23:32:58 arthchan2003 00077 * Log. Introduced lextree_init and filler_init to wrap up lextree_build 00078 * process. Split the hmm propagation to propagation for leaves and 00079 * non-leaves node. This allows an easier time for turning off the 00080 * rescoring stage. However, the implementation is not clever enough. One 00081 * should split the array to leave array and non-leave array. 00082 * 00083 * Revision 1.7 2005/06/16 04:59:10 archan 00084 * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale. 00085 * 00086 * Revision 1.6 2005/06/13 04:02:59 archan 00087 * Fixed most doxygen-style documentation under libs3decoder. 00088 * 00089 * Revision 1.5 2005/04/25 23:53:35 archan 00090 * 1, Some minor modification of vithist_t, vithist_rescore can now support optional LM rescoring, vithist also has its own reporting routine. A new argument -lmrescore is also added in decode and livepretend. This can switch on and off the rescoring procedure. 2, I am reaching the final difficulty of mode 5 implementation. That is, to implement an algorithm which dynamically decide which tree copies should be entered. However, stuffs like score propagation in the leave nodes and non-leaves nodes are already done. 3, As briefly mentioned in 2, implementation of rescoring , which used to happened at leave nodes are now separated. The current implementation is not the most clever one. Wish I have time to change it before check-in to the canonical. 00091 * 00092 * Revision 1.4 2005/04/25 19:22:47 archan 00093 * Refactor out the code of rescoring from lexical tree. Potentially we want to turn off the rescoring if we need. 00094 * 00095 * Revision 1.3 2005/03/30 01:22:47 archan 00096 * Fixed mistakes in last updates. Add 00097 * 00098 * 00099 * 29-Feb-2000 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00100 * Modified some functions to be able to deal with HMMs with any number 00101 * of states. 00102 * 00103 * 07-Jul-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00104 * Added lextree_node_t.ci and lextree_ci_active(). 00105 * 00106 * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00107 * Started. 00108 */ 00109 00110 00111 #ifndef _S3_LEXTREE_H_ 00112 #define _S3_LEXTREE_H_ 00113 00114 #include <stdio.h> 00115 00116 #include <bitvec.h> 00117 #include <s3types.h> 00118 #include <glist.h> 00119 #include "kbcore.h" 00120 #include "hmm.h" 00121 #include "lm.h" 00122 #include "vithist.h" 00123 #include "ascr.h" 00124 #include "fast_algo_struct.h" 00125 #include "dict.h" 00126 #include "mdef.h" 00127 00128 #ifdef __cplusplus 00129 extern "C" { 00130 #endif 00131 #if 0 00132 } /* Fool Emacs into not indenting things. */ 00133 #endif 00134 00135 #define LEXTREE_OPERATION_SUCCESS 1 00136 #define LEXTREE_OPERATION_FAILURE 0 00137 00138 #define LEXTREE_TYPE_FILLER -1 00139 #define LEXTREE_TYPE_UNIGRAM 0 00140 #define LEXTREE_TYPE_BIGRAM 1 00141 #define LEXTREE_TYPE_TRIGRAM 2 00142 00143 #if 0 /*Number reserved but not used */ 00144 #define LEXTREE_TYPE_QUADGRAM 3 00145 #define LEXTREE_TYPE_QUINGRAM 4 00146 #endif 00147 00148 00179 /* 00180 * \struct lextree_node_t 00181 * One node in a lextree. 00182 */ 00183 typedef struct { 00184 hmm_t hmm; 00185 hmm_context_t *ctx; 00187 glist_t children; 00201 int32 wid; 00202 int32 prob; 00203 int32 ssid; 00204 s3cipid_t rc; 00206 s3cipid_t ci; 00207 int8 composite; 00208 } lextree_node_t; 00209 00210 /* Access macros; not meant for arbitrary use */ 00211 #define lextree_node_wid(n) ((n)->wid) 00212 #define lextree_node_prob(n) ((n)->prob) 00213 #define lextree_node_ssid(n) ((n)->ssid) 00214 #define lextree_node_rc(n) ((n)->rc) 00215 #define lextree_node_composite(n) ((n)->composite) 00216 #define lextree_node_frame(n) ((n)->frame) 00217 00218 00219 /* 00220 * \struct lextree_lcroot_t 00221 * Root nodes of the lextree valid for a given left context CIphone. 00222 */ 00223 typedef struct { 00224 s3cipid_t lc; /* Left context CIphone */ 00225 glist_t root; /* Its data.ptr are the root nodes (lextree_node_t *) of interest; subset 00226 of the entire lextree roots */ 00227 } lextree_lcroot_t; 00228 00229 /* 00230 * \struct lextree_t 00231 * Entire lextree. 00232 */ 00233 typedef struct { 00234 int32 type; 00236 glist_t root; 00237 lextree_lcroot_t *lcroot; 00239 int32 n_lc; 00240 int32 n_node; 00241 int32 n_alloc_node; 00242 int32 n_alloc_blk_sz; 00244 hmm_context_t *ctx; 00245 hmm_context_t *comctx; 00247 lextree_node_t **active; 00248 lextree_node_t **next_active; 00250 int32 n_active; 00251 int32 n_next_active; 00253 int32 best; 00254 int32 wbest; 00256 char prev_word[100]; 00257 } lextree_t; 00258 00259 /* Access macros; not meant for arbitrary usage */ 00260 #define lextree_type(l) ((l)->type) 00261 #define lextree_root(l) ((l)->root) 00262 #define lextree_lcroot(l) ((l)->lcroot) 00263 #define lextree_n_lc(l) ((l)->n_lc) 00264 #define lextree_n_node(l) ((l)->n_node) 00265 #define lextree_n_alloc_node(l) ((l)->n_alloc_node) 00266 #define lextree_active(l) ((l)->active) 00267 #define lextree_next_active(l) ((l)->next_active) 00268 #define lextree_n_active(l) ((l)->n_active) 00269 #define lextree_n_next_active(l) ((l)->n_next_active) 00270 00271 00275 lextree_t* lextree_init( 00276 kbcore_t *kbcore, 00277 lm_t* lm, 00278 char *lmname, 00279 int32 istreeUgProb, 00280 int32 bReport, 00281 int32 type 00282 ); 00283 00286 lextree_t* fillertree_init( 00287 kbcore_t *kbcore 00288 ); 00289 00290 00293 void lextree_report( 00294 lextree_t *ltree 00295 ); 00296 00303 lextree_t * 00304 lextree_build (kbcore_t *kbc, 00305 wordprob_t *wordprob, 00306 int32 n_word, 00307 s3cipid_t *lc, 00309 int32 type 00310 ); 00311 00312 /* Free a lextree that was created by lextree_build */ 00313 void lextree_free (lextree_t *lextree); 00314 00315 00320 void lextree_utt_end (lextree_t *l, kbcore_t *kbc); 00321 00322 00326 void lextree_enter (lextree_t *lextree, 00327 s3cipid_t lc, 00328 int32 frame, 00329 int32 inscore, 00330 int32 inhist, 00331 int32 thresh, 00333 kbcore_t *kbc 00334 ); 00335 00341 void lextree_active_swap (lextree_t *lextree 00342 ); 00343 00344 00349 void lextree_ssid_active (lextree_t *lextree, 00350 uint8 *ssid, 00352 uint8 *comssid 00354 ); 00355 00359 void lextree_ci_active (lextree_t *lextree, 00360 bitvec_t *ci_active 00361 ); 00362 00368 int32 lextree_hmm_eval (lextree_t *lextree, 00369 kbcore_t *kbc, 00370 ascr_t *ascr, 00371 int32 f, 00372 FILE *fp 00373 ); 00374 00375 /* 00376 * ARCHAN: Starting from Sphinx 3.6, lextree_hmm_propagate is separated to 00377 * lextree_hmm_propagate_non_leaves and lextree_hmm_propagate_leaves. This allows 00378 * the rescoring routine to be more easily switched off 00379 */ 00380 00405 int32 lextree_hmm_propagate_non_leaves (lextree_t *lextree, 00407 kbcore_t *kbc, 00408 int32 cf, 00409 int32 th, 00410 int32 pth, 00411 int32 wth, 00412 pl_t* pl 00413 ); 00414 00415 00426 int32 lextree_hmm_propagate_leaves (lextree_t *lextree, 00428 kbcore_t *kbc, 00429 vithist_t *vh, 00431 int32 cf, 00432 int32 wth 00433 ); 00434 00435 00442 void lextree_hmm_histbin (lextree_t *lextree, 00443 int32 bestscr, 00444 int32 *bin, 00446 int32 nbin, 00447 int32 bw 00448 ); 00449 00451 void lextree_dump (lextree_t *lextree, 00452 dict_t *dict, 00453 mdef_t *mdef, 00454 FILE *fp, 00455 int32 fmt 00456 ); 00457 00459 int32 num_lextree_links(lextree_t *ltree 00460 ); 00461 #if 0 00462 { /* Stop indent from complaining */ 00463 #endif 00464 #ifdef __cplusplus 00465 } 00466 #endif 00467 00468 #endif