00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * fast_algo_struct.h -- Various forms of pruning beam 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1999 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.10 2006/02/22 16:39:43 arthchan2003 00053 * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Initialize beam->n_ciphone properly, 2, use ckd_free instead of free, use float64 for subvqbeam and cipbeam. 3, Add a proper free function for fast_gmm_free 00054 * 00055 * 00056 * Revision 1.7.4.5 2005/11/17 06:07:07 arthchan2003 00057 * Added fast_gmm_free in the header. 00058 * 00059 * Revision 1.7.4.4 2005/07/05 21:28:57 arthchan2003 00060 * 1, Merged from HEAD. 2, Remove redundant keyword in cont_mgau. 00061 * 00062 * Revision 1.7.4.3 2005/07/05 05:47:59 arthchan2003 00063 * Fixed dox-doc. struct level of documentation are included. 00064 * 00065 * Revision 1.7.4.2 2005/07/04 02:44:25 arthchan2003 00066 * Changed float32 to float64 for ci_pbeam and svqbeam. Code now compiled. 00067 * 00068 * Revision 1.7.4.1 2005/06/27 05:22:19 arthchan2003 00069 * Merged from the HEAD. 00070 * 00071 * Revision 1.9 2005/06/30 13:08:45 egouvea 00072 * Beams in linear scale have to be float64, since they can be easily defined as < 1e-40 00073 * 00074 * Revision 1.8 2005/06/22 08:00:09 arthchan2003 00075 * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs. 00076 * 00077 * Revision 1.7 2005/06/21 18:26:38 arthchan2003 00078 * Log. fast_algo_struct.c go through major changes in the gentle 00079 * refactoring process. It is the location of several wrapper structures 00080 * that control fast search. That includes beam_t for storing beams and 00081 * scores. pl_t for storing structure for phoneme lookahead, histprune_t 00082 * for storing structures for histogram pruning. Lastly 00083 * fast_algo_struct_t, for storing structures for fast GMM 00084 * computation. 00085 * 00086 * Log. General Remark All of them now has consistent inteface, _init, 00087 * _report and _free. They are respectively used for allocation, 00088 * reporting and deallocation of the routine. Doxygen documentation are 00089 * fixed for all structures. 00090 * 00091 * Log. Individual changes; beam_t start to own bestscore, bestwordscore, 00092 * wordbestscores, wordbestexits. They were owned by kb_t. histprune_t 00093 * now wrapped up maxwpf, maxhmmpdf, maxhistpf and 00094 * hmm_hist_binsize. Currently, the beam size determination routine is 00095 * controlled by search implementation modules. It is done because 00096 * wrapping that operation up means we may need to introduce a bridge 00097 * between beam_t and histprune_t. pl_t is now owning heuristic type, 00098 * the phoneme lookahead beam size. It also wrapped up phoneme heuristic 00099 * computation. 00100 * 00101 * Revision 1.5 2005/04/20 03:33:54 archan 00102 * Remove pl_win and pl_win_strt, Now consider them as the parameters of the search abstraction in srch.c 00103 * 00104 * Revision 1.4 2005/03/30 01:22:46 archan 00105 * Fixed mistakes in last updates. Add 00106 * 00107 * 00108 * 19-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00109 * Started. 00110 */ 00111 00118 #ifndef _S3_BEAM_H_ 00119 #define _S3_BEAM_H_ 00120 00121 #include <s3types.h> 00122 #include <mdef.h> 00123 #include <ascr.h> 00124 #include <logmath.h> 00125 00126 #ifdef __cplusplus 00127 extern "C" { 00128 #endif 00129 #if 0 00130 } /* Fool Emacs into not indenting things. */ 00131 #endif 00132 00133 00149 typedef struct { 00150 int32 hmm; 00151 int32 ptrans; 00152 int32 word; 00153 int32 ptranskip; 00154 int32 wordend; 00155 int32 n_ciphone; 00157 int32 bestscore; 00158 int32 bestwordscore; 00159 int32 thres; 00160 int32 phone_thres; 00161 int32 word_thres; 00163 int32 *wordbestscores; 00164 int32 *wordbestexits; 00166 } beam_t; 00167 00173 typedef struct{ 00174 int32 pheurtype; 00175 int32 pl_beam; 00177 int32 n_ciphone; 00178 int32 *phn_heur_list; 00182 }pl_t; 00183 00190 typedef struct { 00191 int32 maxwpf; 00192 int32 maxhistpf; 00193 int32 maxhmmpf; 00194 int32 hmm_hist_binsize; 00195 int32 hmm_hist_bins; 00196 int32 *hmm_hist; 00198 } histprune_t; 00199 00205 typedef struct{ 00206 int32 ds_ratio; 00207 int32 cond_ds; 00212 int32 dist_ds; 00217 int32 skip_count; 00219 } downsampling_t; 00220 00225 typedef struct{ 00226 int32 ci_pbeam; 00228 int32 dyn_ci_pbeam; 00230 float32 tighten_factor; 00234 int32 *ci_occu; 00236 int32 *idx; 00238 int32 max_cd; 00240 } gmm_select_t; 00241 00247 typedef struct{ 00248 int32 subvqbeam; 00249 int32 rec_bstcid; 00250 } gau_select_t; 00251 00256 typedef struct{ 00257 downsampling_t* downs; 00258 gmm_select_t* gmms; 00259 gau_select_t* gaus; 00260 int32 gs4gs; 00262 int32 svq4svq; 00263 int32 rec_bst_senscr; 00264 float32 *last_feat; 00266 } fast_gmm_t; 00267 00277 beam_t *beam_init ( 00278 float64 hmm, 00279 float64 ptr, 00280 float64 wd, 00281 float64 wdend, 00282 int32 ptranskip, 00283 int32 n_ciphone, 00284 logmath_t *logmath 00285 ); 00286 00287 00289 void beam_report(beam_t* b 00290 ); 00291 00293 void beam_free(beam_t *b 00294 ); 00295 00300 histprune_t *histprune_init (int32 maxhmm, 00301 int32 maxhist, 00302 int32 maxword, 00303 int32 hmmhistbinsize, 00304 int32 numNodes 00305 ); 00306 00307 void histprune_zero_histbin(histprune_t *h 00308 ); 00309 00310 void histprune_update_histbinsize(histprune_t *h, 00311 int32 hmmhistbinsize, 00312 int32 numNodes 00313 ); 00314 00316 void histprune_report(histprune_t* h 00317 ); 00318 00320 void histprune_showhistbin(histprune_t *hp, 00321 int32 nfr, 00322 char* uttid 00323 ); 00324 00326 void histprune_free(histprune_t *h 00327 ); 00331 S3DECODER_EXPORT 00332 fast_gmm_t *fast_gmm_init (int32 down_sampling_ratio, 00333 int32 mode_cond_ds, 00334 int32 mode_dist_ds, 00335 int32 isGS4GS, 00336 int32 isSVQ4SVQ, 00337 float64 subvqbeam, 00338 float64 cibeam, 00339 float32 tighten_factor, 00340 int32 max_cd, 00341 int32 n_ci_sen, 00342 logmath_t *logmath 00343 ); 00344 00346 void fast_gmm_report(fast_gmm_t* f 00347 ); 00348 00350 S3DECODER_EXPORT 00351 void fast_gmm_free (fast_gmm_t *fg 00352 ); 00353 00354 00359 pl_t* pl_init(int32 pheurtype, 00360 int32 pl_beam, 00361 int32 n_ciphone, 00362 logmath_t *logmath 00363 ); 00364 00366 void pl_report(pl_t* pl 00367 ); 00368 00370 void pl_free(pl_t* pl 00371 ); 00372 00377 void pl_computePhnHeur(mdef_t* md, 00378 ascr_t *a, 00379 pl_t* pl, 00380 int32 heutype, 00381 int32 win_strt, 00382 int32 win_efv 00383 ); 00384 00385 #if 0 00386 { /* Stop indent from complaining */ 00387 #endif 00388 #ifdef __cplusplus 00389 } 00390 #endif 00391 00392 00393 #endif