fast_algo_struct.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * fast_algo_struct.h -- Various forms of pruning beam
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1999 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00050  * A Great Reorganization of header files and executables
00051  * 
00052  * Revision 1.10  2006/02/22 16:39:43  arthchan2003
00053  * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: 1, Initialize beam->n_ciphone properly, 2, use ckd_free instead of free, use float64 for subvqbeam and cipbeam.  3, Add a proper free function for fast_gmm_free
00054  *
00055  *
00056  * Revision 1.7.4.5  2005/11/17 06:07:07  arthchan2003
00057  * Added fast_gmm_free in the header.
00058  *
00059  * Revision 1.7.4.4  2005/07/05 21:28:57  arthchan2003
00060  * 1, Merged from HEAD. 2, Remove redundant keyword in cont_mgau.
00061  *
00062  * Revision 1.7.4.3  2005/07/05 05:47:59  arthchan2003
00063  * Fixed dox-doc. struct level of documentation are included.
00064  *
00065  * Revision 1.7.4.2  2005/07/04 02:44:25  arthchan2003
00066  * Changed float32 to float64 for ci_pbeam and svqbeam. Code now compiled.
00067  *
00068  * Revision 1.7.4.1  2005/06/27 05:22:19  arthchan2003
00069  * Merged from the HEAD.
00070  *
00071  * Revision 1.9  2005/06/30 13:08:45  egouvea
00072  * Beams in linear scale have to be float64, since they can be easily defined as < 1e-40
00073  *
00074  * Revision 1.8  2005/06/22 08:00:09  arthchan2003
00075  * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs.
00076  *
00077  * Revision 1.7  2005/06/21 18:26:38  arthchan2003
00078  * Log. fast_algo_struct.c go through major changes in the gentle
00079  * refactoring process. It is the location of several wrapper structures
00080  * that control fast search.  That includes beam_t for storing beams and
00081  * scores. pl_t for storing structure for phoneme lookahead, histprune_t
00082  * for storing structures for histogram pruning. Lastly
00083  * fast_algo_struct_t, for storing structures for fast GMM
00084  * computation.
00085  *
00086  * Log. General Remark: All of them now have a consistent interface, _init,
00087  * _report and _free.  They are respectively used for allocation,
00088  * reporting and deallocation of the routine. Doxygen documentation are
00089  * fixed for all structures.
00090  *
00091  * Log. Individual changes; beam_t start to own bestscore, bestwordscore,
00092  * wordbestscores, wordbestexits. They were owned by kb_t. histprune_t
00093  * now wrapped up maxwpf, maxhmmpdf, maxhistpf and
00094  * hmm_hist_binsize. Currently, the beam size determination routine is
00095  * controlled by search implementation modules.  It is done because
00096  * wrapping that operation up means we may need to introduce a bridge
00097  * between beam_t and histprune_t.  pl_t is now owning heuristic type,
00098  * the phoneme lookahead beam size. It also wrapped up phoneme heuristic
00099  * computation.
00100  *
00101  * Revision 1.5  2005/04/20 03:33:54  archan
00102  * Remove pl_win and pl_win_strt, Now consider them as the parameters of the search abstraction in srch.c
00103  *
00104  * Revision 1.4  2005/03/30 01:22:46  archan
00105  * Fixed mistakes in last updates. Add
00106  *
00107  * 
00108  * 19-May-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00109  *              Started.
00110  */
00111 
00118 #ifndef _S3_BEAM_H_
00119 #define _S3_BEAM_H_
00120 
00121 #include <s3types.h>
00122 #include <mdef.h>
00123 #include <ascr.h>
00124 #include <logmath.h>
00125 
00126 #ifdef __cplusplus
00127 extern "C" {
00128 #endif
00129 #if 0
00130 } /* Fool Emacs into not indenting things. */
00131 #endif
00132 
00133 
00149 typedef struct { /* beam_t: pruning beams plus best-score bookkeeping (the file history notes the score fields were formerly owned by kb_t) */
00150     int32 hmm;             /**< HMM beam; presumably the int32 (log-domain) form of beam_init()'s `hmm` -- TODO confirm */
00151     int32 ptrans;          /**< presumably the phone-transition beam derived from beam_init()'s `ptr` -- TODO confirm */
00152     int32 word;            /**< presumably the word beam derived from beam_init()'s `wd` -- TODO confirm */
00153     int32 ptranskip;       /**< Same name as beam_init()'s `ptranskip`; exact use is not visible in this header */
00154     int32 wordend;         /**< presumably the word-end beam derived from beam_init()'s `wdend` -- TODO confirm */
00155     int32 n_ciphone;       /**< Number of CI phones; presumably sizes the per-phone arrays below -- TODO confirm */
00157     int32 bestscore;       /**< Best score; owned by beam_t since rev 1.7 */
00158     int32 bestwordscore;   /**< Best word score; owned by beam_t since rev 1.7 */
00159     int32 thres;           /**< Pruning threshold; NOTE(review): how it is computed is not shown here */
00160     int32 phone_thres;     /**< presumably the threshold applied at phone transitions -- TODO confirm */
00161     int32 word_thres;      /**< presumably the threshold applied at word exits -- TODO confirm */
00163     int32 *wordbestscores; /**< Array of word best scores (owned by beam_t since rev 1.7); length presumably n_ciphone -- TODO confirm */
00164     int32 *wordbestexits;  /**< Array of word best exits (owned by beam_t since rev 1.7); length presumably n_ciphone -- TODO confirm */
00166 } beam_t;
00167 
00173 typedef struct{ /* pl_t: state for phoneme lookahead (heuristic type, lookahead beam, heuristic results) */
00174     int32 pheurtype;       /**< Heuristic type used for phoneme lookahead */
00175     int32 pl_beam;         /**< Phoneme lookahead beam size */
00177     int32 n_ciphone;       /**< Number of CI phones; presumably the length of phn_heur_list -- TODO confirm */
00178     int32 *phn_heur_list;  /**< presumably per-CI-phone heuristic values filled by pl_computePhnHeur() -- TODO confirm */
00182 }pl_t;
00183 
00190 typedef struct { /* histprune_t: limits and histogram storage used for histogram pruning */
00191     int32 maxwpf;          /**< presumably max words per frame (cf. histprune_init()'s `maxword`) -- TODO confirm */
00192     int32 maxhistpf;       /**< presumably max history entries per frame (cf. histprune_init()'s `maxhist`) -- TODO confirm */
00193     int32 maxhmmpf;        /**< presumably max HMMs per frame (cf. histprune_init()'s `maxhmm`) -- TODO confirm */
00194     int32 hmm_hist_binsize; /**< Histogram bin size; can be changed through histprune_update_histbinsize() */
00195     int32 hmm_hist_bins;   /**< presumably the number of bins in hmm_hist -- TODO confirm */
00196     int32 *hmm_hist;       /**< Histogram storage; presumably hmm_hist_bins entries, reset by histprune_zero_histbin() -- TODO confirm */
00198 } histprune_t;
00199 
00205 typedef struct{ /* downsampling_t: down-sampling settings; referenced from fast_gmm_t.downs */
00206     int32 ds_ratio;        /**< presumably fast_gmm_init()'s `down_sampling_ratio` -- TODO confirm */
00207     int32 cond_ds;         /**< presumably fast_gmm_init()'s `mode_cond_ds` setting -- TODO confirm */
00212     int32 dist_ds;         /**< presumably fast_gmm_init()'s `mode_dist_ds` setting -- TODO confirm */
00217     int32 skip_count;      /**< NOTE(review): looks like a running count of skipped frames -- confirm in fast_algo_struct.c */
00219 } downsampling_t;
00220 
00225 typedef struct{ /* gmm_select_t: GMM-level selection settings; referenced from fast_gmm_t.gmms */
00226     int32 ci_pbeam;        /**< CI beam as int32; presumably the log-domain form of fast_gmm_init()'s float64 `cibeam` -- TODO confirm */
00228     int32 dyn_ci_pbeam;    /**< presumably a dynamically adjusted version of ci_pbeam -- TODO confirm */
00230     float32 tighten_factor; /**< Same name as fast_gmm_init()'s `tighten_factor`; exact use is not visible in this header */
00234     int32 *ci_occu;        /**< NOTE(review): looks like per-CI occupancy data; length not visible here -- confirm */
00236     int32 *idx;            /**< Index array; purpose and length are not visible in this header */
00238     int32 max_cd;          /**< Same name as fast_gmm_init()'s `max_cd`; presumably a cap on CD GMMs computed -- TODO confirm */
00240 } gmm_select_t;
00241 
00247 typedef struct{ /* gau_select_t: Gaussian-level selection settings; referenced from fast_gmm_t.gaus */
00248     int32 subvqbeam;       /**< Sub-VQ beam as int32; presumably the log-domain form of fast_gmm_init()'s float64 `subvqbeam` -- TODO confirm */
00249     int32 rec_bstcid;      /**< NOTE(review): looks like a recorded best codeword id -- confirm */
00250 } gau_select_t;
00251 
00256 typedef struct{ /* fast_gmm_t: top-level wrapper for fast GMM computation; built by fast_gmm_init(), released by fast_gmm_free() */
00257     downsampling_t* downs; /**< Down-sampling settings */
00258     gmm_select_t* gmms;    /**< GMM-level selection settings */
00259     gau_select_t* gaus;    /**< Gaussian-level selection settings */
00260     int32 gs4gs;           /**< presumably fast_gmm_init()'s `isGS4GS` flag -- TODO confirm */
00262     int32 svq4svq;         /**< presumably fast_gmm_init()'s `isSVQ4SVQ` flag -- TODO confirm */
00263     int32 rec_bst_senscr;  /**< NOTE(review): looks like a recorded best senone score -- confirm */
00264     float32 *last_feat;    /**< float32 buffer; presumably the previous frame's feature vector -- TODO confirm */
00266 } fast_gmm_t;
00267 
00277 beam_t *beam_init ( /* Allocation routine for beam_t (the _init/_report/_free trio is described in the file history); pair with beam_free() */
00278     float64 hmm,  /**< HMM beam; float64 because linear-scale beams can be smaller than 1e-40 */
00279     float64 ptr,  /**< presumably the phone-transition beam (cf. beam_t.ptrans) -- TODO confirm */
00280     float64 wd,  /**< presumably the word beam (cf. beam_t.word) -- TODO confirm */
00281     float64 wdend,  /**< presumably the word-end beam (cf. beam_t.wordend) -- TODO confirm */
00282     int32 ptranskip, /**< presumably stored in beam_t.ptranskip -- TODO confirm */
00283     int32 n_ciphone,  /**< Number of CI phones; presumably stored in beam_t.n_ciphone -- TODO confirm */
00284     logmath_t *logmath /**< Log-math object; presumably used to convert the float64 beams to int32 -- TODO confirm */
00285     );
00286 
00287 
00289 void beam_report(beam_t* b /**< beam_t to report on (reporting routine of the _init/_report/_free trio) */
00290     );
00291 
00293 void beam_free(beam_t *b /**< beam_t to deallocate; presumably one returned by beam_init() */
00294     );
00295 
00300 histprune_t *histprune_init (int32 maxhmm,         /**< presumably the value for histprune_t.maxhmmpf -- TODO confirm. This is the allocation routine; pair with histprune_free() */
00301                              int32 maxhist,          /**< presumably the value for histprune_t.maxhistpf -- TODO confirm */
00302                              int32 maxword,        /**< presumably the value for histprune_t.maxwpf -- TODO confirm */
00303                              int32 hmmhistbinsize, /**< Histogram bin size (cf. histprune_t.hmm_hist_binsize) */
00304                              int32 numNodes        /**< Node count; NOTE(review): presumably used with the bin size to derive the number of bins -- confirm */
00305     );
00306 
00307 void histprune_zero_histbin(histprune_t *h         /**< Structure whose histogram bins are, judging by the name, reset to zero -- TODO confirm */
00308     );
00309   
00310 void histprune_update_histbinsize(histprune_t *h, /**< Structure to update; NOTE(review): whether hmm_hist is reallocated is not visible here */
00311                                   int32 hmmhistbinsize,  /**< New histogram bin size */
00312                                   int32 numNodes        /**< Node count; presumably used to recompute the number of bins -- TODO confirm */
00313     );
00314 
00316 void histprune_report(histprune_t* h /**< histprune_t to report on (reporting routine of the _init/_report/_free trio) */
00317     );
00318 
00320 void histprune_showhistbin(histprune_t *hp, /**< Structure whose histogram is, judging by the name, displayed */
00321                            int32 nfr, /**< presumably the number of frames in the utterance -- TODO confirm */
00322                            char* uttid /**< presumably the utterance id used to label the output -- TODO confirm */
00323     );
00324 
00326 void histprune_free(histprune_t *h /**< histprune_t to deallocate; presumably one returned by histprune_init() */
00327     );
00331 S3DECODER_EXPORT /* Exported allocation routine for fast_gmm_t; pair with fast_gmm_free() */
00332 fast_gmm_t *fast_gmm_init (int32 down_sampling_ratio, /**< presumably stored in downsampling_t.ds_ratio -- TODO confirm */
00333                            int32 mode_cond_ds,  /**< presumably stored in downsampling_t.cond_ds -- TODO confirm */
00334                            int32 mode_dist_ds, /**< presumably stored in downsampling_t.dist_ds -- TODO confirm */
00335                            int32 isGS4GS, /**< presumably stored in fast_gmm_t.gs4gs -- TODO confirm */
00336                            int32 isSVQ4SVQ, /**< presumably stored in fast_gmm_t.svq4svq -- TODO confirm */
00337                            float64 subvqbeam, /**< Sub-VQ beam; float64 per the rev 1.10 history note */
00338                            float64 cibeam,  /**< CI beam; float64 per the rev 1.10 history note */
00339                            float32 tighten_factor, /**< presumably stored in gmm_select_t.tighten_factor -- TODO confirm */
00340                            int32 max_cd,    /**< presumably stored in gmm_select_t.max_cd -- TODO confirm */
00341                            int32 n_ci_sen, /**< presumably the number of CI senones, used to size per-CI arrays -- TODO confirm */
00342                            logmath_t *logmath /**< Log-math object; presumably used to convert the float64 beams to int32 -- TODO confirm */
00343     );
00344 
00346 void fast_gmm_report(fast_gmm_t* f  /**< fast_gmm_t to report on (reporting routine of the _init/_report/_free trio) */
00347     );
00348 
00350 S3DECODER_EXPORT /* Exported deallocation routine for fast_gmm_t (added in rev 1.7.4.5 / 1.10 per the file history) */
00351 void fast_gmm_free (fast_gmm_t *fg /**< fast_gmm_t to deallocate; presumably one returned by fast_gmm_init() */
00352     );
00353   
00354 
00359 pl_t* pl_init(int32 pheurtype, /**< Heuristic type (cf. pl_t.pheurtype). This is the allocation routine for pl_t; pair with pl_free() */
00360               int32 pl_beam,  /**< Phoneme lookahead beam (cf. pl_t.pl_beam); NOTE(review): scale of this int32 is not visible here */
00361               int32 n_ciphone, /**< Number of CI phones (cf. pl_t.n_ciphone) */
00362               logmath_t *logmath /**< Log-math object; exact use is not visible in this header */
00363     );
00364 
00366 void pl_report(pl_t* pl  /**< pl_t to report on (reporting routine of the _init/_report/_free trio) */
00367     );
00368 
00370 void pl_free(pl_t* pl /**< pl_t to deallocate; presumably one returned by pl_init() */
00371     );
00372 
00377 void pl_computePhnHeur(mdef_t* md, /**< Model definition. This routine is the phoneme heuristic computation wrapped up by pl_t (see the file history) */
00378                        ascr_t *a,  /**< Acoustic score structure; presumably the source of the scores used -- TODO confirm */
00379                        pl_t* pl,   /**< Phoneme lookahead structure; presumably its phn_heur_list receives the result -- TODO confirm */
00380                        int32 heutype, /**< Heuristic type to compute; NOTE(review): relation to pl->pheurtype is not visible here */
00381                        int32 win_strt, /**< presumably the start of the lookahead window -- TODO confirm */
00382                        int32 win_efv   /**< presumably the effective size of the lookahead window -- TODO confirm */
00383     );
00384 
00385 #if 0
00386 { /* Stop indent from complaining */
00387 #endif
00388 #ifdef __cplusplus
00389 }
00390 #endif
00391 
00392 
00393 #endif

Generated on 7 Mar 2010 by  doxygen 1.6.1