srch.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /* srch.h
00039  * HISTORY
00040  * $Log$
00041  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00042  * A Great Reorganzation of header files and executables
00043  * 
00044  * Revision 1.2  2006/02/23 15:26:10  arthchan2003
00045  * Merged from SPHINX3_5_2_RCI_IRII:
00046  *
00047  * Summary of changes. Detail could be seen in the comments from the
00048  * branches.
00049  *
00050  *  After 6 months, we have two more searches using interface
00051  * provided by srch.c. That included an adapted version of Sphinx 2's FSG
00052  * search.  Also, the original version of flat-lexicon decoding search.
00053  *
00054  * Second stage search operation is still not properly put in the srch_t
00055  * structure.  We should create function hooks that allow developer to
00056  * put the code more properly than now.
00057  *
00058  * The interface of srch.c is still not very completed. Things we should
00059  * support include switching of AM and MLLR.  They are currently
00060  * commented.
00061  *
00062  * Mode 5, the word-dependent tree copies are now fended off from the
00063  * users.
00064  *
00065  * Mode 2, the FSG search are opened.  It is not very well tested so the
00066  * user will be warned about its nature.
00067  *
00068  * Revision 1.1.4.15  2006/01/16 20:01:20  arthchan2003
00069  * Added Commented code in srch.[ch] for second-stage rescoring. Not used for now.
00070  *
00071  * Revision 1.1.4.14  2005/11/17 06:36:36  arthchan2003
00072  * There are several important changes. 1, acoustic score scale has changed back to put it the search structure.  This fixed a bug introduced pre-2005 code branching where only the scaling factor of the last frame. 2, Added a fmt argument of matchseg_write , implemented segmentation output for s2 and ctm file format. matchseg_write also now shared across the flat and tree decoder now. 3, Added Rong's read_seg_hyp_line.
00073  *
00074  * Revision 1.1.4.13  2005/09/25 19:23:55  arthchan2003
00075  * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code.
00076  *
00077  * Revision 1.1.4.12  2005/09/18 01:44:12  arthchan2003
00078  * Very boldly, started to support flat lexicon decoding (mode 3) in srch.c.  Add log_hypseg. Mode 3 is implemented as srch-one-frame implementation. Scaling doesn't work at this point.
00079  *
00080  * Revision 1.1.4.11  2005/09/11 23:07:28  arthchan2003
00081  * srch.c now support lattice rescoring by rereading the generated lattice in a file. When it is operated, silence cannot be unlinked from the dictionary.  This is a hack and its reflected in the code of dag, kbcore and srch. code
00082  *
00083  * Revision 1.1.4.10  2005/08/02 21:37:28  arthchan2003
00084  * 1, Used s3_cd_gmm_compute_sen instead of approx_cd_gmm_compute_sen in mode 2, 4 and 5.  This will suppose to make s3.0 to be able to read SCHMM and use them as well. 2, Change srch_gmm_compute_lv2 to accept a two-dimensional array (no_stream*no_coeff) instead of a one dimensional array (no_coeff).
00085  *
00086  * Revision 1.1.4.9  2005/07/24 19:35:59  arthchan2003
00087  * Added GAUDEN_EVAL_WINDOW in srch.h. Assuming this is property of a search.
00088  *
00089  * Revision 1.1.4.8  2005/07/24 01:39:26  arthchan2003
00090  * Added srch_on_srch_frame_lv[12] in the search abstraction routine.  This will allow implementation just provide the search for one frame without supplying all function pointer in the standard abstraction.
00091  *
00092  * Revision 1.1.4.7  2005/07/22 03:41:05  arthchan2003
00093  * 1, (Incomplete) Add function pointers for flat foward search. Notice implementation is not yet filled in. 2, adding log_hypstr and log_hyp_detailed.  It is sphinx 3.0 version of matchwrite.  Add it to possible code merge.
00094  *
00095  * Revision 1.1.4.6  2005/07/17 05:54:55  arthchan2003
00096  * replace vithist_dag_write_header with dag_write_header
00097  *
00098  * Revision 1.1.4.5  2005/07/13 18:46:39  arthchan2003
00099  * Re-included srch_fsg.h
00100  *
00101  * Revision 1.1.4.4  2005/07/07 02:37:39  arthchan2003
00102  * 1, Changed names of srchmode* functions to srch_mode*, 2, complete srch_mode_index_to_str, 3, Remove srch_rescoring and ask implementation to call these "rescoring functions" themselves.  The reason is rescoring is not as universal as I would think in the general search. I think search implementer should be the one who decide whether rescoring is one part of their search algorithms
00103  *
00104  * Revision 1.1.4.3  2005/07/04 07:18:49  arthchan2003
00105  * Disabled support of FSG. Added comments for srch_utt_begin and srch_utt_end.
00106  *
00107  * Revision 1.1.4.2  2005/07/03 23:04:55  arthchan2003
00108  * 1, Added srchmode_str_to_index, 2, called the deallocation routine of the search implementation layer in srch_uninit
00109  *
00110  * Revision 1.1.4.1  2005/06/28 07:03:01  arthchan2003
00111  * Added read_fsg operation as one method. Currently, it is still not clear how it should iteract with lm
00112  *
00113  * Revision 1.1  2005/06/22 02:24:42  arthchan2003
00114  * Log. A search interface implementation are checked in. I will call
00115  * srch_t to be search abstraction or search mechanism from now on.  The
00116  * major reason of separating with the search implementation routine
00117  * (srch_*.[ch]) is that search is something that people could come up
00118  * with thousands of ways to implement.
00119  *
00120  * Such a design shows a certain sense of defiance of conventional ways
00121  * of designing speech recognition. Namely, **always** using a generic
00122  * graph as the grandfather ancestor of every search lattice.  This could
00123  * 1) break a lot of legacy optimization code, 2) be slow, depending
00124  * on the implementation.
00125  *
00126  * The current design only specify the operations that are supposed to be
00127  * generic in every search (or atomic search operations (ASOs)).
00128  * Ideally, users only need to implement the interface to make the code
00129  * work for another search.
00130  *
00131  * From this point of view, the current check-in still have some
00132  * fundamental flaws.  For example, the communication mechanism between
00133  * different atomic search operations are not clearly defined. Scores are
00134  * now computed and put into structures of ascr. (ascr has no clear
00135  * interface to outside world). This is something we need to improve.
00136  *
00137  * Revision 1.18  2005/06/16 04:59:10  archan
00138  * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale.
00139  *
00140  * Revision 1.17  2005/06/10 03:40:57  archan
00141  * 1, Fixed doxygen documentation of srch.h, 2, eliminate srch.h C-style functions. 3, Start to fend off the users for using mode 5.  We are ready to merge the code.
00142  *
00143  * Revision 1.16  2005/06/10 03:01:50  archan
00144  * Fixed file_open.
00145  *
00146  * Revision 1.15  2005/06/09 21:03:33  archan
00147  * Update srch.h and srch_debug.c such that include files doesn't depend on explicitly specified directory name.  Rather it would be taken care by -I option in Makefile.am
00148  *
00149  * Revision 1.14  2005/05/11 06:10:38  archan
00150  * Code for lattice and back track pointer table dumping is now wrapped in reg_result_dump.  The function is shared across mode 4 and mode 5.  Possibly later for mode 3 and mode 6 as well.
00151  *
00152  * Revision 1.13  2005/05/11 00:18:45  archan
00153  * Add comments on srch.h and srch_time_switch_tree.h and srch_debug.h on how things work. A very detail comment is added in srch.h to describe how generally srch_t is interacting with other parts of the code.
00154  *
00155  * Revision 1.12  2005/05/04 05:15:25  archan
00156  * reverted the last change, seems to be not working because of compilation issue. Try not to deal with it now.
00157  *
00158  * Revision 1.1  2005/05/04 04:46:04  archan
00159  * Move srch.c and srch.h to search. More and more this type of refactoring will be done in future
00160  *
00161  * Revision 1.10  2005/05/03 04:09:09  archan
00162  * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore.  This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame.  The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century.  But well, after all, everything needs a start.  I will then really get the results from the search and see how it looks.
00163  *
00164  * Revision 1.9  2005/04/25 19:22:47  archan
00165  * Refactor out the code of rescoring from lexical tree. Potentially we want to turn off the rescoring if we need.
00166  *
00167  * Revision 1.8  2005/04/22 04:22:36  archan
00168  * Add gmm_wrap, this will share code across op_mode 4 and op_mode 5. Also it also separate active senone selection into a different process.  I hope this is the final step before making the WST search works.  At the current stage, the code of mode-5 looks very much alike mode-4.  This is intended because in Prototype 4, tail sharing will be used to reduce memory.
00169  *
00170  * Revision 1.7  2005/04/21 23:50:26  archan
00171  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in.  At this moment, everything in search mode 5 is already done.  It is time to test the idea whether the search can really be used.
00172  *
00173  * Revision 1.6  2005/04/20 03:42:55  archan
00174  * srch.c is now the only master search driver. When there is any change in the **interaction** of different blocks, srch.c should be changed first, then the search implementation, such as srch_time_switch_tree.c
00175  *
00176  * Revision 1.5  2005/03/30 01:22:47  archan
00177  * Fixed mistakes in last updates. Add
00178  *
00179  * 
00180  * 17-Mar-2005 A. Chan (archan@cs.cmu.edu) at Carnegie Mellon University
00181  * 1            Started. This replaced utt.c starting from Sphinx 3.6. 
00182  */
00183 
00184 #include <stdio.h>
00185 
00186 #include <s3types.h>
00187 #include <glist.h>
00188 #include "dag.h"
00189 #include "lm.h"
00190 #include "ascr.h"
00191 #include "adaptor.h"
00192 #include "stat.h"
00193 #include "fast_algo_struct.h"
00194 #include "kbcore.h"
00195 #include "kb.h"
00196 
00197 
00198 /* Mode 1 */
00199 #include "srch_allphone.h"
00200 
00201 /* Mode 2 */
00202 #include "srch_fsg.h"
00203 
00204 /* Mode 3 */
00205 #include "srch_flat_fwd.h"
00206 
00207 /* Mode 4 */
00208 #include "srch_time_switch_tree.h"
00209 
00210 /* Mode 5 */
00211 #include "srch_word_switch_tree.h"
00212 
00213 /* Mode 1368*/
00214 #include "srch_do_nothing.h"
00215 
00216 /* Mode 1369*/
00217 #include "srch_debug.h"
00218 
00219 
00220 #include "srch_output.h"
00221 
00222 #ifndef _SRCH_H_
00223 #define _SRCH_H_
00224 
00225 
00226 #ifdef __cplusplus
00227 extern "C" {
00228 #endif
00229 #if 0
00230 /* Fool Emacs. */
00231 }
00232 #endif
00233 
00234 #define SRCH_SUCCESS 0 /* Status code for success; presumably the return convention of the srch_* functions and srch_funcs_t hooks -- TODO confirm in srch.c */
00235 #define SRCH_FAILURE 1 /* Status code for failure (non-zero) */
00236 
00250 #define OPERATION_ALIGN         0 /* Search-mode identifier 0; no implementation header for it is included by this file */
00256 #define OPERATION_ALLPHONE      1 /* Mode 1; this file includes srch_allphone.h for it */
00258 #define OPERATION_GRAPH         2 /* Mode 2; this file includes srch_fsg.h for it (the history log calls mode 2 the FSG search) */
00262 #define OPERATION_FLATFWD       3 /* Mode 3; this file includes srch_flat_fwd.h (flat-lexicon decoding per the history log) */
00269 #define OPERATION_TST_DECODE    4 /* Mode 4; this file includes srch_time_switch_tree.h for it */
00276 #define OPERATION_WST_DECODE    5 /* Mode 5; this file includes srch_word_switch_tree.h for it */
00283 #define OPERATION_EVANDRO_MODE  6 /* Modes 6-10 and 88: identifiers only; no implementation header is included here for them */
00290 #define OPERATION_DAVID_MODE    7 
00302 #define OPERATION_ARTHUR_MODE   8 
00310 #define OPERATION_YITAO_MODE    9 
00315 #define OPERATION_RAVI_MODE    10 
00326 #define OPERATION_STEVE_MODE   88 
00337 #define OPERATION_DO_NOTHING 1368 /* Mode 1368; this file includes srch_do_nothing.h for it */
00343 #define OPERATION_DEBUG      1369 /* Mode 1369; this file includes srch_debug.h for it */
00350 #define GRAPH_STRUCT_FLAT 0 /* GRAPH_STRUCT_*: graph-structure tags; presumably the values stored in grp_str_t.graph_type -- TODO confirm against the search implementations */
00351 #define GRAPH_STRUCT_TST 1
00352 #define GRAPH_STRUCT_WST 2
00353 #define GRAPH_STRUCT_GENGRAPH 3
00354 #define GRAPH_STRUCT_PHMM 4
00355 
00356 #define GMM_STRUCT_CDHMM 0 /* GMM_STRUCT_*: GMM-structure tags; not referenced anywhere else in this header */
00357 #define GMM_STRUCT_SCHMM 1
00358 
00359 
00360 #define GAUDEN_EVAL_WINDOW 8 /*Moving window length when frames are
00361                                considered as blocks, currently used in
00362                                3.0 family of tools. */
00363 
00364 #define DFLT_UTT_SIZE 5000 /* Default utterance size; presumably the initial length of srch_t.ascale -- TODO confirm in srch.c */
00365 #define DFLT_NUM_SEGS 200  /* Default number of segments; presumably the initial length of srch_t.segsz -- TODO confirm in srch.c */
00369 /* \struct grp_str_t 
00370  * Wrapper pairing an untyped pointer to a search graph with an integer
        * tag describing it.  The pointer is void*, so the concrete type is
        * known only to the code that created it.
        */
00371 typedef struct {
00372     void *graph_struct; /* Opaque pointer to the mode-specific graph structure */
00373     int32 graph_type;   /* Tag for graph_struct; presumably one of the GRAPH_STRUCT_* constants -- TODO confirm */
00374 }grp_str_t;
00375 
00376 
00377 
00529 typedef struct srch_funcs_s {
00530     /*
00531       Function pointers that perform the operations.  Every mode will
00532       set these pointers at the beginning of the search.
        
              Every hook receives the search object as an untyped
              void* srch_struct (presumably a srch_t* -- TODO confirm in the
              implementations).  The int return values presumably follow the
              SRCH_SUCCESS / SRCH_FAILURE convention -- TODO confirm.
00533     */
00534 
00536     int (*init)(kb_t *kb, /* Mode-specific initialisation hook; receives the kb_t and the search object */
00537                 void* srch_struct 
00538         );
00539 
00541     int (*uninit)( /* Mode-specific teardown hook; the history log says srch_uninit calls the implementation's deallocation routine */
00542         void* srch_struct 
00543         );
00545     int (*utt_begin)( /* Per-utterance start hook; presumably driven by srch_utt_begin() -- confirm in srch.c */
00546         void* srch_struct 
00547         );
00548 
00550     int (*utt_end)( /* Per-utterance end hook; presumably driven by srch_utt_end() -- confirm in srch.c */
00551         void* srch_struct 
00552         );
00554     int (*decode)( /* Decode hook taking only the search object; NOTE(review): its relationship to the per-frame hooks below is not visible in this header */
00555         void* srch_struct 
00556         );
00557 
00559     int (*set_lm)( /* Language-model hook taking an LM name; presumably selects the LM called lmname */
00560         void* srch_struct, 
00561         const char *lmname 
00562         );
00563 
00565     int (*add_lm)(void* srch_struct, /* Language-model hook taking an lm_t and a name; presumably registers lm under lmname */
00566                   lm_t* lm,          
00567                   const char *lmname 
00568         );
00569 
00571     int (*delete_lm)(void* srch_struct,  /* Language-model hook taking an LM name; presumably removes the LM called lmname */
00572                      const char *lmname  
00573         );
00574 
00576 #if 0 /* FSG file reading hook, compiled out; the history log notes it was unclear how it should interact with the LM */
00577     word_fsg_t* (*read_fsgfile)(void* srch_struct, 
00578                                 const char* fsgname 
00580         );
00581 #endif
00582     /* The 4 operations that require switching during the approximate search process */
00586     int (*gmm_compute_lv1)(void* srch_struct,  /* Level-1 (approximate search) GMM computation hook */
00587                            float32 *feat,      /* Feature data as a flat float32 pointer; length is not specified here */
00588                            int32 frmno_lp1,    /* Two frame numbers; NOTE(review): the meaning of "lp1"/"lp2" is not documented in this listing */
00589                            int32 frmno_lp2     
00590         );
00591 
00592 
00593     /* The level 1 search functions are not yet fully used. Not all of them are defined now. When fast
00594        match is needed, we will need them more. 
00595     */
00596     int (*one_srch_frame_lv1)(void* srch_struct /* Level-1 single-frame search hook */
00597         );
00598 
00599     int (*hmm_compute_lv1)(void* srch_struct);        /* Level-1 HMM computation hook */
00600     int (*eval_beams_lv1)(void* srch_struct);         /* Level-1 beam evaluation hook */
00601     int (*propagate_graph_ph_lv1)(void* srch_struct); /* Level-1 graph propagation hook ("ph": presumably phone level) */
00602     int (*propagate_graph_wd_lv1)(void* srch_struct); /* Level-1 graph propagation hook ("wd": presumably word level) */
00603 
00604     /* The 4 operations that require switching during the detail search process */
00609     int (*gmm_compute_lv2)(void* srch_struct,  /* Level-2 (detail search) GMM computation hook */
00610                            float32 **feat,      /* Two-dimensional feature array; the history log describes it as no_stream*no_coeff */
00611                            int32 time          /* Presumably the frame index being scored -- TODO confirm */
00612         );
00613 
00614 
00619     int (*one_srch_frame_lv2)(void* srch_struct /* Level-2 single-frame search hook; per the history log it lets an implementation supply the search for one frame without filling in every other pointer */
00620         );
00621 
00622 
00624     int (*hmm_compute_lv2)(void* srch_struct,  /* Level-2 HMM computation hook for frame frmno */
00625                            int32 frmno         
00626         );
00627 
00629     int (*eval_beams_lv2)(void* srch_struct     /* Level-2 beam evaluation hook */
00630         );
00631 
00633     int (*propagate_graph_ph_lv2)(void* srch_struct, /* Level-2 graph propagation hook ("ph": presumably phone level) for frame frmno */
00634                                   int32 frmno        
00635         );
00636 
00638     int (*propagate_graph_wd_lv2)(void* srch_struct,  /* Level-2 graph propagation hook ("wd": presumably word level) for frame frmno */
00639                                   int32 frmno       
00640         );
00641 
00643     int (*rescoring) (void* srch_struct,  /* Rescoring hook for frame frmno; the history log says the generic srch_rescoring driver was removed and implementations call rescoring themselves */
00644                       int32 frmno         
00645         );  
00646 
00647     int (*frame_windup) (void * srch_struct, int32 frmno);            /* Hook taking a frame number; presumably end-of-frame bookkeeping -- TODO confirm */
00648     int (*compute_heuristic) (void * srch_struct, int32 win_efv);     /* Hook taking win_efv; NOTE(review): win_efv is not defined in this header */
00649     int (*shift_one_cache_frame) (void *srch_struct,int32 win_efv);   /* Hook taking win_efv; presumably advances a frame cache by one frame -- TODO confirm */
00650     int (*select_active_gmm) (void *srch_struct);                     /* Hook taking only the search object; presumably picks which GMMs to evaluate -- TODO confirm */
00651 
00652 
00660     glist_t (*gen_hyp) (void * srch_struct /* Returns a glist_t; presumably the recognition hypothesis -- element type not visible here */
00661         );
00662 
00669     dag_t* (*gen_dag) (void* srch_struct, /* Returns a dag_t* built by the implementation; takes a hypothesis list as input */
00670                        glist_t hyp
00671         );
00672 
00676     int (*dump_vithist)(void * srch_struct /* Hook taking only the search object; presumably dumps the Viterbi history -- TODO confirm */
00677         );
00678 
00682     glist_t (*bestpath_impl)(void *srch_struct, /* Takes a dag_t* and returns a glist_t; presumably a best-path search over the DAG */
00683                              dag_t *dag 
00684         );
00685 
00689     int (*dag_dump) (void * srch_struct, /* Hook taking a dag_t*; presumably writes the DAG out -- destination not visible here */
00690                      dag_t *dag
00691         );
00692 
00696     glist_t (*nbest_impl)(void *srch_struct, /* Takes a dag_t* and returns a glist_t; presumably N-best generation from the DAG */
00697                           dag_t *dag 
00698         );
00699 
00701     void *nothing; /* Untyped placeholder member; its purpose is not evident from this header */
00702 } srch_funcs_t;
00703 
00704 typedef struct srch_s { /* Search object shared by all search modes; passed to the srch_* functions declared below */
00708     srch_funcs_t *funcs; /* Table of mode-specific hooks */
00709 
00710     grp_str_t* grh;     /* Graph wrapper: opaque graph pointer plus type tag */
00711     int op_mode;        /* Search mode; presumably one of the OPERATION_* constants (srch_init takes an op_mode) */
00712     stat_t *stat;       /* Statistics structure (type from stat.h) */
00713     char *uttid;        /* Utterance id string; ownership not visible here */
00714     char *uttfile;      /* Utterance file name string; ownership not visible here */
00716     /*
00717       These variables control the logistics of a search operation.  They
00718       are global to all different search modes. 
00719     */
00720     int32 cache_win;    /* NOTE(review): presumably the frame-cache window size -- confirm */
00721     int32 cache_win_strt;    /* NOTE(review): presumably the start position of the cache window -- confirm */
00723     int32 senscale;     /* NOTE(review): presumably a senone score scaling value -- confirm */
00725     int32 *ascale;   /* Acoustic score scale array; the history log says the scale was moved into the search structure */
00728     int32 ascale_sz;       /* Presumably the allocated length of ascale -- confirm */
00729     int32 num_frm;        /* Presumably the number of frames processed so far -- confirm */
00731     int32 *segsz;   /* Array of segment sizes; units not visible here */
00732     int32 segsz_sz;      /* Presumably the allocated length of segsz -- confirm */
00734     int32 num_segs;     /* Presumably the number of segsz entries in use -- confirm */
00740     /* 
00741        Auxiliary Structures for the search. 
00742     */
00743     int32 exit_id;              /* NOTE(review): meaning not documented in this listing */
00745     dag_t *dag;                 /* DAG associated with the search (type from dag.h) */
00747     /* ARCHAN: Various pruning beams, put them together such that it looks more logical. */
00748     ascr_t *ascr;                 /* Acoustic score structure (type from ascr.h) */
00749     beam_t *beam;                 /* Beam structure; see the ARCHAN note above */
00750     fast_gmm_t *fastgmm;    /* Fast GMM computation structure */
00751     pl_t *pl;              /* NOTE(review): pl_t is not described in this header -- confirm its role */
00752     adapt_am_t * adapt_am; /* Acoustic-model adaptation structure (type presumably from adaptor.h) */
00753     kbcore_t *kbc;      /* Core knowledge-base structure (type from kbcore.h) */
00756     FILE *matchfp;          /* Output stream; presumably for match (hypothesis) output */
00757     FILE *matchsegfp;       /* Output stream; presumably for match segmentation output */
00759     FILE *hmmdumpfp;        /* Output stream; presumably for HMM dumps */
00761     /* FIXME, duplicated with fwd_dbg_t */
00762     int32 hmm_dump_sf;  /* Presumably the start frame for HMM dumping -- confirm */
00763     int32 hmm_dump_ef;  /* Presumably the end frame for HMM dumping -- confirm */
00764 }srch_t;
00765 
00785 int32 srch_mode_str_to_index(const char* mode_str); /* Maps a mode name string to an int32 mode index (presumably an OPERATION_* value); behaviour for unknown names is not visible here */
00786 
00792 char* srch_mode_index_to_str(int32 index); /* Inverse mapping: mode index to name string; NOTE(review): ownership of the returned char* is not visible here */
00793 
00794 
00795 /* The following are C-style methods for the srch structure.  In theory,
00796    users could use both C-style and function pointer style to access
00797    functionalities of the code. However, we recommend developers to use
00798    the C-style functions because 1) it won't scare people that much, 2)
00799    it is more consistent with other modules in sphinx 3. 
00800 */
00801 
00819 srch_t* srch_init(kb_t *kb, /* Creates a srch_t from a kb_t for the given mode; failure return value not visible here */
00820                   int32 op_mode /* Presumably one of the OPERATION_* constants */
00821     );
00822 
00828 void srch_report(srch_t* srch /* Reports on the search object; output destination not visible here */
00829     );
00830 
00838 int32 srch_utt_begin(srch_t* srch /* Begins an utterance; presumably dispatches to funcs->utt_begin -- confirm in srch.c */
00839     );
00840 
00844 S3DECODER_EXPORT
00845 int32 srch_utt_decode_blk(srch_t* srch, /* Decodes one block of feature vectors */
00846                           float ***block_feat,  /* Three-level feature array; presumably [frame][stream][coeff] -- TODO confirm */
00847                           int32 block_nfeatvec, /* Presumably the number of feature vectors in block_feat */
00848                           int32 *curfrm  /* Pointer to a frame counter; presumably read and advanced by the call -- TODO confirm */
00849     );
00850 
00854 int32 srch_utt_end(srch_t* srch /* Ends an utterance; presumably dispatches to funcs->utt_end -- confirm in srch.c */
00855     );
00856 
00858 int32 srch_uninit(srch_t* srch /* Tears down the search object; per the history log it calls the implementation layer's deallocation routine */
00859     );
00860 
00864 glist_t srch_get_hyp(srch_t *srch /* Returns a glist_t; presumably the current hypothesis -- element type not visible here */
00865     );
00866 
00872 dag_t *srch_get_dag(srch_t *srch); /* Returns a dag_t* for the search; NOTE(review): ownership not visible here */
00873 
00874 
00876 void reg_result_dump (srch_t* s, /* Per the history log, wraps lattice and back-track-pointer-table dumping, shared by modes 4 and 5 */
00877                       int32 id  /* NOTE(review): meaning of id is not documented in this listing */
00878     );
00882 void write_bstsenscr(FILE *fp, /* Writes to fp using a frame count and a scale array; presumably best senone scores -- TODO confirm */
00883                      int32 numframe, 
00884                      int32* scale    
00885     );
00886 
00887 
00889 S3DECODER_EXPORT
00890 int32 srch_set_lm(srch_t* srch,  /* LM operation by name; presumably dispatches to funcs->set_lm */
00891                   const char *lmname 
00892     );
00893 
00895 int32 srch_delete_lm(srch_t* srch,  /* LM operation by name; presumably dispatches to funcs->delete_lm */
00896                   const char *lmname 
00897     );
00898 
00899 #if 0 /* Tentative API, not yet implemented: everything up to the matching #endif is compiled out.  The (void) signatures are placeholders. */
00900 int32 srch_set_am(void);
00901 
00903 int32 srch_add_am(void);
00904 
00906 int32 srch_delete_am(void);
00907 
00909 int32 srch_add_lm(void);
00910 
00911 
00913 int32 srch_set_mllr(void);
00914 
00916 int32 srch_add_mllr(void);
00917 
00919 int32 srch_delete_mllr(void);
00920 
00922 int32 srch_set_lamdafn(void);
00923 
00925 int32 srch_add_lamdafn(void);
00926 
00928 int32 srch_delete_lamdafn(void);
00929 
00931 int32 srch_add_words_to_dict(void);
00932 
00933 #endif /* End not implemented */
00934 
00935 #ifdef __cplusplus
00936 }
00937 #endif
00938 
00939 
00940 #endif /*_SRCH_H_ */

Generated on 7 Mar 2010 by  doxygen 1.6.1