00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 /* srch.h 00039 * HISTORY 00040 * $Log$ 00041 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00042 * A Great Reorganzation of header files and executables 00043 * 00044 * Revision 1.2 2006/02/23 15:26:10 arthchan2003 00045 * Merged from SPHINX3_5_2_RCI_IRII: 00046 * 00047 * Summary of changes. Detail could be seen in the comments from the 00048 * branches. 00049 * 00050 * After 6 months, we have two more searches using interface 00051 * provided by srch.c. That included an adapted version of Sphinx 2's FSG 00052 * search. Also, the original version of flat-lexicon decoding search. 00053 * 00054 * Second stage search operation is still not properly put in the srch_t 00055 * structure. We should create function hooks that allow developer to 00056 * put the code more properly than now. 00057 * 00058 * The interface of srch.c is still not very completed. Things we should 00059 * support include switching of AM and MLLR. They are currently 00060 * commented. 00061 * 00062 * Mode 5, the word-dependent tree copies are now fended off from the 00063 * users. 00064 * 00065 * Mode 2, the FSG search are opened. It is not very well tested so the 00066 * user will be warned about its nature. 00067 * 00068 * Revision 1.1.4.15 2006/01/16 20:01:20 arthchan2003 00069 * Added Commented code in srch.[ch] for second-stage rescoring. Not used for now. 00070 * 00071 * Revision 1.1.4.14 2005/11/17 06:36:36 arthchan2003 00072 * There are several important changes. 1, acoustic score scale has changed back to put it the search structure. This fixed a bug introduced pre-2005 code branching where only the scaling factor of the last frame. 2, Added a fmt argument of matchseg_write , implemented segmentation output for s2 and ctm file format. matchseg_write also now shared across the flat and tree decoder now. 3, Added Rong's read_seg_hyp_line. 00073 * 00074 * Revision 1.1.4.13 2005/09/25 19:23:55 arthchan2003 00075 * 1, Added arguments for turning on/off LTS rules. 2, Added arguments for turning on/off composite triphones. 3, Moved dict2pid deallocation back to dict2pid. 4, Tidying up the clean up code. 00076 * 00077 * Revision 1.1.4.12 2005/09/18 01:44:12 arthchan2003 00078 * Very boldly, started to support flat lexicon decoding (mode 3) in srch.c. Add log_hypseg. Mode 3 is implemented as srch-one-frame implementation. Scaling doesn't work at this point. 00079 * 00080 * Revision 1.1.4.11 2005/09/11 23:07:28 arthchan2003 00081 * srch.c now support lattice rescoring by rereading the generated lattice in a file. When it is operated, silence cannot be unlinked from the dictionary. This is a hack and its reflected in the code of dag, kbcore and srch. code 00082 * 00083 * Revision 1.1.4.10 2005/08/02 21:37:28 arthchan2003 00084 * 1, Used s3_cd_gmm_compute_sen instead of approx_cd_gmm_compute_sen in mode 2, 4 and 5. This will suppose to make s3.0 to be able to read SCHMM and use them as well. 2, Change srch_gmm_compute_lv2 to accept a two-dimensional array (no_stream*no_coeff) instead of a one dimensional array (no_coeff). 00085 * 00086 * Revision 1.1.4.9 2005/07/24 19:35:59 arthchan2003 00087 * Added GAUDEN_EVAL_WINDOW in srch.h. Assuming this is property of a search. 00088 * 00089 * Revision 1.1.4.8 2005/07/24 01:39:26 arthchan2003 00090 * Added srch_on_srch_frame_lv[12] in the search abstraction routine. This will allow implementation just provide the search for one frame without supplying all function pointer in the standard abstraction. 00091 * 00092 * Revision 1.1.4.7 2005/07/22 03:41:05 arthchan2003 00093 * 1, (Incomplete) Add function pointers for flat foward search. Notice implementation is not yet filled in. 2, adding log_hypstr and log_hyp_detailed. It is sphinx 3.0 version of matchwrite. Add it to possible code merge. 00094 * 00095 * Revision 1.1.4.6 2005/07/17 05:54:55 arthchan2003 00096 * replace vithist_dag_write_header with dag_write_header 00097 * 00098 * Revision 1.1.4.5 2005/07/13 18:46:39 arthchan2003 00099 * Re-included srch_fsg.h 00100 * 00101 * Revision 1.1.4.4 2005/07/07 02:37:39 arthchan2003 00102 * 1, Changed names of srchmode* functions to srch_mode*, 2, complete srch_mode_index_to_str, 3, Remove srch_rescoring and ask implementation to call these "rescoring functions" themselves. The reason is rescoring is not as universal as I would think in the general search. I think search implementer should be the one who decide whether rescoring is one part of their search algorithms 00103 * 00104 * Revision 1.1.4.3 2005/07/04 07:18:49 arthchan2003 00105 * Disabled support of FSG. Added comments for srch_utt_begin and srch_utt_end. 00106 * 00107 * Revision 1.1.4.2 2005/07/03 23:04:55 arthchan2003 00108 * 1, Added srchmode_str_to_index, 2, called the deallocation routine of the search implementation layer in srch_uninit 00109 * 00110 * Revision 1.1.4.1 2005/06/28 07:03:01 arthchan2003 00111 * Added read_fsg operation as one method. Currently, it is still not clear how it should iteract with lm 00112 * 00113 * Revision 1.1 2005/06/22 02:24:42 arthchan2003 00114 * Log. A search interface implementation are checked in. I will call 00115 * srch_t to be search abstraction or search mechanism from now on. The 00116 * major reason of separating with the search implementation routine 00117 * (srch_*.[ch]) is that search is something that people could come up 00118 * with thousands of ways to implement. 00119 * 00120 * Such a design shows a certain sense of defiance of conventional ways 00121 * of designing speech recognition. Namely, **always** using generic 00122 * graph as the grandfather ancester of every search lattice. This could 00123 * 1) break a lot of legacy optimization code. 2) could be slow depends 00124 * on the implementation. 00125 * 00126 * The current design only specify the operations that are supposed to be 00127 * generic in every search (or atomic search operations (ASOs)). 00128 * Ideally, users only need to implement the interface to make the code 00129 * work for another search. 00130 * 00131 * From this point of view, the current check-in still have some 00132 * fundamental flaws. For example, the communication mechanism between 00133 * different atomic search operations are not clearly defined. Scores are 00134 * now computed and put into structures of ascr. (ascr has no clear 00135 * interface to outside world). This is something we need to improve. 00136 * 00137 * Revision 1.18 2005/06/16 04:59:10 archan 00138 * Sphinx3 to s3.generic, a gentle-refactored version of Dave's change in senone scale. 00139 * 00140 * Revision 1.17 2005/06/10 03:40:57 archan 00141 * 1, Fixed doxygen documentation of srch.h, 2, eliminate srch.h C-style functions. 3, Start to fend off the users for using mode 5. We are ready to merge the code. 00142 * 00143 * Revision 1.16 2005/06/10 03:01:50 archan 00144 * Fixed file_open. 00145 * 00146 * Revision 1.15 2005/06/09 21:03:33 archan 00147 * Update srch.h and srch_debug.c such that include files doesn't depend on explicitly specified directory name. Rather it would be taken care by -I option in Makefile.am 00148 * 00149 * Revision 1.14 2005/05/11 06:10:38 archan 00150 * Code for lattice and back track pointer table dumping is now wrapped in reg_result_dump. The function is shared across mode 4 and mode 5. Possibly later for mode 3 and mode 6 as well. 00151 * 00152 * Revision 1.13 2005/05/11 00:18:45 archan 00153 * Add comments on srch.h and srch_time_switch_tree.h and srch_debug.h on how things work. A very detail comment is added in srch.h to describe how generally srch_t is interacting with other parts of the code. 00154 * 00155 * Revision 1.12 2005/05/04 05:15:25 archan 00156 * reverted the last change, seems to be not working because of compilation issue. Try not to deal with it now. 00157 * 00158 * Revision 1.1 2005/05/04 04:46:04 archan 00159 * Move srch.c and srch.h to search. More and more this type of refactoring will be done in future 00160 * 00161 * Revision 1.10 2005/05/03 04:09:09 archan 00162 * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. 00163 * 00164 * Revision 1.9 2005/04/25 19:22:47 archan 00165 * Refactor out the code of rescoring from lexical tree. Potentially we want to turn off the rescoring if we need. 00166 * 00167 * Revision 1.8 2005/04/22 04:22:36 archan 00168 * Add gmm_wrap, this will share code across op_mode 4 and op_mode 5. Also it also separate active senone selection into a different process. I hope this is the final step before making the WST search works. At the current stage, the code of mode-5 looks very much alike mode-4. This is intended because in Prototype 4, tail sharing will be used to reduce memory. 00169 * 00170 * Revision 1.7 2005/04/21 23:50:26 archan 00171 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00172 * 00173 * Revision 1.6 2005/04/20 03:42:55 archan 00174 * srch.c now is the only of the master search driver. When there is any change in the **interaction** of different blocks, srch.c should be changed first. Then the search implenetation, such as srch_time_switch_tree.c 00175 * 00176 * Revision 1.5 2005/03/30 01:22:47 archan 00177 * Fixed mistakes in last updates. Add 00178 * 00179 * 00180 * 17-Mar-2005 A. Chan (archan@cs.cmu.edu) at Carnegie Mellon University 00181 * 1 Started. This replaced utt.c starting from Sphinx 3.6. 00182 */ 00183 00184 #include <stdio.h> 00185 00186 #include <s3types.h> 00187 #include <glist.h> 00188 #include "dag.h" 00189 #include "lm.h" 00190 #include "ascr.h" 00191 #include "adaptor.h" 00192 #include "stat.h" 00193 #include "fast_algo_struct.h" 00194 #include "kbcore.h" 00195 #include "kb.h" 00196 00197 00198 /* Mode 1 */ 00199 #include "srch_allphone.h" 00200 00201 /* Mode 2 */ 00202 #include "srch_fsg.h" 00203 00204 /* Mode 3 */ 00205 #include "srch_flat_fwd.h" 00206 00207 /* Mode 4 */ 00208 #include "srch_time_switch_tree.h" 00209 00210 /* Mode 5 */ 00211 #include "srch_word_switch_tree.h" 00212 00213 /* Mode 1368*/ 00214 #include "srch_do_nothing.h" 00215 00216 /* Mode 1369*/ 00217 #include "srch_debug.h" 00218 00219 00220 #include "srch_output.h" 00221 00222 #ifndef _SRCH_H_ 00223 #define _SRCH_H_ 00224 00225 00226 #ifdef __cplusplus 00227 extern "C" { 00228 #endif 00229 #if 0 00230 /* Fool Emacs. */ 00231 } 00232 #endif 00233 00234 #define SRCH_SUCCESS 0 00235 #define SRCH_FAILURE 1 00236 00250 #define OPERATION_ALIGN 0 00256 #define OPERATION_ALLPHONE 1 00258 #define OPERATION_GRAPH 2 00262 #define OPERATION_FLATFWD 3 00269 #define OPERATION_TST_DECODE 4 00276 #define OPERATION_WST_DECODE 5 00283 #define OPERATION_EVANDRO_MODE 6 00290 #define OPERATION_DAVID_MODE 7 00302 #define OPERATION_ARTHUR_MODE 8 00310 #define OPERATION_YITAO_MODE 9 00315 #define OPERATION_RAVI_MODE 10 00326 #define OPERATION_STEVE_MODE 88 00337 #define OPERATION_DO_NOTHING 1368 00343 #define OPERATION_DEBUG 1369 00350 #define GRAPH_STRUCT_FLAT 0 00351 #define GRAPH_STRUCT_TST 1 00352 #define GRAPH_STRUCT_WST 2 00353 #define GRAPH_STRUCT_GENGRAPH 3 00354 #define GRAPH_STRUCT_PHMM 4 00355 00356 #define GMM_STRUCT_CDHMM 0 00357 #define GMM_STRUCT_SCHMM 1 00358 00359 00360 #define GAUDEN_EVAL_WINDOW 8 /*Moving window length when frames are 00361 considered as blocks, currently used in 00362 3.0 family of tools. */ 00363 00364 #define DFLT_UTT_SIZE 5000 00365 #define DFLT_NUM_SEGS 200 00369 /* \struct grp_str_t 00370 */ 00371 typedef struct { 00372 void *graph_struct; 00373 int32 graph_type; 00374 }grp_str_t; 00375 00376 00377 00529 typedef struct srch_funcs_s { 00530 /* 00531 Function pointers that perform the operations. Every mode will 00532 set these pointers at the beginning of the search. 00533 */ 00534 00536 int (*init)(kb_t *kb, 00537 void* srch_struct 00538 ); 00539 00541 int (*uninit)( 00542 void* srch_struct 00543 ); 00545 int (*utt_begin)( 00546 void* srch_struct 00547 ); 00548 00550 int (*utt_end)( 00551 void* srch_struct 00552 ); 00554 int (*decode)( 00555 void* srch_struct 00556 ); 00557 00559 int (*set_lm)( 00560 void* srch_struct, 00561 const char *lmname 00562 ); 00563 00565 int (*add_lm)(void* srch_struct, 00566 lm_t* lm, 00567 const char *lmname 00568 ); 00569 00571 int (*delete_lm)(void* srch_struct, 00572 const char *lmname 00573 ); 00574 00576 #if 0 00577 word_fsg_t* (*read_fsgfile)(void* srch_struct, 00578 const char* fsgname 00580 ); 00581 #endif 00582 /* The 4 operations that require switching during the approximate search process */ 00586 int (*gmm_compute_lv1)(void* srch_struct, 00587 float32 *feat, 00588 int32 frmno_lp1, 00589 int32 frmno_lp2 00590 ); 00591 00592 00593 /* The level 1 search functions are not yet fully used. Not all of them are defined nowWhen fast 00594 match is needed. We will need them more. 00595 */ 00596 int (*one_srch_frame_lv1)(void* srch_struct 00597 ); 00598 00599 int (*hmm_compute_lv1)(void* srch_struct); 00600 int (*eval_beams_lv1)(void* srch_struct); 00601 int (*propagate_graph_ph_lv1)(void* srch_struct); 00602 int (*propagate_graph_wd_lv1)(void* srch_struct); 00603 00604 /* The 4 operations that require switching during the detail search process */ 00609 int (*gmm_compute_lv2)(void* srch_struct, 00610 float32 **feat, 00611 int32 time 00612 ); 00613 00614 00619 int (*one_srch_frame_lv2)(void* srch_struct 00620 ); 00621 00622 00624 int (*hmm_compute_lv2)(void* srch_struct, 00625 int32 frmno 00626 ); 00627 00629 int (*eval_beams_lv2)(void* srch_struct 00630 ); 00631 00633 int (*propagate_graph_ph_lv2)(void* srch_struct, 00634 int32 frmno 00635 ); 00636 00638 int (*propagate_graph_wd_lv2)(void* srch_struct, 00639 int32 frmno 00640 ); 00641 00643 int (*rescoring) (void* srch_struct, 00644 int32 frmno 00645 ); 00646 00647 int (*frame_windup) (void * srch_struct, int32 frmno); 00648 int (*compute_heuristic) (void * srch_struct, int32 win_efv); 00649 int (*shift_one_cache_frame) (void *srch_struct,int32 win_efv); 00650 int (*select_active_gmm) (void *srch_struct); 00651 00652 00660 glist_t (*gen_hyp) (void * srch_struct 00661 ); 00662 00669 dag_t* (*gen_dag) (void* srch_struct, 00670 glist_t hyp 00671 ); 00672 00676 int (*dump_vithist)(void * srch_struct 00677 ); 00678 00682 glist_t (*bestpath_impl)(void *srch_struct, 00683 dag_t *dag 00684 ); 00685 00689 int (*dag_dump) (void * srch_struct, 00690 dag_t *dag 00691 ); 00692 00696 glist_t (*nbest_impl)(void *srch_struct, 00697 dag_t *dag 00698 ); 00699 00701 void *nothing; 00702 } srch_funcs_t; 00703 00704 typedef struct srch_s { 00708 srch_funcs_t *funcs; 00709 00710 grp_str_t* grh; 00711 int op_mode; 00712 stat_t *stat; 00713 char *uttid; 00714 char *uttfile; 00716 /* 00717 These variables control the logistic of a search operation. The 00718 are global to all different search modes. 00719 */ 00720 int32 cache_win; 00721 int32 cache_win_strt; 00723 int32 senscale; 00725 int32 *ascale; 00728 int32 ascale_sz; 00729 int32 num_frm; 00731 int32 *segsz; 00732 int32 segsz_sz; 00734 int32 num_segs; 00740 /* 00741 Auxillary Structures for the search. 00742 */ 00743 int32 exit_id; 00745 dag_t *dag; 00747 /* ARCHAN: Various pruning beams, put them together such that it looks more logical. */ 00748 ascr_t *ascr; 00749 beam_t *beam; 00750 fast_gmm_t *fastgmm; 00751 pl_t *pl; 00752 adapt_am_t * adapt_am; 00753 kbcore_t *kbc; 00756 FILE *matchfp; 00757 FILE *matchsegfp; 00759 FILE *hmmdumpfp; 00761 /* FIXME, duplicated with fwd_dbg_t */ 00762 int32 hmm_dump_sf; 00763 int32 hmm_dump_ef; 00764 }srch_t; 00765 00785 int32 srch_mode_str_to_index(const char* mode_str); 00786 00792 char* srch_mode_index_to_str(int32 index); 00793 00794 00795 /* The following are C-style method for srch structure. In theory, 00796 users could used both C-style and function pointer style to access 00797 functionalities of the code. However, we recommend developers to use 00798 the C-style functions because 1) it won't scare people that match, 2) 00799 it is more consistent with other modules in sphinx 3. 00800 */ 00801 00819 srch_t* srch_init(kb_t *kb, 00820 int32 op_mode 00821 ); 00822 00828 void srch_report(srch_t* srch 00829 ); 00830 00838 int32 srch_utt_begin(srch_t* srch 00839 ); 00840 00844 S3DECODER_EXPORT 00845 int32 srch_utt_decode_blk(srch_t* srch, 00846 float ***block_feat, 00847 int32 block_nfeatvec, 00848 int32 *curfrm 00849 ); 00850 00854 int32 srch_utt_end(srch_t* srch 00855 ); 00856 00858 int32 srch_uninit(srch_t* srch 00859 ); 00860 00864 glist_t srch_get_hyp(srch_t *srch 00865 ); 00866 00872 dag_t *srch_get_dag(srch_t *srch); 00873 00874 00876 void reg_result_dump (srch_t* s, 00877 int32 id 00878 ); 00882 void write_bstsenscr(FILE *fp, 00883 int32 numframe, 00884 int32* scale 00885 ); 00886 00887 00889 S3DECODER_EXPORT 00890 int32 srch_set_lm(srch_t* srch, 00891 const char *lmname 00892 ); 00893 00895 int32 srch_delete_lm(srch_t* srch, 00896 const char *lmname 00897 ); 00898 00899 #if 0 /*Tentative: but not yet implemented */ 00900 int32 srch_set_am(void); 00901 00903 int32 srch_add_am(void); 00904 00906 int32 srch_delete_am(void); 00907 00909 int32 srch_add_lm(void); 00910 00911 00913 int32 srch_set_mllr(void); 00914 00916 int32 srch_add_mllr(void); 00917 00919 int32 srch_delete_mllr(void); 00920 00922 int32 srch_set_lamdafn(void); 00923 00925 int32 srch_add_lamdafn(void); 00926 00928 int32 srch_delete_lamdafn(void); 00929 00931 int32 srch_add_words_to_dict(void); 00932 00933 #endif /* End not implemented */ 00934 00935 #ifdef __cplusplus 00936 } 00937 #endif 00938 00939 00940 #endif /*_SRCH_H_ */