fsg_search.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 /*
00035  * fsg_search.h -- Search structures for FSM decoding.
00036  * 
00037  * **********************************************
00038  * CMU ARPA Speech Project
00039  *
00040  * Copyright (c) 2004 Carnegie Mellon University.
00041  * ALL RIGHTS RESERVED.
00042  * **********************************************
00043  * 
00044  * HISTORY
00045  * 
00046  * $Log: fsg_search.h,v $
00047  * Revision 1.2  2006/02/23 05:12:43  arthchan2003
00048  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
00049  *
00050  * Revision 1.1.2.7  2006/01/16 18:20:46  arthchan2003
00051  * Remove junks in the code, change the reporting from printf to log_hypstr.
00052  *
00053  * Revision 1.1.2.6  2005/07/24 19:34:46  arthchan2003
00054  * Removed search_hyp_t, used srch_hyp_t instead
00055  *
00056  * Revision 1.1.2.5  2005/07/24 01:34:54  arthchan2003
00057  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
00058  *
00059  * Revision 1.1.2.4  2005/07/17 05:44:32  arthchan2003
00060  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
00061  *
00062  * Revision 1.1.2.3  2005/07/13 18:39:48  arthchan2003
00063  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro.  There are 8 minor HACKs where functions need to be removed temporarily.  Also, there are three major hacks. 1,  there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables.  This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement.  But I haven't figure it out yet.
00064  *
00065  * Revision 1.1.2.2  2005/06/28 07:01:20  arthchan2003
00066  * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed.  The number of empty functions in fsg_search is now decreased from 35 to 30.
00067  *
00068  * Revision 1.1.2.1  2005/06/27 05:26:29  arthchan2003
00069  * Sphinx 2 fsg mainpulation routines.  Compiled with faked functions.  Currently fended off from users.
00070  *
00071  * Revision 1.2  2004/07/23 23:36:34  egouvea
00072  * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently
00073  *
00074  * Revision 1.6  2004/07/20 13:40:55  rkm
00075  * Added FSG get/set start/final state functions.
00076  *
00077  * Revision 1.5  2004/07/16 19:55:28  rkm
00078  * Added state information to hypothesis.
00079  *
00080  * Revision 1.1  2004/07/16 00:57:12  egouvea
00081  * Added Ravi's implementation of FSG support.
00082  *
00083  * Revision 1.4  2004/07/07 13:56:33  rkm
00084  * Added reporting of (acoustic score - best senone score)/frame
00085  *
00086  * Revision 1.3  2004/06/22 15:36:12  rkm
00087  * Added partial result handling in FSG mode
00088  *
00089  * Revision 1.2  2004/05/27 14:22:57  rkm
00090  * FSG cross-word triphones completed (but for single-phone words)
00091  *
00092  * Revision 1.1.1.1  2004/03/01 14:30:31  rkm
00093  *
00094  *
00095  * Revision 1.6  2004/02/27 16:15:13  rkm
00096  * Added FSG switching
00097  *
00098  * Revision 1.5  2004/02/27 15:05:21  rkm
00099  * *** empty log message ***
00100  *
00101  * Revision 1.4  2004/02/26 14:48:20  rkm
00102  * *** empty log message ***
00103  *
00104  * Revision 1.3  2004/02/26 01:14:48  rkm
00105  * *** empty log message ***
00106  *
00107  * Revision 1.2  2004/02/24 18:13:05  rkm
00108  * Added NULL transition handling
00109  *
00110  * Revision 1.1  2004/02/23 15:53:45  rkm
00111  * Renamed from fst to fsg
00112  *
00113  * Revision 1.1  2004/02/19 21:16:54  rkm
00114  * Added fsg_search.{c,h}
00115  *
00116  * 
00117  * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00118  *              Started.
00119  */
00120 
00121 
00122 #ifndef __S2_FSG_SEARCH_H__
00123 #define __S2_FSG_SEARCH_H__
00124 
00125 #define HYP_SZ               1024   /* NOTE(review): not referenced anywhere else in this header; presumably a hypothesis buffer size -- confirm against fsg_search.c */
00126 
00127 
00128 #include <stdio.h>
00129 
00130 #include <cmd_ln.h>
00131 #include <logmath.h>
00132 #include <s3types.h>
00133 #include <glist.h>
00134 #include <word_fsg.h>
00135 #include <fsg_lextree.h>
00136 #include <fsg_history.h>
00137 #include <ascr.h>
00138 #include <search.h>
00139 #include <dict.h>
00140 #include <mdef.h>
00141 #include <tmat.h>
00142 #include <hmm.h>
00143 
00144 
00145 #ifdef __cplusplus
00146 extern "C" {
00147 #endif
00148 #if 0
00149 /* Fool Emacs. */
00150 }
00151 #endif
00152 
00153 typedef struct fsg_search_s {   /* FSG search module state: loaded FSGs, search structures for the active FSG, pruning thresholds, per-utterance results */
00154     glist_t fsglist;            /* List of all FSGs loaded */
00155   
00156     word_fsg_t *fsg;            /* Currently active FSG; NULL if none.  One
00157                                    must be made active before starting FSG
00158                                    decoding */
00159     fsg_lextree_t *lextree;     /* Lextree structure for the currently
00160                                    active FSG */
00161     fsg_history_t *history;     /* For storing the Viterbi search history */
00162   
00163     glist_t pnode_active;               /* Nodes (pnodes) active in this frame */
00164     glist_t pnode_active_next;  /* Nodes (pnodes) activated for the next frame */
00165   
00166     int32 beam;                 /* Global pruning threshold */
00167     int32 pbeam;                        /* Threshold for phone transition */
00168     int32 wbeam;                        /* Threshold for word exit */
00169   
00170     int32 frame;                        /* Current frame (read via fsg_search_frame()) */
00171 
00172     int32 bestscore;            /* Best score, used for beam pruning */
00173     int32 bpidx_start;          /* First history entry index this frame */
00174   
00175     srch_hyp_t *hyp;            /* Search hypothesis (filled in at utterance end) */
00176     int32 ascr, lscr;           /* Total acoustic and lm score for utt */
00177   
00178     int32 n_hmm_eval;           /* Total HMMs evaluated this utt */
00179   
00180     int32 state;                        /* Whether IDLE or BUSY (busy = in the midst of an utterance) */
00181 
00182     hmm_context_t *hmmctx;      /* NOTE(review): undocumented; presumably the HMM evaluation context -- confirm */
00183 
00184     /* Added by Arthur at 20050627.  NOTE(review): none of the members from here on are documented in the original; the descriptions below are inferred from names and types only -- confirm against fsg_search.c */
00185     int32 isUsealtpron;         /* presumably a flag: use alternative pronunciations -- confirm */
00186     int32 isUseFiller;          /* presumably a flag: use filler words -- confirm */
00187     int32 isBacktrace;          /* presumably a flag: report the backtrace -- confirm */
00188     int32 isReportAltpron;      /* presumably a flag: report alternative pronunciations in results -- confirm */
00189     char* DumpLatdir;           /* presumably the directory lattices are dumped to -- confirm (ownership not visible here) */
00190     int32 n_ci_phone;           /* presumably the number of CI phones -- confirm */
00191   
00192     dict_t *dict;               /* presumably the pronunciation dictionary -- confirm */
00193     mdef_t *mdef;               /* presumably the model definition -- confirm */
00194     tmat_t *tmat;               /* presumably the HMM transition matrices -- confirm */
00195     ascr_t *am_score_pool;      /* presumably the acoustic score storage -- confirm */
00196     char* uttid;                /* presumably the current utterance ID -- confirm (ownership not visible here) */
00197     int32 *senscale;            /* presumably per-frame senone score scaling values -- confirm */
00199     FILE* matchfp;              /* presumably the output stream for match (hypothesis) results -- confirm */
00200     FILE* matchsegfp;           /* presumably the output stream for match segmentation results -- confirm */
00201 
00202     cmd_ln_t *config;           /* presumably the command-line/configuration object -- confirm */
00203     logmath_t *logmath;         /* presumably the log-math object used for score computation -- confirm */
00204 } fsg_search_t;
00205 
00206 
00207 /* Access macros: fsg_search_frame(s) yields the frame member (the current frame) of the fsg_search_t that s points to */
00208 #define fsg_search_frame(s)     ((s)->frame)
00209 
00210 
00211 /*
00212  * Create, initialize and return a search module for the given FSG.
00213  * If no FSG is given (i.e., the argument is NULL), a search structure is
00214  * still created.  If an FSG is provided, it is made the currently active
00215  * FSG.  NOTE(review): srch_struct is an opaque pointer; what it must point to is not visible in this header -- confirm against fsg_search.c.
00216  */
00217 fsg_search_t *fsg_search_init (word_fsg_t *, void *srch_struct);
00218 
00219 
00223 void fsg_search_free(fsg_search_t *s);   /* Deallocate search structure.  NOTE(review): the same function is declared a second time further down in this header; one of the two prototypes is redundant */
00224 
00225 /*
00226  * Look up the FSG associated with the given name and return it, or NULL if
00227  * no match is found.
00228  */
00229 word_fsg_t *fsg_search_fsgname_to_fsg (fsg_search_t *, char *name);
00230 
00231 
00232 /*
00233  * Add the given FSG to the collection of FSGs known to this search object.
00234  * The given fsg is simply added to the collection.  It is not automatically
00235  * made the currently active one.
00236  * The name of the new FSG must not match any of the existing ones.  If it
00237  * does match, FALSE is returned.  If successfully added, TRUE is returned.
00238  */
00239 int fsg_search_add_fsg (fsg_search_t *, word_fsg_t *);
00240 
00241 
00242 /*
00243  * Delete the given FSG from the known collection.  Free the FSG itself,
00244  * and if it was the currently active FSG, also free the associated search
00245  * structures and leave the current FSG undefined.  NOTE(review): the return value is not documented here; presumably TRUE/FALSE as for fsg_search_add_fsg() -- confirm.
00246  */
00247 int fsg_search_del_fsg (fsg_search_t *, word_fsg_t *);
00248 
00249 
00250 /* Like fsg_search_del_fsg(), but identifies the FSG to delete by its name */
00251 int fsg_search_del_fsg_byname (fsg_search_t *, char *name);
00252 
00253 
00254 /*
00255  * Switch to a new FSG (identified by its string name).  Must not be invoked
00256  * when search is busy (i.e., in the midst of an utterance); that is an error
00257  * and FALSE is returned.  If successful, returns TRUE.
00258  */
00259 int fsg_search_set_current_fsg (fsg_search_t *, char *);
00260 
00261 
00262 /*
00263  * Deallocate the search structure.  (This prototype also appears earlier in this header, with a named parameter.)
00264  */
00265 void fsg_search_free (fsg_search_t *);
00266 
00267 
00268 /*
00269  * Prepare the FSG search structure to begin decoding the next
00270  * utterance.
00271  */
00272 void fsg_search_utt_start (fsg_search_t *);
00273 
00274 
00275 /*
00276  * Wind up and clean the FSG search structure after an utterance.  Fill in the
00277  * results of search: fsg_search_t.{hyp,ascr,lscr,frame}.  (But some fields
00278  * of hyp are left unfilled for now: conf, latden, phone_perp.)
00279  */
00280 void fsg_search_utt_end (fsg_search_t *);
00281 
00282 
00283 /*
00284  * Advance the Viterbi search by one frame.
00285  */
00286 void fsg_search_frame_fwd (fsg_search_t *);
00287 
00288 
00289 /*
00290  * Compute the partial or final Viterbi backtrace result.  (The result can
00291  * be retrieved using the API functions search_result() or search_get_hyp().)
00292  * If "check_fsg_final_state" is TRUE, the backtrace starts from the best
00293  * history entry ending in the final state (if it exists).  Otherwise it
00294  * starts from the best entry, regardless of the terminating state (usually
00295  * used for partial results).
00296  */
00297 void fsg_search_history_backtrace (fsg_search_t *search,
00298                                    int check_fsg_final_state);
00299 
00300 /*
00301  * Return the start (or final) state of the currently active FSG, if there
00302  * is one.  Return -1 if no FSG is currently active.
00303  */
00304 int32 fsg_search_get_start_state (fsg_search_t *);
00305 int32 fsg_search_get_final_state (fsg_search_t *);
00306 
00307 
00308 /*
00309  * Set the start (or final) state of the currently active FSG, if any, to the
00310  * given state.  This operation can only be done in between utterances, not
00311  * in the midst of one.  Return the previous start (or final) state if
00312  * successful.  Return -1 on any error.
00313  */
00314 int32 fsg_search_set_start_state (fsg_search_t *, int32 state);
00315 int32 fsg_search_set_final_state (fsg_search_t *, int32 state);
00316 
00317 
00318 void fsg_search_sen_active (fsg_search_t *search);   /* NOTE(review): undocumented in this header; presumably flags the senones needed by the currently active nodes -- confirm against fsg_search.c */
00319 
00320 #ifdef __cplusplus
00321 }
00322 #endif
00323 
00324 
00325 #endif

Generated on 7 Mar 2010 by  doxygen 1.6.1