fsg_history.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 /*
00035  * fsg_history.h -- FSG Viterbi decode history
00036  * 
00037  * **********************************************
00038  * CMU ARPA Speech Project
00039  *
00040  * Copyright (c) 1999 Carnegie Mellon University.
00041  * ALL RIGHTS RESERVED.
00042  * **********************************************
00043  * 
00044  * HISTORY
00045  * 
00046  * $Log$
00047  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00048  * A Great Reorganzation of header files and executables
00049  * 
00050  * Revision 1.2  2006/02/23 05:10:18  arthchan2003
00051  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
00052  *
00053  * Revision 1.1.2.5  2005/07/24 19:34:46  arthchan2003
00054  * Removed search_hyp_t, used srch_hyp_t instead
00055  *
00056  * Revision 1.1.2.4  2005/07/24 01:34:54  arthchan2003
00057  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
00058  *
00059  * Revision 1.1.2.3  2005/07/13 18:39:47  arthchan2003
00060  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro.  There are 8 minor HACKs where functions need to be removed temporarily.  Also, there are three major hacks. 1,  there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables.  This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement.  But I haven't figure it out yet.
00061  *
00062  * Revision 1.1.2.2  2005/06/28 07:01:20  arthchan2003
00063  * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed.  The number of empty functions in fsg_search is now decreased from 35 to 30.
00064  *
00065  * Revision 1.1.2.1  2005/06/27 05:26:29  arthchan2003
00066  * Sphinx 2 fsg mainpulation routines.  Compiled with faked functions.  Currently fended off from users.
00067  *
00068  * Revision 1.1  2004/07/16 00:57:12  egouvea
00069  * Added Ravi's implementation of FSG support.
00070  *
00071  * Revision 1.7  2004/07/07 22:30:35  rkm
00072  * *** empty log message ***
00073  *
00074  * Revision 1.6  2004/07/07 13:56:33  rkm
00075  * Added reporting of (acoustic score - best senone score)/frame
00076  *
00077  * Revision 1.5  2004/06/25 14:49:08  rkm
00078  * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state
00079  *
00080  * Revision 1.4  2004/06/23 20:32:16  rkm
00081  * *** empty log message ***
00082  *
00083  * Revision 1.3  2004/05/27 15:16:08  rkm
00084  * *** empty log message ***
00085  *
00086  * 
00087  * 25-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00088  *              Started, based on S3.3 version.
00089  */
00090 
00091 
00092 #ifndef __S2_FSG_HISTORY_H__
00093 #define __S2_FSG_HISTORY_H__
00094 
00095 
00096 #include <stdio.h>
00097 
00098 #include <glist.h>
00099 #include <s3types.h>
00100 #include <blkarray_list.h>
00101 #include <fsg_psubtree.h>
00102 #include <word_fsg.h>
00103 #include <search.h>
00104 #include <dict.h>
00105 
00106 
00107 #ifdef __cplusplus
00108 extern "C" {
00109 #endif
00110 #if 0
00111 /* Fool Emacs. */
00112 }
00113 #endif
00114 
00115 /*
00116  * The Viterbi history structure.  This is a tree, with the root at the
00117  * FSG start state, at frame 0, with a null predecessor.
00118  */
00119 
00120 /*
00121  * A single Viterbi history entry
00122  */
00123 typedef struct fsg_hist_entry_s {
00124     word_fsglink_t *fsglink;    /* Link taken result in this entry */
00125     int32 frame;                        /* Ending frame for this entry */
00126     int32 score;                        /* Total path score at the end of this
00127                                            transition */
00128     int32 pred;                         /* Predecessor entry; -1 if none */
00129     int32 lc;                   /* Left context provided by this entry to
00130                                    succeeding words */
00131     fsg_pnode_ctxt_t rc;                /* Possible right contexts to which this entry
00132                                            applies */
00133 } fsg_hist_entry_t;
00134 
00135 /* Access macros */
00136 #define fsg_hist_entry_fsglink(v)       ((v)->fsglink)
00137 #define fsg_hist_entry_frame(v)         ((v)->frame)
00138 #define fsg_hist_entry_score(v)         ((v)->score)
00139 #define fsg_hist_entry_pred(v)          ((v)->pred)
00140 #define fsg_hist_entry_lc(v)            ((v)->lc)
00141 #define fsg_hist_entry_rc(v)            ((v)->rc)
00142 
00143 
00144 /*
00145  * The entire tree of history entries (fsg_history_t.entries).
00146  * Optimization: In a given frame, there may be several history entries, with
00147  * the same left and right phonetic context, terminating in a particular state.
00148  * Only the best scoring one of these needs to be saved, since everything else
00149  * will be pruned according to the Viterbi algorithm.  frame_entries is used
00150  * temporarily in each frame to determine these best scoring entries in that
00151  * frame.  Only the ones not pruned are transferred to entries at the end of
00152  * the frame.  However, null transitions are a problem since they create
00153  * entries that depend on entries created in the CURRENT frame.  Hence, this
00154  * pruning is done in two stages: first for the non-null transitions, and then
00155  * for the null transitions alone.  (This solution is sub-optimal, and can be
00156  * improved with a little more work.  SMOP.)
00157  * Why is frame_entries a list?  Each entry has a unique terminating state,
00158  * and has a unique lc CIphone.  But it has a SET of rc CIphones.
00159  * frame_entries[s][lc] is an ordered list of entries created in the current
00160  * frame, terminating in state s, and with left context lc.  The list is in
00161  * descending order of path score.  When a new entry with (s,lc) arrives,
00162  * its position in the list is determined.  Then its rc set is modified by
00163  * subtracting the union of the rc's of all its predecessors (i.e., better
00164  * scoring entries).  If the resulting rc set is empty, the entry is discarded.
00165  * Otherwise, it is inserted, and the rc sets of all downstream entries in the
00166  * list are updated by subtracting the new entry's rc.  If any of them becomes
00167  * empty, it is also discarded.
00168  * As mentioned earlier, this procedure is applied in two stages, for the
00169  * non-null transitions, and the null transitions, separately.
00170  */
00171 typedef struct fsg_history_s {
00172     word_fsg_t *fsg;            /* The FSG for which this object applies */
00173     blkarray_list_t *entries;   /* A list of history table entries; the root
00174                                    entry is the first element of the list */
00175     glist_t **frame_entries;
00176 
00177     /*Added by Arthur at 20050627*/
00178     int32 n_ciphone;
00179 } fsg_history_t;
00180 
00181 
00182 /*
00183  * One-time intialization: Allocate and return an initially empty history
00184  * module.
00185  */
00186 fsg_history_t *fsg_history_init (word_fsg_t *fsg,int32 n_ciphone);
00187 
00188 void fsg_history_free (fsg_history_t *);
00189 
00190 void fsg_history_utt_start (fsg_history_t *);
00191 
00192 
00193 void fsg_history_utt_end (fsg_history_t *);
00194 
00195 
00196 /*
00197  * Create a history entry recording the completion of the given FSG
00198  * transition, at the end of the given frame, with the given score, and
00199  * the given predecessor history entry.
00200  * The entry is initially temporary, and may be superseded by another
00201  * with a higher score.  The surviving entries must be transferred to
00202  * the main history table, via fsg_history_end_frame().
00203  */
00204 void fsg_history_entry_add (fsg_history_t *,
00205                             word_fsglink_t *,   /* FSG transition */
00206                             int32 frame,
00207                             int32 score,
00208                             int32 pred,
00209                             int32 lc,
00210                             fsg_pnode_ctxt_t rc);
00211 
00212 /*
00213  * Transfer the surviving history entries for this frame into the permanent
00214  * history table.  This function can be called several times during a frame.
00215  * Each time, the entries surviving so far are transferred, and the temporary
00216  * lists cleared.  This feature is used to handle the entries due to non-null
00217  * transitions and null transitions separately.
00218  */
00219 void fsg_history_end_frame (fsg_history_t *);
00220 
00221 
00222 /* Clear the hitory table */
00223 void fsg_history_reset (fsg_history_t *);
00224 
00225 
00226 /* Return the number of valid entries in the given history table */
00227 int32 fsg_history_n_entries (fsg_history_t *h);
00228 
00229 
00230 /*
00231  * Viterbi backtrace.
00232  */
00233 glist_t fsg_history_backtrace (fsg_history_t *);
00234 
00235 
00236 /*
00237  * Dump the Viterbi history data to the given file (mainly for debugging).
00238  */
00239 void fsg_history_dump (fsg_history_t *vh, char const *uttid, FILE *fp, dict_t *dict);
00240 
00241 
00242 /*
00243  * Return a ptr to the history entry for the given ID; NULL if there is no
00244  * such entry.
00245  */
00246 fsg_hist_entry_t *fsg_history_entry_get(fsg_history_t *, int32 id);
00247 
00248 
00249 /*
00250  * Switch the FSG associated with the given history module.  Should be done
00251  * when the history list is empty.  If not empty, the list is cleared.
00252  */
00253 void fsg_history_set_fsg (fsg_history_t *, word_fsg_t *);
00254 
00255 
00256 /* Free the given Viterbi search history object */
00257 void fsg_history_free (fsg_history_t *h);
00258 
00259 
00260 /*
00261  * Extract and fill out a srch_hyp_t structure from the given history entry
00262  * (index).  Caller must allocate hyp.
00263  * Note: Must not be called with index <= 0.
00264  * Note: (hyp->wid < 0) iff the entry corresponds to a null transition.
00265  * Note: hyp->{conf,latden,phone_perp} have dummy values filled in.
00266  * Note: hyp->next is unaffected.
00267  * Return value: -1 if index <= 0 (error; hyp is not filled in);
00268  * the return value is +ve if it is a valid, real (non-dummy) entry.
00269  */
00270 int32 fsg_history_entry_hyp_extract (fsg_history_t *h, int32 index,
00271                                      srch_hyp_t *hyp, dict_t *dict);
00272 
00273 #ifdef __cplusplus
00274 }
00275 #endif
00276 
00277 
00278 #endif

Generated on 7 Mar 2010 by  doxygen 1.6.1