fsg_psubtree.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 /*
00035  * fsg_psubtree.h -- Phone-level FSG subtree representing all transitions
00036  * out of a single FSG state. 
00037  * (Note: Currently, it is actually a flat lexicon representation.)
00038  * 
00039  * **********************************************
00040  * CMU ARPA Speech Project
00041  *
00042  * Copyright (c) 2004 Carnegie Mellon University.
00043  * ALL RIGHTS RESERVED.
00044  * **********************************************
00045  * 
00046  * HISTORY
00047  * 
00048  * $Log$
00049  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00050  * A Great Reorganzation of header files and executables
00051  * 
00052  * Revision 1.2  2006/02/23 05:10:18  arthchan2003
00053  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
00054  *
00055  * Revision 1.1.2.5  2005/07/24 01:34:54  arthchan2003
00056  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
00057  *
00058  * Revision 1.1.2.4  2005/07/20 21:18:30  arthchan2003
00059  * FSG can now be read, srch_fsg_init can now be initialized, psubtree can be built. Sounds like it is time to plug in other function pointers.
00060  *
00061  * Revision 1.1.2.3  2005/07/17 05:44:32  arthchan2003
00062  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
00063  *
00064  * Revision 1.1.2.2  2005/07/13 18:39:47  arthchan2003
00065  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro.  There are 8 minor HACKs where functions need to be removed temporarily.  Also, there are three major hacks. 1,  there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables.  This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement.  But I haven't figure it out yet.
00066  *
00067  * Revision 1.1.2.1  2005/06/27 05:26:29  arthchan2003
00068  * Sphinx 2 fsg mainpulation routines.  Compiled with faked functions.  Currently fended off from users.
00069  *
00070  * Revision 1.1  2004/07/16 00:57:12  egouvea
00071  * Added Ravi's implementation of FSG support.
00072  *
00073  * Revision 1.3  2004/06/25 14:49:08  rkm
00074  * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state
00075  *
00076  * Revision 1.2  2004/05/27 14:22:57  rkm
00077  * FSG cross-word triphones completed (but for single-phone words)
00078  *
00079  * Revision 1.1.1.1  2004/03/01 14:30:31  rkm
00080  *
00081  *
00082  * Revision 1.2  2004/02/27 15:05:21  rkm
00083  * *** empty log message ***
00084  *
00085  * Revision 1.1  2004/02/23 15:53:45  rkm
00086  * Renamed from fst to fsg
00087  *
00088  * Revision 1.4  2004/02/23 15:09:50  rkm
00089  * *** empty log message ***
00090  *
00091  * Revision 1.3  2004/02/19 21:16:54  rkm
00092  * Added fsg_search.{c,h}
00093  *
00094  * Revision 1.2  2004/02/18 15:02:34  rkm
00095  * Added fsg_lextree.{c,h}
00096  *
00097  * Revision 1.1  2004/02/17 21:11:49  rkm
00098  * *** empty log message ***
00099  *
00100  * 
00101  * 09-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00102  *              Started.
00103  */
00104 
00105 
00106 #ifndef __S2_FSG_PSUBTREE_H__
00107 #define __S2_FSG_PSUBTREE_H__
00108 
00109 
00110 #include <stdio.h>
00111 
00112 #include <cmd_ln.h>
00113 #include <logmath.h>
00114 
00115 #include "s3types.h"
00116 #include "word_fsg.h"
00117 #include "fsg.h"
00118 #include "hmm.h"
00119 #include "dict.h"
00120 #include "mdef.h"
00121 
00122 
00123 #ifdef __cplusplus
00124 extern "C" {
00125 #endif
00126 #if 0
00127 /* Fool Emacs. */
00128 }
00129 #endif
00130 
00131 /*
00132  * **HACK-ALERT**!!  Compile-time constant determining the size of the
00133  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv.  (See below.)
00134  * But it makes memory allocation simpler and more efficient.
 *
 * The value is the number of uint32 words in the bit-vector.
 * fsg_pnode_add_ctxt() stores context c as bit (c & 0x1f) of word (c >> 5),
 * so only context IDs whose word index (c >> 5) is below this constant fit.
 * NOTE(review): with a 32-bit uint32 that means IDs 0..63 -- confirm that the
 * number of CI phones in the model never exceeds this.
00135  */
00136 #define FSG_PNODE_CTXT_BVSZ     2
00137 
/*
 * Set of context CI phones, held as a fixed-size bit-vector of
 * FSG_PNODE_CTXT_BVSZ uint32 words.
 */
00138 typedef struct {
00139     uint32 bv[FSG_PNODE_CTXT_BVSZ];
00140 } fsg_pnode_ctxt_t;
00141 
00142 
/*
 * One node of the phone-level FSG subtree (lextree).  Each node embeds its
 * own HMM and carries the links used to traverse the tree (next, sibling)
 * and to free it (alloc_next), plus the transition probability and phonetic
 * context information described on the individual fields.
 */
00159 typedef struct fsg_pnode_s {
    /*
     * Outgoing link of this node: either a successor pnode (succ) or a
     * word-level FSG transition (fsglink).  The two members share storage,
     * so only one of them is meaningful for a given node.
     * NOTE(review): presumably fsglink is the valid member for leaf nodes
     * (see the leaf flag) and succ for all others -- confirm against
     * fsg_psubtree.c.
     */
00167     union {
00168         struct fsg_pnode_s *succ;
00169         word_fsglink_t *fsglink;
00170     } next;
00171   
00172     /*
00173      * For simplicity of memory management (i.e., freeing the pnodes), all
00174      * pnodes allocated for all transitions out of a state are maintained in a
00175      * linear linked list through the alloc_next pointer.
00176      */
00177     struct fsg_pnode_s *alloc_next;
00178   
00179     /*
00180      * The next node that is also a child of the parent of this node; NULL if
00181      * none.
00182      */
00183     struct fsg_pnode_s *sibling;
00184 
00185     /*
00186      * The transition (log) probability to be incurred upon transitioning to
00187      * this node.  (Transition probabilities are really associated with the
00188      * transitions.  But a lextree node has exactly one incoming transition.
00189      * Hence, the prob can be associated with the node.)
00190      * This is a logs2(prob) value, and includes the language weight.
00191      */
00192     int32 logs2prob;
00193   
00194     /*
00195      * The root and leaf positions associated with any transition have to deal
00196      * with multiple phonetic contexts.  However, different contexts may result
00197      * in the same SSID (senone-seq ID), and can share a single pnode with that
00198      * SSID.  But the pnode should track the set of context CI phones that share
00199      * it.  Hence the fsg_pnode_ctxt_t bit-vector set-representation.  (For
00200      * simplicity of implementation, its size is a compile-time constant for
00201      * now.)  Single phone words would need a 2-D array of context, but that's
00202      * too expensive.  For now, they simply use SIL as right context, so only
00203      * the left context is properly modelled.
00204      * (For word-internal phones, this field is unused, of course.)
00205      */
00206     fsg_pnode_ctxt_t ctxt;
00207   
00208     uint8 ci_ext;               /* This node's CIphone as viewed externally (context) */
00209     uint8 ppos;         /* Phoneme position in pronunciation */
00210     uint8 leaf;         /* Whether this is a leaf node */
00211   
00212     /* HMM-state-level stuff here */
00213     hmm_t hmm;
00214 } fsg_pnode_t;
00215 
/*
 * Access macros.  Each takes a pointer to an fsg_pnode_t and expands to the
 * named field (usable as an lvalue); fsg_pnode_hmmptr() instead yields the
 * address of the node's embedded HMM.
 */
#define fsg_pnode_leaf(p)       ((p)->leaf)
#define fsg_pnode_logs2prob(p)  ((p)->logs2prob)
#define fsg_pnode_succ(p)       ((p)->next.succ)
#define fsg_pnode_fsglink(p)    ((p)->next.fsglink)
#define fsg_pnode_sibling(p)    ((p)->sibling)
#define fsg_pnode_hmmptr(p)     (&((p)->hmm))
#define fsg_pnode_ci_ext(p)     ((p)->ci_ext)
#define fsg_pnode_ppos(p)       ((p)->ppos)
#define fsg_pnode_ctxt(p)       ((p)->ctxt)

/*
 * Add context c to the context bit-vector of node p by setting bit
 * (c & 0x1f) of word (c >> 5).
 *
 * No bounds check is performed: the caller must ensure that c is
 * non-negative and that (c >> 5) is a valid index into ctxt.bv.
 * The mask is built from an unsigned 1 so that (c & 0x1f) == 31 does not
 * left-shift a signed int into its sign bit, which is undefined behaviour.
 * Note that c is evaluated twice, so it must not have side effects.
 */
#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1U << ((c)&0x001f)))
00229 
00230 
/*
 * Build the phone-level subtree for one FSG state.
 *
 * NOTE(review): the original Doxygen comment is not visible in this listing.
 * The notes below are inferred from the file header and from the comment on
 * fsg_psubtree_dump(); confirm them against fsg_psubtree.c.
 *
 *   ctx        : HMM context; presumably used to initialise each node's hmm.
 *   fsg        : word-level FSG supplying the transitions.
 *   from_state : presumably the FSG state whose outgoing transitions are
 *                expanded into the subtree.
 *   alloc_head : updated by this call with the head of the linear list of
 *                allocated nodes (the list later handed to
 *                fsg_psubtree_free() and fsg_psubtree_dump()).
 *   config     : command-line configuration object; its use is not visible
 *                from this header.
 *   logmath    : log-math object; its use is not visible from this header.
 *
 * Return value: presumably the root of the newly built subtree -- confirm.
 */
00237 fsg_pnode_t *fsg_psubtree_init (hmm_context_t *ctx,
00238                                 word_fsg_t *fsg, 
00239                                 int32 from_state, 
00240                                 fsg_pnode_t **alloc_head,
00241                                 cmd_ln_t *config,
00242                                 logmath_t *logmath
00243     );
00244 
00245 
/*
 * Release a subtree.  alloc_head is the head of the linear allocation list
 * that links nodes through fsg_pnode_t.alloc_next.
 * NOTE(review): presumably every node on that list is freed, after which no
 * pointer into the subtree may be used -- confirm against the implementation.
 */
00250 void fsg_psubtree_free (fsg_pnode_t *alloc_head);
00251 
00252 
00253 /*
00254  * Dump the list of nodes in the given lextree to the given file.  alloc_head:
00255  * head of linear list of allocated nodes updated by fsg_psubtree_init().
 * fp: stream that receives the dump.
 * dict, mdef: dictionary and model definition.  NOTE(review): presumably used
 * to print word and phone names for each node -- confirm.
00256  */
00257 void fsg_psubtree_dump (fsg_pnode_t *alloc_head, FILE *fp,
00258                         dict_t *dict, mdef_t *mdef
00259     );
00260 
00261 
00262 /*
00263  * Attempt to transition into the given node with the given attributes.
00264  * If the node is already active in the given frame with a score better
00265  * than the incoming score, nothing is done.  Otherwise the transition is
00266  * successful.
00267  * Return value: TRUE if the node was newly activated for the given frame,
00268  * FALSE if it was already activated for that frame (whether the incoming
00269  * transition was successful or not).
 *
 *   pnode : node being entered.
 *   score : incoming path score compared against the node's current score.
 *   frame : frame for which the node is to be active.
 *   bpidx : NOTE(review): presumably the history (backpointer) table index
 *           recorded with the incoming path -- confirm.
00270  */
00271 int fsg_psubtree_pnode_enter (fsg_pnode_t *pnode,
00272                               int32 score,
00273                               int32 frame,
00274                               int32 bpidx);
00275 
00276 
00277 /*
00278  * Mark the given pnode as inactive (for search).
 * NOTE(review): which fields of pnode (or of its embedded hmm) are reset is
 * not visible from this header -- see the implementation.
00279  */
00280 void fsg_psubtree_pnode_deactivate (fsg_pnode_t *pnode);
00281 
00282 
00283 /* Set all flags on in the given context bitvector */
/*
 * ctxt is modified in place.  This is the "every context" counterpart of
 * fsg_pnode_add_ctxt(), which sets a single bit.
 */
00284 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt);
00285 
00286 /*
00287  * Subtract bitvector sub from bitvector src (src updated with the result).
00288  * Return 0 if result is all 0, non-zero otherwise.
 * NOTE(review): "subtract" presumably means set difference, i.e. clearing in
 * src every bit that is set in sub -- confirm against the implementation.
 * sub is not declared const, but per the description only src is updated.
00289  */
00290 uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub);
00291 
00292 #ifdef __cplusplus
00293 }
00294 #endif
00295 
00296 
00297 #endif

Generated on 7 Mar 2010 by  doxygen 1.6.1