00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00020 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00022 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00023 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 * 00031 * ==================================================================== 00032 * 00033 */ 00034 /* 00035 * fsg_psubtree.h -- Phone-level FSG subtree representing all transitions 00036 * out of a single FSG state. 00037 * (Note: Currently, it is actually a flat lexicon representation 00038 * 00039 * ********************************************** 00040 * CMU ARPA Speech Project 00041 * 00042 * Copyright (c) 2004 Carnegie Mellon University. 00043 * ALL RIGHTS RESERVED. 00044 * ********************************************** 00045 * 00046 * HISTORY 00047 * 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.2 2006/02/23 05:10:18 arthchan2003 00053 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3 00054 * 00055 * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003 00056 * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID 00057 * 00058 * Revision 1.1.2.4 2005/07/20 21:18:30 arthchan2003 00059 * FSG can now be read, srch_fsg_init can now be initialized, psubtree can be built. Sounds like it is time to plug in other function pointers. 00060 * 00061 * Revision 1.1.2.3 2005/07/17 05:44:32 arthchan2003 00062 * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet. 00063 * 00064 * Revision 1.1.2.2 2005/07/13 18:39:47 arthchan2003 00065 * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet. 00066 * 00067 * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003 00068 * Sphinx 2 fsg mainpulation routines. Compiled with faked functions. Currently fended off from users. 00069 * 00070 * Revision 1.1 2004/07/16 00:57:12 egouvea 00071 * Added Ravi's implementation of FSG support. 00072 * 00073 * Revision 1.3 2004/06/25 14:49:08 rkm 00074 * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state 00075 * 00076 * Revision 1.2 2004/05/27 14:22:57 rkm 00077 * FSG cross-word triphones completed (but for single-phone words) 00078 * 00079 * Revision 1.1.1.1 2004/03/01 14:30:31 rkm 00080 * 00081 * 00082 * Revision 1.2 2004/02/27 15:05:21 rkm 00083 * *** empty log message *** 00084 * 00085 * Revision 1.1 2004/02/23 15:53:45 rkm 00086 * Renamed from fst to fsg 00087 * 00088 * Revision 1.4 2004/02/23 15:09:50 rkm 00089 * *** empty log message *** 00090 * 00091 * Revision 1.3 2004/02/19 21:16:54 rkm 00092 * Added fsg_search.{c,h} 00093 * 00094 * Revision 1.2 2004/02/18 15:02:34 rkm 00095 * Added fsg_lextree.{c,h} 00096 * 00097 * Revision 1.1 2004/02/17 21:11:49 rkm 00098 * *** empty log message *** 00099 * 00100 * 00101 * 09-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00102 * Started. 00103 */ 00104 00105 00106 #ifndef __S2_FSG_PSUBTREE_H__ 00107 #define __S2_FSG_PSUBTREE_H__ 00108 00109 00110 #include <stdio.h> 00111 00112 #include <cmd_ln.h> 00113 #include <logmath.h> 00114 00115 #include "s3types.h" 00116 #include "word_fsg.h" 00117 #include "fsg.h" 00118 #include "hmm.h" 00119 #include "dict.h" 00120 #include "mdef.h" 00121 00122 00123 #ifdef __cplusplus 00124 extern "C" { 00125 #endif 00126 #if 0 00127 /* Fool Emacs. */ 00128 } 00129 #endif 00130 00131 /* 00132 * **HACK-ALERT**!! Compile-time constant determining the size of the 00133 * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.) 00134 * But it makes memory allocation simpler and more efficient. 00135 */ 00136 #define FSG_PNODE_CTXT_BVSZ 2 00137 00138 typedef struct { 00139 uint32 bv[FSG_PNODE_CTXT_BVSZ]; 00140 } fsg_pnode_ctxt_t; 00141 00142 00159 typedef struct fsg_pnode_s { 00167 union { 00168 struct fsg_pnode_s *succ; 00169 word_fsglink_t *fsglink; 00170 } next; 00171 00172 /* 00173 * For simplicity of memory management (i.e., freeing the pnodes), all 00174 * pnodes allocated for all transitions out of a state are maintained in a 00175 * linear linked list through the alloc_next pointer. 00176 */ 00177 struct fsg_pnode_s *alloc_next; 00178 00179 /* 00180 * The next node that is also a child of the parent of this node; NULL if 00181 * none. 00182 */ 00183 struct fsg_pnode_s *sibling; 00184 00185 /* 00186 * The transition (log) probability to be incurred upon transitioning to 00187 * this node. (Transition probabilities are really associated with the 00188 * transitions. But a lextree node has exactly one incoming transition. 00189 * Hence, the prob can be associated with the node.) 00190 * This is a logs2(prob) value, and includes the language weight. 00191 */ 00192 int32 logs2prob; 00193 00194 /* 00195 * The root and leaf positions associated with any transition have to deal 00196 * with multiple phonetic contexts. However, different contexts may result 00197 * in the same SSID (senone-seq ID), and can share a single pnode with that 00198 * SSID. But the pnode should track the set of context CI phones that share 00199 * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For 00200 * simplicity of implementation, its size is a compile-time constant for 00201 * now.) Single phone words would need a 2-D array of context, but that's 00202 * too expensive. For now, they simply use SIL as right context, so only 00203 * the left context is properly modelled. 00204 * (For word-internal phones, this field is unused, of course.) 00205 */ 00206 fsg_pnode_ctxt_t ctxt; 00207 00208 uint8 ci_ext; /* This node's CIphone as viewed externally (context) */ 00209 uint8 ppos; /* Phoneme position in pronunciation */ 00210 uint8 leaf; /* Whether this is a leaf node */ 00211 00212 /* HMM-state-level stuff here */ 00213 hmm_t hmm; 00214 } fsg_pnode_t; 00215 00216 /* Access macros */ 00217 #define fsg_pnode_leaf(p) ((p)->leaf) 00218 #define fsg_pnode_logs2prob(p) ((p)->logs2prob) 00219 #define fsg_pnode_succ(p) ((p)->next.succ) 00220 #define fsg_pnode_fsglink(p) ((p)->next.fsglink) 00221 #define fsg_pnode_sibling(p) ((p)->sibling) 00222 #define fsg_pnode_hmmptr(p) (&((p)->hmm)) 00223 #define fsg_pnode_ci_ext(p) ((p)->ci_ext) 00224 #define fsg_pnode_ppos(p) ((p)->ppos) 00225 #define fsg_pnode_leaf(p) ((p)->leaf) 00226 #define fsg_pnode_ctxt(p) ((p)->ctxt) 00227 00228 #define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f))) 00229 00230 00237 fsg_pnode_t *fsg_psubtree_init (hmm_context_t *ctx, 00238 word_fsg_t *fsg, 00239 int32 from_state, 00240 fsg_pnode_t **alloc_head, 00241 cmd_ln_t *config, 00242 logmath_t *logmath 00243 ); 00244 00245 00250 void fsg_psubtree_free (fsg_pnode_t *alloc_head); 00251 00252 00253 /* 00254 * Dump the list of nodes in the given lextree to the given file. alloc_head: 00255 * head of linear list of allocated nodes updated by fsg_psubtree_init(). 00256 */ 00257 void fsg_psubtree_dump (fsg_pnode_t *alloc_head, FILE *fp, 00258 dict_t *dict, mdef_t *mdef 00259 ); 00260 00261 00262 /* 00263 * Attempt to transition into the given node with the given attributes. 00264 * If the node is already active in the given frame with a score better 00265 * than the incoming score, nothing is done. Otherwise the transition is 00266 * successful. 00267 * Return value: TRUE if the node was newly activated for the given frame, 00268 * FALSE if it was already activated for that frame (whether the incoming 00269 * transition was successful or not). 00270 */ 00271 int fsg_psubtree_pnode_enter (fsg_pnode_t *pnode, 00272 int32 score, 00273 int32 frame, 00274 int32 bpidx); 00275 00276 00277 /* 00278 * Mark the given pnode as inactive (for search). 00279 */ 00280 void fsg_psubtree_pnode_deactivate (fsg_pnode_t *pnode); 00281 00282 00283 /* Set all flags on in the given context bitvector */ 00284 void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt); 00285 00286 /* 00287 * Subtract bitvector sub from bitvector src (src updated with the result). 00288 * Return 0 if result is all 0, non-zero otherwise. 00289 */ 00290 uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); 00291 00292 #ifdef __cplusplus 00293 } 00294 #endif 00295 00296 00297 #endif