00001 /* ==================================================================== 00002 * Copyright (c) 1995-2004 Carnegie Mellon University. All rights 00003 * reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in 00014 * the documentation and/or other materials provided with the 00015 * distribution. 00016 * 00017 * This work was supported in part by funding from the Defense Advanced 00018 * Research Projects Agency and the National Science Foundation of the 00019 * United States of America, and the CMU Sphinx Speech Consortium. 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00022 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00023 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00024 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00025 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00027 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00028 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00029 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00030 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00031 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 * 00033 * ==================================================================== 00034 * 00035 */ 00036 /* 00037 * align.h -- Exported time-aligner functions and data structures. 00038 * 00039 * ********************************************** 00040 * CMU ARPA Speech Project 00041 * 00042 * Copyright (c) 1996 Carnegie Mellon University. 00043 * ALL RIGHTS RESERVED. 00044 * ********************************************** 00045 * 00046 * HISTORY 00047 * 00048 * $Log$ 00049 * Revision 1.9 2006/03/28 04:50:14 dhdfu 00050 * Add an option to control the insertion of optional silences and filler 00051 * words (the TTS people may want to use this, and I need it) 00052 * 00053 * Revision 1.8 2006/02/24 18:30:20 arthchan2003 00054 * Changed back s3senid to int32. Don't know the reason why using s3senid_t will cause failure in test. Need to talk with Dave. 00055 * 00056 * Revision 1.7 2006/02/24 16:42:48 arthchan2003 00057 * Fixed function prototype for align_sen_active 00058 * 00059 * Revision 1.6 2006/02/24 04:42:32 arthchan2003 00060 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Fixed dox-doc. 00061 * 00062 * Revision 1.5.4.2 2005/08/02 21:42:34 arthchan2003 00063 * 1, Moved static variables from function level to the application level. 2, united all initialization of HMM using s3_am_init, 3 united all GMM computation using ms_cont_mgau_frame_eval. 00064 * 00065 * Revision 1.5.4.1 2005/07/22 03:46:56 arthchan2003 00066 * 1, cleaned up the code, 2, fixed dox-doc. 3, use srch.c version of log_hypstr and log_hyp_detailed. 00067 * 00068 * Revision 1.5 2005/06/22 05:39:56 arthchan2003 00069 * Synchronize argument with decode. Removed silwid, startwid and finishwid. Wrapped up logs3_init, Wrapped up lmset. Refactor with functions in dag. 00070 * 00071 * Revision 1.1.1.1 2005/03/24 15:24:01 archan 00072 * I found Evandro's suggestion is quite right after yelling at him 2 days later. So I decide to check this in again without any binaries. (I have done make distcheck. ) . Again, this is a candidate for s3.6 and I believe I need to work out 4-5 intermediate steps before I can complete the first prototype. That's why I keep local copies. 00073 * 00074 * Revision 1.4 2004/12/06 11:31:48 arthchan2003 00075 * Fix brief comments for programs. 00076 * 00077 * Revision 1.3 2004/12/05 12:01:32 arthchan2003 00078 * 1, move libutil/libutil.h to s3types.h, seems to me not very nice to have it in every files. 2, Remove warning messages of main_align.c 3, Remove warning messages in chgCase.c 00079 * 00080 * Revision 1.2 2004/09/13 08:13:28 arthchan2003 00081 * update copyright notice from 200x to 2004 00082 * 00083 * Revision 1.1 2004/08/30 22:29:19 arthchan2003 00084 * Refactor the s3.0 tools, currently it is still quite messy, we need to make it modularize later on. 00085 * 00086 * Revision 1.1 2004/08/09 00:17:12 arthchan2003 00087 * Incorporating s3.0 align, at this point, there are still some small problems in align but they don't hurt. For example, the score doesn't match with s3.0 and the output will have problem if files are piped to /dev/null/. I think we can go for it. 00088 * 00089 * Revision 1.1 2003/02/14 14:40:34 cbq 00090 * Compiles. Analysis is probably hosed. 00091 * 00092 * Revision 1.1 2000/04/24 09:39:41 lenzo 00093 * s3 import. 00094 * 00095 * 00096 * 13-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00097 * Changed align_sen_active to flag active senones instead of building a list 00098 * of them. 00099 * 00100 * 15-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00101 * Created. 00102 */ 00103 00104 00105 #ifndef _LIBFBS_ALIGN_H_ 00106 #define _LIBFBS_ALIGN_H_ 00107 00111 #include <logmath.h> 00112 #include <s3types.h> 00113 00114 00115 #ifdef __cplusplus 00116 extern "C" { 00117 #endif 00118 #if 0 00119 /* Fool Emacs. */ 00120 } 00121 #endif 00122 00124 typedef struct align_stseg_s { 00125 s3pid_t pid; 00126 s3senid_t sen; 00127 int8 state; 00128 int8 start; 00129 int32 score; 00131 int32 bsdiff; 00132 struct align_stseg_s *next; 00133 } align_stseg_t; 00134 00135 00137 typedef struct align_phseg_s { 00138 s3pid_t pid; 00139 s3frmid_t sf, ef; 00140 int32 score; 00141 int32 bsdiff; 00143 struct align_phseg_s *next; 00144 } align_phseg_t; 00145 00146 00148 typedef struct align_wdseg_s { 00149 s3wid_t wid; 00150 s3frmid_t sf, ef; 00151 int32 score; 00152 int32 bsdiff; 00154 struct align_wdseg_s *next; 00155 } align_wdseg_t; 00156 00157 00158 int32 align_init(mdef_t * _mdef, tmat_t * _tmat, dict_t * _dict, cmd_ln_t *_config, logmath_t *_logmath); 00159 00160 void align_free(void); 00161 00162 int32 align_build_sent_hmm(char *transcript, 00163 int insert_sil 00164 ); 00165 00166 int32 align_destroy_sent_hmm(void); 00167 00168 int32 align_start_utt(char *uttid); 00169 00174 void align_sen_active(uint8 * senlist, 00175 int32 n_sen 00176 ); 00177 00178 00180 int32 align_frame(int32 * senscr 00181 ); 00182 00183 00188 int32 align_end_utt(align_stseg_t ** stseg, 00189 align_phseg_t ** phseg, 00190 align_wdseg_t ** wdseg 00191 ); 00192 00193 #ifdef __cplusplus 00194 } 00195 #endif 00196 00197 00198 #endif