00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * corpus.h -- Corpus-file related misc functions. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.1 2006/04/05 20:27:30 dhdfu 00050 * A Great Reorganzation of header files and executables 00051 * 00052 * Revision 1.13 2006/02/22 19:49:25 arthchan2003 00053 * Merged from SPHINX3_5_2_RCI_IRII: 00054 * 1, Add structure utt_res_t, this is an utterance-based resouce 00055 * structure. Add basic operation such as free and report. 00056 * 2, Modify the structure of the loop in ctl_corpus to make it not so 00057 * clunky. Tested with make check . 00058 * 3, Completely removed ctl_process_dyn_lm, it is a product of code 00059 * duplication (alright, it is written by me......) 00060 * 4, Fixed doc-dox. 00061 * 00062 * Revision 1.12.4.3 2005/07/27 23:19:11 arthchan2003 00063 * 1, Added utt_res_t structure and its methods. 2, Changed the function pointer prototype. 3, Removed the lm and mllr set process out of ctl_process 00064 * 00065 * Revision 1.12.4.2 2005/07/26 03:14:17 arthchan2003 00066 * Removed ctl_process_dyn_lm. One of my sin. 00067 * 00068 * Revision 1.12.4.1 2005/07/05 06:25:40 arthchan2003 00069 * Fixed dox-doc. 00070 * 00071 * Revision 1.12 2005/06/21 20:44:34 arthchan2003 00072 * 1, Fixed doxygen documentation, 2, Add the $ keyword. 00073 * 00074 * Revision 1.4 2005/06/18 20:05:23 archan 00075 * Sphinx3 to s3.generic: Set lm correctly in dag.c and astar.c. Same changes should also be applied to decode_anytopo. 00076 * 00077 * Revision 1.3 2005/03/30 01:22:46 archan 00078 * Fixed mistakes in last updates. Add 00079 * 00080 * 00081 * 09-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00082 * Added ctl_process_utt (). 00083 * 00084 * 01-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00085 * Updated ctl_infile() spec to included check for already existing file extension. 00086 * 00087 * 23-Mar-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00088 * Added a general purpose data argument to ctl_process() and its function 00089 * argument func. 00090 * 00091 * 22-Nov-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00092 * Added an optional validation function argument and an optional 00093 * duplicate-resolution function argument to both corpus_load_headid() and 00094 * corpus_load_tailid(). 00095 * 00096 * 25-Oct-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon 00097 * Created. 00098 */ 00099 00100 00101 #ifndef _S3_CORPUS_H_ 00102 #define _S3_CORPUS_H_ 00103 00104 #include <stdio.h> 00105 00106 #include <hash_table.h> 00107 #include <profile.h> 00108 #include <s3types.h> 00109 00110 00111 00115 #ifdef __cplusplus 00116 extern "C" { 00117 #endif 00118 #if 0 00119 } /* Fool Emacs into not indenting things. */ 00120 #endif 00121 00127 typedef struct 00128 { 00129 char* uttfile; 00130 char* lmname; 00132 char* fsgname; 00135 char* regmatname; 00137 char* cb2mllrname; 00139 } utt_res_t; 00140 00141 #define utt_res_set_uttfile(ur,name) ur->uttfile=name 00142 #define utt_res_set_lmname(ur,name) ur->lmname=name 00143 #define utt_res_set_fsgname(ur,name) ur->fsgname=name 00144 #define utt_res_set_regmatname(ur,name) ur->regmatname=name 00145 #define utt_res_set_cb2mllrname(ur,name) ur->cb2mllrname=name 00146 00148 utt_res_t* new_utt_res(void); 00149 00151 void free_utt_res( 00152 utt_res_t* ur 00153 ); 00154 00156 void report_utt_res( 00157 utt_res_t *ur 00158 ); 00159 00169 typedef struct { 00170 hash_table_t *ht; 00171 int32 n; 00172 char **str; 00173 } corpus_t; 00174 00175 00204 corpus_t *corpus_load_headid (const char *file, 00205 int32 (*validate)(char *str), 00206 int32 (*dup_resolve)(char *s1, char *s2)); 00207 00211 corpus_t *corpus_load_tailid (const char *file, 00212 int32 (*validate)(char *str), 00213 int32 (*dup_resolve)(char *s1, char *s2)); 00214 00219 char *corpus_lookup (corpus_t *corp, const char *id); 00220 00221 00231 int32 ctl_read_entry (FILE *fp, 00232 char *uttfile, 00233 int32 *sf, 00234 int32 *ef, 00236 char *uttid 00238 ); 00239 00240 00250 S3DECODER_EXPORT 00251 ptmr_t ctl_process (const char *ctlfile, 00252 const char *ctllmfile, 00253 const char *ctlmllrfile, 00254 int32 nskip, 00255 int32 count, 00256 void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid), 00259 void *kb 00261 ); 00262 00263 00272 S3DECODER_EXPORT 00273 ptmr_t ctl_process_utt (const char *uttfile, 00274 int32 count, 00275 void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid), 00277 void *kb); 00278 00285 void ctl_infile (char *file, 00286 const char *dir, 00287 const char *ext, 00289 const char *utt 00292 ); 00293 00301 void ctl_outfile (char *file, 00302 const char *dir, 00304 const char *ext, 00305 const char *utt, 00308 const char *uttid 00309 ); 00310 00311 #if 0 00312 { /* Stop indent from complaining */ 00313 #endif 00314 #ifdef __cplusplus 00315 } 00316 #endif 00317 00318 #endif