corpus.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * corpus.h -- Corpus-file related misc functions.
00039  *
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * $Log$
00049  * Revision 1.1  2006/04/05  20:27:30  dhdfu
00050  * A Great Reorganization of header files and executables
00051  * 
00052  * Revision 1.13  2006/02/22 19:49:25  arthchan2003
00053  * Merged from SPHINX3_5_2_RCI_IRII:
00054  * 1, Add structure utt_res_t, this is an utterance-based resource
00055  * structure. Add basic operation such as free and report.
00056  * 2, Modify the structure of the loop in ctl_corpus to make it not so
00057  * clunky. Tested with make check .
00058  * 3, Completely removed ctl_process_dyn_lm, it is a product of code
00059  * duplication (alright, it is written by me......)
00060  * 4, Fixed doc-dox.
00061  *
00062  * Revision 1.12.4.3  2005/07/27 23:19:11  arthchan2003
00063  * 1, Added utt_res_t structure and its methods. 2, Changed the function pointer prototype. 3, Removed the lm and mllr set process out of ctl_process
00064  *
00065  * Revision 1.12.4.2  2005/07/26 03:14:17  arthchan2003
00066  * Removed ctl_process_dyn_lm. One of my sin.
00067  *
00068  * Revision 1.12.4.1  2005/07/05 06:25:40  arthchan2003
00069  * Fixed dox-doc.
00070  *
00071  * Revision 1.12  2005/06/21 20:44:34  arthchan2003
00072  * 1, Fixed doxygen documentation, 2, Add the $ keyword.
00073  *
00074  * Revision 1.4  2005/06/18 20:05:23  archan
00075  * Sphinx3 to s3.generic: Set lm correctly in dag.c and astar.c.  Same changes should also be applied to decode_anytopo.
00076  *
00077  * Revision 1.3  2005/03/30 01:22:46  archan
00078  * Fixed mistakes in last updates. Add
00079  *
00080  * 
00081  * 09-Dec-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00082  *              Added ctl_process_utt ().
00083  * 
00084  * 01-Mar-1999  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00085  *              Updated ctl_infile() spec to include a check for an already existing file extension.
00086  * 
00087  * 23-Mar-1998  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00088  *              Added a general purpose data argument to ctl_process() and its function
00089  *              argument func.
00090  * 
00091  * 22-Nov-1997  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00092  *              Added an optional validation function argument and an optional
00093  *              duplicate-resolution function argument to both corpus_load_headid() and
00094  *              corpus_load_tailid().
00095  * 
00096  * 25-Oct-1997  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00097  *              Created.
00098  */
00099 
00100 
00101 #ifndef _S3_CORPUS_H_
00102 #define _S3_CORPUS_H_
00103 
00104 #include <stdio.h>
00105 
00106 #include <hash_table.h>
00107 #include <profile.h>
00108 #include <s3types.h>
00109 
00110 
00111 
00115 #ifdef __cplusplus
00116 extern "C" {
00117 #endif
00118 #if 0
00119 } /* Fool Emacs into not indenting things. */
00120 #endif
00121 
/**
 * Per-utterance resource record (the history block of this header calls it
 * an "utterance-based resource structure").
 *
 * Every member is a plain string pointer.  This header does not state who
 * owns the pointed-to storage; the setter macros below only store the
 * pointer and never copy the string.
 */
typedef struct
{
    char *uttfile;      /**< Utterance file (presumably its name; confirm in the implementation). */
    char *lmname;       /**< Presumably the language-model name for this utterance. */
    char *fsgname;      /**< Presumably the finite-state-grammar name for this utterance. */
    char *regmatname;   /**< Presumably the regression-matrix file name for this utterance. */
    char *cb2mllrname;  /**< Presumably the codebook-to-MLLR mapping file name for this utterance. */
} utt_res_t;

/*
 * Member setters: store the pointer `name` in the corresponding member of
 * the utt_res_t that `ur` points to.
 *
 * Both arguments and the whole expansion are parenthesized, so the macros
 * accept any pointer expression (`&res`, `base + i`, ...) and can be used
 * safely inside larger expressions.  Each argument is evaluated exactly
 * once.  The value of the macro is the value assigned.
 */
#define utt_res_set_uttfile(ur, name)     ((ur)->uttfile = (name))
#define utt_res_set_lmname(ur, name)      ((ur)->lmname = (name))
#define utt_res_set_fsgname(ur, name)     ((ur)->fsgname = (name))
#define utt_res_set_regmatname(ur, name)  ((ur)->regmatname = (name))
#define utt_res_set_cb2mllrname(ur, name) ((ur)->cb2mllrname = (name))
00146 
00148 utt_res_t* new_utt_res(void);
00149 
00151 void free_utt_res(
00152     utt_res_t* ur 
00153     );
00154 
00156 void report_utt_res(
00157     utt_res_t *ur 
00158     );
00159 
00169 typedef struct {
00170     hash_table_t *ht;   
00171     int32 n;            
00172     char **str;         
00173 } corpus_t;
00174 
00175 
00204 corpus_t *corpus_load_headid (const char *file, 
00205                               int32 (*validate)(char *str),
00206                               int32 (*dup_resolve)(char *s1, char *s2));
00207 
00211 corpus_t *corpus_load_tailid (const char *file, 
00212                               int32 (*validate)(char *str),
00213                               int32 (*dup_resolve)(char *s1, char *s2));
00214 
00219 char *corpus_lookup (corpus_t *corp, const char *id);
00220 
00221 
00231 int32 ctl_read_entry (FILE *fp,         
00232                       char *uttfile,    
00233                       int32 *sf,        
00234                       int32 *ef,        
00236                       char *uttid       
00238     );
00239 
00240 
00250 S3DECODER_EXPORT
00251 ptmr_t ctl_process (const char *ctlfile,        
00252                     const char *ctllmfile,     
00253                     const char *ctlmllrfile,   
00254                     int32 nskip,        
00255                     int32 count,        
00256                     void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid),
00259                     void *kb            
00261     );
00262 
00263 
00272 S3DECODER_EXPORT
00273 ptmr_t ctl_process_utt (const char *uttfile,    
00274                         int32 count,    
00275                         void (*func) (void *kb, utt_res_t *ur, int32 sf, int32 ef, char *uttid),
00277                         void *kb);
00278 
/**
 * Build an input file name for an utterance.
 *
 * @param file  Character buffer; presumably receives the resulting name.
 * @param dir   Directory component.
 * @param ext   File extension component.
 * @param utt   Utterance specification.
 *
 * NOTE(review): `file` is passed without a size, so the caller must supply
 * a buffer large enough for the composed name.  The composition rule is
 * defined by the implementation; the history notes in this header mention a
 * check for an already existing file extension.
 */
void ctl_infile(char *file,
                const char *dir,
                const char *ext,
                const char *utt);
00293 
/**
 * Build an output file name for an utterance.
 *
 * @param file   Character buffer; presumably receives the resulting name.
 * @param dir    Directory component.
 * @param ext    File extension component.
 * @param utt    Utterance specification.
 * @param uttid  Utterance ID.
 *
 * NOTE(review): `file` is passed without a size, so the caller must supply
 * a buffer large enough for the composed name.  How `utt` and `uttid` are
 * combined is defined by the implementation and not visible in this header.
 */
void ctl_outfile(char *file,
                 const char *dir,
                 const char *ext,
                 const char *utt,
                 const char *uttid);
00310 
00311 #if 0
00312 { /* Stop indent from complaining */
00313 #endif
00314 #ifdef __cplusplus
00315 }
00316 #endif
00317 
00318 #endif

Generated on 7 Mar 2010 by  doxygen 1.6.1