s2_semi_mgau.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * Interface for "semi-continuous vector quantization", a.k.a. Sphinx2
00039  * fast GMM computation.
00040  */
00041 
00042 #ifndef __S2_SEMI_MGAU_H__
00043 #define __S2_SEMI_MGAU_H__
00044 
00045 #include <logmath.h>
00046 #include "s3types.h"
00047 #include "fe.h"
00048 #include "ascr.h"
00049 #include "fast_algo_struct.h"
00050 #include "kdtree.h"
00051 
00052 
00053 #ifdef __cplusplus
00054 extern "C" {
00055 #endif
00056 #if 0
00057 /* Fool Emacs. */
00058 }
00059 #endif
00060 /* Compile-time sizes for the fixed-length arrays declared in this header. */
00061 #define S2_NUM_ALPHABET 256     /* multiplied by S2_NUM_FEATURES to size detArr[] */
00062 #define S2_NUM_FEATURES 4       /* length of the dets[], means[], vars[] and f[] arrays */
00063 #define S2_MAX_TOPN     6       /* max number of TopN codewords; length of the vqFeature_t arrays */
00064 /* A codeword index paired with one 32-bit value; struct s2_semi_mgau_s embeds arrays of these. */
00065 typedef struct {
00066     union {                     /* score and dist overlap in storage; only one is meaningful at a time */
00067         int32   score;
00068         int32   dist;   /* distance to next closest vector */
00069     } val;
00070     int32 codeword;             /* codeword (vector index) */
00071 } vqFeature_t;
00072 typedef vqFeature_t *vqFrame_t; /* alias for a pointer to vqFeature_t */
00073 /* Element types of the mean and variance vectors, plus the arithmetic macros declared with them. */
00074 typedef float32 mean_t;
00075 typedef float32 var_t;
00076 #define GMMSUB(a,b) ((a)-(b))   /* expands to plain subtraction of the parenthesised arguments */
00077 #define GMMADD(a,b) ((a)+(b))   /* expands to plain addition of the parenthesised arguments */
00078 /* Object handled by the s2_semi_mgau_*() functions: model pointers, kd-tree fields and embedded vqFeature_t arrays. */
00079 typedef struct s2_semi_mgau_s s2_semi_mgau_t;
00080 struct s2_semi_mgau_s {
00081     logmath_t *logmath;         /* same pointer type as the logmath argument of s2_semi_mgau_init() */
00082     int32   detArr[S2_NUM_FEATURES*S2_NUM_ALPHABET];    /* storage for det vectors */
00083     int32   *dets[S2_NUM_FEATURES];     /* det values for each feature */
00084     mean_t  *means[S2_NUM_FEATURES];    /* mean vectors for each feature */
00085     var_t   *vars[S2_NUM_FEATURES];     /* var vectors for each feature */
00086 
00087     unsigned char **OPDF_8B[4]; /* mixture weights; NOTE(review): the literal 4 presumably stands for S2_NUM_FEATURES - confirm */
00088 
00089     int32 topN;                 /* NOTE(review): presumably set from the topn argument of s2_semi_mgau_init(), at most S2_MAX_TOPN - confirm */
00090     int32 CdWdPDFMod;
00091 
00092     kd_tree_t **kdtrees;        /* NOTE(review): presumably n_kdtrees kd-tree pointers - confirm */
00093     uint32 n_kdtrees;
00094     uint32 kd_maxdepth;         /* same type as the maxdepth argument of s2_semi_mgau_load_kdtree() */
00095     int32 kd_maxbbi;            /* same type as the maxbbi argument of s2_semi_mgau_load_kdtree() */
00096     float64 dcep80msWeight;
00097     int32 use20ms_diff_pow;
00098 
00099     int32 num_frames;
00100     int32 frame_ds_ratio;
00101 
00102     vqFeature_t f[S2_NUM_FEATURES][S2_MAX_TOPN];        /* S2_MAX_TOPN entries for each of the S2_NUM_FEATURES features */
00103     vqFeature_t lcfrm[S2_MAX_TOPN];     /* lcfrm, ldfrm, lxfrm: S2_MAX_TOPN entries each; their use is not visible in this header */
00104     vqFeature_t ldfrm[S2_MAX_TOPN];
00105     vqFeature_t lxfrm[S2_MAX_TOPN];
00106     vqFeature_t vtmp;                   /* single entry; NOTE(review): the name suggests a temporary - confirm */
00107 };
00108 /* Returns an s2_semi_mgau_t pointer. NOTE(review): presumably loads the files named by the *_path arguments - confirm in the .c file. */
00109 s2_semi_mgau_t *s2_semi_mgau_init(const char *mean_path, const char *var_path,
00110                                   float64 varfloor, const char *mixw_path,
00111                                   float64 mixwfloor, int32 topn, logmath_t *logmath);   /* failure behaviour is not visible in this header */
00112 /* Takes the pointer type returned by s2_semi_mgau_init(). NOTE(review): presumably releases that object - confirm in the .c file. */
00113 void s2_semi_mgau_free(s2_semi_mgau_t *s);
00114 /* The only declaration in this header marked S3DECODER_EXPORT. The meaning of the int32 return value is not visible here. */
00115 S3DECODER_EXPORT
00116 int32 s2_semi_mgau_frame_eval(s2_semi_mgau_t *s,
00117                               ascr_t *ascr,       /* non-const; NOTE(review): presumably receives the computed scores - confirm */
00118                               fast_gmm_t *fgmm,
00119                               mfcc_t **feat,      /* NOTE(review): presumably one feature vector per stream - confirm */
00120                               int32 frame);       /* NOTE(review): presumably the index of the frame being evaluated - confirm */
00121 /* maxdepth and maxbbi have the same types as the kd_maxdepth and kd_maxbbi members of s2_semi_mgau_s. NOTE(review): presumably loads kd-trees from kdtree_path into s - confirm. */
00122 int32 s2_semi_mgau_load_kdtree(s2_semi_mgau_t *s, const char *kdtree_path,
00123                                uint32 maxdepth, int32 maxbbi);   /* the meaning of the int32 return value is not visible in this header */
00124 
00125 #ifdef __cplusplus
00126 }
00127 #endif
00128 
00129 
00130 #endif /*  __S2_SEMI_MGAU_H__ */

Generated on 7 Mar 2010 by doxygen 1.6.1