s3_cfg.h

Go to the documentation of this file.
00001 /* ====================================================================
00002  * Copyright (c) 1996-2005 Carnegie Mellon University.  All rights 
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  */
00035 
00036 /* Sphinx3 Context Free Grammar Parser
00037  *
00038  * The purpose here is to create a parser that can handle multiple input
00039  * streams at the same time.  The implementation is based on the Earley 
00040  * algorithm.
00041  *
00042  * The legal rules are in the form
00043  *
00044  *     0.33 $rule1 N product1 product2 ... productN
00045  *
00046  * 0.33 is a float32 giving the score (or probability) of this rule being
00047  * applied, $rule1 is the name of the non-terminal to be expanded, N is the
00048  * number of products, and product1 ... productN are the (non-)terminals that $rule1 expands to.
00049  * 
00050  * The rules are read from a file, in case that is not obvious from the
00051  * API.
00052  */
00053 
00054 #ifndef _S3_CONTEXT_FREE_GRAMMAR_H
00055 #define _S3_CONTEXT_FREE_GRAMMAR_H
00056 
00057 #include <stdio.h>
00058 
00059 #include <logmath.h>
00060 #include "prim_type.h"
00061 #include "hash_table.h"
00062 #include "s3_arraylist.h"
00063 #include "fsg.h"
00064 
00065 #ifdef __cplusplus
00066 extern "C" {
00067 #endif
00068 
/* Size limits, initial capacities and hash-table sizes.
 * NOTE(review): the code that consumes these is not visible in this header;
 * the meaning of each constant is inferred from its name only. */
00069 #define S3_CFG_MAX_RULE_STR_LEN         1023
00070 #define S3_CFG_MAX_ITEM_STR_LEN         40
00071 #define S3_CFG_MAX_ITEM_COUNT           20
00072 #define S3_CFG_INITIAL_RULE_COUNT       1
00073 #define S3_CFG_INITIAL_RULE_SET_COUNT   50
00074 #define S3_CFG_INITIAL_PARSE_SET_COUNT  20
00075 #define S3_CFG_PARSE_HASH_SIZE          251
00076 #define S3_CFG_INITIAL_STATE_SET_COUNT  20
00077 #define S3_CFG_INITIAL_TERM_COUNT       50
00078 #define S3_CFG_NAME_HASH_SIZE           4091
00079 
/* Float sentinel for an invalid score and the starting score value. */
00080 #define S3_CFG_INVALID_SCORE            1.0f
00081 #define S3_CFG_INITIAL_SCORE            0.0f
00082 
/* Item-id encoding.  Non-terminal names start with S3_CFG_NONTERM_PREFIX
 * (e.g. "$rule1" in the rule format described at the top of this file).
 * In a 32-bit id the top bit (S3_CFG_TERM_BIT) marks a terminal and the low
 * 31 bits (S3_CFG_INDEX_MASK) hold the index; see s3_cfg_is_terminal() and
 * s3_cfg_id2index() below. */
00083 #define S3_CFG_NONTERM_PREFIX           '$'
00084 #define S3_CFG_TERM_BIT                 0x80000000
00085 #define S3_CFG_INDEX_MASK               0x7FFFFFFF
00086 
/* Reserved "no such item" id: all index bits set, terminal bit clear. */
00087 #define S3_CFG_INVALID_ID               0x7FFFFFFF
00088 
/* Built-in items occupying indices 0..4, each with its printable name.
 * PSTART and START have the terminal bit clear; EOR, EOI and NIL have it set.
 * NOTE(review): judging by the names, EOR/EOI/NIL stand for end-of-rule,
 * end-of-input and the empty production -- confirm against the parser. */
00089 #define S3_CFG_PSTART_ITEM              0x00000000
00090 #define S3_CFG_PSTART_ITEM_STR          "$PSTART"
00091 #define S3_CFG_START_ITEM               0x00000001
00092 #define S3_CFG_START_ITEM_STR           "$START"
00093 #define S3_CFG_EOR_ITEM                 (0x00000002 | S3_CFG_TERM_BIT)
00094 #define S3_CFG_EOR_ITEM_STR             "#EOR#"
00095 #define S3_CFG_EOI_ITEM                 (0x00000003 | S3_CFG_TERM_BIT)
00096 #define S3_CFG_EOI_ITEM_STR             "#EOI#"
00097 #define S3_CFG_NIL_ITEM                 (0x00000004 | S3_CFG_TERM_BIT)
00098 #define S3_CFG_NIL_ITEM_STR             "#NIL#"
00099 
/* Brace initializer for the rule $PSTART -> $START #EOR#.
 * NOTE(review): this four-element initializer (with a nested product list)
 * does not match the field layout of s3_cfg_rule_t declared further down in
 * this header (src, score, prob_score, log_score, products, len) -- check
 * whether the macro is still used and update or remove it. */
00100 #define S3_CFG_START_RULE               \
00101   { S3_CFG_PSTART_ITEM, 0.0f, { S3_CFG_START_ITEM, S3_CFG_EOR_ITEM }, 1 }
00102 
/* Two distinct single-bit values, presumably OR-able pruning flags; their
 * consumer is not visible here -- TODO confirm. */
00103 #define S3_CFG_AUTO_PRUNE_SCORE         0x00000001
00104 #define S3_CFG_AUTO_PRUNE_RANK          0x00000002
00105 
/*
 * Helper macros for parse states and item ids.
 *
 * Every use of the macro parameter is parenthesized so that an argument
 * containing a lower-precedence operator (e.g. `a | b`) or a unary operator
 * (e.g. `&state`) is evaluated as a unit before the macro's own operator is
 * applied.  Each macro evaluates its argument exactly once.
 */

/* True when x (a pointer to a structure with an `entries` list, such as
 * s3_cfg_state_t) holds no entries. */
#define s3_cfg_is_null_parse(x) ((x)->entries.count == 0)

/* Non-zero when id x has S3_CFG_TERM_BIT set, i.e. x names a terminal. */
#define s3_cfg_is_terminal(x) ((x) & S3_CFG_TERM_BIT)

/* Masks id x with S3_CFG_INDEX_MASK, dropping the terminal bit and leaving
 * only the index bits. */
#define s3_cfg_id2index(x) ((x) & S3_CFG_INDEX_MASK)
00111 
/* 32-bit identifier of a grammar item: the top bit (S3_CFG_TERM_BIT) flags a
 * terminal, the remaining bits (S3_CFG_INDEX_MASK) carry the item index. */
00112 typedef uint32 s3_cfg_id_t;
00113 
/*
 * One production rule of the grammar (see the rule format described in the
 * comment at the top of this file).
 */
00114 typedef struct s3_cfg_rule_s {
00115   s3_cfg_id_t src;  /* id of the item being expanded -- presumably the "$rule1" part of a rule line; confirm */
00116 
00117   /* arbitrary floating point score */
00118   float32 score;
00119   /* normalized probability score */
00120   float32 prob_score; 
00121   /* probability fed to logs3 */
00122   int32 log_score;
00123 
00124   s3_cfg_id_t *products;  /* ids this rule expands to; ownership not visible here -- TODO confirm who frees it */
00125   int len;  /* presumably the number of entries in products -- TODO confirm whether a terminator is counted */
00126 } s3_cfg_rule_t;
00127 
/*
 * Per-item record: an item's id, its name and the rules attached to it.
 * NOTE(review): field meanings below are inferred from names and types; the
 * code that fills them in is not visible in this header.
 */
00128 typedef struct {
00129   s3_cfg_id_t id;  /* id of this item */
00130   char *name;  /* item name string; ownership not visible here */
00131   s3_arraylist_t rules;  /* presumably the rules whose src is this item -- confirm */
00132   s3_cfg_rule_t *nil_rule;  /* presumably the rule expanding this item to nothing (#NIL#), if any -- confirm */
00133 } s3_cfg_item_t;
00134 
/* Forward declaration: an entry points back at the state it originated in. */
00135 struct s3_cfg_state_s;
/*
 * One entry stored in a parse state.  The file header says the parser is
 * based on the Earley algorithm, so this presumably corresponds to an Earley
 * item (rule + dot position + origin) -- NOTE(review): confirm against the
 * implementation.
 */
00136 typedef struct s3_cfg_entry_s {
00137   s3_cfg_rule_t *rule;  /* rule this entry refers to */
00138   int dot;  /* presumably the position of the dot within rule->products -- confirm */
00139   struct s3_cfg_state_s *origin;  /* state associated with where this entry started -- confirm */
00140   int32 score;  /* integer score of this entry; scale not visible here */
00141   struct s3_cfg_entry_s *back;  /* link to another entry; exact role (predecessor?) not visible here */
00142   struct s3_cfg_entry_s *complete;  /* link to another entry; exact role (completed child?) not visible here */
00143 } s3_cfg_entry_t;
00144 
00145 
/*
 * One state of a parse.  s3_cfg_is_null_parse() treats a state whose
 * `entries` list is empty as a null parse.
 * NOTE(review): the remaining field descriptions are inferred from names and
 * types only; confirm against the implementation.
 */
00146 typedef struct s3_cfg_state_s {
00147   s3_cfg_id_t input;  /* presumably the item id that was fed in to reach this state */
00148   s3_arraylist_t entries;  /* entries held by this state; an empty list means a null parse */
00149   s3_arraylist_t expansions;  /* presumably states derived from this one -- confirm */
00150   struct s3_cfg_state_s *back;  /* presumably the state this one was derived from -- confirm */
00151 
00152   s3_cfg_entry_t *best_completed_entry;  /* cached best-* pointers; selection criteria not visible here */
00153   s3_cfg_entry_t *best_overall_entry;
00154   s3_cfg_entry_t *best_completed_parse;
00155   s3_cfg_entry_t *best_overall_parse;
00156 
00157   int num_expanded;  /* counter; what exactly is counted is not visible here */
00158 } s3_cfg_state_t;
00159 
/*
 * The grammar object passed as the first argument to most s3_cfg_* functions.
 * NOTE(review): field descriptions are inferred from names and types only.
 */
00160 typedef struct {
00161   s3_arraylist_t rules;  /* rules of the grammar */
00162   s3_arraylist_t item_info;  /* presumably one s3_cfg_item_t per known item -- confirm */
00163   hash_table_t *name2id;  /* presumably maps item name strings to ids (cf. s3_cfg_str2id) -- confirm */
00164 
00165   int8 *predictions;  /* purpose and size not visible in this header -- TODO document */
00166 } s3_cfg_t;
00167 
/* Presumably initializes the caller-provided grammar object *_cfg.
 * NOTE(review): inferred from the name; implementation not visible here. */
00174 void
00175 s3_cfg_init(s3_cfg_t *_cfg);
00176 
00177 
/* Presumably releases the resources held by *_cfg.
 * NOTE(review): whether _cfg itself is freed is not visible here -- confirm. */
00184 void
00185 s3_cfg_close(s3_cfg_t *_cfg);
00186 
00187 
/* Takes a file name and returns a grammar object; presumably reads rules in
 * the plain-text format described at the top of this header.
 * NOTE(review): failure behaviour and ownership of the result are not
 * visible here -- confirm. */
00195 S3DECODER_EXPORT
00196 s3_cfg_t *
00197 s3_cfg_read_simple(const char *_fn);
00198 
00199 
/* Takes a file name and returns a grammar object; judging by the name the
 * input is an SRGS grammar file.
 * NOTE(review): supported SRGS form and failure behaviour are not visible
 * here -- confirm. */
00208 s3_cfg_t *
00209 s3_cfg_read_srgs(const char *_fn);
00210 
00211 
/* Presumably writes the rules of _cfg to the file named _fn in the same
 * plain-text format that s3_cfg_read_simple() accepts -- TODO confirm.
 * Returns nothing, so I/O errors cannot be reported to the caller. */
00219 void
00220 s3_cfg_write_simple(s3_cfg_t *_cfg, const char *_fn);
00221 
00222 
/* Returns an s2_fsg_t built from the grammar _cfg.
 * NOTE(review): _max_expansion presumably bounds how far rules are expanded
 * during conversion; its exact unit and the ownership of the returned object
 * are not visible here -- confirm. */
00229 S3DECODER_EXPORT
00230 s2_fsg_t *
00231 s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, int _max_expansion);
00232 
00233 
00234 /*
00235  * Presumably recomputes the rule scores of _cfg using logmath (cf. log_score in s3_cfg_rule_t) -- TODO confirm.
00236  */
00237 void
00238 s3_cfg_rescore(s3_cfg_t *_cfg, logmath_t *logmath);
00239 
00240 
/* Returns the s3_cfg_item_t record for item id _id in grammar _cfg.
 * NOTE(review): the result for an unknown id is not visible here -- confirm
 * whether it can be NULL. */
00248 s3_cfg_item_t *
00249 s3_cfg_get_term_info(s3_cfg_t *_cfg, s3_cfg_id_t _id);
00250 
00251 
/* Returns a parse state for grammar _cfg; presumably the initial state of a
 * new parse, to be advanced with s3_cfg_input_term() and released with
 * s3_cfg_free_parse() -- TODO confirm. */
00258 s3_cfg_state_t *
00259 s3_cfg_create_parse(s3_cfg_t *_cfg);
00260 
00261 
/* Presumably frees the parse state _parse.
 * NOTE(review): how this differs from s3_cfg_free_parse_tree() (single state
 * or chain vs. whole tree?) is not visible here -- confirm. */
00268 void
00269 s3_cfg_free_parse(s3_cfg_t *_cfg, s3_cfg_state_t *_parse);
00270 
00271 
00272 /*
00273  * Presumably frees _parse together with the states expanded from it (cf. the expansions field) -- TODO confirm.
00274  */
00275 void
00276 s3_cfg_free_parse_tree(s3_cfg_t *_cfg, s3_cfg_state_t *_parse);
00277 
00278 
/* Takes the current parse state _cur and an item id _term and returns a
 * parse state; presumably the state reached after consuming _term.
 * NOTE(review): use s3_cfg_is_null_parse() on the result if an empty state
 * signals "no parse" -- confirm against the implementation. */
00286 s3_cfg_state_t *
00287 s3_cfg_input_term(s3_cfg_t *_cfg, s3_cfg_state_t *_cur, s3_cfg_id_t _term);
00288 
00289 
/* Adds a rule expanding _src into _products with score _fake_score to _cfg
 * and returns the rule object (inferred from the signature).
 * NOTE(review): no length is passed, so _products presumably must end with
 * a terminator such as S3_CFG_EOR_ITEM; whether the array is copied or
 * adopted is not visible here -- confirm. */
00300 s3_cfg_rule_t *
00301 s3_cfg_add_rule(s3_cfg_t *_cfg, s3_cfg_id_t _src, float32 _fake_score, 
00302                 s3_cfg_id_t *_products);
00303 
00304 
/* Presumably finalizes the rules of _cfg (e.g. derives prob_score/log_score
 * via logmath) so the grammar can be used for parsing.
 * NOTE(review): inferred from the name and parameters -- confirm, including
 * whether it must be called after every s3_cfg_add_rule(). */
00311 S3DECODER_EXPORT
00312 void
00313 s3_cfg_compile_rules(s3_cfg_t *_cfg, logmath_t *logmath);
00314 
00315 
/* Presumably prints a readable form of _rule to the stream _out, using _cfg
 * to resolve item ids to names -- output format not visible here. */
00323 void
00324 s3_cfg_print_rule(s3_cfg_t *_cfg, s3_cfg_rule_t *_rule, FILE *_out);
00325 
00326 
/* Presumably prints a readable form of the parse entry _entry to the stream
 * _out -- output format not visible here. */
00335 void
00336 s3_cfg_print_entry(s3_cfg_t *_cfg, s3_cfg_entry_t *_entry, FILE *_out);
00337 
00338 
/* Presumably prints the parse rooted at the entry _parse to the stream _out
 * -- traversal order and output format not visible here. */
00347 void
00348 s3_cfg_print_parse(s3_cfg_t *_cfg, s3_cfg_entry_t *_parse, FILE *_out);
00349 
00350 
/* Maps the item name _item to its id in _cfg.
 * NOTE(review): _item is a non-const char *, so the function may modify the
 * string; whether unknown names are added or yield S3_CFG_INVALID_ID is not
 * visible here -- confirm. */
00359 s3_cfg_id_t
00360 s3_cfg_str2id(s3_cfg_t *_cfg, char *_item);
00361 
00362 
/* Maps the item id _id to its name string in _cfg.  The result is const, so
 * callers must not modify it.
 * NOTE(review): the result for an unknown id and the lifetime of the
 * returned string are not visible here -- confirm. */
00370 const char *
00371 s3_cfg_id2str(s3_cfg_t *_cfg, s3_cfg_id_t _id);
00372 
00373 #ifdef __cplusplus
00374 }
00375 #endif
00376 #endif
00377 

Generated on 7 Mar 2010 by  doxygen 1.6.1