SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 2007 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00043 #ifndef __NGRAM_MODEL_H__ 00044 #define __NGRAM_MODEL_H__ 00045 00046 #include <stdarg.h> 00047 00048 /* Win32/WinCE DLL gunk */ 00049 #include <sphinxbase/sphinxbase_export.h> 00050 #include <sphinxbase/prim_type.h> 00051 #include <sphinxbase/cmd_ln.h> 00052 #include <sphinxbase/logmath.h> 00053 #include <sphinxbase/mmio.h> 00054 00055 #ifdef __cplusplus 00056 extern "C" { 00057 #endif 00058 #if 0 00059 /* Fool Emacs. */ 00060 } 00061 #endif 00062 00066 typedef struct ngram_model_s ngram_model_t; 00067 00071 typedef struct ngram_class_s ngram_class_t; 00072 00076 typedef enum ngram_file_type_e { 00077 NGRAM_INVALID = -1, 00078 NGRAM_AUTO, 00079 NGRAM_ARPA, 00080 NGRAM_DMP, 00081 NGRAM_DMP32, 00082 } ngram_file_type_t; 00083 00084 #define NGRAM_INVALID_WID -1 00106 SPHINXBASE_EXPORT 00107 ngram_model_t *ngram_model_read(cmd_ln_t *config, 00108 const char *file_name, 00109 ngram_file_type_t file_type, 00110 logmath_t *lmath); 00111 00117 SPHINXBASE_EXPORT 00118 int ngram_model_write(ngram_model_t *model, const char *file_name, 00119 ngram_file_type_t format); 00120 00126 SPHINXBASE_EXPORT 00127 ngram_file_type_t ngram_file_name_to_type(const char *file_name); 00128 00134 SPHINXBASE_EXPORT 00135 ngram_file_type_t ngram_str_to_type(const char *str_name); 00136 00143 SPHINXBASE_EXPORT 00144 char const *ngram_type_to_str(int type); 00145 00151 SPHINXBASE_EXPORT 00152 ngram_model_t *ngram_model_retain(ngram_model_t *model); 00153 00159 SPHINXBASE_EXPORT 00160 int ngram_model_free(ngram_model_t *model); 00161 00178 SPHINXBASE_EXPORT 00179 int ngram_model_recode(ngram_model_t *model, const char *from, const char *to); 00180 00184 typedef enum ngram_case_e { 00185 NGRAM_UPPER, 00186 NGRAM_LOWER 00187 } ngram_case_t; 00188 00195 SPHINXBASE_EXPORT 00196 int ngram_model_casefold(ngram_model_t *model, int kase); 00197 00209 SPHINXBASE_EXPORT 00210 int ngram_model_apply_weights(ngram_model_t *model, 00211 float32 lw, float32 wip, float32 uw); 00212 00221 SPHINXBASE_EXPORT 00222 float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip, 00223 int32 *out_log_uw); 00224 00257 SPHINXBASE_EXPORT 00258 int32 ngram_score(ngram_model_t *model, const char *word, ...); 00259 00263 SPHINXBASE_EXPORT 00264 int32 ngram_tg_score(ngram_model_t *model, 00265 int32 w3, int32 w2, int32 w1, 00266 int32 *n_used); 00267 00271 SPHINXBASE_EXPORT 00272 int32 ngram_bg_score(ngram_model_t *model, 00273 int32 w2, int32 w1, 00274 int32 *n_used); 00275 00279 SPHINXBASE_EXPORT 00280 int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history, 00281 int32 n_hist, int32 *n_used); 00282 00293 SPHINXBASE_EXPORT 00294 int32 ngram_prob(ngram_model_t *model, const char *word, ...); 00295 00302 SPHINXBASE_EXPORT 00303 int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history, 00304 int32 n_hist, int32 *n_used); 00305 00317 SPHINXBASE_EXPORT 00318 int32 ngram_score_to_prob(ngram_model_t *model, int32 score); 00319 00323 SPHINXBASE_EXPORT 00324 int32 ngram_wid(ngram_model_t *model, const char *word); 00325 00329 SPHINXBASE_EXPORT 00330 const char *ngram_word(ngram_model_t *model, int32 wid); 00331 00345 SPHINXBASE_EXPORT 00346 int32 ngram_unknown_wid(ngram_model_t *model); 00347 00351 SPHINXBASE_EXPORT 00352 int32 ngram_zero(ngram_model_t *model); 00353 00357 SPHINXBASE_EXPORT 00358 int32 ngram_model_get_size(ngram_model_t *model); 00359 00363 SPHINXBASE_EXPORT 00364 int32 const *ngram_model_get_counts(ngram_model_t *model); 00365 00369 typedef struct ngram_iter_s ngram_iter_t; 00370 00379 SPHINXBASE_EXPORT 00380 ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m); 00381 00385 SPHINXBASE_EXPORT 00386 ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...); 00387 00391 SPHINXBASE_EXPORT 00392 ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist); 00393 00402 SPHINXBASE_EXPORT 00403 int32 const *ngram_iter_get(ngram_iter_t *itor, 00404 int32 *out_score, 00405 int32 *out_bowt); 00406 00412 SPHINXBASE_EXPORT 00413 ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor); 00414 00418 SPHINXBASE_EXPORT 00419 ngram_iter_t *ngram_iter_next(ngram_iter_t *itor); 00420 00424 SPHINXBASE_EXPORT 00425 void ngram_iter_free(ngram_iter_t *itor); 00426 00439 SPHINXBASE_EXPORT 00440 int32 ngram_model_add_word(ngram_model_t *model, 00441 const char *word, float32 weight); 00442 00456 SPHINXBASE_EXPORT 00457 int32 ngram_model_read_classdef(ngram_model_t *model, 00458 const char *file_name); 00459 00468 SPHINXBASE_EXPORT 00469 int32 ngram_model_add_class(ngram_model_t *model, 00470 const char *classname, 00471 float32 classweight, 00472 char **words, 00473 const float32 *weights, 00474 int32 n_words); 00475 00485 SPHINXBASE_EXPORT 00486 int32 ngram_model_add_class_word(ngram_model_t *model, 00487 const char *classname, 00488 const char *word, 00489 float32 weight); 00490 00515 SPHINXBASE_EXPORT 00516 ngram_model_t *ngram_model_set_init(cmd_ln_t *config, 00517 ngram_model_t **models, 00518 char **names, 00519 const float32 *weights, 00520 int32 n_models); 00521 00552 SPHINXBASE_EXPORT 00553 ngram_model_t *ngram_model_set_read(cmd_ln_t *config, 00554 const char *lmctlfile, 00555 logmath_t *lmath); 00556 00560 SPHINXBASE_EXPORT 00561 int32 ngram_model_set_count(ngram_model_t *set); 00562 00566 typedef struct ngram_model_set_iter_s ngram_model_set_iter_t; 00567 00573 SPHINXBASE_EXPORT 00574 ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set); 00575 00581 SPHINXBASE_EXPORT 00582 ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor); 00583 00587 SPHINXBASE_EXPORT 00588 void ngram_model_set_iter_free(ngram_model_set_iter_t *itor); 00589 00597 SPHINXBASE_EXPORT 00598 ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor, 00599 char const **lmname); 00600 00607 SPHINXBASE_EXPORT 00608 ngram_model_t *ngram_model_set_select(ngram_model_t *set, 00609 const char *name); 00610 00617 SPHINXBASE_EXPORT 00618 ngram_model_t *ngram_model_set_lookup(ngram_model_t *set, 00619 const char *name); 00620 00624 SPHINXBASE_EXPORT 00625 const char *ngram_model_set_current(ngram_model_t *set); 00626 00634 SPHINXBASE_EXPORT 00635 ngram_model_t *ngram_model_set_interp(ngram_model_t *set, 00636 const char **names, 00637 const float32 *weights); 00638 00651 SPHINXBASE_EXPORT 00652 ngram_model_t *ngram_model_set_add(ngram_model_t *set, 00653 ngram_model_t *model, 00654 const char *name, 00655 float32 weight, 00656 int reuse_widmap); 00657 00666 SPHINXBASE_EXPORT 00667 ngram_model_t *ngram_model_set_remove(ngram_model_t *set, 00668 const char *name, 00669 int reuse_widmap); 00670 00674 SPHINXBASE_EXPORT 00675 void ngram_model_set_map_words(ngram_model_t *set, 00676 const char **words, 00677 int32 n_words); 00678 00686 SPHINXBASE_EXPORT 00687 int32 ngram_model_set_current_wid(ngram_model_t *set, 00688 int32 set_wid); 00689 00699 SPHINXBASE_EXPORT 00700 int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid); 00701 00709 SPHINXBASE_EXPORT 00710 void ngram_model_flush(ngram_model_t *lm); 00711 00712 #ifdef __cplusplus 00713 } 00714 #endif 00715 00716 00717 #endif /* __NGRAM_MODEL_H__ */