• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/fsg_search.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00020  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00021  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00022  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00023  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00026  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00027  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00028  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00029  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030  *
00031  * ====================================================================
00032  *
00033  */
00034 
00035 /*
00036  * fsg_search.c -- Search structures for FSM decoding.
00037  * 
00038  * **********************************************
00039  * CMU ARPA Speech Project
00040  *
00041  * Copyright (c) 2004 Carnegie Mellon University.
00042  * ALL RIGHTS RESERVED.
00043  * **********************************************
00044  * 
00045  * HISTORY
00046  *
00047  * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
00048  *              Started.
00049  */
00050 
00051 /* System headers. */
00052 #include <stdio.h>
00053 #include <string.h>
00054 #include <assert.h>
00055 
00056 /* SphinxBase headers. */
00057 #include <err.h>
00058 #include <ckd_alloc.h>
00059 #include <cmd_ln.h>
00060 
00061 /* Local headers. */
00062 #include "pocketsphinx_internal.h"
00063 #include "ps_lattice_internal.h"
00064 #include "fsg_search_internal.h"
00065 #include "fsg_history.h"
00066 #include "fsg_lextree.h"
00067 
00068 /* Turn this on for detailed debugging dump */
00069 #define __FSG_DBG__             0
00070 #define __FSG_DBG_CHAN__        0
00071 
00072 static ps_seg_t *fsg_search_seg_iter(ps_search_t *search, int32 *out_score);
00073 static ps_lattice_t *fsg_search_lattice(ps_search_t *search);
00074 static int fsg_search_prob(ps_search_t *search);
00075 
00076 static ps_searchfuncs_t fsg_funcs = {
00077     /* name: */   "fsg",
00078     /* start: */  fsg_search_start,
00079     /* step: */   fsg_search_step,
00080     /* finish: */ fsg_search_finish,
00081     /* reinit: */ fsg_search_reinit,
00082     /* free: */   fsg_search_free,
00083     /* lattice: */  fsg_search_lattice,
00084     /* hyp: */      fsg_search_hyp,
00085     /* prob: */     fsg_search_prob,
00086     /* seg_iter: */ fsg_search_seg_iter,
00087 };
00088 
00089 ps_search_t *
00090 fsg_search_init(cmd_ln_t *config,
00091                 acmod_t *acmod,
00092                 dict_t *dict,
00093                 dict2pid_t *d2p)
00094 {
00095     fsg_search_t *fsgs;
00096     char const *path;
00097 
00098     fsgs = ckd_calloc(1, sizeof(*fsgs));
00099     ps_search_init(ps_search_base(fsgs), &fsg_funcs, config, acmod, dict, d2p);
00100 
00101     /* Initialize HMM context. */
00102     fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00103                                     acmod->tmat->tp, NULL, acmod->mdef->sseq);
00104     if (fsgs->hmmctx == NULL) {
00105         ps_search_free(ps_search_base(fsgs));
00106         return NULL;
00107     }
00108 
00109     /* Intialize the search history object */
00110     fsgs->history = fsg_history_init(NULL, dict);
00111     fsgs->frame = -1;
00112 
00113     /* Initialize FSG table. */
00114     fsgs->fsgs = hash_table_new(5, FALSE);
00115 
00116     /* Get search pruning parameters */
00117     fsgs->beam_factor = 1.0f;
00118     fsgs->beam = fsgs->beam_orig
00119         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"));
00120     fsgs->pbeam = fsgs->pbeam_orig
00121         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"));
00122     fsgs->wbeam = fsgs->wbeam_orig
00123         = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"));
00124 
00125     /* LM related weights/penalties */
00126     fsgs->lw = cmd_ln_float32_r(config, "-lw");
00127     fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip"))
00128                            * fsgs->lw);
00129     fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip"))
00130                            * fsgs->lw);
00131 
00132     /* Best path search (and confidence annotation)? */
00133     if (cmd_ln_boolean_r(config, "-bestpath"))
00134         fsgs->bestpath = TRUE;
00135 
00136     /* Acoustic score scale for posterior probabilities. */
00137     fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
00138 
00139     E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
00140            fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig,
00141            fsgs->wip, fsgs->pip);
00142 
00143     /* Load an FSG if one was specified in config */
00144     if ((path = cmd_ln_str_r(config, "-fsg"))) {
00145         fsg_model_t *fsg;
00146 
00147         if ((fsg = fsg_model_readfile(path, acmod->lmath, fsgs->lw)) == NULL)
00148             goto error_out;
00149         if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
00150             fsg_model_free(fsg);
00151             goto error_out;
00152         }
00153         if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
00154             goto error_out;
00155         if (fsg_search_reinit(ps_search_base(fsgs),
00156                               ps_search_dict(fsgs),
00157                               ps_search_dict2pid(fsgs)) < 0)
00158             goto error_out;
00159     }
00160     /* Or load a JSGF grammar */
00161     else if ((path = cmd_ln_str_r(config, "-jsgf"))) {
00162         fsg_model_t *fsg;
00163         jsgf_rule_t *rule;
00164         char const *toprule;
00165 
00166         if ((fsgs->jsgf = jsgf_parse_file(path, NULL)) == NULL)
00167             goto error_out;
00168 
00169         rule = NULL;
00170         /* Take the -toprule if specified. */
00171         if ((toprule = cmd_ln_str_r(config, "-toprule"))) {
00172             char *anglerule;
00173 
00174             anglerule = ckd_calloc(1, strlen(toprule) + 3);
00175             anglerule[0] = '<';
00176             strcat(anglerule, toprule);
00177             strcat(anglerule, ">");
00178             rule = jsgf_get_rule(fsgs->jsgf, anglerule);
00179             ckd_free(anglerule);
00180             if (rule == NULL) {
00181                 E_ERROR("Start rule %s not found\n", toprule);
00182                 goto error_out;
00183             }
00184         }
00185         /* Otherwise, take the first public rule. */
00186         else {
00187             jsgf_rule_iter_t *itor;
00188 
00189             for (itor = jsgf_rule_iter(fsgs->jsgf); itor;
00190                  itor = jsgf_rule_iter_next(itor)) {
00191                 rule = jsgf_rule_iter_rule(itor);
00192                 if (jsgf_rule_public(rule))
00193                     break;
00194             }
00195             if (rule == NULL) {
00196                 E_ERROR("No public rules found in %s\n", path);
00197                 goto error_out;
00198             }
00199         }
00200         fsg = jsgf_build_fsg(fsgs->jsgf, rule, acmod->lmath, fsgs->lw);
00201         if (fsg_set_add(fsgs, fsg_model_name(fsg), fsg) != fsg) {
00202             fsg_model_free(fsg);
00203             goto error_out;
00204         }
00205         if (fsg_set_select(fsgs, fsg_model_name(fsg)) == NULL)
00206             goto error_out;
00207         if (fsg_search_reinit(ps_search_base(fsgs),
00208                               ps_search_dict(fsgs),
00209                               ps_search_dict2pid(fsgs)) < 0)
00210             goto error_out;
00211     }
00212 
00213     return ps_search_base(fsgs);
00214 
00215 error_out:
00216     fsg_search_free(ps_search_base(fsgs));
00217     return NULL;
00218 }
00219 
00220 void
00221 fsg_search_free(ps_search_t *search)
00222 {
00223     fsg_search_t *fsgs = (fsg_search_t *)search;
00224     hash_iter_t *itor;
00225 
00226     ps_search_deinit(search);
00227     if (fsgs->jsgf)
00228         jsgf_grammar_free(fsgs->jsgf);
00229     fsg_lextree_free(fsgs->lextree);
00230     fsg_history_reset(fsgs->history);
00231     fsg_history_set_fsg(fsgs->history, NULL, NULL);
00232     for (itor = hash_table_iter(fsgs->fsgs);
00233          itor; itor = hash_table_iter_next(itor)) {
00234         fsg_model_t *fsg = (fsg_model_t *) hash_entry_val(itor->ent);
00235         fsg_model_free(fsg);
00236     }
00237     hash_table_free(fsgs->fsgs);
00238     fsg_history_free(fsgs->history);
00239     hmm_context_free(fsgs->hmmctx);
00240     ckd_free(fsgs);
00241 }
00242 
00243 int
00244 fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
00245 {
00246     fsg_search_t *fsgs = (fsg_search_t *)search;
00247 
00248     /* Free the old lextree */
00249     if (fsgs->lextree)
00250         fsg_lextree_free(fsgs->lextree);
00251 
00252     /* Free old dict2pid, dict */
00253     ps_search_base_reinit(search, dict, d2p);
00254 
00255     /* Update the number of words (not used by this module though). */
00256     search->n_words = dict_size(dict);
00257 
00258     /* Allocate new lextree for the given FSG */
00259     fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p,
00260                                      ps_search_acmod(fsgs)->mdef,
00261                                      fsgs->hmmctx, fsgs->wip, fsgs->pip);
00262 
00263     /* Inform the history module of the new fsg */
00264     fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict);
00265 
00266     return 0;
00267 }
00268 
00269 
00270 static int
00271 fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg)
00272 {
00273     dict_t *dict;
00274     int32 wid;
00275     int n_sil;
00276 
00277     dict = ps_search_dict(fsgs);
00278     /*
00279      * NOTE: Unlike N-Gram search, we do not use explicit start and
00280      * end symbols.  This is because the start and end nodes are
00281      * defined in the grammar.  We do add silence/filler self-loops to
00282      * all states in order to allow for silence between words and at
00283      * the beginning and end of utterances.
00284      *
00285      * This has some implications for word graph generation, namely,
00286      * that there can (and usually will) be multiple start and end
00287      * states in the word graph.  We therefore do add explicit start
00288      * and end nodes to the graph.
00289      */
00290     /* Add silence self-loops to all states. */
00291     fsg_model_add_silence(fsg, "<sil>", -1,
00292                           cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"));
00293     n_sil = 0;
00294     /* Add self-loops for all other fillers. */
00295     for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
00296         char const *word = dict_wordstr(dict, wid);
00297         if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
00298             continue;
00299         fsg_model_add_silence(fsg, word, -1,
00300                               cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"));
00301         ++n_sil;
00302     }
00303 
00304     return n_sil;
00305 }
00306 
00307 /* Scans the dictionary and check if all words are present. */
00308 static int
00309 fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg)
00310 {
00311     dict_t *dict;
00312     int i;
00313 
00314     dict = ps_search_dict(fsgs);
00315     for (i = 0; i < fsg_model_n_word(fsg); ++i) {
00316         char const *word;
00317         int32 wid;
00318 
00319         word = fsg_model_word_str(fsg, i);
00320         wid = dict_wordid(dict, word);
00321         if (wid == BAD_S3WID) {
00322             E_ERROR("The word '%s' is missing in the dictionary\n", word);
00323             return FALSE;
00324         }
00325     }
00326 
00327     return TRUE;
00328 }
00329 
00330 static int
00331 fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
00332 {
00333     dict_t *dict;
00334     int n_alt, n_word;
00335     int i;
00336 
00337     dict = ps_search_dict(fsgs);
00338     /* Scan FSG's vocabulary for words that have alternate pronunciations. */
00339     n_alt = 0;
00340     n_word = fsg_model_n_word(fsg);
00341     for (i = 0; i < n_word; ++i) {
00342         char const *word;
00343         int32 wid;
00344 
00345         word = fsg_model_word_str(fsg, i);
00346         wid = dict_wordid(dict, word);
00347         if (wid != BAD_S3WID) {
00348             while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) {
00349                 fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
00350                 ++n_alt;
00351             }
00352         }
00353     }
00354 
00355     return n_alt;
00356 }
00357 
00358 fsg_model_t *
00359 fsg_set_get_fsg(fsg_search_t *fsgs, const char *name)
00360 {
00361     void *val;
00362 
00363     if (hash_table_lookup(fsgs->fsgs, name, &val) < 0)
00364         return NULL;
00365     return (fsg_model_t *)val;
00366 }
00367 
00368 fsg_model_t *
00369 fsg_set_add(fsg_search_t *fsgs, char const *name, fsg_model_t *fsg)
00370 {
00371     if (name == NULL)
00372         name = fsg_model_name(fsg);
00373 
00374     if (!fsg_search_check_dict(fsgs, fsg))
00375         return NULL;
00376 
00377     /* Add silence transitions and alternate words. */
00378     if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusefiller")
00379         && !fsg_model_has_sil(fsg))
00380         fsg_search_add_silences(fsgs, fsg);
00381     if (cmd_ln_boolean_r(ps_search_config(fsgs), "-fsgusealtpron")
00382         && !fsg_model_has_alt(fsg))
00383         fsg_search_add_altpron(fsgs, fsg);
00384 
00385     return (fsg_model_t *)hash_table_enter(fsgs->fsgs, name, fsg);
00386 }
00387 
00388 
00389 fsg_model_t *
00390 fsg_set_remove_byname(fsg_search_t *fsgs, char const *key)
00391 {
00392     fsg_model_t *oldfsg;
00393     void *val;
00394 
00395     /* Look for the matching FSG. */
00396     if (hash_table_lookup(fsgs->fsgs, key, &val) < 0) {
00397         E_ERROR("FSG `%s' to be deleted not found\n", key);
00398         return NULL;
00399     }
00400     oldfsg = val;
00401 
00402     /* Remove it from the FSG table. */
00403     hash_table_delete(fsgs->fsgs, key);
00404     /* If this was the currently active FSG, also delete other stuff */
00405     if (fsgs->fsg == oldfsg) {
00406         fsg_lextree_free(fsgs->lextree);
00407         fsgs->lextree = NULL;
00408         fsg_history_set_fsg(fsgs->history, NULL, NULL);
00409         fsgs->fsg = NULL;
00410     }
00411     return oldfsg;
00412 }
00413 
00414 
00415 fsg_model_t *
00416 fsg_set_remove(fsg_search_t *fsgs, fsg_model_t *fsg)
00417 {
00418     char const *key;
00419     hash_iter_t *itor;
00420 
00421     key = NULL;
00422     for (itor = hash_table_iter(fsgs->fsgs);
00423          itor; itor = hash_table_iter_next(itor)) {
00424         fsg_model_t *oldfsg;
00425 
00426         oldfsg = (fsg_model_t *) hash_entry_val(itor->ent);
00427         if (oldfsg == fsg) {
00428             key = hash_entry_key(itor->ent);
00429             hash_table_iter_free(itor);
00430             break;
00431         }
00432     }
00433     if (key == NULL) {
00434         E_WARN("FSG '%s' to be deleted not found\n", fsg_model_name(fsg));
00435         return NULL;
00436     }
00437     else
00438         return fsg_set_remove_byname(fsgs, key);
00439 }
00440 
00441 
00442 fsg_model_t *
00443 fsg_set_select(fsg_search_t *fsgs, const char *name)
00444 {
00445     fsg_model_t *fsg;
00446 
00447     fsg = fsg_set_get_fsg(fsgs, name);
00448     if (fsg == NULL) {
00449         E_ERROR("FSG '%s' not known; cannot make it current\n", name);
00450         return NULL;
00451     }
00452     fsgs->fsg = fsg;
00453     return fsg;
00454 }
00455 
00456 fsg_set_iter_t *
00457 fsg_set_iter(fsg_set_t *fsgs)
00458 {
00459     return hash_table_iter(fsgs->fsgs);
00460 }
00461 
00462 fsg_set_iter_t *
00463 fsg_set_iter_next(fsg_set_iter_t *itor)
00464 {
00465     return hash_table_iter_next(itor);
00466 }
00467 
00468 fsg_model_t *
00469 fsg_set_iter_fsg(fsg_set_iter_t *itor)
00470 {
00471     return ((fsg_model_t *)itor->ent->val);
00472 }
00473 
00474 void
00475 fsg_set_iter_free(fsg_set_iter_t *itor)
00476 {
00477     hash_table_iter_free(itor);
00478 }
00479 
00480 static void
00481 fsg_search_sen_active(fsg_search_t *fsgs)
00482 {
00483     gnode_t *gn;
00484     fsg_pnode_t *pnode;
00485     hmm_t *hmm;
00486 
00487     acmod_clear_active(ps_search_acmod(fsgs));
00488 
00489     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00490         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00491         hmm = fsg_pnode_hmmptr(pnode);
00492         assert(hmm_frame(hmm) == fsgs->frame);
00493         acmod_activate_hmm(ps_search_acmod(fsgs), hmm);
00494     }
00495 }
00496 
00497 
00498 /*
00499  * Evaluate all the active HMMs.
00500  * (Executed once per frame.)
00501  */
00502 static void
00503 fsg_search_hmm_eval(fsg_search_t *fsgs)
00504 {
00505     gnode_t *gn;
00506     fsg_pnode_t *pnode;
00507     hmm_t *hmm;
00508     int32 bestscore;
00509     int32 n, maxhmmpf;
00510 
00511     bestscore = WORST_SCORE;
00512 
00513     if (!fsgs->pnode_active) {
00514         E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame);
00515         return;
00516     }
00517 
00518     for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) {
00519         int32 score;
00520 
00521         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00522         hmm = fsg_pnode_hmmptr(pnode);
00523         assert(hmm_frame(hmm) == fsgs->frame);
00524 
00525 #if __FSG_DBG__
00526         E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode,
00527                fsgs->frame);
00528         hmm_dump(hmm, stdout);
00529 #endif
00530         score = hmm_vit_eval(hmm);
00531 #if __FSG_DBG_CHAN__
00532         E_INFO("pnode(%08x) after eval @frm %5d\n",
00533                (int32) pnode, fsgs->frame);
00534         hmm_dump(hmm, stdout);
00535 #endif
00536 
00537         if (score BETTER_THAN bestscore)
00538             bestscore = score;
00539     }
00540 
00541 #if __FSG_DBG__
00542     E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore);
00543 #endif
00544     fsgs->n_hmm_eval += n;
00545 
00546     /* Adjust beams if #active HMMs larger than absolute threshold */
00547     maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf");
00548     if (maxhmmpf != -1 && n > maxhmmpf) {
00549         /*
00550          * Too many HMMs active; reduce the beam factor applied to the default
00551          * beams, but not if the factor is already at a floor (0.1).
00552          */
00553         if (fsgs->beam_factor > 0.1) {        /* Hack!!  Hardwired constant 0.1 */
00554             fsgs->beam_factor *= 0.9f;        /* Hack!!  Hardwired constant 0.9 */
00555             fsgs->beam =
00556                 (int32) (fsgs->beam_orig * fsgs->beam_factor);
00557             fsgs->pbeam =
00558                 (int32) (fsgs->pbeam_orig * fsgs->beam_factor);
00559             fsgs->wbeam =
00560                 (int32) (fsgs->wbeam_orig * fsgs->beam_factor);
00561         }
00562     }
00563     else {
00564         fsgs->beam_factor = 1.0f;
00565         fsgs->beam = fsgs->beam_orig;
00566         fsgs->pbeam = fsgs->pbeam_orig;
00567         fsgs->wbeam = fsgs->wbeam_orig;
00568     }
00569 
00570     if (n > fsg_lextree_n_pnode(fsgs->lextree))
00571         E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
00572                 fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree));
00573 
00574     fsgs->bestscore = bestscore;
00575 }
00576 
00577 
00578 static void
00579 fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode)
00580 {
00581     fsg_pnode_t *child;
00582     hmm_t *hmm;
00583     int32 newscore, thresh, nf;
00584 
00585     assert(pnode);
00586     assert(!fsg_pnode_leaf(pnode));
00587 
00588     nf = fsgs->frame + 1;
00589     thresh = fsgs->bestscore + fsgs->beam;
00590 
00591     hmm = fsg_pnode_hmmptr(pnode);
00592 
00593     for (child = fsg_pnode_succ(pnode);
00594          child; child = fsg_pnode_sibling(child)) {
00595         newscore = hmm_out_score(hmm) + child->logs2prob;
00596 
00597         if ((newscore BETTER_THAN thresh)
00598             && (newscore BETTER_THAN hmm_in_score(&child->hmm))) {
00599             /* Incoming score > pruning threshold and > target's existing score */
00600             if (hmm_frame(&child->hmm) < nf) {
00601                 /* Child node not yet activated; do so */
00602                 fsgs->pnode_active_next =
00603                     glist_add_ptr(fsgs->pnode_active_next,
00604                                   (void *) child);
00605             }
00606 
00607             hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
00608         }
00609     }
00610 }
00611 
00612 
00613 static void
00614 fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode)
00615 {
00616     hmm_t *hmm;
00617     fsg_link_t *fl;
00618     int32 wid;
00619     fsg_pnode_ctxt_t ctxt;
00620 
00621     assert(pnode);
00622     assert(fsg_pnode_leaf(pnode));
00623 
00624     hmm = fsg_pnode_hmmptr(pnode);
00625     fl = fsg_pnode_fsglink(pnode);
00626     assert(fl);
00627 
00628     wid = fsg_link_wid(fl);
00629     assert(wid >= 0);
00630 
00631 #if __FSG_DBG__
00632     E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
00633            fsgs->frame, (int32) pnode,
00634            hmm_out_score(hmm), hmm_out_history(hmm));
00635 #endif
00636 
00637     /*
00638      * Check if this is filler or single phone word; these do not model right
00639      * context (i.e., the exit score applies to all right contexts).
00640      */
00641     if (fsg_model_is_filler(fsgs->fsg, wid)
00642         /* FIXME: This might be slow due to repeated calls to dict_to_id(). */
00643         || (dict_is_single_phone(ps_search_dict(fsgs),
00644                                    dict_wordid(ps_search_dict(fsgs),
00645                                                fsg_model_word_str(fsgs->fsg, wid))))) {
00646         /* Create a dummy context structure that applies to all right contexts */
00647         fsg_pnode_add_all_ctxt(&ctxt);
00648 
00649         /* Create history table entry for this word exit */
00650         fsg_history_entry_add(fsgs->history,
00651                               fl,
00652                               fsgs->frame,
00653                               hmm_out_score(hmm),
00654                               hmm_out_history(hmm),
00655                               pnode->ci_ext, ctxt);
00656 
00657     }
00658     else {
00659         /* Create history table entry for this word exit */
00660         fsg_history_entry_add(fsgs->history,
00661                               fl,
00662                               fsgs->frame,
00663                               hmm_out_score(hmm),
00664                               hmm_out_history(hmm),
00665                               pnode->ci_ext, pnode->ctxt);
00666     }
00667 }
00668 
00669 
00670 /*
00671  * (Beam) prune the just evaluated HMMs, determine which ones remain
00672  * active, which ones transition to successors, which ones exit and
00673  * terminate in their respective destination FSM states.
00674  * (Executed once per frame.)
00675  */
00676 static void
00677 fsg_search_hmm_prune_prop(fsg_search_t *fsgs)
00678 {
00679     gnode_t *gn;
00680     fsg_pnode_t *pnode;
00681     hmm_t *hmm;
00682     int32 thresh, word_thresh, phone_thresh;
00683 
00684     assert(fsgs->pnode_active_next == NULL);
00685 
00686     thresh = fsgs->bestscore + fsgs->beam;
00687     phone_thresh = fsgs->bestscore + fsgs->pbeam;
00688     word_thresh = fsgs->bestscore + fsgs->wbeam;
00689 
00690     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00691         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00692         hmm = fsg_pnode_hmmptr(pnode);
00693 
00694         if (hmm_bestscore(hmm) >= thresh) {
00695             /* Keep this HMM active in the next frame */
00696             if (hmm_frame(hmm) == fsgs->frame) {
00697                 hmm_frame(hmm) = fsgs->frame + 1;
00698                 fsgs->pnode_active_next =
00699                     glist_add_ptr(fsgs->pnode_active_next,
00700                                   (void *) pnode);
00701             }
00702             else {
00703                 assert(hmm_frame(hmm) == fsgs->frame + 1);
00704             }
00705 
00706             if (!fsg_pnode_leaf(pnode)) {
00707                 if (hmm_out_score(hmm) >= phone_thresh) {
00708                     /* Transition out of this phone into its children */
00709                     fsg_search_pnode_trans(fsgs, pnode);
00710                 }
00711             }
00712             else {
00713                 if (hmm_out_score(hmm) >= word_thresh) {
00714                     /* Transition out of leaf node into destination FSG state */
00715                     fsg_search_pnode_exit(fsgs, pnode);
00716                 }
00717             }
00718         }
00719     }
00720 }
00721 
00722 
00723 /*
00724  * Propagate newly created history entries through null transitions.
00725  */
00726 static void
00727 fsg_search_null_prop(fsg_search_t *fsgs)
00728 {
00729     int32 bpidx, n_entries, thresh, newscore;
00730     fsg_hist_entry_t *hist_entry;
00731     fsg_link_t *l;
00732     int32 s, d;
00733     fsg_model_t *fsg;
00734 
00735     fsg = fsgs->fsg;
00736     thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */
00737 
00738     n_entries = fsg_history_n_entries(fsgs->history);
00739 
00740     for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
00741         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
00742 
00743         l = fsg_hist_entry_fsglink(hist_entry);
00744 
00745         /* Destination FSG state for history entry */
00746         s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
00747 
00748         /*
00749          * Check null transitions from d to all other states.  (Only need to
00750          * propagate one step, since FSG contains transitive closure of null
00751          * transitions.)
00752          */
00753         for (d = 0; d < fsg_model_n_state(fsg); d++) {
00754             l = fsg_model_null_trans(fsg, s, d);
00755 
00756             if (l) {            /* Propagate history entry through this null transition */
00757                 newscore =
00758                     fsg_hist_entry_score(hist_entry) +
00759                     fsg_link_logs2prob(l);
00760 
00761                 if (newscore >= thresh) {
00762                     fsg_history_entry_add(fsgs->history, l,
00763                                           fsg_hist_entry_frame(hist_entry),
00764                                           newscore,
00765                                           bpidx,
00766                                           fsg_hist_entry_lc(hist_entry),
00767                                           fsg_hist_entry_rc(hist_entry));
00768                 }
00769             }
00770         }
00771     }
00772 }
00773 
00774 
00775 /*
00776  * Perform cross-word transitions; propagate each history entry created in this
00777  * frame to lextree roots attached to the target FSG state for that entry.
00778  */
00779 static void
00780 fsg_search_word_trans(fsg_search_t *fsgs)
00781 {
00782     int32 bpidx, n_entries;
00783     fsg_hist_entry_t *hist_entry;
00784     fsg_link_t *l;
00785     int32 score, newscore, thresh, nf, d;
00786     fsg_pnode_t *root;
00787     int32 lc, rc;
00788 
00789     n_entries = fsg_history_n_entries(fsgs->history);
00790 
00791     thresh = fsgs->bestscore + fsgs->beam;
00792     nf = fsgs->frame + 1;
00793 
00794     for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) {
00795         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
00796         assert(hist_entry);
00797         score = fsg_hist_entry_score(hist_entry);
00798         assert(fsgs->frame == fsg_hist_entry_frame(hist_entry));
00799 
00800         l = fsg_hist_entry_fsglink(hist_entry);
00801 
00802         /* Destination state for hist_entry */
00803         d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
00804                                                                 fsg);
00805 
00806         lc = fsg_hist_entry_lc(hist_entry);
00807 
00808         /* Transition to all root nodes attached to state d */
00809         for (root = fsg_lextree_root(fsgs->lextree, d);
00810              root; root = root->sibling) {
00811             rc = root->ci_ext;
00812 
00813             if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
00814                 (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
00815                 /*
00816                  * Last CIphone of history entry is in left-context list supported by
00817                  * target root node, and
00818                  * first CIphone of target root node is in right context list supported
00819                  * by history entry;
00820                  * So the transition can go ahead (if new score is good enough).
00821                  */
00822                 newscore = score + root->logs2prob;
00823 
00824                 if ((newscore BETTER_THAN thresh)
00825                     && (newscore BETTER_THAN hmm_in_score(&root->hmm))) {
00826                     if (hmm_frame(&root->hmm) < nf) {
00827                         /* Newly activated node; add to active list */
00828                         fsgs->pnode_active_next =
00829                             glist_add_ptr(fsgs->pnode_active_next,
00830                                           (void *) root);
00831 #if __FSG_DBG__
00832                         E_INFO
00833                             ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
00834                              fsgs->frame, bpidx, (int32) root);
00835 #endif
00836                     }
00837                     else {
00838 #if __FSG_DBG__
00839                         E_INFO
00840                             ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
00841                              fsgs->frame, bpidx, (int32) root);
00842 #endif
00843                     }
00844 
00845                     hmm_enter(&root->hmm, newscore, bpidx, nf);
00846                 }
00847             }
00848         }
00849     }
00850 }
00851 
00852 
00853 int
00854 fsg_search_step(ps_search_t *search, int frame_idx)
00855 {
00856     fsg_search_t *fsgs = (fsg_search_t *)search;
00857     int16 const *senscr;
00858     acmod_t *acmod = search->acmod;
00859     gnode_t *gn;
00860     fsg_pnode_t *pnode;
00861     hmm_t *hmm;
00862 
00863     /* Activate our HMMs for the current frame if need be. */
00864     if (!acmod->compallsen)
00865         fsg_search_sen_active(fsgs);
00866     /* Compute GMM scores for the current frame. */
00867     senscr = acmod_score(acmod, &frame_idx);
00868     fsgs->n_sen_eval += acmod->n_senone_active;
00869     hmm_context_set_senscore(fsgs->hmmctx, senscr);
00870 
00871     /* Mark backpointer table for current frame. */
00872     fsgs->bpidx_start = fsg_history_n_entries(fsgs->history);
00873 
00874     /* Evaluate all active pnodes (HMMs) */
00875     fsg_search_hmm_eval(fsgs);
00876 
00877     /*
00878      * Prune and propagate the HMMs evaluated; create history entries for
00879      * word exits.  The words exits are tentative, and may be pruned; make
00880      * the survivors permanent via fsg_history_end_frame().
00881      */
00882     fsg_search_hmm_prune_prop(fsgs);
00883     fsg_history_end_frame(fsgs->history);
00884 
00885     /*
00886      * Propagate new history entries through any null transitions, creating
00887      * new history entries, and then make the survivors permanent.
00888      */
00889     fsg_search_null_prop(fsgs);
00890     fsg_history_end_frame(fsgs->history);
00891 
00892     /*
00893      * Perform cross-word transitions; propagate each history entry across its
00894      * terminating state to the root nodes of the lextree attached to the state.
00895      */
00896     fsg_search_word_trans(fsgs);
00897 
00898     /*
00899      * We've now come full circle, HMM and FSG states have been updated for
00900      * the next frame.
00901      * Update the active lists, deactivate any currently active HMMs that
00902      * did not survive into the next frame
00903      */
00904     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
00905         pnode = (fsg_pnode_t *) gnode_ptr(gn);
00906         hmm = fsg_pnode_hmmptr(pnode);
00907 
00908         if (hmm_frame(hmm) == fsgs->frame) {
00909             /* This HMM NOT activated for the next frame; reset it */
00910             fsg_psubtree_pnode_deactivate(pnode);
00911         }
00912         else {
00913             assert(hmm_frame(hmm) == (fsgs->frame + 1));
00914         }
00915     }
00916 
00917     /* Free the currently active list */
00918     glist_free(fsgs->pnode_active);
00919 
00920     /* Make the next-frame active list the current one */
00921     fsgs->pnode_active = fsgs->pnode_active_next;
00922     fsgs->pnode_active_next = NULL;
00923 
00924     /* End of this frame; ready for the next */
00925     ++fsgs->frame;
00926 
00927     return 1;
00928 }
00929 
00930 
00931 /*
00932  * Set all HMMs to inactive, clear active lists, initialize FSM start
00933  * state to be the only active node.
00934  * (Executed at the start of each utterance.)
00935  */
00936 int
00937 fsg_search_start(ps_search_t *search)
00938 {
00939     fsg_search_t *fsgs = (fsg_search_t *)search;
00940     int32 silcipid;
00941     fsg_pnode_ctxt_t ctxt;
00942 
00943     /* Reset dynamic adjustment factor for beams */
00944     fsgs->beam_factor = 1.0f;
00945     fsgs->beam = fsgs->beam_orig;
00946     fsgs->pbeam = fsgs->pbeam_orig;
00947     fsgs->wbeam = fsgs->wbeam_orig;
00948 
00949     silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL");
00950 
00951     /* Initialize EVERYTHING to be inactive */
00952     assert(fsgs->pnode_active == NULL);
00953     assert(fsgs->pnode_active_next == NULL);
00954 
00955     fsg_history_reset(fsgs->history);
00956     fsg_history_utt_start(fsgs->history);
00957     fsgs->final = FALSE;
00958 
00959     /* Dummy context structure that allows all right contexts to use this entry */
00960     fsg_pnode_add_all_ctxt(&ctxt);
00961 
00962     /* Create dummy history entry leading to start state */
00963     fsgs->frame = -1;
00964     fsgs->bestscore = 0;
00965     fsg_history_entry_add(fsgs->history,
00966                           NULL, -1, 0, -1, silcipid, ctxt);
00967     fsgs->bpidx_start = 0;
00968 
00969     /* Propagate dummy history entry through NULL transitions from start state */
00970     fsg_search_null_prop(fsgs);
00971 
00972     /* Perform word transitions from this dummy history entry */
00973     fsg_search_word_trans(fsgs);
00974 
00975     /* Make the next-frame active list the current one */
00976     fsgs->pnode_active = fsgs->pnode_active_next;
00977     fsgs->pnode_active_next = NULL;
00978 
00979     ++fsgs->frame;
00980 
00981     fsgs->n_hmm_eval = 0;
00982     fsgs->n_sen_eval = 0;
00983 
00984     return 0;
00985 }
00986 
00987 /*
00988  * Cleanup at the end of each utterance.
00989  */
00990 int
00991 fsg_search_finish(ps_search_t *search)
00992 {
00993     fsg_search_t *fsgs = (fsg_search_t *)search;
00994     gnode_t *gn;
00995     fsg_pnode_t *pnode;
00996     int32 n_hist;
00997 
00998     /* Deactivate all nodes in the current and next-frame active lists */
00999     for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) {
01000         pnode = (fsg_pnode_t *) gnode_ptr(gn);
01001         fsg_psubtree_pnode_deactivate(pnode);
01002     }
01003     for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) {
01004         pnode = (fsg_pnode_t *) gnode_ptr(gn);
01005         fsg_psubtree_pnode_deactivate(pnode);
01006     }
01007 
01008     glist_free(fsgs->pnode_active);
01009     fsgs->pnode_active = NULL;
01010     glist_free(fsgs->pnode_active_next);
01011     fsgs->pnode_active_next = NULL;
01012 
01013     fsgs->final = TRUE;
01014 
01015     n_hist = fsg_history_n_entries(fsgs->history);
01016     E_INFO
01017         ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
01018          fsgs->frame, fsgs->n_hmm_eval,
01019          (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0,
01020          fsgs->n_sen_eval,
01021          (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0,
01022          n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0);
01023 
01024     /* Sanity check */
01025     if (fsgs->n_hmm_eval >
01026         fsg_lextree_n_pnode(fsgs->lextree) * fsgs->frame) {
01027         E_ERROR
01028             ("SANITY CHECK #HMMEval(%d) > %d (#HMMs(%d)*#frames(%d)) FAILED\n",
01029              fsgs->n_hmm_eval,
01030              fsg_lextree_n_pnode(fsgs->lextree) * fsgs->frame,
01031              fsg_lextree_n_pnode(fsgs->lextree), fsgs->frame);
01032     }
01033 
01034     return 0;
01035 }
01036 
01037 static int
01038 fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score)
01039 {
01040     fsg_hist_entry_t *hist_entry;
01041     fsg_model_t *fsg;
01042     int bpidx, frm, last_frm, besthist;
01043     int32 bestscore;
01044 
01045     if (frame_idx == -1)
01046         frame_idx = fsgs->frame - 1;
01047     last_frm = frm = frame_idx;
01048 
01049     /* Scan backwards to find a word exit in frame_idx. */
01050     bpidx = fsg_history_n_entries(fsgs->history) - 1;
01051     while (bpidx > 0) {
01052         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
01053         if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
01054             frm = last_frm = fsg_hist_entry_frame(hist_entry);
01055             break;
01056         }
01057     }
01058 
01059     /* No hypothesis (yet). */
01060     if (bpidx <= 0) 
01061         return bpidx;
01062 
01063     /* Now find best word exit in this frame. */
01064     bestscore = INT_MIN;
01065     besthist = -1;
01066     fsg = fsgs->fsg;
01067     while (frm == last_frm) {
01068         fsg_link_t *fl;
01069         int32 score;
01070 
01071         fl = fsg_hist_entry_fsglink(hist_entry);
01072         score = fsg_hist_entry_score(hist_entry);
01073 
01074         if (score BETTER_THAN bestscore) {
01075             /* Only enforce the final state constraint if this is a final hypothesis. */
01076             if ((!final)
01077                 || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
01078                 bestscore = score;
01079                 besthist = bpidx;
01080             }
01081         }
01082 
01083         --bpidx;
01084         if (bpidx < 0)
01085             break;
01086         hist_entry = fsg_history_entry_get(fsgs->history, bpidx);
01087         frm = fsg_hist_entry_frame(hist_entry);
01088     }
01089 
01090     /* Final state not reached. */
01091     if (besthist == -1) {
01092         E_ERROR("Final state not reached in frame %d\n", frame_idx);
01093         return -1;
01094     }
01095 
01096     /* This here's the one we want. */
01097     if (out_score)
01098         *out_score = bestscore;
01099     return besthist;
01100 }
01101 
01102 /* FIXME: Mostly duplicated with ngram_search_bestpath(). */
01103 static ps_latlink_t *
01104 fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
01105 {
01106     fsg_search_t *fsgs = (fsg_search_t *)search;
01107 
01108     if (search->last_link == NULL) {
01109         search->last_link = ps_lattice_bestpath(search->dag, NULL,
01110                                                 1.0, fsgs->ascale);
01111         if (search->last_link == NULL)
01112             return NULL;
01113         /* Also calculate betas so we can fill in the posterior
01114          * probability field in the segmentation. */
01115         if (search->post == 0)
01116             search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale);
01117     }
01118     if (out_score)
01119         *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
01120     return search->last_link;
01121 }
01122 
01123 char const *
01124 fsg_search_hyp(ps_search_t *search, int32 *out_score)
01125 {
01126     fsg_search_t *fsgs = (fsg_search_t *)search;
01127     dict_t *dict = ps_search_dict(search);
01128     char *c;
01129     size_t len;
01130     int bp, bpidx;
01131 
01132     /* Get last backpointer table index. */
01133     bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
01134     /* No hypothesis (yet). */
01135     if (bpidx <= 0)
01136         return NULL;
01137 
01138     /* If bestpath is enabled and the utterance is complete, then run it. */
01139     if (fsgs->bestpath && fsgs->final) {
01140         ps_lattice_t *dag;
01141         ps_latlink_t *link;
01142 
01143         if ((dag = fsg_search_lattice(search)) == NULL)
01144             return NULL;
01145         if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL)
01146             return NULL;
01147         return ps_lattice_hyp(dag, link);
01148     }
01149 
01150     bp = bpidx;
01151     len = 0;
01152     while (bp > 0) {
01153         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01154         fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
01155         char const *baseword;
01156         int32 wid;
01157 
01158         bp = fsg_hist_entry_pred(hist_entry);
01159         wid = fsg_link_wid(fl);
01160         if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
01161             continue;
01162         baseword = dict_basestr(dict,
01163                                 dict_wordid(dict,
01164                                             fsg_model_word_str(fsgs->fsg, wid)));
01165         len += strlen(baseword) + 1;
01166     }
01167     
01168     ckd_free(search->hyp_str);
01169     if (len == 0) {
01170         search->hyp_str = NULL;
01171         return search->hyp_str;
01172     }
01173     search->hyp_str = ckd_calloc(1, len);
01174 
01175     bp = bpidx;
01176     c = search->hyp_str + len - 1;
01177     while (bp > 0) {
01178         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01179         fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
01180         char const *baseword;
01181         int32 wid;
01182 
01183         bp = fsg_hist_entry_pred(hist_entry);
01184         wid = fsg_link_wid(fl);
01185         if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid))
01186             continue;
01187         baseword = dict_basestr(dict,
01188                                 dict_wordid(dict,
01189                                             fsg_model_word_str(fsgs->fsg, wid)));
01190         len = strlen(baseword);
01191         c -= len;
01192         memcpy(c, baseword, len);
01193         if (c > search->hyp_str) {
01194             --c;
01195             *c = ' ';
01196         }
01197     }
01198 
01199     return search->hyp_str;
01200 }
01201 
01202 static void
01203 fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry)
01204 {
01205     fsg_search_t *fsgs = (fsg_search_t *)seg->search;
01206     fsg_hist_entry_t *ph = NULL;
01207     int32 bp;
01208 
01209     if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
01210         ph = fsg_history_entry_get(fsgs->history, bp);
01211     seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid);
01212     seg->ef = fsg_hist_entry_frame(hist_entry);
01213     seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
01214     /* This is kind of silly but it happens for null transitions. */
01215     if (seg->sf > seg->ef) seg->sf = seg->ef;
01216     seg->prob = 0; /* Bogus value... */
01217     /* "Language model" score = transition probability. */
01218     seg->lback = 1;
01219     seg->lscr = hist_entry->fsglink->logs2prob;
01220     if (ph) {
01221         /* FIXME: Not sure exactly how cross-word triphones are handled. */
01222         seg->ascr = hist_entry->score - ph->score - seg->lscr;
01223     }
01224     else
01225         seg->ascr = hist_entry->score - seg->lscr;
01226 }
01227 
01228 
01229 static void
01230 fsg_seg_free(ps_seg_t *seg)
01231 {
01232     fsg_seg_t *itor = (fsg_seg_t *)seg;
01233     ckd_free(itor->hist);
01234     ckd_free(itor);
01235 }
01236 
01237 static ps_seg_t *
01238 fsg_seg_next(ps_seg_t *seg)
01239 {
01240     fsg_seg_t *itor = (fsg_seg_t *)seg;
01241 
01242     if (++itor->cur == itor->n_hist) {
01243         fsg_seg_free(seg);
01244         return NULL;
01245     }
01246 
01247     fsg_seg_bp2itor(seg, itor->hist[itor->cur]);
01248     return seg;
01249 }
01250 
01251 static ps_segfuncs_t fsg_segfuncs = {
01252     /* seg_next */ fsg_seg_next,
01253     /* seg_free */ fsg_seg_free
01254 };
01255 
01256 static ps_seg_t *
01257 fsg_search_seg_iter(ps_search_t *search, int32 *out_score)
01258 {
01259     fsg_search_t *fsgs = (fsg_search_t *)search;
01260     fsg_seg_t *itor;
01261     int bp, bpidx, cur;
01262 
01263     bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score);
01264     /* No hypothesis (yet). */
01265     if (bpidx <= 0)
01266         return NULL;
01267 
01268     /* If bestpath is enabled and the utterance is complete, then run it. */
01269     if (fsgs->bestpath && fsgs->final) {
01270         ps_lattice_t *dag;
01271         ps_latlink_t *link;
01272 
01273         if ((dag = fsg_search_lattice(search)) == NULL)
01274             return NULL;
01275         if ((link = fsg_search_bestpath(search, out_score, TRUE)) == NULL)
01276             return NULL;
01277         return ps_lattice_seg_iter(dag, link, 1.0);
01278     }
01279 
01280     /* Calling this an "iterator" is a bit of a misnomer since we have
01281      * to get the entire backtrace in order to produce it.  On the
01282      * other hand, all we actually need is the bptbl IDs, and we can
01283      * allocate a fixed-size array of them. */
01284     itor = ckd_calloc(1, sizeof(*itor));
01285     itor->base.vt = &fsg_segfuncs;
01286     itor->base.search = search;
01287     itor->base.lwf = 1.0;
01288     itor->n_hist = 0;
01289     bp = bpidx;
01290     while (bp > 0) {
01291         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01292         bp = fsg_hist_entry_pred(hist_entry);
01293         ++itor->n_hist;
01294     }
01295     if (itor->n_hist == 0) {
01296         ckd_free(itor);
01297         return NULL;
01298     }
01299     itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist));
01300     cur = itor->n_hist - 1;
01301     bp = bpidx;
01302     while (bp > 0) {
01303         fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp);
01304         itor->hist[cur] = hist_entry;
01305         bp = fsg_hist_entry_pred(hist_entry);
01306         --cur;
01307     }
01308 
01309     /* Fill in relevant fields for first element. */
01310     fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]);
01311     
01312     return (ps_seg_t *)itor;
01313 }
01314 
01315 static int
01316 fsg_search_prob(ps_search_t *search)
01317 {
01318     fsg_search_t *fsgs = (fsg_search_t *)search;
01319 
01320     /* If bestpath is enabled and the utterance is complete, then run it. */
01321     if (fsgs->bestpath && fsgs->final) {
01322         ps_lattice_t *dag;
01323         ps_latlink_t *link;
01324 
01325         if ((dag = fsg_search_lattice(search)) == NULL)
01326             return 0;
01327         if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
01328             return 0;
01329         return search->post;
01330     }
01331     else {
01332         /* FIXME: Give some kind of good estimate here, eventually. */
01333         return 0;
01334     }
01335 }
01336 
01337 static ps_latnode_t *
01338 new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 ascr)
01339 {
01340     ps_latnode_t *node;
01341 
01342     for (node = dag->nodes; node; node = node->next)
01343         if (node->sf == sf && node->wid == wid)
01344             break;
01345 
01346     if (node) {
01347         /* Update end frames. */
01348         if (node->lef == -1 || node->lef < ef)
01349             node->lef = ef;
01350         if (node->fef == -1 || node->fef > ef)
01351             node->fef = ef;
01352         /* Update best link score. */
01353         if (ascr BETTER_THAN node->info.best_exit)
01354             node->info.best_exit = ascr;
01355     }
01356     else {
01357         /* New node; link to head of list */
01358         node = listelem_malloc(dag->latnode_alloc);
01359         node->wid = wid;
01360         node->sf = sf;
01361         node->fef = node->lef = ef;
01362         node->reachable = FALSE;
01363         node->entries = NULL;
01364         node->exits = NULL;
01365         node->info.best_exit = ascr;
01366 
01367         node->next = dag->nodes;
01368         dag->nodes = node;
01369     }
01370 
01371     return node;
01372 }
01373 
01374 static ps_latnode_t *
01375 find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid)
01376 {
01377     ps_latnode_t *node;
01378 
01379     for (node = dag->nodes; node; node = node->next)
01380         if (node->sf == sf && node->wid == wid)
01381             break;
01382     return node;
01383 }
01384 
01385 static ps_latnode_t *
01386 find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag)
01387 {
01388     ps_latnode_t *node;
01389     glist_t start = NULL;
01390     int nstart = 0;
01391 
01392     /* Look for all nodes starting in frame zero with some exits. */
01393     for (node = dag->nodes; node; node = node->next) {
01394         if (node->sf == 0 && node->exits) {
01395             E_INFO("Start node %s.%d:%d:%d\n",
01396                    fsg_model_word_str(fsgs->fsg, node->wid),
01397                    node->sf, node->fef, node->lef);
01398             start = glist_add_ptr(start, node);
01399             ++nstart;
01400         }
01401     }
01402 
01403     /* If there was more than one start node candidate, then we need
01404      * to create an artificial start node with epsilon transitions to
01405      * all of them. */
01406     if (nstart == 1) {
01407         node = gnode_ptr(start);
01408     }
01409     else {
01410         gnode_t *st;
01411         int wid;
01412 
01413         wid = fsg_model_word_add(fsgs->fsg, "<s>");
01414         if (fsgs->fsg->silwords)
01415             bitvec_set(fsgs->fsg->silwords, wid);
01416         node = new_node(dag, fsgs->fsg, 0, 0, wid, 0);
01417         for (st = start; st; st = gnode_next(st))
01418             ps_lattice_link(dag, node, gnode_ptr(st), 0, 0);
01419     }
01420     glist_free(start);
01421     return node;
01422 }
01423 
01424 static ps_latnode_t *
01425 find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag)
01426 {
01427     ps_latnode_t *node;
01428     glist_t end = NULL;
01429     int nend = 0;
01430 
01431     /* Look for all nodes ending in last frame with some entries. */
01432     for (node = dag->nodes; node; node = node->next) {
01433         if (node->lef == dag->n_frames - 1 && node->entries) {
01434             E_INFO("End node %s.%d:%d:%d (%d)\n",
01435                    fsg_model_word_str(fsgs->fsg, node->wid),
01436                    node->sf, node->fef, node->lef, node->info.best_exit);
01437             end = glist_add_ptr(end, node);
01438             ++nend;
01439         }
01440     }
01441 
01442     if (nend == 1) {
01443         node = gnode_ptr(end);
01444     }
01445     else if (nend == 0) {
01446         ps_latnode_t *last = NULL;
01447         int ef = 0;
01448 
01449         /* If there were no end node candidates, then just use the
01450          * node with the last exit frame. */
01451         for (node = dag->nodes; node; node = node->next) {
01452             if (node->lef > ef && node->entries) {
01453                 last = node;
01454                 ef = node->lef;
01455             }
01456         }
01457         node = last;
01458         if (node)
01459             E_INFO("End node %s.%d:%d:%d (%d)\n",
01460                    fsg_model_word_str(fsgs->fsg, node->wid),
01461                    node->sf, node->fef, node->lef, node->info.best_exit);
01462     }    
01463     else {
01464         /* If there was more than one end node candidate, then we need
01465          * to create an artificial end node with epsilon transitions
01466          * out of all of them. */
01467         gnode_t *st;
01468         int wid;
01469 
01470         wid = fsg_model_word_add(fsgs->fsg, "</s>");
01471         if (fsgs->fsg->silwords)
01472             bitvec_set(fsgs->fsg->silwords, wid);
01473         node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, 0);
01474         /* Use the "best" (in reality it will be the only) exit link
01475          * score from this final node as the link score. */
01476         for (st = end; st; st = gnode_next(st)) {
01477             ps_latnode_t *src = gnode_ptr(st);
01478             ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame);
01479         }
01480     }
01481     glist_free(end);
01482     return node;
01483 }
01484 
01485 static void
01486 mark_reachable(ps_lattice_t *dag, ps_latnode_t *end)
01487 {
01488     glist_t q;
01489 
01490     /* It doesn't matter which order we do this in. */
01491     end->reachable = TRUE;
01492     q = glist_add_ptr(NULL, end);
01493     while (q) {
01494         ps_latnode_t *node = gnode_ptr(q);
01495         latlink_list_t *x;
01496 
01497         /* Pop the front of the list. */
01498         q = gnode_free(q, NULL);
01499         /* Expand all its predecessors that haven't been seen yet. */
01500         for (x = node->entries; x; x = x->next) {
01501             ps_latnode_t *next = x->link->from;
01502             if (!next->reachable) {
01503                 next->reachable = TRUE;
01504                 q = glist_add_ptr(q, next);
01505             }
01506         }
01507     }
01508 }
01509 
01518 static ps_lattice_t *
01519 fsg_search_lattice(ps_search_t *search)
01520 {
01521     fsg_search_t *fsgs;
01522     fsg_model_t *fsg;
01523     ps_latnode_t *node;
01524     ps_lattice_t *dag;
01525     int32 i, n;
01526 
01527     fsgs = (fsg_search_t *)search;
01528 
01529     /* Check to see if a lattice has previously been created over the
01530      * same number of frames, and reuse it if so. */
01531     if (search->dag && search->dag->n_frames == fsgs->frame)
01532         return search->dag;
01533 
01534     /* Nope, create a new one. */
01535     ps_lattice_free(search->dag);
01536     search->dag = NULL;
01537     dag = ps_lattice_init_search(search, fsgs->frame);
01538     fsg = fsgs->fsg;
01539 
01540     /*
01541      * Each history table entry represents a link in the word graph.
01542      * The set of nodes is determined by the number of unique
01543      * (word,start-frame) pairs in the history table.  So we will
01544      * first find all those nodes.
01545      */
01546     n = fsg_history_n_entries(fsgs->history);
01547     for (i = 0; i < n; ++i) {
01548         fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
01549         ps_latnode_t *node;
01550         int32 ascr;
01551         int sf;
01552 
01553         /* Skip null transitions. */
01554         if (fh->fsglink == NULL || fh->fsglink->wid == -1)
01555             continue;
01556 
01557         /* Find the start node of this link. */
01558         if (fh->pred) {
01559             fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
01560             /* FIXME: We include the transition score in the lattice
01561              * link score.  This is because of the practical
01562              * difficulty of obtaining it separately in bestpath or
01563              * forward-backward search, and because it is essentially
01564              * a unigram probability, so there is no need to treat it
01565              * separately from the acoustic score.  However, it's not
01566              * clear that this will actually yield correct results.*/
01567             ascr = fh->score - pfh->score;
01568             sf = pfh->frame + 1;
01569         }
01570         else {
01571             ascr = fh->score;
01572             sf = 0;
01573         }
01574 
01575         /*
01576          * Note that although scores are tied to links rather than
01577          * nodes, it's possible that there are no links out of the
01578          * destination node, and thus we need to preserve its score in
01579          * case it turns out to be utterance-final.
01580          */
01581         node = new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, ascr);
01582     }
01583 
01584     /*
01585      * Now, we will create links only to nodes that actually exist.
01586      */
01587     n = fsg_history_n_entries(fsgs->history);
01588     for (i = 0; i < n; ++i) {
01589         fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i);
01590         ps_latnode_t *src, *dest;
01591         int32 ascr;
01592         int sf;
01593         int j;
01594 
01595         /* Skip null transitions. */
01596         if (fh->fsglink == NULL || fh->fsglink->wid == -1)
01597             continue;
01598 
01599         /* Find the start node of this link and calculate its link score. */
01600         if (fh->pred) {
01601             fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred);
01602             sf = pfh->frame + 1;
01603             ascr = fh->score - pfh->score;
01604         }
01605         else {
01606             ascr = fh->score;
01607             sf = 0;
01608         }
01609         src = find_node(dag, fsg, sf, fh->fsglink->wid);
01610     
01611         /*
01612          * For each non-epsilon link following this one, look for a
01613          * matching node in the lattice and link to it.
01614          */
01615         sf = fh->frame + 1;
01616         for (j = 0; j < fsg->n_state; ++j) {
01617             gnode_t *gn;
01618 
01619             for (gn = fsg_model_trans(fsg, fh->fsglink->to_state, j); gn; gn = gnode_next(gn)) {
01620                 fsg_link_t *link = gnode_ptr(gn);
01621                 if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
01622                     ps_lattice_link(dag, src, dest, ascr, fh->frame);
01623             }
01624 
01625             /*
01626              * Transitive closure on nulls has already been done, so we
01627              * just need to look one link forward from them.
01628              */
01629             if (fsg_model_null_trans(fsg, fh->fsglink->to_state,j)) {
01630                 gnode_t *gn2;
01631                 int k;
01632 
01633                 /* Add all non-null links out of j. */
01634                 for (k = 0; k < fsg->n_state; ++k) {
01635                     for (gn2 = fsg_model_trans(fsg, j, k); gn2; gn2 = gnode_next(gn2)) {
01636                         fsg_link_t *link = gnode_ptr(gn2);
01637                         if ((dest = find_node(dag, fsg, sf, link->wid)) != NULL)
01638                             ps_lattice_link(dag, src, dest, ascr, fh->frame);
01639                     }
01640                 }
01641             }
01642         }
01643     }
01644 
01645     /* Figure out which nodes are the start and end nodes. */
01646     if ((dag->start = find_start_node(fsgs, dag)) == NULL)
01647         goto error_out;
01648     if ((dag->end = find_end_node(fsgs, dag)) == NULL)
01649         goto error_out;
01650     E_INFO("lattice start node %s.%d end node %s.%d\n",
01651            fsg_model_word_str(fsg, dag->start->wid), dag->start->sf,
01652            fsg_model_word_str(fsg, dag->end->wid), dag->end->sf);
01653     /* FIXME: Need to calculate final_node_ascr here. */
01654 
01655     /*
01656      * Convert word IDs from FSG to dictionary.
01657      */
01658     for (node = dag->nodes; node; node = node->next) {
01659         node->wid = dict_wordid(dag->search->dict,
01660                                 fsg_model_word_str(fsg, node->wid));
01661         node->basewid = dict_basewid(dag->search->dict, node->wid);
01662     }
01663 
01664     /*
01665      * Now we are done, because the links in the graph are uniquely
01666      * defined by the history table.  However we should remove any
01667      * nodes which are not reachable from the end node of the FSG.
01668      * Everything is reachable from the start node by definition.
01669      */
01670     mark_reachable(dag, dag->end);
01671 
01672     ps_lattice_delete_unreachable(dag);
01673     {
01674         int32 silpen, fillpen;
01675 
01676         silpen = (int32)(logmath_log(fsg->lmath,
01677                                      cmd_ln_float32_r(ps_search_config(fsgs), "-silprob"))
01678                          * fsg->lw);
01679         fillpen = (int32)(logmath_log(fsg->lmath,
01680                                       cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob"))
01681                           * fsg->lw);
01682         ps_lattice_bypass_fillers(dag, silpen, fillpen);
01683     }
01684     search->dag = dag;
01685     return dag;
01686 
01687 error_out:
01688     ps_lattice_free(dag);
01689     return NULL;
01690 
01691 }

Generated on Sat Jan 8 2011 for PocketSphinx by  doxygen 1.7.1