PocketSphinx  0.6
state_align_search.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 #include "state_align_search.h"
43 
44 static int
45 state_align_search_start(ps_search_t *search)
46 {
48 
49  /* Activate the initial state. */
50  hmm_enter(sas->hmms, 0, 0, 0);
51 
52  return 0;
53 }
54 
55 static void
56 renormalize_hmms(state_align_search_t *sas, int frame_idx, int32 norm)
57 {
58  int i;
59  for (i = 0; i < sas->n_phones; ++i)
60  hmm_normalize(sas->hmms + i, norm);
61 }
62 
63 static int32
64 evaluate_hmms(state_align_search_t *sas, int16 const *senscr, int frame_idx)
65 {
66  int32 bs = WORST_SCORE;
67  int i, bi;
68 
69  hmm_context_set_senscore(sas->hmmctx, senscr);
70 
71  bi = 0;
72  for (i = 0; i < sas->n_phones; ++i) {
73  hmm_t *hmm = sas->hmms + i;
74  int32 score;
75 
76  if (hmm_frame(hmm) < frame_idx)
77  continue;
78  score = hmm_vit_eval(hmm);
79  if (score BETTER_THAN bs) {
80  bs = score;
81  bi = i;
82  }
83  }
84  return bs;
85 }
86 
87 static void
88 prune_hmms(state_align_search_t *sas, int frame_idx)
89 {
90  int nf = frame_idx + 1;
91  int i;
92 
93  /* Check all phones to see if they remain active in the next frame. */
94  for (i = 0; i < sas->n_phones; ++i) {
95  hmm_t *hmm = sas->hmms + i;
96  if (hmm_frame(hmm) < frame_idx)
97  continue;
98  hmm_frame(hmm) = nf;
99  }
100 }
101 
102 static void
103 phone_transition(state_align_search_t *sas, int frame_idx)
104 {
105  int nf = frame_idx + 1;
106  int i;
107 
108  for (i = 0; i < sas->n_phones - 1; ++i) {
109  hmm_t *hmm, *nhmm;
110  int32 newphone_score;
111 
112  hmm = sas->hmms + i;
113  if (hmm_frame(hmm) != nf)
114  continue;
115 
116  newphone_score = hmm_out_score(hmm);
117  /* Transition into next phone using the usual Viterbi rule. */
118  nhmm = hmm + 1;
119  if (hmm_frame(nhmm) < frame_idx
120  || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
121  hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
122  }
123  }
124 }
125 
126 #define TOKEN_STEP 20
127 static void
128 extend_tokenstack(state_align_search_t *sas, int frame_idx)
129 {
130  if (frame_idx >= sas->n_fr_alloc) {
131  sas->n_fr_alloc = frame_idx + TOKEN_STEP + 1;
132  sas->tokens = ckd_realloc(sas->tokens,
133  sas->n_emit_state * sas->n_fr_alloc
134  * sizeof(*sas->tokens));
135  }
136  memset(sas->tokens + frame_idx * sas->n_emit_state, 0xff,
137  sas->n_emit_state * sizeof(*sas->tokens));
138 }
139 
140 static void
141 record_transitions(state_align_search_t *sas, int frame_idx)
142 {
143  uint16 *tokens;
144  int i;
145 
146  /* Push another frame of tokens on the stack. */
147  extend_tokenstack(sas, frame_idx);
148  tokens = sas->tokens + frame_idx * sas->n_emit_state;
149 
150  /* Scan all active HMMs */
151  for (i = 0; i < sas->n_phones; ++i) {
152  hmm_t *hmm = sas->hmms + i;
153  int j;
154 
155  if (hmm_frame(hmm) < frame_idx)
156  continue;
157  for (j = 0; j < sas->hmmctx->n_emit_state; ++j) {
158  int state_idx = i * sas->hmmctx->n_emit_state + j;
159  /* Record their backpointers on the token stack. */
160  tokens[state_idx] = hmm_history(hmm, j);
161  /* Update backpointer fields with state index. */
162  hmm_history(hmm, j) = state_idx;
163  }
164  }
165 }
166 
167 static int
168 state_align_search_step(ps_search_t *search, int frame_idx)
169 {
171  acmod_t *acmod = ps_search_acmod(search);
172  int16 const *senscr;
173  int i;
174 
175  /* Calculate senone scores. */
176  for (i = 0; i < sas->n_phones; ++i)
177  acmod_activate_hmm(acmod, sas->hmms + i);
178  senscr = acmod_score(acmod, &frame_idx);
179 
180  /* Renormalize here if needed. */
181  /* FIXME: Make sure to (unit-)test this!!! */
182  if ((sas->best_score - 0x300000) WORSE_THAN WORST_SCORE) {
183  E_INFO("Renormalizing Scores at frame %d, best score %d\n",
184  frame_idx, sas->best_score);
185  renormalize_hmms(sas, frame_idx, sas->best_score);
186  }
187 
188  /* Viterbi step. */
189  sas->best_score = evaluate_hmms(sas, senscr, frame_idx);
190  prune_hmms(sas, frame_idx);
191 
192  /* Transition out of non-emitting states. */
193  phone_transition(sas, frame_idx);
194 
195  /* Generate new tokens from best path results. */
196  record_transitions(sas, frame_idx);
197 
198  /* Update frame counter */
199  sas->frame = frame_idx;
200 
201  return 0;
202 }
203 
204 static int
205 state_align_search_finish(ps_search_t *search)
206 {
208  hmm_t *final_phone = sas->hmms + sas->n_phones - 1;
209  ps_alignment_iter_t *itor;
211  int next_state, next_start, state, frame;
212 
213  /* Best state exiting the last frame. */
214  next_state = state = hmm_out_history(final_phone);
215  if (state == 0xffff) {
216  E_ERROR("Failed to reach final state in alignment\n");
217  return -1;
218  }
219  itor = ps_alignment_states(sas->al);
220  next_start = sas->frame + 1;
221  for (frame = sas->frame - 1; frame >= 0; --frame) {
222  state = sas->tokens[frame * sas->n_emit_state + state];
223  /* State boundary, update alignment entry for next state. */
224  if (state != next_state) {
225  itor = ps_alignment_iter_goto(itor, next_state);
226  assert(itor != NULL);
227  ent = ps_alignment_iter_get(itor);
228  ent->start = frame + 1;
229  ent->duration = next_start - ent->start;
230  E_DEBUG(1,("state %d start %d end %d\n", next_state,
231  ent->start, next_start));
232  next_state = state;
233  next_start = frame + 1;
234  }
235  }
236  /* Update alignment entry for initial state. */
237  itor = ps_alignment_iter_goto(itor, 0);
238  assert(itor != NULL);
239  ent = ps_alignment_iter_get(itor);
240  ent->start = 0;
241  ent->duration = next_start;
242  E_DEBUG(1,("state %d start %d end %d\n", 0,
243  ent->start, next_start));
246 
247  return 0;
248 }
249 
250 static int
251 state_align_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
252 {
253  /* This does nothing. */
254  return 0;
255 }
256 
257 static void
258 state_align_search_free(ps_search_t *search)
259 {
261  ps_search_deinit(search);
262  ckd_free(sas->hmms);
263  ckd_free(sas->tokens);
264  hmm_context_free(sas->hmmctx);
265  ckd_free(sas);
266 }
267 
268 static ps_searchfuncs_t state_align_search_funcs = {
269  /* name: */ "state_align",
270  /* start: */ state_align_search_start,
271  /* step: */ state_align_search_step,
272  /* finish: */ state_align_search_finish,
273  /* reinit: */ state_align_search_reinit,
274  /* free: */ state_align_search_free,
275  /* lattice: */ NULL,
276  /* hyp: */ NULL,
277  /* prob: */ NULL,
278  /* seg_iter: */ NULL,
279 };
280 
281 ps_search_t *
282 state_align_search_init(cmd_ln_t *config,
283  acmod_t *acmod,
284  ps_alignment_t *al)
285 {
287  ps_alignment_iter_t *itor;
288  hmm_t *hmm;
289 
290  sas = ckd_calloc(1, sizeof(*sas));
291  ps_search_init(ps_search_base(sas), &state_align_search_funcs,
292  config, acmod, al->d2p->dict, al->d2p);
293  sas->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
294  acmod->tmat->tp, NULL, acmod->mdef->sseq);
295  if (sas->hmmctx == NULL) {
296  ckd_free(sas);
297  return NULL;
298  }
299  sas->al = al;
300 
301  /* Generate HMM vector from phone level of alignment. */
302  sas->n_phones = ps_alignment_n_phones(al);
304  sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms));
305  for (hmm = sas->hmms, itor = ps_alignment_phones(al); itor;
306  ++hmm, itor = ps_alignment_iter_next(itor)) {
308  hmm_init(sas->hmmctx, hmm, FALSE,
309  ent->id.pid.ssid, ent->id.pid.tmatid);
310  }
311  return ps_search_base(sas);
312 }