PocketSphinx  0.6
ms_senone.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers. */
39 #include <string.h>
40 #include <stdio.h>
41 #include <assert.h>
42 
43 /* SphinxBase headers. */
44 #include <sphinxbase/bio.h>
45 
46 /* Local headers. */
47 #include "ms_senone.h"
48 
49 
/* "version" header value expected in mixture-weight files (see senone_mixw_read). */
#define MIXW_PARAM_VERSION "1.0"
/* "version" header value expected in senone-to-codebook map files (see senone_mgau_map_read). */
#define SPDEF_PARAM_VERSION "1.2"

/*
 * Select the spelling of the inline qualifier for the compiler in use:
 * plain `inline` when the compiler reports exactly C99, `static inline`
 * for other GCC-compatible compilers, `__inline` for MSVC, and plain
 * `static` as the portable fallback.
 * NOTE(review): LOGMATH_INLINE is not referenced in the part of this file
 * visible here -- confirm whether it is still needed.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ == 199901L)
#  define LOGMATH_INLINE inline
#elif defined(__GNUC__)
#  define LOGMATH_INLINE static inline
#elif defined(_MSC_VER)
#  define LOGMATH_INLINE __inline
#else
#  define LOGMATH_INLINE static
#endif
62 
63 static int32
64 senone_mgau_map_read(senone_t * s, char const *file_name)
65 {
66  FILE *fp;
67  int32 byteswap, chksum_present, n_gauden_present;
68  uint32 chksum;
69  int32 i;
70  char eofchk;
71  char **argname, **argval;
72  void *ptr;
73  float32 v;
74 
75  E_INFO("Reading senone gauden-codebook map file: %s\n", file_name);
76 
77  if ((fp = fopen(file_name, "rb")) == NULL)
78  E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name);
79 
80  /* Read header, including argument-value info and 32-bit byteorder magic */
81  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
82  E_FATAL("Failed to read header from file '%s'\n", file_name);
83 
84  /* Parse argument-value list */
85  chksum_present = 0;
86  n_gauden_present = 0;
87  for (i = 0; argname[i]; i++) {
88  if (strcmp(argname[i], "version") == 0) {
89  if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) {
90  E_WARN("Version mismatch(%s): %s, expecting %s\n",
91  file_name, argval[i], SPDEF_PARAM_VERSION);
92  }
93 
94  /* HACK!! Convert version# to float32 and take appropriate action */
95  if (sscanf(argval[i], "%f", &v) != 1)
96  E_FATAL("%s: Bad version no. string: %s\n", file_name,
97  argval[i]);
98 
99  n_gauden_present = (v > 1.1) ? 1 : 0;
100  }
101  else if (strcmp(argname[i], "chksum0") == 0) {
102  chksum_present = 1; /* Ignore the associated value */
103  }
104  }
105  bio_hdrarg_free(argname, argval);
106  argname = argval = NULL;
107 
108  chksum = 0;
109 
110  /* Read #gauden (if version matches) */
111  if (n_gauden_present) {
112  E_INFO("Reading number of codebooks from %s\n", file_name);
113  if (bio_fread
114  (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
115  E_FATAL("fread(%s) (#gauden) failed\n", file_name);
116  }
117 
118  /* Read 1d array data */
119  if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp,
120  byteswap, &chksum) < 0) {
121  E_FATAL("bio_fread_1d(%s) failed\n", file_name);
122  }
123  s->mgau = ptr;
124  E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden);
125 
126  /* Infer n_gauden if not present in this version */
127  if (!n_gauden_present) {
128  s->n_gauden = 1;
129  for (i = 0; i < s->n_sen; i++)
130  if (s->mgau[i] >= s->n_gauden)
131  s->n_gauden = s->mgau[i] + 1;
132  }
133 
134  if (chksum_present)
135  bio_verify_chksum(fp, byteswap, chksum);
136 
137  if (fread(&eofchk, 1, 1, fp) == 1)
138  E_FATAL("More data than expected in %s: %d\n", file_name, eofchk);
139 
140  fclose(fp);
141 
142  E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen,
143  s->n_gauden);
144 
145  return 1;
146 }
147 
148 
149 static int32
150 senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
151 {
152  char eofchk;
153  FILE *fp;
154  int32 byteswap, chksum_present;
155  uint32 chksum;
156  float32 *pdf;
157  int32 i, f, c, p, n_err;
158  char **argname, **argval;
159 
160  E_INFO("Reading senone mixture weights: %s\n", file_name);
161 
162  if ((fp = fopen(file_name, "rb")) == NULL)
163  E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name);
164 
165  /* Read header, including argument-value info and 32-bit byteorder magic */
166  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
167  E_FATAL("Failed to read header from file '%s'\n", file_name);
168 
169  /* Parse argument-value list */
170  chksum_present = 0;
171  for (i = 0; argname[i]; i++) {
172  if (strcmp(argname[i], "version") == 0) {
173  if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
174  E_WARN("Version mismatch(%s): %s, expecting %s\n",
175  file_name, argval[i], MIXW_PARAM_VERSION);
176  }
177  else if (strcmp(argname[i], "chksum0") == 0) {
178  chksum_present = 1; /* Ignore the associated value */
179  }
180  }
181  bio_hdrarg_free(argname, argval);
182  argname = argval = NULL;
183 
184  chksum = 0;
185 
186  /* Read #senones, #features, #codewords, arraysize */
187  if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
188  1)
189  ||
190  (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum)
191  != 1)
192  || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum)
193  != 1)
194  || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
195  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
196  }
197  if (i != s->n_sen * s->n_feat * s->n_cw) {
198  E_FATAL
199  ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
200  file_name, i, s->n_sen, s->n_feat, s->n_cw);
201  }
202 
203  /*
204  * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
205  * All PDF values will be truncated (in the LSB positions) by these many bits.
206  */
207  if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
208  E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);
209 
210  /* Use a fixed shift for compatibility with everything else. */
211  E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);
212 
213  /*
214  * Allocate memory for senone PDF data. Organize normally or transposed depending on
215  * s->n_gauden.
216  */
217  if (s->n_gauden > 1) {
218  E_INFO("Not transposing mixture weights in memory\n");
219  s->pdf =
220  (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
221  sizeof(senprob_t));
222  }
223  else {
224  E_INFO("Transposing mixture weights in memory\n");
225  s->pdf =
226  (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
227  sizeof(senprob_t));
228  }
229 
230  /* Temporary structure to read in floats */
231  pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));
232 
233  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
234  n_err = 0;
235  for (i = 0; i < s->n_sen; i++) {
236  for (f = 0; f < s->n_feat; f++) {
237  if (bio_fread
238  ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap,
239  &chksum)
240  != s->n_cw) {
241  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
242  }
243 
244  /* Normalize and floor */
245  if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
246  n_err++;
247  vector_floor(pdf, s->n_cw, s->mixwfloor);
248  vector_sum_norm(pdf, s->n_cw);
249 
250  /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
251  for (c = 0; c < s->n_cw; c++) {
252  p = -(logmath_log(lmath, pdf[c]));
253  p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */
254 
255  if (s->n_gauden > 1)
256  s->pdf[i][f][c] =
257  (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
258  else
259  s->pdf[f][c][i] =
260  (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
261  }
262  }
263  }
264  if (n_err > 0)
265  E_WARN("Weight normalization failed for %d senones\n", n_err);
266 
267  ckd_free(pdf);
268 
269  if (chksum_present)
270  bio_verify_chksum(fp, byteswap, chksum);
271 
272  if (fread(&eofchk, 1, 1, fp) == 1)
273  E_FATAL("More data than expected in %s\n", file_name);
274 
275  fclose(fp);
276 
277  E_INFO
278  ("Read mixture weights for %d senones: %d features x %d codewords\n",
279  s->n_sen, s->n_feat, s->n_cw);
280 
281  return 1;
282 }
283 
284 
285 senone_t *
286 senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file,
287  float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef)
288 {
289  senone_t *s;
290  int32 n = 0, i;
291 
292  s = (senone_t *) ckd_calloc(1, sizeof(senone_t));
293  s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
294  s->mixwfloor = mixwfloor;
295 
296  s->n_gauden = g->n_mgau;
297  if (sen2mgau_map_file) {
298  if (!(strcmp(sen2mgau_map_file, ".semi.") == 0
299  || strcmp(sen2mgau_map_file, ".ptm.") == 0
300  || strcmp(sen2mgau_map_file, ".cont.") == 0)) {
301  senone_mgau_map_read(s, sen2mgau_map_file);
302  n = s->n_sen;
303  }
304  }
305  else {
306  if (s->n_gauden == 1)
307  sen2mgau_map_file = ".semi.";
308  else if (s->n_gauden == bin_mdef_n_ciphone(mdef))
309  sen2mgau_map_file = ".ptm.";
310  else
311  sen2mgau_map_file = ".cont.";
312  }
313 
314  senone_mixw_read(s, mixwfile, lmath);
315 
316  if (strcmp(sen2mgau_map_file, ".semi.") == 0) {
317  /* All-to-1 senones-codebook mapping */
318  E_INFO("Mapping all senones to one codebook\n");
319  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
320  }
321  else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) {
322  /* All-to-ciphone-id senones-codebook mapping */
323  E_INFO("Mapping senones to context-independent phone codebooks\n");
324  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
325  for (i = 0; i < s->n_sen; i++)
326  s->mgau[i] = bin_mdef_sen2cimap(mdef, i);
327  }
328  else if (strcmp(sen2mgau_map_file, ".cont.") == 0
329  || strcmp(sen2mgau_map_file, ".s3cont.") == 0) {
330  /* 1-to-1 senone-codebook mapping */
331  E_INFO("Mapping senones to individual codebooks\n");
332  if (s->n_sen <= 1)
333  E_FATAL("#senone=%d; must be >1\n", s->n_sen);
334 
335  s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau));
336  for (i = 0; i < s->n_sen; i++)
337  s->mgau[i] = i;
338  /* Not sure why this is here, it probably does nothing. */
339  s->n_gauden = s->n_sen;
340  }
341  else {
342  if (s->n_sen != n)
343  E_FATAL("#senones inconsistent: %d in %s; %d in %s\n",
344  n, sen2mgau_map_file, s->n_sen, mixwfile);
345  }
346 
347  s->featscr = NULL;
348  return s;
349 }
350 
351 void
352 senone_free(senone_t * s)
353 {
354  if (s == NULL)
355  return;
356  if (s->pdf)
357  ckd_free_3d((void *) s->pdf);
358  if (s->mgau)
359  ckd_free(s->mgau);
360  if (s->featscr)
361  ckd_free(s->featscr);
362  logmath_free(s->lmath);
363  ckd_free(s);
364 }
365 
366 
367 /*
368  * Compute senone score for one senone.
369  * NOTE: Remember that senone PDF tables contain SCALED, NEGATED logs3 values.
370  * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden.
371  */
372 int32
373 senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top)
374 {
375  int32 scr; /* total senone score */
376  int32 fden; /* Gaussian density */
377  int32 fscr; /* senone score for one feature */
378  int32 fwscr; /* senone score for one feature, one codeword */
379  int32 f, t;
380  gauden_dist_t *fdist;
381 
382  assert((id >= 0) && (id < s->n_sen));
383  assert((n_top > 0) && (n_top <= s->n_cw));
384 
385  scr = 0;
386 
387  for (f = 0; f < s->n_feat; f++) {
388  int top;
389  fdist = dist[f];
390 
391  /* Top codeword for feature f */
392  top = fden = ((int32)fdist[0].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
393  fscr = (s->n_gauden > 1)
394  ? (fden + -s->pdf[id][f][fdist[0].id]) /* untransposed */
395  : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */
396  E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
397  id, f, -(fscr - fden), -(fden-top), -(fscr-top)));
398  /* Remaining of n_top codewords for feature f */
399  for (t = 1; t < n_top; t++) {
400  fden = ((int32)fdist[t].dist + ((1<<SENSCR_SHIFT) - 1)) >> SENSCR_SHIFT;
401  fwscr = (s->n_gauden > 1) ?
402  (fden + -s->pdf[id][f][fdist[t].id]) :
403  (fden + -s->pdf[f][fdist[t].id][id]);
404  fscr = logmath_add(s->lmath, fscr, fwscr);
405  E_DEBUG(1, ("fden[%d][%d] l+= %d + %d = %d\n",
406  id, f, -(fwscr - fden), -(fden-top), -(fscr-top)));
407  }
408  /* Senone scores are also scaled, negated logs3 values. Hence
409  * we have to negate the stuff we calculated above. */
410  scr -= fscr;
411  }
412  /* Downscale scores. */
413  scr /= s->aw;
414 
415  /* Avoid overflowing int16 */
416  if (scr > 32767)
417  scr = 32767;
418  if (scr < -32768)
419  scr = -32768;
420  return scr;
421 }