92 #include <sphinxbase/ckd_alloc.h>
93 #include <sphinxbase/err.h>
99 #define MODEL_DEF_VERSION "0.3"
102 ciphone_add(
mdef_t * m,
char *ci,
int p)
104 assert(p < m->n_ciphone);
108 (
void *)(
long)p) != (
void *)(
long)p)
109 E_FATAL(
"hash_table_enter(%s) failed; duplicate CIphone?\n",
115 find_ph_lc(
ph_lc_t * lclist,
int lc)
119 for (lcptr = lclist; lcptr && (lcptr->lc != lc); lcptr = lcptr->next);
125 find_ph_rc(
ph_rc_t * rclist,
int rc)
129 for (rcptr = rclist; rcptr && (rcptr->rc != rc); rcptr = rcptr->next);
142 assert(p < m->n_phone);
159 if ((rcptr = find_ph_rc(lcptr->rclist, rc)) != NULL) {
160 __BIGSTACKVARIABLE__
char buf[4096];
162 mdef_phone_str(m, rcptr->pid, buf);
163 E_FATAL(
"Duplicate triphone: %s\n", buf);
169 rcptr->next = lcptr->rclist;
170 lcptr->rclist = rcptr;
179 if (hash_table_lookup_int32(m->
ciphone_ht, ci, &
id) < 0)
189 assert((
id >= 0) && (id < m->n_ciphone));
196 mdef_phone_str(
mdef_t * m,
int pid,
char *buf)
201 assert((pid >= 0) && (pid < m->n_phone));
205 if (pid < m->n_ciphone)
206 sprintf(buf,
"%s", mdef_ciphone_str(m, pid));
208 sprintf(buf,
"%s %s %s %c",
209 mdef_ciphone_str(m, m->
phone[pid].ci),
210 mdef_ciphone_str(m, m->
phone[pid].lc),
211 mdef_ciphone_str(m, m->
phone[pid].
rc),
227 assert((ci >= 0) && (ci < m->n_ciphone));
228 assert((lc >= 0) && (lc < m->n_ciphone));
229 assert((rc >= 0) && (rc < m->n_ciphone));
234 || ((rcptr = find_ph_rc(lcptr->rclist, rc)) == NULL)) {
239 newl = m->
ciphone[(int) lc].filler ? m->
sil : lc;
241 if ((newl == lc) && (newr == rc))
244 return (mdef_phone_id(m, ci, newl, newr, wpos));
254 assert((p >= 0) && (p < m->n_phone));
256 return ((p < m->n_ciphone) ? 1 : 0);
267 return ((s == m->
cd2cisen[s]) ? 1 : 0);
273 parse_tmat_senmap(
mdef_t * m,
char *line, int32 off,
int p)
276 __BIGSTACKVARIABLE__
char word[1024], *lp;
281 if ((sscanf(lp,
"%d%n", &n, &wlen) != 1) || (n < 0))
282 E_FATAL(
"Missing or bad transition matrix id: %s\n", line);
285 E_FATAL(
"tmat-id(%d) > #tmat in header(%d): %s\n", n, m->
n_tmat,
291 if ((sscanf(lp,
"%d%n", &s, &wlen) != 1) || (s < 0))
292 E_FATAL(
"Missing or bad state[%d]->senone mapping: %s\n", n,
298 if ((p < m->n_ciphone) && (m->
n_ci_sen <= s))
299 E_FATAL(
"CI-senone-id(%d) > #CI-senones(%d): %s\n", s,
302 E_FATAL(
"Senone-id(%d) > #senones(%d): %s\n", s, m->
n_sen,
310 if ((sscanf(lp,
"%s%n", word, &wlen) != 1) || (strcmp(word,
"N") != 0))
311 E_FATAL(
"Missing non-emitting state spec: %s\n", line);
315 if (sscanf(lp,
"%s%n", word, &wlen) == 1)
316 E_FATAL(
"Non-empty beyond non-emitting final state: %s\n", line);
321 parse_base_line(
mdef_t * m,
char *line,
int p)
324 __BIGSTACKVARIABLE__
char word[1024], *lp;
330 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
331 E_FATAL(
"Missing base phone name: %s\n", line);
335 ci = mdef_ciphone_id(m, word);
337 E_FATAL(
"Duplicate base phone: %s\n", line);
340 ciphone_add(m, word, p);
344 for (n = 0; n < 3; n++) {
345 if ((sscanf(lp,
"%s%n", word, &wlen) != 1)
346 || (strcmp(word,
"-") != 0))
347 E_FATAL(
"Bad context info for base phone: %s\n", line);
352 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
353 E_FATAL(
"Missing filler atribute field: %s\n", line);
355 if (strcmp(word,
"filler") == 0)
356 m->
ciphone[(int) ci].filler = 1;
357 else if (strcmp(word,
"n/a") == 0)
360 E_FATAL(
"Bad filler attribute field: %s\n", line);
365 parse_tmat_senmap(m, line, lp - line, p);
370 parse_tri_line(
mdef_t * m,
char *line,
int p)
373 __BIGSTACKVARIABLE__
char word[1024], *lp;
380 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
381 E_FATAL(
"Missing base phone name: %s\n", line);
384 ci = mdef_ciphone_id(m, word);
386 E_FATAL(
"Unknown base phone: %s\n", line);
389 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
390 E_FATAL(
"Missing left context: %s\n", line);
392 lc = mdef_ciphone_id(m, word);
394 E_FATAL(
"Unknown left context: %s\n", line);
397 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
398 E_FATAL(
"Missing right context: %s\n", line);
400 rc = mdef_ciphone_id(m, word);
402 E_FATAL(
"Unknown right context: %s\n", line);
405 if ((sscanf(lp,
"%s%n", word, &wlen) != 1) || (word[1] !=
'\0'))
406 E_FATAL(
"Missing or bad word-position spec: %s\n", line);
422 E_FATAL(
"Bad word-position spec: %s\n", line);
426 if (sscanf(lp,
"%s%n", word, &wlen) != 1)
427 E_FATAL(
"Missing filler attribute field: %s\n", line);
429 if (((strcmp(word,
"filler") == 0) && (m->
ciphone[(
int) ci].
filler)) ||
430 ((strcmp(word,
"n/a") == 0) && (!m->
ciphone[(
int) ci].
filler))) {
434 E_FATAL(
"Bad filler attribute field: %s\n", line);
436 triphone_add(m, ci, lc, rc, wpos, p);
439 parse_tmat_senmap(m, line, lp - line, p);
456 h = hash_table_new(m->
n_phone, HASH_CASE_YES);
460 for (p = 0; p < m->
n_phone; p++) {
463 == (j = hash_table_enter_bkey_int32(h, (
char *)m->
sseq[p], k, n_sseq)))
470 sseq = ckd_calloc_2d(n_sseq, m->
n_emit_state,
sizeof(**sseq));
472 g = hash_table_tolist(h, &j);
475 for (gn = g; gn; gn = gnode_next(gn)) {
476 he = (hash_entry_t *) gnode_ptr(gn);
477 j = (long)hash_entry_val(he);
478 memcpy(sseq[j], hash_entry_key(he), k);
483 ckd_free_2d(m->
sseq);
492 noncomment_line(
char *line, int32 size, FILE * fp)
494 while (fgets(line, size, fp) != NULL) {
509 int32 n_ci, n_tri, n_map, n;
510 __BIGSTACKVARIABLE__
char tag[1024], buf[1024];
517 E_FATAL(
"No mdef-file\n");
520 E_INFO(
"Reading model definition: %s\n", mdeffile);
524 if ((fp = fopen(mdeffile,
"r")) == NULL)
525 E_FATAL_SYSTEM(
"Failed to open mdef file '%s' for reading", mdeffile);
527 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
528 E_FATAL(
"Empty file: %s\n", mdeffile);
530 if (strncmp(buf,
"BMDF", 4) == 0 || strncmp(buf,
"FDMB", 4) == 0) {
532 (
"Found byte-order mark %.4s, assuming this is a binary mdef file\n",
538 if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
539 E_FATAL(
"Version error: Expecing %s, but read %s\n",
540 MODEL_DEF_VERSION, buf);
550 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
551 E_FATAL(
"Incomplete header\n");
553 if ((sscanf(buf,
"%d %s", &n, tag) != 2) || (n < 0))
554 E_FATAL(
"Error in header: %s\n", buf);
556 if (strcmp(tag,
"n_base") == 0)
558 else if (strcmp(tag,
"n_tri") == 0)
560 else if (strcmp(tag,
"n_state_map") == 0)
562 else if (strcmp(tag,
"n_tied_ci_state") == 0)
564 else if (strcmp(tag,
"n_tied_state") == 0)
566 else if (strcmp(tag,
"n_tied_tmat") == 0)
569 E_FATAL(
"Unknown header line: %s\n", buf);
570 }
while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
575 E_FATAL(
"%s: Error in header\n", mdeffile);
578 if (n_ci >= MAX_INT16)
579 E_FATAL(
"%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
581 if (n_ci + n_tri >= MAX_INT32)
582 E_FATAL(
"%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
583 n_ci + n_tri, MAX_INT32);
584 if (m->
n_sen >= MAX_INT16)
585 E_FATAL(
"%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
586 m->
n_sen, MAX_INT16);
587 if (m->
n_tmat >= MAX_INT32)
588 E_FATAL(
"%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
594 (
"Header error: n_state_map not a multiple of n_ci*n_tri\n");
598 m->
ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES);
627 for (p = 0; p < m->
n_phone; p++)
641 for (p = 0; p < n_ci; p++) {
642 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
643 E_FATAL(
"Premature EOF reading CIphone %d\n", p);
644 parse_base_line(m, buf, p);
650 if (noncomment_line(buf,
sizeof(buf), fp) < 0)
651 E_FATAL(
"Premature EOF reading phone %d\n", p);
652 parse_tri_line(m, buf, p);
655 if (noncomment_line(buf,
sizeof(buf), fp) >= 0)
656 E_ERROR(
"Non-empty file beyond expected #phones (%d)\n",
662 (
"#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
668 for (s = 0; s < m->
n_sen; s++)
674 for (p = n_ci; p < m->
n_phone; p++) {
692 E_INFO_NOFN(
"Initialization of mdef_t, report:\n");
694 (
"%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
719 mdef_free_recursive_rc(lc->rclist);
722 mdef_free_recursive_lc(lc->next);
724 ckd_free((
void *) lc);
734 mdef_free_recursive_rc(rc->next);
736 ckd_free((
void *) rc);
773 ckd_free_2d((
void *) m->
sseq);
776 ckd_free((
void *) m->
phone);
792 ckd_free((
void *) m);