#include <lm.h>
int32 lm_s::access_type |
Updated on every lm_{tg,bg,ug}_score call to reflect the kind of n-gram accessed: 3 for 3-gram, 2 for 2-gram and 1 for 1-gram
Bigram 32 bits, NULL iff disk-based
uint32 lm_s::bg_seg_sz |
int32 lm_s::bgoff |
BG offsets into DMP file (used iff disk-based)
tgcache 32 bits Table of actual bigram probs
int32 lm_s::byteswap |
Whether this file is in the WRONG byte order
a mapping from dictionary word to LM word
int32 lm_s::dict_size |
Only used in class-based LM, because class-based LM is addressed in the dictionary space.
S3_FINISH_WORD id, if it exists
FILE* lm_s::fp |
hash_table_t* lm_s::HT |
hash table for word-string->word-id map
int32* lm_s::inclass_ugscore |
An array of inter-class unigram probabilities
int32 lm_s::inputenc |
Input encoding method
int32 lm_s::is32bits |
Whether the current LM is 32 bits or not. Derived from version and n_ug
int32 lm_s::isLM_IN_MEMORY |
Whether the LM is in memory. This is a per-LM property, which potentially means the code could allow some models to be disk-based while others are not.
LM class for this LM
uint32 lm_s::log_bg_seg_sz |
See big comment above
logmath_t* lm_s::logmath |
float32 lm_s::lw |
Language weight currently in effect for this LM
int32 lm_s::max_sorted_entries |
Temporary Variable: 2x the maximum size of the MAX_SORTED_ENTRIES
int32 lm_s::max_ug |
To which n_ug can grow with dynamic addition of words
membg[w1] = bigrams for lm wid w1 (used iff disk-based)
membg 32bits membg[w1] = bigrams for lm wid w1 (used iff disk-based)
int32 lm_s::n_bg |
#bigrams in entire LM
int32 lm_s::n_bg_bo |
#bg_score ops backed off to ug
int32 lm_s::n_bg_fill |
#bg fill operations
int32 lm_s::n_bg_inmem |
#bg in memory
int32 lm_s::n_bg_score |
#bg_score operations
int32 lm_s::n_bgprob |
int32 lm_s::n_lmclass |
# of LM classes
int32 lm_s::n_ng |
If unigram, n_ng=1; if bigram, n_ng=2; and so on
int32 lm_s::n_tg |
#trigrams in entire LM
int32 lm_s::n_tg_bo |
#tg_score ops backed off to bg
int32 lm_s::n_tg_fill |
Similar stats for trigrams
int32 lm_s::n_tg_inmem |
#tg in memory
int32 lm_s::n_tg_score |
#tg_score operations
int32 lm_s::n_tgbowt |
int32 lm_s::n_tgcache_hit |
# of trigram cache hit ops backed off to bg
int32 lm_s::n_tgprob |
int32 lm_s::n_ug |
#unigrams in LM
char* lm_s::name |
The name of the LM
int32 lm_s::outputenc |
Output encoding method
Temporary Variable: Sorted list
Temporary Variable: Sorted list
Temporary Variable: Sorted list
S3_START_WORD id, if it exists
Trigram 32 bits NULL iff disk-based
int32* lm_s::tg_segbase |
tg_segbase[i>>lm_t.log_bg_seg_sz] = index of 1st trigram for bigram segment (i>>lm_t.log_bg_seg_sz)
Table of actual trigram backoff weights
<w0,w1,w2> hashed to an entry into this array. Only the last trigram mapping to any given hash entry is kept in that entry. (The cache doesn't have to be super-efficient.)
tginfo[w2] = fast trigram access info for bigrams (*,w2)
tginfo 32bits tginfo[w2] = fast trigram access info for bigrams (*,w2)
int32 lm_s::tgoff |
TG offsets into DMP file (used iff disk-based)
Table of actual trigram probs
int32 lm_s::version |
The version number of the LM; in particular, this is the version that was most recently read in.
int32 lm_s::wip |
logs3(word insertion penalty) in effect for this LM
char** lm_s::wordstr |
The LM word list (in unigram order)