30 std::vector<std::string> filenames;
32 tprintf(
"Failed to load list of eval filenames from %s\n", filenames_file);
53 if (total_pages_ == 0) {
54 result +=
"No test data at iteration " + std::to_string(iteration);
57 if (!LockIfNotRunning()) {
58 result +=
"Previous test incomplete, skipping test at iteration " + std::to_string(iteration);
62 std::string prev_result = test_result_;
64 if (training_errors !=
nullptr) {
65 test_iteration_ = iteration;
66 test_training_errors_ = training_errors;
67 test_model_mgr_ = model_mgr;
68 test_training_stage_ = training_stage;
69 std::thread t(&LSTMTester::ThreadFunc,
this);
86 return "Deserialize failed";
88 int eval_iteration = 0;
89 double char_error = 0.0;
90 double word_error = 0.0;
92 while (error_count < total_pages_) {
101 if (verbosity > 1 || (verbosity > 0 && result !=
PERFECT)) {
103 std::vector<int> ocr_labels;
104 std::vector<int> xcoords;
106 std::string ocr_text = trainer.
DecodeLabels(ocr_labels);
107 tprintf(
"OCR :%s\n", ocr_text.c_str());
108 if (verbosity > 2 || (verbosity > 1 && result !=
PERFECT)) {
109 tprintf(
"Line BCER=%f, BWER=%f\n\n",
116 char_error *= 100.0 / total_pages_;
117 word_error *= 100.0 / total_pages_;
119 if (iteration != 0 || training_stage != 0) {
120 result +=
"At iteration " + std::to_string(iteration);
121 result +=
", stage " + std::to_string(training_stage) +
", ";
123 result +=
"BCER eval=" + std::to_string(char_error);
124 result +=
", BWER eval=" + std::to_string(word_error);
131void LSTMTester::ThreadFunc() {
133 RunEvalSync(test_iteration_, test_training_errors_, test_model_mgr_, test_training_stage_,
140bool LSTMTester::LockIfNotRunning() {
141 std::lock_guard<std::mutex> lock(running_mutex_);
142 if (async_running_) {
145 async_running_ =
true;
150void LSTMTester::UnlockRunning() {
151 std::lock_guard<std::mutex> lock(running_mutex_);
152 async_running_ =
false;
void tprintf(const char *format,...)
bool LoadFileLinesToStrings(const char *filename, std::vector< std::string > *lines)
const std::string & transcription() const
const ImageData * GetPageBySerial(int serial)
TESS_API bool LoadDocuments(const std::vector< std::string > &filenames, CachingStrategy cache_strategy, FileReader reader)
TESS_API int TotalPages()
bool GetComponent(TessdataType type, TFile *fp)
std::string DecodeLabels(const std::vector< int > &labels)
void LabelsFromOutputs(const NetworkIO &outputs, std::vector< int > *labels, std::vector< int > *xcoords)
void SetIteration(int iteration)
std::string RunEvalAsync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
std::string RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
LSTMTester(int64_t max_memory)
bool LoadAllEvalData(const char *filenames_file)
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)
bool InitCharSet(const std::string &traineddata_path)
double NewSingleError(ErrorTypes type) const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)