631 {
632#if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT)
633
634# if defined(__clang__)
635
636 feenableexcept(FE_DIVBYZERO);
637# else
638 feenableexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_INVALID);
639# endif
640#endif
641 const char *lang = nullptr;
642 const char *image = nullptr;
643 const char *outputbase = nullptr;
644 const char *datapath = nullptr;
645 bool list_langs = false;
646 bool print_parameters = false;
647 bool print_fonts_table = false;
648 l_int32 dpi = 0;
649 int arg_i = 1;
651#ifdef DISABLED_LEGACY_ENGINE
653#else
655#endif
656 std::vector<std::string> vars_vec;
657 std::vector<std::string> vars_values;
658
659 if (std::getenv("LEPT_MSG_SEVERITY")) {
660
661 setMsgSeverity(L_SEVERITY_EXTERNAL);
662 } else {
663
664 setMsgSeverity(L_SEVERITY_ERROR);
665 }
666
667#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
668
669 TIFFSetErrorHandler(Win32ErrorHandler);
670 TIFFSetWarningHandler(Win32WarningHandler);
671#endif
672
673 if (!ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi, &list_langs,
674 &print_parameters, &print_fonts_table, &vars_vec, &vars_values, &arg_i, &pagesegmode, &enginemode)) {
675 return EXIT_FAILURE;
676 }
677
678 bool in_recognition_mode = !list_langs && !print_parameters && !print_fonts_table;
679
680 if (lang == nullptr && in_recognition_mode) {
681
682 lang = "eng";
683 }
684
685 if (image == nullptr && in_recognition_mode) {
686 return EXIT_SUCCESS;
687 }
688
689
690
691
693
695
697
698 const int init_failed = api.
Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i,
699 &vars_vec, &vars_values, false);
700
701 if (!SetVariablesFromCLArgs(api, argc, argv)) {
702 return EXIT_FAILURE;
703 }
704
705
707
708 if (list_langs) {
709 PrintLangsList(api);
710 return EXIT_SUCCESS;
711 }
712
713 if (init_failed) {
714 fprintf(stderr, "Could not initialize tesseract.\n");
715 return EXIT_FAILURE;
716 }
717
718 if (print_parameters) {
719 FILE *fout = stdout;
720 fprintf(stdout, "Tesseract parameters:\n");
723 return EXIT_SUCCESS;
724 }
725
726#ifndef DISABLED_LEGACY_ENGINE
727 if (print_fonts_table) {
728 FILE* fout = stdout;
729 fprintf(stdout, "Tesseract fonts table:\n");
732 return EXIT_SUCCESS;
733 }
734#endif
735
736 FixPageSegMode(api, pagesegmode);
737
738 if (dpi) {
739 auto dpi_string = std::to_string(dpi);
740 api.
SetVariable(
"user_defined_dpi", dpi_string.c_str());
741 }
742
743 int ret_val = EXIT_SUCCESS;
744
746 Pix *pixs = pixRead(image);
747 if (!pixs) {
748 fprintf(stderr, "Leptonica can't process input file: %s\n", image);
749 return 2;
750 }
751
753
757 float deskew_angle;
758
759 const std::unique_ptr<const tesseract::PageIterator> it(api.
AnalyseLayout());
760 if (it) {
761
762
763 it->Orientation(&orientation, &direction, &order, &deskew_angle);
765 "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
766 "Deskew angle: %.4f\n",
767 orientation, direction, order, deskew_angle);
768 } else {
769 ret_val = EXIT_FAILURE;
770 }
771
772 pixDestroy(&pixs);
773 return ret_val;
774 }
775
776
777
778
779 bool b = false;
780 bool in_training_mode = (api.
GetBoolVariable(
"tessedit_ambigs_training", &b) && b) ||
784
785#ifdef DISABLED_LEGACY_ENGINE
787 auto osd_warning = std::string("");
789 const char *disabled_osd_msg =
790 "\nERROR: The page segmentation mode 0 (OSD Only) is currently "
791 "disabled.\n\n";
792 fprintf(stderr, "%s", disabled_osd_msg);
793 return EXIT_FAILURE;
796 osd_warning +=
797 "\nWarning: The page segmentation mode 1 (Auto+OSD) is currently "
798 "disabled. "
799 "Using PSM 3 (Auto) instead.\n\n";
802 osd_warning +=
803 "\nWarning: The page segmentation mode 12 (Sparse text + OSD) is "
804 "currently disabled. "
805 "Using PSM 11 (Sparse text) instead.\n\n";
806 }
807#endif
808
809 std::vector<std::unique_ptr<TessResultRenderer>> renderers;
810
811 if (in_training_mode) {
812 renderers.push_back(nullptr);
813 } else if (outputbase != nullptr) {
814 PreloadRenderers(api, renderers, pagesegmode, outputbase);
815 }
816
817 if (!renderers.empty()) {
818#ifdef DISABLED_LEGACY_ENGINE
819 if (!osd_warning.empty()) {
820 fprintf(stderr, "%s", osd_warning.c_str());
821 }
822#endif
823 bool succeed = api.
ProcessPages(image,
nullptr, 0, renderers[0].get());
824 if (!succeed) {
825 fprintf(stderr, "Error during processing.\n");
826 ret_val = EXIT_FAILURE;
827 }
828 }
829
830 return ret_val;
831}
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_SPARSE_TEXT
Find as much text as possible in no particular order.
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD or OCR.
@ PSM_AUTO
Fully automatic page segmentation, but no OSD.
@ PSM_SPARSE_TEXT_OSD
Sparse text with orientation and script detection.
void tprintf(const char *format,...)
void SetPageSegMode(PageSegMode mode)
bool SetVariable(const char *name, const char *value)
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
PageSegMode GetPageSegMode() const
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const std::vector< std::string > *vars_vec, const std::vector< std::string > *vars_values, bool set_only_non_debug_params)
void PrintVariables(FILE *fp) const
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
PageIterator * AnalyseLayout()
void PrintFontsTable(FILE *fp) const
bool GetBoolVariable(const char *name, bool *value) const
void SetOutputName(const char *name)
static TESS_API void Update()
static DawgCache * GlobalDawgCache()