tesseract 5.2.0
Loading...
Searching...
No Matches
tesseract::ImageFind Class Reference

#include <imagefind.h>

Static Public Member Functions

static Image FindImages (Image pix, DebugPixa *pixa_debug)
 
static bool BoundsWithinRect (Image pix, int *x_start, int *y_start, int *x_end, int *y_end)
 
static double ColorDistanceFromLine (const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
 
static bool BlankImageInBetween (const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Image pix)
 
static int CountPixelsInRotatedBox (TBOX box, const TBOX &im_box, const FCOORD &rotation, Image pix)
 
static void TransferImagePartsToImageMask (const FCOORD &rerotation, ColPartitionGrid *part_grid, Image image_mask)
 
static void FindImagePartitions (Image image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
 

Detailed Description

Definition at line 38 of file imagefind.h.

Member Function Documentation

◆ BlankImageInBetween()

bool tesseract::ImageFind::BlankImageInBetween ( const TBOX &  box1,
const TBOX &  box2,
const TBOX &  im_box,
const FCOORD &  rotation,
Image  pix 
)
static

Definition at line 441 of file imagefind.cpp.

// Returns true when nothing lies between box1 and box2: either the larger of
// their x/y gaps is <= 0, or CountPixelsInRotatedBox reports zero pixels in
// the strip that separates them.
442 {
// Start from both boxes combined (TBOX::operator+= — presumably the bounding
// union; confirm in rect.h), then narrow one axis down to just the gap.
443 TBOX search_box(box1);
444 search_box += box2;
445 if (box1.x_gap(box2) >= box1.y_gap(box2)) {
// Horizontal separation dominates. A non-positive gap means there is no
// space between the boxes to inspect.
446 if (box1.x_gap(box2) <= 0) {
447 return true;
448 }
// The gap runs from the smaller right edge to the larger left edge.
449 search_box.set_left(std::min(box1.right(), box2.right()));
450 search_box.set_right(std::max(box1.left(), box2.left()));
451 } else {
// Vertical separation dominates; same reasoning on the y axis.
452 if (box1.y_gap(box2) <= 0) {
453 return true;
454 }
// The gap runs from the smaller top edge up to the larger bottom edge.
455 search_box.set_top(std::max(box1.bottom(), box2.bottom()));
456 search_box.set_bottom(std::min(box1.top(), box2.top()));
457 }
// Blank means no pixels counted inside the gap region.
458 return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
459}
@ TBOX
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, Image pix)
Definition: imagefind.cpp:463

◆ BoundsWithinRect()

bool tesseract::ImageFind::BoundsWithinRect ( Image  pix,
int *  x_start,
int *  y_start,
int *  x_end,
int *  y_end 
)
static

Definition at line 347 of file imagefind.cpp.

// Passes the rectangle (*x_start, *y_start)-(*x_end, *y_end) to
// pixClipBoxToForeground and, if that call produces an output box, overwrites
// the four in/out coordinates with the output box's geometry.
// Returns true if an output box was produced; otherwise the coordinates are
// left untouched and false is returned.
347 {
// Leptonica boxes are x, y, width, height, so convert from start/end form.
348 Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
349 Box *output_box = nullptr;
// The clipped pix itself is not requested (nullptr); only the box is wanted.
350 pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
// output_box stays nullptr when no box was returned (presumably because the
// region has no foreground pixels — confirm against the Leptonica docs).
351 bool result = output_box != nullptr;
352 if (result) {
353 l_int32 x, y, width, height;
354 boxGetGeometry(output_box, &x, &y, &width, &height);
// Convert back from x/y/width/height to start/end form.
355 *x_start = x;
356 *y_start = y;
357 *x_end = x + width;
358 *y_end = y + height;
359 boxDestroy(&output_box);
360 }
// input_box is released on both the success and the failure path.
361 boxDestroy(&input_box);
362 return result;
363}

◆ ColorDistanceFromLine()

double tesseract::ImageFind::ColorDistanceFromLine ( const uint8_t *  line1,
const uint8_t *  line2,
const uint8_t *  point 
)
static

Definition at line 368 of file imagefind.cpp.

// Computes |L x P|^2 / |L|^2, where L = line2 - line1 and P = point - line1 are
// per-channel differences. This is the SQUARED distance of point from the
// line through line1 and line2 in color space (no square root is taken).
// Reads kRGBRMSColors bytes from each of line1, line2 and point.
// Returns 0.0 when L has zero length in the channels that are kept.
369 {
370 int line_vector[kRGBRMSColors];
371 int point_vector[kRGBRMSColors];
// Widen to int before subtracting so negative differences are representable.
372 for (int i = 0; i < kRGBRMSColors; ++i) {
373 line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
374 point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
375 }
// Drop the alpha component of the line so it adds nothing to line_sq below.
// point_vector's alpha entry is not cleared, but it is not read again.
376 line_vector[L_ALPHA_CHANNEL] = 0;
377 // Now the cross product in 3d.
378 int cross[kRGBRMSColors];
379 cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] -
380 line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
381 cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] -
382 line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
383 cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] -
384 line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
// The alpha slot of the cross product is zeroed so it adds nothing to cross_sq.
385 cross[L_ALPHA_CHANNEL] = 0;
386 // Now the sums of the squares.
// Accumulate in double so the squared int products do not overflow int.
387 double cross_sq = 0.0;
388 double line_sq = 0.0;
389 for (int j = 0; j < kRGBRMSColors; ++j) {
390 cross_sq += static_cast<double>(cross[j]) * cross[j];
391 line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
392 }
// Degenerate line (both end points equal): avoid dividing by zero.
393 if (line_sq == 0.0) {
394 return 0.0;
395 }
396 return cross_sq / line_sq; // This is the squared distance.
397}
const int kRGBRMSColors
Definition: colpartition.h:36

◆ CountPixelsInRotatedBox()

int tesseract::ImageFind::CountPixelsInRotatedBox ( TBOX  box,
const TBOX &  im_box,
const FCOORD &  rotation,
Image  pix 
)
static

Definition at line 463 of file imagefind.cpp.

// Counts the pixels of pix that fall inside box, after clipping box to im_box
// and rotating both boxes by rotation. box is taken by value, so the caller's
// box is not modified. Returns 0 when box and im_box do not intersect.
464 {
465 // Intersect it with the image box.
466 box &= im_box; // This is in-place box intersection.
467 if (box.null_box()) {
468 return 0;
469 }
// Rotate the clipped box and a copy of im_box by the same rotation so the
// offsets computed below are between boxes in the same coordinate frame.
470 box.rotate(rotation);
471 TBOX rotated_im_box(im_box);
472 rotated_im_box.rotate(rotation);
// Temporary depth-1 image sized to the box; it receives the copied region.
473 Image rect_pix = pixCreate(box.width(), box.height(), 1);
// Copy from pix, using the box's offset from the rotated image box as the
// source position. The y offset is measured from the top edges.
474 pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
475 box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
// NOTE(review): result is not initialized here; if pixCountPixels can return
// without writing it (e.g. rect_pix is null), the value returned is
// indeterminate — confirm against the Leptonica docs.
476 l_int32 result;
477 pixCountPixels(rect_pix, &result, nullptr);
478 rect_pix.destroy();
479 return result;
480}

◆ FindImagePartitions()

void tesseract::ImageFind::FindImagePartitions ( Image  image_pix,
const FCOORD &  rotation,
const FCOORD &  rerotation,
TO_BLOCK *  block,
TabFind *  tab_grid,
DebugPixa *  pixa_debug,
ColPartitionGrid *  part_grid,
ColPartition_LIST *  big_parts 
)
static

Definition at line 1145 of file imagefind.cpp.

// Turns each connected component of image_pix into one or more image
// ColPartitions and inserts them into part_grid, then calls DeleteSmallImages
// on the grid. big_parts is handed to EliminateWeakParts. The block and
// tab_grid parameters are not used in this body.
1148 {
1149 int imageheight = pixGetHeight(image_pix);
1150 Boxa *boxa;
1151 Pixa *pixa;
// boxa and pixa are output parameters filled in by this call.
1152 ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
1153 // Iterate the connected components in the image regions mask.
// nboxes stays 0, and the loop is skipped, if either output is null.
1154 int nboxes = 0;
1155 if (boxa != nullptr && pixa != nullptr) {
1156 nboxes = boxaGetCount(boxa);
1157 }
1158 for (int i = 0; i < nboxes; ++i) {
1159 l_int32 x, y, width, height;
1160 boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
// L_CLONE handle; it is released by pix.destroy() further down.
1161 Image pix = pixaGetPix(pixa, i, L_CLONE);
// The y range is flipped by subtracting from the image height (presumably
// converting from a top-left to a bottom-left origin — confirm).
1162 TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
1163 im_box.rotate(rotation); // Now matches all partitions and blobs.
1164 ColPartitionGridSearch rectsearch(part_grid);
1165 rectsearch.SetUniqueMode(true);
1166 ColPartition_LIST part_list;
// part_list receives the partitions produced for this component.
1167 DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, &part_list);
1168 if (textord_tabfind_show_images && pixa_debug != nullptr) {
1169 pixa_debug->AddPix(pix, "ImageComponent");
1170 tprintf("Component has %d parts\n", part_list.length());
1171 }
1172 pix.destroy();
1173 if (!part_list.empty()) {
1174 ColPartition_IT part_it(&part_list);
1175 if (part_list.singleton()) {
1176 // We didn't have to chop it into a polygon to fit around text, so
1177 // try expanding it to merge fragmented image parts, as long as it
1178 // doesn't touch strong text.
// The single part is taken out of the list while it is being expanded.
1179 ColPartition *part = part_it.extract();
1180 TBOX text_box(im_box);
1181 MaximalImageBoundingBox(part_grid, &text_box);
// Repeat until ExpandImageIntoParts returns false; the body is empty.
1182 while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)) {
1183 ;
1184 }
// Put the (possibly replaced) part back and adopt its bounding box.
1185 part_it.set_to_list(&part_list);
1186 part_it.add_after_then_move(part);
1187 im_box = part->bounding_box();
1188 }
1189 EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1190 // Iterate the part_list and put the parts into the grid.
1191 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
// Each part is removed from the list as it is inserted into the grid.
1192 ColPartition *image_part = part_it.extract();
1193 im_box = image_part->bounding_box();
1194 part_grid->InsertBBox(true, true, image_part);
// Link each part with the next one in the list as partners.
1195 if (!part_it.at_last()) {
1196 ColPartition *neighbour = part_it.data_relative(1);
1197 image_part->AddPartner(false, neighbour);
1198 neighbour->AddPartner(true, image_part);
1199 }
1200 }
1201 }
1202 }
// Release the component boxes and pix array obtained above.
1203 boxaDestroy(&boxa);
1204 pixaDestroy(&pixa);
1205 DeleteSmallImages(part_grid);
1206#ifndef GRAPHICS_DISABLED
// Optional debug display of the grid contents.
1207 if (textord_tabfind_show_images) {
1208 ScrollView *images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1209 part_grid->DisplayBoxes(images_win_);
1210 }
1211#endif
1212}
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919

◆ FindImages()

Image tesseract::ImageFind::FindImages ( Image  pix,
DebugPixa *  pixa_debug 
)
static

Definition at line 252 of file imagefind.cpp.

// Builds and returns a depth-1 mask with the same width and height as pix,
// derived from Leptonica's halftone mask of a reduced copy of pix.
// An empty mask of the same size is returned when pix (or its reduced copy) is
// smaller than kMinImageFindSize in either dimension, or when no halftone
// mask is found. Intermediate images are added to pixa_debug when
// textord_tabfind_show_images is set and pixa_debug is non-null.
252 {
253 // Not worth looking at small images.
254 if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
255 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
256 }
257
258 // Reduce by factor 2.
259 Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
260 if (textord_tabfind_show_images && pixa_debug != nullptr) {
261 pixa_debug->AddPix(pixr, "CascadeReduced");
262 }
263
264 // Get the halftone mask directly from Leptonica.
265 //
266 // Leptonica will print an error message and return nullptr if we call
267 // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
268 // want to bypass that.
269 if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
270 pixr.destroy();
271 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
272 }
273 // Get the halftone mask.
274 l_int32 ht_found = 0;
// pixadb collects Leptonica's own debug images; created only in debug mode.
275 Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
276 Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
277 if (pixadb) {
278 Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
// This condition always holds here, since pixadb is non-null only when it is true.
279 if (textord_tabfind_show_images && pixa_debug != nullptr) {
280 pixa_debug->AddPix(pixdb, "HalftoneMask");
281 }
282 pixdb.destroy();
283 pixaDestroy(&pixadb);
284 }
285 pixr.destroy();
// Discard a returned mask when no halftone was reported. The null check
// that follows relies on destroy() leaving pixht2 null.
286 if (!ht_found && pixht2 != nullptr) {
287 pixht2.destroy();
288 }
289 if (pixht2 == nullptr) {
290 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
291 }
292
293 // Expand back up again.
294 Image pixht = pixExpandReplicate(pixht2, 2);
295 if (textord_tabfind_show_images && pixa_debug != nullptr) {
296 pixa_debug->AddPix(pixht, "HalftoneReplicated");
297 }
298 pixht2.destroy();
299
300 // Fill to capture pixels near the mask edges that were missed
// Seed fill from the mask into pix with connectivity 8, then OR it back in.
301 Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
302 pixht |= pixt;
303 pixt.destroy();
304
305 // Eliminate lines and bars that may be joined to images.
306 Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
307 pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
308 if (textord_tabfind_show_images && pixa_debug != nullptr) {
309 pixa_debug->AddPix(pixfinemask, "FineMask");
310 }
// The coarse mask is built with two cascaded reductions, then a dilation.
311 Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
312 Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
313 pixreduced.destroy();
314 pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
315 Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
316 pixreduced2.destroy();
317 if (textord_tabfind_show_images && pixa_debug != nullptr) {
318 pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
319 }
320 // Combine the coarse and fine image masks.
321 pixcoarsemask &= pixfinemask;
322 pixfinemask.destroy();
323 // Dilate a bit to make sure we get everything.
324 pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
325 Image pixmask = pixExpandReplicate(pixcoarsemask, 16);
326 pixcoarsemask.destroy();
327 if (textord_tabfind_show_images && pixa_debug != nullptr) {
328 pixa_debug->AddPix(pixmask, "MaskDilated");
329 }
330 // And the image mask with the line and bar remover.
331 pixht &= pixmask;
332 pixmask.destroy();
333 if (textord_tabfind_show_images && pixa_debug != nullptr) {
334 pixa_debug->AddPix(pixht, "FinalMask");
335 }
336 // Make the result image the same size as the input.
// pixht is ORed into a fresh image created with pix's exact dimensions.
337 Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
338 result |= pixht;
339 pixht.destroy();
340 return result;
341}
const int kMinImageFindSize
Definition: imagefind.cpp:48

◆ TransferImagePartsToImageMask()

void tesseract::ImageFind::TransferImagePartsToImageMask ( const FCOORD &  rerotation,
ColPartitionGrid *  part_grid,
Image  image_mask 
)
static

Definition at line 1092 of file imagefind.cpp.

// Removes every partition whose blob_type() is BRT_NOISE, BRT_RECTIMAGE or
// BRT_POLYIMAGE from part_grid, collects them on a temporary list, and passes
// that list to MarkAndDeleteImageParts together with image_mask.
1093 {
1094 // Extract the noise parts from the grid and put them on a temporary list.
1095 ColPartition_LIST parts_list;
1096 ColPartition_IT part_it(&parts_list);
1097 ColPartitionGridSearch gsearch(part_grid);
1098 gsearch.StartFullSearch();
1099 ColPartition *part;
// NextFullSearch returns nullptr once the whole grid has been visited.
1100 while ((part = gsearch.NextFullSearch()) != nullptr) {
1101 BlobRegionType type = part->blob_type();
// Image partitions are moved as well as noise, despite the comment above.
1102 if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
// Add the part to the list, then remove the current search result from the grid.
1103 part_it.add_after_then_move(part);
1104 gsearch.RemoveBBox();
1105 }
1106 }
1107 // Render listed noise partitions to the image mask.
1108 MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1109}
BlobRegionType
Definition: blobbox.h:74
@ BRT_NOISE
Definition: blobbox.h:75
@ BRT_POLYIMAGE
Definition: blobbox.h:79
@ BRT_RECTIMAGE
Definition: blobbox.h:78

The documentation for this class was generated from the following files: