30 #include "config_auto.h" 33 #include "allheaders.h" 60 static Pix* RemoveEnclosingCircle(Pix* pixs) {
61 Pix* pixsi = pixInvert(
nullptr, pixs);
62 Pix* pixc = pixCreateTemplate(pixs);
63 pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
64 pixSeedfillBinary(pixc, pixc, pixsi, 4);
65 pixInvert(pixc, pixc);
67 Pix* pixt = pixAnd(
nullptr, pixs, pixc);
69 pixCountConnComp(pixt, 8, &max_count);
71 l_int32 min_count = INT32_MAX;
72 Pix* pixout =
nullptr;
75 pixErodeBrick(pixc, pixc, 3, 3);
76 pixt = pixAnd(
nullptr, pixs, pixc);
78 pixCountConnComp(pixt, 8, &
count);
79 if (i == 1 ||
count > max_count) {
82 }
else if (i > 1 &&
count < min_count) {
85 pixout = pixCopy(
nullptr, pixt);
86 }
else if (
count >= min_count) {
103 int width = pixGetWidth(pix_binary_);
104 int height = pixGetHeight(pix_binary_);
110 input_file !=
nullptr && input_file->
length() > 0) {
111 STRING name = *input_file;
112 const char* lastdot = strrchr(name.
string(),
'.');
113 if (lastdot !=
nullptr)
114 name[lastdot - name.
string()] =
'\0';
117 if (blocks->empty()) {
120 BLOCK_IT block_it(blocks);
123 block_it.add_to_end(block);
134 BLOBNBOX_LIST diacritic_blobs;
135 int auto_page_seg_ret_val = 0;
136 TO_BLOCK_LIST to_blocks;
140 pageseg_mode, blocks, &to_blocks,
143 return auto_page_seg_ret_val;
147 deskew_ =
FCOORD(1.0f, 0.0f);
148 reskew_ =
FCOORD(1.0f, 0.0f);
150 Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
151 if (pixcleaned !=
nullptr) {
152 pixDestroy(&pix_binary_);
153 pix_binary_ = pixcleaned;
158 if (auto_page_seg_ret_val < 0) {
162 if (blocks->empty()) {
171 textord_.
TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
172 pix_thresholds_, pix_grey_, splitting || cjk_mode,
173 &diacritic_blobs, blocks, &to_blocks);
174 return auto_page_seg_ret_val;
202 TO_BLOCK_LIST* to_blocks,
203 BLOBNBOX_LIST* diacritic_blobs,
Tesseract* osd_tess,
205 Pix* photomask_pix =
nullptr;
206 Pix* musicmask_pix =
nullptr;
208 BLOCK_LIST found_blocks;
209 TO_BLOCK_LIST temp_blocks;
212 pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
215 if (finder !=
nullptr) {
216 TO_BLOCK_IT to_block_it(&temp_blocks);
217 TO_BLOCK* to_block = to_block_it.data();
218 if (musicmask_pix !=
nullptr) {
221 pixOr(photomask_pix, photomask_pix, musicmask_pix);
226 result = finder->
FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
227 to_block, photomask_pix, pix_thresholds_,
228 pix_grey_, &pixa_debug_, &found_blocks,
229 diacritic_blobs, to_blocks);
234 pixDestroy(&photomask_pix);
235 pixDestroy(&musicmask_pix);
236 if (result < 0)
return result;
239 BLOCK_IT block_it(blocks);
241 block_it.add_list_after(&found_blocks);
247 static void AddAllScriptsConverted(
const UNICHARSET& sid_set,
273 OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
274 Pix** music_mask_pix) {
277 TabVector_LIST v_lines;
278 TabVector_LIST h_lines;
283 pixa_debug_.
AddPix(pix_binary_,
"PageSegInput");
288 &vertical_x, &vertical_y, music_mask_pix,
291 pixa_debug_.
AddPix(pix_binary_,
"NoLines");
296 pixa_debug_.
AddPix(pix_binary_,
"NoImages");
303 TO_BLOCK_IT to_block_it(to_blocks);
307 TO_BLOCK* to_block = to_block_it.data();
308 TBOX blkbox = to_block->block->pdblk.bounding_box();
310 int estimated_resolution = source_resolution_;
315 estimated_resolution = res;
316 tprintf(
"Estimating resolution as %d\n", estimated_resolution);
320 if (to_block->line_size >= 2) {
321 finder =
new ColumnFinder(static_cast<int>(to_block->line_size),
325 &h_lines, vertical_x, vertical_y);
329 #ifndef DISABLED_LEGACY_ENGINE 335 BLOBNBOX_CLIST osd_blobs;
340 int osd_orientation = 0;
347 to_block, &osd_blobs);
349 if (
PSM_OSD_ENABLED(pageseg_mode) && osd_tess !=
nullptr && osr !=
nullptr) {
351 if (osd_tess !=
this) {
355 for (
int s = 0; s < sub_langs_.size(); ++s) {
356 AddAllScriptsConverted(sub_langs_[s]->
unicharset,
368 for (
int i = 0; i < 4; ++i) {
369 if (i != osd_orientation &&
375 const char* best_script_str =
380 strcmp(
"Japanese", best_script_str) == 0 ||
381 strcmp(
"Korean", best_script_str) == 0 ||
382 strcmp(
"Hangul", best_script_str) == 0;
388 if (!cjk && !vertical_text && osd_orientation == 2) {
390 tprintf(
"OSD: Weak margin (%.2f), horiz textlines, not CJK: " 391 "Don't rotate.\n", osd_margin);
395 "OSD: Weak margin (%.2f) for %d blob text block, " 396 "but using orientation anyway: %d\n",
397 osd_margin, osd_blobs.length(), osd_orientation);
401 osd_blobs.shallow_clear();
404 #endif // ndef DISABLED_LEGACY_ENGINE bool textord_tabfind_vertical_text
const ICOORD & topright() const
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
bool PSM_SPARSE(int pageseg_mode)
Assume a single uniform block of text. (Default.)
bool PSM_OSD_ENABLED(int pageseg_mode)
constexpr int kResolutionEstimationFactor
double min_orientation_margin
const char * get_script_from_script_id(int id) const
const char * string() const
int LabelSpecialText(TO_BLOCK *to_block)
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
int get_script_table_size() const
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
bool enable_noise_removal
int pageseg_devanagari_split_strategy
int get_script_id_from_name(const char *script_name) const
static void FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x, int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines)
bool tessedit_dump_pageseg_images
void set_cjk_script(bool is_cjk)
bool textord_tabfind_force_vertical_text
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
const int kMaxCircleErosions
bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode)
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
double textord_tabfind_vertical_text_ratio
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
bool textord_use_cjk_fp_model
int textord_debug_tabfind
int IntCastRounded(double x)
bool right_to_left() const
int AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr)
DLLSYM void tprintf(const char *format,...)
void SetEquationDetect(EquationDetectBase *detect)
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
Treat the image as a single word in a circle.
const ICOORD & botleft() const
int tessedit_pageseg_mode
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
bool read_unlv_file(STRING name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
static Pix * FindImages(Pix *pix, DebugPixa *pixa_debug)
Orientation and script detection only.
bool textord_tabfind_show_vlines
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
void AddPix(const Pix *pix, const char *caption)
void set_right_to_left(bool value)
constexpr int kMinCredibleResolution
double textord_tabfind_aligned_gap_fraction
constexpr int kMaxCredibleResolution
int os_detect_blobs(const GenericVector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
ColumnFinder * SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix)