tesseract
5.0.0-alpha-619-ge9db
|
#include <equationdetect.h>
|
void | IdentifySpecialText (BLOBNBOX *blob, const int height_th) |
|
BlobSpecialTextType | EstimateTypeForUnichar (const UNICHARSET &unicharset, const UNICHAR_ID id) const |
|
void | IdentifySpecialText () |
|
void | IdentifyBlobsToSkip (ColPartition *part) |
|
void | MergePartsByLocation () |
|
void | SearchByOverlap (ColPartition *seed, GenericVector< ColPartition * > *parts_overlap) |
|
void | InsertPartAfterAbsorb (ColPartition *part) |
|
void | IdentifySeedParts () |
|
bool | CheckSeedBlobsCount (ColPartition *part) |
|
float | ComputeForegroundDensity (const TBOX &tbox) |
|
bool | CheckForSeed2 (const GenericVector< int > &indented_texts_left, const float foreground_density_th, ColPartition *part) |
|
int | CountAlignment (const GenericVector< int > &sorted_vec, const int val) const |
|
bool | CheckSeedFgDensity (const float density_th, ColPartition *part) |
|
void | SplitCPHorLite (ColPartition *part, GenericVector< TBOX > *splitted_boxes) |
|
void | SplitCPHor (ColPartition *part, GenericVector< ColPartition * > *parts_splitted) |
|
bool | CheckSeedDensity (const float math_density_high, const float math_density_low, const ColPartition *part) const |
|
IndentType | IsIndented (ColPartition *part) |
|
void | IdentifyInlineParts () |
|
void | ComputeCPsSuperBBox () |
|
void | IdentifyInlinePartsHorizontal () |
|
int | EstimateTextPartLineSpacing () |
|
void | IdentifyInlinePartsVertical (const bool top_to_bottom, const int textPartsLineSpacing) |
|
bool | IsInline (const bool search_bottom, const int textPartsLineSpacing, ColPartition *part) |
|
bool | ExpandSeed (ColPartition *seed) |
|
void | ExpandSeedHorizontal (const bool search_left, ColPartition *seed, GenericVector< ColPartition * > *parts_to_merge) |
|
void | ExpandSeedVertical (const bool search_bottom, ColPartition *seed, GenericVector< ColPartition * > *parts_to_merge) |
|
bool | IsNearSmallNeighbor (const TBOX &seed_box, const TBOX &part_box) const |
|
bool | CheckSeedNeighborDensity (const ColPartition *part) const |
|
void | ProcessMathBlockSatelliteParts () |
|
bool | IsMathBlockSatellite (ColPartition *part, GenericVector< ColPartition * > *math_blocks) |
|
ColPartition * | SearchNNVertical (const bool search_bottom, const ColPartition *part) |
|
bool | IsNearMathNeighbor (const int y_gap, const ColPartition *neighbor) const |
|
void | GetOutputTiffName (const char *name, STRING *image_name) const |
|
void | PaintColParts (const STRING &outfile) const |
|
void | PaintSpecialTexts (const STRING &outfile) const |
|
void | PrintSpecialBlobsDensity (const ColPartition *part) const |
|
Definition at line 38 of file equationdetect.h.
◆ IndentType
Enumerator |
---|
NO_INDENT | |
LEFT_INDENT | |
RIGHT_INDENT | |
BOTH_INDENT | |
INDENT_TYPE_COUNT | |
Definition at line 44 of file equationdetect.h.
◆ EquationDetect()
tesseract::EquationDetect::EquationDetect |
( |
const char * |
equ_datapath, |
|
|
const char * |
equ_language |
|
) |
| |
Definition at line 102 of file equationdetect.cpp.
104 const char* default_name =
"equ";
105 if (equ_name ==
nullptr) {
106 equ_name = default_name;
114 tprintf(
"Warning: equation region detection requested,"
115 " but %s failed to load from %s\n", equ_name, equ_datapath);
◆ ~EquationDetect()
tesseract::EquationDetect::~EquationDetect |
( |
| ) |
|
|
override |
◆ CheckForSeed2()
bool tesseract::EquationDetect::CheckForSeed2 |
( |
const GenericVector< int > & |
indented_texts_left, |
|
|
const float |
foreground_density_th, |
|
|
ColPartition * |
part |
|
) |
| |
|
protected |
◆ CheckSeedBlobsCount()
bool tesseract::EquationDetect::CheckSeedBlobsCount |
( |
ColPartition * |
part | ) |
|
|
protected |
Definition at line 983 of file equationdetect.cpp.
987 const int kSeedMathBlobsCount = 2;
988 const int kSeedMathDigitBlobsCount = 5;
990 const int blobs = part->boxes_count(),
991 math_blobs = part->SpecialBlobsCount(
BSTT_MATH),
992 digit_blobs = part->SpecialBlobsCount(
BSTT_DIGIT);
994 math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) {
◆ CheckSeedDensity()
bool tesseract::EquationDetect::CheckSeedDensity |
( |
const float |
math_density_high, |
|
|
const float |
math_density_low, |
|
|
const ColPartition * |
part |
|
) |
| const |
|
protected |
Definition at line 1001 of file equationdetect.cpp.
1006 float math_digit_density = part->SpecialBlobsDensity(
BSTT_MATH)
1008 float italic_density = part->SpecialBlobsDensity(
BSTT_ITALIC);
1009 if (math_digit_density > math_density_high) {
1013 math_digit_density > math_density_low) {
◆ CheckSeedFgDensity()
bool tesseract::EquationDetect::CheckSeedFgDensity |
( |
const float |
density_th, |
|
|
ColPartition * |
part |
|
) |
| |
|
protected |
Definition at line 625 of file equationdetect.cpp.
632 float parts_passed = 0.0;
633 for (
int i = 0; i < sub_boxes.
size(); ++i) {
635 if (density < density_th) {
641 const float kSeedPartRatioTh = 0.3;
642 bool retval = (parts_passed / sub_boxes.
size() >= kSeedPartRatioTh);
◆ CheckSeedNeighborDensity()
bool tesseract::EquationDetect::CheckSeedNeighborDensity |
( |
const ColPartition * |
part | ) |
const |
|
protected |
◆ ComputeCPsSuperBBox()
void tesseract::EquationDetect::ComputeCPsSuperBBox |
( |
| ) |
|
|
protected |
Definition at line 791 of file equationdetect.cpp.
793 ColPartition *part =
nullptr;
794 gsearch.StartFullSearch();
797 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
798 (*cps_super_bbox_) += part->bounding_box();
◆ ComputeForegroundDensity()
float tesseract::EquationDetect::ComputeForegroundDensity |
( |
const TBOX & |
tbox | ) |
|
|
protected |
Definition at line 611 of file equationdetect.cpp.
613 const int pix_height = pixGetHeight(pix_bi);
614 Box* box = boxCreate(tbox.
left(), pix_height - tbox.
top(),
616 Pix *pix_sub = pixClipRectangle(pix_bi, box,
nullptr);
618 pixForegroundFraction(pix_sub, &fract);
619 pixDestroy(&pix_sub);
◆ CountAlignment()
int tesseract::EquationDetect::CountAlignment |
( |
const GenericVector< int > & |
sorted_vec, |
|
|
const int |
val |
|
) |
| const |
|
protected |
Definition at line 759 of file equationdetect.cpp.
761 if (sorted_vec.
empty()) {
764 const int kDistTh = static_cast<int>(roundf(0.03 *
resolution_));
770 while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) {
776 while (index < sorted_vec.
size() && sorted_vec[index++] - val < kDistTh) {
◆ EstimateTextPartLineSpacing()
int tesseract::EquationDetect::EstimateTextPartLineSpacing |
( |
| ) |
|
|
protected |
Definition at line 867 of file equationdetect.cpp.
871 ColPartition *current =
nullptr, *prev =
nullptr;
872 gsearch.StartFullSearch();
874 while ((current = gsearch.NextFullSearch()) !=
nullptr) {
878 if (prev !=
nullptr) {
879 const TBOX &current_box = current->bounding_box();
880 const TBOX &prev_box = prev->bounding_box();
884 int gap = current_box.
y_gap(prev_box);
885 if (gap < std::min(current_box.
height(), prev_box.
height())) {
894 if (ygaps.
size() < 8) {
900 int spacing = 0,
count;
902 spacing += ygaps[
count];
904 return spacing /
count;
◆ EstimateTypeForUnichar()
Definition at line 224 of file equationdetect.cpp.
234 if (ids_to_exclude.
empty()) {
235 static const STRING kCharsToEx[] = {
"'",
"`",
"\"",
"\\",
",",
".",
236 "〈",
"〉",
"《",
"》",
"」",
"「",
""};
238 while (kCharsToEx[i] !=
"") {
242 ids_to_exclude.
sort();
249 static const STRING kDigitsChars =
"|";
◆ ExpandSeed()
bool tesseract::EquationDetect::ExpandSeed |
( |
ColPartition * |
seed | ) |
|
|
protected |
Definition at line 1084 of file equationdetect.cpp.
1085 if (seed ==
nullptr ||
1086 seed->IsVerticalType()) {
1098 if (parts_to_merge.
empty()) {
1106 for (
int i = 0; i < parts_to_merge.
size(); ++i) {
1107 ColPartition* part = parts_to_merge[i];
1111 for (
int j = 0; j <
cp_seeds_.size(); ++j) {
1121 seed->Absorb(part,
nullptr);
◆ ExpandSeedHorizontal()
Definition at line 1127 of file equationdetect.cpp.
1131 ASSERT_HOST(seed !=
nullptr && parts_to_merge !=
nullptr);
1132 const float kYOverlapTh = 0.6;
1133 const int kXGapTh = static_cast<int>(roundf(0.2 *
resolution_));
1136 const TBOX& seed_box(seed->bounding_box());
1137 const int x = search_left ? seed_box.
left() : seed_box.right();
1138 search.StartSideSearch(x, seed_box.bottom(), seed_box.top());
1139 search.SetUniqueMode(
true);
1142 ColPartition *part =
nullptr;
1143 while ((part =
search.NextSideSearch(search_left)) !=
nullptr) {
1147 const TBOX& part_box(part->bounding_box());
1148 if (part_box.x_gap(seed_box) > kXGapTh) {
1153 if ((part_box.left() >= seed_box.left() && search_left) ||
1154 (part_box.right() <= seed_box.right() && !search_left)) {
1171 if (part_box.y_overlap_fraction(seed_box) < kYOverlapTh &&
1172 seed_box.y_overlap_fraction(part_box) < kYOverlapTh) {
◆ ExpandSeedVertical()
Definition at line 1183 of file equationdetect.cpp.
1187 ASSERT_HOST(seed !=
nullptr && parts_to_merge !=
nullptr &&
1189 const float kXOverlapTh = 0.4;
1190 const int kYGapTh = static_cast<int>(roundf(0.2 *
resolution_));
1193 const TBOX& seed_box(seed->bounding_box());
1194 const int y = search_bottom ? seed_box.
bottom() : seed_box.top();
1195 search.StartVerticalSearch(
1197 search.SetUniqueMode(
true);
1200 ColPartition *part =
nullptr;
1202 int skipped_min_top = std::numeric_limits<int>::max(), skipped_max_bottom = -1;
1203 while ((part =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1207 const TBOX& part_box(part->bounding_box());
1209 if (part_box.y_gap(seed_box) > kYGapTh) {
1214 if ((part_box.bottom() >= seed_box.bottom() && search_bottom) ||
1215 (part_box.top() <= seed_box.top() && !search_bottom)) {
1219 bool skip_part =
false;
1232 if (part_box.x_overlap_fraction(seed_box) < kXOverlapTh &&
1233 seed_box.x_overlap_fraction(part_box) < kXOverlapTh) {
1239 if (skipped_min_top > part_box.top()) {
1240 skipped_min_top = part_box.
top();
1242 if (skipped_max_bottom < part_box.bottom()) {
1243 skipped_max_bottom = part_box.bottom();
1258 for (
int i = 0; i < parts.
size(); i++) {
1259 const TBOX& part_box(parts[i]->bounding_box());
1260 if ((search_bottom && part_box.top() <= skipped_max_bottom) ||
1261 (!search_bottom && part_box.bottom() >= skipped_min_top)) {
◆ FindEquationParts()
Implements tesseract::EquationDetectBase.
Definition at line 358 of file equationdetect.cpp.
361 tprintf(
"Warning: lang_tesseract_ is nullptr!\n");
364 if (!part_grid || !best_columns) {
365 tprintf(
"part_grid/best_columns is nullptr!!\n");
375 if (equationdetect_save_bi_image) {
391 if (equationdetect_save_seed_image) {
399 for (
int i = 0; i <
cp_seeds_.size(); ++i) {
407 for (
int i = 0; i < seeds_expanded.
size(); ++i) {
416 if (equationdetect_save_merged_image) {
◆ GetOutputTiffName()
void tesseract::EquationDetect::GetOutputTiffName |
( |
const char * |
name, |
|
|
STRING * |
image_name |
|
) |
| const |
|
protected |
◆ IdentifyBlobsToSkip()
void tesseract::EquationDetect::IdentifyBlobsToSkip |
( |
ColPartition * |
part | ) |
|
|
protected |
Definition at line 310 of file equationdetect.cpp.
312 BLOBNBOX_C_IT blob_it(part->boxes());
314 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
318 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
327 BLOBNBOX_C_IT blob_it2 = blob_it;
329 while (!blob_it2.at_last()) {
330 BLOBNBOX* nextblob = blob_it2.forward();
332 if (nextblob_box.
left() >= blob_box.
right()) {
335 const float kWidthR = 0.4, kHeightR = 0.3;
337 yoverlap = blob_box.
y_overlap(nextblob_box);
338 const float widthR = static_cast<float>(
339 std::min(nextblob_box.
width(), blob_box.
width())) /
340 std::max(nextblob_box.
width(), blob_box.
width());
341 const float heightR = static_cast<float>(
345 if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) {
349 blob_box += nextblob_box;
◆ IdentifyInlineParts()
void tesseract::EquationDetect::IdentifyInlineParts |
( |
| ) |
|
|
protected |
◆ IdentifyInlinePartsHorizontal()
void tesseract::EquationDetect::IdentifyInlinePartsHorizontal |
( |
| ) |
|
|
protected |
Definition at line 802 of file equationdetect.cpp.
807 const int kGapTh = static_cast<int>(roundf(
810 search.SetUniqueMode(
true);
813 for (
int i = 0; i <
cp_seeds_.size(); ++i) {
815 const TBOX& part_box(part->bounding_box());
819 if (left_margin + kMarginDiffTh < right_margin &&
820 left_margin < kMarginDiffTh) {
823 part_box.right(), part_box.top(), part_box.bottom());
824 right_to_left =
false;
825 }
else if (left_margin > cps_cx) {
829 part_box.left(), part_box.top(), part_box.bottom());
830 right_to_left =
true;
835 ColPartition* neighbor =
nullptr;
836 bool side_neighbor_found =
false;
837 while ((neighbor =
search.NextSideSearch(right_to_left)) !=
nullptr) {
838 const TBOX& neighbor_box(neighbor->bounding_box());
840 part_box.x_gap(neighbor_box) > kGapTh ||
841 !part_box.major_y_overlap(neighbor_box) ||
842 part_box.major_x_overlap(neighbor_box)) {
846 side_neighbor_found =
true;
849 if (!side_neighbor_found) {
853 const TBOX& neighbor_box(neighbor->bounding_box());
854 if (neighbor_box.width() > part_box.width() &&
◆ IdentifyInlinePartsVertical()
void tesseract::EquationDetect::IdentifyInlinePartsVertical |
( |
const bool |
top_to_bottom, |
|
|
const int |
textPartsLineSpacing |
|
) |
| |
|
protected |
◆ IdentifySeedParts()
void tesseract::EquationDetect::IdentifySeedParts |
( |
| ) |
|
|
protected |
Definition at line 539 of file equationdetect.cpp.
541 ColPartition *part =
nullptr;
542 gsearch.StartFullSearch();
549 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
553 part->ComputeSpecialBlobsDensity();
555 const int kTextBlobsTh = 20;
568 part->boxes_count() > kTextBlobsTh) {
570 const TBOX&box = part->bounding_box();
580 indented_texts_left.
sort();
581 texts_foreground_density.
sort();
582 float foreground_density_th = 0.15;
583 if (!texts_foreground_density.
empty()) {
585 foreground_density_th = 0.8 * texts_foreground_density[
586 texts_foreground_density.
size() / 2];
589 for (
int i = 0; i < seeds1.
size(); ++i) {
590 const TBOX& box = seeds1[i]->bounding_box();
603 for (
int i = 0; i < seeds2.
size(); ++i) {
604 if (
CheckForSeed2(indented_texts_left, foreground_density_th, seeds2[i])) {
◆ IdentifySpecialText() [1/2]
void tesseract::EquationDetect::IdentifySpecialText |
( |
| ) |
|
|
protected |
Definition at line 258 of file equationdetect.cpp.
265 const int classify_integer_matcher =
271 ColPartition *part =
nullptr;
272 gsearch.StartFullSearch();
273 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
278 BLOBNBOX_C_IT bbox_it(part->boxes());
281 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
283 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
284 blob_heights.
push_back(bbox_it.data()->bounding_box().height());
288 const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
289 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
291 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
299 classify_class_pruner);
301 classify_integer_matcher);
303 if (equationdetect_save_spt_image) {
◆ IdentifySpecialText() [2/2]
void tesseract::EquationDetect::IdentifySpecialText |
( |
BLOBNBOX * |
blob, |
|
|
const int |
height_th |
|
) |
| |
|
protected |
Definition at line 151 of file equationdetect.cpp.
154 if (blobnbox->bounding_box().height() < height_th && height_th > 0) {
156 blobnbox->set_special_text_type(
BSTT_NONE);
160 BLOB_CHOICE_LIST ratings_equ, ratings_lang;
170 const float x_orig = (box.
left() + box.
right()) / 2.0f, y_orig = box.
bottom();
171 std::unique_ptr<TBLOB> normed_blob(
new TBLOB(*tblob));
172 normed_blob->Normalize(
nullptr,
nullptr,
nullptr, x_orig, y_orig, scaling, scaling,
182 BLOB_CHOICE *lang_choice =
nullptr, *equ_choice =
nullptr;
183 if (ratings_lang.length() > 0) {
184 BLOB_CHOICE_IT choice_it(&ratings_lang);
185 lang_choice = choice_it.data();
187 if (ratings_equ.length() > 0) {
188 BLOB_CHOICE_IT choice_it(&ratings_equ);
189 equ_choice = choice_it.data();
192 const float lang_score = lang_choice ? lang_choice->
certainty() : -FLT_MAX;
193 const float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX;
195 const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
198 const float diff = fabs(lang_score - equ_score);
202 if (fmax(lang_score, equ_score) < kConfScoreTh) {
205 }
else if (diff > kConfDiffTh && equ_score > lang_score) {
209 }
else if (lang_choice) {
220 blobnbox->set_special_text_type(
type);
◆ InsertPartAfterAbsorb()
void tesseract::EquationDetect::InsertPartAfterAbsorb |
( |
ColPartition * |
part | ) |
|
|
protected |
Definition at line 512 of file equationdetect.cpp.
523 const TBOX& part_box(part->bounding_box());
526 part_box.left(), part_box.bottom(), &grid_x, &grid_y);
530 part->set_type(part_type);
531 part->set_blob_type(blob_type);
532 part->set_flow(flow_type);
533 part->SetBlobTypes();
◆ IsIndented()
Definition at line 1020 of file equationdetect.cpp.
1024 ColPartition *neighbor =
nullptr;
1025 const TBOX& part_box(part->bounding_box());
1026 const int kXGapTh = static_cast<int>(roundf(0.5 *
resolution_));
1027 const int kRadiusTh = static_cast<int>(roundf(3.0 *
resolution_));
1028 const int kYGapTh = static_cast<int>(roundf(0.5 *
resolution_));
1033 search.StartRadSearch((part_box.left() + part_box.right()) / 2,
1034 (part_box.top() + part_box.bottom()) / 2, kRadiusTh);
1035 search.SetUniqueMode(
true);
1036 bool left_indented =
false, right_indented =
false;
1037 while ((neighbor =
search.NextRadSearch()) !=
nullptr &&
1038 (!left_indented || !right_indented)) {
1039 if (neighbor == part) {
1042 const TBOX& neighbor_box(neighbor->bounding_box());
1044 if (part_box.major_y_overlap(neighbor_box) &&
1045 part_box.x_gap(neighbor_box) < kXGapTh) {
1056 if (!part_box.x_overlap(neighbor_box) || part_box.y_overlap(neighbor_box)) {
1060 if (part_box.y_gap(neighbor_box) < kYGapTh) {
1061 const int left_gap = part_box.left() - neighbor_box.left();
1062 const int right_gap = neighbor_box.right() - part_box.right();
1063 if (left_gap > kXGapTh) {
1064 left_indented =
true;
1066 if (right_gap > kXGapTh) {
1067 right_indented =
true;
1072 if (left_indented && right_indented) {
1075 if (left_indented) {
1078 if (right_indented) {
◆ IsInline()
bool tesseract::EquationDetect::IsInline |
( |
const bool |
search_bottom, |
|
|
const int |
textPartsLineSpacing, |
|
|
ColPartition * |
part |
|
) |
| |
|
protected |
Definition at line 936 of file equationdetect.cpp.
943 ColPartition *neighbor =
nullptr;
944 const TBOX& part_box(part->bounding_box());
945 const float kYGapRatioTh = 1.0;
948 search.StartVerticalSearch(part_box.left(), part_box.right(),
951 search.StartVerticalSearch(part_box.left(), part_box.right(),
954 search.SetUniqueMode(
true);
955 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
956 const TBOX& neighbor_box(neighbor->bounding_box());
957 if (part_box.y_gap(neighbor_box) > kYGapRatioTh *
958 std::min(part_box.height(), neighbor_box.height())) {
967 const float kHeightRatioTh = 0.5;
968 const int kYGapTh = textparts_linespacing > 0 ?
969 textparts_linespacing + static_cast<int>(roundf(0.02 *
resolution_)):
971 if (part_box.x_overlap(neighbor_box) &&
972 part_box.y_gap(neighbor_box) <= kYGapTh &&
974 static_cast<float>(std::min(part_box.height(), neighbor_box.height())) /
975 std::max(part_box.height(), neighbor_box.height()) > kHeightRatioTh) {
◆ IsMathBlockSatellite()
Definition at line 1358 of file equationdetect.cpp.
1360 ASSERT_HOST(part !=
nullptr && math_blocks !=
nullptr);
1361 math_blocks->
clear();
1362 const TBOX& part_box(part->bounding_box());
1364 ColPartition *neighbors[2];
1365 int y_gaps[2] = {std::numeric_limits<int>::max(), std::numeric_limits<int>::max()};
1367 int neighbors_left = std::numeric_limits<int>::max(), neighbors_right = 0;
1368 for (
int i = 0; i < 2; ++i) {
1371 const TBOX& neighbor_box = neighbors[i]->bounding_box();
1372 y_gaps[i] = neighbor_box.
y_gap(part_box);
1373 if (neighbor_box.
left() < neighbors_left) {
1374 neighbors_left = neighbor_box.
left();
1376 if (neighbor_box.
right() > neighbors_right) {
1377 neighbors_right = neighbor_box.
right();
1381 if (neighbors[0] == neighbors[1]) {
1383 neighbors[1] =
nullptr;
1384 y_gaps[1] = std::numeric_limits<int>::max();
1388 if (part_box.left() < neighbors_left || part_box.right() > neighbors_right) {
1393 int index = y_gaps[0] < y_gaps[1] ? 0 : 1;
1397 math_blocks->
push_back(neighbors[index]);
1406 math_blocks->
push_back(neighbors[index]);
◆ IsNearMathNeighbor()
bool tesseract::EquationDetect::IsNearMathNeighbor |
( |
const int |
y_gap, |
|
|
const ColPartition * |
neighbor |
|
) |
| const |
|
protected |
◆ IsNearSmallNeighbor()
bool tesseract::EquationDetect::IsNearSmallNeighbor |
( |
const TBOX & |
seed_box, |
|
|
const TBOX & |
part_box |
|
) |
| const |
|
protected |
Definition at line 1270 of file equationdetect.cpp.
1272 const int kXGapTh = static_cast<int>(roundf(0.25 *
resolution_));
1273 const int kYGapTh = static_cast<int>(roundf(0.05 *
resolution_));
1283 part_box.
y_gap(seed_box) > kYGapTh) &&
1285 part_box.
x_gap(seed_box) > kXGapTh)) {
◆ LabelSpecialText()
int tesseract::EquationDetect::LabelSpecialText |
( |
TO_BLOCK * |
to_block | ) |
|
|
override virtual |
Implements tesseract::EquationDetectBase.
Definition at line 131 of file equationdetect.cpp.
132 if (to_block ==
nullptr) {
133 tprintf(
"Warning: input to_block is nullptr!\n");
140 for (
int i = 0; i < blob_lists.
size(); ++i) {
141 BLOBNBOX_IT bbox_it(blob_lists[i]);
142 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
144 bbox_it.data()->set_special_text_type(
BSTT_NONE);
◆ MergePartsByLocation()
void tesseract::EquationDetect::MergePartsByLocation |
( |
| ) |
|
|
protected |
Definition at line 424 of file equationdetect.cpp.
426 ColPartition* part =
nullptr;
430 gsearch.StartFullSearch();
431 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
437 if (parts_to_merge.
empty()) {
443 for (
int i = 0; i < parts_to_merge.
size(); ++i) {
444 ASSERT_HOST(parts_to_merge[i] !=
nullptr && parts_to_merge[i] != part);
445 part->Absorb(parts_to_merge[i],
nullptr);
447 gsearch.RepositionIterator();
452 if (parts_updated.
empty()) {
457 for (
int i = 0; i < parts_updated.
size(); ++i) {
◆ PaintColParts()
void tesseract::EquationDetect::PaintColParts |
( |
const STRING & |
outfile | ) |
const |
|
protected |
Definition at line 1481 of file equationdetect.cpp.
1484 gsearch.StartFullSearch();
1485 ColPartition* part =
nullptr;
1486 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1487 const TBOX& tbox = part->bounding_box();
1488 Box *box = boxCreate(tbox.
left(), pixGetHeight(pix) - tbox.
top(),
1491 pixRenderBoxArb(pix, box, 5, 255, 0, 0);
1493 pixRenderBoxArb(pix, box, 5, 0, 255, 0);
1495 pixRenderBoxArb(pix, box, 5, 0, 0, 255);
1500 pixWrite(outfile.
c_str(), pix, IFF_TIFF_LZW);
◆ PaintSpecialTexts()
void tesseract::EquationDetect::PaintSpecialTexts |
( |
const STRING & |
outfile | ) |
const |
|
protected |
Definition at line 1464 of file equationdetect.cpp.
1466 pix = pixConvertTo32(pixBi);
1468 ColPartition* part =
nullptr;
1469 gsearch.StartFullSearch();
1470 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1471 BLOBNBOX_C_IT blob_it(part->boxes());
1472 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1477 pixWrite(outfile.
c_str(), pix, IFF_TIFF_LZW);
◆ PrintSpecialBlobsDensity()
void tesseract::EquationDetect::PrintSpecialBlobsDensity |
( |
const ColPartition * |
part | ) |
const |
|
protected |
Definition at line 1504 of file equationdetect.cpp.
1506 TBOX box(part->bounding_box());
1508 tprintf(
"Printing special blobs density values for ColParition (t=%d,b=%d) ",
1509 h - box.top(), h - box.bottom());
1511 tprintf(
"blobs count = %d, density = ", part->boxes_count());
1513 auto type = static_cast<BlobSpecialTextType>(i);
1514 tprintf(
"%d:%f ", i, part->SpecialBlobsDensity(
type));
◆ ProcessMathBlockSatelliteParts()
void tesseract::EquationDetect::ProcessMathBlockSatelliteParts |
( |
| ) |
|
|
protected |
Definition at line 1309 of file equationdetect.cpp.
1312 ColPartition *part =
nullptr;
1315 gsearch.StartFullSearch();
1316 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1321 if (text_parts.
empty()) {
1326 text_parts.
sort(&SortCPByHeight);
1327 const TBOX& text_box = text_parts[text_parts.
size() / 2]->bounding_box();
1328 int med_height = text_box.
height();
1329 if (text_parts.
size() % 2 == 0 && text_parts.
size() > 1) {
1330 const TBOX& text_box =
1331 text_parts[text_parts.
size() / 2 - 1]->bounding_box();
1332 med_height = static_cast<int>(roundf(
1333 0.5 * (text_box.
height() + med_height)));
1337 for (
int i = 0; i < text_parts.
size(); ++i) {
1338 const TBOX& text_box(text_parts[i]->bounding_box());
1339 if (text_box.
height() > med_height) {
1350 for (
int j = 0; j < math_blocks.
size(); ++j) {
1352 text_parts[i]->Absorb(math_blocks[j],
nullptr);
◆ SearchByOverlap()
Definition at line 463 of file equationdetect.cpp.
466 ASSERT_HOST(seed !=
nullptr && parts_overlap !=
nullptr);
471 const TBOX& seed_box(seed->bounding_box());
472 const int kRadNeighborCells = 30;
473 search.StartRadSearch((seed_box.left() + seed_box.right()) / 2,
474 (seed_box.top() + seed_box.bottom()) / 2,
476 search.SetUniqueMode(
true);
481 const float kLargeOverlapTh = 0.95;
482 const float kEquXOverlap = 0.4, kEquYOverlap = 0.5;
483 while ((part =
search.NextRadSearch()) !=
nullptr) {
487 const TBOX& part_box(part->bounding_box());
491 y_overlap_fraction = part_box.y_overlap_fraction(seed_box);
494 if (x_overlap_fraction >= kLargeOverlapTh &&
495 y_overlap_fraction >= kLargeOverlapTh) {
499 if ((x_overlap_fraction > kEquXOverlap && y_overlap_fraction > 0.0) ||
500 (x_overlap_fraction > 0.0 && y_overlap_fraction > kEquYOverlap)) {
◆ SearchNNVertical()
Definition at line 1412 of file equationdetect.cpp.
1415 ColPartition *nearest_neighbor =
nullptr, *neighbor =
nullptr;
1416 const int kYGapTh = static_cast<int>(roundf(
resolution_ * 0.5));
1419 search.SetUniqueMode(
true);
1420 const TBOX& part_box(part->bounding_box());
1421 int y = search_bottom ? part_box.
bottom() : part_box.top();
1422 search.StartVerticalSearch(part_box.left(), part_box.right(), y);
1423 int min_y_gap = std::numeric_limits<int>::max();
1424 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1428 const TBOX& neighbor_box(neighbor->bounding_box());
1429 int y_gap = neighbor_box.
y_gap(part_box);
1430 if (y_gap > kYGapTh) {
1433 if (!neighbor_box.major_x_overlap(part_box) ||
1434 (search_bottom && neighbor_box.bottom() > part_box.bottom()) ||
1435 (!search_bottom && neighbor_box.top() < part_box.top())) {
1438 if (y_gap < min_y_gap) {
1440 nearest_neighbor = neighbor;
1444 return nearest_neighbor;
◆ SetLangTesseract()
void tesseract::EquationDetect::SetLangTesseract |
( |
Tesseract * |
lang_tesseract | ) |
|
◆ SetResolution()
void tesseract::EquationDetect::SetResolution |
( |
const int |
resolution | ) |
|
◆ SplitCPHor()
Definition at line 647 of file equationdetect.cpp.
650 if (part->median_width() == 0 || part->boxes_count() == 0) {
655 ColPartition* right_part = part->CopyButDontOwnBlobs();
657 parts_splitted->
clear();
659 const double kThreshold = part->median_width() * 3.0;
660 bool found_split =
true;
661 while (found_split) {
663 BLOBNBOX_C_IT box_it(right_part->boxes());
668 int previous_right = INT32_MIN;
671 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
672 const TBOX& box = box_it.data()->bounding_box();
673 if (previous_right != INT32_MIN &&
674 box.
left() - previous_right > kThreshold) {
677 const int mid_x = (box.
left() + previous_right) / 2;
678 ColPartition* left_part = right_part;
679 right_part = left_part->SplitAt(mid_x);
682 left_part->ComputeSpecialBlobsDensity();
688 previous_right = std::max(previous_right, static_cast<int>(box.
right()));
693 right_part->ComputeSpecialBlobsDensity();
◆ SplitCPHorLite()
Definition at line 697 of file equationdetect.cpp.
700 splitted_boxes->
clear();
701 if (part->median_width() == 0) {
705 const double kThreshold = part->median_width() * 3.0;
712 int previous_right = INT32_MIN;
713 BLOBNBOX_C_IT box_it(part->boxes());
714 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
715 const TBOX& box = box_it.data()->bounding_box();
716 if (previous_right != INT32_MIN &&
717 box.
left() - previous_right > kThreshold) {
720 previous_right = INT32_MIN;
722 if (previous_right == INT32_MIN) {
728 previous_right = std::max(previous_right, static_cast<int>(box.
right()));
732 if (previous_right != INT32_MIN) {
◆ best_columns_
◆ cp_seeds_
◆ cps_super_bbox_
TBOX* tesseract::EquationDetect::cps_super_bbox_ |
|
protected |
◆ equ_tesseract_
Tesseract tesseract::EquationDetect::equ_tesseract_ |
|
protected |
◆ lang_tesseract_
Tesseract* tesseract::EquationDetect::lang_tesseract_ |
|
protected |
◆ page_count_
int tesseract::EquationDetect::page_count_ |
|
protected |
◆ part_grid_
◆ resolution_
int tesseract::EquationDetect::resolution_ |
|
protected |
The documentation for this class was generated from the following files:
void delete_data_pointers()
void ExpandSeedHorizontal(const bool search_left, ColPartition *seed, GenericVector< ColPartition * > *parts_to_merge)
int EstimateTextPartLineSpacing()
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition *part)
void GetOutputTiffName(const char *name, STRING *image_name) const
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
const int kSeedBlobsCountTh
bool get_isdigit(UNICHAR_ID unichar_id) const
bool get_isalpha(UNICHAR_ID unichar_id) const
bool IsRightIndented(const EquationDetect::IndentType type)
const float kMathDigitDensityTh1
void IdentifyInlineParts()
const float kUnclearDensityTh
bool CheckForSeed2(const GenericVector< int > &indented_texts_left, const float foreground_density_th, ColPartition *part)
bool IsLeftIndented(const EquationDetect::IndentType type)
void RemoveBBox(BBC *bbox)
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const
void IdentifyInlinePartsVertical(const bool top_to_bottom, const int textPartsLineSpacing)
void ExpandSeedVertical(const bool search_bottom, ColPartition *seed, GenericVector< ColPartition * > *parts_to_merge)
void set_special_text_type(BlobSpecialTextType new_type)
static void RenderSpecialText(Pix *pix, BLOBNBOX *blob)
bool major_y_overlap(const TBOX &box) const
int IntCastRounded(double x)
int classify_class_pruner_multiplier
bool IsTextOrEquationType(PolyBlockType type)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
UNICHAR_ID unichar_id() const
bool get_ispunctuation(UNICHAR_ID unichar_id) const
bool y_overlap(const TBOX &box) const
int y_gap(const TBOX &box) const
double x_overlap_fraction(const TBOX &box) const
UnicityTable< FontInfo > & get_fontinfo_table()
BlobSpecialTextType special_text_type() const
bool bool_binary_search(const T &target) const
void IdentifyInlinePartsHorizontal()
BLOBNBOX_LIST large_blobs
bool major_x_overlap(const TBOX &box) const
const char * c_str() const
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, const UNICHAR_ID id) const
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
void SplitCPHorLite(ColPartition *part, GenericVector< TBOX > *splitted_boxes)
bool CheckSeedFgDensity(const float density_th, ColPartition *part)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
bool joined_to_prev() const
bool ExpandSeed(ColPartition *seed)
void IdentifySpecialText()
int CountAlignment(const GenericVector< int > &sorted_vec, const int val) const
ColPartitionSet ** best_columns_
const float kMathDigitDensityTh2
const float kMathItalicDensityTh
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
IndentType IsIndented(ColPartition *part)
int source_resolution() const
void MergePartsByLocation()
const int kLeftIndentAlignmentCountTh
void SearchByOverlap(ColPartition *seed, GenericVector< ColPartition * > *parts_overlap)
float ComputeForegroundDensity(const TBOX &tbox)
Tesseract * lang_tesseract_
bool CheckSeedBlobsCount(ColPartition *part)
void PaintColParts(const STRING &outfile) const
void PaintSpecialTexts(const STRING &outfile) const
const TBOX & bounding_box() const
TBOX bounding_box() const
void ProcessMathBlockSatelliteParts()
void ComputeCPsSuperBBox()
ColPartitionGrid * part_grid_
int16_t fontinfo_id() const
bool contains(char c) const
bool PTIsTextType(PolyBlockType type)
GenericVector< ColPartition * > cp_seeds_
ColPartition * SearchNNVertical(const bool search_bottom, const ColPartition *part)
void InsertPartAfterAbsorb(ColPartition *part)
DLLSYM void tprintf(const char *format,...)
int binary_search(const T &target) const
int classify_integer_matcher_multiplier
const char * id_to_unichar(UNICHAR_ID id) const
bool IsMathBlockSatellite(ColPartition *part, GenericVector< ColPartition * > *math_blocks)
bool CheckSeedDensity(const float math_density_high, const float math_density_low, const ColPartition *part) const
bool CheckSeedNeighborDensity(const ColPartition *part) const
void IdentifyBlobsToSkip(ColPartition *part)
LIST search(LIST list, void *key, int_compare is_equal)
const int kBlnBaselineOffset
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
int x_gap(const TBOX &box) const