31 #include "config_auto.h" 56 static int SortCPByTopReverse(
const void* p1,
const void* p2) {
57 const ColPartition* cp1 = *
static_cast<ColPartition* const*
>(p1);
58 const ColPartition* cp2 = *
static_cast<ColPartition* const*
>(p2);
60 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
61 return box2.
top() - box1.top();
64 static int SortCPByBottom(
const void* p1,
const void* p2) {
65 const ColPartition* cp1 = *
static_cast<ColPartition* const*
>(p1);
66 const ColPartition* cp2 = *
static_cast<ColPartition* const*
>(p2);
68 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
69 return box1.
bottom() - box2.bottom();
72 static int SortCPByHeight(
const void* p1,
const void* p2) {
73 const ColPartition* cp1 = *
static_cast<ColPartition* const*
>(p1);
74 const ColPartition* cp2 = *
static_cast<ColPartition* const*
>(p2);
76 const TBOX &box1(cp1->bounding_box()), &box2(cp2->bounding_box());
77 return box1.
height() - box2.height();
104 const char* equ_name) {
105 const char* default_name =
"equ";
106 if (equ_name ==
nullptr) {
107 equ_name = default_name;
115 tprintf(
"Warning: equation region detection requested," 116 " but %s failed to load from %s\n", equ_name, equ_datapath);
133 if (to_block ==
nullptr) {
134 tprintf(
"Warning: input to_block is nullptr!\n");
141 for (
int i = 0; i < blob_lists.
size(); ++i) {
142 BLOBNBOX_IT bbox_it(blob_lists[i]);
143 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
145 bbox_it.data()->set_special_text_type(
BSTT_NONE);
153 BLOBNBOX *blobnbox,
const int height_th) {
161 BLOB_CHOICE_LIST ratings_equ, ratings_lang;
171 const float x_orig = (box.
left() + box.
right()) / 2.0f, y_orig = box.
bottom();
172 std::unique_ptr<TBLOB> normed_blob(
new TBLOB(*tblob));
173 normed_blob->
Normalize(
nullptr,
nullptr,
nullptr, x_orig, y_orig, scaling, scaling,
183 BLOB_CHOICE *lang_choice =
nullptr, *equ_choice =
nullptr;
184 if (ratings_lang.length() > 0) {
185 BLOB_CHOICE_IT choice_it(&ratings_lang);
186 lang_choice = choice_it.data();
188 if (ratings_equ.length() > 0) {
189 BLOB_CHOICE_IT choice_it(&ratings_equ);
190 equ_choice = choice_it.data();
193 const float lang_score = lang_choice ? lang_choice->
certainty() : -FLT_MAX;
194 const float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX;
196 const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
199 const float diff = fabs(lang_score - equ_score);
203 if (fmax(lang_score, equ_score) < kConfScoreTh) {
206 }
else if (diff > kConfDiffTh && equ_score > lang_score) {
210 }
else if (lang_choice) {
235 if (ids_to_exclude.
empty()) {
236 static const STRING kCharsToEx[] = {
"'",
"`",
"\"",
"\\",
",",
".",
237 "〈",
"〉",
"《",
"》",
"」",
"「",
""};
239 while (kCharsToEx[i] !=
"") {
243 ids_to_exclude.
sort();
250 static const STRING kDigitsChars =
"|";
266 const int classify_integer_matcher =
279 BLOBNBOX_C_IT bbox_it(part->
boxes());
282 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
284 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
285 blob_heights.
push_back(bbox_it.data()->bounding_box().height());
289 const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
290 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
292 if (bbox_it.data()->special_text_type() !=
BSTT_SKIP) {
300 classify_class_pruner);
302 classify_integer_matcher);
313 BLOBNBOX_C_IT blob_it(part->
boxes());
315 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
319 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
328 BLOBNBOX_C_IT blob_it2 = blob_it;
330 while (!blob_it2.at_last()) {
331 BLOBNBOX* nextblob = blob_it2.forward();
333 if (nextblob_box.
left() >= blob_box.
right()) {
336 const float kWidthR = 0.4, kHeightR = 0.3;
338 yoverlap = blob_box.
y_overlap(nextblob_box);
339 const float widthR =
static_cast<float>(
340 std::min(nextblob_box.
width(), blob_box.
width())) /
341 std::max(nextblob_box.
width(), blob_box.
width());
342 const float heightR =
static_cast<float>(
346 if (xoverlap && yoverlap && widthR > kWidthR && heightR > kHeightR) {
350 blob_box += nextblob_box;
362 tprintf(
"Warning: lang_tesseract_ is nullptr!\n");
365 if (!part_grid || !best_columns) {
366 tprintf(
"part_grid/best_columns is nullptr!!\n");
400 for (
int i = 0; i <
cp_seeds_.size(); ++i) {
408 for (
int i = 0; i < seeds_expanded.
size(); ++i) {
438 if (parts_to_merge.
empty()) {
444 for (
int i = 0; i < parts_to_merge.
size(); ++i) {
445 ASSERT_HOST(parts_to_merge[i] !=
nullptr && parts_to_merge[i] != part);
446 part->
Absorb(parts_to_merge[i],
nullptr);
453 if (parts_updated.
empty()) {
458 for (
int i = 0; i < parts_updated.
size(); ++i) {
467 ASSERT_HOST(seed !=
nullptr && parts_overlap !=
nullptr);
473 const int kRadNeighborCells = 30;
474 search.StartRadSearch((seed_box.left() + seed_box.right()) / 2,
475 (seed_box.top() + seed_box.bottom()) / 2,
477 search.SetUniqueMode(
true);
482 const float kLargeOverlapTh = 0.95;
483 const float kEquXOverlap = 0.4, kEquYOverlap = 0.5;
484 while ((part =
search.NextRadSearch()) !=
nullptr) {
492 y_overlap_fraction = part_box.y_overlap_fraction(seed_box);
495 if (x_overlap_fraction >= kLargeOverlapTh &&
496 y_overlap_fraction >= kLargeOverlapTh) {
500 if ((x_overlap_fraction > kEquXOverlap && y_overlap_fraction > 0.0) ||
501 (x_overlap_fraction > 0.0 && y_overlap_fraction > kEquYOverlap)) {
527 part_box.left(), part_box.bottom(), &grid_x, &grid_y);
556 const int kTextBlobsTh = 20;
581 indented_texts_left.
sort();
582 texts_foreground_density.
sort();
583 float foreground_density_th = 0.15;
584 if (!texts_foreground_density.
empty()) {
586 foreground_density_th = 0.8 * texts_foreground_density[
587 texts_foreground_density.
size() / 2];
590 for (
int i = 0; i < seeds1.
size(); ++i) {
591 const TBOX& box = seeds1[i]->bounding_box();
604 for (
int i = 0; i < seeds2.
size(); ++i) {
605 if (
CheckForSeed2(indented_texts_left, foreground_density_th, seeds2[i])) {
614 const int pix_height = pixGetHeight(pix_bi);
615 Box* box = boxCreate(tbox.
left(), pix_height - tbox.
top(),
617 Pix *pix_sub = pixClipRectangle(pix_bi, box,
nullptr);
619 pixForegroundFraction(pix_sub, &fract);
620 pixDestroy(&pix_sub);
633 float parts_passed = 0.0;
634 for (
int i = 0; i < sub_boxes.
size(); ++i) {
636 if (density < density_th) {
642 const float kSeedPartRatioTh = 0.3;
643 bool retval = (parts_passed / sub_boxes.
size() >= kSeedPartRatioTh);
658 parts_splitted->
clear();
661 bool found_split =
true;
662 while (found_split) {
664 BLOBNBOX_C_IT box_it(right_part->
boxes());
669 int previous_right = INT32_MIN;
672 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
673 const TBOX& box = box_it.data()->bounding_box();
674 if (previous_right != INT32_MIN &&
675 box.
left() - previous_right > kThreshold) {
678 const int mid_x = (box.
left() + previous_right) / 2;
680 right_part = left_part->
SplitAt(mid_x);
689 previous_right = std::max(previous_right, static_cast<int>(box.
right()));
701 splitted_boxes->
clear();
713 int previous_right = INT32_MIN;
714 BLOBNBOX_C_IT box_it(part->
boxes());
715 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
716 const TBOX& box = box_it.data()->bounding_box();
717 if (previous_right != INT32_MIN &&
718 box.
left() - previous_right > kThreshold) {
721 previous_right = INT32_MIN;
723 if (previous_right == INT32_MIN) {
729 previous_right = std::max(previous_right, static_cast<int>(box.
right()));
733 if (previous_right != INT32_MIN) {
740 const float foreground_density_th,
746 if (!indented_texts_left.
empty() &&
762 if (sorted_vec.
empty()) {
765 const int kDistTh =
static_cast<int>(roundf(0.03 *
resolution_));
771 while (index >= 0 && abs(val - sorted_vec[index--]) < kDistTh) {
777 while (index < sorted_vec.
size() && sorted_vec[index++] - val < kDistTh) {
808 const int kGapTh =
static_cast<int>(roundf(
811 search.SetUniqueMode(
true);
814 for (
int i = 0; i <
cp_seeds_.size(); ++i) {
820 if (left_margin + kMarginDiffTh < right_margin &&
821 left_margin < kMarginDiffTh) {
824 part_box.right(), part_box.top(), part_box.bottom());
825 right_to_left =
false;
826 }
else if (left_margin > cps_cx) {
830 part_box.left(), part_box.top(), part_box.bottom());
831 right_to_left =
true;
837 bool side_neighbor_found =
false;
838 while ((neighbor =
search.NextSideSearch(right_to_left)) !=
nullptr) {
841 part_box.x_gap(neighbor_box) > kGapTh ||
842 !part_box.major_y_overlap(neighbor_box) ||
843 part_box.major_x_overlap(neighbor_box)) {
847 side_neighbor_found =
true;
850 if (!side_neighbor_found) {
855 if (neighbor_box.width() > part_box.width() &&
879 if (prev !=
nullptr) {
881 const TBOX &prev_box = prev->bounding_box();
885 int gap = current_box.
y_gap(prev_box);
886 if (gap < std::min(current_box.
height(), prev_box.
height())) {
895 if (ygaps.
size() < 8) {
901 int spacing = 0,
count;
903 spacing += ygaps[
count];
905 return spacing /
count;
909 const bool top_to_bottom,
const int textparts_linespacing) {
922 for (
int i = 0; i <
cp_seeds_.size(); ++i) {
928 if (
IsInline(!top_to_bottom, textparts_linespacing, part)) {
938 const int textparts_linespacing,
946 const float kYGapRatioTh = 1.0;
949 search.StartVerticalSearch(part_box.left(), part_box.right(),
952 search.StartVerticalSearch(part_box.left(), part_box.right(),
955 search.SetUniqueMode(
true);
956 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
958 if (part_box.y_gap(neighbor_box) > kYGapRatioTh *
959 std::min(part_box.height(), neighbor_box.height())) {
968 const float kHeightRatioTh = 0.5;
969 const int kYGapTh = textparts_linespacing > 0 ?
970 textparts_linespacing +
static_cast<int>(roundf(0.02 *
resolution_)):
972 if (part_box.x_overlap(neighbor_box) &&
973 part_box.y_gap(neighbor_box) <= kYGapTh &&
975 static_cast<float>(std::min(part_box.height(), neighbor_box.height())) /
976 std::max(part_box.height(), neighbor_box.height()) > kHeightRatioTh) {
988 const int kSeedMathBlobsCount = 2;
989 const int kSeedMathDigitBlobsCount = 5;
995 math_blobs + digit_blobs <= kSeedMathDigitBlobsCount) {
1003 const float math_density_high,
1004 const float math_density_low,
1010 if (math_digit_density > math_density_high) {
1014 math_digit_density > math_density_low) {
1027 const int kXGapTh =
static_cast<int>(roundf(0.5 *
resolution_));
1028 const int kRadiusTh =
static_cast<int>(roundf(3.0 *
resolution_));
1029 const int kYGapTh =
static_cast<int>(roundf(0.5 *
resolution_));
1034 search.StartRadSearch((part_box.left() + part_box.right()) / 2,
1035 (part_box.top() + part_box.bottom()) / 2, kRadiusTh);
1036 search.SetUniqueMode(
true);
1037 bool left_indented =
false, right_indented =
false;
1038 while ((neighbor =
search.NextRadSearch()) !=
nullptr &&
1039 (!left_indented || !right_indented)) {
1040 if (neighbor == part) {
1045 if (part_box.major_y_overlap(neighbor_box) &&
1046 part_box.x_gap(neighbor_box) < kXGapTh) {
1057 if (!part_box.x_overlap(neighbor_box) || part_box.y_overlap(neighbor_box)) {
1061 if (part_box.y_gap(neighbor_box) < kYGapTh) {
1062 const int left_gap = part_box.left() - neighbor_box.left();
1063 const int right_gap = neighbor_box.right() - part_box.right();
1064 if (left_gap > kXGapTh) {
1065 left_indented =
true;
1067 if (right_gap > kXGapTh) {
1068 right_indented =
true;
1073 if (left_indented && right_indented) {
1076 if (left_indented) {
1079 if (right_indented) {
1086 if (seed ==
nullptr ||
1099 if (parts_to_merge.
empty()) {
1107 for (
int i = 0; i < parts_to_merge.
size(); ++i) {
1112 for (
int j = 0; j <
cp_seeds_.size(); ++j) {
1122 seed->
Absorb(part,
nullptr);
1129 const bool search_left,
1132 ASSERT_HOST(seed !=
nullptr && parts_to_merge !=
nullptr);
1133 const float kYOverlapTh = 0.6;
1134 const int kXGapTh =
static_cast<int>(roundf(0.2 *
resolution_));
1138 const int x = search_left ? seed_box.
left() : seed_box.right();
1139 search.StartSideSearch(x, seed_box.bottom(), seed_box.top());
1140 search.SetUniqueMode(
true);
1144 while ((part =
search.NextSideSearch(search_left)) !=
nullptr) {
1149 if (part_box.x_gap(seed_box) > kXGapTh) {
1154 if ((part_box.left() >= seed_box.left() && search_left) ||
1155 (part_box.right() <= seed_box.right() && !search_left)) {
1172 if (part_box.y_overlap_fraction(seed_box) < kYOverlapTh &&
1173 seed_box.y_overlap_fraction(part_box) < kYOverlapTh) {
1185 const bool search_bottom,
1188 ASSERT_HOST(seed !=
nullptr && parts_to_merge !=
nullptr &&
1190 const float kXOverlapTh = 0.4;
1191 const int kYGapTh =
static_cast<int>(roundf(0.2 *
resolution_));
1195 const int y = search_bottom ? seed_box.
bottom() : seed_box.top();
1196 search.StartVerticalSearch(
1198 search.SetUniqueMode(
true);
1203 int skipped_min_top = std::numeric_limits<int>::max(), skipped_max_bottom = -1;
1204 while ((part =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1210 if (part_box.y_gap(seed_box) > kYGapTh) {
1215 if ((part_box.bottom() >= seed_box.bottom() && search_bottom) ||
1216 (part_box.top() <= seed_box.top() && !search_bottom)) {
1220 bool skip_part =
false;
1233 if (part_box.x_overlap_fraction(seed_box) < kXOverlapTh &&
1234 seed_box.x_overlap_fraction(part_box) < kXOverlapTh) {
1240 if (skipped_min_top > part_box.top()) {
1241 skipped_min_top = part_box.
top();
1243 if (skipped_max_bottom < part_box.bottom()) {
1244 skipped_max_bottom = part_box.bottom();
1259 for (
int i = 0; i < parts.
size(); i++) {
1260 const TBOX& part_box(parts[i]->bounding_box());
1261 if ((search_bottom && part_box.
top() <= skipped_max_bottom) ||
1262 (!search_bottom && part_box.
bottom() >= skipped_min_top)) {
1272 const TBOX& part_box)
const {
1273 const int kXGapTh =
static_cast<int>(roundf(0.25 *
resolution_));
1274 const int kYGapTh =
static_cast<int>(roundf(0.05 *
resolution_));
1284 part_box.
y_gap(seed_box) > kYGapTh) &&
1286 part_box.
x_gap(seed_box) > kXGapTh)) {
1322 if (text_parts.
empty()) {
1327 text_parts.
sort(&SortCPByHeight);
1328 const TBOX& text_box = text_parts[text_parts.
size() / 2]->bounding_box();
1329 int med_height = text_box.
height();
1330 if (text_parts.
size() % 2 == 0 && text_parts.
size() > 1) {
1331 const TBOX& text_box =
1332 text_parts[text_parts.
size() / 2 - 1]->bounding_box();
1333 med_height =
static_cast<int>(roundf(
1334 0.5 * (text_box.
height() + med_height)));
1338 for (
int i = 0; i < text_parts.
size(); ++i) {
1339 const TBOX& text_box(text_parts[i]->bounding_box());
1340 if (text_box.
height() > med_height) {
1351 for (
int j = 0; j < math_blocks.
size(); ++j) {
1353 text_parts[i]->Absorb(math_blocks[j],
nullptr);
1361 ASSERT_HOST(part !=
nullptr && math_blocks !=
nullptr);
1362 math_blocks->
clear();
1366 int y_gaps[2] = {std::numeric_limits<int>::max(), std::numeric_limits<int>::max()};
1368 int neighbors_left = std::numeric_limits<int>::max(), neighbors_right = 0;
1369 for (
int i = 0; i < 2; ++i) {
1372 const TBOX& neighbor_box = neighbors[i]->bounding_box();
1373 y_gaps[i] = neighbor_box.
y_gap(part_box);
1374 if (neighbor_box.
left() < neighbors_left) {
1375 neighbors_left = neighbor_box.
left();
1377 if (neighbor_box.
right() > neighbors_right) {
1378 neighbors_right = neighbor_box.
right();
1382 if (neighbors[0] == neighbors[1]) {
1384 neighbors[1] =
nullptr;
1385 y_gaps[1] = std::numeric_limits<int>::max();
1389 if (part_box.left() < neighbors_left || part_box.right() > neighbors_right) {
1394 int index = y_gaps[0] < y_gaps[1] ? 0 : 1;
1398 math_blocks->
push_back(neighbors[index]);
1407 math_blocks->
push_back(neighbors[index]);
1416 ColPartition *nearest_neighbor =
nullptr, *neighbor =
nullptr;
1417 const int kYGapTh =
static_cast<int>(roundf(
resolution_ * 0.5));
1420 search.SetUniqueMode(
true);
1422 int y = search_bottom ? part_box.
bottom() : part_box.top();
1423 search.StartVerticalSearch(part_box.left(), part_box.right(), y);
1424 int min_y_gap = std::numeric_limits<int>::max();
1425 while ((neighbor =
search.NextVerticalSearch(search_bottom)) !=
nullptr) {
1429 const TBOX& neighbor_box(neighbor->bounding_box());
1430 int y_gap = neighbor_box.
y_gap(part_box);
1431 if (y_gap > kYGapTh) {
1434 if (!neighbor_box.major_x_overlap(part_box) ||
1435 (search_bottom && neighbor_box.bottom() > part_box.bottom()) ||
1436 (!search_bottom && neighbor_box.top() < part_box.top())) {
1439 if (y_gap < min_y_gap) {
1441 nearest_neighbor = neighbor;
1445 return nearest_neighbor;
1453 const int kYGapTh =
static_cast<int>(roundf(
resolution_ * 0.1));
1458 STRING* image_name)
const {
1461 snprintf(page,
sizeof(page),
"%04d",
page_count_);
1467 pix = pixConvertTo32(pixBi);
1472 BLOBNBOX_C_IT blob_it(part->
boxes());
1473 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1478 pixWrite(outfile.
string(), pix, IFF_TIFF_LZW);
1485 gsearch.StartFullSearch();
1487 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1489 Box *box = boxCreate(tbox.
left(), pixGetHeight(pix) - tbox.
top(),
1492 pixRenderBoxArb(pix, box, 5, 255, 0, 0);
1494 pixRenderBoxArb(pix, box, 5, 0, 255, 0);
1496 pixRenderBoxArb(pix, box, 5, 0, 0, 255);
1501 pixWrite(outfile.
string(), pix, IFF_TIFF_LZW);
1509 tprintf(
"Printing special blobs density values for ColParition (t=%d,b=%d) ",
1510 h - box.top(), h - box.bottom());
void RepositionIterator()
bool equationdetect_save_bi_image
bool major_y_overlap(const TBOX &box) const
int CountAlignment(const GenericVector< int > &sorted_vec, const int val) const
void SplitCPHor(ColPartition *part, GenericVector< ColPartition *> *parts_splitted)
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
void IdentifyInlineParts()
bool get_ispunctuation(UNICHAR_ID unichar_id) const
void GetOutputTiffName(const char *name, STRING *image_name) const
bool IsRightIndented(const EquationDetect::IndentType type)
#define BOOL_VAR(name, val, comment)
void IdentifyBlobsToSkip(ColPartition *part)
float ComputeForegroundDensity(const TBOX &tbox)
UnicityTable< FontInfo > & get_fontinfo_table()
int y_gap(const TBOX &box) const
const char * string() const
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
int LabelSpecialText(TO_BLOCK *to_block)
BlobRegionType blob_type() const
bool equationdetect_save_seed_image
void SetLangTesseract(Tesseract *lang_tesseract)
bool IsInline(const bool search_bottom, const int textPartsLineSpacing, ColPartition *part)
Tesseract * lang_tesseract_
bool equationdetect_save_spt_image
bool CheckForSeed2(const GenericVector< int > &indented_texts_left, const float foreground_density_th, ColPartition *part)
void ProcessMathBlockSatelliteParts()
static void RenderSpecialText(Pix *pix, BLOBNBOX *blob)
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
int x_gap(const TBOX &box) const
float SpecialBlobsDensity(const BlobSpecialTextType type) const
const int kBlnBaselineOffset
const int kSeedBlobsCountTh
bool get_isalpha(UNICHAR_ID unichar_id) const
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const
void SearchByOverlap(ColPartition *seed, GenericVector< ColPartition *> *parts_overlap)
void SetResolution(const int resolution)
void Absorb(ColPartition *other, WidthCallback *cb)
ColPartition * SearchNNVertical(const bool search_bottom, const ColPartition *part)
bool IsTextOrEquationType(PolyBlockType type)
void ComputeCPsSuperBBox()
int16_t fontinfo_id() const
double x_overlap_fraction(const TBOX &box) const
IndentType IsIndented(ColPartition *part)
int classify_class_pruner_multiplier
void set_blob_type(BlobRegionType t)
void set_flow(BlobTextFlowType f)
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, const UNICHAR_ID id) const
const float kMathDigitDensityTh2
bool get_isdigit(UNICHAR_ID unichar_id) const
void IdentifyInlinePartsVertical(const bool top_to_bottom, const int textPartsLineSpacing)
bool bool_binary_search(const T &target) const
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
ColPartition * SplitAt(int split_x)
void PaintColParts(const STRING &outfile) const
int source_resolution() const
void RemoveBBox(BBC *bbox)
bool CheckSeedFgDensity(const float density_th, ColPartition *part)
bool major_x_overlap(const TBOX &box) const
bool joined_to_prev() const
int IntCastRounded(double x)
void IdentifyInlinePartsHorizontal()
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
LIST search(LIST list, void *key, int_compare is_equal)
BlobSpecialTextType special_text_type() const
ColPartition * CopyButDontOwnBlobs()
DLLSYM void tprintf(const char *format,...)
TBOX bounding_box() const
int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)
bool ExpandSeed(ColPartition *seed)
void InsertPartAfterAbsorb(ColPartition *part)
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
ColPartitionGrid * part_grid_
bool equationdetect_save_merged_image
GenericVector< ColPartition * > cp_seeds_
BlobTextFlowType flow() const
const TBOX & bounding_box() const
int classify_integer_matcher_multiplier
int SpecialBlobsCount(const BlobSpecialTextType type)
bool contains(const char c) const
void MergePartsByLocation()
bool y_overlap(const TBOX &box) const
bool CheckSeedBlobsCount(ColPartition *part)
void SetPartitionType(int resolution, ColPartitionSet *columns)
bool IsMathBlockSatellite(ColPartition *part, GenericVector< ColPartition *> *math_blocks)
const char * id_to_unichar(UNICHAR_ID id) const
void ExpandSeedVertical(const bool search_bottom, ColPartition *seed, GenericVector< ColPartition *> *parts_to_merge)
void delete_data_pointers()
bool PTIsTextType(PolyBlockType type)
int binary_search(const T &target) const
const TBOX & bounding_box() const
bool CheckSeedNeighborDensity(const ColPartition *part) const
bool IsVerticalType() const
void SplitCPHorLite(ColPartition *part, GenericVector< TBOX > *splitted_boxes)
bool CheckSeedDensity(const float math_density_high, const float math_density_low, const ColPartition *part) const
const float kMathItalicDensityTh
void IdentifySpecialText()
void ComputeSpecialBlobsDensity()
BLOBNBOX_LIST large_blobs
void PaintSpecialTexts(const STRING &outfile) const
const int kLeftIndentAlignmentCountTh
bool IsLeftIndented(const EquationDetect::IndentType type)
ColPartitionSet ** best_columns_
EquationDetect(const char *equ_datapath, const char *equ_language)
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const
void ExpandSeedHorizontal(const bool search_left, ColPartition *seed, GenericVector< ColPartition *> *parts_to_merge)
UNICHAR_ID unichar_id() const
int EstimateTextPartLineSpacing()
void PrintSpecialBlobsDensity(const ColPartition *part) const
void set_type(PolyBlockType t)
const float kMathDigitDensityTh1
const float kUnclearDensityTh
PolyBlockType type() const
void set_special_text_type(BlobSpecialTextType new_type)