tesseract  5.0.0-alpha-619-ge9db
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

Public Member Functions

 ParagraphModelSmearer (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
void Smear ()
 

Detailed Description

Definition at line 236 of file paragraphs_internal.h.

Constructor & Destructor Documentation

◆ ParagraphModelSmearer()

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1335 of file paragraphs.cpp.

1337  {
1338  SetOfModels no_models;
1339  if (row_start < row_start_) row_start = row_start_;
1340  if (row_end > row_end_) row_end = row_end_;
1341 
1342  for (int row = (row_start > 0) ? row_start - 1 : row_start; row < row_end;
1343  row++) {
1344  if ((*rows_)[row].ri_->num_words == 0) {
1345  OpenModels(row + 1) = no_models;
1346  } else {
1347  SetOfModels &opened = OpenModels(row);
1348  (*rows_)[row].StartHypotheses(&opened);
1349 

Member Function Documentation

◆ Smear()

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1382 of file paragraphs.cpp.

1384  {
1385  switch (OpenModels(i)[m]->justification()) {
1386  case JUSTIFICATION_LEFT: left_align_open = true; break;
1387  case JUSTIFICATION_RIGHT: right_align_open = true; break;
1388  default: left_align_open = right_align_open = true;
1389  }
1390  }
1391  // Step Two:
1392  // Use that knowledge to figure out if this row is likely to
1393  // start a paragraph.
1394  bool likely_start;
1395  if (i == 0) {
1396  likely_start = true;
1397  } else {
1398  if ((left_align_open && right_align_open) ||
1399  (!left_align_open && !right_align_open)) {
1400  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1401  JUSTIFICATION_LEFT) ||
1402  LikelyParagraphStart((*rows_)[i - 1], row,
1403  JUSTIFICATION_RIGHT);
1404  } else if (left_align_open) {
1405  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1406  JUSTIFICATION_LEFT);
1407  } else {
1408  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1409  JUSTIFICATION_RIGHT);
1410  }
1411  }
1412 
1413  // Step Three:
1414  // If this text line seems like an obvious first line of an
1415  // open model, or an obvious continuation of an existing
1416  // modelled paragraph, mark it up.
1417  if (likely_start) {
1418  // Add Start Hypotheses for all Open models that fit.
1419  for (int m = 0; m < OpenModels(i).size(); m++) {
1420  if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
1421  row.AddStartLine(OpenModels(i)[m]);
1422  }
1423  }
1424  } else {
1425  // Add relevant body line hypotheses.
1426  SetOfModels last_line_models;
1427  if (i > 0) {
1428  (*rows_)[i - 1].StrongHypotheses(&last_line_models);
1429  } else {
1430  theory_->NonCenteredModels(&last_line_models);
1431  }
1432  for (int m = 0; m < last_line_models.size(); m++) {
1433  const ParagraphModel *model = last_line_models[m];
1434  if (ValidBodyLine(rows_, i, model))
1435  row.AddBodyLine(model);
1436  }
1437  }
1438 
1439  // Step Four:
1440  // If we're still quite unsure about this line, go through all
1441  // models in our theory and see if this row could be the start
1442  // of any of our models.
1443  if (row.GetLineType() == LT_UNKNOWN ||
1444  (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
1445  SetOfModels all_models;
1446  theory_->NonCenteredModels(&all_models);
1447  for (int m = 0; m < all_models.size(); m++) {
1448  if (ValidFirstLine(rows_, i, all_models[m])) {
1449  row.AddStartLine(all_models[m]);
1450  }
1451  }
1452  }
1453  // Step Five:
1454  // Since we may have updated the hypotheses about this row, we need
1455  // to recalculate the Open models for the rest of rows[i + 1, row_end)
1456  if (row.GetLineType() != LT_UNKNOWN) {
1457  CalculateOpenModels(i + 1, row_end_);
1458  }
1459  }
1460 }
1461 
1462 // ================ Main Paragraph Detection Algorithm =======================
1463 
1464 // Find out what ParagraphModels are actually used, and discard any
1465 // that are not.
1466 static void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,
1467  ParagraphTheory *theory) {
1468  SetOfModels used_models;
1469  for (int i = 0; i < rows.size(); i++) {
1470  rows[i].StrongHypotheses(&used_models);
1471  }
1472  theory->DiscardUnusedModels(used_models);
1473 }
1474 
1475 // DowngradeWeakestToCrowns:

The documentation for this class was generated from the following files:
paragraphs_internal.h
paragraphs.cpp
tesseract::ValidBodyLine
bool ValidBodyLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
Definition: paragraphs.cpp:1303
tesseract::JUSTIFICATION_RIGHT
Definition: publictypes.h:252
ParagraphModel
Definition: ocrpara.h:114
tesseract::JUSTIFICATION_LEFT
Definition: publictypes.h:250
tesseract::LT_UNKNOWN
Definition: paragraphs_internal.h:52
GenericVector
Definition: baseapi.h:40
tesseract::LT_START
Definition: paragraphs_internal.h:50
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::ParagraphTheory::NonCenteredModels
void NonCenteredModels(SetOfModels *models)
Definition: paragraphs.cpp:1276
tesseract::SetOfModels
GenericVectorEqEq< const ParagraphModel * > SetOfModels
Definition: paragraphs_internal.h:98
tesseract::ValidFirstLine
bool ValidFirstLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
Definition: paragraphs.cpp:1292