tessapi/5.x/a00179_source.html

/**********************************************************************

 * File:        paragraphs.cpp

 * Description: Paragraph detection for tesseract.

 * Author:      David Eger

 *

 * (C) Copyright 2011, Google Inc.

 ** Licensed under the Apache License, Version 2.0 (the "License");

 ** you may not use this file except in compliance with the License.

 ** You may obtain a copy of the License at

 ** http://www.apache.org/licenses/LICENSE-2.0

 ** Unless required by applicable law or agreed to in writing, software

 ** distributed under the License is distributed on an "AS IS" BASIS,

 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 ** See the License for the specific language governing permissions and

 ** limitations under the License.

 *

 **********************************************************************/


#include "paragraphs.h"

#include <cctype>                 // for isspace

#include <cmath>                  // for abs

#include <cstdio>                 // for snprintf

#include <cstdlib>                // for abs

#include <cstring>                // for strchr, strlen

#include <algorithm>              // for max

#include <memory>                 // for unique_ptr

#include <tesseract/genericvector.h>        // for GenericVector, GenericVectorEqEq

#include <tesseract/helpers.h>              // for UpdateRange, ClipToRange

#include "host.h"                 // for NearlyEqual

#include "mutableiterator.h"      // for MutableIterator

#include "ocrblock.h"             // for BLOCK

#include "ocrpara.h"              // for ParagraphModel, PARA, PARA_IT, PARA...

#include "ocrrow.h"               // for ROW

#include <tesseract/pageiterator.h>         // for PageIterator

#include "pageres.h"              // for PAGE_RES_IT, WERD_RES, ROW_RES, BLO...

#include "paragraphs_internal.h"  // for RowScratchRegisters, SetOfModels

#include "pdblock.h"              // for PDBLK

#include "polyblk.h"              // for POLY_BLOCK

#include <tesseract/publictypes.h>          // for JUSTIFICATION_LEFT, JUSTIFICATION_R...

#include "ratngs.h"               // for WERD_CHOICE

#include "rect.h"                 // for TBOX

#include "statistc.h"             // for STATS

#include <tesseract/strngs.h>               // for STRING

#include "tprintf.h"              // for tprintf

#include <tesseract/unichar.h>              // for UNICHAR, UNICHAR_ID

#include "unicharset.h"           // for UNICHARSET

#include "unicodes.h"             // for kPDF, kRLE

#include "werd.h"                 // for WERD, W_REP_CHAR


namespace tesseract {


// Special "weak" ParagraphModels.

const ParagraphModel *kCrownLeft

    = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD111F));

const ParagraphModel *kCrownRight

    = reinterpret_cast<ParagraphModel *>(static_cast<uintptr_t>(0xDEAD888F));


// Do the text and geometry of two rows support a paragraph break between them?

static bool LikelyParagraphStart(const RowScratchRegisters &before,

                                 const RowScratchRegisters &after,

                                 tesseract::ParagraphJustification j);


// Given the width of a typical space between words, what is the threshold

// by which by which we think left and right alignments for paragraphs

// can vary and still be aligned.

static int Epsilon(int space_pix) {

  return space_pix * 4 / 5;

}


static bool AcceptableRowArgs(

    int debug_level, int min_num_rows, const char *function_name,

    const GenericVector<RowScratchRegisters> *rows,

    int row_start, int row_end) {

  if (row_start < 0 || row_end > rows->size() || row_start > row_end) {

    tprintf("Invalid arguments rows[%d, %d) while rows is of size %d.\n",

            row_start, row_end, rows->size());

    return false;

  }

  if (row_end - row_start < min_num_rows) {

    if (debug_level > 1) {

      tprintf("# Too few rows[%d, %d) for %s.\n",

              row_start, row_end, function_name);

    }

    return false;

  }

  return true;

}


// =============================== Debug Code ================================


// Convert an integer to a decimal string.

static STRING StrOf(int num) {

  char buffer[30];

  snprintf(buffer, sizeof(buffer), "%d", num);

  return STRING(buffer);

}


// Given a row-major matrix of unicode text and a column separator, print

// a formatted table.  For ASCII, we get good column alignment.

static void PrintTable(const GenericVector<GenericVector<STRING> > &rows,

                       const STRING &colsep) {

  GenericVector<int> max_col_widths;

  for (int r = 0; r < rows.size(); r++) {

    int num_columns = rows[r].size();

    for (int c = 0; c < num_columns; c++) {

      int num_unicodes = 0;

      for (int i = 0; i < rows[r][c].size(); i++) {

        if ((rows[r][c][i] & 0xC0) != 0x80) num_unicodes++;

      }

      if (c >= max_col_widths.size()) {

        max_col_widths.push_back(num_unicodes);

      } else {

        if (num_unicodes > max_col_widths[c])

          max_col_widths[c] = num_unicodes;

      }

    }

  }


  GenericVector<STRING> col_width_patterns;

  for (int c = 0; c < max_col_widths.size(); c++) {

    col_width_patterns.push_back(

        STRING("%-") + StrOf(max_col_widths[c]) + "s");

  }


  for (int r = 0; r < rows.size(); r++) {

    for (int c = 0; c < rows[r].size(); c++) {

      if (c > 0)

        tprintf("%s", colsep.c_str());

      tprintf(col_width_patterns[c].c_str(), rows[r][c].c_str());

    }

    tprintf("\n");

  }

}


static STRING RtlEmbed(const STRING &word, bool rtlify) {

  if (rtlify)

    return STRING(kRLE) + word + STRING(kPDF);

  return word;

}


// Print the current thoughts of the paragraph detector.

static void PrintDetectorState(const ParagraphTheory &theory,

                               const GenericVector<RowScratchRegisters> &rows) {

  GenericVector<GenericVector<STRING> > output;

  output.push_back(GenericVector<STRING>());

  output.back().push_back("#row");

  output.back().push_back("space");

  output.back().push_back("..");

  output.back().push_back("lword[widthSEL]");

  output.back().push_back("rword[widthSEL]");

  RowScratchRegisters::AppendDebugHeaderFields(&output.back());

  output.back().push_back("text");


  for (int i = 0; i < rows.size(); i++) {

    output.push_back(GenericVector<STRING>());

    GenericVector<STRING> &row = output.back();

    const RowInfo& ri = *rows[i].ri_;

    row.push_back(StrOf(i));

    row.push_back(StrOf(ri.average_interword_space));

    row.push_back(ri.has_leaders ? ".." : " ");

    row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) +

                  "[" + StrOf(ri.lword_box.width()) +

                  (ri.lword_likely_starts_idea ? "S" : "s") +

                  (ri.lword_likely_ends_idea ? "E" : "e") +

                  (ri.lword_indicates_list_item ? "L" : "l") +

                  "]");

    row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) +

                  "[" + StrOf(ri.rword_box.width()) +

                  (ri.rword_likely_starts_idea ? "S" : "s") +

                  (ri.rword_likely_ends_idea ? "E" : "e") +

                  (ri.rword_indicates_list_item ? "L" : "l") +

                  "]");

    rows[i].AppendDebugInfo(theory, &row);

    row.push_back(RtlEmbed(ri.text, !ri.ltr));

  }

  PrintTable(output, " ");


  tprintf("Active Paragraph Models:\n");

  for (int m = 0; m < theory.models().size(); m++) {

    tprintf(" %d: %s\n", m + 1, theory.models()[m]->ToString().c_str());

  }

}


static void DebugDump(

    bool should_print,

    const STRING &phase,

    const ParagraphTheory &theory,

    const GenericVector<RowScratchRegisters> &rows) {

  if (!should_print)

    return;

  tprintf("# %s\n", phase.c_str());

  PrintDetectorState(theory, rows);

}


// Print out the text for rows[row_start, row_end)

static void PrintRowRange(const GenericVector<RowScratchRegisters> &rows,

                          int row_start, int row_end) {

  tprintf("======================================\n");

  for (int row = row_start; row < row_end; row++) {

    tprintf("%s\n", rows[row].ri_->text.c_str());

  }

  tprintf("======================================\n");

}


// ============= Brain Dead Language Model (ASCII Version) ===================


static bool IsLatinLetter(int ch) {

  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');

}


static bool IsDigitLike(int ch) {

  return ch == 'o' || ch == 'O' || ch == 'l' || ch == 'I';

}


static bool IsOpeningPunct(int ch) {

  return strchr("'\"({[", ch) != nullptr;

}


static bool IsTerminalPunct(int ch) {

  return strchr(":'\".?!]})", ch) != nullptr;

}


// Return a pointer after consuming as much text as qualifies as roman numeral.

static const char *SkipChars(const char *str, const char *toskip) {

  while (*str != '\0' && strchr(toskip, *str)) { str++; }

  return str;

}


static const char *SkipChars(const char *str, bool (*skip)(int)) {

  while (*str != '\0' && skip(*str)) { str++; }

  return str;

}


static const char *SkipOne(const char *str, const char *toskip) {

  if (*str != '\0' && strchr(toskip, *str)) return str + 1;

  return str;

}


// Return whether it is very likely that this is a numeral marker that could

// start a list item.  Some examples include:

//   A   I   iii.   VI   (2)   3.5.   [C-4]

static bool LikelyListNumeral(const STRING &word) {

  const char *kRomans = "ivxlmdIVXLMD";

  const char *kDigits = "012345789";

  const char *kOpen = "[{(";

  const char *kSep = ":;-.,";

  const char *kClose = "]})";


  int num_segments = 0;

  const char *pos = word.c_str();

  while (*pos != '\0' && num_segments < 3) {

    // skip up to two open parens.

    const char *numeral_start = SkipOne(SkipOne(pos, kOpen), kOpen);

    const char *numeral_end = SkipChars(numeral_start, kRomans);

    if (numeral_end != numeral_start) {

      // Got Roman Numeral. Great.

    } else {

      numeral_end = SkipChars(numeral_start, kDigits);

      if (numeral_end == numeral_start) {

        // If there's a single latin letter, we can use that.

        numeral_end = SkipChars(numeral_start, IsLatinLetter);

        if (numeral_end - numeral_start != 1)

          break;

      }

    }

    // We got some sort of numeral.

    num_segments++;

    // Skip any trailing parens or punctuation.

    pos = SkipChars(SkipChars(numeral_end, kClose), kSep);

    if (pos == numeral_end)

      break;

  }

  return *pos == '\0';

}


static bool LikelyListMark(const STRING &word) {

  const char *kListMarks = "0Oo*.,+.";

  return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr;

}


bool AsciiLikelyListItem(const STRING &word) {

  return LikelyListMark(word) || LikelyListNumeral(word);

}


// ========== Brain Dead Language Model (Tesseract Version) ================


// Return the first Unicode Codepoint from werd[pos].

int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) {

  if (!u || !werd || pos > werd->length())

    return 0;

  return UNICHAR(u->id_to_unichar(werd->unichar_id(pos)), -1).first_uni();

}


// A useful helper class for finding the first j >= i so that word[j]

// does not have given character type.

class UnicodeSpanSkipper {

 public:

  UnicodeSpanSkipper(const UNICHARSET *unicharset, const WERD_CHOICE *word)

      : u_(unicharset), word_(word) { wordlen_ = word->length(); }


  // Given an input position, return the first position >= pos not punc.

  int SkipPunc(int pos);

  // Given an input position, return the first position >= pos not digit.

  int SkipDigits(int pos);

  // Given an input position, return the first position >= pos not roman.

  int SkipRomans(int pos);

  // Given an input position, return the first position >= pos not alpha.

  int SkipAlpha(int pos);


 private:

  const UNICHARSET *u_;

  const WERD_CHOICE *word_;

  int wordlen_;

};


int UnicodeSpanSkipper::SkipPunc(int pos) {

  while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) pos++;

  return pos;

}


int UnicodeSpanSkipper::SkipDigits(int pos) {

  while (pos < wordlen_ && (u_->get_isdigit(word_->unichar_id(pos)) ||

                            IsDigitLike(UnicodeFor(u_, word_, pos)))) pos++;

  return pos;

}


int UnicodeSpanSkipper::SkipRomans(int pos) {

  const char *kRomans = "ivxlmdIVXLMD";

  while (pos < wordlen_) {

    int ch = UnicodeFor(u_, word_, pos);

    if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) break;

    pos++;

  }

  return pos;

}


int UnicodeSpanSkipper::SkipAlpha(int pos) {

  while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) pos++;

  return pos;

}


static bool LikelyListMarkUnicode(int ch) {

  if (ch < 0x80) {

    STRING single_ch;

    single_ch += ch;

    return LikelyListMark(single_ch);

  }

  switch (ch) {

    // TODO(eger) expand this list of unicodes as needed.

    case 0x00B0:  // degree sign

    case 0x2022:  // bullet

    case 0x25E6:  // white bullet

    case 0x00B7:  // middle dot

    case 0x25A1:  // white square

    case 0x25A0:  // black square

    case 0x25AA:  // black small square

    case 0x2B1D:  // black very small square

    case 0x25BA:  // black right-pointing pointer

    case 0x25CF:  // black circle

    case 0x25CB:  // white circle

      return true;

    default:

      break;  // fall through

  }

  return false;

}


// Return whether it is very likely that this is a numeral marker that could

// start a list item.  Some examples include:

//   A   I   iii.   VI   (2)   3.5.   [C-4]

static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {

  if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0)))

    return true;


  UnicodeSpanSkipper m(u, werd);

  int num_segments = 0;

  int pos = 0;

  while (pos < werd->length() && num_segments < 3) {

    int numeral_start = m.SkipPunc(pos);

    if (numeral_start > pos + 1) break;

    int numeral_end = m.SkipRomans(numeral_start);

    if (numeral_end == numeral_start) {

      numeral_end = m.SkipDigits(numeral_start);

      if (numeral_end == numeral_start) {

        // If there's a single latin letter, we can use that.

        numeral_end = m.SkipAlpha(numeral_start);

        if (numeral_end - numeral_start != 1)

          break;

      }

    }

    // We got some sort of numeral.

    num_segments++;

    // Skip any trailing punctuation.

    pos = m.SkipPunc(numeral_end);

    if (pos == numeral_end)

      break;

  }

  return pos == werd->length();

}


// ========= Brain Dead Language Model (combined entry points) ================


// Given the leftmost word of a line either as a Tesseract unicharset + werd

// or a utf8 string, set the following attributes for it:

//   is_list -      this word might be a list number or bullet.

//   starts_idea -  this word is likely to start a sentence.

//   ends_idea -    this word is likely to end a sentence.

void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,

                        const STRING &utf8,

                        bool *is_list, bool *starts_idea, bool *ends_idea) {

  *is_list = false;

  *starts_idea = false;

  *ends_idea = false;

  if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) {  // Empty

    *ends_idea = true;

    return;

  }


  if (unicharset && werd) {  // We have a proper werd and unicharset so use it.

    if (UniLikelyListItem(unicharset, werd)) {

      *is_list = true;

      *starts_idea = true;

      *ends_idea = true;

    }

    if (unicharset->get_isupper(werd->unichar_id(0))) {

      *starts_idea = true;

    }

    if (unicharset->get_ispunctuation(werd->unichar_id(0))) {

      *starts_idea = true;

      *ends_idea = true;

    }

  } else {  // Assume utf8 is mostly ASCII

    if (AsciiLikelyListItem(utf8)) {

      *is_list = true;

      *starts_idea = true;

    }

    int start_letter = utf8[0];

    if (IsOpeningPunct(start_letter)) {

      *starts_idea = true;

    }

    if (IsTerminalPunct(start_letter)) {

      *ends_idea = true;

    }

    if (start_letter >= 'A' && start_letter <= 'Z') {

      *starts_idea = true;

    }

  }

}


// Given the rightmost word of a line either as a Tesseract unicharset + werd

// or a utf8 string, set the following attributes for it:

//   is_list -      this word might be a list number or bullet.

//   starts_idea -  this word is likely to start a sentence.

//   ends_idea -    this word is likely to end a sentence.

void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,

                         const STRING &utf8,

                         bool *is_list, bool *starts_idea, bool *ends_idea) {

  *is_list = false;

  *starts_idea = false;

  *ends_idea = false;

  if (utf8.size() == 0 || (werd != nullptr && werd->length() == 0)) {  // Empty

    *ends_idea = true;

    return;

  }


  if (unicharset && werd) {  // We have a proper werd and unicharset so use it.

    if (UniLikelyListItem(unicharset, werd)) {

      *is_list = true;

      *starts_idea = true;

    }

    UNICHAR_ID last_letter = werd->unichar_id(werd->length() - 1);

    if (unicharset->get_ispunctuation(last_letter)) {

      *ends_idea = true;

    }

  } else {  // Assume utf8 is mostly ASCII

    if (AsciiLikelyListItem(utf8)) {

      *is_list = true;

      *starts_idea = true;

    }

    int last_letter = utf8[utf8.size() - 1];

    if (IsOpeningPunct(last_letter) || IsTerminalPunct(last_letter)) {

      *ends_idea = true;

    }

  }

}


// =============== Implementation of RowScratchRegisters =====================

/* static */

void RowScratchRegisters::AppendDebugHeaderFields(

    GenericVector<STRING> *header) {

  header->push_back("[lmarg,lind;rind,rmarg]");

  header->push_back("model");

}


void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,

                                          GenericVector<STRING> *dbg) const {

  char s[30];

  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]",

           lmargin_, lindent_, rindent_, rmargin_);

  dbg->push_back(s);

  STRING model_string;

  model_string += static_cast<char>(GetLineType());

  model_string += ":";


  int model_numbers = 0;

  for (int h = 0; h < hypotheses_.size(); h++) {

    if (hypotheses_[h].model == nullptr)

      continue;

    if (model_numbers > 0)

      model_string += ",";

    if (StrongModel(hypotheses_[h].model)) {

      model_string += StrOf(1 + theory.IndexOf(hypotheses_[h].model));

    } else if (hypotheses_[h].model == kCrownLeft) {

      model_string += "CrL";

    } else if (hypotheses_[h].model == kCrownRight) {

      model_string += "CrR";

    }

    model_numbers++;

  }

  if (model_numbers == 0)

    model_string += "0";


  dbg->push_back(model_string);

}


void RowScratchRegisters::Init(const RowInfo &row) {

  ri_ = &row;

  lmargin_ = 0;

  lindent_ = row.pix_ldistance;

  rmargin_ = 0;

  rindent_ = row.pix_rdistance;

}


LineType RowScratchRegisters::GetLineType() const {

  if (hypotheses_.empty())

    return LT_UNKNOWN;

  bool has_start = false;

  bool has_body = false;

  for (int i = 0; i < hypotheses_.size(); i++) {

    switch (hypotheses_[i].ty) {

      case LT_START: has_start = true; break;

      case LT_BODY: has_body = true; break;

      default:

        tprintf("Encountered bad value in hypothesis list: %c\n",

                hypotheses_[i].ty);

        break;

    }

  }

  if (has_start && has_body)

    return LT_MULTIPLE;

  return has_start ? LT_START : LT_BODY;

}


LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const {

  if (hypotheses_.empty())

    return LT_UNKNOWN;

  bool has_start = false;

  bool has_body = false;

  for (int i = 0; i < hypotheses_.size(); i++) {

    if (hypotheses_[i].model != model)

      continue;

    switch (hypotheses_[i].ty) {

      case LT_START: has_start = true; break;

      case LT_BODY: has_body = true; break;

      default:

        tprintf("Encountered bad value in hypothesis list: %c\n",

                hypotheses_[i].ty);

        break;

    }

  }

  if (has_start && has_body)

    return LT_MULTIPLE;

  return has_start ? LT_START : LT_BODY;

}


void RowScratchRegisters::SetStartLine() {

  LineType current_lt = GetLineType();

  if (current_lt != LT_UNKNOWN && current_lt != LT_START) {

    tprintf("Trying to set a line to be START when it's already BODY.\n");

  }

  if (current_lt == LT_UNKNOWN || current_lt == LT_BODY) {

    hypotheses_.push_back_new(LineHypothesis(LT_START, nullptr));

  }

}


void RowScratchRegisters::SetBodyLine() {

  LineType current_lt = GetLineType();

  if (current_lt != LT_UNKNOWN && current_lt != LT_BODY) {

    tprintf("Trying to set a line to be BODY when it's already START.\n");

  }

  if (current_lt == LT_UNKNOWN || current_lt == LT_START) {

    hypotheses_.push_back_new(LineHypothesis(LT_BODY, nullptr));

  }

}


void RowScratchRegisters::AddStartLine(const ParagraphModel *model) {

  hypotheses_.push_back_new(LineHypothesis(LT_START, model));

  int old_idx = hypotheses_.get_index(LineHypothesis(LT_START, nullptr));

  if (old_idx >= 0)

    hypotheses_.remove(old_idx);

}


void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) {

  hypotheses_.push_back_new(LineHypothesis(LT_BODY, model));

  int old_idx = hypotheses_.get_index(LineHypothesis(LT_BODY, nullptr));

  if (old_idx >= 0)

    hypotheses_.remove(old_idx);

}


void RowScratchRegisters::StartHypotheses(SetOfModels *models) const {

  for (int h = 0; h < hypotheses_.size(); h++) {

    if (hypotheses_[h].ty == LT_START && StrongModel(hypotheses_[h].model))

      models->push_back_new(hypotheses_[h].model);

  }

}


void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const {

  for (int h = 0; h < hypotheses_.size(); h++) {

    if (StrongModel(hypotheses_[h].model))

      models->push_back_new(hypotheses_[h].model);

  }

}


void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const {

  for (int h = 0; h < hypotheses_.size(); h++) {

    if (hypotheses_[h].model != nullptr)

      models->push_back_new(hypotheses_[h].model);

  }

}


const ParagraphModel *RowScratchRegisters::UniqueStartHypothesis() const {

  if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START)

    return nullptr;

  return hypotheses_[0].model;

}


const ParagraphModel *RowScratchRegisters::UniqueBodyHypothesis() const {

  if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY)

    return nullptr;

  return hypotheses_[0].model;

}


// Discard any hypotheses whose model is not in the given list.

void RowScratchRegisters::DiscardNonMatchingHypotheses(

    const SetOfModels &models) {

  if (models.empty())

    return;

  for (int h = hypotheses_.size() - 1; h >= 0; h--) {

    if (!models.contains(hypotheses_[h].model)) {

      hypotheses_.remove(h);

    }

  }

}


// ============ Geometry based Paragraph Detection Algorithm =================


struct Cluster {

  Cluster() : center(0), count(0) {}

  Cluster(int cen, int num) : center(cen), count(num) {}


  int center;  // The center of the cluster.

  int count;   // The number of entries within the cluster.

};


class SimpleClusterer {

 public:

  explicit SimpleClusterer(int max_cluster_width)

      : max_cluster_width_(max_cluster_width) {}

  void Add(int value) { values_.push_back(value); }

  int size() const { return values_.size(); }

  void GetClusters(GenericVector<Cluster> *clusters);


 private:

  int max_cluster_width_;

  GenericVectorEqEq<int> values_;

};


// Return the index of the cluster closest to value.

static int ClosestCluster(const GenericVector<Cluster> &clusters, int value) {

  int best_index = 0;

  for (int i = 0; i < clusters.size(); i++) {

    if (abs(value - clusters[i].center) <

        abs(value - clusters[best_index].center))

        best_index = i;

  }

  return best_index;

}


void SimpleClusterer::GetClusters(GenericVector<Cluster> *clusters) {

  clusters->clear();

  values_.sort();

  for (int i = 0; i < values_.size();) {

    int orig_i = i;

    int lo = values_[i];

    int hi = lo;

    while (++i < values_.size() && values_[i] <= lo + max_cluster_width_) {

      hi = values_[i];

    }

    clusters->push_back(Cluster((hi + lo) / 2, i - orig_i));

  }

}


// Calculate left- and right-indent tab stop values seen in

// rows[row_start, row_end) given a tolerance of tolerance.

static void CalculateTabStops(GenericVector<RowScratchRegisters> *rows,

                              int row_start, int row_end, int tolerance,

                              GenericVector<Cluster> *left_tabs,

                              GenericVector<Cluster> *right_tabs) {

  if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))

    return;

  // First pass: toss all left and right indents into clusterers.

  SimpleClusterer initial_lefts(tolerance);

  SimpleClusterer initial_rights(tolerance);

  GenericVector<Cluster> initial_left_tabs;

  GenericVector<Cluster> initial_right_tabs;

  for (int i = row_start; i < row_end; i++) {

    initial_lefts.Add((*rows)[i].lindent_);

    initial_rights.Add((*rows)[i].rindent_);

  }

  initial_lefts.GetClusters(&initial_left_tabs);

  initial_rights.GetClusters(&initial_right_tabs);


  // Second pass: cluster only lines that are not "stray"

  //   An example of a stray line is a page number -- a line whose start

  //   and end tab-stops are far outside the typical start and end tab-stops

  //   for the block.

  //   Put another way, we only cluster data from lines whose start or end

  //   tab stop is frequent.

  SimpleClusterer lefts(tolerance);

  SimpleClusterer rights(tolerance);


  // Outlier elimination.  We might want to switch this to test outlier-ness

  // based on how strange a position an outlier is in instead of or in addition

  // to how rare it is.  These outliers get re-added if we end up having too

  // few tab stops, to work with, however.

  int infrequent_enough_to_ignore = 0;

  if (row_end - row_start >= 8) infrequent_enough_to_ignore = 1;

  if (row_end - row_start >= 20) infrequent_enough_to_ignore = 2;


  for (int i = row_start; i < row_end; i++) {

    int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_);

    int ridx = ClosestCluster(initial_right_tabs, (*rows)[i].rindent_);

    if (initial_left_tabs[lidx].count > infrequent_enough_to_ignore ||

        initial_right_tabs[ridx].count > infrequent_enough_to_ignore) {

      lefts.Add((*rows)[i].lindent_);

      rights.Add((*rows)[i].rindent_);

    }

  }

  lefts.GetClusters(left_tabs);

  rights.GetClusters(right_tabs);


  if ((left_tabs->size() == 1 && right_tabs->size() >= 4) ||

      (right_tabs->size() == 1 && left_tabs->size() >= 4)) {

    // One side is really ragged, and the other only has one tab stop,

    // so those "insignificant outliers" are probably important, actually.

    // This often happens on a page of an index.  Add back in the ones

    // we omitted in the first pass.

    for (int i = row_start; i < row_end; i++) {

      int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_);

      int ridx = ClosestCluster(initial_right_tabs, (*rows)[i].rindent_);

      if (!(initial_left_tabs[lidx].count > infrequent_enough_to_ignore ||

            initial_right_tabs[ridx].count > infrequent_enough_to_ignore)) {

        lefts.Add((*rows)[i].lindent_);

        rights.Add((*rows)[i].rindent_);

      }

    }

  }

  lefts.GetClusters(left_tabs);

  rights.GetClusters(right_tabs);


  // If one side is almost a two-indent aligned side, and the other clearly

  // isn't, try to prune out the least frequent tab stop from that side.

  if (left_tabs->size() == 3 && right_tabs->size() >= 4) {

    int to_prune = -1;

    for (int i = left_tabs->size() - 1; i >= 0; i--) {

      if (to_prune < 0 ||

          (*left_tabs)[i].count < (*left_tabs)[to_prune].count) {

        to_prune = i;

      }

    }

    if (to_prune >= 0 &&

        (*left_tabs)[to_prune].count <= infrequent_enough_to_ignore) {

      left_tabs->remove(to_prune);

    }

  }

  if (right_tabs->size() == 3 && left_tabs->size() >= 4) {

    int to_prune = -1;

    for (int i = right_tabs->size() - 1; i >= 0; i--) {

      if (to_prune < 0 ||

          (*right_tabs)[i].count < (*right_tabs)[to_prune].count) {

        to_prune = i;

      }

    }

    if (to_prune >= 0 &&

        (*right_tabs)[to_prune].count <= infrequent_enough_to_ignore) {

      right_tabs->remove(to_prune);

    }

  }

}


// Given a paragraph model mark rows[row_start, row_end) as said model

// start or body lines.

//

// Case 1: model->first_indent_ != model->body_indent_

//   Differentiating the paragraph start lines from the paragraph body lines in

//   this case is easy, we just see how far each line is indented.

//

// Case 2: model->first_indent_ == model->body_indent_

//   Here, we find end-of-paragraph lines by looking for "short lines."

//   What constitutes a "short line" changes depending on whether the text

//   ragged-right[left] or fully justified (aligned left and right).

//

//   Case 2a: Ragged Right (or Left) text.  (eop_threshold == 0)

//     We have a new paragraph it the first word would have at the end

//     of the previous line.

//

//   Case 2b: Fully Justified.  (eop_threshold > 0)

//     We mark a line as short (end of paragraph) if the offside indent

//     is greater than eop_threshold.

static void MarkRowsWithModel(GenericVector<RowScratchRegisters> *rows,

                              int row_start, int row_end,

                              const ParagraphModel *model,

                              bool ltr, int eop_threshold) {

  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))

    return;

  for (int row = row_start; row < row_end; row++) {

    bool valid_first = ValidFirstLine(rows, row, model);

    bool valid_body = ValidBodyLine(rows, row, model);

    if (valid_first && !valid_body) {

      (*rows)[row].AddStartLine(model);

    } else if (valid_body && !valid_first) {

      (*rows)[row].AddBodyLine(model);

    } else if (valid_body && valid_first) {

      bool after_eop = (row == row_start);

      if (row > row_start) {

        if (eop_threshold > 0) {

          if (model->justification() == JUSTIFICATION_LEFT) {

            after_eop = (*rows)[row - 1].rindent_ > eop_threshold;

          } else {

            after_eop = (*rows)[row - 1].lindent_ > eop_threshold;

          }

        } else {

         after_eop = FirstWordWouldHaveFit((*rows)[row - 1], (*rows)[row],

                                           model->justification());

        }

      }

      if (after_eop) {

        (*rows)[row].AddStartLine(model);

      } else {

        (*rows)[row].AddBodyLine(model);

      }

    } else {

      // Do nothing. Stray row.

    }

  }

}


// GeometricClassifierState holds all of the information we'll use while

// trying to determine a paragraph model for the text lines in a block of

// text:

//   + the rows under consideration [row_start, row_end)

//   + the common left- and right-indent tab stops

//   + does the block start out left-to-right or right-to-left

// Further, this struct holds the data we amass for the (single) ParagraphModel

// we'll assign to the text lines (assuming we get that far).

struct GeometricClassifierState {

  GeometricClassifierState(int dbg_level,

                           GenericVector<RowScratchRegisters> *r,

                           int r_start, int r_end)

      : debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end) {

    tolerance = InterwordSpace(*r, r_start, r_end);

    CalculateTabStops(r, r_start, r_end, tolerance,

                      &left_tabs, &right_tabs);

    if (debug_level >= 3) {

      tprintf("Geometry: TabStop cluster tolerance = %d; "

              "%d left tabs; %d right tabs\n",

              tolerance, left_tabs.size(), right_tabs.size());

    }

    ltr = (*r)[r_start].ri_->ltr;

  }


  void AssumeLeftJustification() {

    just = tesseract::JUSTIFICATION_LEFT;

    margin = (*rows)[row_start].lmargin_;

  }


  void AssumeRightJustification() {

    just = tesseract::JUSTIFICATION_RIGHT;

    margin = (*rows)[row_start].rmargin_;

  }


  // Align tabs are the tab stops the text is aligned to.

  const GenericVector<Cluster> &AlignTabs() const {

    if (just == tesseract::JUSTIFICATION_RIGHT) return right_tabs;

    return left_tabs;

  }


  // Offside tabs are the tab stops opposite the tabs used to align the text.

  //

  // Note that for a left-to-right text which is aligned to the right such as

  //     this function comment, the offside tabs are the horizontal tab stops

  //                 marking the beginning of ("Note", "this" and "marking").

  const GenericVector<Cluster> &OffsideTabs() const {

    if (just == tesseract::JUSTIFICATION_RIGHT) return left_tabs;

    return right_tabs;

  }


  // Return whether the i'th row extends from the leftmost left tab stop

  // to the right most right tab stop.

  bool IsFullRow(int i) const {

    return ClosestCluster(left_tabs, (*rows)[i].lindent_) == 0 &&

        ClosestCluster(right_tabs, (*rows)[i].rindent_) == 0;

  }


  int AlignsideTabIndex(int row_idx) const {

    return ClosestCluster(AlignTabs(), (*rows)[row_idx].AlignsideIndent(just));

  }


  // Given what we know about the paragraph justification (just), would the

  // first word of row_b have fit at the end of row_a?

  bool FirstWordWouldHaveFit(int row_a, int row_b) {

    return ::tesseract::FirstWordWouldHaveFit(

        (*rows)[row_a], (*rows)[row_b], just);

  }


  void PrintRows() const { PrintRowRange(*rows, row_start, row_end); }


  void Fail(int min_debug_level, const char *why) const {

    if (debug_level < min_debug_level) return;

    tprintf("# %s\n", why);

    PrintRows();

  }


  ParagraphModel Model() const {

    return ParagraphModel(just, margin, first_indent, body_indent, tolerance);

  }


  // We print out messages with a debug level at least as great as debug_level.

  int debug_level = 0;


  // The Geometric Classifier was asked to find a single paragraph model

  // to fit the text rows (*rows)[row_start, row_end)

  GenericVector<RowScratchRegisters> *rows;

  int row_start = 0;

  int row_end = 0;


  // The amount by which we expect the text edge can vary and still be aligned.

  int tolerance = 0;


  // Is the script in this text block left-to-right?

  // HORRIBLE ROUGH APPROXIMATION.  TODO(eger): Improve

  bool ltr = false;


  // These left and right tab stops were determined to be the common tab

  // stops for the given text.

  GenericVector<Cluster> left_tabs;

  GenericVector<Cluster> right_tabs;


  // These are parameters we must determine to create a ParagraphModel.

  tesseract::ParagraphJustification just = JUSTIFICATION_UNKNOWN;

  int margin = 0;

  int first_indent = 0;

  int body_indent = 0;


  // eop_threshold > 0 if the text is fully justified.  See MarkRowsWithModel()

  int eop_threshold = 0;

};


// Given a section of text where strong textual clues did not help identifying

// paragraph breaks, and for which the left and right indents have exactly

// three tab stops between them, attempt to find the paragraph breaks based

// solely on the outline of the text and whether the script is left-to-right.

//

// Algorithm Detail:

//   The selected rows are in the form of a rectangle except

//   for some number of "short lines" of the same length:

//

//   (A1)  xxxxxxxxxxxxx  (B1) xxxxxxxxxxxx

//           xxxxxxxxxxx       xxxxxxxxxx    # A "short" line.

//         xxxxxxxxxxxxx       xxxxxxxxxxxx

//         xxxxxxxxxxxxx       xxxxxxxxxxxx

//

//   We have a slightly different situation if the only short

//   line is at the end of the excerpt.

//

//   (A2) xxxxxxxxxxxxx  (B2) xxxxxxxxxxxx

//        xxxxxxxxxxxxx       xxxxxxxxxxxx

//        xxxxxxxxxxxxx       xxxxxxxxxxxx

//          xxxxxxxxxxx       xxxxxxxxxx     # A "short" line.

//

//   We'll interpret these as follows based on the reasoning in the comment for

//   GeometricClassify():

//       [script direction: first indent, body indent]

//   (A1) LtR: 2,0  RtL: 0,0   (B1) LtR: 0,0  RtL: 2,0

//   (A2) LtR: 2,0  RtL: CrR   (B2) LtR: CrL  RtL: 2,0

static void GeometricClassifyThreeTabStopTextBlock(

    int debug_level,

    GeometricClassifierState &s,

    ParagraphTheory *theory) {

  int num_rows = s.row_end - s.row_start;

  int num_full_rows = 0;

  int last_row_full = 0;

  for (int i = s.row_start; i < s.row_end; i++) {

    if (s.IsFullRow(i)) {

      num_full_rows++;

      if (i == s.row_end - 1) last_row_full++;

    }

  }


  if (num_full_rows < 0.7 * num_rows) {

    s.Fail(1, "Not enough full lines to know which lines start paras.");

    return;

  }


  // eop_threshold gets set if we're fully justified; see MarkRowsWithModel()

  s.eop_threshold = 0;


  if (s.ltr) {

    s.AssumeLeftJustification();

  } else {

    s.AssumeRightJustification();

  }


  if (debug_level > 0) {

    tprintf("# Not enough variety for clear outline classification. "

            "Guessing these are %s aligned based on script.\n",

            s.ltr ? "left" : "right");

    s.PrintRows();

  }


  if (s.AlignTabs().size() == 2) {  // case A1 or A2

    s.first_indent = s.AlignTabs()[1].center;

    s.body_indent = s.AlignTabs()[0].center;

  } else {                      // case B1 or B2

    if (num_rows - 1 == num_full_rows - last_row_full) {

      // case B2

      const ParagraphModel *model = s.ltr ? kCrownLeft : kCrownRight;

      (*s.rows)[s.row_start].AddStartLine(model);

      for (int i = s.row_start + 1; i < s.row_end; i++) {

        (*s.rows)[i].AddBodyLine(model);

      }

      return;

    } else {

      // case B1

      s.first_indent = s.body_indent = s.AlignTabs()[0].center;

      s.eop_threshold = (s.OffsideTabs()[0].center +

                         s.OffsideTabs()[1].center) / 2;

    }

  }

  const ParagraphModel *model = theory->AddModel(s.Model());

  MarkRowsWithModel(s.rows, s.row_start, s.row_end, model,

                    s.ltr, s.eop_threshold);

  return;

}


// This function is called if strong textual clues were not available, but

// the caller hopes that the paragraph breaks will be super obvious just

// by the outline of the text.

//

// The particularly difficult case is figuring out what's going on if you

// don't have enough short paragraph end lines to tell us what's going on.

//

// For instance, let's say you have the following outline:

//

//   (A1)  xxxxxxxxxxxxxxxxxxxxxx

//           xxxxxxxxxxxxxxxxxxxx

//         xxxxxxxxxxxxxxxxxxxxxx

//         xxxxxxxxxxxxxxxxxxxxxx

//

// Even if we know that the text is left-to-right and so will probably be

// left-aligned, both of the following are possible texts:

//

//  (A1a)  1. Here our list item

//           with two full lines.

//         2. Here a second item.

//         3. Here our third one.

//

//  (A1b)  so ends paragraph one.

//           Here  starts another

//         paragraph  we want  to

//         read.  This  continues

//

// These examples are obvious from the text and should have been caught

// by the StrongEvidenceClassify pass.  However, for languages where we don't

// have capital letters to go on (e.g. Hebrew, Arabic, Hindi, Chinese),

// it's worth guessing that (A1b) is the correct interpretation if there are

// far more "full" lines than "short" lines.

static void GeometricClassify(int debug_level,

                              GenericVector<RowScratchRegisters> *rows,

                              int row_start, int row_end,

                              ParagraphTheory *theory) {

  if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end))

    return;

  if (debug_level > 1) {

    tprintf("###############################################\n");

    tprintf("##### GeometricClassify( rows[%d:%d) )   ####\n",

            row_start, row_end);

    tprintf("###############################################\n");

  }

  RecomputeMarginsAndClearHypotheses(rows, row_start, row_end, 10);


  GeometricClassifierState s(debug_level, rows, row_start, row_end);

  if (s.left_tabs.size() > 2 && s.right_tabs.size() > 2) {

    s.Fail(2, "Too much variety for simple outline classification.");

    return;

  }

  if (s.left_tabs.size() <= 1 && s.right_tabs.size() <= 1) {

    s.Fail(1, "Not enough variety for simple outline classification.");

    return;

  }

  if (s.left_tabs.size() + s.right_tabs.size() == 3) {

    GeometricClassifyThreeTabStopTextBlock(debug_level, s, theory);

    return;

  }


  // At this point, we know that one side has at least two tab stops, and the

  // other side has one or two tab stops.

  // Left to determine:

  //   (1) Which is the body indent and which is the first line indent?

  //   (2) Is the text fully justified?


  // If one side happens to have three or more tab stops, assume that side

  // is opposite of the aligned side.

  if (s.right_tabs.size() > 2) {

    s.AssumeLeftJustification();

  } else if (s.left_tabs.size() > 2) {

    s.AssumeRightJustification();

  } else if (s.ltr) {  // guess based on script direction

    s.AssumeLeftJustification();

  } else {

    s.AssumeRightJustification();

  }


  if (s.AlignTabs().size() == 2) {

    // For each tab stop on the aligned side, how many of them appear

    // to be paragraph start lines?  [first lines]

    int firsts[2] = {0, 0};

    // Count the first line as a likely paragraph start line.

    firsts[s.AlignsideTabIndex(s.row_start)]++;

    // For each line, if the first word would have fit on the previous

    // line count it as a likely paragraph start line.

    bool jam_packed = true;

    for (int i = s.row_start + 1; i < s.row_end; i++) {

      if (s.FirstWordWouldHaveFit(i - 1, i)) {

        firsts[s.AlignsideTabIndex(i)]++;

        jam_packed = false;

      }

    }

    // Make an extra accounting for the last line of the paragraph just

    // in case it's the only short line in the block.  That is, take its

    // first word as typical and see if this looks like the *last* line

    // of a paragraph.  If so, mark the *other* indent as probably a first.

    if (jam_packed && s.FirstWordWouldHaveFit(s.row_end - 1, s.row_end - 1)) {

      firsts[1 - s.AlignsideTabIndex(s.row_end - 1)]++;

    }


    int percent0firsts, percent1firsts;

    percent0firsts = (100 * firsts[0]) / s.AlignTabs()[0].count;

    percent1firsts = (100 * firsts[1]) / s.AlignTabs()[1].count;


    // TODO(eger): Tune these constants if necessary.

    if ((percent0firsts < 20 && 30 < percent1firsts) ||

        percent0firsts + 30 < percent1firsts) {

      s.first_indent = s.AlignTabs()[1].center;

      s.body_indent = s.AlignTabs()[0].center;

    } else if ((percent1firsts < 20 && 30 < percent0firsts) ||

               percent1firsts + 30 < percent0firsts) {

      s.first_indent = s.AlignTabs()[0].center;

      s.body_indent = s.AlignTabs()[1].center;

    } else {

      // Ambiguous! Probably lineated (poetry)

      if (debug_level > 1) {

        tprintf("# Cannot determine %s indent likely to start paragraphs.\n",

                s.just == tesseract::JUSTIFICATION_LEFT ? "left" : "right");

        tprintf("# Indent of %d looks like a first line %d%% of the time.\n",

                s.AlignTabs()[0].center, percent0firsts);

        tprintf("# Indent of %d looks like a first line %d%% of the time.\n",

                s.AlignTabs()[1].center, percent1firsts);

        s.PrintRows();

      }

      return;

    }

  } else {

    // There's only one tab stop for the "aligned to" side.

    s.first_indent = s.body_indent = s.AlignTabs()[0].center;

  }


  // At this point, we have our model.

  const ParagraphModel *model = theory->AddModel(s.Model());


  // Now all we have to do is figure out if the text is fully justified or not.

  // eop_threshold: default to fully justified unless we see evidence below.

  //    See description on MarkRowsWithModel()

  s.eop_threshold =

      (s.OffsideTabs()[0].center + s.OffsideTabs()[1].center) / 2;

  // If the text is not fully justified, re-set the eop_threshold to 0.

  if (s.AlignTabs().size() == 2) {

    // Paragraphs with a paragraph-start indent.

    for (int i = s.row_start; i < s.row_end - 1; i++) {

      if (ValidFirstLine(s.rows, i + 1, model) &&

          !NearlyEqual(s.OffsideTabs()[0].center,

                       (*s.rows)[i].OffsideIndent(s.just), s.tolerance)) {

        // We found a non-end-of-paragraph short line: not fully justified.

        s.eop_threshold = 0;

        break;

      }

    }

  } else {

    // Paragraphs with no paragraph-start indent.

    for (int i = s.row_start; i < s.row_end - 1; i++) {

      if (!s.FirstWordWouldHaveFit(i, i + 1) &&

          !NearlyEqual(s.OffsideTabs()[0].center,

                       (*s.rows)[i].OffsideIndent(s.just), s.tolerance)) {

        // We found a non-end-of-paragraph short line: not fully justified.

        s.eop_threshold = 0;

        break;

      }

    }

  }

  MarkRowsWithModel(rows, row_start, row_end, model, s.ltr, s.eop_threshold);

}


// =============== Implementation of ParagraphTheory =====================


const ParagraphModel *ParagraphTheory::AddModel(const ParagraphModel &model) {

  for (int i = 0; i < models_->size(); i++) {

    if ((*models_)[i]->Comparable(model))

      return (*models_)[i];

  }

  auto *m = new ParagraphModel(model);

  models_->push_back(m);

  models_we_added_.push_back_new(m);

  return m;

}


void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {

  for (int i = models_->size() - 1; i >= 0; i--) {

    ParagraphModel *m = (*models_)[i];

    if (!used_models.contains(m) && models_we_added_.contains(m)) {

      models_->remove(i);

      models_we_added_.remove(models_we_added_.get_index(m));

      delete m;

    }

  }

}


// Examine rows[start, end) and try to determine if an existing non-centered

// paragraph model would fit them perfectly.  If so, return a pointer to it.

// If not, return nullptr.

const ParagraphModel *ParagraphTheory::Fits(

    const GenericVector<RowScratchRegisters> *rows, int start, int end) const {

  for (int m = 0; m < models_->size(); m++) {

    const ParagraphModel *model = (*models_)[m];

    if (model->justification() != JUSTIFICATION_CENTER &&

        RowsFitModel(rows, start, end, model))

      return model;

  }

  return nullptr;

}


void ParagraphTheory::NonCenteredModels(SetOfModels *models) {

  for (int m = 0; m < models_->size(); m++) {

    const ParagraphModel *model = (*models_)[m];

    if (model->justification() != JUSTIFICATION_CENTER)

      models->push_back_new(model);

  }

}


int ParagraphTheory::IndexOf(const ParagraphModel *model) const {

  for (int i = 0; i < models_->size(); i++) {

    if ((*models_)[i] == model)

      return i;

  }

  return -1;

}


bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows,

                    int row, const ParagraphModel *model) {

  if (!StrongModel(model)) {

    tprintf("ValidFirstLine() should only be called with strong models!\n");

  }

  return StrongModel(model) &&

      model->ValidFirstLine(

          (*rows)[row].lmargin_, (*rows)[row].lindent_,

          (*rows)[row].rindent_, (*rows)[row].rmargin_);

}


bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows,

                   int row, const ParagraphModel *model) {

  if (!StrongModel(model)) {

    tprintf("ValidBodyLine() should only be called with strong models!\n");

  }

  return StrongModel(model) &&

      model->ValidBodyLine(

          (*rows)[row].lmargin_, (*rows)[row].lindent_,

          (*rows)[row].rindent_, (*rows)[row].rmargin_);

}


bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows,

                     int a, int b, const ParagraphModel *model) {

  if (model != kCrownRight && model != kCrownLeft) {

    tprintf("CrownCompatible() should only be called with crown models!\n");

    return false;

  }

  RowScratchRegisters &row_a = (*rows)[a];

  RowScratchRegisters &row_b = (*rows)[b];

  if (model == kCrownRight) {

    return NearlyEqual(row_a.rindent_ + row_a.rmargin_,

                       row_b.rindent_ + row_b.rmargin_,

                       Epsilon(row_a.ri_->average_interword_space));

  }

  return NearlyEqual(row_a.lindent_ + row_a.lmargin_,

                     row_b.lindent_ + row_b.lmargin_,

                     Epsilon(row_a.ri_->average_interword_space));

}


// =============== Implementation of ParagraphModelSmearer ====================


ParagraphModelSmearer::ParagraphModelSmearer(

    GenericVector<RowScratchRegisters> *rows,

    int row_start, int row_end, ParagraphTheory *theory)

        : theory_(theory), rows_(rows), row_start_(row_start),

          row_end_(row_end) {

  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {

    row_start_ = 0;

    row_end_ = 0;

    return;

  }

  SetOfModels no_models;

  for (int row = row_start - 1; row <= row_end; row++) {

    open_models_.push_back(no_models);

  }

}


// see paragraphs_internal.h

void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) {

  SetOfModels no_models;

  if (row_start < row_start_) row_start = row_start_;

  if (row_end > row_end_) row_end = row_end_;


  for (int row = (row_start > 0) ? row_start - 1 : row_start; row < row_end;

       row++) {

    if ((*rows_)[row].ri_->num_words == 0) {

      OpenModels(row + 1) = no_models;

    } else {

      SetOfModels &opened = OpenModels(row);

      (*rows_)[row].StartHypotheses(&opened);


      // Which models survive the transition from row to row + 1?

      SetOfModels still_open;

      for (int m = 0; m < opened.size(); m++) {

        if (ValidFirstLine(rows_, row, opened[m]) ||

            ValidBodyLine(rows_, row, opened[m])) {

          // This is basic filtering; we check likely paragraph starty-ness down

          // below in Smear() -- you know, whether the first word would have fit

          // and such.

          still_open.push_back_new(opened[m]);

        }

      }

      OpenModels(row + 1) = still_open;

    }

  }

}


// see paragraphs_internal.h

void ParagraphModelSmearer::Smear() {

  CalculateOpenModels(row_start_, row_end_);


  // For each row which we're unsure about (that is, it is LT_UNKNOWN or

  // we have multiple LT_START hypotheses), see if there's a model that

  // was recently used (an "open" model) which might model it well.

  for (int i = row_start_; i < row_end_; i++) {

    RowScratchRegisters &row = (*rows_)[i];

    if (row.ri_->num_words == 0)

      continue;


    // Step One:

    //   Figure out if there are "open" models which are left-alined or

    //   right-aligned.  This is important for determining whether the

    //   "first" word in a row would fit at the "end" of the previous row.

    bool left_align_open = false;

    bool right_align_open = false;

    for (int m = 0; m < OpenModels(i).size(); m++) {

      switch (OpenModels(i)[m]->justification()) {

        case JUSTIFICATION_LEFT: left_align_open = true; break;

        case JUSTIFICATION_RIGHT: right_align_open = true; break;

        default: left_align_open = right_align_open = true;

      }

    }

    // Step Two:

    //   Use that knowledge to figure out if this row is likely to

    //   start a paragraph.

    bool likely_start;

    if (i == 0) {

      likely_start = true;

    } else {

      if ((left_align_open && right_align_open) ||

          (!left_align_open && !right_align_open)) {

        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,

                                            JUSTIFICATION_LEFT) ||

                       LikelyParagraphStart((*rows_)[i - 1], row,

                                            JUSTIFICATION_RIGHT);

      } else if (left_align_open) {

        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,

                                            JUSTIFICATION_LEFT);

      } else {

        likely_start = LikelyParagraphStart((*rows_)[i - 1], row,

                                            JUSTIFICATION_RIGHT);

      }

    }


    // Step Three:

    //   If this text line seems like an obvious first line of an

    //   open model, or an obvious continuation of an existing

    //   modelled paragraph, mark it up.

    if (likely_start) {

      // Add Start Hypotheses for all Open models that fit.

      for (int m = 0; m < OpenModels(i).size(); m++) {

        if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {

          row.AddStartLine(OpenModels(i)[m]);

        }

      }

    } else {

      // Add relevant body line hypotheses.

      SetOfModels last_line_models;

      if (i > 0) {

        (*rows_)[i - 1].StrongHypotheses(&last_line_models);

      } else {

        theory_->NonCenteredModels(&last_line_models);

      }

      for (int m = 0; m < last_line_models.size(); m++) {

        const ParagraphModel *model = last_line_models[m];

        if (ValidBodyLine(rows_, i, model))

          row.AddBodyLine(model);

      }

    }


    // Step Four:

    //   If we're still quite unsure about this line, go through all

    //   models in our theory and see if this row could be the start

    //   of any of our  models.

    if (row.GetLineType() == LT_UNKNOWN ||

        (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {

      SetOfModels all_models;

      theory_->NonCenteredModels(&all_models);

      for (int m = 0; m < all_models.size(); m++) {

        if (ValidFirstLine(rows_, i, all_models[m])) {

          row.AddStartLine(all_models[m]);

        }

      }

    }

    // Step Five:

    //   Since we may have updated the hypotheses about this row, we need

    //   to recalculate the Open models for the rest of rows[i + 1, row_end)

    if (row.GetLineType() != LT_UNKNOWN) {

      CalculateOpenModels(i + 1, row_end_);

    }

  }

}


// ================ Main Paragraph Detection Algorithm =======================


// Find out what ParagraphModels are actually used, and discard any

// that are not.

static void DiscardUnusedModels(const GenericVector<RowScratchRegisters> &rows,

                                ParagraphTheory *theory) {

  SetOfModels used_models;

  for (int i = 0; i < rows.size(); i++) {

    rows[i].StrongHypotheses(&used_models);

  }

  theory->DiscardUnusedModels(used_models);

}


// DowngradeWeakestToCrowns:

//   Forget any flush-{left, right} models unless we see two or more

//   of them in sequence.

//

// In pass 3, we start to classify even flush-left paragraphs (paragraphs

// where the first line and body indent are the same) as having proper Models.

// This is generally dangerous, since if you start imagining that flush-left

// is a typical paragraph model when it is not, it will lead you to chop normal

// indented paragraphs in the middle whenever a sentence happens to start on a

// new line (see "This" above).  What to do?

//   What we do is to take any paragraph which is flush left and is not

// preceded by another paragraph of the same model and convert it to a "Crown"

// paragraph.  This is a weak pseudo-ParagraphModel which is a placeholder

// for later.  It means that the paragraph is flush, but it would be desirable

// to mark it as the same model as following text if it fits.  This downgrade

// FlushLeft -> CrownLeft -> Model of following paragraph.  Means that we

// avoid making flush left Paragraph Models whenever we see a top-of-the-page

// half-of-a-paragraph. and instead we mark it the same as normal body text.

//

// Implementation:

//

//   Comb backwards through the row scratch registers, and turn any

//   sequences of body lines of equivalent type abutted against the beginning

//   or a body or start line of a different type into a crown paragraph.

static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,

                                     GenericVector<RowScratchRegisters> *rows) {

  int start;

  for (int end = rows->size(); end > 0; end = start) {

    // Search back for a body line of a unique type.

    const ParagraphModel *model = nullptr;

    while (end > 0 &&

           (model = (*rows)[end - 1].UniqueBodyHypothesis()) == nullptr) {

      end--;

    }

    if (end == 0) break;

    start = end - 1;

    while (start >= 0 && (*rows)[start].UniqueBodyHypothesis() == model) {

      start--;  // walk back to the first line that is not the same body type.

    }

    if (start >= 0 && (*rows)[start].UniqueStartHypothesis() == model &&

        StrongModel(model) &&

        NearlyEqual(model->first_indent(), model->body_indent(),

                    model->tolerance())) {

        start--;

    }

    start++;

    // Now rows[start, end) is a sequence of unique body hypotheses of model.

    if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER)

      continue;

    if (!StrongModel(model)) {

      while (start > 0 &&

             CrownCompatible(rows, start - 1, start, model))

        start--;

    }

    if (start == 0 ||

        (!StrongModel(model)) ||

        (StrongModel(model) && !ValidFirstLine(rows, start - 1, model))) {

      // crownify rows[start, end)

      const ParagraphModel *crown_model = model;

      if (StrongModel(model)) {

          if (model->justification() == JUSTIFICATION_LEFT)

            crown_model = kCrownLeft;

          else

            crown_model = kCrownRight;

      }

      (*rows)[start].SetUnknown();

      (*rows)[start].AddStartLine(crown_model);

      for (int row = start + 1; row < end; row++) {

        (*rows)[row].SetUnknown();

        (*rows)[row].AddBodyLine(crown_model);

      }

    }

  }

  DiscardUnusedModels(*rows, theory);

}


// Clear all hypotheses about lines [start, end) and reset margins.

//

// The empty space between the left of a row and the block boundary (and

// similarly for the right) is split into two pieces: margin and indent.

// In initial processing, we assume the block is tight and the margin for

// all lines is set to zero.   However, if our first pass does not yield

// models for  everything,  it may be  due to an  inset paragraph like a

// block-quote.   In that case, we make a second pass over that unmarked

// section of the page and reset the "margin" portion of the empty space

// to the common amount of space at  the ends of the lines under consid-

// eration.    This would be equivalent to percentile set to 0. However,

// sometimes we have a single character sticking out in the right margin

// of a text block  (like the 'r' in 'for' on line 3 above),  and we can

// really  just ignore it as an outlier.   To express this, we allow the

// user to specify  the percentile (0..100)  of indent values  to use as

// the common margin for each row in the run of rows[start, end).

void RecomputeMarginsAndClearHypotheses(

    GenericVector<RowScratchRegisters> *rows, int start, int end,

    int percentile) {

  if (!AcceptableRowArgs(0, 0, __func__, rows, start, end))

    return;


  int lmin, lmax, rmin, rmax;

  lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_;

  rmin = rmax = (*rows)[start].rmargin_ + (*rows)[start].rindent_;

  for (int i = start; i < end; i++) {

    RowScratchRegisters &sr = (*rows)[i];

    sr.SetUnknown();

    if (sr.ri_->num_words == 0)

      continue;

    UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);

    UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);

  }

  STATS lefts(lmin, lmax + 1);

  STATS rights(rmin, rmax + 1);

  for (int i = start; i < end; i++) {

    RowScratchRegisters &sr = (*rows)[i];

    if (sr.ri_->num_words == 0)

      continue;

    lefts.add(sr.lmargin_ + sr.lindent_, 1);

    rights.add(sr.rmargin_ + sr.rindent_, 1);

  }

  int ignorable_left = lefts.ile(ClipToRange(percentile, 0, 100) / 100.0);

  int ignorable_right = rights.ile(ClipToRange(percentile, 0, 100) / 100.0);

  for (int i = start; i < end; i++) {

    RowScratchRegisters &sr = (*rows)[i];

    int ldelta = ignorable_left - sr.lmargin_;

    sr.lmargin_ += ldelta;

    sr.lindent_ -= ldelta;

    int rdelta = ignorable_right - sr.rmargin_;

    sr.rmargin_ += rdelta;

    sr.rindent_ -= rdelta;

  }

}


// Return the median inter-word space in rows[row_start, row_end).

int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,

                   int row_start, int row_end) {

  if (row_end < row_start + 1) return 1;

  int word_height = (rows[row_start].ri_->lword_box.height() +

                     rows[row_end - 1].ri_->lword_box.height()) / 2;

  int word_width = (rows[row_start].ri_->lword_box.width() +

                    rows[row_end - 1].ri_->lword_box.width())  / 2;

  STATS spacing_widths(0, 5 + word_width);

  for (int i = row_start; i < row_end; i++) {

    if (rows[i].ri_->num_words > 1) {

      spacing_widths.add(rows[i].ri_->average_interword_space, 1);

    }

  }

  int minimum_reasonable_space = word_height / 3;

  if (minimum_reasonable_space < 2)

    minimum_reasonable_space = 2;

  int median = spacing_widths.median();

  return (median > minimum_reasonable_space)

      ? median : minimum_reasonable_space;

}


// Return whether the first word on the after line can fit in the space at

// the end of the before line (knowing which way the text is aligned and read).

bool FirstWordWouldHaveFit(const RowScratchRegisters &before,

                           const RowScratchRegisters &after,

                           tesseract::ParagraphJustification justification) {

  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)

    return true;


  if (justification == JUSTIFICATION_UNKNOWN) {

    tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");

  }

  int available_space;

  if (justification == JUSTIFICATION_CENTER) {

    available_space = before.lindent_ + before.rindent_;

  } else {

    available_space = before.OffsideIndent(justification);

  }

  available_space -= before.ri_->average_interword_space;


  if (before.ri_->ltr)

    return after.ri_->lword_box.width() < available_space;

  return after.ri_->rword_box.width() < available_space;

}


// Return whether the first word on the after line can fit in the space at

// the end of the before line (not knowing which way the text goes) in a left

// or right alignment.

bool FirstWordWouldHaveFit(const RowScratchRegisters &before,

                           const RowScratchRegisters &after) {

  if (before.ri_->num_words == 0 || after.ri_->num_words == 0)

    return true;


  int available_space = before.lindent_;

  if (before.rindent_ > available_space)

    available_space = before.rindent_;

  available_space -= before.ri_->average_interword_space;


  if (before.ri_->ltr)

    return after.ri_->lword_box.width() < available_space;

  return after.ri_->rword_box.width() < available_space;

}


static bool TextSupportsBreak(const RowScratchRegisters &before,

                              const RowScratchRegisters &after) {

  if (before.ri_->ltr) {

    return before.ri_->rword_likely_ends_idea &&

           after.ri_->lword_likely_starts_idea;

  } else {

    return before.ri_->lword_likely_ends_idea &&

           after.ri_->rword_likely_starts_idea;

  }

}


static bool LikelyParagraphStart(const RowScratchRegisters &before,

                                 const RowScratchRegisters &after,

                                 tesseract::ParagraphJustification j) {

  return before.ri_->num_words == 0 ||

      (FirstWordWouldHaveFit(before, after, j) &&

       TextSupportsBreak(before, after));

}


// Examine rows[start, end) and try to determine what sort of ParagraphModel

// would fit them as a single paragraph.

// If we can't produce a unique model justification_ = JUSTIFICATION_UNKNOWN.

// If the rows given could be a consistent start to a paragraph, set *consistent

// true.

static ParagraphModel InternalParagraphModelByOutline(

    const GenericVector<RowScratchRegisters> *rows,

    int start, int end, int tolerance, bool *consistent) {

  int ltr_line_count = 0;

  for (int i = start; i < end; i++) {

    ltr_line_count += static_cast<int>((*rows)[i].ri_->ltr);

  }

  bool ltr = (ltr_line_count >= (end - start) / 2);


  *consistent = true;

  if (!AcceptableRowArgs(0, 2, __func__, rows, start, end))

    return ParagraphModel();


  // Ensure the caller only passed us a region with a common rmargin and

  // lmargin.

  int lmargin = (*rows)[start].lmargin_;

  int rmargin = (*rows)[start].rmargin_;

  int lmin, lmax, rmin, rmax, cmin, cmax;

  lmin = lmax = (*rows)[start + 1].lindent_;

  rmin = rmax = (*rows)[start + 1].rindent_;

  cmin = cmax = 0;

  for (int i = start + 1; i < end; i++) {

    if ((*rows)[i].lmargin_ != lmargin || (*rows)[i].rmargin_ != rmargin) {

      tprintf("Margins don't match! Software error.\n");

      *consistent = false;

      return ParagraphModel();

    }

    UpdateRange((*rows)[i].lindent_, &lmin, &lmax);

    UpdateRange((*rows)[i].rindent_, &rmin, &rmax);

    UpdateRange((*rows)[i].rindent_ - (*rows)[i].lindent_, &cmin, &cmax);

  }

  int ldiff = lmax - lmin;

  int rdiff = rmax - rmin;

  int cdiff = cmax - cmin;

  if (rdiff > tolerance && ldiff > tolerance) {

    if (cdiff < tolerance * 2) {

      if (end - start < 3)

        return ParagraphModel();

      return ParagraphModel(JUSTIFICATION_CENTER, 0, 0, 0, tolerance);

    }

    *consistent = false;

    return ParagraphModel();

  }

  if (end - start < 3)  // Don't return a model for two line paras.

    return ParagraphModel();


  // These booleans keep us from saying something is aligned left when the body

  // left variance is too large.

  bool body_admits_left_alignment = ldiff < tolerance;

  bool body_admits_right_alignment = rdiff < tolerance;


  ParagraphModel left_model =

      ParagraphModel(JUSTIFICATION_LEFT, lmargin, (*rows)[start].lindent_,

                     (lmin + lmax) / 2, tolerance);

  ParagraphModel right_model =

      ParagraphModel(JUSTIFICATION_RIGHT, rmargin, (*rows)[start].rindent_,

                     (rmin + rmax) / 2, tolerance);


  // These booleans keep us from having an indent on the "wrong side" for the

  // first line.

  bool text_admits_left_alignment = ltr || left_model.is_flush();

  bool text_admits_right_alignment = !ltr || right_model.is_flush();


  // At least one of the edges is less than tolerance in variance.

  // If the other is obviously ragged, it can't be the one aligned to.

  // [Note the last line is included in this raggedness.]

  if (tolerance < rdiff) {

    if (body_admits_left_alignment && text_admits_left_alignment)

      return left_model;

    *consistent = false;

    return ParagraphModel();

  }

  if (tolerance < ldiff) {

    if (body_admits_right_alignment && text_admits_right_alignment)

      return right_model;

    *consistent = false;

    return ParagraphModel();

  }


  // At this point, we know the body text doesn't vary much on either side.


  // If the first line juts out oddly in one direction or the other,

  // that likely indicates the side aligned to.

  int first_left = (*rows)[start].lindent_;

  int first_right = (*rows)[start].rindent_;


  if (ltr && body_admits_left_alignment &&

      (first_left < lmin || first_left > lmax))

    return left_model;

  if (!ltr && body_admits_right_alignment &&

      (first_right < rmin || first_right > rmax))

    return right_model;


  *consistent = false;

  return ParagraphModel();

}


// Examine rows[start, end) and try to determine what sort of ParagraphModel

// would fit them as a single paragraph.   If nothing fits,

// justification_ = JUSTIFICATION_UNKNOWN and print the paragraph to debug

// output if we're debugging.

static ParagraphModel ParagraphModelByOutline(

    int debug_level,

    const GenericVector<RowScratchRegisters> *rows,

    int start, int end, int tolerance) {

  bool unused_consistent;

  ParagraphModel retval = InternalParagraphModelByOutline(

      rows, start, end, tolerance, &unused_consistent);

  if (debug_level >= 2 && retval.justification() == JUSTIFICATION_UNKNOWN) {

    tprintf("Could not determine a model for this paragraph:\n");

    PrintRowRange(*rows, start, end);

  }

  return retval;

}


// Do rows[start, end) form a single instance of the given paragraph model?

bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,

                  int start, int end, const ParagraphModel *model) {

  if (!AcceptableRowArgs(0, 1, __func__, rows, start, end))

    return false;

  if (!ValidFirstLine(rows, start, model)) return false;

  for (int i = start + 1 ; i < end; i++) {

    if (!ValidBodyLine(rows, i, model)) return false;

  }

  return true;

}


// Examine rows[row_start, row_end) as an independent section of text,

// and mark rows that are exceptionally clear as start-of-paragraph

// and paragraph-body lines.

//

// We presume that any lines surrounding rows[row_start, row_end) may

// have wildly different paragraph models, so we don't key any data off

// of those lines.

//

// We only take the very strongest signals, as we don't want to get

// confused and marking up centered text, poetry, or source code as

// clearly part of a typical paragraph.

static void MarkStrongEvidence(GenericVector<RowScratchRegisters> *rows,

                               int row_start, int row_end) {

  // Record patently obvious body text.

  for (int i = row_start + 1; i < row_end; i++) {

    const RowScratchRegisters &prev = (*rows)[i - 1];

    RowScratchRegisters &curr = (*rows)[i];

    tesseract::ParagraphJustification typical_justification =

        prev.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT;

    if (!curr.ri_->rword_likely_starts_idea &&

        !curr.ri_->lword_likely_starts_idea &&

        !FirstWordWouldHaveFit(prev, curr, typical_justification)) {

      curr.SetBodyLine();

    }

  }


  // Record patently obvious start paragraph lines.

  //

  // It's an extremely good signal of the start of a paragraph that

  // the first word would have fit on the end of the previous line.

  // However, applying just that signal would have us mark random

  // start lines of lineated text (poetry and source code) and some

  // centered headings as paragraph start lines.  Therefore, we use

  // a second qualification for a paragraph start: Not only should

  // the first word of this line have fit on the previous line,

  // but also, this line should go full to the right of the block,

  // disallowing a subsequent word from having fit on this line.


  // First row:

  {

    RowScratchRegisters &curr = (*rows)[row_start];

    RowScratchRegisters &next = (*rows)[row_start + 1];

    tesseract::ParagraphJustification j =

        curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT;

    if (curr.GetLineType() == LT_UNKNOWN &&

        !FirstWordWouldHaveFit(curr, next, j) &&

        (curr.ri_->lword_likely_starts_idea ||

         curr.ri_->rword_likely_starts_idea)) {

      curr.SetStartLine();

    }

  }

  // Middle rows

  for (int i = row_start + 1; i < row_end - 1; i++) {

    RowScratchRegisters &prev = (*rows)[i - 1];

    RowScratchRegisters &curr = (*rows)[i];

    RowScratchRegisters &next = (*rows)[i + 1];

    tesseract::ParagraphJustification j =

        curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT;

    if (curr.GetLineType() == LT_UNKNOWN &&

        !FirstWordWouldHaveFit(curr, next, j) &&

        LikelyParagraphStart(prev, curr, j)) {

      curr.SetStartLine();

    }

  }

  // Last row

  {  // the short circuit at the top means we have at least two lines.

    RowScratchRegisters &prev = (*rows)[row_end - 2];

    RowScratchRegisters &curr = (*rows)[row_end - 1];

    tesseract::ParagraphJustification j =

        curr.ri_->ltr ? JUSTIFICATION_LEFT : JUSTIFICATION_RIGHT;

    if (curr.GetLineType() == LT_UNKNOWN &&

        !FirstWordWouldHaveFit(curr, curr, j) &&

        LikelyParagraphStart(prev, curr, j)) {

      curr.SetStartLine();

    }

  }

}


// Look for sequences of a start line followed by some body lines in

// rows[row_start, row_end) and create ParagraphModels for them if

// they seem coherent.

static void ModelStrongEvidence(int debug_level,

                                GenericVector<RowScratchRegisters> *rows,

                                int row_start, int row_end,

                                bool allow_flush_models,

                                ParagraphTheory *theory) {

  if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))

    return;


  int start = row_start;

  while (start < row_end) {

    while (start < row_end && (*rows)[start].GetLineType() != LT_START)

      start++;

    if (start >= row_end - 1)

      break;


    int tolerance = Epsilon((*rows)[start + 1].ri_->average_interword_space);

    int end = start;

    ParagraphModel last_model;

    bool next_consistent;

    do {

      ++end;

      // rows[row, end) was consistent.

      // If rows[row, end + 1) is not consistent,

      //   just model rows[row, end)

      if (end < row_end - 1) {

        RowScratchRegisters &next = (*rows)[end];

        LineType lt = next.GetLineType();

        next_consistent = lt == LT_BODY ||

            (lt == LT_UNKNOWN &&

             !FirstWordWouldHaveFit((*rows)[end - 1], (*rows)[end]));

      } else {

        next_consistent = false;

      }

      if (next_consistent) {

        ParagraphModel next_model = InternalParagraphModelByOutline(

            rows, start, end + 1, tolerance, &next_consistent);

        if (((*rows)[start].ri_->ltr &&

             last_model.justification() == JUSTIFICATION_LEFT &&

             next_model.justification() != JUSTIFICATION_LEFT) ||

            (!(*rows)[start].ri_->ltr &&

             last_model.justification() == JUSTIFICATION_RIGHT &&

             next_model.justification() != JUSTIFICATION_RIGHT)) {

          next_consistent = false;

        }

        last_model = next_model;

      } else {

        next_consistent = false;

      }

    } while (next_consistent && end < row_end);

    // At this point, rows[start, end) looked like it could have been a

    // single paragraph.  If we can make a good ParagraphModel for it,

    // do so and mark this sequence with that model.

    if (end > start + 1) {

      // emit a new paragraph if we have more than one line.

      const ParagraphModel *model = nullptr;

      ParagraphModel new_model = ParagraphModelByOutline(

          debug_level, rows, start, end,

          Epsilon(InterwordSpace(*rows, start, end)));

      if (new_model.justification() == JUSTIFICATION_UNKNOWN) {

        // couldn't create a good model, oh well.

      } else if (new_model.is_flush()) {

        if (end == start + 2) {

          // It's very likely we just got two paragraph starts in a row.

          end = start + 1;

        } else if (start == row_start) {

          // Mark this as a Crown.

          if (new_model.justification() == JUSTIFICATION_LEFT) {

            model = kCrownLeft;

          } else {

            model = kCrownRight;

          }

        } else if (allow_flush_models) {

          model = theory->AddModel(new_model);

        }

      } else {

        model = theory->AddModel(new_model);

      }

      if (model) {

        (*rows)[start].AddStartLine(model);

        for (int i = start + 1; i < end; i++) {

          (*rows)[i].AddBodyLine(model);

        }

      }

    }

    start = end;

  }

}


// We examine rows[row_start, row_end) and do the following:

//   (1) Clear all existing hypotheses for the rows being considered.

//   (2) Mark up any rows as exceptionally likely to be paragraph starts

//       or paragraph body lines as such using both geometric and textual

//       clues.

//   (3) Form models for any sequence of start + continuation lines.

//   (4) Smear the paragraph models to cover surrounding text.

static void StrongEvidenceClassify(int debug_level,

                                   GenericVector<RowScratchRegisters> *rows,

                                   int row_start, int row_end,

                                   ParagraphTheory *theory) {

  if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))

    return;


  if (debug_level > 1) {

    tprintf("#############################################\n");

    tprintf("# StrongEvidenceClassify( rows[%d:%d) )\n", row_start, row_end);

    tprintf("#############################################\n");

  }


  RecomputeMarginsAndClearHypotheses(rows, row_start, row_end, 10);

  MarkStrongEvidence(rows, row_start, row_end);


  DebugDump(debug_level > 2, "Initial strong signals.", *theory, *rows);


  // Create paragraph models.

  ModelStrongEvidence(debug_level, rows, row_start, row_end, false, theory);


  DebugDump(debug_level > 2, "Unsmeared hypotheses.s.", *theory, *rows);


  // At this point, some rows are marked up as paragraphs with model numbers,

  // and some rows are marked up as either LT_START or LT_BODY.  Now let's

  // smear any good paragraph hypotheses forward and backward.

  ParagraphModelSmearer smearer(rows, row_start, row_end, theory);

  smearer.Smear();

}


static void SeparateSimpleLeaderLines(GenericVector<RowScratchRegisters> *rows,

                                      int row_start, int row_end,

                                      ParagraphTheory *theory) {

  for (int i = row_start + 1; i < row_end - 1; i++) {

    if ((*rows)[i - 1].ri_->has_leaders &&

        (*rows)[i].ri_->has_leaders &&

        (*rows)[i + 1].ri_->has_leaders) {

      const ParagraphModel *model = theory->AddModel(

          ParagraphModel(JUSTIFICATION_UNKNOWN, 0, 0, 0, 0));

      (*rows)[i].AddStartLine(model);

    }

  }

}


// Collect sequences of unique hypotheses in row registers and create proper

// paragraphs for them, referencing the paragraphs in row_owners.

static void ConvertHypothesizedModelRunsToParagraphs(

    int debug_level,

    const GenericVector<RowScratchRegisters> &rows,

    GenericVector<PARA *> *row_owners,

    ParagraphTheory *theory) {

  int end = rows.size();

  int start;

  for (; end > 0; end = start) {

    start = end - 1;

    const ParagraphModel *model = nullptr;

    // TODO(eger): Be smarter about dealing with multiple hypotheses.

    bool single_line_paragraph = false;

    SetOfModels models;

    rows[start].NonNullHypotheses(&models);

    if (!models.empty()) {

      model = models[0];

      if (rows[start].GetLineType(model) != LT_BODY)

        single_line_paragraph = true;

    }

    if (model && !single_line_paragraph) {

      // walk back looking for more body lines and then a start line.

      while (--start > 0 && rows[start].GetLineType(model) == LT_BODY) {

        // do nothing

      }

      if (start < 0 || rows[start].GetLineType(model) != LT_START) {

        model = nullptr;

      }

    }

    if (model == nullptr) {

      continue;

    }

    // rows[start, end) should be a paragraph.

    PARA *p = new PARA();

    if (model == kCrownLeft || model == kCrownRight) {

      p->is_very_first_or_continuation = true;

      // Crown paragraph.

      //   If we can find an existing ParagraphModel that fits, use it,

      //   else create a new one.

      for (int row = end; row < rows.size(); row++) {

        if ((*row_owners)[row] &&

            (ValidBodyLine(&rows, start, (*row_owners)[row]->model) &&

            (start == 0 ||

             ValidFirstLine(&rows, start, (*row_owners)[row]->model)))) {

          model = (*row_owners)[row]->model;

          break;

        }

      }

      if (model == kCrownLeft) {

        // No subsequent model fits, so cons one up.

        model = theory->AddModel(ParagraphModel(

            JUSTIFICATION_LEFT, rows[start].lmargin_ + rows[start].lindent_,

            0, 0, Epsilon(rows[start].ri_->average_interword_space)));

      } else if (model == kCrownRight) {

        // No subsequent model fits, so cons one up.

        model = theory->AddModel(ParagraphModel(

            JUSTIFICATION_RIGHT, rows[start].rmargin_ + rows[start].rmargin_,

            0, 0, Epsilon(rows[start].ri_->average_interword_space)));

      }

    }

    rows[start].SetUnknown();

    rows[start].AddStartLine(model);

    for (int i = start + 1; i < end; i++) {

      rows[i].SetUnknown();

      rows[i].AddBodyLine(model);

    }

    p->model = model;

    p->has_drop_cap = rows[start].ri_->has_drop_cap;

    p->is_list_item =

        model->justification() == JUSTIFICATION_RIGHT

            ? rows[start].ri_->rword_indicates_list_item

            : rows[start].ri_->lword_indicates_list_item;

    for (int row = start; row < end; row++) {

      if ((*row_owners)[row] != nullptr) {

        tprintf("Memory leak! ConvertHypothesizeModelRunsToParagraphs() called "

                "more than once!\n");

        delete (*row_owners)[row];

      }

      (*row_owners)[row] = p;

    }

  }

}


struct Interval {

  Interval() : begin(0), end(0) {}

  Interval(int b, int e) : begin(b), end(e) {}


  int begin;

  int end;

};


// Return whether rows[row] appears to be stranded, meaning that the evidence

// for this row is very weak due to context.  For instance, two lines of source

// code may happen to be indented at the same tab vector as body text starts,

// leading us to think they are two start-of-paragraph lines.  This is not

// optimal.  However, we also don't want to mark a sequence of short dialog

// as "weak," so our heuristic is:

//   (1) If a line is surrounded by lines of unknown type, it's weak.

//   (2) If two lines in a row are start lines for a given paragraph type, but

//       after that the same paragraph type does not continue, they're weak.

static bool RowIsStranded(const GenericVector<RowScratchRegisters> &rows,

                          int row) {

  SetOfModels row_models;

  rows[row].StrongHypotheses(&row_models);


  for (int m = 0; m < row_models.size(); m++) {

    bool all_starts = rows[row].GetLineType();

    int run_length = 1;

    bool continues = true;

    for (int i = row - 1; i >= 0 && continues; i--) {

      SetOfModels models;

      rows[i].NonNullHypotheses(&models);

      switch (rows[i].GetLineType(row_models[m])) {

        case LT_START: run_length++; break;

        case LT_MULTIPLE:  // explicit fall-through

        case LT_BODY: run_length++; all_starts = false; break;

        case LT_UNKNOWN:  // explicit fall-through

        default: continues = false;

      }

    }

    continues = true;

    for (int i = row + 1; i < rows.size() && continues; i++) {

      SetOfModels models;

      rows[i].NonNullHypotheses(&models);

      switch (rows[i].GetLineType(row_models[m])) {

        case LT_START: run_length++; break;

        case LT_MULTIPLE:  // explicit fall-through

        case LT_BODY: run_length++; all_starts = false; break;

        case LT_UNKNOWN:  // explicit fall-through

        default: continues = false;

      }

    }

    if (run_length > 2 || (!all_starts && run_length > 1)) return false;

  }

  return true;

}


// Go through rows[row_start, row_end) and gather up sequences that need better

// classification.

// + Sequences of non-empty rows without hypotheses.

// + Crown paragraphs not immediately followed by a strongly modeled line.

// + Single line paragraphs surrounded by text that doesn't match the

//   model.

static void LeftoverSegments(const GenericVector<RowScratchRegisters> &rows,

                             GenericVector<Interval> *to_fix,

                             int row_start, int row_end) {

  to_fix->clear();

  for (int i = row_start; i < row_end; i++) {

    bool needs_fixing = false;


    SetOfModels models;

    SetOfModels models_w_crowns;

    rows[i].StrongHypotheses(&models);

    rows[i].NonNullHypotheses(&models_w_crowns);

    if (models.empty() && !models_w_crowns.empty()) {

      // Crown paragraph.  Is it followed by a modeled line?

      for (int end = i + 1; end < rows.size(); end++) {

        SetOfModels end_models;

        SetOfModels strong_end_models;

        rows[end].NonNullHypotheses(&end_models);

        rows[end].StrongHypotheses(&strong_end_models);

        if (end_models.empty()) {

          needs_fixing = true;

          break;

        } else if (!strong_end_models.empty()) {

          needs_fixing = false;

          break;

        }

      }

    } else if (models.empty() && rows[i].ri_->num_words > 0) {

      // No models at all.

      needs_fixing = true;

    }


    if (!needs_fixing && !models.empty()) {

      needs_fixing = RowIsStranded(rows, i);

    }


    if (needs_fixing) {

      if (!to_fix->empty() && to_fix->back().end == i - 1)

        to_fix->back().end = i;

      else

        to_fix->push_back(Interval(i, i));

    }

  }

  // Convert inclusive intervals to half-open intervals.

  for (int i = 0; i < to_fix->size(); i++) {

    (*to_fix)[i].end = (*to_fix)[i].end + 1;

  }

}


// Given a set of row_owners pointing to PARAs or nullptr (no paragraph known),

// normalize each row_owner to point to an actual PARA, and output the

// paragraphs in order onto paragraphs.

void CanonicalizeDetectionResults(

    GenericVector<PARA *> *row_owners,

    PARA_LIST *paragraphs) {

  GenericVector<PARA *> &rows = *row_owners;

  paragraphs->clear();

  PARA_IT out(paragraphs);

  PARA *formerly_null = nullptr;

  for (int i = 0; i < rows.size(); i++) {

    if (rows[i] == nullptr) {

      if (i == 0 || rows[i - 1] != formerly_null) {

        rows[i] = formerly_null = new PARA();

      } else {

        rows[i] = formerly_null;

        continue;

      }

    } else if (i > 0 && rows[i - 1] == rows[i]) {

      continue;

    }

    out.add_after_then_move(rows[i]);

  }

}


// Main entry point for Paragraph Detection Algorithm.

//

// Given a set of equally spaced textlines (described by row_infos),

// Split them into paragraphs.

//

// Output:

//   row_owners - one pointer for each row, to the paragraph it belongs to.

//   paragraphs - this is the actual list of PARA objects.

//   models - the list of paragraph models referenced by the PARA objects.

//            caller is responsible for deleting the models.

void DetectParagraphs(int debug_level,

                      GenericVector<RowInfo> *row_infos,

                      GenericVector<PARA *> *row_owners,

                      PARA_LIST *paragraphs,

                      GenericVector<ParagraphModel *> *models) {

  GenericVector<RowScratchRegisters> rows;

  ParagraphTheory theory(models);


  // Initialize row_owners to be a bunch of nullptr pointers.

  row_owners->init_to_size(row_infos->size(), nullptr);


  // Set up row scratch registers for the main algorithm.

  rows.init_to_size(row_infos->size(), RowScratchRegisters());

  for (int i = 0; i < row_infos->size(); i++) {

    rows[i].Init((*row_infos)[i]);

  }


  // Pass 1:

  //   Detect sequences of lines that all contain leader dots (.....)

  //   These are likely Tables of Contents.  If there are three text lines in

  //   a row with leader dots, it's pretty safe to say the middle one should

  //   be a paragraph of its own.

  SeparateSimpleLeaderLines(&rows, 0, rows.size(), &theory);


  DebugDump(debug_level > 1, "End of Pass 1", theory, rows);


  GenericVector<Interval> leftovers;

  LeftoverSegments(rows, &leftovers, 0, rows.size());

  for (int i = 0; i < leftovers.size(); i++) {

    // Pass 2a:

    //   Find any strongly evidenced start-of-paragraph lines.  If they're

    //   followed by two lines that look like body lines, make a paragraph

    //   model for that and see if that model applies throughout the text

    //   (that is, "smear" it).

    StrongEvidenceClassify(debug_level, &rows,

                           leftovers[i].begin, leftovers[i].end, &theory);


    // Pass 2b:

    //   If we had any luck in pass 2a, we got part of the page and didn't

    //   know how to classify a few runs of rows. Take the segments that

    //   didn't find a model and reprocess them individually.

    GenericVector<Interval> leftovers2;

    LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);

    bool pass2a_was_useful = leftovers2.size() > 1 ||

        (leftovers2.size() == 1 &&

         (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size()));

    if (pass2a_was_useful) {

      for (int j = 0; j < leftovers2.size(); j++) {

        StrongEvidenceClassify(debug_level, &rows,

                               leftovers2[j].begin, leftovers2[j].end,

                               &theory);

      }

    }

  }


  DebugDump(debug_level > 1, "End of Pass 2", theory, rows);


  // Pass 3:

  //   These are the dregs for which we didn't have enough strong textual

  //   and geometric clues to form matching models for.  Let's see if

  //   the geometric clues are simple enough that we could just use those.

  LeftoverSegments(rows, &leftovers, 0, rows.size());

  for (int i = 0; i < leftovers.size(); i++) {

    GeometricClassify(debug_level, &rows,

                      leftovers[i].begin, leftovers[i].end, &theory);

  }


  // Undo any flush models for which there's little evidence.

  DowngradeWeakestToCrowns(debug_level, &theory, &rows);


  DebugDump(debug_level > 1, "End of Pass 3", theory, rows);


  // Pass 4:

  //   Take everything that's still not marked up well and clear all markings.

  LeftoverSegments(rows, &leftovers, 0, rows.size());

  for (int i = 0; i < leftovers.size(); i++) {

    for (int j = leftovers[i].begin; j < leftovers[i].end; j++) {

      rows[j].SetUnknown();

    }

  }


  DebugDump(debug_level > 1, "End of Pass 4", theory, rows);


  // Convert all of the unique hypothesis runs to PARAs.

  ConvertHypothesizedModelRunsToParagraphs(debug_level, rows, row_owners,

                                           &theory);


  DebugDump(debug_level > 0, "Final Paragraph Segmentation", theory, rows);


  // Finally, clean up any dangling nullptr row paragraph parents.

  CanonicalizeDetectionResults(row_owners, paragraphs);

}


// ============ Code interfacing with the rest of Tesseract ==================


static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it,

                                                 RowInfo *info) {

  // Set up text, lword_text, and rword_text (mostly for debug printing).

  STRING fake_text;

  PageIterator pit(static_cast<const PageIterator&>(it));

  bool first_word = true;

  if (!pit.Empty(RIL_WORD)) {

    do {

      fake_text += "x";

      if (first_word) info->lword_text += "x";

      info->rword_text += "x";

      if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&

          !pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL)) {

        fake_text += " ";

        info->rword_text = "";

        first_word = false;

      }

    } while (!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL) &&

             pit.Next(RIL_SYMBOL));

  }

  if (fake_text.size() == 0) return;


  int lspaces = info->pix_ldistance / info->average_interword_space;

  for (int i = 0; i < lspaces; i++) {

    info->text += ' ';

  }

  info->text += fake_text;


  // Set up lword_box, rword_box, and num_words.

  PAGE_RES_IT page_res_it = *it.PageResIt();

  WERD_RES *word_res = page_res_it.restart_row();

  ROW_RES *this_row = page_res_it.row();


  WERD_RES *lword = nullptr;

  WERD_RES *rword = nullptr;

  info->num_words = 0;

  do {

    if (word_res) {

      if (!lword) lword = word_res;

      if (rword != word_res) info->num_words++;

      rword = word_res;

    }

    word_res = page_res_it.forward();

  } while (page_res_it.row() == this_row);


  if (lword) info->lword_box = lword->word->bounding_box();

  if (rword) info->rword_box = rword->word->bounding_box();

}


// Given a Tesseract Iterator pointing to a text line, fill in the paragraph

// detector RowInfo with all relevant information from the row.

static void InitializeRowInfo(bool after_recognition,

                              const MutableIterator &it, RowInfo *info) {

  if (it.PageResIt()->row() != nullptr) {

    ROW *row = it.PageResIt()->row()->row;

    info->pix_ldistance = row->lmargin();

    info->pix_rdistance = row->rmargin();

    info->average_interword_space =

        row->space() > 0 ? row->space() : std::max(static_cast<int>(row->x_height()), 1);

    info->pix_xheight = row->x_height();

    info->has_leaders = false;

    info->has_drop_cap = row->has_drop_cap();

    info->ltr = true;  // set below depending on word scripts

  } else {

    info->pix_ldistance = info->pix_rdistance = 0;

    info->average_interword_space = 1;

    info->pix_xheight = 1.0;

    info->has_leaders = false;

    info->has_drop_cap = false;

    info->ltr = true;

  }


  info->num_words = 0;

  info->lword_indicates_list_item = false;

  info->lword_likely_starts_idea = false;

  info->lword_likely_ends_idea = false;

  info->rword_indicates_list_item = false;

  info->rword_likely_starts_idea = false;

  info->rword_likely_ends_idea = false;

  info->has_leaders = false;

  info->ltr = true;


  if (!after_recognition) {

    InitializeTextAndBoxesPreRecognition(it, info);

    return;

  }

  info->text = "";

  const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));

  int trailing_ws_idx = strlen(text.get());  // strip trailing space

  while (trailing_ws_idx > 0 &&

         // isspace() only takes ASCII

         isascii(text[trailing_ws_idx - 1]) &&

         isspace(text[trailing_ws_idx - 1]))

    trailing_ws_idx--;

  if (trailing_ws_idx > 0) {

    int lspaces = info->pix_ldistance / info->average_interword_space;

    for (int i = 0; i < lspaces; i++)

      info->text += ' ';

    for (int i = 0; i < trailing_ws_idx; i++)

      info->text += text[i];

  }


  if (info->text.size() == 0) {

    return;

  }


  PAGE_RES_IT page_res_it = *it.PageResIt();

  GenericVector<WERD_RES *> werds;

  WERD_RES *word_res = page_res_it.restart_row();

  ROW_RES *this_row = page_res_it.row();

  int num_leaders = 0;

  int ltr = 0;

  int rtl = 0;

  do {

    if (word_res && word_res->best_choice->unichar_string().length() > 0) {

      werds.push_back(word_res);

      ltr += word_res->AnyLtrCharsInWord() ? 1 : 0;

      rtl += word_res->AnyRtlCharsInWord() ? 1 : 0;

      if (word_res->word->flag(W_REP_CHAR)) num_leaders++;

    }

    word_res = page_res_it.forward();

  } while (page_res_it.row() == this_row);

  info->ltr = ltr >= rtl;

  info->has_leaders = num_leaders > 3;

  info->num_words = werds.size();

  if (!werds.empty()) {

    WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1];

    info->lword_text = lword->best_choice->unichar_string().c_str();

    info->rword_text = rword->best_choice->unichar_string().c_str();

    info->lword_box = lword->word->bounding_box();

    info->rword_box = rword->word->bounding_box();

    LeftWordAttributes(lword->uch_set, lword->best_choice,

                       info->lword_text,

                       &info->lword_indicates_list_item,

                       &info->lword_likely_starts_idea,

                       &info->lword_likely_ends_idea);

    RightWordAttributes(rword->uch_set, rword->best_choice,

                        info->rword_text,

                        &info->rword_indicates_list_item,

                        &info->rword_likely_starts_idea,

                        &info->rword_likely_ends_idea);

  }

}


// This is called after rows have been identified and words are recognized.

// Much of this could be implemented before word recognition, but text helps

// to identify bulleted lists and gives good signals for sentence boundaries.

void DetectParagraphs(int debug_level,

                      bool after_text_recognition,

                      const MutableIterator *block_start,

                      GenericVector<ParagraphModel *> *models) {

  // Clear out any preconceived notions.

  if (block_start->Empty(RIL_TEXTLINE)) {

    return;

  }

  BLOCK *block = block_start->PageResIt()->block()->block;

  block->para_list()->clear();

  bool is_image_block = block->pdblk.poly_block() && !block->pdblk.poly_block()->IsText();


  // Convert the Tesseract structures to RowInfos

  // for the paragraph detection algorithm.

  MutableIterator row(*block_start);

  if (row.Empty(RIL_TEXTLINE))

    return;  // end of input already.


  GenericVector<RowInfo> row_infos;

  do {

    if (!row.PageResIt()->row())

      continue;  // empty row.

    row.PageResIt()->row()->row->set_para(nullptr);

    row_infos.push_back(RowInfo());

    RowInfo &ri = row_infos.back();

    InitializeRowInfo(after_text_recognition, row, &ri);

  } while (!row.IsAtFinalElement(RIL_BLOCK, RIL_TEXTLINE) &&

           row.Next(RIL_TEXTLINE));


  // If we're called before text recognition, we might not have

  // tight block bounding boxes, so trim by the minimum on each side.

  if (!row_infos.empty()) {

    int min_lmargin = row_infos[0].pix_ldistance;

    int min_rmargin = row_infos[0].pix_rdistance;

    for (int i = 1; i < row_infos.size(); i++) {

      if (row_infos[i].pix_ldistance < min_lmargin)

        min_lmargin = row_infos[i].pix_ldistance;

      if (row_infos[i].pix_rdistance < min_rmargin)

        min_rmargin = row_infos[i].pix_rdistance;

    }

    if (min_lmargin > 0 || min_rmargin > 0) {

      for (int i = 0; i < row_infos.size(); i++) {

        row_infos[i].pix_ldistance -= min_lmargin;

        row_infos[i].pix_rdistance -= min_rmargin;

      }

    }

  }


  // Run the paragraph detection algorithm.

  GenericVector<PARA *> row_owners;

  GenericVector<PARA *> the_paragraphs;

  if (!is_image_block) {

    DetectParagraphs(debug_level, &row_infos, &row_owners, block->para_list(),

                     models);

  } else {

    row_owners.init_to_size(row_infos.size(), nullptr);

    CanonicalizeDetectionResults(&row_owners, block->para_list());

  }


  // Now stitch in the row_owners into the rows.

  row = *block_start;

  for (int i = 0; i < row_owners.size(); i++) {

    while (!row.PageResIt()->row())

      row.Next(RIL_TEXTLINE);

    row.PageResIt()->row()->row->set_para(row_owners[i]);

    row.Next(RIL_TEXTLINE);

  }

}


}  // namespace