tesseract  5.0.0-alpha-619-ge9db
ocrpara.cpp
Go to the documentation of this file.
1 // File: ocrpara.cpp
3 // Description: OCR Paragraph Output Type
4 // Author: David Eger
5 //
6 // (C) Copyright 2010, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include <cstdio>
20 
21 #include "ocrpara.h"
22 #include "host.h" // For NearlyEqual()
23 
25 
30 
31 static STRING ParagraphJustificationToString(
32  tesseract::ParagraphJustification justification) {
33  switch (justification) {
34  case JUSTIFICATION_LEFT:
35  return "LEFT";
37  return "RIGHT";
39  return "CENTER";
40  default:
41  return "UNKNOWN";
42  }
43 }
44 
45 bool ParagraphModel::ValidFirstLine(int lmargin, int lindent,
46  int rindent, int rmargin) const {
47  switch (justification_) {
48  case JUSTIFICATION_LEFT:
49  return NearlyEqual(lmargin + lindent, margin_ + first_indent_,
50  tolerance_);
52  return NearlyEqual(rmargin + rindent, margin_ + first_indent_,
53  tolerance_);
55  return NearlyEqual(lindent, rindent, tolerance_ * 2);
56  default:
57  // shouldn't happen
58  return false;
59  }
60 }
61 
62 bool ParagraphModel::ValidBodyLine(int lmargin, int lindent,
63  int rindent, int rmargin) const {
64  switch (justification_) {
65  case JUSTIFICATION_LEFT:
66  return NearlyEqual(lmargin + lindent, margin_ + body_indent_,
67  tolerance_);
69  return NearlyEqual(rmargin + rindent, margin_ + body_indent_,
70  tolerance_);
72  return NearlyEqual(lindent, rindent, tolerance_ * 2);
73  default:
74  // shouldn't happen
75  return false;
76  }
77 }
78 
79 bool ParagraphModel::Comparable(const ParagraphModel &other) const {
80  if (justification_ != other.justification_)
81  return false;
82  if (justification_ == JUSTIFICATION_CENTER ||
83  justification_ == JUSTIFICATION_UNKNOWN)
84  return true;
85  int tolerance = (tolerance_ + other.tolerance_) / 4;
86  return NearlyEqual(margin_ + first_indent_,
87  other.margin_ + other.first_indent_, tolerance) &&
88  NearlyEqual(margin_ + body_indent_,
89  other.margin_ + other.body_indent_, tolerance);
90 }
91 
93  char buffer[200];
94  const STRING &alignment = ParagraphJustificationToString(justification_);
95  snprintf(buffer, sizeof(buffer),
96  "margin: %d, first_indent: %d, body_indent: %d, alignment: %s",
97  margin_, first_indent_, body_indent_, alignment.c_str());
98  return STRING(buffer);
99 }
JUSTIFICATION_UNKNOWN
Definition: capi.h:132
host.h
ParagraphModel::ValidBodyLine
bool ValidBodyLine(int lmargin, int lindent, int rindent, int rmargin) const
Definition: ocrpara.cpp:62
tesseract::JUSTIFICATION_RIGHT
Definition: publictypes.h:252
NearlyEqual
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:36
ParagraphModel::Comparable
bool Comparable(const ParagraphModel &other) const
Definition: ocrpara.cpp:79
tesseract::ParagraphJustification
ParagraphJustification
Definition: publictypes.h:248
STRING
Definition: strngs.h:45
JUSTIFICATION_LEFT
Definition: capi.h:133
ParagraphModel::ValidFirstLine
bool ValidFirstLine(int lmargin, int lindent, int rindent, int rmargin) const
Definition: ocrpara.cpp:45
ParagraphModel
Definition: ocrpara.h:114
tesseract::JUSTIFICATION_LEFT
Definition: publictypes.h:250
ParagraphModel::tolerance
int tolerance() const
Definition: ocrpara.h:170
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::JUSTIFICATION_UNKNOWN
Definition: publictypes.h:249
ocrpara.h
JUSTIFICATION_CENTER
Definition: capi.h:134
ParagraphModel::ToString
STRING ToString() const
Definition: ocrpara.cpp:92
JUSTIFICATION_RIGHT
Definition: capi.h:135
PARA
Definition: ocrpara.h:29
tesseract::JUSTIFICATION_CENTER
Definition: publictypes.h:251
ELISTIZE
#define ELISTIZE(CLASSNAME)
Definition: elst.h:919