tesseract
5.0.0-alpha-619-ge9db
lm_consistency.cpp
Go to the documentation of this file.
1
// File: lm_consistency.cpp
3
// Description: Struct for recording consistency of the paths representing
4
// OCR hypotheses.
5
// Author: Rika Antonova
6
// Created: Mon Jun 20 11:26:43 PST 2012
7
//
8
// (C) Copyright 2012, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
20
21
#include "
lm_consistency.h
"
22
23
#include "
associate.h
"
24
#include "
dict.h
"
25
#include "
ratngs.h
"
26
27
namespace
tesseract
{
28
29
void
LMConsistencyInfo::ComputeXheightConsistency
(
30
const
BLOB_CHOICE
*b,
bool
is_punc) {
31
if
(
xht_decision
==
XH_INCONSISTENT
)
32
return
;
// It isn't going to get any better.
33
34
// Compute xheight consistency.
35
bool
parent_null =
xht_sp
< 0;
36
int
parent_sp =
xht_sp
;
37
// Debug strings.
38
if
(b->
yshift
() >
LMConsistencyInfo::kShiftThresh
) {
39
xht_sp
=
LMConsistencyInfo::kSUP
;
40
}
else
if
(b->
yshift
() < -
LMConsistencyInfo::kShiftThresh
) {
41
xht_sp
=
LMConsistencyInfo::kSUB
;
42
}
else
{
43
xht_sp
=
LMConsistencyInfo::kNORM
;
44
}
45
xht_count
[
xht_sp
]++;
46
if
(is_punc)
xht_count_punc
[
xht_sp
]++;
47
if
(!parent_null) {
48
xpos_entropy
+= abs(parent_sp -
xht_sp
);
49
}
50
// TODO(eger): Figure out a better way to account for small caps.
51
// For the first character not y-shifted, we only care if it is too small.
52
// Too large is common in drop caps and small caps.
53
// int16_t small_xht = b->min_xheight();
54
// if (parent_vse == nullptr && sp == LanguageModelConsistencyInfo::kNORM) {
55
// small_xht = 0;
56
// }
57
IntersectRange
(b->
min_xheight
(), b->
max_xheight
(),
58
&(
xht_lo
[
xht_sp
]), &(
xht_hi
[
xht_sp
]));
59
60
61
// Compute xheight inconsistency kinds.
62
if
(parent_null) {
63
if
(
xht_count
[
kNORM
] == 1) {
64
xht_decision
=
XH_GOOD
;
65
}
else
{
66
xht_decision
=
XH_SUBNORMAL
;
67
}
68
return
;
69
}
70
71
// When we intersect the ranges of xheights in pixels for all characters in
72
// each position (subscript, normal, superscript),
73
// How much range must be left? 0? [exactly one pixel height for xheight] 1?
74
// TODO(eger): Extend this code to take a prior for the rest of the line.
75
const
int
kMinIntersectedXHeightRange = 0;
76
for
(
int
i = 0; i <
kNumPos
; i++) {
77
if
(
xht_lo
[i] >
xht_hi
[i] - kMinIntersectedXHeightRange) {
78
xht_decision
=
XH_INCONSISTENT
;
79
return
;
80
}
81
}
82
83
// Reject as improbable anything where there's much punctuation in subscript
84
// or superscript regions.
85
if
(
xht_count_punc
[
kSUB
] >
xht_count
[
kSUB
] * 0.4 ||
86
xht_count_punc
[
kSUP
] >
xht_count
[
kSUP
] * 0.4) {
87
xht_decision
=
XH_INCONSISTENT
;
88
return
;
89
}
90
91
// Now check that the subscript and superscript aren't too small relative to
92
// the mainline.
93
auto
mainline_xht = static_cast<double>(
xht_lo
[
kNORM
]);
94
double
kMinSizeRatio = 0.4;
95
if
(mainline_xht > 0.0 &&
96
(static_cast<double>(
xht_hi
[
kSUB
]) / mainline_xht < kMinSizeRatio ||
97
static_cast<double>(
xht_hi
[
kSUP
]) / mainline_xht < kMinSizeRatio)) {
98
xht_decision
=
XH_INCONSISTENT
;
99
return
;
100
}
101
// TODO(eger): Check into inconsistency of super/subscript y offsets.
102
if
(
xpos_entropy
>
kMaxEntropy
) {
103
xht_decision
=
XH_INCONSISTENT
;
104
return
;
105
}
106
if
(
xht_count
[
kSUB
] == 0 &&
xht_count
[
kSUP
] == 0) {
107
xht_decision
=
XH_GOOD
;
108
return
;
109
}
110
xht_decision
=
XH_SUBNORMAL
;
111
}
112
113
}
// namespace tesseract
tesseract::LMConsistencyInfo::kNORM
static const int kNORM
Definition:
lm_consistency.h:50
tesseract::XH_SUBNORMAL
Definition:
dict.h:78
dict.h
BLOB_CHOICE::min_xheight
float min_xheight() const
Definition:
ratngs.h:118
IntersectRange
void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2)
Definition:
helpers.h:143
tesseract::LMConsistencyInfo::kShiftThresh
static const int kShiftThresh
Definition:
lm_consistency.h:43
tesseract::LMConsistencyInfo::xht_count_punc
int16_t xht_count_punc[kNumPos]
Definition:
lm_consistency.h:132
tesseract::XH_GOOD
Definition:
dict.h:78
tesseract::LMConsistencyInfo::xht_sp
int16_t xht_sp
Definition:
lm_consistency.h:133
ratngs.h
lm_consistency.h
tesseract::LMConsistencyInfo::xht_decision
XHeightConsistencyEnum xht_decision
Definition:
lm_consistency.h:123
tesseract::LMConsistencyInfo::kNumPos
static const int kNumPos
Definition:
lm_consistency.h:51
tesseract::LMConsistencyInfo::kMaxEntropy
static const int kMaxEntropy
Definition:
lm_consistency.h:47
tesseract::LMConsistencyInfo::xht_hi
float xht_hi[kNumPos]
Definition:
lm_consistency.h:130
tesseract::LMConsistencyInfo::xht_count
int16_t xht_count[kNumPos]
Definition:
lm_consistency.h:131
tesseract::LMConsistencyInfo::kSUB
static const int kSUB
Definition:
lm_consistency.h:50
tesseract::XH_INCONSISTENT
Definition:
dict.h:78
tesseract::LMConsistencyInfo::xpos_entropy
int16_t xpos_entropy
Definition:
lm_consistency.h:134
BLOB_CHOICE::max_xheight
float max_xheight() const
Definition:
ratngs.h:121
tesseract::LMConsistencyInfo::xht_lo
float xht_lo[kNumPos]
Definition:
lm_consistency.h:129
tesseract
Definition:
baseapi.h:65
BLOB_CHOICE
Definition:
ratngs.h:49
BLOB_CHOICE::yshift
float yshift() const
Definition:
ratngs.h:124
associate.h
tesseract::LMConsistencyInfo::kSUP
static const int kSUP
Definition:
lm_consistency.h:50
tesseract::LMConsistencyInfo::ComputeXheightConsistency
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)
Definition:
lm_consistency.cpp:29
src
wordrec
lm_consistency.cpp
Generated on Thu Jan 30 2020 14:22:21 for tesseract by
1.8.16