tesseract
5.0.0-alpha-619-ge9db
associate.h
Go to the documentation of this file.
1
// File: associate.h
3
// Description: Structs, classes, typedefs useful for the segmentation
4
// search. Functions for scoring segmentation paths according
5
// to their character widths, gap widths and seam cuts.
6
// Author: Daria Antonova
7
// Created: Mon Mar 8 11:26:43 PDT 2010
8
//
9
// (C) Copyright 2010, Google Inc.
10
// Licensed under the Apache License, Version 2.0 (the "License");
11
// you may not use this file except in compliance with the License.
12
// You may obtain a copy of the License at
13
// http://www.apache.org/licenses/LICENSE-2.0
14
// Unless required by applicable law or agreed to in writing, software
15
// distributed under the License is distributed on an "AS IS" BASIS,
16
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
// See the License for the specific language governing permissions and
18
// limitations under the License.
19
//
21
22
#ifndef ASSOCIATE_H
23
#define ASSOCIATE_H
24
25
#include "
blobs.h
"
26
#include "
elst.h
"
27
#include "
ratngs.h
"
28
#include "
seam.h
"
29
#include "
split.h
"
30
31
class
WERD_RES
;
32
33
namespace
tesseract
{
34
35
// Statistics about character widths, gaps and seams.
36
struct
AssociateStats
{
37
AssociateStats
() {
Clear
(); }
38
39
void
Clear
() {
40
shape_cost
= 0.0f;
41
bad_shape
=
false
;
42
full_wh_ratio
= 0.0f;
43
full_wh_ratio_total
= 0.0f;
44
full_wh_ratio_var
= 0.0f;
45
bad_fixed_pitch_right_gap
=
false
;
46
bad_fixed_pitch_wh_ratio
=
false
;
47
gap_sum
= 0;
48
}
49
50
void
Print
() {
tprintf
(
"AssociateStats: s(%g %d)\n"
,
shape_cost
,
bad_shape
); }
51
52
float
shape_cost
;
// cost of blob shape
53
bool
bad_shape
;
// true if the shape of the blob is unacceptable
54
float
full_wh_ratio
;
// width-to-hight ratio + gap on the right
55
float
full_wh_ratio_total
;
// sum of width-to-hight ratios
56
// on the path terminating at this blob
57
float
full_wh_ratio_var
;
// variance of full_wh_ratios on the path
58
bool
bad_fixed_pitch_right_gap
;
// true if there is no gap before
59
// the blob on the right
60
bool
bad_fixed_pitch_wh_ratio
;
// true if the blobs has width-to-hight
61
// ratio > kMaxFixedPitchCharAspectRatio
62
int
gap_sum
;
// sum of gaps within the blob
63
};
64
65
// Utility functions for scoring segmentation paths according to their
66
// character widths, gap widths, seam characteristics.
67
class
AssociateUtils
{
68
public
:
69
static
const
float
kMaxFixedPitchCharAspectRatio
;
70
static
const
float
kMinGap
;
71
72
// Returns outline length of the given blob is computed as:
73
// rating_cert_scale * rating / certainty
74
// Since from Wordrec::SegSearch() in segsearch.cpp
75
// rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale
76
// And from Classify::ConvertMatchesToChoices() in adaptmatch.cpp
77
// Rating = Certainty = next.rating
78
// Rating *= rating_scale * Results->BlobLength
79
// Certainty *= -(getDict().certainty_scale)
80
static
inline
float
ComputeOutlineLength
(
float
rating_cert_scale,
81
const
BLOB_CHOICE
&b) {
82
return
rating_cert_scale * b.
rating
() / b.
certainty
();
83
}
84
static
inline
float
ComputeRating
(
float
rating_cert_scale,
85
float
cert,
int
width) {
86
return
static_cast<float>(width) * cert / rating_cert_scale;
87
}
88
89
// Computes character widths, gaps and seams stats given the
90
// AssociateStats of the path so far, col, row of the blob that
91
// is being added to the path, and WERD_RES containing information
92
// about character widths, gaps and seams.
93
// Fills associate_cost with the combined shape, gap and seam cost
94
// of adding a unichar from (col, row) to the path (note that since
95
// this function could be used to compute the prioritization for
96
// pain points, (col, row) entry might not be classified yet; thus
97
// information in the (col, row) entry of the ratings matrix is not used).
98
//
99
// Note: the function assumes that word_res, stats and
100
// associate_cost pointers are not nullptr.
101
static
void
ComputeStats
(
int
col,
int
row,
102
const
AssociateStats
*parent_stats,
103
int
parent_path_length,
104
bool
fixed_pitch,
105
float
max_char_wh_ratio,
106
WERD_RES
*word_res,
107
bool
debug,
108
AssociateStats
*stats);
109
110
// Returns the width cost for fixed-pitch text.
111
static
float
FixedPitchWidthCost
(
float
norm_width,
float
right_gap,
112
bool
end_pos,
float
max_char_wh_ratio);
113
114
// Returns the gap cost for fixed-pitch text (penalizes vertically
115
// overlapping components).
116
static
inline
float
FixedPitchGapCost
(
float
norm_gap,
bool
end_pos) {
117
return
(norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f;
118
}
119
};
120
121
}
// namespace tesseract
122
123
#endif
elst.h
split.h
tesseract::AssociateUtils::FixedPitchGapCost
static float FixedPitchGapCost(float norm_gap, bool end_pos)
Definition:
associate.h:116
BLOB_CHOICE::certainty
float certainty() const
Definition:
ratngs.h:81
tesseract::AssociateStats::Clear
void Clear()
Definition:
associate.h:39
tesseract::AssociateUtils::ComputeRating
static float ComputeRating(float rating_cert_scale, float cert, int width)
Definition:
associate.h:84
tesseract::AssociateStats::bad_shape
bool bad_shape
Definition:
associate.h:53
tesseract::AssociateUtils::kMaxFixedPitchCharAspectRatio
static const float kMaxFixedPitchCharAspectRatio
Definition:
associate.h:69
WERD_RES
Definition:
pageres.h:160
tesseract::AssociateStats::full_wh_ratio_total
float full_wh_ratio_total
Definition:
associate.h:55
tesseract::AssociateStats::bad_fixed_pitch_right_gap
bool bad_fixed_pitch_right_gap
Definition:
associate.h:58
blobs.h
tesseract::AssociateStats
Definition:
associate.h:36
ratngs.h
tesseract::AssociateUtils::FixedPitchWidthCost
static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
Definition:
associate.cpp:156
tesseract::AssociateStats::full_wh_ratio_var
float full_wh_ratio_var
Definition:
associate.h:57
tesseract
Definition:
baseapi.h:65
tesseract::AssociateStats::full_wh_ratio
float full_wh_ratio
Definition:
associate.h:54
BLOB_CHOICE::rating
float rating() const
Definition:
ratngs.h:78
tesseract::AssociateUtils::ComputeOutlineLength
static float ComputeOutlineLength(float rating_cert_scale, const BLOB_CHOICE &b)
Definition:
associate.h:80
BLOB_CHOICE
Definition:
ratngs.h:49
tesseract::AssociateStats::gap_sum
int gap_sum
Definition:
associate.h:62
tesseract::AssociateUtils
Definition:
associate.h:67
tprintf
DLLSYM void tprintf(const char *format,...)
Definition:
tprintf.cpp:34
tesseract::AssociateUtils::ComputeStats
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
Definition:
associate.cpp:34
seam.h
tesseract::AssociateStats::Print
void Print()
Definition:
associate.h:50
tesseract::AssociateStats::bad_fixed_pitch_wh_ratio
bool bad_fixed_pitch_wh_ratio
Definition:
associate.h:60
tesseract::AssociateUtils::kMinGap
static const float kMinGap
Definition:
associate.h:70
tesseract::AssociateStats::AssociateStats
AssociateStats()
Definition:
associate.h:37
tesseract::AssociateStats::shape_cost
float shape_cost
Definition:
associate.h:52
src
wordrec
associate.h
Generated on Thu Jan 30 2020 14:22:21 for tesseract by
1.8.16