tesseract
5.0.0-alpha-619-ge9db
|
#include <algorithm>
#include <cmath>
#include <memory>
#include <tesseract/osdetect.h>
#include "blobbox.h"
#include "blread.h"
#include "colfind.h"
#include "fontinfo.h"
#include "imagefind.h"
#include "linefind.h"
#include "oldlist.h"
#include "qrsequence.h"
#include "ratngs.h"
#include <tesseract/strngs.h>
#include "tabvector.h"
#include "tesseractclass.h"
#include "textord.h"
Go to the source code of this file.
|
int | orientation_and_script_detection (STRING &filename, OSResults *osr, tesseract::Tesseract *tess) |
|
int | os_detect (TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess) |
|
int | os_detect_blobs (const GenericVector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess) |
|
bool | os_detect_blob (BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *osr, tesseract::Tesseract *tess) |
|
int | OrientationIdToValue (const int &id) |
|
◆ orientation_and_script_detection()
Definition at line 190 of file osdetect.cpp.
197 lastdot = strrchr(name.
c_str(),
'.');
198 if (lastdot !=
nullptr)
199 name[lastdot-name.
c_str()] =
'\0';
203 int height = pixGetHeight(tess->
pix_binary());
210 TO_BLOCK_LIST land_blocks, port_blocks;
211 remove_nontext_regions(tess, &blocks, &port_blocks);
213 if (port_blocks.empty()) {
216 &blocks, &port_blocks);
227 return os_detect(&port_blocks, osr, tess);
◆ OrientationIdToValue()
int OrientationIdToValue (const int &id)
◆ os_detect()
Definition at line 233 of file osdetect.cpp.
236 TO_BLOCK_IT block_it;
237 block_it.set_to_list(port_blocks);
239 BLOBNBOX_CLIST filtered_list;
240 BLOBNBOX_C_IT filtered_it(&filtered_list);
242 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
243 block_it.forward ()) {
244 TO_BLOCK* to_block = block_it.data();
248 bbox_it.set_to_list(&to_block->
blobs);
249 for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list ();
250 bbox_it.forward ()) {
257 if (box.
width() == 0)
continue;
259 float y_x = std::fabs((box.
height() * 1.0f) / box.
width());
260 float x_y = 1.0f / y_x;
262 float ratio = x_y > y_x ? x_y : y_x;
266 filtered_it.add_to_end(bbox);
◆ os_detect_blob()
Definition at line 329 of file osdetect.cpp.
337 FCOORD current_rotation(1.0f, 0.0f);
338 FCOORD rotation90(0.0f, 1.0f);
339 BLOB_CHOICE_LIST ratings[4];
341 for (
int i = 0; i < 4; ++i) {
346 float x_origin = (box.
left() + box.
right()) / 2.0f;
347 float y_origin = (box.
bottom() + box.
top()) / 2.0f;
348 if (i == 0 || i == 2) {
350 y_origin = i == 0 ? box.
bottom() : box.
top();
354 x_origin = i == 1 ? box.
left() : box.
right();
356 std::unique_ptr<TBLOB> rotated_blob(
new TBLOB(*tblob));
357 rotated_blob->Normalize(
nullptr, &current_rotation,
nullptr,
358 x_origin, y_origin, scaling, scaling,
362 current_rotation.rotate(rotation90);
369 stop = s->
must_stop(orientation) && stop;
◆ os_detect_blobs()
Definition at line 278 of file osdetect.cpp.
283 int maxCharactersToTry = 5 * minCharactersToTry;
291 BLOBNBOX_C_IT filtered_it(blob_list);
292 int real_max = std::min(filtered_it.length(), maxCharactersToTry);
298 if (real_max < minCharactersToTry / 2) {
299 tprintf(
"Too few characters. Skipping this page\n");
303 auto** blobs =
new BLOBNBOX*[filtered_it.length()];
304 int number_of_blobs = 0;
305 for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list ();
306 filtered_it.forward ()) {
307 blobs[number_of_blobs++] = filtered_it.data();
310 int num_blobs_evaluated = 0;
311 for (
int i = 0; i < real_max; ++i) {
313 && i > minCharactersToTry) {
316 ++num_blobs_evaluated;
321 int orientation = o.get_orientation();
323 return num_blobs_evaluated;
◆ kHanRatioInJapanese
const float kHanRatioInJapanese = 0.3 |
◆ kHanRatioInKorean
const float kHanRatioInKorean = 0.7 |
◆ kMinAcceptableBlobHeight
const int kMinAcceptableBlobHeight = 10 |
◆ kNonAmbiguousMargin
const float kNonAmbiguousMargin = 1.0 |
◆ kScriptAcceptRatio
const float kScriptAcceptRatio = 1.3 |
◆ kSizeRatioToReject
const float kSizeRatioToReject = 2.0 |
bool poly_allow_detailed_fx
TBOX bounding_box() const
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *osr, tesseract::Tesseract *tess)
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess)
const float kSizeRatioToReject
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
PDBLK pdblk
Page Description Block.
const char * c_str() const
POLY_BLOCK * poly_block() const
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Textord * mutable_textord()
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
const ICOORD & topright() const
bool read_unlv_file(STRING name, int32_t xsize, int32_t ysize, BLOCK_LIST *blocks)
void detect_blob(BLOB_CHOICE_LIST *scores)
TBOX bounding_box() const
int min_characters_to_try
void update_best_script(int orientation_id)
DLLSYM void tprintf(const char *format,...)
bool detect_blob(BLOB_CHOICE_LIST *scores)
bool must_stop(int orientation)
int os_detect_blobs(const GenericVector< int > *allowed_scripts, BLOBNBOX_CLIST *blob_list, OSResults *osr, tesseract::Tesseract *tess)
const int kBlnBaselineOffset
const int kMinAcceptableBlobHeight
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
void FullPageBlock(int width, int height, BLOCK_LIST *blocks)