tesseract
5.0.0-alpha-619-ge9db
unicharmap.h
Go to the documentation of this file.
1
// File: unicharmap.h
3
// Description: Unicode character/ligature to integer id class.
4
// Author: Thomas Kielbus
5
// Created: Wed Jun 28 17:05:01 PDT 2006
6
//
7
// (C) Copyright 2006, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
19
20
#ifndef TESSERACT_CCUTIL_UNICHARMAP_H_
21
#define TESSERACT_CCUTIL_UNICHARMAP_H_
22
23
#include <
tesseract/unichar.h
>
24
25
// A UNICHARMAP stores unique unichars. Each of them is associated with one
26
// UNICHAR_ID.
27
class
UNICHARMAP
{
28
public
:
29
30
// Create an empty UNICHARMAP
31
UNICHARMAP
();
32
33
~UNICHARMAP
();
34
35
// Insert the given unichar represention in the UNICHARMAP and associate it
36
// with the given id. The length of the representation MUST be non-zero.
37
void
insert
(
const
char
*
const
unichar_repr,
UNICHAR_ID
id
);
38
39
// Return the id associated with the given unichar representation,
40
// this representation MUST exist within the UNICHARMAP. The first
41
// length characters (maximum) from unichar_repr are used. The length
42
// MUST be non-zero.
43
UNICHAR_ID
unichar_to_id
(
const
char
*
const
unichar_repr,
int
length)
const
;
44
45
// Return true if the given unichar representation is already present in the
46
// UNICHARMAP. The first length characters (maximum) from unichar_repr are
47
// used. The length MUST be non-zero.
48
bool
contains
(
const
char
*
const
unichar_repr,
int
length)
const
;
49
50
// Return the minimum number of characters that must be used from this string
51
// to obtain a match in the UNICHARMAP.
52
int
minmatch
(
const
char
*
const
unichar_repr)
const
;
53
54
// Clear the UNICHARMAP. All previous data is lost.
55
void
clear
();
56
57
private
:
58
59
// The UNICHARMAP is represented as a tree whose nodes are of type
60
// UNICHARMAP_NODE.
61
struct
UNICHARMAP_NODE {
62
63
UNICHARMAP_NODE();
64
~UNICHARMAP_NODE();
65
66
UNICHARMAP_NODE* children;
67
UNICHAR_ID
id;
68
};
69
70
UNICHARMAP_NODE* nodes;
71
};
72
73
#endif // TESSERACT_CCUTIL_UNICHARMAP_H_
UNICHARMAP::contains
bool contains(const char *const unichar_repr, int length) const
Definition:
unicharmap.cpp:79
UNICHARMAP::clear
void clear()
Definition:
unicharmap.cpp:115
UNICHARMAP
Definition:
unicharmap.h:27
UNICHARMAP::unichar_to_id
UNICHAR_ID unichar_to_id(const char *const unichar_repr, int length) const
Definition:
unicharmap.cpp:34
UNICHARMAP::minmatch
int minmatch(const char *const unichar_repr) const
Definition:
unicharmap.cpp:100
UNICHAR_ID
int UNICHAR_ID
Definition:
unichar.h:36
UNICHARMAP::UNICHARMAP
UNICHARMAP()
Definition:
unicharmap.cpp:23
UNICHARMAP::~UNICHARMAP
~UNICHARMAP()
Definition:
unicharmap.cpp:27
unichar.h
UNICHARMAP::insert
void insert(const char *const unichar_repr, UNICHAR_ID id)
Definition:
unicharmap.cpp:56
src
ccutil
unicharmap.h
Generated on Thu Jan 30 2020 14:22:20 for tesseract by Doxygen 1.8.16