All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ocrclass.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: ocrclass.h
3  * Description: Class definitions and constants for the OCR API.
4  * Author: Hewlett-Packard Co
5  *
6  * (C) Copyright 1996, Hewlett-Packard Co.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 /**********************************************************************
20  * This file contains typedefs for all the structures used by
21  * the HP OCR interface.
22  * The code is designed to be used with either a C or C++ compiler.
23  * The structures are designed to allow them to be used with any
24  * structure alignment up to 8.
25  **********************************************************************/
26 
27 #ifndef CCUTIL_OCRCLASS_H_
28 #define CCUTIL_OCRCLASS_H_
29 
30 #ifndef __GNUC__
31 #ifdef _WIN32
32 #include <windows.h>
33 #include "gettimeofday.h"
34 #endif
35 #else
36 #include <sys/time.h>
37 #endif
38 #include <time.h>
39 #include "host.h"
40 
/* Maximum lengths of the various strings used by the OCR API. */
#define MAX_FONT_NAME 34   /* font name */
#define MAX_OCR_NAME 32    /* engine name */
#define MAX_OCR_VERSION 17 /* engine version code */

/* Pitch set values; these are identical to the RTF definitions. */
#define PITCH_DEF 0   /* default pitch */
#define PITCH_FIXED 1 /* fixed pitch */
#define PITCH_VAR 2   /* variable pitch */
50 
51 /**********************************************************************
52  * EANYCODE_CHAR
53  * Description of a single character. The character code is defined by
54  * the character set of the current font.
55  * Output text is sent as an array of these structures.
56  * Spaces and line endings in the output are represented in the
57  * structures of the surrounding characters. They are not directly
58  * represented as characters.
59  * The first character in a word has a positive value of blanks.
60  * Missing information should be set to the defaults in the comments.
61  * If word bounds are known, but not character bounds, then the top and
62  * bottom of each character should be those of the word. The left of the
63  * first and right of the last char in each word should be set. All other
64  * lefts and rights should be set to -1.
65  * If set, the values of right and bottom are left+width and top+height.
66  * Most of the members come directly from the parameters to ocr_append_char.
67  * The formatting member uses the enhancement parameter and combines the
68  * line direction stuff into the top 3 bits.
69  * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
70  * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
71  * the coding is, only that it is backwards compatible with the previous
72  * version.
73  **********************************************************************/
74 
75 typedef struct { /*single character */
76 // It should be noted that the format for char_code for version 2.0 and beyond
77 // is UTF8 which means that ASCII characters will come out as one structure but
78 // other characters will be returned in two or more instances of this structure
79 // with a single byte of the UTF8 code in each, but each will have the same
80 // bounding box. Programs which want to handle languagues with different
81 // characters sets will need to handle extended characters appropriately, but
82 // *all* code needs to be prepared to receive UTF8 coded characters for
83 // characters such as bullet and fancy quotes.
84  uinT16 char_code; /*character itself */
85  inT16 left; /*of char (-1) */
86  inT16 right; /*of char (-1) */
87  inT16 top; /*of char (-1) */
88  inT16 bottom; /*of char (-1) */
89  inT16 font_index; /*what font (0) */
90  uinT8 confidence; /*0=perfect, 100=reject (0/100) */
91  uinT8 point_size; /*of char, 72=i inch, (10) */
92  inT8 blanks; /*no of spaces before this char (1) */
93  uinT8 formatting; /*char formatting (0) */
94 } EANYCODE_CHAR; /*single character */
95 
96 /**********************************************************************
97  * ETEXT_DESC
98  * Description of the output of the OCR engine.
99  * This structure is used as both a progress monitor and the final
100  * output header, since it needs to be a valid progress monitor while
101  * the OCR engine is storing its output to shared memory.
102  * During progress, all the buffer info is -1.
103  * Progress starts at 0 and increases to 100 during OCR. No other constraint.
104  * Every progress callback, the OCR engine must set ocr_alive to 1.
105  * The HP side will set ocr_alive to 0. Repeated failure to reset
106  * to 1 indicates that the OCR engine is dead.
107  * If the cancel function is not null then it is called with the number of
108  * user words found. If it returns true then operation is cancelled.
109  **********************************************************************/
110 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
111 
112 class ETEXT_DESC { // output header
113  public:
114  inT16 count; // chars in this buffer(0)
115  inT16 progress; // percent complete increasing (0-100)
116  inT8 more_to_come; // true if not last
117  volatile inT8 ocr_alive; // ocr sets to 1, HP 0
118  inT8 err_code; // for errcode use
119  CANCEL_FUNC cancel; // returns true to cancel
120  void* cancel_this; // this or other data for cancel
121  struct timeval end_time; // time to stop. expected to be set only by call
122  // to set_deadline_msecs()
123  EANYCODE_CHAR text[1]; // character data
124 
125  ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
126  err_code(0), cancel(NULL), cancel_this(NULL) {
127  end_time.tv_sec = 0;
128  end_time.tv_usec = 0;
129  }
130 
131  // Sets the end time to be deadline_msecs milliseconds from now.
132  void set_deadline_msecs(inT32 deadline_msecs) {
134  inT32 deadline_secs = deadline_msecs / 1000;
135  end_time.tv_sec += deadline_secs;
136  end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
137  if (end_time.tv_usec > 1000000) {
138  end_time.tv_usec -= 1000000;
139  ++end_time.tv_sec;
140  }
141  }
142 
143  // Returns false if we've not passed the end_time, or have not set a deadline.
144  bool deadline_exceeded() const {
145  if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
146  struct timeval now;
147  gettimeofday(&now, NULL);
148  return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
149  now.tv_usec > end_time.tv_usec));
150  }
151 };
152 
153 #endif // CCUTIL_OCRCLASS_H_
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:132
inT8 blanks
Definition: ocrclass.h:92
volatile inT8 ocr_alive
Definition: ocrclass.h:117
struct timeval end_time
Definition: ocrclass.h:121
void * cancel_this
Definition: ocrclass.h:120
EANYCODE_CHAR text[1]
Definition: ocrclass.h:123
ETEXT_DESC()
Definition: ocrclass.h:125
uinT8 point_size
Definition: ocrclass.h:91
inT16 right
Definition: ocrclass.h:86
inT16 count
Definition: ocrclass.h:114
CANCEL_FUNC cancel
Definition: ocrclass.h:119
bool deadline_exceeded() const
Definition: ocrclass.h:144
inT16 left
Definition: ocrclass.h:85
inT16 font_index
Definition: ocrclass.h:89
inT16 top
Definition: ocrclass.h:87
uinT8 formatting
Definition: ocrclass.h:93
uinT16 char_code
Definition: ocrclass.h:84
inT16 progress
Definition: ocrclass.h:115
#define NULL
Definition: host.h:144
SIGNED char inT8
Definition: host.h:98
uinT8 confidence
Definition: ocrclass.h:90
inT16 bottom
Definition: ocrclass.h:88
int gettimeofday(struct timeval *tp, struct timezone *tzp)
inT8 err_code
Definition: ocrclass.h:118
bool(* CANCEL_FUNC)(void *cancel_this, int words)
Definition: ocrclass.h:110
inT8 more_to_come
Definition: ocrclass.h:116
unsigned short uinT16
Definition: host.h:101
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
unsigned char uinT8
Definition: host.h:99