27 #define MAX_LINE_LEN 1024
54 if (label32_ !=
NULL) {
63 if (label32_ !=
NULL) {
72 if (label32_ !=
NULL) {
78 SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
86 unsigned short first_char;
87 unsigned short last_char;
88 unsigned short norm_top;
89 unsigned short norm_bottom;
90 unsigned short norm_aspect_ratio;
96 if (fp->
Read(&val32,
sizeof(val32)) !=
sizeof(val32)) {
99 if (val32 != 0xabd0fefe) {
103 if (fp->
Read(&val32,
sizeof(val32)) !=
sizeof(val32)) {
108 label32 =
new char_32[val32 + 1];
109 if (label32 ==
NULL) {
113 if (fp->
Read(label32, val32 *
sizeof(*label32)) !=
114 (val32 *
sizeof(*label32))) {
123 if (fp->
Read(&page,
sizeof(page)) !=
sizeof(page)) {
126 if (fp->
Read(&left,
sizeof(left)) !=
sizeof(left)) {
129 if (fp->
Read(&top,
sizeof(top)) !=
sizeof(top)) {
132 if (fp->
Read(&first_char,
sizeof(first_char)) !=
sizeof(first_char)) {
135 if (fp->
Read(&last_char,
sizeof(last_char)) !=
sizeof(last_char)) {
138 if (fp->
Read(&norm_top,
sizeof(norm_top)) !=
sizeof(norm_top)) {
141 if (fp->
Read(&norm_bottom,
sizeof(norm_bottom)) !=
sizeof(norm_bottom)) {
144 if (fp->
Read(&norm_aspect_ratio,
sizeof(norm_aspect_ratio)) !=
145 sizeof(norm_aspect_ratio)) {
150 if (char_samp ==
NULL) {
154 char_samp->label32_ = label32;
155 char_samp->page_ = page;
156 char_samp->left_ = left;
157 char_samp->top_ = top;
158 char_samp->first_char_ = first_char;
159 char_samp->last_char_ = last_char;
160 char_samp->norm_top_ = norm_top;
161 char_samp->norm_bottom_ = norm_bottom;
162 char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
176 unsigned short first_char;
177 unsigned short last_char;
178 unsigned short norm_top;
179 unsigned short norm_bottom;
180 unsigned short norm_aspect_ratio;
185 if (fread(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
188 if (val32 != 0xabd0fefe) {
192 if (fread(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
197 label32 =
new char_32[val32 + 1];
198 if (label32 ==
NULL) {
202 if (fread(label32, 1, val32 *
sizeof(*label32), fp) !=
203 (val32 *
sizeof(*label32))) {
213 if (fread(&page, 1,
sizeof(page), fp) !=
sizeof(page) ||
214 fread(&left, 1,
sizeof(left), fp) !=
sizeof(left) ||
215 fread(&top, 1,
sizeof(top), fp) !=
sizeof(top) ||
216 fread(&first_char, 1,
sizeof(first_char), fp) !=
sizeof(first_char) ||
217 fread(&last_char, 1,
sizeof(last_char), fp) !=
sizeof(last_char) ||
218 fread(&norm_top, 1,
sizeof(norm_top), fp) !=
sizeof(norm_top) ||
219 fread(&norm_bottom, 1,
sizeof(norm_bottom), fp) !=
sizeof(norm_bottom) ||
220 fread(&norm_aspect_ratio, 1,
sizeof(norm_aspect_ratio), fp) !=
221 sizeof(norm_aspect_ratio)) {
227 if (char_samp ==
NULL) {
232 char_samp->label32_ = label32;
233 char_samp->page_ = page;
234 char_samp->left_ = left;
235 char_samp->top_ = top;
236 char_samp->first_char_ = first_char;
237 char_samp->last_char_ = last_char;
238 char_samp->norm_top_ = norm_top;
239 char_samp->norm_bottom_ = norm_bottom;
240 char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
253 if (scaled_samp ==
NULL) {
256 if (scaled_samp->
ScaleFrom(
this, isotropic) ==
false) {
260 scaled_samp->left_ = left_;
261 scaled_samp->top_ = top_;
262 scaled_samp->page_ = page_;
264 scaled_samp->first_char_ = first_char_;
265 scaled_samp->last_char_ = last_char_;
266 scaled_samp->norm_top_ = norm_top_;
267 scaled_samp->norm_bottom_ = norm_bottom_;
268 scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
274 unsigned char *data) {
277 if (char_samp ==
NULL) {
292 if (fwrite(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
297 if (fwrite(&val32, 1,
sizeof(val32), fp) !=
sizeof(val32)) {
301 if (label32_ !=
NULL) {
302 if (fwrite(label32_, 1, val32 *
sizeof(*label32_), fp) !=
303 (val32 *
sizeof(*label32_))) {
308 if (fwrite(&page_, 1,
sizeof(page_), fp) !=
sizeof(page_)) {
311 if (fwrite(&left_, 1,
sizeof(left_), fp) !=
sizeof(left_)) {
314 if (fwrite(&top_, 1,
sizeof(top_), fp) !=
sizeof(top_)) {
317 if (fwrite(&first_char_, 1,
sizeof(first_char_), fp) !=
318 sizeof(first_char_)) {
321 if (fwrite(&last_char_, 1,
sizeof(last_char_), fp) !=
sizeof(last_char_)) {
324 if (fwrite(&norm_top_, 1,
sizeof(norm_top_), fp) !=
sizeof(norm_top_)) {
327 if (fwrite(&norm_bottom_, 1,
sizeof(norm_bottom_), fp) !=
328 sizeof(norm_bottom_)) {
331 if (fwrite(&norm_aspect_ratio_, 1,
sizeof(norm_aspect_ratio_), fp) !=
332 sizeof(norm_aspect_ratio_)) {
350 int cropped_left = 0;
352 int cropped_wid =
wid_;
353 int cropped_hgt =
hgt_;
355 &cropped_wid, &cropped_hgt);
357 if (cropped_wid == 0 || cropped_hgt == 0) {
363 cropped_wid, cropped_hgt);
371 cropped_wid / (cropped_wid + cropped_hgt));
376 Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
383 int max_hist_wnd,
int min_con_comp_size)
const {
390 if (concomp_cnt <= 0 || !concomp_array) {
392 delete []concomp_array;
397 for (
int concomp = 0; concomp < concomp_cnt; concomp++) {
398 int concomp_seg_cnt = 0;
402 concomp_array[concomp]->
Segment(max_hist_wnd, &concomp_seg_cnt);
404 if (concomp_alloc_seg ==
NULL) {
406 concomp_seg_array = concomp_array + concomp;
409 concomp_seg_array = concomp_alloc_seg;
410 delete concomp_array[concomp];
413 for (
int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
415 if (concomp_seg_array[seg_idx]->
Width() < 2 &&
416 concomp_seg_array[seg_idx]->
Height() < 2) {
417 delete concomp_seg_array[seg_idx];
424 if (temp_segm_array ==
NULL) {
425 fprintf(stderr,
"Cube ERROR (CharSamp::Segment): could not "
426 "allocate additional connected components\n");
427 delete []concomp_seg_array;
428 delete []concomp_array;
433 memcpy(temp_segm_array, seg_array, seg_cnt *
sizeof(*seg_array));
436 seg_array = temp_segm_array;
438 seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
441 if (concomp_alloc_seg !=
NULL) {
442 delete []concomp_alloc_seg;
445 delete []concomp_array;
448 if (seg_cnt > 0 && seg_array !=
NULL) {
449 qsort(seg_array, seg_cnt,
sizeof(*seg_array), right_2_left ?
452 (*segment_cnt) = seg_cnt;
458 int seg_flags_size,
int *seg_flags,
459 bool *left_most,
bool *right_most,
464 end_concomp = strt_concomp + seg_flags_size;
469 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
470 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
472 min_id = concomp_array[concomp]->
ID();
473 max_id = concomp_array[concomp]->
ID();
476 UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
481 if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
485 int id_cnt = max_id - min_id + 1;
486 bool *id_exist =
new bool[id_cnt];
487 bool *left_most_exist =
new bool[id_cnt];
488 bool *right_most_exist =
new bool[id_cnt];
489 if (!id_exist || !left_most_exist || !right_most_exist)
491 memset(id_exist, 0, id_cnt *
sizeof(*id_exist));
492 memset(left_most_exist, 0, id_cnt *
sizeof(*left_most_exist));
493 memset(right_most_exist, 0, id_cnt *
sizeof(*right_most_exist));
501 int unq_left_most = 0;
502 int unq_right_most = 0;
503 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
504 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
506 left = concomp_array[concomp]->
Left();
507 right = concomp_array[concomp]->
Right();
508 top = concomp_array[concomp]->
Top();
509 bottom = concomp_array[concomp]->
Bottom();
513 concomp_array[concomp]->
Right(), &left, &right);
515 concomp_array[concomp]->
Bottom(), &top, &bottom);
518 int concomp_id = concomp_array[concomp]->
ID() - min_id;
519 if (!id_exist[concomp_id]) {
520 id_exist[concomp_id] =
true;
523 if (concomp_array[concomp]->LeftMost()) {
524 if (left_most_exist[concomp_id] ==
false) {
525 left_most_exist[concomp_id] =
true;
529 if (concomp_array[concomp]->RightMost()) {
530 if (right_most_exist[concomp_id] ==
false) {
531 right_most_exist[concomp_id] =
true;
538 delete []left_most_exist;
539 delete []right_most_exist;
540 if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
543 (*left_most) = (unq_left_most >= unq_ids);
544 (*right_most) = (unq_right_most >= unq_ids);
546 CharSamp *samp =
new CharSamp(left, top, right - left + 1, bottom - top + 1);
552 for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
553 if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
556 samp->
line_buff_[pt_ptr->
y() - top][pt_ptr->
x() - left] = 0;
557 pt_ptr = pt_ptr->
Next();
583 unsigned char *raw_data = *raw_data_ptr;
586 memcpy(&val32, raw_data,
sizeof(val32));
587 raw_data +=
sizeof(val32);
588 if (val32 != 0xabd0fefe) {
592 memcpy(&val32, raw_data,
sizeof(val32));
593 raw_data +=
sizeof(val32);
596 label32 =
new char_32[val32 + 1];
597 if (label32 ==
NULL) {
601 memcpy(label32, raw_data, val32 *
sizeof(*label32));
602 raw_data += (val32 *
sizeof(*label32));
611 if (char_samp ==
NULL) {
616 char_samp->label32_ = label32;
617 memcpy(&char_samp->page_, raw_data,
sizeof(char_samp->page_));
618 raw_data +=
sizeof(char_samp->page_);
619 memcpy(&char_samp->left_, raw_data,
sizeof(char_samp->left_));
620 raw_data +=
sizeof(char_samp->left_);
621 memcpy(&char_samp->top_, raw_data,
sizeof(char_samp->top_));
622 raw_data +=
sizeof(char_samp->top_);
623 memcpy(&char_samp->first_char_, raw_data,
sizeof(char_samp->first_char_));
624 raw_data +=
sizeof(char_samp->first_char_);
625 memcpy(&char_samp->last_char_, raw_data,
sizeof(char_samp->last_char_));
626 raw_data +=
sizeof(char_samp->last_char_);
627 memcpy(&char_samp->norm_top_, raw_data,
sizeof(char_samp->norm_top_));
628 raw_data +=
sizeof(char_samp->norm_top_);
629 memcpy(&char_samp->norm_bottom_, raw_data,
sizeof(char_samp->norm_bottom_));
630 raw_data +=
sizeof(char_samp->norm_bottom_);
631 memcpy(&char_samp->norm_aspect_ratio_, raw_data,
632 sizeof(char_samp->norm_aspect_ratio_));
633 raw_data +=
sizeof(char_samp->norm_aspect_ratio_);
641 (*raw_data_ptr) = raw_data;
648 CharSamp *scaled_bmp =
Scale(conv_grid_size, conv_grid_size);
653 unsigned char *buff = scaled_bmp->
RawData();
656 int bmp_size = conv_grid_size * conv_grid_size;
657 for (input = 0; input < bmp_size; input++) {
658 features[input] = 255.0f - (1.0f * buff[input]);
unsigned short Right() const
bool LoadFromCharDumpFile(CachedFile *fp)
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
unsigned short NormAspectRatio() const
static const int kConCompAllocChunk
void SetNormBottom(unsigned short norm_bottom)
unsigned char * RawData() const
bool LoadFromRawData(unsigned char *data)
basic_string< char_32 > string_32
unsigned short Left() const
static CharSamp * FromCharDumpFile(CachedFile *fp)
static int Left2RightComparer(const void *comp1, const void *comp2)
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
unsigned short Bottom() const
unsigned short Width() const
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
static int Right2LeftComparer(const void *comp1, const void *comp2)
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
void SetLastChar(unsigned short last_char)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
void SetFirstChar(unsigned short first_char)
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
bool SaveBmp2CharDumpFile(FILE *fp) const
string stringLabel() const
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
unsigned short NormBottom() const
unsigned short NormTop() const
unsigned short LastChar() const
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
unsigned short Top() const
void SetLabel(char_32 label)
bool Save2CharDumpFile(FILE *fp) const
unsigned char ** line_buff_
ConComp ** Segment(int max_hist_wnd, int *concomp_cnt)
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
bool ScaleFrom(Bmp8 *bmp, bool isotropic=true)
bool ComputeFeatures(int conv_grid_size, float *features)
unsigned short Height() const
void SetNormTop(unsigned short norm_top)
int Read(void *read_buff, int bytes)
unsigned short FirstChar() const