22 #include "allheaders.h"
39 i_.
ResizeNoInit(width, num_features, GetPadding(num_features));
63 int x_scale,
int y_scale,
int num_features) {
80 for (
int t = 0; t < width; ++t) {
95 if (end_x < full_width) {
98 int fill_size = num_features * (full_width - end_x);
110 if (end_y < full_height) {
114 int fill_size = num_features * full_width * (full_height - end_y);
// Derives a black level and a white level for an image from the local
// minima and maxima found along its rows.
//   pix:   image to scan; pixels are read one byte at a time with
//          GET_DATA_BYTE (presumably 8-bit greyscale -- TODO confirm).
//   black: receives mins.ile(0.25).
//   white: receives maxes.ile(0.75).
// NOTE(review): this excerpt is missing several lines of the definition
// (the row loop, the bodies of the two extremum tests and the closing
// brace), so the comments below describe only the visible statements.
129 static void ComputeBlackWhite(Pix* pix,
float* black,
float* white) {
130 int width = pixGetWidth(pix);
131 int height = pixGetHeight(pix);
// Two STATS accumulators, both constructed with the arguments (0, 256).
132 STATS mins(0, 256), maxes(0, 256);
// Start of row y: the data pointer advanced by y * words-per-line.
135 l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
// Sliding window of three neighbouring pixels: prev, curr, next.
// NOTE(review): byte 1 is read unconditionally here -- confirm that callers
// never pass an image narrower than two pixels.
136 int prev = GET_DATA_BYTE(line, 0);
137 int curr = GET_DATA_BYTE(line, 1);
138 for (
int x = 1; x + 1 < width; ++x) {
139 int next = GET_DATA_BYTE(line, x + 1);
// curr is a local minimum: not above either neighbour and strictly below
// at least one of them.
140 if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
// curr is a local maximum: not below either neighbour and strictly above
// at least one of them.
144 if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
// Fallbacks so that the ile() calls below never run on an empty
// accumulator: seed mins with 0 and maxes with 255.
152 if (mins.get_total() == 0) mins.add(0, 1);
153 if (maxes.get_total() == 0) maxes.add(255, 1);
// presumably ile(f) is the f-fractile of the accumulated values -- verify
// against the STATS class.
154 *black = mins.ile(0.25);
155 *white = maxes.ile(0.75);
163 std::vector<const Pix*> pixes(1, pix);
171 const std::vector<const Pix*>& pixes,
173 int target_height = shape.
height();
174 int target_width = shape.
width();
175 std::vector<std::pair<int, int>> h_w_pairs;
176 for (
auto pix : pixes) {
177 Pix* var_pix = const_cast<Pix*>(pix);
178 int width = pixGetWidth(var_pix);
179 if (target_width != 0) width = target_width;
180 int height = pixGetHeight(var_pix);
181 if (target_height != 0) height = target_height;
182 h_w_pairs.emplace_back(height, width);
187 for (
size_t b = 0; b < pixes.size(); ++b) {
188 Pix* pix = const_cast<Pix*>(pixes[b]);
189 float black = 0.0f, white = 255.0f;
190 if (shape.
depth() != 3) ComputeBlackWhite(pix, &black, &white);
191 float contrast = (white - black) / 2.0
f;
192 if (contrast <= 0.0
f) contrast = 1.0f;
193 if (shape.
height() == 1) {
210 int width = pixGetWidth(pix);
211 int height = pixGetHeight(pix);
212 int wpl = pixGetWpl(pix);
219 bool color = num_features == 3;
220 if (width > target_width) width = target_width;
221 uint32_t* line = pixGetData(pix);
222 for (
int y = 0; y < target_height; ++y, line += wpl) {
225 for (x = 0; x < width; ++x, ++t) {
228 for (
int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
229 int pixel = GET_DATA_BYTE(line + x, c);
230 SetPixel(t,
f++, pixel, black, contrast);
233 int pixel = GET_DATA_BYTE(line, x);
234 SetPixel(t, 0, pixel, black, contrast);
238 for (; x < target_width; ++x)
Randomize(t++, 0, num_features, randomizer);
247 float contrast,
TRand* randomizer) {
248 int width = pixGetWidth(pix);
249 int height = pixGetHeight(pix);
251 int wpl = pixGetWpl(pix);
256 if (width > target_width) width = target_width;
258 for (x = 0; x < width; ++x, ++t) {
259 for (
int y = 0; y < height; ++y) {
260 uint32_t* line = pixGetData(pix) + wpl * y;
261 int pixel = GET_DATA_BYTE(line, x);
262 SetPixel(t, y, pixel, black, contrast);
265 for (; x < target_width; ++x)
Randomize(t++, 0, height, randomizer);
// NOTE(review): the enclosing function header is not part of this excerpt;
// the variables used (t, f, pixel, black, contrast) match the arguments of
// the SetPixel(...) calls seen elsewhere in this file, so this is presumably
// its body -- confirm.
// Normalises the raw pixel: subtract `black`, divide by `contrast`, then
// shift down by 1.
276 float float_pixel = (pixel - black) / contrast - 1.0
f;
// Integer store: scale by (INT8_MAX + 1), round, and clip to
// [-INT8_MAX, INT8_MAX] before writing to i_[t][f].
// NOTE(review): the condition choosing between this store and the float
// store below is on lines missing from this excerpt.
278 i_[t][
f] = ClipToRange<int>(
IntCastRounded((INT8_MAX + 1) * float_pixel),
279 -INT8_MAX, INT8_MAX);
// Float store: the normalised value is written unchanged to f_[t][f].
281 f_[t][
f] = float_pixel;
291 int feature_factor = 1;
292 if (num_features == 3) {
297 Pix* pix = pixCreate(im_width, im_height * num_features, 32);
305 const int8_t* features = i_[t];
306 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
307 int pixel = features[y * feature_factor];
309 int red = ClipToRange<int>(pixel + 128, 0, 255);
310 int green = red, blue = red;
311 if (feature_factor == 3) {
313 green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
314 blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
315 }
else if (num_features > 3) {
318 red = abs(pixel) * 2;
327 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
328 (green << L_GREEN_SHIFT) |
329 (blue << L_BLUE_SHIFT));
332 const float* features = f_[t];
333 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
334 float pixel = features[y * feature_factor];
336 int red = ClipToRange<int>(
IntCastRounded((pixel + 1.0
f) * 127.5
f), 0, 255);
337 int green = red, blue = red;
338 if (feature_factor == 3) {
340 pixel = features[y * feature_factor + 1];
342 pixel = features[y * feature_factor + 2];
344 }
else if (num_features > 3) {
347 red = ClipToRange<int>(
IntCastRounded(fabs(pixel) * 255), 0, 255);
356 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
357 (green << L_GREEN_SHIFT) |
358 (blue << L_BLUE_SHIFT));
368 for (
int y = 0; y < num_features; ++y) {
369 for (
int t = 0; t <
Width(); ++t) {
370 if (num == 0 || t < num || t + num >=
Width()) {
372 tprintf(
" %g", static_cast<float>(i_[t][y]) / INT8_MAX);
386 memcpy(i_[dest_t], src.i_[src_t], i_.
dim2() *
sizeof(i_[0][0]));
388 memcpy(f_[dest_t], src.f_[src_t], f_.
dim2() *
sizeof(f_[0][0]));
395 int src_t,
int src_offset) {
398 memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
399 num_features *
sizeof(i_[0][0]));
401 memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
402 num_features *
sizeof(f_[0][0]));
419 int8_t* line = i_[t] + offset;
420 for (
int i = 0;
i < num_features; ++
i)
424 float* line = f_[t] + offset;
425 for (
int i = 0;
i < num_features; ++
i)
432 int null_ch,
float* rating,
433 float* certainty)
const {
434 if (t_end <= t_start)
return -1;
436 float min_score = 0.0f;
438 if (c == not_this || c == null_ch)
continue;
440 if (max_char < 0 || *rating < min_score) {
// NOTE(review): only the tail of this signature is visible, and several
// body lines are missing from this excerpt; the function name is therefore
// not shown here. The comments describe the visible statements only.
// Outputs: *rating and *certainty are assigned at the end from a small
// three-slot table built while walking timesteps t_start .. t_end - 1.
451 float* rating,
float* certainty)
const {
// An empty range, or one that ends at or before timestep 0, is not
// processed any further.
455 if (t_end <= t_start || t_end <= 0)
return;
// Three running (rating, certainty) pairs, all starting at zero.
456 float ratings[3] = {0.0f, 0.0f, 0.0f};
457 float certs[3] = {0.0f, 0.0f, 0.0f};
458 for (
int t = t_start; t < t_end; ++t) {
// Row of float values for timestep t.
459 const float* line = f_[t];
463 ratings[2] = FLT_MAX;
// Walk slots 2 then 1: whenever slot i has a larger rating than slot i-1,
// slot i takes over slot i-1's rating and certainty.
467 for (
int i = 2;
i >= 1; --
i) {
468 if (ratings[
i] > ratings[
i - 1]) {
469 ratings[
i] = ratings[
i - 1];
470 certs[
i] = certs[
i - 1];
// Each certs[k] keeps the minimum of its current value and the value it
// is compared with (`zero` for slots 2 and 0, `score` for slot 1).
// NOTE(review): `zero` and `score` are defined on lines not visible here.
474 if (zero < certs[2]) certs[2] = zero;
476 if (score < certs[1]) certs[1] = score;
479 if (zero < certs[0]) certs[0] = zero;
// Choose slot 2 only when its rating is strictly lower than slot 1's.
481 int best_i = ratings[2] < ratings[1] ? 2 : 1;
// The reported rating is the chosen slot's rating plus t_end minus t_start.
482 *rating = ratings[best_i] + t_end - t_start;
483 *certainty = certs[best_i];
// NOTE(review): only the tail of the signature is visible and the lines
// after the assignment below are missing, so the function name and what is
// returned or written through `score` cannot be confirmed from here.
490 float* score)
const {
// Start below every representable float so that any eligible entry wins.
493 float best_score = -FLT_MAX;
// Row of float values for timestep t.
494 const float* line = f_[t];
// Scan all f_.dim2() entries of the row, skipping indices not_this and
// not_that, and keep the largest value seen (strictly greater wins, so the
// first of equal maxima is retained).
495 for (
int i = 0;
i < f_.
dim2(); ++
i) {
496 if (line[
i] > best_score &&
i != not_this &&
i != not_that) {
497 best_score = line[
i];
509 int length = labels.
size();
510 int last_start = end - length;
512 double best_score = 0.0;
513 for (
int s = start; s <= last_start; ++s) {
515 if (score > best_score || best_start < 0) {
527 int length = labels.
size();
529 for (
int i = 0;
i < length; ++
i) {
530 score += f_(start +
i, labels[
i]);
// NOTE(review): the enclosing function header is not part of this excerpt.
// Fills row t of f_ so that entry `label` holds ok_score and every other
// entry holds an equal share of the remainder (1 - ok_score).
// NOTE(review): num_classes == 1 makes the divisor zero -- confirm callers
// never reach this with a single class.
540 float bad_score = (1.0f - ok_score) / (num_classes - 1);
541 float* targets = f_[t];
// First write bad_score everywhere, then overwrite the chosen label.
542 for (
int i = 0;
i < num_classes; ++
i)
543 targets[
i] = bad_score;
544 targets[label] = ok_score;
555 float* targets = f_[t];
556 for (
int c = 0; c < num_classes; ++c) {
558 targets[c] += (1.0 - targets[c]) * (2 / 3.0);
581 for (
int t = 0; t <
Width(); ++t) {
582 const float* features = f_[t];
583 for (
int y = 0; y < num_features; ++y) {
584 float grad = features[y];
585 if (grad < -confidence_thr) {
587 if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
588 (t + 1 ==
Width() || f_[t + 1][y] < confidence_thr / 2)) {
600 const int8_t* line = i_[t];
601 for (
int i = 0;
i < i_.
dim2(); ++
i) {
602 output[
i] = static_cast<double>(line[
i]) / INT8_MAX;
605 const float* line = f_[t];
606 for (
int i = 0;
i < f_.
dim2(); ++
i) {
607 output[
i] = static_cast<double>(line[
i]);
616 const int8_t* line = i_[t];
617 for (
int i = 0;
i < num_features; ++
i) {
618 inout[
i] += static_cast<double>(line[
i]) / INT8_MAX;
621 const float* line = f_[t];
622 for (
int i = 0;
i < num_features; ++
i) {
630 float* inout)
const {
632 const int8_t* line = i_[t] + offset;
633 for (
int i = 0;
i < num_features; ++
i) {
634 inout[
i] += static_cast<float>(line[
i]) / INT8_MAX;
637 const float* line = f_[t] + offset;
638 for (
int i = 0;
i < num_features; ++
i) {
652 const double* input) {
654 int8_t* line = i_[t] + offset;
655 for (
int i = 0;
i < num_features; ++
i) {
657 -INT8_MAX, INT8_MAX);
660 float* line = f_[t] + offset;
661 for (
int i = 0;
i < num_features; ++
i) {
662 line[
i] = static_cast<float>(input[
i]);
// NOTE(review): the enclosing function header, the condition selecting the
// int8 or float path, and the lines following each assignment are missing
// from this excerpt.
// int8 path: element-wise running maximum -- wherever the source row
// src.i_[src_t] holds a larger value than i_[dest_t], copy it across.
673 int8_t* dest_line = i_[dest_t];
674 const int8_t* src_line = src.i_[src_t];
675 for (
int i = 0;
i < dim; ++
i) {
676 if (dest_line[
i] < src_line[
i]) {
677 dest_line[
i] = src_line[
i];
// Float path: the same element-wise maximum on f_[dest_t] / src.f_[src_t].
683 float* dest_line = f_[dest_t];
684 const float* src_line = src.f_[src_t];
685 for (
int i = 0;
i < dim; ++
i) {
686 if (dest_line[
i] < src_line[
i]) {
687 dest_line[
i] = src_line[
i];
// NOTE(review): the enclosing function header and loop over t are not part
// of this excerpt.
// For timestep t, maxes[t] supplies one row index per feature; each value
// of fwd.f_[t] is written into f_ at that row, keeping the same feature
// column i.
// presumably maxes records where each value originated in an earlier
// pooling step -- verify against the caller.
702 const int* max_line = maxes[t];
703 const float* fwd_line = fwd.f_[t];
// The feature count is taken from fwd's second dimension.
704 int num_features = fwd.f_.
dim2();
705 for (
int i = 0;
i < num_features; ++
i) {
706 f_[max_line[
i]][
i] = fwd_line[
i];
// NOTE(review): the enclosing function header, the condition selecting the
// int8 or float path, and the use made of min_max after the loop are
// missing from this excerpt.
// min_max ends up as the smallest of the per-timestep maxima.
713 float min_max = 0.0f;
716 for (
int t = 0; t < width; ++t) {
// Largest value in row t, starting below every representable float.
717 float max_value = -FLT_MAX;
// int8 path: scan i_[t] (values are promoted to float for the comparison).
719 const int8_t* column = i_[t];
720 for (
int i = 0;
i < num_features; ++
i) {
721 if (column[
i] > max_value) max_value = column[
i];
// Float path: scan f_[t].
724 const float* column = f_[t];
725 for (
int i = 0;
i < num_features; ++
i) {
726 if (column[
i] > max_value) max_value = column[
i];
// The first timestep always initialises min_max; later ones only lower it.
729 if (t == 0 || max_value < min_max) min_max = max_value;
// NOTE(review): the enclosing function header, the definitions of `no` and
// `width`, the condition selecting the int8 or float path, and the start of
// the int8 output expression are missing from this excerpt.
// Both paths blend base_output with combiner_output per timestep. The
// weight given to the base comes from entry `no` of the combiner row, i.e.
// the entry just past the `no` values that are blended.
744 for (
int t = 0; t < width; ++t) {
745 int8_t* out_line = i_[t];
746 const int8_t* base_line = base_output.i_[t];
747 const int8_t* comb_line = combiner_output.i_[t];
// int8 path: the stored weight is divided by INT8_MAX to get a float.
748 float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;
749 float boost_weight = 1.0f - base_weight;
750 for (
int i = 0;
i < no; ++
i) {
752 comb_line[
i] * boost_weight);
// Float path: the weight is used as stored.
756 for (
int t = 0; t < width; ++t) {
757 float* out_line = f_[t];
758 const float* base_line = base_output.f_[t];
759 const float* comb_line = combiner_output.f_[t];
760 float base_weight = comb_line[no];
761 float boost_weight = 1.0f - base_weight;
762 for (
int i = 0;
i < no; ++
i) {
// out = base * base_weight + combiner * (1 - base_weight).
763 out_line[
i] = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
// NOTE(review): the enclosing function header, the definitions of `no` and
// `width`, and the braces/else that structure the two outcomes at the end
// are missing from this excerpt.
// For each timestep, rewrites this object's row f_[t] in place using
// fwd_deltas and base_output.
779 for (
int t = 0; t < width; ++t) {
780 const float* delta_line = fwd_deltas.f_[t];
781 const float* base_line = base_output.f_[t];
782 float* comb_line = f_[t];
// Entry `no` of the row is the weight applied to the base output.
783 float base_weight = comb_line[no];
784 float boost_weight = 1.0f - base_weight;
// Largest |comb_target - base| seen across the first `no` entries.
785 float max_base_delta = 0.0;
786 for (
int i = 0;
i < no; ++
i) {
// Blend of the base and current row values, weighted as above.
788 float output = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
// Target for this entry: the blended output plus the incoming delta.
790 float comb_target = delta_line[
i] + output;
// Replace the stored value by (target - stored value).
791 comb_line[
i] = comb_target - comb_line[
i];
792 float base_delta = fabs(comb_target - base_line[
i]);
793 if (base_delta > max_base_delta) max_base_delta = base_delta;
// When the base is off by 0.5 or more on some entry, entry `no` is set to
// (0 - base_weight).
795 if (max_base_delta >= 0.5) {
798 comb_line[no] = 0.0 - base_weight;
// presumably the alternative outcome (the else is not visible): positive
// entries are reduced by 1.0 and entry `no` is set to (1 - base_weight).
801 for (
int i = 0;
i < no; ++
i) {
803 if (comb_line[
i] > 0.0) comb_line[
i] -= 1.0;
805 comb_line[no] = 1.0 - base_weight;
// NOTE(review): the enclosing function header and whatever happens when
// src_max is not positive are missing from this excerpt.
// Copies src's float values into f_, multiplied by the ratio of scale's
// largest absolute value to src's largest absolute value (both obtained
// from MaxAbs()).
836 float src_max = src.f_.
MaxAbs();
838 float scale_max = scale.f_.
MaxAbs();
// Guard against dividing by zero when src has no positive magnitude.
840 if (src_max > 0.0
f) {
841 float factor = scale_max / src_max;
// Loop bounds come from src (its Width() and f_.dim2()).
// NOTE(review): confirm this object has been sized to match src before
// this point.
842 for (
int t = 0; t < src.
Width(); ++t) {
843 const float* src_ptr = src.f_[t];
844 float* dest_ptr = f_[t];
845 for (
int i = 0;
i < src.f_.
dim2(); ++
i) dest_ptr[
i] = src_ptr[
i] * factor;
855 Resize(src, num_features);
863 int fwd_t = fwd_index.
t();
864 int rev_t = rev_index.
t();
874 Resize(src, num_features);
893 stride_map_ = src.stride_map_;
910 }
while (src_b_index.AddOffset(1,
FD_BATCH) &&
919 int width = src.
Width();
924 for (
int t = 0; t < width; ++t) {
925 memcpy(i_[t] + feature_offset, src.i_[t],
926 num_features *
sizeof(i_[t][0]));
928 for (
int t = width; t < i_.
dim1(); ++t) {
929 memset(i_[t], 0, num_features *
sizeof(i_[t][0]));
932 for (
int t = 0; t < width; ++t) {
933 memcpy(f_[t] + feature_offset, src.f_[t],
934 num_features *
sizeof(f_[t][0]));
936 for (
int t = width; t < f_.
dim1(); ++t) {
937 memset(f_[t], 0, num_features *
sizeof(f_[t][0]));
940 return num_features + feature_offset;
947 Resize(src, num_features);
948 int width = src.
Width();
951 for (
int t = 0; t < width; ++t) {
952 memcpy(i_[t], src.i_[t] + feature_offset,
953 num_features *
sizeof(i_[t][0]));
956 for (
int t = 0; t < width; ++t) {
957 memcpy(f_[t], src.f_[t] + feature_offset,
958 num_features *
sizeof(f_[t][0]));
967 for (
int t = 0; t < width; ++t)
dest->WriteStrided(t, f_[t]);
// NOTE(review): the enclosing function header is not part of this excerpt.
// Clamps each of the first `dim` elements of v, in place, to the closed
// interval [-range, range].
975 for (
int i = 0;
i < dim; ++
i)
976 v[
i] = ClipToRange<float>(v[
i], -range, range);
982 int NetworkIO::GetPadding(
int num_features) {