22 #include "allheaders.h" 44 i_.
ResizeNoInit(width, num_features, GetPadding(num_features));
68 int x_scale,
int y_scale,
int num_features) {
85 for (
int t = 0; t < width; ++t) {
100 if (end_x < full_width) {
103 int fill_size = num_features * (full_width - end_x);
115 if (end_y < full_height) {
119 int fill_size = num_features * full_width * (full_height - end_y);
134 static void ComputeBlackWhite(Pix* pix,
float* black,
float* white) {
135 int width = pixGetWidth(pix);
136 int height = pixGetHeight(pix);
137 STATS mins(0, 256), maxes(0, 256);
140 l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
141 int prev = GET_DATA_BYTE(line, 0);
142 int curr = GET_DATA_BYTE(line, 1);
143 for (
int x = 1; x + 1 < width; ++x) {
144 int next = GET_DATA_BYTE(line, x + 1);
145 if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
149 if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
157 if (mins.get_total() == 0) mins.add(0, 1);
158 if (maxes.get_total() == 0) maxes.add(255, 1);
159 *black = mins.ile(0.25);
160 *white = maxes.ile(0.75);
168 std::vector<const Pix*> pixes(1, pix);
176 const std::vector<const Pix*>& pixes,
178 int target_height = shape.
height();
179 int target_width = shape.
width();
180 std::vector<std::pair<int, int>> h_w_pairs;
181 for (
auto pix : pixes) {
182 Pix* var_pix =
const_cast<Pix*
>(pix);
183 int width = pixGetWidth(var_pix);
184 if (target_width != 0) width = target_width;
185 int height = pixGetHeight(var_pix);
186 if (target_height != 0) height = target_height;
187 h_w_pairs.emplace_back(height, width);
192 for (
size_t b = 0; b < pixes.size(); ++b) {
193 Pix* pix =
const_cast<Pix*
>(pixes[b]);
194 float black = 0.0f, white = 255.0f;
195 if (shape.
depth() != 3) ComputeBlackWhite(pix, &black, &white);
196 float contrast = (white - black) / 2.0
f;
197 if (contrast <= 0.0
f) contrast = 1.0f;
198 if (shape.
height() == 1) {
215 int width = pixGetWidth(pix);
216 int height = pixGetHeight(pix);
217 int wpl = pixGetWpl(pix);
224 bool color = num_features == 3;
225 if (width > target_width) width = target_width;
226 uint32_t* line = pixGetData(pix);
227 for (
int y = 0; y < target_height; ++y, line += wpl) {
230 for (x = 0; x < width; ++x, ++t) {
233 for (
int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
234 int pixel = GET_DATA_BYTE(line + x, c);
235 SetPixel(t,
f++, pixel, black, contrast);
238 int pixel = GET_DATA_BYTE(line, x);
239 SetPixel(t, 0, pixel, black, contrast);
243 for (; x < target_width; ++x)
Randomize(t++, 0, num_features, randomizer);
252 float contrast,
TRand* randomizer) {
253 int width = pixGetWidth(pix);
254 int height = pixGetHeight(pix);
256 int wpl = pixGetWpl(pix);
261 if (width > target_width) width = target_width;
263 for (x = 0; x < width; ++x, ++t) {
264 for (
int y = 0; y < height; ++y) {
265 uint32_t* line = pixGetData(pix) + wpl * y;
266 int pixel = GET_DATA_BYTE(line, x);
267 SetPixel(t, y, pixel, black, contrast);
270 for (; x < target_width; ++x)
Randomize(t++, 0, height, randomizer);
281 float float_pixel = (pixel - black) / contrast - 1.0
f;
283 i_[t][
f] = ClipToRange<int>(
IntCastRounded((INT8_MAX + 1) * float_pixel),
284 -INT8_MAX, INT8_MAX);
286 f_[t][
f] = float_pixel;
296 int feature_factor = 1;
297 if (num_features == 3) {
302 Pix* pix = pixCreate(im_width, im_height * num_features, 32);
310 const int8_t* features = i_[t];
311 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
312 int pixel = features[y * feature_factor];
314 int red = ClipToRange<int>(pixel + 128, 0, 255);
315 int green = red, blue = red;
316 if (feature_factor == 3) {
318 green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
319 blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
320 }
else if (num_features > 3) {
323 red = abs(pixel) * 2;
332 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
333 (green << L_GREEN_SHIFT) |
334 (blue << L_BLUE_SHIFT));
337 const float* features = f_[t];
338 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
339 float pixel = features[y * feature_factor];
341 int red = ClipToRange<int>(
IntCastRounded((pixel + 1.0
f) * 127.5
f), 0, 255);
342 int green = red, blue = red;
343 if (feature_factor == 3) {
345 pixel = features[y * feature_factor + 1];
347 pixel = features[y * feature_factor + 2];
349 }
else if (num_features > 3) {
352 red = ClipToRange<int>(
IntCastRounded(fabs(pixel) * 255), 0, 255);
361 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
362 (green << L_GREEN_SHIFT) |
363 (blue << L_BLUE_SHIFT));
373 for (
int y = 0; y < num_features; ++y) {
374 for (
int t = 0; t <
Width(); ++t) {
375 if (num == 0 || t < num || t + num >=
Width()) {
377 tprintf(
" %g", static_cast<float>(i_[t][y]) / INT8_MAX);
391 memcpy(i_[dest_t], src.i_[src_t], i_.
dim2() *
sizeof(i_[0][0]));
393 memcpy(f_[dest_t], src.f_[src_t], f_.
dim2() *
sizeof(f_[0][0]));
400 int src_t,
int src_offset) {
403 memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
404 num_features *
sizeof(i_[0][0]));
406 memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
407 num_features *
sizeof(f_[0][0]));
424 int8_t* line = i_[t] + offset;
425 for (
int i = 0;
i < num_features; ++
i)
429 float* line = f_[t] + offset;
430 for (
int i = 0;
i < num_features; ++
i)
437 int null_ch,
float* rating,
438 float* certainty)
const {
439 if (t_end <= t_start)
return -1;
441 float min_score = 0.0f;
443 if (c == not_this || c == null_ch)
continue;
445 if (max_char < 0 || *rating < min_score) {
456 float* rating,
float* certainty)
const {
460 if (t_end <= t_start || t_end <= 0)
return;
461 float ratings[3] = {0.0f, 0.0f, 0.0f};
462 float certs[3] = {0.0f, 0.0f, 0.0f};
463 for (
int t = t_start; t < t_end; ++t) {
464 const float* line = f_[t];
468 ratings[2] = FLT_MAX;
472 for (
int i = 2;
i >= 1; --
i) {
473 if (ratings[
i] > ratings[
i - 1]) {
474 ratings[
i] = ratings[
i - 1];
475 certs[
i] = certs[
i - 1];
479 if (zero < certs[2]) certs[2] = zero;
481 if (score < certs[1]) certs[1] = score;
484 if (zero < certs[0]) certs[0] = zero;
486 int best_i = ratings[2] < ratings[1] ? 2 : 1;
487 *rating = ratings[best_i] + t_end - t_start;
488 *certainty = certs[best_i];
495 float* score)
const {
498 float best_score = -FLT_MAX;
499 const float* line = f_[t];
500 for (
int i = 0;
i < f_.
dim2(); ++
i) {
501 if (line[
i] > best_score &&
i != not_this &&
i != not_that) {
502 best_score = line[
i];
514 int length = labels.
size();
515 int last_start = end - length;
517 double best_score = 0.0;
518 for (
int s = start; s <= last_start; ++s) {
520 if (score > best_score || best_start < 0) {
532 int length = labels.
size();
534 for (
int i = 0;
i < length; ++
i) {
535 score += f_(start +
i, labels[
i]);
545 float bad_score = (1.0f - ok_score) / (num_classes - 1);
546 float* targets = f_[t];
547 for (
int i = 0;
i < num_classes; ++
i)
548 targets[
i] = bad_score;
549 targets[label] = ok_score;
560 float* targets = f_[t];
561 for (
int c = 0; c < num_classes; ++c) {
563 targets[c] += (1.0 - targets[c]) * (2 / 3.0);
586 for (
int t = 0; t <
Width(); ++t) {
587 const float* features = f_[t];
588 for (
int y = 0; y < num_features; ++y) {
589 float grad = features[y];
590 if (grad < -confidence_thr) {
592 if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
593 (t + 1 ==
Width() || f_[t + 1][y] < confidence_thr / 2)) {
605 const int8_t* line = i_[t];
606 for (
int i = 0;
i < i_.
dim2(); ++
i) {
607 output[
i] =
static_cast<double>(line[
i]) / INT8_MAX;
610 const float* line = f_[t];
611 for (
int i = 0;
i < f_.
dim2(); ++
i) {
612 output[
i] =
static_cast<double>(line[
i]);
621 const int8_t* line = i_[t];
622 for (
int i = 0;
i < num_features; ++
i) {
623 inout[
i] +=
static_cast<double>(line[
i]) / INT8_MAX;
626 const float* line = f_[t];
627 for (
int i = 0;
i < num_features; ++
i) {
635 float* inout)
const {
637 const int8_t* line = i_[t] + offset;
638 for (
int i = 0;
i < num_features; ++
i) {
639 inout[
i] +=
static_cast<float>(line[
i]) / INT8_MAX;
642 const float* line = f_[t] + offset;
643 for (
int i = 0;
i < num_features; ++
i) {
657 const double* input) {
659 int8_t* line = i_[t] + offset;
660 for (
int i = 0;
i < num_features; ++
i) {
662 -INT8_MAX, INT8_MAX);
665 float* line = f_[t] + offset;
666 for (
int i = 0;
i < num_features; ++
i) {
667 line[
i] =
static_cast<float>(input[
i]);
678 int8_t* dest_line = i_[dest_t];
679 const int8_t* src_line = src.i_[src_t];
680 for (
int i = 0;
i < dim; ++
i) {
681 if (dest_line[
i] < src_line[
i]) {
682 dest_line[
i] = src_line[
i];
688 float* dest_line = f_[dest_t];
689 const float* src_line = src.f_[src_t];
690 for (
int i = 0;
i < dim; ++
i) {
691 if (dest_line[
i] < src_line[
i]) {
692 dest_line[
i] = src_line[
i];
707 const int* max_line = maxes[t];
708 const float* fwd_line = fwd.f_[t];
709 int num_features = fwd.f_.
dim2();
710 for (
int i = 0;
i < num_features; ++
i) {
711 f_[max_line[
i]][
i] = fwd_line[
i];
718 float min_max = 0.0f;
721 for (
int t = 0; t < width; ++t) {
722 float max_value = -FLT_MAX;
724 const int8_t* column = i_[t];
725 for (
int i = 0;
i < num_features; ++
i) {
726 if (column[
i] > max_value) max_value = column[
i];
729 const float* column = f_[t];
730 for (
int i = 0;
i < num_features; ++
i) {
731 if (column[
i] > max_value) max_value = column[
i];
734 if (t == 0 || max_value < min_max) min_max = max_value;
749 for (
int t = 0; t < width; ++t) {
750 int8_t* out_line = i_[t];
751 const int8_t* base_line = base_output.i_[t];
752 const int8_t* comb_line = combiner_output.i_[t];
753 float base_weight =
static_cast<float>(comb_line[no]) / INT8_MAX;
754 float boost_weight = 1.0f - base_weight;
755 for (
int i = 0;
i < no; ++
i) {
757 comb_line[
i] * boost_weight);
761 for (
int t = 0; t < width; ++t) {
762 float* out_line = f_[t];
763 const float* base_line = base_output.f_[t];
764 const float* comb_line = combiner_output.f_[t];
765 float base_weight = comb_line[no];
766 float boost_weight = 1.0f - base_weight;
767 for (
int i = 0;
i < no; ++
i) {
768 out_line[
i] = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
784 for (
int t = 0; t < width; ++t) {
785 const float* delta_line = fwd_deltas.f_[t];
786 const float* base_line = base_output.f_[t];
787 float* comb_line = f_[t];
788 float base_weight = comb_line[no];
789 float boost_weight = 1.0f - base_weight;
790 float max_base_delta = 0.0;
791 for (
int i = 0;
i < no; ++
i) {
793 float output = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
795 float comb_target = delta_line[
i] + output;
796 comb_line[
i] = comb_target - comb_line[
i];
797 float base_delta = fabs(comb_target - base_line[
i]);
798 if (base_delta > max_base_delta) max_base_delta = base_delta;
800 if (max_base_delta >= 0.5) {
803 comb_line[no] = 0.0 - base_weight;
806 for (
int i = 0;
i < no; ++
i) {
808 if (comb_line[
i] > 0.0) comb_line[
i] -= 1.0;
810 comb_line[no] = 1.0 - base_weight;
841 float src_max = src.f_.
MaxAbs();
843 float scale_max = scale.f_.
MaxAbs();
845 if (src_max > 0.0
f) {
846 float factor = scale_max / src_max;
847 for (
int t = 0; t < src.
Width(); ++t) {
848 const float* src_ptr = src.f_[t];
849 float* dest_ptr = f_[t];
850 for (
int i = 0;
i < src.f_.
dim2(); ++
i) dest_ptr[
i] = src_ptr[
i] * factor;
860 Resize(src, num_features);
868 int fwd_t = fwd_index.
t();
869 int rev_t = rev_index.
t();
879 Resize(src, num_features);
898 stride_map_ = src.stride_map_;
915 }
while (src_b_index.AddOffset(1,
FD_BATCH) &&
924 int width = src.
Width();
929 for (
int t = 0; t < width; ++t) {
930 memcpy(i_[t] + feature_offset, src.i_[t],
931 num_features *
sizeof(i_[t][0]));
933 for (
int t = width; t < i_.
dim1(); ++t) {
934 memset(i_[t], 0, num_features *
sizeof(i_[t][0]));
937 for (
int t = 0; t < width; ++t) {
938 memcpy(f_[t] + feature_offset, src.f_[t],
939 num_features *
sizeof(f_[t][0]));
941 for (
int t = width; t < f_.
dim1(); ++t) {
942 memset(f_[t], 0, num_features *
sizeof(f_[t][0]));
945 return num_features + feature_offset;
952 Resize(src, num_features);
953 int width = src.
Width();
956 for (
int t = 0; t < width; ++t) {
957 memcpy(i_[t], src.i_[t] + feature_offset,
958 num_features *
sizeof(i_[t][0]));
961 for (
int t = 0; t < width; ++t) {
962 memcpy(f_[t], src.f_[t] + feature_offset,
963 num_features *
sizeof(f_[t][0]));
972 for (
int t = 0; t < width; ++t) dest->
WriteStrided(t, f_[t]);
980 for (
int i = 0;
i < dim; ++
i)
981 v[
i] = ClipToRange<float>(v[
i], -range, range);
987 int NetworkIO::GetPadding(
int num_features) {
988 if (multiplier_ ==
nullptr)
991 if (multiplier_ !=
nullptr) {
992 pad = multiplier_->
RoundInputs(num_features) - num_features;
int BestLabel(int t, float *score) const
void ZeroTimeStepGeneral(int t, int offset, int num_features)
void CopyWithXReversal(const NetworkIO &src)
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
void Print(int num) const
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
void SetActivations(int t, int label, float ok_score)
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
bool AddOffset(int offset, FlexDimensions dimension)
void ClipVector(int t, float range)
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
static IntSimdMatrix * GetFastestMultiplier()
int CopyPacking(const NetworkIO &src, int feature_offset)
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
void AddAllToFloat(const NetworkIO &src)
int RoundInputs(int size) const
void WriteTimeStep(int t, const double *input)
void SubtractAllFromFloat(const NetworkIO &src)
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
const int8_t * i(int t) const
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
void ResizeNoInit(int size1, int size2, int pad=0)
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
int MaxIndexOfDim(FlexDimensions dim) const
int Size(FlexDimensions dimension) const
void Resize(const NetworkIO &src, int num_features)
void ZeroInvalidElements()
void Transpose(TransposedArray *dest) const
void CopyWithXYTranspose(const NetworkIO &src)
int index(FlexDimensions dimension) const
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
int IntCastRounded(double x)
void FromPixes(const StaticShape &shape, const std::vector< const Pix *> &pixes, TRand *randomizer)
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
const StrideMap & stride_map() const
DLLSYM void tprintf(const char *format,...)
static float ProbToCertainty(float prob)
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
void AddTimeStep(int t, double *inout) const
void WriteTimeStepPart(int t, int offset, int num_features, const double *input)
void CopyAll(const NetworkIO &src)
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
int PositionOfBestMatch(const GenericVector< int > &labels, int start, int end) const
void SetPixel(int t, int f, int pixel, float black, float contrast)
double SignedRand(double range)
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
void ScaleXY(int x_factor, int y_factor)
double ScoreOfLabels(const GenericVector< int > &labels, int start) const
const float kMinCertainty
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
void ReadTimeStep(int t, double *output) const
void CopyWithYReversal(const NetworkIO &src)
void WriteStrided(int t, const float *data)
void ZeroVector(int n, T *vec)
void EnsureBestLabel(int t, int label)
void ResizeXTo1(const NetworkIO &src, int num_features)
void Resize2d(bool int_mode, int width, int num_features)
bool AnySuspiciousTruth(float confidence_thr) const
void Randomize(int t, int offset, int num_features, TRand *randomizer)