tesseract
5.0.0-alpha-619-ge9db
|
Go to the documentation of this file.
29 static inline double log2(
double n) {
49 bool skip_bias_back,
const double* u,
51 int num_results = w.
dim1() - skip_bias_back;
52 int extent = w.
dim2() - add_bias_fwd;
53 for (
int i = 0; i < num_results; ++i) {
54 const double* wi = w[i];
56 if (add_bias_fwd) total += wi[extent];
63 int width = input.
dim1();
64 int num_features = input.
dim2();
66 for (
int t = 0; t < width; ++t)
WriteStrided(t, input[t]);
77 float weight_range,
TRand* randomizer) {
80 if (randomizer !=
nullptr) {
81 for (
int i = 0; i < no; ++i) {
82 for (
int j = 0; j < ni; ++j) {
83 wf_[i][j] = randomizer->
SignedRand(weight_range);
99 int old_no = wf_.
dim1();
100 int new_no = code_map.size();
102 std::vector<double> means(ni, 0.0);
103 for (
int c = 0; c < old_no; ++c) {
104 const double* weights = wf_[c];
105 for (
int i = 0; i < ni; ++i) means[i] += weights[i];
107 for (
double& mean : means) mean /= old_no;
111 int src = code_map[
dest];
112 const double* src_data = src >= 0 ? old_wf[src] : means.data();
113 memcpy(wf_[
dest], src_data, ni *
sizeof(*src_data));
128 int dim2 = wi_.
dim2();
129 for (
int t = 0; t < wi_.
dim1(); ++t) {
130 double* f_line = wf_[t];
131 int8_t* i_line = wi_[t];
132 double max_abs = 0.0;
133 for (
int f = 0; f < dim2; ++f) {
134 double abs_val = fabs(f_line[f]);
135 if (abs_val > max_abs) max_abs = abs_val;
137 double scale = max_abs / INT8_MAX;
139 if (scale == 0.0) scale = 1.0;
140 for (
int f = 0; f < dim2; ++f) {
154 int no = int_mode_ ? wi_.
dim1() : wf_.
dim1();
155 int ni = int_mode_ ? wi_.
dim2() : wf_.
dim2();
157 updates_.
Resize(no, ni, 0.0);
159 if (use_adam_) dw_sq_sum_.
Resize(no, ni, 0.0);
180 if (!scales_.
Serialize(fp))
return false;
183 if (training && !updates_.
Serialize(fp))
return false;
184 if (training && use_adam_ && !dw_sq_sum_.
Serialize(fp))
return false;
208 if (use_adam_ && !dw_sq_sum_.
DeSerialize(fp))
return false;
223 for (
int i = 0; i < old_scales.
size(); ++i) scales_[i] = old_scales[i];
245 MatrixDotVectorInternal(wf_,
true,
false, u, v);
252 wi_.
dim1(), wi_.
dim2(), &shaped_w_[0], &scales_[0], u, v);
262 assert(wf_.
dim1() == 1);
264 const double* u = wf_[0];
265 for (
int i = 0; i < n; ++i) {
266 inout[i] += u[i] * v[i];
276 MatrixDotVectorInternal(wf_t_,
false,
true, u, v);
288 int num_outputs = dw_.
dim1();
289 assert(u.
dim1() == num_outputs);
291 int num_inputs = dw_.
dim2() - 1;
292 int num_samples = u.
dim2();
294 assert(v.
dim1() == num_inputs);
296 #pragma omp parallel for num_threads(4) if (in_parallel)
298 for (
int i = 0; i < num_outputs; ++i) {
299 double* dwi = dw_[i];
300 const double* ui = u[i];
301 for (
int j = 0; j < num_inputs; ++j) {
306 for (
int k = 0; k < num_samples; ++k) total += ui[k];
307 dwi[num_inputs] = total;
315 double adam_beta,
int num_samples) {
318 learning_rate *= sqrt(1.0 - pow(adam_beta, num_samples));
319 learning_rate /= 1.0 - pow(momentum, num_samples);
321 if (use_adam_ && num_samples > 0 && momentum > 0.0) {
323 dw_ *= learning_rate * (1.0 - momentum);
324 updates_ *= momentum;
328 dw_ *= learning_rate;
330 if (momentum > 0.0) wf_ += updates_;
331 if (momentum >= 0.0) updates_ *= momentum;
338 assert(dw_.
dim1() == other.dw_.
dim1());
339 assert(dw_.
dim2() == other.dw_.
dim2());
347 double* changed)
const {
348 int num_outputs = updates_.
dim1();
349 int num_inputs = updates_.
dim2();
350 assert(num_outputs == other.updates_.
dim1());
351 assert(num_inputs == other.updates_.
dim2());
352 for (
int i = 0; i < num_outputs; ++i) {
353 const double* this_i = updates_[i];
354 const double* other_i = other.updates_[i];
355 for (
int j = 0; j < num_inputs; ++j) {
356 double product = this_i[j] * other_i[j];
368 static void HistogramWeight(
double weight,
STATS* histogram) {
371 double logval = -log2(fabs(weight));
374 histogram->
add(bucket, 1);
380 for (
int i = 0; i < wi_.
dim1(); ++i) {
381 for (
int j = 0; j < wi_.
dim2(); ++j) {
382 HistogramWeight(wi_[i][j] * scales_[i], &histogram);
386 for (
int i = 0; i < wf_.
dim1(); ++i) {
387 for (
int j = 0; j < wf_.
dim2(); ++j) {
388 HistogramWeight(wf_[i][j], &histogram);
401 int dim1 = wf.
dim1();
402 int dim2 = wf.
dim2();
404 for (
int i = 0; i < dim1; ++i) {
405 const float* wfi = wf[i];
406 double* wdi = (*wd)[i];
407 for (
int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void CountAlternators(const WeightMatrix &other, double *same, double *changed) const
const double kAdamEpsilon
const int kHistogramBuckets
~TransposedArray() override
void AddDeltas(const WeightMatrix &other)
void SumOuterTransposed(const TransposedArray &u, const TransposedArray &v, bool parallel)
void Init(const GENERIC_2D_ARRAY< int8_t > &w, std::vector< int8_t > &shaped_w) const
void WriteStrided(int t, const float *data)
bool DeSerialize(bool swap, FILE *fp)
void Update(double learning_rate, double momentum, double adam_beta, int num_samples)
bool Serialize(FILE *fp) const
int IntCastRounded(double x)
const int kAdamCorrectionIterations
MatrixDotVectorFunction matrixDotVectorFunction
void Debug2D(const char *msg)
void Transpose(const GENERIC_2D_ARRAY< double > &input)
bool DeSerialize(char *data, size_t count=1)
bool DeSerialize(bool swap, FILE *fp)
bool Serialize(const char *data, size_t count=1)
bool DeSerialize(bool training, TFile *fp)
void ResizeNoInit(int size1, int size2, int pad=0)
DotProductFunction DotProduct
bool Serialize(bool training, TFile *fp) const
void resize_no_init(int size)
static const IntSimdMatrix * intSimdMatrix
bool DeSerializeOld(bool training, TFile *fp)
int InitWeightsFloat(int no, int ni, bool use_adam, float weight_range, TRand *randomizer)
double SignedRand(double range)
bool Serialize(FILE *fp) const
static void MatrixDotVector(const GENERIC_2D_ARRAY< int8_t > &w, const GenericVector< double > &scales, const int8_t *u, double *v)
int RemapOutputs(const std::vector< int > &code_map)
void SumSquares(const GENERIC_2D_ARRAY< T > &src, const T &decay_factor)
void MatrixDotVector(const double *u, double *v) const
void add(int32_t value, int32_t count)
void MultiplyAccumulate(const double *v, double *inout)
void Resize(int size1, int size2, const T &empty)
void init_to_size(int size, const T &t)
void VectorDotMatrix(const double *u, double *v) const
DLLSYM void tprintf(const char *format,...)
void AdamUpdate(const GENERIC_2D_ARRAY< T > &sum, const GENERIC_2D_ARRAY< T > &sqsum, const T &epsilon)
static void FloatToDouble(const GENERIC_2D_ARRAY< float > &wf, GENERIC_2D_ARRAY< double > *wd)