31 static inline double log2(
double n) {
32 return log(n) / log(2.0);
43 int width = input.
dim1();
44 int num_features = input.
dim2();
46 for (
int t = 0; t < width; ++t)
WriteStrided(t, input[t]);
57 float weight_range,
TRand* randomizer) {
60 if (randomizer !=
nullptr) {
61 for (
int i = 0; i < no; ++i) {
62 for (
int j = 0; j < ni; ++j) {
63 wf_[i][j] = randomizer->
SignedRand(weight_range);
79 int old_no = wf_.
dim1();
80 int new_no = code_map.size();
82 std::vector<double> means(ni, 0.0);
83 for (
int c = 0; c < old_no; ++c) {
84 const double* weights = wf_[c];
85 for (
int i = 0; i < ni; ++i) means[i] += weights[i];
87 for (
double& mean : means) mean /= old_no;
90 for (
int dest = 0; dest < new_no; ++dest) {
91 int src = code_map[dest];
92 const double* src_data = src >= 0 ? old_wf[src] : means.data();
93 memcpy(wf_[dest], src_data, ni *
sizeof(*src_data));
108 int dim2 = wi_.
dim2();
109 for (
int t = 0; t < wi_.
dim1(); ++t) {
110 double* f_line = wf_[t];
111 int8_t* i_line = wi_[t];
112 double max_abs = 0.0;
113 for (
int f = 0; f < dim2; ++f) {
114 double abs_val = fabs(f_line[f]);
115 if (abs_val > max_abs) max_abs = abs_val;
117 double scale = max_abs / INT8_MAX;
119 if (scale == 0.0) scale = 1.0;
120 for (
int f = 0; f < dim2; ++f) {
127 if (multiplier_ !=
nullptr) multiplier_->Init(wi_);
133 int no = int_mode_ ? wi_.
dim1() : wf_.
dim1();
134 int ni = int_mode_ ? wi_.
dim2() : wf_.
dim2();
136 updates_.
Resize(no, ni, 0.0);
138 if (use_adam_) dw_sq_sum_.
Resize(no, ni, 0.0);
159 if (!scales_.
Serialize(fp))
return false;
162 if (training && !updates_.
Serialize(fp))
return false;
163 if (training && use_adam_ && !dw_sq_sum_.
Serialize(fp))
return false;
180 if (multiplier_ !=
nullptr) multiplier_->Init(wi_);
186 if (use_adam_ && !dw_sq_sum_.
DeSerialize(fp))
return false;
201 for (
int i = 0; i < old_scales.
size(); ++i) scales_[i] = old_scales[i];
223 MatrixDotVectorInternal(wf_,
true,
false, u, v);
229 multiplier_->MatrixDotVector(wi_, scales_, u, v);
238 const double* u = wf_[0];
239 for (
int i = 0; i < n; ++i) {
240 inout[i] += u[i] * v[i];
250 MatrixDotVectorInternal(wf_t_,
false,
true, u, v);
262 int num_outputs = dw_.
dim1();
265 int num_inputs = dw_.
dim2() - 1;
266 int num_samples = u.
dim2();
270 #pragma omp parallel for num_threads(4) if (in_parallel) 272 for (
int i = 0; i < num_outputs; ++i) {
273 double* dwi = dw_[i];
274 const double* ui = u[i];
275 for (
int j = 0; j < num_inputs; ++j) {
280 for (
int k = 0; k < num_samples; ++k) total += ui[k];
281 dwi[num_inputs] = total;
289 double adam_beta,
int num_samples) {
292 learning_rate *= sqrt(1.0 - pow(adam_beta, num_samples));
293 learning_rate /= 1.0 - pow(momentum, num_samples);
295 if (use_adam_ && num_samples > 0 && momentum > 0.0) {
297 dw_ *= learning_rate * (1.0 - momentum);
298 updates_ *= momentum;
302 dw_ *= learning_rate;
304 if (momentum > 0.0) wf_ += updates_;
305 if (momentum >= 0.0) updates_ *= momentum;
321 double* changed)
const {
322 int num_outputs = updates_.
dim1();
323 int num_inputs = updates_.
dim2();
326 for (
int i = 0; i < num_outputs; ++i) {
327 const double* this_i = updates_[i];
328 const double* other_i = other.updates_[i];
329 for (
int j = 0; j < num_inputs; ++j) {
330 double product = this_i[j] * other_i[j];
342 static void HistogramWeight(
double weight,
STATS* histogram) {
345 double logval = -log2(fabs(weight));
348 histogram->
add(bucket, 1);
354 for (
int i = 0; i < wi_.
dim1(); ++i) {
355 for (
int j = 0; j < wi_.
dim2(); ++j) {
356 HistogramWeight(wi_[i][j] * scales_[i], &histogram);
360 for (
int i = 0; i < wf_.
dim1(); ++i) {
361 for (
int j = 0; j < wf_.
dim2(); ++j) {
362 HistogramWeight(wf_[i][j], &histogram);
385 for (
int k = 0; k < n; ++k) total += u[k] * v[k];
394 int dim1 = wf.
dim1();
395 int dim2 = wf.
dim2();
397 for (
int i = 0; i < dim1; ++i) {
398 const float* wfi = wf[i];
399 double* wdi = (*wd)[i];
400 for (
int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
414 bool skip_bias_back,
const double* u,
416 int num_results = w.
dim1() - skip_bias_back;
417 int extent = w.
dim2() - add_bias_fwd;
418 for (
int i = 0; i < num_results; ++i) {
419 const double* wi = w[i];
421 if (add_bias_fwd) total += wi[extent];
void resize_no_init(int size)
void SumSquares(const GENERIC_2D_ARRAY< T > &src, const T &decay_factor)
double DotProductAVX(const double *u, const double *v, int n)
const int kAdamCorrectionIterations
void Transpose(const GENERIC_2D_ARRAY< double > &input)
static double DotProduct(const double *u, const double *v, int n)
void CountAlternators(const WeightMatrix &other, double *same, double *changed) const
bool DeSerialize(char *data, size_t count=1)
static IntSimdMatrix * GetFastestMultiplier()
int RemapOutputs(const std::vector< int > &code_map)
bool DeSerialize(bool swap, FILE *fp)
const double kAdamEpsilon
void VectorDotMatrix(const double *u, double *v) const
void Update(double learning_rate, double momentum, double adam_beta, int num_samples)
bool Serialize(FILE *fp) const
bool Serialize(bool training, TFile *fp) const
double DotProductSSE(const double *u, const double *v, int n)
void ResizeNoInit(int size1, int size2, int pad=0)
virtual ~TransposedArray()
int InitWeightsFloat(int no, int ni, bool use_adam, float weight_range, TRand *randomizer)
void Debug2D(const char *msg)
void AdamUpdate(const GENERIC_2D_ARRAY< T > &sum, const GENERIC_2D_ARRAY< T > &sqsum, const T &epsilon)
void MultiplyAccumulate(const double *v, double *inout)
void init_to_size(int size, const T &t)
int IntCastRounded(double x)
bool Serialize(const char *data, size_t count=1)
DLLSYM void tprintf(const char *format,...)
bool Serialize(FILE *fp) const
void MatrixDotVector(const double *u, double *v) const
void add(int32_t value, int32_t count)
bool DeSerialize(bool swap, FILE *fp)
static bool IsSSEAvailable()
static bool IsAVXAvailable()
bool DeSerializeOld(bool training, TFile *fp)
double SignedRand(double range)
bool DeSerialize(bool training, TFile *fp)
void AddDeltas(const WeightMatrix &other)
void WriteStrided(int t, const float *data)
void Resize(int size1, int size2, const T &empty)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
static void FloatToDouble(const GENERIC_2D_ARRAY< float > &wf, GENERIC_2D_ARRAY< double > *wd)
const int kHistogramBuckets
void SumOuterTransposed(const TransposedArray &u, const TransposedArray &v, bool parallel)