tessapi/4.0.0/a00041_source.html

 // File:        intsimdmatrix.cpp
 // Description: Base class for 8-bit int SIMD matrix multipliers.
 // Author:      Ray Smith
 // Created:     Tue Aug 15 08:01:32 PST 2017
 //
 // (C) Copyright 2017, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "intsimdmatrix.h"
 #include "genericvector.h"      // for GenericVector
 #include "intsimdmatrixavx2.h"  // for IntSimdMatrixAVX2
 #include "intsimdmatrixsse.h"   // for IntSimdMatrixSSE
 #include "matrix.h"             // for GENERIC_2D_ARRAY
 #include "simddetect.h"         // for SIMDDetect

 namespace tesseract {

 // Factory: creates the IntSimdMatrix (sub)class best suited to the CPU, as
 // reported by SIMDDetect. AVX2 is preferred, then SSE; when neither is
 // available the plain C++ base class is used. The object is allocated with
 // new and handed back as a raw pointer.
 /* static */
 IntSimdMatrix* IntSimdMatrix::GetFastestMultiplier() {
   if (SIMDDetect::IsAVX2Available()) return new IntSimdMatrixAVX2();
   if (SIMDDetect::IsSSEAvailable()) return new IntSimdMatrixSSE();
   // No usable SIMD support: fall back to the default c++ implementation.
   return new IntSimdMatrix();
 }

 // Computes a reshaped copy of the weight matrix w. If there are no
 // partial_funcs_, it does nothing.
 void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w) {
   if (partial_funcs_.empty()) return;
   int num_out = w.dim1();
   int num_in = w.dim2() - 1;
   // The rounded-up sizes of the reshaped weight matrix, excluding biases.
   int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
   int rounded_num_out = RoundOutputs(num_out);
   // Add the bias and compute the required size.
   shaped_w_.resize((rounded_num_in + 1) * rounded_num_out, 0);
   int shaped_index = 0;
   int output = 0;
   // Each number of registers needs a different format! Iterates over the
   // different numbers of registers (each a power of 2).
   for (int num_registers = max_output_registers_; num_registers >= 1;
        num_registers /= 2) {
     // The number of outputs that we will generate with this many registers.
     int num_outputs_per_register_set =
         num_registers * num_outputs_per_register_;
     // Use the max number of registers until we have to go fewer.
     while (output + num_outputs_per_register_set <= rounded_num_out) {
       // Accumulating outputs in registers saves iterating over the inputs, so
       // we only have to do it once per output register set.
       for (int input = 0; input < num_in; input += num_inputs_per_group_) {
         // Iterate over the number of outputs in a register set.
         for (int j = 0; j < num_outputs_per_register_set; ++j) {
           // Inner-most loop corresponds to the number of inputs in an input
           // group.
           for (int i = 0; i < num_inputs_per_group_; ++i) {
             int8_t weight = 0;
             if (output + j < num_out && input + i < num_in)
               weight = w(output + j, input + i);
             shaped_w_[shaped_index++] = weight;
           }
         }
       }
       // Append the bias weights for the register set.
       for (int j = 0; j < num_outputs_per_register_set; ++j) {
         int8_t weight = 0;
         if (output + j < num_out) weight = w(output + j, num_in);
         shaped_w_[shaped_index++] = weight;
       }
       output += num_outputs_per_register_set;
     }
   }
 }

 // Computes the matrix.vector product v = Wu.
 // u holds W.dim2() - 1 values and v receives W.dim1() results. The final
 // column of W is the bias: u is treated as though it ended with an extra
 // element of value 1, although that element is not actually stored.
 // With partial_funcs_ present the work is delegated to them, reading the
 // weights from shaped_w_; otherwise a plain loop over w is used.
 void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
                                     const GenericVector<double>& scales,
                                     const int8_t* u, double* v) const {
   const int num_out = w.dim1();
   const int num_in = w.dim2() - 1;
   if (!partial_funcs_.empty()) {
     const int8_t* w_data = shaped_w_.data();
     const double* scales_data = &scales[0];
     const int rounded_num_in = Roundup(num_in, num_inputs_per_group_);
     const int rounded_num_out = RoundOutputs(num_out);
     // A call to a partial_func_ yields group_size outputs, except for the
     // last call, which may yield fewer.
     int group_size = num_outputs_per_register_ * max_output_registers_;
     int output = 0;
     for (auto fn : partial_funcs_) {
       // How far each call to fn advances through w_data.
       const int w_step = (rounded_num_in + 1) * group_size;
       // Stay with this group size while a whole group still fits; the next
       // function then takes over with half the group size.
       while (output + group_size <= rounded_num_out) {
         (*fn)(w_data, scales_data, u, rounded_num_in, num_out - output, v);
         w_data += w_step;
         scales_data += group_size;
         v += group_size;
         output += group_size;
       }
       group_size /= 2;
     }
     return;
   }
   // Base implementation: one integer dot product per output row.
   for (int row = 0; row < num_out; ++row) {
     const int8_t* weights = w[row];
     int dot = 0;
     for (int col = 0; col < num_in; ++col) dot += weights[col] * u[col];
     // Rescale the integer sum, add the bias, then apply the row's scale.
     v[row] =
         (static_cast<double>(dot) / INT8_MAX + weights[num_in]) * scales[row];
   }
 }

 }  // namespace tesseract
tesseract::IntSimdMatrix::max_output_registers_
int max_output_registers_
Definition: intsimdmatrix.h:121

intsimdmatrixsse.h

tesseract::IntSimdMatrix::partial_funcs_
std::vector< PartialFunc > partial_funcs_
Definition: intsimdmatrix.h:131

intsimdmatrix.h

tesseract::IntSimdMatrix::num_outputs_per_register_
int num_outputs_per_register_
Definition: intsimdmatrix.h:119

GENERIC_2D_ARRAY< int8_t >

simddetect.h

tesseract::IntSimdMatrix::GetFastestMultiplier
static IntSimdMatrix * GetFastestMultiplier()
Definition: intsimdmatrix.cpp:31

intsimdmatrixavx2.h

tesseract::IntSimdMatrix::num_inputs_per_group_
int num_inputs_per_group_
Definition: intsimdmatrix.h:125

tesseract::IntSimdMatrixAVX2
Definition: intsimdmatrixavx2.h:26

tesseract::IntSimdMatrix::Init
void Init(const GENERIC_2D_ARRAY< int8_t > &w)
Definition: intsimdmatrix.cpp:46

genericvector.h

tesseract::IntSimdMatrix::MatrixDotVector
void MatrixDotVector(const GENERIC_2D_ARRAY< int8_t > &w, const GenericVector< double > &scales, const int8_t *u, double *v) const
Definition: intsimdmatrix.cpp:96

tesseract::IntSimdMatrix::RoundOutputs
int RoundOutputs(int size) const
Definition: intsimdmatrix.h:85

GenericVector< double >

tesseract::IntSimdMatrix::Roundup
static int Roundup(int input, int factor)
Definition: intsimdmatrix.h:114

tesseract::IntSimdMatrix::shaped_w_
std::vector< int8_t > shaped_w_
Definition: intsimdmatrix.h:129

tesseract::IntSimdMatrix
Definition: intsimdmatrix.h:61

tesseract
Definition: baseapi.cpp:94

GENERIC_2D_ARRAY::dim1
int dim1() const
Definition: matrix.h:206

tesseract::IntSimdMatrix::IntSimdMatrix
IntSimdMatrix()
Definition: intsimdmatrix.h:65

SIMDDetect::IsSSEAvailable
static bool IsSSEAvailable()
Definition: simddetect.h:40

tesseract::IntSimdMatrixSSE
Definition: intsimdmatrixsse.h:26

GENERIC_2D_ARRAY::dim2
int dim2() const
Definition: matrix.h:207

SIMDDetect::IsAVX2Available
static bool IsAVX2Available()
Definition: simddetect.h:30

matrix.h