tesseract  5.0.0-alpha-619-ge9db
simddetect.cpp
Go to the documentation of this file.
1 // File: simddetect.cpp
3 // Description: Architecture detector.
4 // Author: Stefan Weil (based on code from Ray Smith)
5 //
6 // (C) Copyright 2014, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 
18 #include "config_auto.h" // for HAVE_AVX, ...
19 #include <numeric> // for std::inner_product
20 #include "simddetect.h"
21 #include "dotproduct.h"
22 #include "intsimdmatrix.h" // for IntSimdMatrix
23 #include "params.h" // for STRING_VAR
24 #include "tprintf.h" // for tprintf
25 
26 #if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA) || defined(HAVE_SSE4_1)
27 # define HAS_CPUID
28 #endif
29 
30 #if defined(HAS_CPUID)
31 #if defined(__GNUC__)
32 # include <cpuid.h>
33 #elif defined(_WIN32)
34 # include <intrin.h>
35 #endif
36 #endif
37 
38 namespace tesseract {
39 
40 // Computes and returns the dot product of the two n-vectors u and v.
41 // Note: because the order of addition is different among the different dot
42 // product functions, the results can (and do) vary slightly (although they
43 // agree to within about 4e-15). This produces different results when running
44 // training, despite all random inputs being precisely equal.
45 // To get consistent results, use just one of these dot product functions.
46 // On a test multi-layer network, serial is 57% slower than SSE, and AVX
47 // is about 8% faster than SSE. This suggests that the time is memory
48 // bandwidth constrained and could benefit from holding the reused vector
49 // in AVX registers.
51 
52 static STRING_VAR(dotproduct, "auto",
53  "Function used for calculation of dot product");
54 
55 SIMDDetect SIMDDetect::detector;
56 
57 // If true, then AVX has been detected.
58 bool SIMDDetect::avx_available_;
59 bool SIMDDetect::avx2_available_;
60 bool SIMDDetect::avx512F_available_;
61 bool SIMDDetect::avx512BW_available_;
62 // If true, then FMA has been detected.
63 bool SIMDDetect::fma_available_;
64 // If true, then SSe4.1 has been detected.
65 bool SIMDDetect::sse_available_;
66 
// Portable dot product of the two n-vectors u and v.
// The products u[i] * v[i] are accumulated in increasing index order,
// starting from 0.0. Returns 0.0 when n is not positive.
static double DotProductGeneric(const double* u, const double* v, int n) {
  double sum = 0.0;
  int i = 0;
  while (i < n) {
    sum += u[i] * v[i];
    ++i;
  }
  return sum;
}
73 
// Dot product of the two n-vectors u and v, delegated to
// std::inner_product with a double accumulator that starts at 0.0.
static double DotProductStdInnerProduct(const double* u, const double* v, int n) {
  const double* const u_end = u + n;
  const double initial = 0.0;
  return std::inner_product(u, u_end, v, initial);
}
78 
79 static void SetDotProduct(DotProductFunction f, const IntSimdMatrix* m = nullptr) {
80  DotProduct = f;
82 }
83 
84 // Constructor.
85 // Tests the architecture in a system-dependent way to detect AVX, SSE and
86 // any other available SIMD equipment.
87 // __GNUC__ is also defined by compilers that include GNU extensions such as
88 // clang.
89 SIMDDetect::SIMDDetect() {
90  // The fallback is a generic dot product calculation.
91  SetDotProduct(DotProductGeneric);
92 
93 #if defined(HAS_CPUID)
94 #if defined(__GNUC__)
95  unsigned int eax, ebx, ecx, edx;
96  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
97  // Note that these tests all use hex because the older compilers don't have
98  // the newer flags.
99 #if defined(HAVE_SSE4_1)
100  sse_available_ = (ecx & 0x00080000) != 0;
101 #endif
102 #if defined(HAVE_FMA)
103  fma_available_ = (ecx & 0x00001000) != 0;
104 #endif
105 #if defined(HAVE_AVX)
106  avx_available_ = (ecx & 0x10000000) != 0;
107  if (avx_available_) {
108  // There is supposed to be a __get_cpuid_count function, but this is all
109  // there is in my cpuid.h. It is a macro for an asm statement and cannot
110  // be used inside an if.
111  __cpuid_count(7, 0, eax, ebx, ecx, edx);
112  avx2_available_ = (ebx & 0x00000020) != 0;
113  avx512F_available_ = (ebx & 0x00010000) != 0;
114  avx512BW_available_ = (ebx & 0x40000000) != 0;
115  }
116 #endif
117  }
118 # elif defined(_WIN32)
119  int cpuInfo[4];
120  int max_function_id;
121  __cpuid(cpuInfo, 0);
122  max_function_id = cpuInfo[0];
123  if (max_function_id >= 1) {
124  __cpuid(cpuInfo, 1);
125 #if defined(HAVE_SSE4_1)
126  sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
127 #endif
128 #if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
129  if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
130  // OSXSAVE bit is set, XMM state and YMM state are fine.
131 #if defined(HAVE_FMA)
132  fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
133 #endif
134 #if defined(HAVE_AVX)
135  avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
136 #endif
137 #if defined(HAVE_AVX2)
138  if (max_function_id >= 7) {
139  __cpuid(cpuInfo, 7);
140  avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
141  avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
142  avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
143  }
144 #endif
145  }
146 #endif
147  }
148 #else
149 #error "I don't know how to test for SIMD with this compiler"
150 #endif
151 #endif
152 
153  // Select code for calculation of dot product based on autodetection.
154  if (false) {
155  // This is a dummy to support conditional compilation.
156 #if defined(HAVE_AVX2)
157  } else if (avx2_available_) {
158  // AVX2 detected.
160 #endif
161 #if defined(HAVE_AVX)
162  } else if (avx_available_) {
163  // AVX detected.
165 #endif
166 #if defined(HAVE_SSE4_1)
167  } else if (sse_available_) {
168  // SSE detected.
170 #endif
171  }
172 }
173 
175  // Select code for calculation of dot product based on the
176  // value of the config variable if that value is not empty.
177  const char* dotproduct_method = "generic";
178  if (!strcmp(dotproduct.c_str(), "auto")) {
179  // Automatic detection. Nothing to be done.
180  } else if (!strcmp(dotproduct.c_str(), "generic")) {
181  // Generic code selected by config variable.
182  SetDotProduct(DotProductGeneric);
183  dotproduct_method = "generic";
184  } else if (!strcmp(dotproduct.c_str(), "native")) {
185  // Native optimized code selected by config variable.
186  SetDotProduct(DotProductNative);
187  dotproduct_method = "native";
188 #if defined(HAVE_AVX2)
189  } else if (!strcmp(dotproduct.c_str(), "avx2")) {
190  // AVX2 selected by config variable.
192  dotproduct_method = "avx2";
193 #endif
194 #if defined(HAVE_AVX)
195  } else if (!strcmp(dotproduct.c_str(), "avx")) {
196  // AVX selected by config variable.
198  dotproduct_method = "avx";
199 #endif
200 #if defined(HAVE_FMA)
201  } else if (!strcmp(dotproduct.c_str(), "fma")) {
202  // FMA selected by config variable.
204  dotproduct_method = "fma";
205 #endif
206 #if defined(HAVE_SSE4_1)
207  } else if (!strcmp(dotproduct.c_str(), "sse")) {
208  // SSE selected by config variable.
210  dotproduct_method = "sse";
211 #endif
212  } else if (!strcmp(dotproduct.c_str(), "std::inner_product")) {
213  // std::inner_product selected by config variable.
214  SetDotProduct(DotProductStdInnerProduct);
215  dotproduct_method = "std::inner_product";
216  } else {
217  // Unsupported value of config variable.
218  tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
219  dotproduct.c_str());
220  tprintf("Support values for dotproduct: auto generic native"
221 #if defined(HAVE_AVX)
222  " avx"
223 #endif
224 #if defined(HAVE_SSE4_1)
225  " sse"
226 #endif
227  " std::inner_product.\n");
228  }
229 
230  dotproduct.set_value(dotproduct_method);
231 }
232 
233 } // namespace tesseract
tesseract::DotProductFMA
double DotProductFMA(const double *u, const double *v, int n)
Definition: dotproductfma.cpp:30
params.h
simddetect.h
tesseract::IntSimdMatrix::intSimdMatrixAVX2
static const IntSimdMatrix intSimdMatrixAVX2
Definition: intsimdmatrix.h:117
tesseract::SIMDDetect
Definition: simddetect.h:31
STRING_VAR
#define STRING_VAR(name, val, comment)
Definition: params.h:306
tesseract::DotProductSSE
double DotProductSSE(const double *u, const double *v, int n)
Definition: dotproductsse.cpp:31
dotproduct.h
tesseract::DotProduct
DotProductFunction DotProduct
Definition: simddetect.cpp:50
tesseract::IntSimdMatrix::intSimdMatrix
static const IntSimdMatrix * intSimdMatrix
Definition: intsimdmatrix.h:116
tesseract::DotProductFunction
double(*)(const double *, const double *, int) DotProductFunction
Definition: simddetect.h:25
tesseract
Definition: baseapi.h:65
tprintf.h
tesseract::DotProductAVX
double DotProductAVX(const double *u, const double *v, int n)
Definition: dotproductavx.cpp:30
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::IntSimdMatrix::intSimdMatrixSSE
static const IntSimdMatrix intSimdMatrixSSE
Definition: intsimdmatrix.h:118
intsimdmatrix.h
tesseract::SIMDDetect::Update
static TESS_API void Update()
Definition: simddetect.cpp:174
tesseract::DotProductNative
double DotProductNative(const double *u, const double *v, int n)
Definition: dotproduct.cpp:22