18 #include "config_auto.h"
26 #if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA) || defined(HAVE_SSE4_1)
30 #if defined(HAS_CPUID)
53 "Function used for calculation of dot product");
// Out-of-class definitions of SIMDDetect's per-feature availability flags.
// None has an initializer, so each starts out false (static storage is
// zero-initialized) and only becomes true if the detection code assigns it.
58 bool SIMDDetect::avx_available_;
59 bool SIMDDetect::avx2_available_;
60 bool SIMDDetect::avx512F_available_;
61 bool SIMDDetect::avx512BW_available_;
63 bool SIMDDetect::fma_available_;
65 bool SIMDDetect::sse_available_;
// Scalar dot product: adds u[k] * v[k] into `total` for k = 0 .. n-1.
//   u, v - input arrays; the loop reads elements 0 .. n-1 of each.
//   n    - number of element pairs; the loop body never runs when n <= 0.
// NOTE(review): the declaration of `total` and the return statement are not
// visible in this excerpt - presumably `total` starts at 0 and is returned;
// confirm against the full file.
68 static double DotProductGeneric(
const double* u,
const double* v,
int n) {
70 for (
int k = 0; k < n; ++k) total += u[k] * v[k];
// Dot product delegated to the standard library.
//   u, v - input arrays; elements 0 .. n-1 of each are combined.
//   n    - number of element pairs.
// Returns the result of std::inner_product over [u, u + n) and v.
75 static double DotProductStdInnerProduct(
const double* u,
const double* v,
int n) {
// The 0.0 initial value makes the accumulation happen in double.
76 return std::inner_product(u, u + n, v, 0.0);
// Constructor: installs the generic dot product first, then (when HAS_CPUID
// is defined) queries the CPU for SIMD features and records the results in
// the *_available_ flags. The trailing else-if chain then branches on those
// flags in the order AVX2, AVX, SSE.
// NOTE(review): several lines of this function are not visible in this
// excerpt (branch bodies, #endif lines, closing braces); the comments below
// describe only what the visible lines do.
89 SIMDDetect::SIMDDetect() {
// Safe default that needs no special CPU support.
91 SetDotProduct(DotProductGeneric);
93 #if defined(HAS_CPUID)
95 unsigned int eax, ebx, ecx, edx;
// Query CPUID leaf 1; the feature tests below only run when the call reports
// success (non-zero return).
// NOTE(review): no OS-support check comparable to the _xgetbv() test in the
// _WIN32 branch is visible here - confirm whether one is needed.
96 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
99 #if defined(HAVE_SSE4_1)
// ECX bit 19 (documented by Intel as the SSE4.1 feature bit).
100 sse_available_ = (ecx & 0x00080000) != 0;
102 #if defined(HAVE_FMA)
// ECX bit 12 (documented by Intel as the FMA feature bit).
103 fma_available_ = (ecx & 0x00001000) != 0;
105 #if defined(HAVE_AVX)
// ECX bit 28 (documented by Intel as the AVX feature bit).
106 avx_available_ = (ecx & 0x10000000) != 0;
// Leaf 7 is only queried when AVX itself was reported.
107 if (avx_available_) {
// Leaf 7, sub-leaf 0: the extended feature bits are read from EBX
// (bit 5 -> AVX2, bit 16 -> AVX512F, bit 30 -> AVX512BW).
111 __cpuid_count(7, 0, eax, ebx, ecx, edx);
112 avx2_available_ = (ebx & 0x00000020) != 0;
113 avx512F_available_ = (ebx & 0x00010000) != 0;
114 avx512BW_available_ = (ebx & 0x40000000) != 0;
// Windows variant of the same detection, reading results from cpuInfo[].
// NOTE(review): the calls that fill cpuInfo are not visible in this excerpt.
118 # elif defined(_WIN32)
122 max_function_id = cpuInfo[0];
// Leaf 1 feature bits are only inspected when the CPU reports that leaf.
123 if (max_function_id >= 1) {
125 #if defined(HAVE_SSE4_1)
126 sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
128 #if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
// Requires bit 27 of cpuInfo[2] (documented by Intel as OSXSAVE) and bits 1
// and 2 of _xgetbv(0) before any FMA/AVX flag is set - i.e. the OS must have
// enabled the extended register state, not just the CPU.
129 if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
131 #if defined(HAVE_FMA)
132 fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
134 #if defined(HAVE_AVX)
135 avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
137 #if defined(HAVE_AVX2)
// AVX2/AVX512 bits are only read when the CPU reports leaf 7.
// NOTE(review): presumably cpuInfo is refilled with leaf 7 data before the
// reads below; that call is not visible here.
138 if (max_function_id >= 7) {
140 avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
141 avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
142 avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
// Any other compiler with HAS_CPUID defined is rejected at compile time.
149 #error "I don't know how to test for SIMD with this compiler"
// Selection chain: branches are tested in the order AVX2, AVX, SSE, and each
// one is only compiled in when its HAVE_* macro is defined.
// NOTE(review): the head of the chain and the branch bodies are not visible.
156 #if defined(HAVE_AVX2)
157 }
else if (avx2_available_) {
161 #if defined(HAVE_AVX)
162 }
else if (avx_available_) {
166 #if defined(HAVE_SSE4_1)
167 }
else if (sse_available_) {
// Maps the value of the `dotproduct` config variable to a dot-product
// implementation and tracks the chosen method's name in dotproduct_method.
// NOTE(review): the enclosing function's header and several branch bodies are
// not visible in this excerpt; comments describe only the visible lines.
// The name defaults to "generic".
177 const char* dotproduct_method =
"generic";
178 if (!strcmp(dotproduct.c_str(),
"auto")) {
// "auto": nothing is done in the visible lines of this branch.
180 }
else if (!strcmp(dotproduct.c_str(),
"generic")) {
// "generic": force the plain scalar implementation.
182 SetDotProduct(DotProductGeneric);
183 dotproduct_method =
"generic";
184 }
else if (!strcmp(dotproduct.c_str(),
"native")) {
// "native": the SetDotProduct call for this branch is not visible here.
187 dotproduct_method =
"native";
188 #if defined(HAVE_AVX2)
189 }
else if (!strcmp(dotproduct.c_str(),
"avx2")) {
// "avx2" is only recognized when built with HAVE_AVX2.
192 dotproduct_method =
"avx2";
194 #if defined(HAVE_AVX)
195 }
else if (!strcmp(dotproduct.c_str(),
"avx")) {
// "avx" is only recognized when built with HAVE_AVX.
198 dotproduct_method =
"avx";
200 #if defined(HAVE_FMA)
201 }
else if (!strcmp(dotproduct.c_str(),
"fma")) {
// "fma" is only recognized when built with HAVE_FMA.
204 dotproduct_method =
"fma";
206 #if defined(HAVE_SSE4_1)
207 }
else if (!strcmp(dotproduct.c_str(),
"sse")) {
// "sse" is only recognized when built with HAVE_SSE4_1.
210 dotproduct_method =
"sse";
212 }
else if (!strcmp(dotproduct.c_str(),
"std::inner_product")) {
// "std::inner_product": use the standard-library based implementation.
214 SetDotProduct(DotProductStdInnerProduct);
215 dotproduct_method =
"std::inner_product";
// Unrecognized value: print a warning, then the list of accepted values. The
// list is assembled from adjacent string literals, some of them guarded by
// HAVE_* checks.
218 tprintf(
"Warning, ignoring unsupported config variable value: dotproduct=%s\n",
220 tprintf(
"Support values for dotproduct: auto generic native"
221 #
if defined(HAVE_AVX)
224 #
if defined(HAVE_SSE4_1)
227 " std::inner_product.\n");
// Write the resolved method name back into the config variable.
// NOTE(review): it is not visible from this excerpt whether this runs after
// every branch or only after the warning path - confirm against the full file.
230 dotproduct.set_value(dotproduct_method);