tesseract  5.0.0-alpha-619-ge9db
openclwrapper.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 #ifdef _WIN32
11 #include <io.h>
12 #else
13 #include <sys/types.h>
14 #include <unistd.h>
15 #endif
16 #include <cfloat>
17 #include <ctime> // for clock_gettime
18 
19 #include "oclkernels.h"
20 #include "openclwrapper.h"
21 
22 // for micro-benchmark
23 #include "otsuthr.h"
24 #include <tesseract/thresholder.h>
25 
26 // platform preprocessor commands
27 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
28  defined(__CYGWIN__) || defined(__MINGW32__)
29 #define ON_WINDOWS 1
30 #define ON_APPLE 0
31 #elif defined(__linux__)
32 #define ON_WINDOWS 0
33 #define ON_APPLE 0
34 #elif defined(__APPLE__)
35 #define ON_WINDOWS 0
36 #define ON_APPLE 1
37 #else
38 #define ON_WINDOWS 0
39 #define ON_APPLE 0
40 #endif
41 
42 #if ON_APPLE
43 #include <mach/mach_time.h>
44 #endif
45 
46 #ifdef USE_OPENCL
47 
48 #include <cstdio>
49 #include <cstring> // for memset, strcpy, ...
50 #include <vector>
51 
52 #include "errcode.h" // for ASSERT_HOST
53 
54 GPUEnv OpenclDevice::gpuEnv;
55 
56 bool OpenclDevice::deviceIsSelected = false;
57 ds_device OpenclDevice::selectedDevice;
58 
59 int OpenclDevice::isInited = 0;
60 
61 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
62 
63 static const l_uint32 lmask32[] = {
64  0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000,
65  0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
66  0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000,
67  0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
68  0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc,
69  0xfffffffe, 0xffffffff};
70 
71 static const l_uint32 rmask32[] = {
72  0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
73  0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
74  0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
75  0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
76  0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
77  0x7fffffff, 0xffffffff};
78 
79 static cl_mem pixsCLBuffer, pixdCLBuffer,
80  pixdCLIntermediate; // Morph operations buffers
81 static cl_mem pixThBuffer; // output from thresholdtopix calculation
82 static cl_int clStatus;
83 static KernelEnv rEnv;
84 
85 #define DS_TAG_VERSION "<version>"
86 #define DS_TAG_VERSION_END "</version>"
87 #define DS_TAG_DEVICE "<device>"
88 #define DS_TAG_DEVICE_END "</device>"
89 #define DS_TAG_SCORE "<score>"
90 #define DS_TAG_SCORE_END "</score>"
91 #define DS_TAG_DEVICE_TYPE "<type>"
92 #define DS_TAG_DEVICE_TYPE_END "</type>"
93 #define DS_TAG_DEVICE_NAME "<name>"
94 #define DS_TAG_DEVICE_NAME_END "</name>"
95 #define DS_TAG_DEVICE_DRIVER_VERSION "<driver>"
96 #define DS_TAG_DEVICE_DRIVER_VERSION_END "</driver>"
97 
98 #define DS_DEVICE_NATIVE_CPU_STRING "native_cpu"
99 
100 #define DS_DEVICE_NAME_LENGTH 256
101 
102 enum ds_evaluation_type { DS_EVALUATE_ALL, DS_EVALUATE_NEW_ONLY };
103 
104 struct ds_profile {
105  std::vector<ds_device> devices;
106  unsigned int numDevices;
107  const char* version;
108 };
109 
110 enum ds_status {
111  DS_SUCCESS = 0,
112  DS_INVALID_PROFILE = 1000,
113  DS_MEMORY_ERROR,
114  DS_INVALID_PERF_EVALUATOR_TYPE,
115  DS_INVALID_PERF_EVALUATOR,
116  DS_PERF_EVALUATOR_ERROR,
117  DS_FILE_ERROR,
118  DS_UNKNOWN_DEVICE_TYPE,
119  DS_PROFILE_FILE_ERROR,
120  DS_SCORE_SERIALIZER_ERROR,
121  DS_SCORE_DESERIALIZER_ERROR
122 };
123 
124 // Pointer to a function that calculates the score of a device (ex:
125 // device->score) update the data size of score. The encoding and the format
126 // of the score data is implementation defined. The function should return
127 // DS_SUCCESS if there's no error to be reported.
128 typedef ds_status (*ds_perf_evaluator)(ds_device* device, void* data);
129 
130 // deallocate memory used by score
131 typedef ds_status (*ds_score_release)(TessDeviceScore* score);
132 
133 static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) {
134  ds_status status = DS_SUCCESS;
135  if (profile != nullptr) {
136  if (sr != nullptr) {
137  unsigned int i;
138  for (i = 0; i < profile->numDevices; i++) {
139  free(profile->devices[i].oclDeviceName);
140  free(profile->devices[i].oclDriverVersion);
141  status = sr(profile->devices[i].score);
142  if (status != DS_SUCCESS) break;
143  }
144  }
145  delete profile;
146  }
147  return status;
148 }
149 
150 static ds_status initDSProfile(ds_profile** p, const char* version) {
151  int numDevices;
152  cl_uint numPlatforms;
153  std::vector<cl_platform_id> platforms;
154  std::vector <cl_device_id> devices;
155  ds_status status = DS_SUCCESS;
156  unsigned int next;
157  unsigned int i;
158 
159  if (p == nullptr) return DS_INVALID_PROFILE;
160 
161  ds_profile* profile = new ds_profile;
162 
163  memset(profile, 0, sizeof(ds_profile));
164 
165  clGetPlatformIDs(0, nullptr, &numPlatforms);
166 
167  if (numPlatforms > 0) {
168  platforms.reserve(numPlatforms);
169  clGetPlatformIDs(numPlatforms, &platforms[0], nullptr);
170  }
171 
172  numDevices = 0;
173  for (i = 0; i < numPlatforms; i++) {
174  cl_uint num;
175  clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &num);
176  numDevices += num;
177  }
178 
179  if (numDevices > 0) {
180  devices.reserve(numDevices);
181  }
182 
183  profile->numDevices =
184  numDevices + 1; // +1 to numDevices to include the native CPU
185  profile->devices.reserve(profile->numDevices);
186  memset(&profile->devices[0], 0, profile->numDevices * sizeof(ds_device));
187 
188  next = 0;
189  for (i = 0; i < numPlatforms; i++) {
190  cl_uint num;
191  unsigned j;
192  clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, &devices[0], &num);
193  for (j = 0; j < num; j++, next++) {
194  char buffer[DS_DEVICE_NAME_LENGTH];
195  size_t length;
196 
197  profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE;
198  profile->devices[next].oclDeviceID = devices[j];
199 
200  clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME,
201  DS_DEVICE_NAME_LENGTH, &buffer, nullptr);
202  length = strlen(buffer);
203  profile->devices[next].oclDeviceName = (char*)malloc(length + 1);
204  memcpy(profile->devices[next].oclDeviceName, buffer, length + 1);
205 
206  clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION,
207  DS_DEVICE_NAME_LENGTH, &buffer, nullptr);
208  length = strlen(buffer);
209  profile->devices[next].oclDriverVersion = (char*)malloc(length + 1);
210  memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1);
211  }
212  }
213  profile->devices[next].type = DS_DEVICE_NATIVE_CPU;
214  profile->version = version;
215 
216  *p = profile;
217  return status;
218 }
219 
220 static ds_status profileDevices(ds_profile* profile,
221  const ds_evaluation_type type,
222  ds_perf_evaluator evaluator,
223  void* evaluatorData, unsigned int* numUpdates) {
224  ds_status status = DS_SUCCESS;
225  unsigned int i;
226  unsigned int updates = 0;
227 
228  if (profile == nullptr) {
229  return DS_INVALID_PROFILE;
230  }
231  if (evaluator == nullptr) {
232  return DS_INVALID_PERF_EVALUATOR;
233  }
234 
235  for (i = 0; i < profile->numDevices; i++) {
236  ds_status evaluatorStatus;
237 
238  switch (type) {
239  case DS_EVALUATE_NEW_ONLY:
240  if (profile->devices[i].score != nullptr) break;
241  // else fall through
242  case DS_EVALUATE_ALL:
243  evaluatorStatus = evaluator(&profile->devices[i], evaluatorData);
244  if (evaluatorStatus != DS_SUCCESS) {
245  status = evaluatorStatus;
246  return status;
247  }
248  updates++;
249  break;
250  default:
251  return DS_INVALID_PERF_EVALUATOR_TYPE;
252  break;
253  };
254  }
255  if (numUpdates) *numUpdates = updates;
256  return status;
257 }
258 
259 static const char* findString(const char* contentStart, const char* contentEnd,
260  const char* string) {
261  size_t stringLength;
262  const char* currentPosition;
263  const char* found = nullptr;
264  stringLength = strlen(string);
265  currentPosition = contentStart;
266  for (currentPosition = contentStart; currentPosition < contentEnd;
267  currentPosition++) {
268  if (*currentPosition == string[0]) {
269  if (currentPosition + stringLength < contentEnd) {
270  if (strncmp(currentPosition, string, stringLength) == 0) {
271  found = currentPosition;
272  break;
273  }
274  }
275  }
276  }
277  return found;
278 }
279 
280 static ds_status readProFile(const char* fileName, char** content,
281  size_t* contentSize) {
282  *contentSize = 0;
283  *content = nullptr;
284  ds_status status = DS_SUCCESS;
285  FILE* input = fopen(fileName, "rb");
286  if (input == nullptr) {
287  status = DS_FILE_ERROR;
288  } else {
289  fseek(input, 0L, SEEK_END);
290  auto pos = std::ftell(input);
291  rewind(input);
292  if (pos > 0) {
293  size_t size = pos;
294  char *binary = new char[size];
295  if (fread(binary, sizeof(char), size, input) != size) {
296  status = DS_FILE_ERROR;
297  delete[] binary;
298  } else {
299  *contentSize = size;
300  *content = binary;
301  }
302  }
303  fclose(input);
304  }
305  return status;
306 }
307 
308 typedef ds_status (*ds_score_deserializer)(ds_device* device,
309  const uint8_t* serializedScore,
310  unsigned int serializedScoreSize);
311 
312 static ds_status readProfileFromFile(ds_profile* profile,
313  ds_score_deserializer deserializer,
314  const char* file) {
315  ds_status status = DS_SUCCESS;
316  char* contentStart;
317  size_t contentSize;
318 
319  if (profile == nullptr) return DS_INVALID_PROFILE;
320 
321  status = readProFile(file, &contentStart, &contentSize);
322  if (status == DS_SUCCESS) {
323  const char* currentPosition;
324  const char* dataStart;
325  const char* dataEnd;
326 
327  const char* contentEnd = contentStart + contentSize;
328  currentPosition = contentStart;
329 
330  // parse the version string
331  dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION);
332  if (dataStart == nullptr) {
333  status = DS_PROFILE_FILE_ERROR;
334  goto cleanup;
335  }
336  dataStart += strlen(DS_TAG_VERSION);
337 
338  dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END);
339  if (dataEnd == nullptr) {
340  status = DS_PROFILE_FILE_ERROR;
341  goto cleanup;
342  }
343 
344  size_t versionStringLength = strlen(profile->version);
345  if (versionStringLength + dataStart != dataEnd ||
346  strncmp(profile->version, dataStart, versionStringLength) != 0) {
347  // version mismatch
348  status = DS_PROFILE_FILE_ERROR;
349  goto cleanup;
350  }
351  currentPosition = dataEnd + strlen(DS_TAG_VERSION_END);
352 
353  // parse the device information
354  while (1) {
355  unsigned int i;
356 
357  const char* deviceTypeStart;
358  const char* deviceTypeEnd;
359  ds_device_type deviceType;
360 
361  const char* deviceNameStart;
362  const char* deviceNameEnd;
363 
364  const char* deviceScoreStart;
365  const char* deviceScoreEnd;
366 
367  const char* deviceDriverStart;
368  const char* deviceDriverEnd;
369 
370  dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE);
371  if (dataStart == nullptr) {
372  // nothing useful remain, quit...
373  break;
374  }
375  dataStart += strlen(DS_TAG_DEVICE);
376  dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END);
377  if (dataEnd == nullptr) {
378  status = DS_PROFILE_FILE_ERROR;
379  goto cleanup;
380  }
381 
382  // parse the device type
383  deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE);
384  if (deviceTypeStart == nullptr) {
385  status = DS_PROFILE_FILE_ERROR;
386  goto cleanup;
387  }
388  deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE);
389  deviceTypeEnd =
390  findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END);
391  if (deviceTypeEnd == nullptr) {
392  status = DS_PROFILE_FILE_ERROR;
393  goto cleanup;
394  }
395  memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type));
396 
397  // parse the device name
398  if (deviceType == DS_DEVICE_OPENCL_DEVICE) {
399  deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME);
400  if (deviceNameStart == nullptr) {
401  status = DS_PROFILE_FILE_ERROR;
402  goto cleanup;
403  }
404  deviceNameStart += strlen(DS_TAG_DEVICE_NAME);
405  deviceNameEnd =
406  findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END);
407  if (deviceNameEnd == nullptr) {
408  status = DS_PROFILE_FILE_ERROR;
409  goto cleanup;
410  }
411 
412  deviceDriverStart =
413  findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION);
414  if (deviceDriverStart == nullptr) {
415  status = DS_PROFILE_FILE_ERROR;
416  goto cleanup;
417  }
418  deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION);
419  deviceDriverEnd = findString(deviceDriverStart, contentEnd,
420  DS_TAG_DEVICE_DRIVER_VERSION_END);
421  if (deviceDriverEnd == nullptr) {
422  status = DS_PROFILE_FILE_ERROR;
423  goto cleanup;
424  }
425 
426  // check if this device is on the system
427  for (i = 0; i < profile->numDevices; i++) {
428  if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) {
429  size_t actualDeviceNameLength;
430  size_t driverVersionLength;
431 
432  actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName);
433  driverVersionLength = strlen(profile->devices[i].oclDriverVersion);
434  if (deviceNameStart + actualDeviceNameLength == deviceNameEnd &&
435  deviceDriverStart + driverVersionLength == deviceDriverEnd &&
436  strncmp(profile->devices[i].oclDeviceName, deviceNameStart,
437  actualDeviceNameLength) == 0 &&
438  strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart,
439  driverVersionLength) == 0) {
440  deviceScoreStart =
441  findString(dataStart, contentEnd, DS_TAG_SCORE);
442  deviceScoreStart += strlen(DS_TAG_SCORE);
443  deviceScoreEnd =
444  findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
445  status = deserializer(&profile->devices[i],
446  (const unsigned char*)deviceScoreStart,
447  deviceScoreEnd - deviceScoreStart);
448  if (status != DS_SUCCESS) {
449  goto cleanup;
450  }
451  }
452  }
453  }
454  } else if (deviceType == DS_DEVICE_NATIVE_CPU) {
455  for (i = 0; i < profile->numDevices; i++) {
456  if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) {
457  deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
458  if (deviceScoreStart == nullptr) {
459  status = DS_PROFILE_FILE_ERROR;
460  goto cleanup;
461  }
462  deviceScoreStart += strlen(DS_TAG_SCORE);
463  deviceScoreEnd =
464  findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
465  status = deserializer(&profile->devices[i],
466  (const unsigned char*)deviceScoreStart,
467  deviceScoreEnd - deviceScoreStart);
468  if (status != DS_SUCCESS) {
469  goto cleanup;
470  }
471  }
472  }
473  }
474 
475  // skip over the current one to find the next device
476  currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END);
477  }
478  }
479 cleanup:
480  delete[] contentStart;
481  return status;
482 }
483 
484 typedef ds_status (*ds_score_serializer)(ds_device* device,
485  uint8_t** serializedScore,
486  unsigned int* serializedScoreSize);
487 static ds_status writeProfileToFile(ds_profile* profile,
488  ds_score_serializer serializer,
489  const char* file) {
490  ds_status status = DS_SUCCESS;
491 
492  if (profile == nullptr) return DS_INVALID_PROFILE;
493 
494  FILE* profileFile = fopen(file, "wb");
495  if (profileFile == nullptr) {
496  status = DS_FILE_ERROR;
497  } else {
498  unsigned int i;
499 
500  // write version string
501  fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), profileFile);
502  fwrite(profile->version, sizeof(char), strlen(profile->version),
503  profileFile);
504  fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END),
505  profileFile);
506  fwrite("\n", sizeof(char), 1, profileFile);
507 
508  for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) {
509  uint8_t* serializedScore;
510  unsigned int serializedScoreSize;
511 
512  fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile);
513 
514  fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE),
515  profileFile);
516  fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile);
517  fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char),
518  strlen(DS_TAG_DEVICE_TYPE_END), profileFile);
519 
520  switch (profile->devices[i].type) {
521  case DS_DEVICE_NATIVE_CPU: {
522  // There's no need to emit a device name for the native CPU device.
523  /*
524  fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME),
525  profileFile);
526  fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char),
527  strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile);
528  fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char),
529  strlen(DS_TAG_DEVICE_NAME_END), profileFile);
530  */
531  } break;
532  case DS_DEVICE_OPENCL_DEVICE: {
533  fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME),
534  profileFile);
535  fwrite(profile->devices[i].oclDeviceName, sizeof(char),
536  strlen(profile->devices[i].oclDeviceName), profileFile);
537  fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char),
538  strlen(DS_TAG_DEVICE_NAME_END), profileFile);
539 
540  fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char),
541  strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile);
542  fwrite(profile->devices[i].oclDriverVersion, sizeof(char),
543  strlen(profile->devices[i].oclDriverVersion), profileFile);
544  fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char),
545  strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile);
546  } break;
547  default:
548  status = DS_UNKNOWN_DEVICE_TYPE;
549  continue;
550  };
551 
552  fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile);
553  status = serializer(&profile->devices[i], &serializedScore,
554  &serializedScoreSize);
555  if (status == DS_SUCCESS && serializedScore != nullptr &&
556  serializedScoreSize > 0) {
557  fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile);
558  delete[] serializedScore;
559  }
560  fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END),
561  profileFile);
562  fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END),
563  profileFile);
564  fwrite("\n", sizeof(char), 1, profileFile);
565  }
566  fclose(profileFile);
567  }
568  return status;
569 }
570 
571 // substitute invalid characters in device name with _
572 static void legalizeFileName(char* fileName) {
573  // tprintf("fileName: %s\n", fileName);
574  const char* invalidChars =
575  "/\?:*\"><| "; // space is valid but can cause headaches
576  // for each invalid char
577  for (unsigned i = 0; i < strlen(invalidChars); i++) {
578  char invalidStr[4];
579  invalidStr[0] = invalidChars[i];
580  invalidStr[1] = '\0';
581  // tprintf("eliminating %s\n", invalidStr);
582  // char *pos = strstr(fileName, invalidStr);
583  // initial ./ is valid for present directory
584  // if (*pos == '.') pos++;
585  // if (*pos == '/') pos++;
586  for (char* pos = strstr(fileName, invalidStr); pos != nullptr;
587  pos = strstr(pos + 1, invalidStr)) {
588  // tprintf("\tfound: %s, ", pos);
589  pos[0] = '_';
590  // tprintf("fileName: %s\n", fileName);
591  }
592  }
593 }
594 
595 static void populateGPUEnvFromDevice(GPUEnv* gpuInfo, cl_device_id device) {
596  // tprintf("[DS] populateGPUEnvFromDevice\n");
597  size_t size;
598  gpuInfo->mnIsUserCreated = 1;
599  // device
600  gpuInfo->mpDevID = device;
601  gpuInfo->mpArryDevsID = new cl_device_id[1];
602  gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
603  clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
604  sizeof(cl_device_type), &gpuInfo->mDevType, &size);
605  CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(TYPE)");
606  // platform
607  clStatus =
608  clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
609  sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
610  CHECK_OPENCL(clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)");
611  // context
612  cl_context_properties props[3];
613  props[0] = CL_CONTEXT_PLATFORM;
614  props[1] = (cl_context_properties)gpuInfo->mpPlatformID;
615  props[2] = 0;
616  gpuInfo->mpContext =
617  clCreateContext(props, 1, &gpuInfo->mpDevID, nullptr, nullptr, &clStatus);
618  CHECK_OPENCL(clStatus, "populateGPUEnv::createContext");
619  // queue
620  cl_command_queue_properties queueProperties = 0;
621  gpuInfo->mpCmdQueue = clCreateCommandQueue(
622  gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus);
623  CHECK_OPENCL(clStatus, "populateGPUEnv::createCommandQueue");
624 }
625 
626 int OpenclDevice::LoadOpencl() {
627 #ifdef WIN32
628  HINSTANCE HOpenclDll = nullptr;
629  void* OpenclDll = nullptr;
630  // fprintf(stderr, " LoadOpenclDllxx... \n");
631  OpenclDll = static_cast<HINSTANCE>(HOpenclDll);
632  OpenclDll = LoadLibrary("openCL.dll");
633  if (!static_cast<HINSTANCE>(OpenclDll)) {
634  fprintf(stderr, "[OD] Load opencl.dll failed!\n");
635  FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
636  return 0;
637  }
638  fprintf(stderr, "[OD] Load opencl.dll successful!\n");
639 #endif
640  return 1;
641 }
642 int OpenclDevice::SetKernelEnv(KernelEnv* envInfo) {
643  envInfo->mpkContext = gpuEnv.mpContext;
644  envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
645  envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
646 
647  return 1;
648 }
649 
650 static cl_mem allocateZeroCopyBuffer(const KernelEnv& rEnv,
651  l_uint32* hostbuffer, size_t nElements,
652  cl_mem_flags flags, cl_int* pStatus) {
653  cl_mem membuffer =
654  clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(flags),
655  nElements * sizeof(l_uint32), hostbuffer, pStatus);
656 
657  return membuffer;
658 }
659 
660 static Pix* mapOutputCLBuffer(const KernelEnv& rEnv, cl_mem clbuffer, Pix* pixd,
661  Pix* pixs, int elements, cl_mem_flags flags,
662  bool memcopy = false, bool sync = true) {
663  if (!pixd) {
664  if (memcopy) {
665  if ((pixd = pixCreateTemplate(pixs)) == nullptr)
666  tprintf("pixd not made\n");
667  } else {
668  if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs),
669  pixGetDepth(pixs))) == nullptr)
670  tprintf("pixd not made\n");
671  }
672  }
673  l_uint32* pValues = (l_uint32*)clEnqueueMapBuffer(
674  rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
675  elements * sizeof(l_uint32), 0, nullptr, nullptr, nullptr);
676 
677  if (memcopy) {
678  memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32));
679  } else {
680  pixSetData(pixd, pValues);
681  }
682 
683  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, nullptr,
684  nullptr);
685 
686  if (sync) {
687  clFinish(rEnv.mpkCmdQueue);
688  }
689 
690  return pixd;
691 }
692 
693 void OpenclDevice::releaseMorphCLBuffers() {
694  if (pixdCLIntermediate != nullptr) clReleaseMemObject(pixdCLIntermediate);
695  if (pixsCLBuffer != nullptr) clReleaseMemObject(pixsCLBuffer);
696  if (pixdCLBuffer != nullptr) clReleaseMemObject(pixdCLBuffer);
697  if (pixThBuffer != nullptr) clReleaseMemObject(pixThBuffer);
698  pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr;
699 }
700 
701 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs) {
702  SetKernelEnv(&rEnv);
703 
704  if (pixThBuffer != nullptr) {
705  pixsCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
706  CL_MEM_ALLOC_HOST_PTR, &clStatus);
707 
708  // Get the output from ThresholdToPix operation
709  clStatus =
710  clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
711  sizeof(l_uint32) * wpl * h, 0, nullptr, nullptr);
712  } else {
713  // Get data from the source image
714  l_uint32* srcdata =
715  reinterpret_cast<l_uint32*>(malloc(wpl * h * sizeof(l_uint32)));
716  memcpy(srcdata, pixGetData(pixs), wpl * h * sizeof(l_uint32));
717 
718  pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl * h,
719  CL_MEM_USE_HOST_PTR, &clStatus);
720  }
721 
722  pixdCLBuffer = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
723  CL_MEM_ALLOC_HOST_PTR, &clStatus);
724 
725  pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, nullptr, wpl * h,
726  CL_MEM_ALLOC_HOST_PTR, &clStatus);
727 
728  return (int)clStatus;
729 }
730 
731 int OpenclDevice::InitEnv() {
732 // tprintf("[OD] OpenclDevice::InitEnv()\n");
733 #ifdef SAL_WIN32
734  while (1) {
735  if (1 == LoadOpencl()) break;
736  }
737 #endif
738  // sets up environment, compiles programs
739 
740  InitOpenclRunEnv_DeviceSelection(0);
741  return 1;
742 }
743 
744 int OpenclDevice::ReleaseOpenclRunEnv() {
745  ReleaseOpenclEnv(&gpuEnv);
746 #ifdef SAL_WIN32
747  FreeOpenclDll();
748 #endif
749  return 1;
750 }
751 
752 inline int OpenclDevice::AddKernelConfig(int kCount, const char* kName) {
753  ASSERT_HOST(kCount > 0);
754  ASSERT_HOST(strlen(kName) < sizeof(gpuEnv.mArrykernelNames[kCount - 1]));
755  strcpy(gpuEnv.mArrykernelNames[kCount - 1], kName);
756  gpuEnv.mnKernelCount++;
757  return 0;
758 }
759 
760 int OpenclDevice::RegistOpenclKernel() {
761  if (!gpuEnv.mnIsUserCreated) memset(&gpuEnv, 0, sizeof(gpuEnv));
762 
763  gpuEnv.mnFileCount = 0; // argc;
764  gpuEnv.mnKernelCount = 0UL;
765 
766  AddKernelConfig(1, "oclAverageSub1");
767  return 0;
768 }
769 
770 int OpenclDevice::InitOpenclRunEnv_DeviceSelection(int argc) {
771  if (!isInited) {
772  // after programs compiled, selects best device
773  ds_device bestDevice_DS = getDeviceSelection();
774  cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
775  // overwrite global static GPUEnv with new device
776  if (selectedDeviceIsOpenCL()) {
777  // tprintf("[DS] InitOpenclRunEnv_DS::Calling populateGPUEnvFromDevice()
778  // for selected device\n");
779  populateGPUEnvFromDevice(&gpuEnv, bestDevice);
780  gpuEnv.mnFileCount = 0; // argc;
781  gpuEnv.mnKernelCount = 0UL;
782  CompileKernelFile(&gpuEnv, "");
783  } else {
784  // tprintf("[DS] InitOpenclRunEnv_DS::Skipping populateGPUEnvFromDevice()
785  // b/c native cpu selected\n");
786  }
787  isInited = 1;
788  }
789  return 0;
790 }
791 
792 OpenclDevice::OpenclDevice() {
793  // InitEnv();
794 }
795 
796 OpenclDevice::~OpenclDevice() {
797  // ReleaseOpenclRunEnv();
798 }
799 
800 int OpenclDevice::ReleaseOpenclEnv(GPUEnv* gpuInfo) {
801  int i = 0;
802  int clStatus = 0;
803 
804  if (!isInited) {
805  return 1;
806  }
807 
808  for (i = 0; i < gpuEnv.mnFileCount; i++) {
809  if (gpuEnv.mpArryPrograms[i]) {
810  clStatus = clReleaseProgram(gpuEnv.mpArryPrograms[i]);
811  CHECK_OPENCL(clStatus, "clReleaseProgram");
812  gpuEnv.mpArryPrograms[i] = nullptr;
813  }
814  }
815  if (gpuEnv.mpCmdQueue) {
816  clReleaseCommandQueue(gpuEnv.mpCmdQueue);
817  gpuEnv.mpCmdQueue = nullptr;
818  }
819  if (gpuEnv.mpContext) {
820  clReleaseContext(gpuEnv.mpContext);
821  gpuEnv.mpContext = nullptr;
822  }
823  isInited = 0;
824  gpuInfo->mnIsUserCreated = 0;
825  delete[] gpuInfo->mpArryDevsID;
826  return 1;
827 }
828 int OpenclDevice::BinaryGenerated(const char* clFileName, FILE** fhandle) {
829  unsigned int i = 0;
830  cl_int clStatus;
831  int status = 0;
832  FILE* fd = nullptr;
833  char fileName[256] = {0}, cl_name[128] = {0};
834  char deviceName[1024];
835  clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
836  sizeof(deviceName), deviceName, nullptr);
837  CHECK_OPENCL(clStatus, "clGetDeviceInfo");
838  const char* str = strstr(clFileName, ".cl");
839  memcpy(cl_name, clFileName, str - clFileName);
840  cl_name[str - clFileName] = '\0';
841  sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
842  legalizeFileName(fileName);
843  fd = fopen(fileName, "rb");
844  status = (fd != nullptr) ? 1 : 0;
845  if (fd != nullptr) {
846  *fhandle = fd;
847  }
848  return status;
849 }
850 int OpenclDevice::CachedOfKernerPrg(const GPUEnv* gpuEnvCached,
851  const char* clFileName) {
852  int i;
853  for (i = 0; i < gpuEnvCached->mnFileCount; i++) {
854  if (strcasecmp(gpuEnvCached->mArryKnelSrcFile[i], clFileName) == 0) {
855  if (gpuEnvCached->mpArryPrograms[i] != nullptr) {
856  return 1;
857  }
858  }
859  }
860 
861  return 0;
862 }
863 int OpenclDevice::WriteBinaryToFile(const char* fileName, const char* birary,
864  size_t numBytes) {
865  FILE* output = nullptr;
866  output = fopen(fileName, "wb");
867  if (output == nullptr) {
868  return 0;
869  }
870 
871  fwrite(birary, sizeof(char), numBytes, output);
872  fclose(output);
873 
874  return 1;
875 }
876 
877 int OpenclDevice::GeneratBinFromKernelSource(cl_program program,
878  const char* clFileName) {
879  unsigned int i = 0;
880  cl_int clStatus;
881  cl_uint numDevices;
882 
883  clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
884  sizeof(numDevices), &numDevices, nullptr);
885  CHECK_OPENCL(clStatus, "clGetProgramInfo");
886 
887  std::vector<cl_device_id> mpArryDevsID(numDevices);
888 
889  /* grab the handles to all of the devices in the program. */
890  clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
891  sizeof(cl_device_id) * numDevices,
892  &mpArryDevsID[0],
893  nullptr);
894  CHECK_OPENCL(clStatus, "clGetProgramInfo");
895 
896  /* figure out the sizes of each of the binaries. */
897  std::vector<size_t> binarySizes(numDevices);
898 
899  clStatus =
900  clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
901  sizeof(size_t) * numDevices, &binarySizes[0], nullptr);
902  CHECK_OPENCL(clStatus, "clGetProgramInfo");
903 
904  /* copy over all of the generated binaries. */
905  std::vector<char*> binaries(numDevices);
906 
907  for (i = 0; i < numDevices; i++) {
908  if (binarySizes[i] != 0) {
909  binaries[i] = new char[binarySizes[i]];
910  } else {
911  binaries[i] = nullptr;
912  }
913  }
914 
915  clStatus =
916  clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char*) * numDevices,
917  &binaries[0], nullptr);
918  CHECK_OPENCL(clStatus, "clGetProgramInfo");
919 
920  /* dump out each binary into its own separate file. */
921  for (i = 0; i < numDevices; i++) {
922  char fileName[256] = {0}, cl_name[128] = {0};
923 
924  if (binarySizes[i] != 0) {
925  char deviceName[1024];
926  clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
927  sizeof(deviceName), deviceName, nullptr);
928  CHECK_OPENCL(clStatus, "clGetDeviceInfo");
929 
930  const char* str = strstr(clFileName, ".cl");
931  memcpy(cl_name, clFileName, str - clFileName);
932  cl_name[str - clFileName] = '\0';
933  sprintf(fileName, "%s-%s.bin", cl_name, deviceName);
934  legalizeFileName(fileName);
935  if (!WriteBinaryToFile(fileName, binaries[i], binarySizes[i])) {
936  tprintf("[OD] write binary[%s] failed\n", fileName);
937  return 0;
938  } // else
939  tprintf("[OD] write binary[%s] successfully\n", fileName);
940  }
941  }
942 
943  // Release all resources and memory
944  for (i = 0; i < numDevices; i++) {
945  delete[] binaries[i];
946  }
947 
948  return 1;
949 }
950 
951 int OpenclDevice::CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption) {
952  cl_int clStatus = 0;
953  const char* source;
954  size_t source_size[1];
955  int binary_status, binaryExisted, idx;
956  cl_uint numDevices;
957  FILE *fd, *fd1;
958  const char* filename = "kernel.cl";
959  // fprintf(stderr, "[OD] CompileKernelFile ... \n");
960  if (CachedOfKernerPrg(gpuInfo, filename) == 1) {
961  return 1;
962  }
963 
964  idx = gpuInfo->mnFileCount;
965 
966  source = kernel_src;
967 
968  source_size[0] = strlen(source);
969  binaryExisted = 0;
970  binaryExisted = BinaryGenerated(
971  filename, &fd); // don't check for binary during microbenchmark
972  if (binaryExisted == 1) {
973  clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
974  sizeof(numDevices), &numDevices, nullptr);
975  CHECK_OPENCL(clStatus, "clGetContextInfo");
976 
977  std::vector<cl_device_id> mpArryDevsID(numDevices);
978  bool b_error = fseek(fd, 0, SEEK_END) < 0;
979  auto pos = std::ftell(fd);
980  b_error |= (pos <= 0);
981  size_t length = pos;
982  b_error |= fseek(fd, 0, SEEK_SET) < 0;
983  if (b_error) {
984  fclose(fd);
985  return 0;
986  }
987 
988  std::vector<uint8_t> binary(length + 2);
989 
990  memset(&binary[0], 0, length + 2);
991  b_error |= fread(&binary[0], 1, length, fd) != length;
992 
993  fclose(fd);
994  fd = nullptr;
995  // grab the handles to all of the devices in the context.
996  clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
997  sizeof(cl_device_id) * numDevices,
998  &mpArryDevsID[0], nullptr);
999  CHECK_OPENCL(clStatus, "clGetContextInfo");
1000  // fprintf(stderr, "[OD] Create kernel from binary\n");
1001  const uint8_t* c_binary = &binary[0];
1002  gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary(
1003  gpuInfo->mpContext, numDevices, &mpArryDevsID[0], &length, &c_binary,
1004  &binary_status, &clStatus);
1005  CHECK_OPENCL(clStatus, "clCreateProgramWithBinary");
1006  } else {
1007  // create a CL program using the kernel source
1008  // fprintf(stderr, "[OD] Create kernel from source\n");
1009  gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource(
1010  gpuInfo->mpContext, 1, &source, source_size, &clStatus);
1011  CHECK_OPENCL(clStatus, "clCreateProgramWithSource");
1012  }
1013 
1014  if (gpuInfo->mpArryPrograms[idx] == (cl_program) nullptr) {
1015  return 0;
1016  }
1017 
1018  // char options[512];
1019  // create a cl program executable for all the devices specified
1020  // tprintf("[OD] BuildProgram.\n");
1021  if (!gpuInfo->mnIsUserCreated) {
1022  clStatus =
1023  clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
1024  buildOption, nullptr, nullptr);
1025  } else {
1026  clStatus =
1027  clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
1028  buildOption, nullptr, nullptr);
1029  }
1030  if (clStatus != CL_SUCCESS) {
1031  tprintf("BuildProgram error!\n");
1032  size_t length;
1033  if (!gpuInfo->mnIsUserCreated) {
1034  clStatus = clGetProgramBuildInfo(
1035  gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
1036  CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
1037  } else {
1038  clStatus =
1039  clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
1040  CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
1041  }
1042  if (clStatus != CL_SUCCESS) {
1043  tprintf("opencl create build log fail\n");
1044  return 0;
1045  }
1046  std::vector<char> buildLog(length);
1047  if (!gpuInfo->mnIsUserCreated) {
1048  clStatus = clGetProgramBuildInfo(
1049  gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
1050  CL_PROGRAM_BUILD_LOG, length, &buildLog[0], &length);
1051  } else {
1052  clStatus = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
1053  gpuInfo->mpDevID, CL_PROGRAM_BUILD_LOG,
1054  length, &buildLog[0], &length);
1055  }
1056  if (clStatus != CL_SUCCESS) {
1057  tprintf("opencl program build info fail\n");
1058  return 0;
1059  }
1060 
1061  fd1 = fopen("kernel-build.log", "w+");
1062  if (fd1 != nullptr) {
1063  fwrite(&buildLog[0], sizeof(char), length, fd1);
1064  fclose(fd1);
1065  }
1066 
1067  return 0;
1068  }
1069 
1070  strcpy(gpuInfo->mArryKnelSrcFile[idx], filename);
1071  if (binaryExisted == 0) {
1072  GeneratBinFromKernelSource(gpuInfo->mpArryPrograms[idx], filename);
1073  }
1074 
1075  gpuInfo->mnFileCount += 1;
1076  return 1;
1077 }
1078 
1079 l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
1080  l_int32 h, l_int32 wpl,
1081  l_uint32* line) {
1082  cl_int clStatus;
1083  KernelEnv rEnv;
1084  size_t globalThreads[2];
1085  size_t localThreads[2];
1086  int gsize;
1087  cl_mem valuesCl;
1088  cl_mem outputCl;
1089 
1090  // global and local work dimensions for Horizontal pass
1091  gsize = (w + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1092  globalThreads[0] = gsize;
1093  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1094  globalThreads[1] = gsize;
1095  localThreads[0] = GROUPSIZE_X;
1096  localThreads[1] = GROUPSIZE_Y;
1097 
1098  SetKernelEnv(&rEnv);
1099 
1100  l_uint32* pResult = (l_uint32*)malloc(w * h * sizeof(l_uint32));
1101  rEnv.mpkKernel =
1102  clCreateKernel(rEnv.mpkProgram, "composeRGBPixel", &clStatus);
1103  CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel");
1104 
1105  // Allocate input and output OCL buffers
1106  valuesCl = allocateZeroCopyBuffer(
1107  rEnv, tiffdata, w * h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1108  outputCl = allocateZeroCopyBuffer(
1109  rEnv, pResult, w * h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1110 
1111  // Kernel arguments
1112  clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl);
1113  CHECK_OPENCL(clStatus, "clSetKernelArg");
1114  clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w);
1115  CHECK_OPENCL(clStatus, "clSetKernelArg");
1116  clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h);
1117  CHECK_OPENCL(clStatus, "clSetKernelArg");
1118  clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
1119  CHECK_OPENCL(clStatus, "clSetKernelArg");
1120  clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl);
1121  CHECK_OPENCL(clStatus, "clSetKernelArg");
1122 
1123  // Kernel enqueue
1124  clStatus =
1125  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1126  globalThreads, localThreads, 0, nullptr, nullptr);
1127  CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel");
1128 
1129  /* map results back from gpu */
1130  void* ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE,
1131  CL_MAP_READ, 0, w * h * sizeof(l_uint32), 0,
1132  nullptr, nullptr, &clStatus);
1133  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl");
1134  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, nullptr, nullptr);
1135 
1136  // Sync
1137  clFinish(rEnv.mpkCmdQueue);
1138  return pResult;
1139 }
1140 
1141 // Morphology Dilate operation for 5x5 structuring element. Invokes the relevant
1142 // OpenCL kernels
1143 static cl_int pixDilateCL_55(l_int32 wpl, l_int32 h) {
1144  size_t globalThreads[2];
1145  cl_mem pixtemp;
1146  cl_int status;
1147  int gsize;
1148  size_t localThreads[2];
1149 
1150  // Horizontal pass
1151  gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1152  globalThreads[0] = gsize;
1153  globalThreads[1] = GROUPSIZE_HMORY;
1154  localThreads[0] = GROUPSIZE_HMORX;
1155  localThreads[1] = GROUPSIZE_HMORY;
1156 
1157  rEnv.mpkKernel =
1158  clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_5x5", &status);
1159  CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5");
1160 
1161  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1162  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1163  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
1164  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
1165 
1166  status =
1167  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1168  globalThreads, localThreads, 0, nullptr, nullptr);
1169 
1170  // Swap source and dest buffers
1171  pixtemp = pixsCLBuffer;
1172  pixsCLBuffer = pixdCLBuffer;
1173  pixdCLBuffer = pixtemp;
1174 
1175  // Vertical
1176  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1177  globalThreads[0] = gsize;
1178  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1179  globalThreads[1] = gsize;
1180  localThreads[0] = GROUPSIZE_X;
1181  localThreads[1] = GROUPSIZE_Y;
1182 
1183  rEnv.mpkKernel =
1184  clCreateKernel(rEnv.mpkProgram, "morphoDilateVer_5x5", &status);
1185  CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5");
1186 
1187  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1188  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1189  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
1190  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
1191  status =
1192  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1193  globalThreads, localThreads, 0, nullptr, nullptr);
1194 
1195  return status;
1196 }
1197 
1198 // Morphology Erode operation for 5x5 structuring element. Invokes the relevant
1199 // OpenCL kernels
1200 static cl_int pixErodeCL_55(l_int32 wpl, l_int32 h) {
1201  size_t globalThreads[2];
1202  cl_mem pixtemp;
1203  cl_int status;
1204  int gsize;
1205  l_uint32 fwmask, lwmask;
1206  size_t localThreads[2];
1207 
1208  lwmask = lmask32[31 - 2];
1209  fwmask = rmask32[31 - 2];
1210 
1211  // Horizontal pass
1212  gsize = (wpl * h + GROUPSIZE_HMORX - 1) / GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1213  globalThreads[0] = gsize;
1214  globalThreads[1] = GROUPSIZE_HMORY;
1215  localThreads[0] = GROUPSIZE_HMORX;
1216  localThreads[1] = GROUPSIZE_HMORY;
1217 
1218  rEnv.mpkKernel =
1219  clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_5x5", &status);
1220  CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5");
1221 
1222  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1223  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1224  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
1225  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
1226 
1227  status =
1228  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1229  globalThreads, localThreads, 0, nullptr, nullptr);
1230 
1231  // Swap source and dest buffers
1232  pixtemp = pixsCLBuffer;
1233  pixsCLBuffer = pixdCLBuffer;
1234  pixdCLBuffer = pixtemp;
1235 
1236  // Vertical
1237  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1238  globalThreads[0] = gsize;
1239  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1240  globalThreads[1] = gsize;
1241  localThreads[0] = GROUPSIZE_X;
1242  localThreads[1] = GROUPSIZE_Y;
1243 
1244  rEnv.mpkKernel =
1245  clCreateKernel(rEnv.mpkProgram, "morphoErodeVer_5x5", &status);
1246  CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5");
1247 
1248  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1249  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1250  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
1251  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
1252  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask);
1253  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask);
1254  status =
1255  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1256  globalThreads, localThreads, 0, nullptr, nullptr);
1257 
1258  return status;
1259 }
1260 
1261 // Morphology Dilate operation. Invokes the relevant OpenCL kernels
1262 static cl_int pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl,
1263  l_int32 h) {
1264  l_int32 xp, yp, xn, yn;
1265  SEL* sel;
1266  size_t globalThreads[2];
1267  cl_mem pixtemp;
1268  cl_int status = 0;
1269  int gsize;
1270  size_t localThreads[2];
1271  char isEven;
1272 
1273  OpenclDevice::SetKernelEnv(&rEnv);
1274 
1275  if (hsize == 5 && vsize == 5) {
1276  // Specific case for 5x5
1277  status = pixDilateCL_55(wpl, h);
1278  return status;
1279  }
1280 
1281  sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1282 
1283  selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1284  selDestroy(&sel);
1285  // global and local work dimensions for Horizontal pass
1286  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1287  globalThreads[0] = gsize;
1288  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1289  globalThreads[1] = gsize;
1290  localThreads[0] = GROUPSIZE_X;
1291  localThreads[1] = GROUPSIZE_Y;
1292 
1293  if (xp > 31 || xn > 31) {
1294  // Generic case.
1295  rEnv.mpkKernel =
1296  clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status);
1297  CHECK_OPENCL(status, "clCreateKernel morphoDilateHor");
1298 
1299  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1300  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1301  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
1302  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
1303  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
1304  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
1305  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1306  nullptr, globalThreads, localThreads, 0,
1307  nullptr, nullptr);
1308 
1309  if (yp > 0 || yn > 0) {
1310  pixtemp = pixsCLBuffer;
1311  pixsCLBuffer = pixdCLBuffer;
1312  pixdCLBuffer = pixtemp;
1313  }
1314  } else if (xp > 0 || xn > 0) {
1315  // Specific Horizontal pass kernel for half width < 32
1316  rEnv.mpkKernel =
1317  clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status);
1318  CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word");
1319  isEven = (xp != xn);
1320 
1321  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1322  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1323  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
1324  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
1325  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
1326  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven);
1327  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1328  nullptr, globalThreads, localThreads, 0,
1329  nullptr, nullptr);
1330 
1331  if (yp > 0 || yn > 0) {
1332  pixtemp = pixsCLBuffer;
1333  pixsCLBuffer = pixdCLBuffer;
1334  pixdCLBuffer = pixtemp;
1335  }
1336  }
1337 
1338  if (yp > 0 || yn > 0) {
1339  rEnv.mpkKernel =
1340  clCreateKernel(rEnv.mpkProgram, "morphoDilateVer", &status);
1341  CHECK_OPENCL(status, "clCreateKernel morphoDilateVer");
1342 
1343  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1344  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1345  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
1346  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
1347  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
1348  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn);
1349  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1350  nullptr, globalThreads, localThreads, 0,
1351  nullptr, nullptr);
1352  }
1353 
1354  return status;
1355 }
1356 
1357 // Morphology Erode operation. Invokes the relevant OpenCL kernels
1358 static cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl,
1359  l_uint32 h) {
1360  l_int32 xp, yp, xn, yn;
1361  SEL* sel;
1362  size_t globalThreads[2];
1363  size_t localThreads[2];
1364  cl_mem pixtemp;
1365  cl_int status = 0;
1366  int gsize;
1367  char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
1368  l_uint32 rwmask, lwmask;
1369  char isEven;
1370 
1371  sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1372 
1373  selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1374  selDestroy(&sel);
1375  OpenclDevice::SetKernelEnv(&rEnv);
1376 
1377  if (hsize == 5 && vsize == 5 && isAsymmetric) {
1378  // Specific kernel for 5x5
1379  status = pixErodeCL_55(wpl, h);
1380  return status;
1381  }
1382 
1383  lwmask = lmask32[31 - (xn & 31)];
1384  rwmask = rmask32[31 - (xp & 31)];
1385 
1386  // global and local work dimensions for Horizontal pass
1387  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1388  globalThreads[0] = gsize;
1389  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1390  globalThreads[1] = gsize;
1391  localThreads[0] = GROUPSIZE_X;
1392  localThreads[1] = GROUPSIZE_Y;
1393 
1394  // Horizontal Pass
1395  if (xp > 31 || xn > 31) {
1396  // Generic case.
1397  rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status);
1398 
1399  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1400  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1401  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
1402  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn);
1403  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl);
1404  status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h);
1405  status =
1406  clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), &isAsymmetric);
1407  status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), &rwmask);
1408  status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), &lwmask);
1409  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1410  nullptr, globalThreads, localThreads, 0,
1411  nullptr, nullptr);
1412 
1413  if (yp > 0 || yn > 0) {
1414  pixtemp = pixsCLBuffer;
1415  pixsCLBuffer = pixdCLBuffer;
1416  pixdCLBuffer = pixtemp;
1417  }
1418  } else if (xp > 0 || xn > 0) {
1419  rEnv.mpkKernel =
1420  clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status);
1421  isEven = (xp != xn);
1422 
1423  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1424  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1425  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp);
1426  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
1427  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
1428  status =
1429  clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric);
1430  status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), &rwmask);
1431  status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), &lwmask);
1432  status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), &isEven);
1433  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1434  nullptr, globalThreads, localThreads, 0,
1435  nullptr, nullptr);
1436 
1437  if (yp > 0 || yn > 0) {
1438  pixtemp = pixsCLBuffer;
1439  pixsCLBuffer = pixdCLBuffer;
1440  pixdCLBuffer = pixtemp;
1441  }
1442  }
1443 
1444  // Vertical Pass
1445  if (yp > 0 || yn > 0) {
1446  rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status);
1447  CHECK_OPENCL(status, "clCreateKernel morphoErodeVer");
1448 
1449  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer);
1450  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer);
1451  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp);
1452  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
1453  status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h);
1454  status =
1455  clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric);
1456  status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn);
1457  status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1458  nullptr, globalThreads, localThreads, 0,
1459  nullptr, nullptr);
1460  }
1461 
1462  return status;
1463 }
1464 
1465 // Morphology Open operation. Invokes the relevant OpenCL kernels
1466 static cl_int pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) {
1467  cl_int status;
1468  cl_mem pixtemp;
1469 
1470  // Erode followed by Dilate
1471  status = pixErodeCL(hsize, vsize, wpl, h);
1472 
1473  pixtemp = pixsCLBuffer;
1474  pixsCLBuffer = pixdCLBuffer;
1475  pixdCLBuffer = pixtemp;
1476 
1477  status = pixDilateCL(hsize, vsize, wpl, h);
1478 
1479  return status;
1480 }
1481 
1482 // Morphology Close operation. Invokes the relevant OpenCL kernels
1483 static cl_int pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) {
1484  cl_int status;
1485  cl_mem pixtemp;
1486 
1487  // Dilate followed by Erode
1488  status = pixDilateCL(hsize, vsize, wpl, h);
1489 
1490  pixtemp = pixsCLBuffer;
1491  pixsCLBuffer = pixdCLBuffer;
1492  pixdCLBuffer = pixtemp;
1493 
1494  status = pixErodeCL(hsize, vsize, wpl, h);
1495 
1496  return status;
1497 }
1498 
1499 // output = buffer1 & ~(buffer2)
1500 static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
1501  cl_mem buffer2) {
1502  cl_int status;
1503  size_t globalThreads[2];
1504  int gsize;
1505  size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
1506 
1507  gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1508  globalThreads[0] = gsize;
1509  gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1510  globalThreads[1] = gsize;
1511 
1512  rEnv.mpkKernel =
1513  clCreateKernel(rEnv.mpkProgram, "pixSubtract_inplace", &status);
1514  CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace");
1515 
1516  // Enqueue a kernel run call.
1517  status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &buffer1);
1518  status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &buffer2);
1519  status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl);
1520  status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h);
1521  status =
1522  clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, nullptr,
1523  globalThreads, localThreads, 0, nullptr, nullptr);
1524 
1525  return status;
1526 }
1527 
1528 // OpenCL implementation of Get Lines from pix function
1529 // Note: Assumes the source and dest opencl buffer are initialized. No check
1530 // done
1531 void OpenclDevice::pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
1532  Pix** pix_hline, Pix** pixClosed,
1533  bool getpixClosed, l_int32 close_hsize,
1534  l_int32 close_vsize, l_int32 open_hsize,
1535  l_int32 open_vsize, l_int32 line_hsize,
1536  l_int32 line_vsize) {
1537  l_uint32 wpl, h;
1538  cl_mem pixtemp;
1539 
1540  wpl = pixGetWpl(pixs);
1541  h = pixGetHeight(pixs);
1542 
1543  // First step : Close Morph operation: Dilate followed by Erode
1544  clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
1545 
1546  // Copy the Close output to CPU buffer
1547  if (getpixClosed) {
1548  *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs,
1549  wpl * h, CL_MAP_READ, true, false);
1550  }
1551 
1552  // Store the output of close operation in an intermediate buffer
1553  // this will be later used for pixsubtract
1554  clStatus =
1555  clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
1556  0, sizeof(int) * wpl * h, 0, nullptr, nullptr);
1557 
1558  // Second step: Open Operation - Erode followed by Dilate
1559  pixtemp = pixsCLBuffer;
1560  pixsCLBuffer = pixdCLBuffer;
1561  pixdCLBuffer = pixtemp;
1562 
1563  clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
1564 
1565  // Third step: Subtract : (Close - Open)
1566  pixtemp = pixsCLBuffer;
1567  pixsCLBuffer = pixdCLBuffer;
1568  pixdCLBuffer = pixdCLIntermediate;
1569  pixdCLIntermediate = pixtemp;
1570 
1571  clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
1572 
1573  // Store the output of Hollow operation in an intermediate buffer
1574  // this will be later used
1575  clStatus =
1576  clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
1577  0, sizeof(int) * wpl * h, 0, nullptr, nullptr);
1578 
1579  pixtemp = pixsCLBuffer;
1580  pixsCLBuffer = pixdCLBuffer;
1581  pixdCLBuffer = pixtemp;
1582 
1583  // Fourth step: Get vertical line
1584  // pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
1585  clStatus = pixOpenCL(1, line_vsize, wpl, h);
1586 
1587  // Copy the vertical line output to CPU buffer
1588  *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h,
1589  CL_MAP_READ, true, false);
1590 
1591  pixtemp = pixsCLBuffer;
1592  pixsCLBuffer = pixdCLIntermediate;
1593  pixdCLIntermediate = pixtemp;
1594 
1595  // Fifth step: Get horizontal line
1596  // pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
1597  clStatus = pixOpenCL(line_hsize, 1, wpl, h);
1598 
1599  // Copy the horizontal line output to CPU buffer
1600  *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h,
1601  CL_MAP_READ, true, true);
1602 
1603  return;
1604 }
1605 
1606 /*************************************************************************
1607  * HistogramRect
1608  * Otsu Thresholding Operations
1609  * histogramAllChannels is laid out as all channel 0, then all channel 1...
1610  * only supports 1 or 4 channels (bytes_per_pixel)
1611  ************************************************************************/
1612 int OpenclDevice::HistogramRectOCL(void* imageData,
1613  int bytes_per_pixel, int bytes_per_line,
1614  int left, // always 0
1615  int top, // always 0
1616  int width, int height, int kHistogramSize,
1617  int* histogramAllChannels) {
1618  cl_int clStatus;
1619  int retVal = 0;
1620  KernelEnv histKern;
1621  SetKernelEnv(&histKern);
1622  KernelEnv histRedKern;
1623  SetKernelEnv(&histRedKern);
1624  /* map imagedata to device as read only */
1625  // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be
1626  // coherent which we don't need.
1627  // faster option would be to allocate initial image buffer
1628  // using a garlic bus memory type
1629  cl_mem imageBuffer = clCreateBuffer(
1630  histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1631  width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus);
1632  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
1633 
1634  /* setup work group size parameters */
1635  int block_size = 256;
1636  cl_uint numCUs;
1637  clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
1638  sizeof(numCUs), &numCUs, nullptr);
1639  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
1640 
1641  int requestedOccupancy = 10;
1642  int numWorkGroups = numCUs * requestedOccupancy;
1643  int numThreads = block_size * numWorkGroups;
1644  size_t local_work_size[] = {static_cast<size_t>(block_size)};
1645  size_t global_work_size[] = {static_cast<size_t>(numThreads)};
1646  size_t red_global_work_size[] = {
1647  static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
1648 
1649  /* map histogramAllChannels as write only */
1650 
1651  cl_mem histogramBuffer = clCreateBuffer(
1652  histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1653  kHistogramSize * bytes_per_pixel * sizeof(int), histogramAllChannels,
1654  &clStatus);
1655  CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer");
1656 
1657  /* intermediate histogram buffer */
1658  int histRed = 256;
1659  int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed;
1660 
1661  cl_mem tmpHistogramBuffer =
1662  clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE,
1663  tmpHistogramBins * sizeof(cl_uint), nullptr, &clStatus);
1664  CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer");
1665 
1666  /* atomic sync buffer */
1667  int* zeroBuffer = new int[1];
1668  zeroBuffer[0] = 0;
1669  cl_mem atomicSyncBuffer = clCreateBuffer(
1670  histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1671  sizeof(cl_int), zeroBuffer, &clStatus);
1672  CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer");
1673  delete[] zeroBuffer;
1674  // Create kernel objects based on bytes_per_pixel
1675  if (bytes_per_pixel == 1) {
1676  histKern.mpkKernel = clCreateKernel(
1677  histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus);
1678  CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel");
1679 
1680  histRedKern.mpkKernel =
1681  clCreateKernel(histRedKern.mpkProgram,
1682  "kernel_HistogramRectOneChannelReduction", &clStatus);
1683  CHECK_OPENCL(clStatus,
1684  "clCreateKernel kernel_HistogramRectOneChannelReduction");
1685  } else {
1686  histKern.mpkKernel = clCreateKernel(
1687  histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus);
1688  CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectAllChannels");
1689 
1690  histRedKern.mpkKernel =
1691  clCreateKernel(histRedKern.mpkProgram,
1692  "kernel_HistogramRectAllChannelsReduction", &clStatus);
1693  CHECK_OPENCL(clStatus,
1694  "clCreateKernel kernel_HistogramRectAllChannelsReduction");
1695  }
1696 
1697  void* ptr;
1698 
1699  // Initialize tmpHistogramBuffer buffer
1700  ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE,
1701  CL_MAP_WRITE, 0, tmpHistogramBins * sizeof(cl_uint),
1702  0, nullptr, nullptr, &clStatus);
1703  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer tmpHistogramBuffer");
1704 
1705  memset(ptr, 0, tmpHistogramBins * sizeof(cl_uint));
1706  clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0,
1707  nullptr, nullptr);
1708 
1709  /* set kernel 1 arguments */
1710  clStatus =
1711  clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer);
1712  CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer");
1713  cl_uint numPixels = width * height;
1714  clStatus = clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels);
1715  CHECK_OPENCL(clStatus, "clSetKernelArg numPixels");
1716  clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem),
1717  &tmpHistogramBuffer);
1718  CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer");
1719 
1720  /* set kernel 2 arguments */
1721  int n = numThreads / bytes_per_pixel;
1722  clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n);
1723  CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer");
1724  clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, sizeof(cl_mem),
1725  &tmpHistogramBuffer);
1726  CHECK_OPENCL(clStatus, "clSetKernelArg tmpHistogramBuffer");
1727  clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem),
1728  &histogramBuffer);
1729  CHECK_OPENCL(clStatus, "clSetKernelArg histogramBuffer");
1730 
1731  /* launch histogram */
1732  clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1,
1733  nullptr, global_work_size, local_work_size,
1734  0, nullptr, nullptr);
1735  CHECK_OPENCL(clStatus,
1736  "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels");
1737  clFinish(histKern.mpkCmdQueue);
1738  if (clStatus != 0) {
1739  retVal = -1;
1740  }
1741  /* launch histogram */
1742  clStatus = clEnqueueNDRangeKernel(
1743  histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, nullptr,
1744  red_global_work_size, local_work_size, 0, nullptr, nullptr);
1745  CHECK_OPENCL(
1746  clStatus,
1747  "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction");
1748  clFinish(histRedKern.mpkCmdQueue);
1749  if (clStatus != 0) {
1750  retVal = -1;
1751  }
1752 
1753  /* map results back from gpu */
1754  ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE,
1755  CL_MAP_READ, 0,
1756  kHistogramSize * bytes_per_pixel * sizeof(int), 0,
1757  nullptr, nullptr, &clStatus);
1758  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer");
1759  if (clStatus != 0) {
1760  retVal = -1;
1761  }
1762  clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0,
1763  nullptr, nullptr);
1764 
1765  clReleaseMemObject(histogramBuffer);
1766  clReleaseMemObject(imageBuffer);
1767  return retVal;
1768 }
1769 
1770 /*************************************************************************
1771  * Threshold the rectangle, taking everything except the image buffer pointer
1772  * from the class, using thresholds/hi_values to the output IMAGE.
1773  * only supports 1 or 4 channels
1774  ************************************************************************/
1775 int OpenclDevice::ThresholdRectToPixOCL(unsigned char* imageData,
1776  int bytes_per_pixel, int bytes_per_line,
1777  int* thresholds, int* hi_values,
1778  Pix** pix, int height, int width,
1779  int top, int left) {
1780  int retVal = 0;
1781  /* create pix result buffer */
1782  *pix = pixCreate(width, height, 1);
1783  uint32_t* pixData = pixGetData(*pix);
1784  int wpl = pixGetWpl(*pix);
1785  int pixSize = wpl * height * sizeof(uint32_t); // number of pixels
1786 
1787  cl_int clStatus;
1788  KernelEnv rEnv;
1789  SetKernelEnv(&rEnv);
1790 
1791  /* setup work group size parameters */
1792  int block_size = 256;
1793  cl_uint numCUs = 6;
1794  clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
1795  sizeof(numCUs), &numCUs, nullptr);
1796  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
1797 
1798  int requestedOccupancy = 10;
1799  int numWorkGroups = numCUs * requestedOccupancy;
1800  int numThreads = block_size * numWorkGroups;
1801  size_t local_work_size[] = {(size_t)block_size};
1802  size_t global_work_size[] = {(size_t)numThreads};
1803 
1804  /* map imagedata to device as read only */
1805  // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be
1806  // coherent which we don't need.
1807  // faster option would be to allocate initial image buffer
1808  // using a garlic bus memory type
1809  cl_mem imageBuffer = clCreateBuffer(
1810  rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1811  width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus);
1812  CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
1813 
1814  /* map pix as write only */
1815  pixThBuffer =
1816  clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1817  pixSize, pixData, &clStatus);
1818  CHECK_OPENCL(clStatus, "clCreateBuffer pix");
1819 
1820  /* map thresholds and hi_values */
1821  cl_mem thresholdsBuffer =
1822  clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1823  bytes_per_pixel * sizeof(int), thresholds, &clStatus);
1824  CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer");
1825  cl_mem hiValuesBuffer =
1826  clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1827  bytes_per_pixel * sizeof(int), hi_values, &clStatus);
1828  CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer");
1829 
1830  /* compile kernel */
1831  if (bytes_per_pixel == 4) {
1832  rEnv.mpkKernel =
1833  clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus);
1834  CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix");
1835  } else {
1836  rEnv.mpkKernel = clCreateKernel(
1837  rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus);
1838  CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan");
1839  }
1840 
1841  /* set kernel arguments */
1842  clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer);
1843  CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer");
1844  clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height);
1845  CHECK_OPENCL(clStatus, "clSetKernelArg height");
1846  clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), &width);
1847  CHECK_OPENCL(clStatus, "clSetKernelArg width");
1848  clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl);
1849  CHECK_OPENCL(clStatus, "clSetKernelArg wpl");
1850  clStatus =
1851  clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &thresholdsBuffer);
1852  CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer");
1853  clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), &hiValuesBuffer);
1854  CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer");
1855  clStatus = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer);
1856  CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer");
1857 
1858  /* launch kernel & wait */
1859  clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1,
1860  nullptr, global_work_size, local_work_size,
1861  0, nullptr, nullptr);
1862  CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix");
1863  clFinish(rEnv.mpkCmdQueue);
1864  if (clStatus != 0) {
1865  tprintf("Setting return value to -1\n");
1866  retVal = -1;
1867  }
1868  /* map results back from gpu */
1869  void* ptr =
1870  clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0,
1871  pixSize, 0, nullptr, nullptr, &clStatus);
1872  CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer");
1873  clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, nullptr,
1874  nullptr);
1875 
1876  clReleaseMemObject(imageBuffer);
1877  clReleaseMemObject(thresholdsBuffer);
1878  clReleaseMemObject(hiValuesBuffer);
1879 
1880  return retVal;
1881 }
1882 
1883 /******************************************************************************
1884  * Data Types for Device Selection
1885  *****************************************************************************/
1886 
1887 struct TessScoreEvaluationInputData {
1888  int height;
1889  int width;
1890  int numChannels;
1891  unsigned char* imageData;
1892  Pix* pix;
1893 };
1894 
1895 static void populateTessScoreEvaluationInputData(
1896  TessScoreEvaluationInputData* input) {
1897  srand(1);
1898  // 8.5x11 inches @ 300dpi rounded to clean multiples
1899  int height = 3328; // %256
1900  int width = 2560; // %512
1901  int numChannels = 4;
1902  input->height = height;
1903  input->width = width;
1904  input->numChannels = numChannels;
1905  unsigned char(*imageData4)[4] = (unsigned char(*)[4])malloc(
1906  height * width * numChannels *
1907  sizeof(unsigned char)); // new unsigned char[4][height*width];
1908  input->imageData = (unsigned char*)&imageData4[0];
1909 
1910  // zero out image
1911  unsigned char pixelWhite[4] = {0, 0, 0, 255};
1912  unsigned char pixelBlack[4] = {255, 255, 255, 255};
1913  for (int p = 0; p < height * width; p++) {
1914  // unsigned char tmp[4] = imageData4[0];
1915  imageData4[p][0] = pixelWhite[0];
1916  imageData4[p][1] = pixelWhite[1];
1917  imageData4[p][2] = pixelWhite[2];
1918  imageData4[p][3] = pixelWhite[3];
1919  }
1920  // random lines to be eliminated
1921  int maxLineWidth = 64; // pixels wide
1922  int numLines = 10;
1923  // vertical lines
1924  for (int i = 0; i < numLines; i++) {
1925  int lineWidth = rand() % maxLineWidth;
1926  int vertLinePos = lineWidth + rand() % (width - 2 * lineWidth);
1927  // tprintf("[PI] VerticalLine @ %i (w=%i)\n", vertLinePos, lineWidth);
1928  for (int row = vertLinePos - lineWidth / 2;
1929  row < vertLinePos + lineWidth / 2; row++) {
1930  for (int col = 0; col < height; col++) {
1931  // imageData4[row*width+col] = pixelBlack;
1932  imageData4[row * width + col][0] = pixelBlack[0];
1933  imageData4[row * width + col][1] = pixelBlack[1];
1934  imageData4[row * width + col][2] = pixelBlack[2];
1935  imageData4[row * width + col][3] = pixelBlack[3];
1936  }
1937  }
1938  }
1939  // horizontal lines
1940  for (int i = 0; i < numLines; i++) {
1941  int lineWidth = rand() % maxLineWidth;
1942  int horLinePos = lineWidth + rand() % (height - 2 * lineWidth);
1943  // tprintf("[PI] HorizontalLine @ %i (w=%i)\n", horLinePos, lineWidth);
1944  for (int row = 0; row < width; row++) {
1945  for (int col = horLinePos - lineWidth / 2;
1946  col < horLinePos + lineWidth / 2;
1947  col++) { // for (int row = vertLinePos-lineWidth/2; row <
1948  // vertLinePos+lineWidth/2; row++) {
1949  // tprintf("[PI] HoizLine pix @ (%3i, %3i)\n", row, col);
1950  // imageData4[row*width+col] = pixelBlack;
1951  imageData4[row * width + col][0] = pixelBlack[0];
1952  imageData4[row * width + col][1] = pixelBlack[1];
1953  imageData4[row * width + col][2] = pixelBlack[2];
1954  imageData4[row * width + col][3] = pixelBlack[3];
1955  }
1956  }
1957  }
1958  // spots (noise, squares)
1959  float fractionBlack = 0.1; // how much of the image should be blackened
1960  int numSpots =
1961  (height * width) * fractionBlack / (maxLineWidth * maxLineWidth / 2 / 2);
1962  for (int i = 0; i < numSpots; i++) {
1963  int lineWidth = rand() % maxLineWidth;
1964  int col = lineWidth + rand() % (width - 2 * lineWidth);
1965  int row = lineWidth + rand() % (height - 2 * lineWidth);
1966  // tprintf("[PI] Spot[%i/%i] @ (%3i, %3i)\n", i, numSpots, row, col );
1967  for (int r = row - lineWidth / 2; r < row + lineWidth / 2; r++) {
1968  for (int c = col - lineWidth / 2; c < col + lineWidth / 2; c++) {
1969  // tprintf("[PI] \tSpot[%i/%i] @ (%3i, %3i)\n", i, numSpots, r, c );
1970  // imageData4[row*width+col] = pixelBlack;
1971  imageData4[r * width + c][0] = pixelBlack[0];
1972  imageData4[r * width + c][1] = pixelBlack[1];
1973  imageData4[r * width + c][2] = pixelBlack[2];
1974  imageData4[r * width + c][3] = pixelBlack[3];
1975  }
1976  }
1977  }
1978 
1979  input->pix = pixCreate(input->width, input->height, 8 * input->numChannels);
1980 }
1981 
1982 struct TessDeviceScore {
1983  float time; // small time means faster device
1984  bool clError; // were there any opencl errors
1985  bool valid; // was the correct response generated
1986 };
1987 
1988 /******************************************************************************
1989  * Micro Benchmarks for Device Selection
1990  *****************************************************************************/
1991 
1992 static double composeRGBPixelMicroBench(GPUEnv* env,
1993  TessScoreEvaluationInputData input,
1994  ds_device_type type) {
1995  double time = 0;
1996 #if ON_WINDOWS
1997  LARGE_INTEGER freq, time_funct_start, time_funct_end;
1998  QueryPerformanceFrequency(&freq);
1999 #elif ON_APPLE
2000  mach_timebase_info_data_t info = {0, 0};
2001  mach_timebase_info(&info);
2002  long long start, stop;
2003 #else
2004  timespec time_funct_start, time_funct_end;
2005 #endif
2006  // input data
2007  l_uint32* tiffdata =
2008  (l_uint32*)input.imageData; // same size and random data; data doesn't
2009  // change workload
2010 
2011  // function call
2012  if (type == DS_DEVICE_OPENCL_DEVICE) {
2013 #if ON_WINDOWS
2014  QueryPerformanceCounter(&time_funct_start);
2015 #elif ON_APPLE
2016  start = mach_absolute_time();
2017 #else
2018  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2019 #endif
2020 
2021  OpenclDevice::gpuEnv = *env;
2022  int wpl = pixGetWpl(input.pix);
2023  OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height,
2024  wpl, nullptr);
2025 #if ON_WINDOWS
2026  QueryPerformanceCounter(&time_funct_end);
2027  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2028  (double)(freq.QuadPart);
2029 #elif ON_APPLE
2030  stop = mach_absolute_time();
2031  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2032 #else
2033  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2034  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2035  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2036 #endif
2037 
2038  } else {
2039 #if ON_WINDOWS
2040  QueryPerformanceCounter(&time_funct_start);
2041 #elif ON_APPLE
2042  start = mach_absolute_time();
2043 #else
2044  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2045 #endif
2046  Pix* pix = pixCreate(input.width, input.height, 32);
2047  l_uint32* pixData = pixGetData(pix);
2048  int i, j;
2049  int idx = 0;
2050  for (i = 0; i < input.height; i++) {
2051  for (j = 0; j < input.width; j++) {
2052  l_uint32 tiffword = tiffdata[i * input.width + j];
2053  l_int32 rval = ((tiffword)&0xff);
2054  l_int32 gval = (((tiffword) >> 8) & 0xff);
2055  l_int32 bval = (((tiffword) >> 16) & 0xff);
2056  l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8);
2057  pixData[idx] = value;
2058  idx++;
2059  }
2060  }
2061 #if ON_WINDOWS
2062  QueryPerformanceCounter(&time_funct_end);
2063  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2064  (double)(freq.QuadPart);
2065 #elif ON_APPLE
2066  stop = mach_absolute_time();
2067  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2068 #else
2069  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2070  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2071  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2072 #endif
2073  pixDestroy(&pix);
2074  }
2075 
2076  return time;
2077 }
2078 
2079 static double histogramRectMicroBench(GPUEnv* env,
2080  TessScoreEvaluationInputData input,
2081  ds_device_type type) {
2082  double time;
2083 #if ON_WINDOWS
2084  LARGE_INTEGER freq, time_funct_start, time_funct_end;
2085  QueryPerformanceFrequency(&freq);
2086 #elif ON_APPLE
2087  mach_timebase_info_data_t info = {0, 0};
2088  mach_timebase_info(&info);
2089  long long start, stop;
2090 #else
2091  timespec time_funct_start, time_funct_end;
2092 #endif
2093 
2094  const int left = 0;
2095  const int top = 0;
2096  int kHistogramSize = 256;
2097  int bytes_per_line = input.width * input.numChannels;
2098  int* histogramAllChannels = new int[kHistogramSize * input.numChannels];
2099  // function call
2100  if (type == DS_DEVICE_OPENCL_DEVICE) {
2101 #if ON_WINDOWS
2102  QueryPerformanceCounter(&time_funct_start);
2103 #elif ON_APPLE
2104  start = mach_absolute_time();
2105 #else
2106  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2107 #endif
2108 
2109  OpenclDevice::gpuEnv = *env;
2110  int retVal = OpenclDevice::HistogramRectOCL(
2111  input.imageData, input.numChannels, bytes_per_line, left, top,
2112  input.width, input.height, kHistogramSize, histogramAllChannels);
2113 
2114 #if ON_WINDOWS
2115  QueryPerformanceCounter(&time_funct_end);
2116  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2117  (double)(freq.QuadPart);
2118 #elif ON_APPLE
2119  stop = mach_absolute_time();
2120  if (retVal == 0) {
2121  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2122  } else {
2123  time = FLT_MAX;
2124  }
2125 #else
2126  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2127  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2128  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2129 #endif
2130  } else {
2131  int* histogram = new int[kHistogramSize];
2132 #if ON_WINDOWS
2133  QueryPerformanceCounter(&time_funct_start);
2134 #elif ON_APPLE
2135  start = mach_absolute_time();
2136 #else
2137  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2138 #endif
2139  for (int ch = 0; ch < input.numChannels; ++ch) {
2140  tesseract::HistogramRect(input.pix, input.numChannels, left, top,
2141  input.width, input.height, histogram);
2142  }
2143 #if ON_WINDOWS
2144  QueryPerformanceCounter(&time_funct_end);
2145  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2146  (double)(freq.QuadPart);
2147 #elif ON_APPLE
2148  stop = mach_absolute_time();
2149  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2150 #else
2151  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2152  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2153  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2154 #endif
2155  delete[] histogram;
2156  }
2157 
2158  // cleanup
2159  delete[] histogramAllChannels;
2160  return time;
2161 }
2162 
2163 // Reproducing the ThresholdRectToPix native version
2164 static void ThresholdRectToPix_Native(const unsigned char* imagedata,
2165  int bytes_per_pixel, int bytes_per_line,
2166  const int* thresholds,
2167  const int* hi_values, Pix** pix) {
2168  int top = 0;
2169  int left = 0;
2170  int width = pixGetWidth(*pix);
2171  int height = pixGetHeight(*pix);
2172 
2173  *pix = pixCreate(width, height, 1);
2174  uint32_t* pixdata = pixGetData(*pix);
2175  int wpl = pixGetWpl(*pix);
2176  const unsigned char* srcdata =
2177  imagedata + top * bytes_per_line + left * bytes_per_pixel;
2178  for (int y = 0; y < height; ++y) {
2179  const uint8_t* linedata = srcdata;
2180  uint32_t* pixline = pixdata + y * wpl;
2181  for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
2182  bool white_result = true;
2183  for (int ch = 0; ch < bytes_per_pixel; ++ch) {
2184  if (hi_values[ch] >= 0 &&
2185  (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
2186  white_result = false;
2187  break;
2188  }
2189  }
2190  if (white_result)
2191  CLEAR_DATA_BIT(pixline, x);
2192  else
2193  SET_DATA_BIT(pixline, x);
2194  }
2195  srcdata += bytes_per_line;
2196  }
2197 }
2198 
2199 static double thresholdRectToPixMicroBench(GPUEnv* env,
2200  TessScoreEvaluationInputData input,
2201  ds_device_type type) {
2202  double time;
2203 #if ON_WINDOWS
2204  LARGE_INTEGER freq, time_funct_start, time_funct_end;
2205  QueryPerformanceFrequency(&freq);
2206 #elif ON_APPLE
2207  mach_timebase_info_data_t info = {0, 0};
2208  mach_timebase_info(&info);
2209  long long start, stop;
2210 #else
2211  timespec time_funct_start, time_funct_end;
2212 #endif
2213 
2214  // input data
2215  unsigned char pixelHi = (unsigned char)255;
2216  int thresholds[4] = {pixelHi, pixelHi, pixelHi, pixelHi};
2217 
2218  // Pix* pix = pixCreate(width, height, 1);
2219  int top = 0;
2220  int left = 0;
2221  int bytes_per_line = input.width * input.numChannels;
2222 
2223  // function call
2224  if (type == DS_DEVICE_OPENCL_DEVICE) {
2225 #if ON_WINDOWS
2226  QueryPerformanceCounter(&time_funct_start);
2227 #elif ON_APPLE
2228  start = mach_absolute_time();
2229 #else
2230  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2231 #endif
2232 
2233  OpenclDevice::gpuEnv = *env;
2234  int hi_values[4];
2235  int retVal = OpenclDevice::ThresholdRectToPixOCL(
2236  input.imageData, input.numChannels, bytes_per_line, thresholds,
2237  hi_values, &input.pix, input.height, input.width, top, left);
2238 
2239 #if ON_WINDOWS
2240  QueryPerformanceCounter(&time_funct_end);
2241  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2242  (double)(freq.QuadPart);
2243 #elif ON_APPLE
2244  stop = mach_absolute_time();
2245  if (retVal == 0) {
2246  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2247  } else {
2248  time = FLT_MAX;
2249  }
2250 
2251 #else
2252  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2253  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2254  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2255 #endif
2256  } else {
2257  tesseract::ImageThresholder thresholder;
2258  thresholder.SetImage(input.pix);
2259 #if ON_WINDOWS
2260  QueryPerformanceCounter(&time_funct_start);
2261 #elif ON_APPLE
2262  start = mach_absolute_time();
2263 #else
2264  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2265 #endif
2266  int hi_values[4] = {};
2267  ThresholdRectToPix_Native(input.imageData, input.numChannels,
2268  bytes_per_line, thresholds, hi_values,
2269  &input.pix);
2270 
2271 #if ON_WINDOWS
2272  QueryPerformanceCounter(&time_funct_end);
2273  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2274  (double)(freq.QuadPart);
2275 #elif ON_APPLE
2276  stop = mach_absolute_time();
2277  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2278 #else
2279  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2280  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2281  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2282 #endif
2283  }
2284 
2285  return time;
2286 }
2287 
2288 static double getLineMasksMorphMicroBench(GPUEnv* env,
2289  TessScoreEvaluationInputData input,
2290  ds_device_type type) {
2291  double time = 0;
2292 #if ON_WINDOWS
2293  LARGE_INTEGER freq, time_funct_start, time_funct_end;
2294  QueryPerformanceFrequency(&freq);
2295 #elif ON_APPLE
2296  mach_timebase_info_data_t info = {0, 0};
2297  mach_timebase_info(&info);
2298  long long start, stop;
2299 #else
2300  timespec time_funct_start, time_funct_end;
2301 #endif
2302 
2303  // input data
2304  int resolution = 300;
2305  int wpl = pixGetWpl(input.pix);
2306  int kThinLineFraction = 20; // tess constant
2307  int kMinLineLengthFraction = 4; // tess constant
2308  int max_line_width = resolution / kThinLineFraction;
2309  int min_line_length = resolution / kMinLineLengthFraction;
2310  int closing_brick = max_line_width / 3;
2311 
2312  // function call
2313  if (type == DS_DEVICE_OPENCL_DEVICE) {
2314 #if ON_WINDOWS
2315  QueryPerformanceCounter(&time_funct_start);
2316 #elif ON_APPLE
2317  start = mach_absolute_time();
2318 #else
2319  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2320 #endif
2321  OpenclDevice::gpuEnv = *env;
2322  OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
2323  Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr;
2324  OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline,
2325  &pix_closed, true, closing_brick, closing_brick,
2326  max_line_width, max_line_width, min_line_length,
2327  min_line_length);
2328 
2329  OpenclDevice::releaseMorphCLBuffers();
2330 
2331 #if ON_WINDOWS
2332  QueryPerformanceCounter(&time_funct_end);
2333  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2334  (double)(freq.QuadPart);
2335 #elif ON_APPLE
2336  stop = mach_absolute_time();
2337  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2338 #else
2339  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2340  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2341  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2342 #endif
2343  } else {
2344 #if ON_WINDOWS
2345  QueryPerformanceCounter(&time_funct_start);
2346 #elif ON_APPLE
2347  start = mach_absolute_time();
2348 #else
2349  clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
2350 #endif
2351 
2352  // native serial code
2353  Pix* src_pix = input.pix;
2354  Pix* pix_closed =
2355  pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
2356  Pix* pix_solid =
2357  pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
2358  Pix* pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
2359  pixDestroy(&pix_solid);
2360  Pix* pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
2361  Pix* pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
2362  pixDestroy(&pix_hline);
2363  pixDestroy(&pix_vline);
2364  pixDestroy(&pix_hollow);
2365 
2366 #if ON_WINDOWS
2367  QueryPerformanceCounter(&time_funct_end);
2368  time = (time_funct_end.QuadPart - time_funct_start.QuadPart) /
2369  (double)(freq.QuadPart);
2370 #elif ON_APPLE
2371  stop = mach_absolute_time();
2372  time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9;
2373 #else
2374  clock_gettime(CLOCK_MONOTONIC, &time_funct_end);
2375  time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
2376  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
2377 #endif
2378  }
2379 
2380  return time;
2381 }
2382 
2383 /******************************************************************************
2384  * Device Selection
2385  *****************************************************************************/
2386 
2387 #include <cstdlib>
2388 
2389 // encode score object as byte string
2390 static ds_status serializeScore(ds_device* device, uint8_t** serializedScore,
2391  unsigned int* serializedScoreSize) {
2392  *serializedScoreSize = sizeof(TessDeviceScore);
2393  *serializedScore = new uint8_t[*serializedScoreSize];
2394  memcpy(*serializedScore, device->score, *serializedScoreSize);
2395  return DS_SUCCESS;
2396 }
2397 
2398 // parses byte string and stores in score object
2399 static ds_status deserializeScore(ds_device* device,
2400  const uint8_t* serializedScore,
2401  unsigned int serializedScoreSize) {
2402  // check that serializedScoreSize == sizeof(TessDeviceScore);
2403  device->score = new TessDeviceScore;
2404  memcpy(device->score, serializedScore, serializedScoreSize);
2405  return DS_SUCCESS;
2406 }
2407 
2408 static ds_status releaseScore(TessDeviceScore* score) {
2409  delete score;
2410  return DS_SUCCESS;
2411 }
2412 
2413 // evaluate devices
2414 static ds_status evaluateScoreForDevice(ds_device* device, void* inputData) {
2415  // overwrite statuc gpuEnv w/ current device
2416  // so native opencl calls can be used; they use static gpuEnv
2417  tprintf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName,
2418  device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
2419  GPUEnv* env = nullptr;
2420  if (device->type == DS_DEVICE_OPENCL_DEVICE) {
2421  env = &OpenclDevice::gpuEnv;
2422  memset(env, 0, sizeof(*env));
2423  // tprintf("[DS] populating tmp GPUEnv from device\n");
2424  populateGPUEnvFromDevice(env, device->oclDeviceID);
2425  env->mnFileCount = 0; // argc;
2426  env->mnKernelCount = 0UL;
2427  // tprintf("[DS] compiling kernels for tmp GPUEnv\n");
2428  OpenclDevice::CompileKernelFile(env, "");
2429  }
2430 
2431  TessScoreEvaluationInputData* input =
2432  static_cast<TessScoreEvaluationInputData*>(inputData);
2433 
2434  // pixReadTiff
2435  double composeRGBPixelTime =
2436  composeRGBPixelMicroBench(env, *input, device->type);
2437 
2438  // HistogramRect
2439  double histogramRectTime = histogramRectMicroBench(env, *input, device->type);
2440 
2441  // ThresholdRectToPix
2442  double thresholdRectToPixTime =
2443  thresholdRectToPixMicroBench(env, *input, device->type);
2444 
2445  // getLineMasks
2446  double getLineMasksMorphTime =
2447  getLineMasksMorphMicroBench(env, *input, device->type);
2448 
2449  // weigh times (% of cpu time)
2450  // these weights should be the % execution time that the native cpu code took
2451  float composeRGBPixelWeight = 1.2f;
2452  float histogramRectWeight = 2.4f;
2453  float thresholdRectToPixWeight = 4.5f;
2454  float getLineMasksMorphWeight = 5.0f;
2455 
2456  float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
2457  histogramRectWeight * histogramRectTime +
2458  thresholdRectToPixWeight * thresholdRectToPixTime +
2459  getLineMasksMorphWeight * getLineMasksMorphTime;
2460  device->score = new TessDeviceScore;
2461  device->score->time = weightedTime;
2462 
2463  tprintf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName,
2464  device->type == DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native");
2465  tprintf("[DS]%25s: %f (w=%.1f)\n", "composeRGBPixel", composeRGBPixelTime,
2466  composeRGBPixelWeight);
2467  tprintf("[DS]%25s: %f (w=%.1f)\n", "HistogramRect", histogramRectTime,
2468  histogramRectWeight);
2469  tprintf("[DS]%25s: %f (w=%.1f)\n", "ThresholdRectToPix",
2470  thresholdRectToPixTime, thresholdRectToPixWeight);
2471  tprintf("[DS]%25s: %f (w=%.1f)\n", "getLineMasksMorph", getLineMasksMorphTime,
2472  getLineMasksMorphWeight);
2473  tprintf("[DS]%25s: %f\n", "Score", device->score->time);
2474  return DS_SUCCESS;
2475 }
2476 
2477 // initial call to select device
2478 ds_device OpenclDevice::getDeviceSelection() {
2479  if (!deviceIsSelected) {
2480  // check if opencl is available at runtime
2481  if (1 == LoadOpencl()) {
2482  // opencl is available
2483  // setup devices
2484  ds_status status;
2485  ds_profile* profile;
2486  status = initDSProfile(&profile, "v0.1");
2487  // try reading scores from file
2488  const char* fileName = "tesseract_opencl_profile_devices.dat";
2489  status = readProfileFromFile(profile, deserializeScore, fileName);
2490  if (status != DS_SUCCESS) {
2491  // need to run evaluation
2492  tprintf("[DS] Profile file not available (%s); performing profiling.\n",
2493  fileName);
2494 
2495  // create input data
2496  TessScoreEvaluationInputData input;
2497  populateTessScoreEvaluationInputData(&input);
2498  // perform evaluations
2499  unsigned int numUpdates;
2500  status = profileDevices(profile, DS_EVALUATE_ALL,
2501  evaluateScoreForDevice, &input, &numUpdates);
2502  // write scores to file
2503  if (status == DS_SUCCESS) {
2504  status = writeProfileToFile(profile, serializeScore, fileName);
2505  if (status == DS_SUCCESS) {
2506  tprintf("[DS] Scores written to file (%s).\n", fileName);
2507  } else {
2508  tprintf(
2509  "[DS] Error saving scores to file (%s); scores not written to "
2510  "file.\n",
2511  fileName);
2512  }
2513  } else {
2514  tprintf(
2515  "[DS] Unable to evaluate performance; scores not written to "
2516  "file.\n");
2517  }
2518  } else {
2519  tprintf("[DS] Profile read from file (%s).\n", fileName);
2520  }
2521 
2522  // we now have device scores either from file or evaluation
2523  // select fastest using custom Tesseract selection algorithm
2524  float bestTime = FLT_MAX; // begin search with worst possible time
2525  int bestDeviceIdx = -1;
2526  for (unsigned d = 0; d < profile->numDevices; d++) {
2527  ds_device device = profile->devices[d];
2528  if (device.score == nullptr) continue;
2529  TessDeviceScore score = *device.score;
2530 
2531  float time = score.time;
2532  tprintf("[DS] Device[%u] %i:%s score is %f\n", d + 1, device.type,
2533  device.oclDeviceName, time);
2534  if (time < bestTime) {
2535  bestTime = time;
2536  bestDeviceIdx = d;
2537  }
2538  }
2539  if (bestDeviceIdx >= 0) {
2540  tprintf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
2541  profile->devices[bestDeviceIdx].oclDeviceName,
2542  profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
2543  ? "OpenCL"
2544  : "Native");
2545  }
2546  // cleanup
2547  // TODO: call destructor for profile object?
2548 
2549  bool overridden = false;
2550  char* overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE");
2551  if (overrideDeviceStr != nullptr) {
2552  int overrideDeviceIdx = atoi(overrideDeviceStr);
2553  if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) {
2554  tprintf(
2555  "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, "
2556  "%i)\n",
2557  overrideDeviceStr, overrideDeviceIdx);
2558  bestDeviceIdx = overrideDeviceIdx - 1;
2559  overridden = true;
2560  } else {
2561  tprintf(
2562  "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are "
2563  "valid devices).\n",
2564  overrideDeviceStr, profile->numDevices);
2565  }
2566  }
2567 
2568  if (overridden) {
2569  tprintf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
2570  profile->devices[bestDeviceIdx].oclDeviceName,
2571  profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
2572  ? "OpenCL"
2573  : "Native");
2574  }
2575  selectedDevice = profile->devices[bestDeviceIdx];
2576  // cleanup
2577  releaseDSProfile(profile, releaseScore);
2578  } else {
2579  // opencl isn't available at runtime, select native cpu device
2580  tprintf("[DS] OpenCL runtime not available.\n");
2581  selectedDevice.type = DS_DEVICE_NATIVE_CPU;
2582  selectedDevice.oclDeviceName = "(null)";
2583  selectedDevice.score = nullptr;
2584  selectedDevice.oclDeviceID = nullptr;
2585  selectedDevice.oclDriverVersion = nullptr;
2586  }
2587  deviceIsSelected = true;
2588  }
2589  return selectedDevice;
2590 }
2591 
2592 bool OpenclDevice::selectedDeviceIsOpenCL() {
2593  ds_device device = getDeviceSelection();
2594  return (device.type == DS_DEVICE_OPENCL_DEVICE);
2595 }
2596 
2597 #endif
tesseract::kMinLineLengthFraction
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
Definition: linefind.cpp:43
tesseract::kThinLineFraction
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
Definition: linefind.cpp:41
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesstrain_utils.cleanup
def cleanup(ctx)
Definition: tesstrain_utils.py:288
openclwrapper.h
file
Definition: include_gunit.h:22
tesseract::kHistogramSize
const int kHistogramSize
Definition: otsuthr.h:27
otsuthr.h
tesseract::HistogramRect
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
Definition: otsuthr.cpp:166
thresholder.h
tesseract::ImageThresholder::SetImage
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65
oclkernels.h
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
errcode.h
tesstrain_utils.type
type
Definition: tesstrain_utils.py:141
tesseract::ImageThresholder
Definition: thresholder.h:35