tesseract  4.0.0-1-g2a2b
openclwrapper.h
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 
11 #ifndef TESSERACT_OPENCL_OPENCLWRAPPER_H_
12 #define TESSERACT_OPENCL_OPENCLWRAPPER_H_
13 
14 #include <cstdio>
15 #include "allheaders.h"
16 #include "pix.h"
17 #include "tprintf.h"
18 
19 // including CL/cl.h doesn't occur until USE_OPENCL defined below
20 
21 // Platform selection: exactly one ON_<PLATFORM> flag is defined as 1 and the others as 0; IF_<PLATFORM>(X) expands to X only on that platform and to nothing elsewhere; NOT_WINDOWS(X) expands to X on every platform except Windows.
22 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
23  defined(__CYGWIN__) || defined(__MINGW32__)  // Windows, including Cygwin and MinGW builds
24 #define ON_WINDOWS 1
25 #define ON_LINUX 0
26 #define ON_APPLE 0
27 #define ON_OTHER 0
28 #define IF_WINDOWS(X) X
29 #define IF_LINUX(X)
30 #define IF_APPLE(X)
31 #define IF_OTHER(X)
32 #define NOT_WINDOWS(X)
33 #elif defined(__linux__)  // Linux
34 #define ON_WINDOWS 0
35 #define ON_LINUX 1
36 #define ON_APPLE 0
37 #define ON_OTHER 0
38 #define IF_WINDOWS(X)
39 #define IF_LINUX(X) X
40 #define IF_APPLE(X)
41 #define IF_OTHER(X)
42 #define NOT_WINDOWS(X) X
43 #elif defined(__APPLE__)  // Apple platforms
44 #define ON_WINDOWS 0
45 #define ON_LINUX 0
46 #define ON_APPLE 1
47 #define ON_OTHER 0
48 #define IF_WINDOWS(X)
49 #define IF_LINUX(X)
50 #define IF_APPLE(X) X
51 #define IF_OTHER(X)
52 #define NOT_WINDOWS(X) X
53 #else  // anything not matched above
54 #define ON_WINDOWS 0
55 #define ON_LINUX 0
56 #define ON_APPLE 0
57 #define ON_OTHER 1
58 #define IF_WINDOWS(X)
59 #define IF_LINUX(X)
60 #define IF_APPLE(X)
61 #define IF_OTHER(X) X
62 #define NOT_WINDOWS(X) X
63 #endif  // platform selection
64 
65 #if ON_LINUX
66 #include <ctime>
67 #endif
68 
69 /************************************************************************************
70  * Enable/disable reporting of performance.
71  * PERF_COUNT_VERBOSE
72  * 0 - no reporting
73  * 1 - no reporting
74  * 2 - report total function call time for functions we're tracking
75  * 3 - additionally report a breakdown of function calls (kernel launch,
76  *     kernel time, data copies)
77  ************************************************************************************/
78 #define PERF_COUNT_VERBOSE 1
79 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
80 
81 #if ON_WINDOWS
82 
83 #if PERF_COUNT_VERBOSE >= 2
84 #define PERF_COUNT_START(FUNCT_NAME) \
85  char* funct_name = FUNCT_NAME; \
86  double elapsed_time_sec; \
87  LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, \
88  time_sub_end; \
89  QueryPerformanceFrequency(&freq); \
90  QueryPerformanceCounter(&time_funct_start); \
91  time_sub_start = time_funct_start; \
92  time_sub_end = time_funct_start;
93 
94 #define PERF_COUNT_END \
95  QueryPerformanceCounter(&time_funct_end); \
96  elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \
97  (double)(freq.QuadPart); \
98  tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
99 #else
100 #define PERF_COUNT_START(FUNCT_NAME)
101 #define PERF_COUNT_END
102 #endif
103 
104 #if PERF_COUNT_VERBOSE >= 3
105 #define PERF_COUNT_SUB(SUB) \
106  QueryPerformanceCounter(&time_sub_end); \
107  elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \
108  (double)(freq.QuadPart); \
109  tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
110  time_sub_start = time_sub_end;
111 #else
112 #define PERF_COUNT_SUB(SUB)
113 #endif
114 
115 // not on windows
116 #else
117 
118 #if PERF_COUNT_VERBOSE >= 2
119 #define PERF_COUNT_START(FUNCT_NAME) \
120  char* funct_name = FUNCT_NAME; \
121  double elapsed_time_sec; \
122  timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
123  clock_gettime(CLOCK_MONOTONIC, &time_funct_start); \
124  time_sub_start = time_funct_start; \
125  time_sub_end = time_funct_start;
126 
127 #define PERF_COUNT_END \
128  clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \
129  elapsed_time_sec = \
130  (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \
131  (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \
132  tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
133 #else
134 #define PERF_COUNT_START(FUNCT_NAME)
135 #define PERF_COUNT_END
136 #endif
137 
138 #if PERF_COUNT_VERBOSE >= 3
139 #define PERF_COUNT_SUB(SUB) \
140  clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \
141  elapsed_time_sec = \
142  (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \
143  (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \
144  tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
145  time_sub_start = time_sub_end;
146 #else
147 #define PERF_COUNT_SUB(SUB)
148 #endif
149 
150 #endif
151 /**************************************************************************
152  * enable/disable use of OpenCL
153  **************************************************************************/
154 
155 #ifdef USE_OPENCL
156 #include "opencl_device_selection.h"
157 
158 #ifndef strcasecmp  // NOTE(review): #ifndef only detects a macro named strcasecmp, so this also fires where strcasecmp is an ordinary function - confirm the remap below is intended there
159 #define strcasecmp strcmp  // fallback is the case-sensitive strcmp
160 #endif
161 
162 #define MAX_KERNEL_STRING_LEN 64  // max kernel-name length in GPUEnv::mArrykernelNames (the array adds 1)
163 #define MAX_CLFILE_NUM 50  // capacity of the per-file arrays in GPUEnv (kernels, programs, source file names)
164 #define MAX_CLKERNEL_NUM 200  // capacity of the per-kernel arrays in GPUEnv (kernel names, wrapper functions)
165 #define MAX_KERNEL_NAME_LEN 64  // not referenced elsewhere in this header
166 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E  // presumably an AMD-specific command-queue query id - TODO confirm
167 #define GROUPSIZE_X 16  // GROUPSIZE_*: presumably work-group dimensions used when launching kernels - TODO confirm against the .cpp
168 #define GROUPSIZE_Y 16
169 #define GROUPSIZE_HMORX 256
170 #define GROUPSIZE_HMORY 1
171 // Bundle of OpenCL handles plus a kernel name; a pointer to it is the second argument of cl_kernel_function and the argument of OpenclDevice::SetKernelEnv.
172 struct KernelEnv {
173  cl_context mpkContext;  // OpenCL context handle
174  cl_command_queue mpkCmdQueue;  // command queue handle
175  cl_program mpkProgram;  // program handle
176  cl_kernel mpkKernel;  // kernel handle
177  char mckKernelName[150];  // kernel name, fixed 150-byte buffer
178 };
179 // Platform, context, device and command-queue handles grouped in one struct; not referenced elsewhere in this header.
180 struct OpenCLEnv {
181  cl_platform_id mpOclPlatformID;  // platform handle
182  cl_context mpOclContext;  // context handle
183  cl_device_id mpOclDevsID;  // device handle
184  cl_command_queue mpOclCmdQueue;  // command queue handle
185 };
186 typedef int (*cl_kernel_function)(void** userdata, KernelEnv* kenv);  // kernel wrapper callback type; stored in GPUEnv::mpArryKnelFuncs
187 // Logs via tprintf when `status` differs from CL_SUCCESS; it only reports and does not return or abort. Arguments are parenthesized so expression arguments keep their meaning; `status` is evaluated twice on failure. The expansion is a bare `if` block, so do not place it directly before an `else`.
188 #define CHECK_OPENCL(status, name) \
189  if ((status) != CL_SUCCESS) { \
190  tprintf("OpenCL error code is %d at when %s .\n", (status), (name)); \
191  }
192 // Aggregate OpenCL state; OpenclDevice keeps one instance as the static member gpuEnv.
193 struct GPUEnv {
194  // shared by all modules (original note: "share vb in all modules in hb library")
195  cl_platform_id mpPlatformID;  // platform handle
196  cl_device_type mDevType;  // device type
197  cl_context mpContext;  // context handle
198  cl_device_id* mpArryDevsID;  // pointer to device ids; allocation and ownership are not visible in this header
199  cl_device_id mpDevID;  // single device handle
200  cl_command_queue mpCmdQueue;  // command queue handle
201  cl_kernel mpArryKernels[MAX_CLFILE_NUM];  // kernel handles, sized by MAX_CLFILE_NUM
202  cl_program mpArryPrograms[MAX_CLFILE_NUM]; // one program object maps to one
203  // kernel source file
204  char mArryKnelSrcFile[MAX_CLFILE_NUM]
205  [256], // the max length of a kernel file name is 256
206  mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];  // kernel names; +1 presumably for the terminating NUL
207  cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];  // wrapper callbacks, sized by MAX_CLKERNEL_NUM
208  int mnKernelCount, mnFileCount, // only one kernel file
209  mnIsUserCreated; // 1: created, 0: not created and must be created by the
210  // opencl wrapper
211  int mnKhrFp64Flag;  // presumably set when the cl_khr_fp64 extension is available - TODO confirm
212  int mnAmdFp64Flag;  // presumably set when the cl_amd_fp64 extension is available - TODO confirm
213 };
214 // Static-only facade over the OpenCL runtime: environment setup/teardown, kernel compilation and binary caching, and OpenCL versions of several image operations. Only declarations are visible here; return-code conventions must be checked in the implementation file.
215 class OpenclDevice {
216  public:
217  static GPUEnv gpuEnv;  // shared OpenCL state
218  static int isInited;  // presumably nonzero once the environment is initialized - TODO confirm
219  OpenclDevice();
220  ~OpenclDevice();
221  static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
222  static int InitOpenclRunEnv(
223  int argc); // RegistOpenclKernel, double flags, compile kernels
224  static int InitOpenclRunEnv_DeviceSelection(
225  int argc); // RegistOpenclKernel, double flags, compile kernels
226  static int RegistOpenclKernel();
227  static int ReleaseOpenclRunEnv();
228  static int ReleaseOpenclEnv(GPUEnv* gpuInfo);
229  static int CompileKernelFile(GPUEnv* gpuInfo, const char* buildOption);  // buildOption: presumably the compiler option string passed to the OpenCL build - TODO confirm
230  static int CachedOfKernerPrg(const GPUEnv* gpuEnvCached,
231  const char* clFileName);  // takes the environment read-only plus a kernel file name
232  static int GeneratBinFromKernelSource(cl_program program,
233  const char* clFileName);
234  static int WriteBinaryToFile(const char* fileName, const char* birary,
235  size_t numBytes);  // `birary` is the original spelling of the binary-data parameter
236  static int BinaryGenerated(const char* clFileName, FILE** fhandle);  // fhandle is an out-parameter for a FILE*; who closes it is not visible here
237  // static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const
238  // char *buildOption );
239  static l_uint32* pixReadFromTiffKernel(l_uint32* tiffdata, l_int32 w,
240  l_int32 h, l_int32 wpl,
241  l_uint32* line);  // w, h: presumably width/height in pixels; wpl: presumably 32-bit words per line - TODO confirm
242  static int composeRGBPixelCl(int* tiffdata, int* line, int h, int w);  // note the (h, w) order, unlike (w, h) in pixReadFromTiffKernel
243 
244  /* OpenCL implementations of Morphological operations*/
245 
246  // Initialization of OCL buffers used in Morph operations
247  static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix* pixs);
248  static void releaseMorphCLBuffers();  // counterpart of initMorphCLAllocations
249 
250  static void pixGetLinesCL(Pix* pixd, Pix* pixs, Pix** pix_vline,
251  Pix** pix_hline, Pix** pixClosed, bool getpixClosed,
252  l_int32 close_hsize, l_int32 close_vsize,
253  l_int32 open_hsize, l_int32 open_vsize,
254  l_int32 line_hsize, l_int32 line_vsize);  // pix_vline, pix_hline and pixClosed are output Pix pointers; getpixClosed presumably controls whether pixClosed is produced - TODO confirm
255 
256  // int InitOpenclAttr( OpenCLEnv * env );
257  // int ReleaseKernel( KernelEnv * env );
258  static int SetKernelEnv(KernelEnv* envInfo);
259  // int CreateKernel( char * kernelname, KernelEnv * env );
260  // int RunKernel( const char *kernelName, void **userdata );
261  // int ConvertToString( const char *filename, char **source );
262  // int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
263  // int RegisterKernelWrapper( const char *kernelName, cl_kernel_function
264  // function ); int RunKernelWrapper( cl_kernel_function function, const char *
265  // kernelName, void **usrdata ); int GetKernelEnvAndFunc( const char
266  // *kernelName, KernelEnv *env, cl_kernel_function *function );
267 
268  static int LoadOpencl();
269 #ifdef WIN32  // NOTE(review): tests WIN32 only, while ON_WINDOWS above also accepts _WIN32, __WIN32__, __CYGWIN__ and __MINGW32__ - confirm this is intended
270  // static int OpenclInite();
271  static void FreeOpenclDll();
272 #endif
273 
274  inline static int AddKernelConfig(int kCount, const char* kName);  // NOTE(review): declared inline but no definition is visible in this header - confirm every caller sees the definition
275 
276  /* for binarization */
277  static int HistogramRectOCL(void* imagedata, int bytes_per_pixel,
278  int bytes_per_line, int left, int top, int width,
279  int height, int kHistogramSize,
280  int* histogramAllChannels);  // histogramAllChannels is the output buffer; its required size is not visible here
281 
282  static int ThresholdRectToPixOCL(unsigned char* imagedata,
283  int bytes_per_pixel, int bytes_per_line,
284  int* thresholds, int* hi_values, Pix** pix,
285  int rect_height, int rect_width,
286  int rect_top, int rect_left);  // pix is an output Pix pointer
287 
288  static ds_device getDeviceSelection();
289  static ds_device selectedDevice;  // presumably the cached result of getDeviceSelection - TODO confirm
290  static bool deviceIsSelected;  // presumably true once selectedDevice is valid - TODO confirm
291  static bool selectedDeviceIsOpenCL();
292 };
293 
294 #endif // USE_OPENCL
295 #endif // TESSERACT_OPENCL_OPENCLWRAPPER_H_
const int kHistogramSize
Definition: otsuthr.h:27