All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
openclwrapper.h
Go to the documentation of this file.
1 #include <stdio.h>
2 #include "allheaders.h"
3 #include "pix.h"
4 #ifdef USE_OPENCL
5 #include "tiff.h"
6 #include "tiffio.h"
7 #endif
8 #include "tprintf.h"
9 
10 // including CL/cl.h doesn't occur until USE_OPENCL defined below
11 
12 // platform preprocessor commands
// Step 1: classify the build target. Exactly one of ON_WINDOWS / ON_LINUX /
// ON_APPLE / ON_OTHER is defined as 1 and the other three as 0, so all four
// can be tested with a plain #if.
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
    defined(__CYGWIN32__) || defined(__MINGW32__)
#define ON_WINDOWS 1
#define ON_LINUX 0
#define ON_APPLE 0
#define ON_OTHER 0
#elif defined(__linux__)
#define ON_WINDOWS 0
#define ON_LINUX 1
#define ON_APPLE 0
#define ON_OTHER 0
#elif defined(__APPLE__)
#define ON_WINDOWS 0
#define ON_LINUX 0
#define ON_APPLE 1
#define ON_OTHER 0
#else
#define ON_WINDOWS 0
#define ON_LINUX 0
#define ON_APPLE 0
#define ON_OTHER 1
#endif

// Step 2: derive the conditional-code helpers from the flags above.
// IF_<PLATFORM>(X) expands to X on that platform and to nothing elsewhere;
// NOT_WINDOWS(X) expands to X everywhere except on Windows.
#if ON_WINDOWS
#define IF_WINDOWS(X) X
#define NOT_WINDOWS(X)
#else
#define IF_WINDOWS(X)
#define NOT_WINDOWS(X) X
#endif

#if ON_LINUX
#define IF_LINUX(X) X
#else
#define IF_LINUX(X)
#endif

#if ON_APPLE
#define IF_APPLE(X) X
#else
#define IF_APPLE(X)
#endif

#if ON_OTHER
#define IF_OTHER(X) X
#else
#define IF_OTHER(X)
#endif
54 
55 #if ON_LINUX
56 #include <time.h>
57 #endif
58 
59 #if ON_APPLE
60 #include <mach/clock.h>
61 #include <mach/mach.h>
62 #define CLOCK_MONOTONIC SYSTEM_CLOCK
63 #define clock_gettime clock_get_time
64 #endif
65 
/************************************************************************************
 * enable/disable reporting of performance
 * PERF_COUNT_VERBOSE
 * 0, 1 - no reporting
 * 2 - report total function call time for functions we're tracking
 * 3 - additionally report breakdown of function calls (kernel launch, kernel time, data copies)
 *
 * The level can be overridden at build time (e.g. -DPERF_COUNT_VERBOSE=2);
 * it defaults to 1 when nothing else has defined it.
 ************************************************************************************/
#ifndef PERF_COUNT_VERBOSE
#define PERF_COUNT_VERBOSE 1
#endif
// printf-style format of one report line: function name, label of the
// measured interval, elapsed time in seconds.
#define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
76 
#if ON_APPLE
// <mach/clock.h>, <mach/mach.h> and the CLOCK_MONOTONIC / clock_gettime
// mappings come from the ON_APPLE section earlier in this header; the only
// thing this section adds is <time.h>.
#include <time.h>
#endif
84 
/*
 * Wall-clock instrumentation macros, gated by PERF_COUNT_VERBOSE.
 *
 *   PERF_COUNT_START(FUNCT_NAME)
 *       Level >= 2: declares the timing locals (funct_name, elapsed_time_sec,
 *       time_funct_start/end, time_sub_start/end) in the current scope and
 *       records the start time. FUNCT_NAME is the string printed in the
 *       first column of every report line.
 *   PERF_COUNT_SUB(SUB)
 *       Level >= 3: reports the time elapsed since the previous
 *       PERF_COUNT_SUB (or since PERF_COUNT_START) under the label SUB and
 *       restarts the sub-interval.
 *   PERF_COUNT_END
 *       Level >= 2: reports the time elapsed since PERF_COUNT_START under
 *       the label "total".
 *
 * Below those levels each macro expands to nothing. The active forms expand
 * to several declarations/statements and carry their own semicolons, so they
 * must be used where a statement sequence is allowed (not as the unbraced
 * body of an if/for), with SUB and END in the scope of the matching START.
 *
 * Windows uses QueryPerformanceCounter; every other platform uses
 * clock_gettime(CLOCK_MONOTONIC, ...).
 */
#if ON_WINDOWS

#if PERF_COUNT_VERBOSE >= 2
/* funct_name is const-qualified because FUNCT_NAME is normally a string literal. */
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    QueryPerformanceFrequency(&freq); \
    QueryPerformanceCounter(&time_funct_start); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    QueryPerformanceCounter(&time_funct_end); \
    elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \
    tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    QueryPerformanceCounter(&time_sub_end); \
    elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \
    tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif


// not on windows
#else

#if PERF_COUNT_VERBOSE >= 2
/*
 * funct_name is const-qualified because FUNCT_NAME is normally a string
 * literal; "struct timespec" (rather than bare "timespec") keeps the
 * expansion valid in C as well as C++.
 */
#define PERF_COUNT_START(FUNCT_NAME) \
    const char *funct_name = FUNCT_NAME; \
    double elapsed_time_sec; \
    struct timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
    time_sub_start = time_funct_start; \
    time_sub_end = time_funct_start;

#define PERF_COUNT_END \
    clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \
    elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \
    tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
#else
#define PERF_COUNT_START(FUNCT_NAME)
#define PERF_COUNT_END
#endif

#if PERF_COUNT_VERBOSE >= 3
#define PERF_COUNT_SUB(SUB) \
    clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \
    elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \
    tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
    time_sub_start = time_sub_end;
#else
#define PERF_COUNT_SUB(SUB)
#endif

#endif
149 /**************************************************************************
150  * enable/disable use of OpenCL
151  **************************************************************************/
152 
153 #ifdef USE_OPENCL
154 
155 #define USE_DEVICE_SELECTION 1
156 
157 #include "opencl_device_selection.h"
158 
// Fallback used whenever no macro named strcasecmp is visible at this point:
// the name is mapped onto strcmp.
// NOTE(review): strcmp is case-sensitive, and this check only sees macros,
// not functions -- confirm this mapping is intended on platforms that
// provide a real strcasecmp().
#if !defined(strcasecmp)
#define strcasecmp strcmp
#endif

// Fixed capacities; MAX_CLFILE_NUM, MAX_CLKERNEL_NUM and
// MAX_KERNEL_STRING_LEN size the arrays inside GPUEnv.
#define MAX_KERNEL_STRING_LEN       64
#define MAX_CLFILE_NUM              50
#define MAX_CLKERNEL_NUM            200
#define MAX_KERNEL_NAME_LEN         64

// Numeric token 0x403E; presumably an AMD-specific command-queue info query
// -- TODO confirm against the vendor headers.
#define CL_QUEUE_THREAD_HANDLE_AMD  0x403E

// Group dimensions; presumably the OpenCL work-group sizes of the general 2-D
// kernels (X/Y) and of the horizontal-morphology kernels (HMORX/HMORY) --
// verify against the kernel launch code.
#define GROUPSIZE_X                 16
#define GROUPSIZE_Y                 16
#define GROUPSIZE_HMORX             256
#define GROUPSIZE_HMORY             1
172 
173 typedef struct _KernelEnv
174 {
175  cl_context mpkContext;
176  cl_command_queue mpkCmdQueue;
177  cl_program mpkProgram;
178  cl_kernel mpkKernel;
179  char mckKernelName[150];
180 } KernelEnv;
181 
182 typedef struct _OpenCLEnv
183 {
184  cl_platform_id mpOclPlatformID;
185  cl_context mpOclContext;
186  cl_device_id mpOclDevsID;
187  cl_command_queue mpOclCmdQueue;
188 } OpenCLEnv;
189 typedef int ( *cl_kernel_function )( void **userdata, KernelEnv *kenv );
190 
191 
192 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
193 
194 static const l_uint32 lmask32[] = {0x0,
195  0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
196  0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
197  0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
198  0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
199  0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
200  0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
201  0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
202  0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff};
203 
204 static const l_uint32 rmask32[] = {0x0,
205  0x00000001, 0x00000003, 0x00000007, 0x0000000f,
206  0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
207  0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
208  0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
209  0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
210  0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
211  0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
212  0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff};
213 
/*
 * CHECK_OPENCL(status, name)
 *
 * Prints a diagnostic line to stdout when `status` differs from CL_SUCCESS.
 * It only reports: it does not return, abort or otherwise alter control
 * flow in the caller.
 *
 *   status - OpenCL status code to test; evaluated exactly once.
 *   name   - C string naming the operation that produced the status.
 *
 * The body is wrapped in do { } while (0) so that `CHECK_OPENCL(...);`
 * behaves as a single statement, including as the unbraced branch of an
 * if/else.
 */
#define CHECK_OPENCL(status,name) \
do { \
    const int check_opencl_status_ = (status); \
    if( check_opencl_status_ != CL_SUCCESS ) \
    { \
        printf ("OpenCL error code is %d at when %s .\n", check_opencl_status_, (name)); \
    } \
} while( 0 )
219 
220 
221 typedef struct _GPUEnv
222 {
223  //share vb in all modules in hb library
224  cl_platform_id mpPlatformID;
225  cl_device_type mDevType;
226  cl_context mpContext;
227  cl_device_id *mpArryDevsID;
228  cl_device_id mpDevID;
229  cl_command_queue mpCmdQueue;
230  cl_kernel mpArryKernels[MAX_CLFILE_NUM];
231  cl_program mpArryPrograms[MAX_CLFILE_NUM]; //one program object maps one kernel source file
232  char mArryKnelSrcFile[MAX_CLFILE_NUM][256], //the max len of kernel file name is 256
233  mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
234  cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
235  int mnKernelCount, mnFileCount, // only one kernel file
236  mnIsUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
237  int mnKhrFp64Flag;
238  int mnAmdFp64Flag;
239 
240 } GPUEnv;
241 
242 
243 class OpenclDevice
244 {
245 
246 public:
247  static GPUEnv gpuEnv;
248  static int isInited;
249  OpenclDevice();
250  ~OpenclDevice();
251  static int InitEnv(); // load dll, call InitOpenclRunEnv(0)
252  static int InitOpenclRunEnv( int argc ); // RegistOpenclKernel, double flags, compile kernels
253  static int InitOpenclRunEnv_DeviceSelection( int argc ); // RegistOpenclKernel, double flags, compile kernels
254  static int InitOpenclRunEnv( GPUEnv *gpu ); // select device by env_CPU or selector
255  static int RegistOpenclKernel();
256  static int ReleaseOpenclRunEnv();
257  static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
258  static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
259  static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
260  static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
261  static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
262  static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
263  //static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
264  static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
265  static Pix* pixReadTiffCl( const char *filename, l_int32 n );
266  static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
267  static PIX * pixReadMemTiffCl(const l_uint8 *data, size_t size, l_int32 n);
268  static PIX* pixReadFromTiffStreamCl(TIFF *tif);
269  static int composeRGBPixelCl(int *tiffdata,int *line,int h,int w);
270  static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres);
271  static TIFF* fopenTiffCl(FILE *fp,const char *modestring);
272 
273 /* OpenCL implementations of Morphological operations*/
274 
275  //Initialiation of OCL buffers used in Morph operations
276  static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs);
277  static void releaseMorphCLBuffers();
278 
279  // OpenCL implementation of Morphology Dilate
280  static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
281 
282  // OpenCL implementation of Morphology Erode
283  static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
284 
285  // OpenCL implementation of Morphology Close
286  static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
287 
288  // OpenCL implementation of Morphology Open
289  static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy);
290 
291  // OpenCL implementation of Morphology Open
292  static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCopy);
293 
294  // OpenCL implementation of Morphology (Hollow = Closed - Open)
295  static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, bool reqDataCopy);
296 
297  static void pixGetLinesCL(PIX *pixd, PIX *pixs,
298  PIX** pix_vline, PIX** pix_hline,
299  PIX** pixClosed, bool getpixClosed,
300  l_int32 close_hsize, l_int32 close_vsize,
301  l_int32 open_hsize, l_int32 open_vsize,
302  l_int32 line_hsize, l_int32 line_vsize);
303 
304  //int InitOpenclAttr( OpenCLEnv * env );
305  //int ReleaseKernel( KernelEnv * env );
306  static int SetKernelEnv( KernelEnv *envInfo );
307  //int CreateKernel( char * kernelname, KernelEnv * env );
308  //int RunKernel( const char *kernelName, void **userdata );
309  //int ConvertToString( const char *filename, char **source );
310  //int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
311  //int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
312  //int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
313  //int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
314  // static cl_device_id performDeviceSelection( );
315  //static bool thresholdRectToPixMicroBench( TessScoreEvaluationInputData input, ds_device_type type);
316 
317  static int LoadOpencl();
318 #ifdef WIN32
319  //static int OpenclInite();
320  static void FreeOpenclDll();
321 #endif
322 
323  //int GetOpenclState();
324  //void SetOpenclState( int state );
325  inline static int AddKernelConfig( int kCount, const char *kName );
326 
327  /* for binarization */
328  static void HistogramRectOCL(
329  const unsigned char *imagedata,
330  int bytes_per_pixel,
331  int bytes_per_line,
332  int left,
333  int top,
334  int width,
335  int height,
336  int kHistogramSize,
337  int *histogramAllChannels);
338  static void ThresholdRectToPixOCL(
339  const unsigned char* imagedata,
340  int bytes_per_pixel,
341  int bytes_per_line,
342  const int* thresholds,
343  const int* hi_values,
344  Pix** pix,
345  int rect_height,
346  int rect_width,
347  int rect_top,
348  int rect_left);
349 #if USE_DEVICE_SELECTION
350  static ds_device getDeviceSelection();
351  static ds_device selectedDevice;
352  static bool deviceIsSelected;
353 #endif
354  static bool selectedDeviceIsOpenCL();
355  static bool selectedDeviceIsNativeCPU();
356 
357 };
358 
359 
360 #endif
const int kHistogramSize
Definition: otsuthr.h:27