2 #include "allheaders.h"
// Platform selection helpers. IF_WINDOWS(X) / NOT_WINDOWS(X) wrap a code
// fragment X so that it is kept or dropped depending on the target platform.
// NOTE(review): only part of this conditional is visible in this excerpt; the
// remaining macro definitions and the #else/#endif lines are not shown here.
13 #if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN32__ ) || defined( __MINGW32__ )
// Windows-family toolchains: IF_WINDOWS keeps its argument,
// NOT_WINDOWS discards it.
18 #define IF_WINDOWS(X) X
22 #define NOT_WINDOWS(X)
23 #elif defined( __linux__ )
// Linux: NOT_WINDOWS keeps its argument.
32 #define NOT_WINDOWS(X) X
33 #elif defined( __APPLE__ )
// macOS: NOT_WINDOWS keeps its argument.
42 #define NOT_WINDOWS(X) X
// NOTE(review): the directive guarding this definition is not visible here;
// presumably it is the fallback branch for other platforms.
52 #define NOT_WINDOWS(X) X
// Alias the POSIX-style names CLOCK_MONOTONIC / clock_gettime to the Mach
// names SYSTEM_CLOCK / clock_get_time.
// NOTE(review): the guard is not visible here (presumably Apple-only). The
// alias only renames identifiers, so confirm that every use site passes
// arguments of the types clock_get_time actually expects.
60 #include <mach/clock.h>
61 #include <mach/mach.h>
62 #define CLOCK_MONOTONIC SYSTEM_CLOCK
63 #define clock_gettime clock_get_time
// Verbosity level of the timing macros. PERF_COUNT_START / PERF_COUNT_END get
// their timing bodies when this is >= 2 and PERF_COUNT_SUB when it is >= 3;
// the value 1 meets neither threshold.
74 #define PERF_COUNT_VERBOSE 1
// Format of one timing report line as passed to tprintf by the timing macros:
// function name (%36s), section label (%24s), elapsed seconds (%11.6f).
75 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
// Same Mach clock aliases as defined earlier in this file.
// NOTE(review): the guarding directive is not visible in this excerpt;
// presumably this is also Apple-only.
79 #include <mach/clock.h>
80 #include <mach/mach.h>
81 #define CLOCK_MONOTONIC SYSTEM_CLOCK
82 #define clock_gettime clock_get_time
87 #if PERF_COUNT_VERBOSE >= 2
// PERF_COUNT_START(FUNCT_NAME): begin timing a function using the
// QueryPerformanceCounter / QueryPerformanceFrequency API.
//
// Expands to declarations in the caller's scope (funct_name,
// elapsed_time_sec, freq and four LARGE_INTEGER timestamps), so it can be
// used at most once per scope and must precede PERF_COUNT_SUB / PERF_COUNT_END.
// It records the counter frequency and the start time, and starts the
// sub-section timer at the same instant.
//
// funct_name is declared `const char *` so that FUNCT_NAME may be a string
// literal without relying on the literal-to-`char *` conversion, which C++11
// no longer permits. The macros in this file only read the name.
88 #define PERF_COUNT_START(FUNCT_NAME) \
89 const char *funct_name = FUNCT_NAME; \
90 double elapsed_time_sec; \
91 LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
92 QueryPerformanceFrequency(&freq); \
93 QueryPerformanceCounter(&time_funct_start); \
94 time_sub_start = time_funct_start; \
95 time_sub_end = time_funct_start;
// PERF_COUNT_END: stop the function-level timer started by PERF_COUNT_START
// and report the total. Reads the counter into time_funct_end, converts the
// tick difference to seconds by dividing by the counter frequency, and prints
// one report line labelled "total" through tprintf.
// Relies on the locals declared by PERF_COUNT_START in the same scope.
97 #define PERF_COUNT_END \
98 QueryPerformanceCounter(&time_funct_end); \
99 elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \
100 tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
// Empty definitions: with these, PERF_COUNT_START / PERF_COUNT_END expand to
// nothing.
// NOTE(review): the #else separating these from the timing definitions above
// is not visible in this excerpt.
102 #define PERF_COUNT_START(FUNCT_NAME)
103 #define PERF_COUNT_END
106 #if PERF_COUNT_VERBOSE >= 3
// PERF_COUNT_SUB(SUB): report the time elapsed since the previous
// PERF_COUNT_SUB (or since PERF_COUNT_START for the first one), labelled with
// the string SUB, then restart the sub-section timer from the current reading.
// Relies on the locals declared by PERF_COUNT_START in the same scope.
107 #define PERF_COUNT_SUB(SUB) \
108 QueryPerformanceCounter(&time_sub_end); \
109 elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \
110 tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
111 time_sub_start = time_sub_end;
// Empty definition: PERF_COUNT_SUB expands to nothing.
// NOTE(review): the #else/#endif lines around it are not visible in this
// excerpt.
113 #define PERF_COUNT_SUB(SUB)
120 #if PERF_COUNT_VERBOSE >= 2
// PERF_COUNT_START(FUNCT_NAME): begin timing a function using
// clock_gettime(CLOCK_MONOTONIC, ...).
//
// Expands to declarations in the caller's scope (funct_name,
// elapsed_time_sec and four timespec timestamps), so it can be used at most
// once per scope and must precede PERF_COUNT_SUB / PERF_COUNT_END. It records
// the start time and starts the sub-section timer at the same instant.
//
// funct_name is declared `const char *` so that FUNCT_NAME may be a string
// literal without relying on the literal-to-`char *` conversion, which C++11
// no longer permits. The macros in this file only read the name.
121 #define PERF_COUNT_START(FUNCT_NAME) \
122 const char *funct_name = FUNCT_NAME; \
123 double elapsed_time_sec; \
124 timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
125 clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
126 time_sub_start = time_funct_start; \
127 time_sub_end = time_funct_start;
// PERF_COUNT_END: stop the function-level timer started by PERF_COUNT_START
// and report the total. Elapsed seconds are the whole-second difference plus
// the nanosecond difference scaled by 1e-9; the nanosecond term may be
// negative, which the sum accounts for. Prints one report line labelled
// "total" through tprintf.
// Relies on the locals declared by PERF_COUNT_START in the same scope.
129 #define PERF_COUNT_END \
130 clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \
131 elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \
132 tprintf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
// Empty definitions: with these, PERF_COUNT_START / PERF_COUNT_END expand to
// nothing.
// NOTE(review): the #else separating these from the timing definitions above
// is not visible in this excerpt.
134 #define PERF_COUNT_START(FUNCT_NAME)
135 #define PERF_COUNT_END
138 #if PERF_COUNT_VERBOSE >= 3
// PERF_COUNT_SUB(SUB): report the time elapsed since the previous
// PERF_COUNT_SUB (or since PERF_COUNT_START for the first one), labelled with
// the string SUB, then restart the sub-section timer from the current reading.
// Relies on the locals declared by PERF_COUNT_START in the same scope.
139 #define PERF_COUNT_SUB(SUB) \
140 clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \
141 elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \
142 tprintf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
143 time_sub_start = time_sub_end;
// Empty definition: PERF_COUNT_SUB expands to nothing.
// NOTE(review): the #else/#endif lines around it are not visible in this
// excerpt.
145 #define PERF_COUNT_SUB(SUB)
// Compile-time switch tested by `#if USE_DEVICE_SELECTION` further down to
// enable the device-selection declarations.
155 #define USE_DEVICE_SELECTION 1
// Maps strcasecmp onto strcmp. strcmp compares case-sensitively, so wherever
// this alias is active the comparison is no longer case-insensitive.
// NOTE(review): the directive guarding this define is not visible in this
// excerpt; confirm it only applies where strcasecmp is unavailable.
160 #define strcasecmp strcmp
// Kernel-name length used (plus one) for each entry of
// GPUEnv::mArrykernelNames.
163 #define MAX_KERNEL_STRING_LEN 64
// Capacity of the per-file arrays in GPUEnv (kernels, programs, source file
// names).
164 #define MAX_CLFILE_NUM 50
// Capacity of the per-kernel arrays in GPUEnv (kernel names, kernel
// functions).
165 #define MAX_CLKERNEL_NUM 200
// NOTE(review): same value as MAX_KERNEL_STRING_LEN; no use is visible in
// this excerpt.
166 #define MAX_KERNEL_NAME_LEN 64
// NOTE(review): presumably an AMD-specific OpenCL query constant; no use is
// visible in this excerpt.
167 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
// NOTE(review): presumably OpenCL work-group dimensions (general 2-D kernels
// and the "HMOR" kernels respectively); no use is visible in this excerpt.
168 #define GROUPSIZE_X 16
169 #define GROUPSIZE_Y 16
170 #define GROUPSIZE_HMORX 256
171 #define GROUPSIZE_HMORY 1
// KernelEnv: the OpenCL handles and kernel name passed to a kernel function
// (see cl_kernel_function, which receives a KernelEnv *).
// NOTE(review): the braces, the closing typedef name and at least one member
// line are not visible in this excerpt.
173 typedef struct _KernelEnv
// OpenCL context handle.
175 cl_context mpkContext;
// OpenCL command queue handle.
176 cl_command_queue mpkCmdQueue;
// OpenCL program handle.
177 cl_program mpkProgram;
// Kernel name buffer (150 bytes).
179 char mckKernelName[150];
// OpenCLEnv: one set of OpenCL handles: platform, context, device and command
// queue.
// NOTE(review): the braces and the closing typedef name are not visible in
// this excerpt.
182 typedef struct _OpenCLEnv
// OpenCL platform id.
184 cl_platform_id mpOclPlatformID;
// OpenCL context handle.
185 cl_context mpOclContext;
// OpenCL device id.
186 cl_device_id mpOclDevsID;
// OpenCL command queue handle.
187 cl_command_queue mpOclCmdQueue;
// Pointer to a kernel entry function: takes an opaque `void **userdata`
// argument array and the KernelEnv to use, and returns an int.
// GPUEnv::mpArryKnelFuncs stores pointers of this type.
189 typedef int ( *cl_kernel_function )(
void **userdata, KernelEnv *kenv );
// File-local setting initialised to ASYMMETRIC_MORPH_BC.
// NOTE(review): presumably the boundary condition used by the morphology
// routines; no use is visible in this excerpt.
192 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
// Left-aligned bit masks: lmask32[i] has the i most-significant bits of a
// 32-bit word set, for i = 0..32 (33 entries; lmask32[0] == 0 and
// lmask32[32] == 0xffffffff).
194 static const l_uint32 lmask32[] = {0x0,
195 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000,
196 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000,
197 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
198 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000,
199 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000,
200 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
201 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0,
202 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff};
// Right-aligned bit masks: rmask32[i] has the i least-significant bits of a
// 32-bit word set, for i = 0..32 (33 entries; rmask32[0] == 0 and
// rmask32[32] == 0xffffffff).
204 static const l_uint32 rmask32[] = {0x0,
205 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
206 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
207 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
208 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
209 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
210 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
211 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
212 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff};
// CHECK_OPENCL(status, name): when `status` is not CL_SUCCESS, print the
// numeric error code together with `name`, a string describing the operation
// that was attempted.
// NOTE(review): the remaining lines of this macro (braces and whatever
// follows the printf) are not visible in this excerpt, so the action taken
// after printing cannot be confirmed from here. `status` is used
// unparenthesised in the comparison, so pass a simple expression. The message
// wording "at when" also looks unintended.
214 #define CHECK_OPENCL(status,name) \
215 if( status != CL_SUCCESS ) \
217 printf ("OpenCL error code is %d at when %s .\n", status, name); \
// GPUEnv: the OpenCL state kept by this wrapper: platform/device/context
// handles plus fixed-capacity tables of programs, kernels, kernel names and
// kernel functions.
// NOTE(review): the braces, the closing typedef name and the end of the last
// member declaration are not visible in this excerpt.
221 typedef struct _GPUEnv
// OpenCL platform id.
224 cl_platform_id mpPlatformID;
// OpenCL device type.
225 cl_device_type mDevType;
// OpenCL context handle.
226 cl_context mpContext;
// Pointer to device ids (presumably an array, judging by the name; its
// allocation is not visible here).
227 cl_device_id *mpArryDevsID;
// A single OpenCL device id.
228 cl_device_id mpDevID;
// OpenCL command queue handle.
229 cl_command_queue mpCmdQueue;
// Kernel and program handles, MAX_CLFILE_NUM slots each.
230 cl_kernel mpArryKernels[MAX_CLFILE_NUM];
231 cl_program mpArryPrograms[MAX_CLFILE_NUM];
// Kernel source file names (MAX_CLFILE_NUM entries of 256 bytes) and kernel
// names (MAX_CLKERNEL_NUM entries of MAX_KERNEL_STRING_LEN + 1 bytes).
232 char mArryKnelSrcFile[MAX_CLFILE_NUM][256],
233 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
// Kernel entry functions, MAX_CLKERNEL_NUM slots.
234 cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
// Counters (presumably the number of used kernel and file slots); the
// declaration continues on a line not visible in this excerpt.
235 int mnKernelCount, mnFileCount,
// The single static GPUEnv instance declared by this file.
247 static GPUEnv gpuEnv;
// --- OpenCL environment set-up, tear-down and kernel program build/caching.
// Only declarations are visible in this excerpt; the descriptions below are
// inferred from the names and signatures and should be confirmed against the
// definitions. All functions return int; the meaning of the return value is
// not visible here.

// Environment initialisation entry points. InitOpenclRunEnv is overloaded:
// one form takes `int argc`, the other a GPUEnv pointer.
251 static int InitEnv();
252 static int InitOpenclRunEnv(
int argc );
253 static int InitOpenclRunEnv_DeviceSelection(
int argc );
254 static int InitOpenclRunEnv( GPUEnv *gpu );
// Kernel registration.
255 static int RegistOpenclKernel();
// Environment release; the second form takes the GPUEnv to release.
256 static int ReleaseOpenclRunEnv();
257 static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
// Kernel compilation for the given GPUEnv with a build-option string.
258 static int CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption );
// Presumably checks whether the program for clFileName is already cached in
// gpuEnvCached.
259 static int CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName );
// Presumably produces a binary for `program`, keyed by clFileName.
260 static int GeneratBinFromKernelSource( cl_program program,
const char * clFileName );
// Takes a file name, a byte buffer (`birary`, sic) and its length numBytes.
261 static int WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes );
// Takes a .cl file name and an out-parameter FILE handle; presumably reports
// whether a binary already exists for that file.
262 static int BinaryGenerated(
const char * clFileName, FILE ** fhandle );
// --- TIFF reading declarations ("Cl" variants).
// Only declarations are visible in this excerpt; behaviour is inferred from
// names and signatures and should be confirmed against the definitions.

// Takes raw TIFF data with width w, height h, words-per-line wpl and a line
// buffer; returns an l_uint32 pointer.
264 static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
// Three entry points returning a PIX, differing in the input they take: a
// file name, an open FILE stream, or a memory buffer of `size` bytes. Each
// also takes an l_int32 `n` (presumably the page index; confirm).
265 static Pix* pixReadTiffCl(
const char *
filename, l_int32 n );
266 static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
267 static PIX * pixReadMemTiffCl(
const l_uint8 *data,
size_t size, l_int32 n);
// Takes an already opened TIFF handle and returns a PIX.
268 static PIX* pixReadFromTiffStreamCl(TIFF *tif);
// Takes int buffers `tiffdata` and `line` plus height h and width w.
269 static int composeRGBPixelCl(
int *tiffdata,
int *line,
int h,
int w);
// Takes a TIFF handle and two l_int32 out-parameters, pxres and pyres.
270 static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres);
// Takes a FILE stream and a mode string; returns a TIFF handle.
271 static TIFF* fopenTiffCl(FILE *fp,
const char *modestring);
// --- Morphology declarations ("CL" variants).
// Only declarations are visible in this excerpt; behaviour is inferred from
// names and signatures and should be confirmed against the definitions.

// Allocation and release of the buffers used by the morphology routines.
// The allocator takes words-per-line, height and the source PIX.
276 static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs);
277 static void releaseMorphCLBuffers();
// Brick-shaped dilate / erode / close / open. Each takes a destination PIX,
// a source PIX, the brick's horizontal and vertical size, and a reqDataCopy
// flag (presumably whether pixel data must be copied between host and
// device; confirm), and returns a PIX.
280 static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
283 static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
286 static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
289 static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
// Takes a destination PIX and two source PIX; returns a PIX.
292 static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
bool reqDataCopy);
// Takes separate brick sizes for a close step and an open step.
295 static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
bool reqDataCopy);
// Takes out-parameters for a vertical-line PIX, a horizontal-line PIX and,
// governed by getpixClosed, a closed PIX; plus brick sizes for the close,
// open and line steps.
297 static void pixGetLinesCL(PIX *pixd, PIX *pixs,
298 PIX** pix_vline, PIX** pix_hline,
299 PIX** pixClosed,
bool getpixClosed,
300 l_int32 close_hsize, l_int32 close_vsize,
301 l_int32 open_hsize, l_int32 open_vsize,
302 l_int32 line_hsize, l_int32 line_vsize);
// --- Kernel environment, OpenCL library loading, and image-processing entry
// points. Only declarations are visible in this excerpt; behaviour is
// inferred from names and signatures and should be confirmed against the
// definitions.

// Takes the KernelEnv to fill in or apply (direction not visible here).
306 static int SetKernelEnv( KernelEnv *envInfo );
// Load / unload of the OpenCL library (judging by the names).
317 static int LoadOpencl();
320 static void FreeOpenclDll();
// Takes a kernel index/count and a kernel name.
325 inline static int AddKernelConfig(
int kCount,
const char *kName );
// Takes image bytes and an output histogram buffer.
// NOTE(review): the parameters between `imagedata` and
// `histogramAllChannels` are not visible in this excerpt.
328 static void HistogramRectOCL(
329 const unsigned char *imagedata,
337 int *histogramAllChannels);
// Takes image bytes plus `thresholds` and `hi_values` arrays.
// NOTE(review): several parameter lines and the end of this declaration are
// not visible in this excerpt.
338 static void ThresholdRectToPixOCL(
339 const unsigned char* imagedata,
342 const int* thresholds,
343 const int* hi_values,
// Device-selection declarations, compiled only when USE_DEVICE_SELECTION is
// non-zero.
// NOTE(review): the matching #endif is not visible in this excerpt, and the
// descriptions below are inferred from names only.
349 #if USE_DEVICE_SELECTION
// Returns a ds_device describing the chosen device.
350 static ds_device getDeviceSelection();
// Static ds_device and a bool flag; presumably the cached selection and
// whether a selection has been made yet.
351 static ds_device selectedDevice;
352 static bool deviceIsSelected;
// Predicates on the selected device's kind.
354 static bool selectedDeviceIsOpenCL();
355 static bool selectedDeviceIsNativeCPU();