20 #define TIMESPEC mach_timespec
22 #define TIMESPEC timespec
26 GPUEnv OpenclDevice::gpuEnv;
28 #if USE_DEVICE_SELECTION
29 bool OpenclDevice::deviceIsSelected =
false;
30 ds_device OpenclDevice::selectedDevice;
33 int OpenclDevice::isInited =0;
35 struct tiff_transform {
43 static struct tiff_transform tiff_orientation_transforms[] = {
54 static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000;
56 cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate;
62 void legalizeFileName(
char *fileName) {
64 char *invalidChars =
"/\?:*\"><| ";
66 for (
int i = 0; i < strlen(invalidChars); i++) {
68 invalidStr[0] = invalidChars[i];
75 for (
char *pos = strstr(fileName, invalidStr); pos !=
NULL; pos = strstr(pos+1, invalidStr)) {
83 void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
86 gpuInfo->mnIsUserCreated = 1;
88 gpuInfo->mpDevID = device;
89 gpuInfo->mpArryDevsID =
new cl_device_id[1];
90 gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
91 clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE ,
sizeof(cl_device_type), (
void *) &gpuInfo->mDevType , &size);
92 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(TYPE)");
94 clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM ,
sizeof(cl_platform_id), (
void *) &gpuInfo->mpPlatformID , &size);
95 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(PLATFORM)");
97 cl_context_properties props[3];
98 props[0] = CL_CONTEXT_PLATFORM;
99 props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
101 gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID,
NULL,
NULL, &clStatus);
102 CHECK_OPENCL( clStatus,
"populateGPUEnv::createContext");
104 cl_command_queue_properties queueProperties = 0;
105 gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
106 CHECK_OPENCL( clStatus,
"populateGPUEnv::createCommandQueue");
110 int OpenclDevice::LoadOpencl()
113 HINSTANCE HOpenclDll =
NULL;
114 void * OpenclDll =
NULL;
116 OpenclDll =
static_cast<HINSTANCE
>( HOpenclDll );
117 OpenclDll = LoadLibrary(
"openCL.dll" );
118 if ( !static_cast<HINSTANCE>( OpenclDll ) )
120 fprintf(stderr,
"[OD] Load opencl.dll failed!\n");
121 FreeLibrary( static_cast<HINSTANCE>( OpenclDll ) );
125 fprintf(stderr,
"[OD] Load opencl.dll successful!\n");
129 int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
131 envInfo->mpkContext = gpuEnv.mpContext;
132 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
133 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
138 cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer,
size_t nElements, cl_mem_flags flags, cl_int *pStatus)
140 cl_mem membuffer = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (flags),
141 nElements *
sizeof(l_uint32), hostbuffer, pStatus);
146 PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs,
int elements, cl_mem_flags flags,
bool memcopy =
false,
bool sync =
true)
148 PROCNAME(
"mapOutputCLBuffer");
153 if ((pixd = pixCreateTemplate(pixs)) ==
NULL)
154 (PIX *)ERROR_PTR(
"pixd not made", procName,
NULL);
158 if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) ==
NULL)
159 (PIX *)ERROR_PTR(
"pixd not made", procName,
NULL);
162 l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
167 memcpy(pixGetData(pixd), pValues, elements *
sizeof(l_uint32));
171 pixSetData(pixd, pValues);
174 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,clbuffer,pValues,0,NULL,NULL);
178 clFinish( rEnv.mpkCmdQueue );
184 cl_mem allocateIntBuffer( KernelEnv rEnv,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus ,
bool sync =
false)
186 cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE),
187 nElements *
sizeof(l_int32), NULL, pStatus);
189 if (_pValues != NULL)
191 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
194 memcpy(pValues, _pValues, nElements *
sizeof(l_int32));
196 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
199 clFinish( rEnv.mpkCmdQueue );
205 int OpenclDevice::InitOpenclRunEnv( GPUEnv *gpuInfo )
209 cl_uint numPlatforms, numDevices;
210 cl_platform_id *platforms;
211 cl_context_properties cps[3];
212 char platformName[256];
218 if ( !gpuInfo->mnIsUserCreated )
220 clStatus = clGetPlatformIDs( 0, NULL, &numPlatforms );
221 if ( clStatus != CL_SUCCESS )
225 gpuInfo->mpPlatformID =
NULL;
227 if ( 0 < numPlatforms )
229 platforms = (cl_platform_id*) malloc( numPlatforms *
sizeof( cl_platform_id ) );
230 if ( platforms == (cl_platform_id*)
NULL )
234 clStatus = clGetPlatformIDs( numPlatforms, platforms, NULL );
236 if ( clStatus != CL_SUCCESS )
241 for ( i = 0; i < numPlatforms; i++ )
243 clStatus = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR,
244 sizeof( platformName ), platformName, NULL );
246 if ( clStatus != CL_SUCCESS )
250 gpuInfo->mpPlatformID = platforms[i];
255 gpuInfo->mpPlatformID = platforms[i];
257 if ( getenv(
"SC_OPENCLCPU") )
259 clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID,
264 printf(
"Selecting OpenCL device: CPU (a)\n");
268 clStatus = clGetDeviceIDs(gpuInfo->mpPlatformID,
273 printf(
"Selecting OpenCL device: GPU (a)\n");
275 if ( clStatus != CL_SUCCESS )
282 if ( clStatus != CL_SUCCESS )
286 if ( NULL == gpuInfo->mpPlatformID )
290 cps[0] = CL_CONTEXT_PLATFORM;
291 cps[1] = (cl_context_properties) gpuInfo->mpPlatformID;
295 if ( getenv(
"SC_OPENCLCPU") )
297 gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
298 printf(
"Selecting OpenCL device: CPU (b)\n");
302 gpuInfo->mDevType = CL_DEVICE_TYPE_GPU;
303 printf(
"Selecting OpenCL device: GPU (b)\n");
306 gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
308 if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
310 gpuInfo->mDevType = CL_DEVICE_TYPE_CPU;
311 gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
312 printf(
"Selecting OpenCL device: CPU (c)\n");
314 if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
316 gpuInfo->mDevType = CL_DEVICE_TYPE_DEFAULT;
317 gpuInfo->mpContext = clCreateContextFromType( cps, gpuInfo->mDevType, NULL, NULL, &clStatus );
318 printf(
"Selecting OpenCL device: DEFAULT (c)\n");
320 if ( ( gpuInfo->mpContext == (cl_context) NULL) || ( clStatus != CL_SUCCESS ) )
324 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, 0, NULL, &length );
325 if ( ( clStatus != CL_SUCCESS ) || ( length == 0 ) )
328 gpuInfo->mpArryDevsID = (cl_device_id*) malloc( length );
329 if ( gpuInfo->mpArryDevsID == (cl_device_id*)
NULL )
332 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, length,
333 gpuInfo->mpArryDevsID, NULL );
334 if ( clStatus != CL_SUCCESS )
338 gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpArryDevsID[0], 0, &clStatus );
340 if ( clStatus != CL_SUCCESS )
344 clStatus = clGetCommandQueueInfo( gpuInfo->mpCmdQueue, CL_QUEUE_THREAD_HANDLE_AMD, 0, NULL, NULL );
346 size_t aDevExtInfoSize = 0;
348 clStatus = clGetDeviceInfo( gpuInfo->mpArryDevsID[0], CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
349 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
351 char *aExtInfo =
new char[aDevExtInfoSize];
353 clStatus = clGetDeviceInfo( gpuInfo->mpArryDevsID[0], CL_DEVICE_EXTENSIONS,
354 sizeof(
char) * aDevExtInfoSize, aExtInfo, NULL);
355 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
357 gpuInfo->mnKhrFp64Flag = 0;
358 gpuInfo->mnAmdFp64Flag = 0;
360 if ( strstr( aExtInfo,
"cl_khr_fp64" ) )
362 gpuInfo->mnKhrFp64Flag = 1;
367 if ( strstr( aExtInfo,
"cl_amd_fp64" ) )
368 gpuInfo->mnAmdFp64Flag = 1;
375 void OpenclDevice::releaseMorphCLBuffers()
377 if (pixdCLIntermediate != NULL)
378 clReleaseMemObject(pixdCLIntermediate);
379 if (pixsCLBuffer != NULL)
380 clReleaseMemObject(pixsCLBuffer);
381 if (pixdCLBuffer != NULL)
382 clReleaseMemObject(pixdCLBuffer);
383 if (pixThBuffer != NULL)
384 clReleaseMemObject(pixThBuffer);
387 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs)
389 SetKernelEnv( &rEnv );
391 if (pixThBuffer != NULL)
393 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
396 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
sizeof(l_uint32) * wpl*h, 0, NULL, NULL);
401 l_uint32* srcdata = (l_uint32*) malloc(wpl*h*
sizeof(l_uint32));
402 memcpy(srcdata, pixGetData(pixs), wpl*h*
sizeof(l_uint32));
404 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
407 pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
409 pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
411 return (
int)clStatus;
414 int OpenclDevice::InitEnv()
421 if( 1 == LoadOpencl() )
429 #if USE_DEVICE_SELECTION
431 InitOpenclRunEnv_DeviceSelection( 0 );
435 InitOpenclRunEnv( 0 );
441 int OpenclDevice::ReleaseOpenclRunEnv()
443 ReleaseOpenclEnv( &gpuEnv );
449 inline int OpenclDevice::AddKernelConfig(
int kCount,
const char *kName )
452 fprintf(stderr,
"Error: ( KCount < 1 ) AddKernelConfig\n" );
453 strcpy( gpuEnv.mArrykernelNames[kCount-1], kName );
454 gpuEnv.mnKernelCount++;
457 int OpenclDevice::RegistOpenclKernel()
459 if ( !gpuEnv.mnIsUserCreated )
460 memset( &gpuEnv, 0,
sizeof(gpuEnv) );
462 gpuEnv.mnFileCount = 0;
463 gpuEnv.mnKernelCount = 0UL;
465 AddKernelConfig( 1, (
const char*)
"oclAverageSub1" );
468 int OpenclDevice::InitOpenclRunEnv(
int argc )
471 if ( MAX_CLKERNEL_NUM <= 0 )
475 if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
480 RegistOpenclKernel();
482 status = InitOpenclRunEnv( &gpuEnv );
485 fprintf(stderr,
"init_opencl_env failed.\n");
488 fprintf(stderr,
"init_opencl_env successed.\n");
490 if( getenv(
"SC_FLOAT" ) )
492 gpuEnv.mnKhrFp64Flag = 0;
493 gpuEnv.mnAmdFp64Flag = 0;
495 if( gpuEnv.mnKhrFp64Flag )
497 fprintf(stderr,
"----use khr double type in kernel----\n");
498 status = CompileKernelFile( &gpuEnv,
"-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16" );
500 else if( gpuEnv.mnAmdFp64Flag )
502 fprintf(stderr,
"----use amd double type in kernel----\n");
503 status = CompileKernelFile( &gpuEnv,
"-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16" );
507 fprintf(stderr,
"----use float type in kernel----\n");
508 status = CompileKernelFile( &gpuEnv,
"-Dfp_t=float -Dfp_t4=float4 -Dfp_t16=float16" );
510 if ( status == 0 || gpuEnv.mnKernelCount == 0 )
512 fprintf(stderr,
"CompileKernelFile failed.\n");
515 fprintf(stderr,
"CompileKernelFile successed.\n");
521 int OpenclDevice::InitOpenclRunEnv_DeviceSelection(
int argc ) {
523 #if USE_DEVICE_SELECTION
527 ds_device bestDevice_DS = getDeviceSelection( );
529 cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
531 if (selectedDeviceIsOpenCL() ) {
533 populateGPUEnvFromDevice( &gpuEnv, bestDevice );
534 gpuEnv.mnFileCount = 0;
535 gpuEnv.mnKernelCount = 0UL;
537 CompileKernelFile(&gpuEnv,
"");
550 OpenclDevice::OpenclDevice()
555 OpenclDevice::~OpenclDevice()
560 int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
570 for ( i = 0; i < gpuEnv.mnFileCount; i++ )
572 if ( gpuEnv.mpArryPrograms[i] )
574 clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
575 CHECK_OPENCL( clStatus,
"clReleaseProgram" );
576 gpuEnv.mpArryPrograms[i] =
NULL;
579 if ( gpuEnv.mpCmdQueue )
581 clReleaseCommandQueue( gpuEnv.mpCmdQueue );
582 gpuEnv.mpCmdQueue =
NULL;
584 if ( gpuEnv.mpContext )
586 clReleaseContext( gpuEnv.mpContext );
587 gpuEnv.mpContext =
NULL;
590 gpuInfo->mnIsUserCreated = 0;
591 free( gpuInfo->mpArryDevsID );
594 int OpenclDevice::BinaryGenerated(
const char * clFileName, FILE ** fhandle )
601 cl_uint numDevices=0;
602 if ( getenv(
"SC_OPENCLCPU") )
604 clStatus = clGetDeviceIDs(gpuEnv.mpPlatformID,
612 clStatus = clGetDeviceIDs(gpuEnv.mpPlatformID,
618 CHECK_OPENCL( clStatus,
"clGetDeviceIDs" );
619 for ( i = 0; i < numDevices; i++ )
621 char fileName[256] = { 0 }, cl_name[128] = { 0 };
622 if ( gpuEnv.mpArryDevsID[i] != 0 )
624 char deviceName[1024];
625 clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
sizeof(deviceName), deviceName, NULL );
626 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
627 str = (
char*) strstr( clFileName, (
char*)
".cl" );
628 memcpy( cl_name, clFileName, str - clFileName );
629 cl_name[str - clFileName] =
'\0';
630 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
631 legalizeFileName(fileName);
632 fd = fopen( fileName,
"rb" );
633 status = ( fd !=
NULL ) ? 1 : 0;
643 int OpenclDevice::CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName )
646 for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
648 if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
650 if ( gpuEnvCached->mpArryPrograms[i] != NULL )
659 int OpenclDevice::WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes )
662 output = fopen( fileName,
"wb" );
663 if ( output == NULL )
668 fwrite( birary,
sizeof(
char), numBytes, output );
674 int OpenclDevice::GeneratBinFromKernelSource( cl_program program,
const char * clFileName )
678 size_t *binarySizes, numDevices;
679 cl_device_id *mpArryDevsID;
680 char **binaries, *str =
NULL;
682 clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
683 sizeof(numDevices), &numDevices, NULL );
684 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
686 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
687 if ( mpArryDevsID == NULL )
692 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
693 sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL );
694 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
697 binarySizes = (
size_t*) malloc(
sizeof(
size_t) * numDevices );
699 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
700 sizeof(
size_t) * numDevices, binarySizes, NULL );
701 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
704 binaries = (
char**) malloc(
sizeof(
char *) * numDevices );
705 if ( binaries == NULL )
710 for ( i = 0; i < numDevices; i++ )
712 if ( binarySizes[i] != 0 )
714 binaries[i] = (
char*) malloc(
sizeof(
char) * binarySizes[i] );
715 if ( binaries[i] == NULL )
718 for(
int cleanupIndex = 0; cleanupIndex < i; ++cleanupIndex)
720 free(binaries[cleanupIndex]);
734 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
735 sizeof(
char *) * numDevices, binaries, NULL );
736 CHECK_OPENCL(clStatus,
"clGetProgramInfo");
739 for ( i = 0; i < numDevices; i++ )
741 char fileName[256] = { 0 }, cl_name[128] = { 0 };
743 if ( binarySizes[i] != 0 )
745 char deviceName[1024];
746 clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
747 sizeof(deviceName), deviceName, NULL);
748 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
750 str = (
char*) strstr( clFileName, (
char*)
".cl" );
751 memcpy( cl_name, clFileName, str - clFileName );
752 cl_name[str - clFileName] =
'\0';
753 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
754 legalizeFileName(fileName);
755 if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
757 printf(
"[OD] write binary[%s] failed\n", fileName);
760 printf(
"[OD] write binary[%s] succesfully\n", fileName);
765 for ( i = 0; i < numDevices; i++ )
767 if ( binaries[i] != NULL )
774 if ( binaries != NULL )
780 if ( binarySizes != NULL )
786 if ( mpArryDevsID != NULL )
788 free( mpArryDevsID );
794 void copyIntBuffer( KernelEnv rEnv, cl_mem xValues,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus )
796 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
798 clFinish( rEnv.mpkCmdQueue );
799 if (_pValues != NULL)
801 for (
int i = 0; i < (int)nElements; i++ )
802 pValues[i] = (l_int32)_pValues[i];
805 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
810 int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption )
815 char *buildLog =
NULL, *binary;
817 size_t source_size[1];
818 int b_error, binary_status, binaryExisted, idx;
820 cl_device_id *mpArryDevsID;
824 if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
829 idx = gpuInfo->mnFileCount;
833 source_size[0] = strlen( source );
835 binaryExisted = BinaryGenerated( filename, &fd );
837 if ( binaryExisted == 1 )
839 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
840 sizeof(numDevices), &numDevices, NULL );
841 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
843 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
844 if ( mpArryDevsID == NULL )
851 b_error |= fseek( fd, 0, SEEK_END ) < 0;
852 b_error |= ( length = ftell(fd) ) <= 0;
853 b_error |= fseek( fd, 0, SEEK_SET ) < 0;
859 binary = (
char*) malloc( length + 2 );
865 memset( binary, 0, length + 2 );
866 b_error |= fread( binary, 1, length, fd ) != length;
873 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
874 sizeof( cl_device_id ) * numDevices, mpArryDevsID, NULL );
875 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
878 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
879 mpArryDevsID, &length, (
const unsigned char**) &binary,
880 &binary_status, &clStatus );
881 CHECK_OPENCL( clStatus,
"clCreateProgramWithBinary" );
884 free( mpArryDevsID );
892 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source,
893 source_size, &clStatus);
894 CHECK_OPENCL( clStatus,
"clCreateProgramWithSource" );
898 if ( gpuInfo->mpArryPrograms[idx] == (cl_program) NULL )
907 if (!gpuInfo->mnIsUserCreated)
909 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
910 buildOption, NULL, NULL);
915 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
916 buildOption, NULL, NULL);
920 if ( clStatus != CL_SUCCESS )
922 printf (
"BuildProgram error!\n");
923 if ( !gpuInfo->mnIsUserCreated )
925 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
926 CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
930 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
931 CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
933 if ( clStatus != CL_SUCCESS )
935 printf(
"opencl create build log fail\n");
938 buildLog = (
char*) malloc( length );
939 if ( buildLog == (
char*)
NULL )
943 if ( !gpuInfo->mnIsUserCreated )
945 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
946 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
950 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
951 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
953 if ( clStatus != CL_SUCCESS )
955 printf(
"opencl program build info fail\n");
959 fd1 = fopen(
"kernel-build.log",
"w+" );
962 fwrite( buildLog,
sizeof(
char), length, fd1 );
971 strcpy( gpuInfo->mArryKnelSrcFile[idx], filename );
973 if ( binaryExisted == 0 ) {
974 GeneratBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
978 gpuInfo->mnFileCount += 1;
983 l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line)
988 size_t globalThreads[2];
989 size_t localThreads[2];
995 gsize = (w + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
996 globalThreads[0] = gsize;
997 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
998 globalThreads[1] = gsize;
999 localThreads[0] = GROUPSIZE_X;
1000 localThreads[1] = GROUPSIZE_Y;
1002 SetKernelEnv( &rEnv );
1004 l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32));
1005 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus );
1006 CHECK_OPENCL( clStatus, "clCreateKernel");
1009 valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1010 outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
1013 clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (
void *)&valuesCl );
1014 CHECK_OPENCL( clStatus, "clSetKernelArg");
1015 clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(w), (
void *)&w );
1016 CHECK_OPENCL( clStatus, "clSetKernelArg" );
1017 clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), (
void *)&h );
1018 CHECK_OPENCL( clStatus, "clSetKernelArg" );
1019 clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), (
void *)&wpl );
1020 CHECK_OPENCL( clStatus, "clSetKernelArg" );
1021 clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (
void *)&outputCl );
1022 CHECK_OPENCL( clStatus, "clSetKernelArg");
1026 clStatus = clEnqueueNDRangeKernel( rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL );
1027 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" );
1030 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, 0, w*h * sizeof(l_uint32), 0, NULL, NULL, &clStatus);
1031 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer outputCl");
1032 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL);
1035 clFinish( rEnv.mpkCmdQueue );
1042 PIX * OpenclDevice::pixReadTiffCl ( const
char *filename, l_int32 n )
1049 PROCNAME("pixReadTiff");
1052 return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
1054 if ((fp = fopenReadStream(filename)) == NULL)
1055 return (PIX *)ERROR_PTR("image file not found", procName, NULL);
1056 if ((pix = pixReadStreamTiffCl(fp, n)) == NULL) {
1058 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1066 OpenclDevice::fopenTiffCl(FILE *fp,
1067 const char *modestring)
1071 PROCNAME(
"fopenTiff");
1074 return (TIFF *)ERROR_PTR(
"stream not opened", procName, NULL);
1076 return (TIFF *)ERROR_PTR(
"modestring not defined", procName, NULL);
1078 if ((fd = fileno(fp)) < 0)
1079 return (TIFF *)ERROR_PTR(
"invalid file descriptor", procName, NULL);
1080 lseek(fd, 0, SEEK_SET);
1082 return TIFFFdOpen(fd,
"TIFFstream", modestring);
1084 l_int32 OpenclDevice::getTiffStreamResolutionCl(TIFF *tif,
1089 l_int32 foundxres, foundyres;
1090 l_float32 fxres, fyres;
1092 PROCNAME(
"getTiffStreamResolution");
1095 return ERROR_INT(
"tif not opened", procName, 1);
1096 if (!pxres || !pyres)
1097 return ERROR_INT(
"&xres and &yres not both defined", procName, 1);
1098 *pxres = *pyres = 0;
1100 TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit);
1101 foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres);
1102 foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres);
1103 if (!foundxres && !foundyres)
return 1;
1104 if (!foundxres && foundyres)
1106 else if (foundxres && !foundyres)
1109 if (resunit == RESUNIT_CENTIMETER) {
1110 *pxres = (l_int32)(2.54 * fxres + 0.5);
1111 *pyres = (l_int32)(2.54 * fyres + 0.5);
1114 *pxres = (l_int32)fxres;
1115 *pyres = (l_int32)fyres;
1132 typedef struct L_Memstream L_MEMSTREAM;
1135 static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata,
size_t pinsize);
1136 static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata,
1138 static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length);
1139 static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data,
1141 static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence);
1142 static l_int32 tiffCloseCallback(thandle_t handle);
1143 static toff_t tiffSizeCallback(thandle_t handle);
1144 static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length);
1145 static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length);
1148 static L_MEMSTREAM *
1149 memstreamCreateForRead(l_uint8 *indata,
1152 L_MEMSTREAM *mstream;
1154 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
1155 mstream->buffer = indata;
1156 mstream->bufsize = insize;
1157 mstream->hw = insize;
1158 mstream->offset = 0;
1163 static L_MEMSTREAM *
1164 memstreamCreateForWrite(l_uint8 **poutdata,
1167 L_MEMSTREAM *mstream;
1169 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
1170 mstream->buffer = (l_uint8 *)CALLOC(8 * 1024, 1);
1171 mstream->bufsize = 8 * 1024;
1172 mstream->poutdata = poutdata;
1173 mstream->poutsize = poutsize;
1174 mstream->hw = mstream->offset = 0;
1180 tiffReadCallback(thandle_t handle,
1184 L_MEMSTREAM *mstream;
1187 mstream = (L_MEMSTREAM *)handle;
1188 amount = L_MIN((
size_t)length, mstream->hw - mstream->offset);
1189 memcpy(data, mstream->buffer + mstream->offset, amount);
1190 mstream->offset += amount;
1196 tiffWriteCallback(thandle_t handle,
1200 L_MEMSTREAM *mstream;
1207 mstream = (L_MEMSTREAM *)handle;
1208 if (mstream->offset + length > mstream->bufsize) {
1209 newsize = 2 * (mstream->offset + length);
1210 mstream->buffer = (l_uint8 *)reallocNew((
void **)&mstream->buffer,
1211 mstream->offset, newsize);
1212 mstream->bufsize = newsize;
1215 memcpy(mstream->buffer + mstream->offset, data, length);
1216 mstream->offset += length;
1217 mstream->hw = L_MAX(mstream->offset, mstream->hw);
1223 tiffSeekCallback(thandle_t handle,
1227 L_MEMSTREAM *mstream;
1229 PROCNAME(
"tiffSeekCallback");
1230 mstream = (L_MEMSTREAM *)handle;
1234 mstream->offset = offset;
1238 mstream->offset += offset;
1243 mstream->offset = mstream->hw - offset;
1246 return (toff_t)ERROR_INT(
"bad whence value", procName,
1250 return mstream->offset;
1255 tiffCloseCallback(thandle_t handle)
1257 L_MEMSTREAM *mstream;
1259 mstream = (L_MEMSTREAM *)handle;
1260 if (mstream->poutdata) {
1261 *mstream->poutdata = mstream->buffer;
1262 *mstream->poutsize = mstream->hw;
1270 tiffSizeCallback(thandle_t handle)
1272 L_MEMSTREAM *mstream;
1274 mstream = (L_MEMSTREAM *)handle;
1280 tiffMapCallback(thandle_t handle,
1284 L_MEMSTREAM *mstream;
1286 mstream = (L_MEMSTREAM *)handle;
1287 *data = mstream->buffer;
1288 *length = mstream->hw;
1294 tiffUnmapCallback(thandle_t handle,
1319 fopenTiffMemstream(
const char *filename,
1320 const char *operation,
1324 L_MEMSTREAM *mstream;
1326 PROCNAME(
"fopenTiffMemstream");
1329 return (TIFF *)ERROR_PTR(
"filename not defined", procName, NULL);
1331 return (TIFF *)ERROR_PTR(
"operation not defined", procName, NULL);
1333 return (TIFF *)ERROR_PTR(
"&data not defined", procName, NULL);
1335 return (TIFF *)ERROR_PTR(
"&datasize not defined", procName, NULL);
1336 if (!strcmp(operation,
"r") && !strcmp(operation,
"w"))
1337 return (TIFF *)ERROR_PTR(
"operation not 'r' or 'w'}", procName, NULL);
1339 if (!strcmp(operation,
"r"))
1340 mstream = memstreamCreateForRead(*pdata, *pdatasize);
1342 mstream = memstreamCreateForWrite(pdata, pdatasize);
1344 return TIFFClientOpen(filename, operation, mstream,
1345 tiffReadCallback, tiffWriteCallback,
1346 tiffSeekCallback, tiffCloseCallback,
1347 tiffSizeCallback, tiffMapCallback,
1354 OpenclDevice::pixReadMemTiffCl(
const l_uint8 *data,
size_t size,l_int32 n)
1356 l_int32 i, pagefound;
1359 L_MEMSTREAM *memStream;
1360 PROCNAME(
"pixReadMemTiffCl");
1363 return (PIX *)ERROR_PTR(
"data pointer is NULL", procName, NULL);
1365 if ((tif = fopenTiffMemstream(
"",
"r", (l_uint8 **)&data, &size)) == NULL)
1366 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1370 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1373 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1375 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1379 if (TIFFReadDirectory(tif) == 0)
1383 if (pagefound ==
FALSE) {
1384 L_WARNING(
"tiff page %d not found", procName);
1394 OpenclDevice::pixReadStreamTiffCl(FILE *fp,
1397 l_int32 i, pagefound;
1401 PROCNAME(
"pixReadStreamTiff");
1404 return (PIX *)ERROR_PTR(
"stream not defined", procName, NULL);
1406 if ((tif = fopenTiffCl(fp,
"rb")) == NULL)
1407 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1411 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1414 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1416 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1420 if (TIFFReadDirectory(tif) == 0)
1424 if (pagefound ==
FALSE) {
1425 L_WARNING(
"tiff page %d not found", procName, n);
1435 getTiffCompressedFormat(l_uint16 tiffcomp)
1441 case COMPRESSION_CCITTFAX4:
1442 comptype = IFF_TIFF_G4;
1444 case COMPRESSION_CCITTFAX3:
1445 comptype = IFF_TIFF_G3;
1447 case COMPRESSION_CCITTRLE:
1448 comptype = IFF_TIFF_RLE;
1450 case COMPRESSION_PACKBITS:
1451 comptype = IFF_TIFF_PACKBITS;
1453 case COMPRESSION_LZW:
1454 comptype = IFF_TIFF_LZW;
1456 case COMPRESSION_ADOBE_DEFLATE:
1457 comptype = IFF_TIFF_ZIP;
1460 comptype = IFF_TIFF;
1466 void compare(l_uint32 *cpu, l_uint32 *gpu,
int size)
1468 for(
int i=0;i<size;i++)
1472 printf(
"\ndoesnot match\n");
1476 printf(
"\nit matches\n");
1483 OpenclDevice::pixReadFromTiffStreamCl(TIFF *tif)
1485 l_uint8 *linebuf, *data;
1486 l_uint16 spp, bps, bpp, tiffbpl, photometry, tiffcomp, orientation;
1487 l_uint16 *redmap, *greenmap, *bluemap;
1488 l_int32 d, wpl, bpl, comptype, i, ncolors;
1491 l_uint32 *line, *tiffdata;
1495 PROCNAME(
"pixReadFromTiffStream");
1498 return (PIX *)ERROR_PTR(
"tif not defined", procName, NULL);
1501 TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
1502 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
1505 return (PIX *)ERROR_PTR(
"can't handle bpp > 32", procName, NULL);
1508 else if (spp == 3 || spp == 4)
1511 return (PIX *)ERROR_PTR(
"spp not in set {1,3,4}", procName, NULL);
1513 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
1514 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
1515 tiffbpl = TIFFScanlineSize(tif);
1517 if ((pix = pixCreate(w, h, d)) == NULL)
1518 return (PIX *)ERROR_PTR(
"pix not made", procName, NULL);
1519 data = (l_uint8 *)pixGetData(pix);
1520 wpl = pixGetWpl(pix);
1525 if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1,
sizeof(l_uint8))) == NULL)
1526 return (PIX *)ERROR_PTR(
"calloc fail for linebuf", procName, NULL);
1528 for (i = 0 ; i < h ; i++) {
1529 if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
1532 return (PIX *)ERROR_PTR(
"line read fail", procName, NULL);
1534 memcpy((
char *)data, (
char *)linebuf, tiffbpl);
1538 pixEndianByteSwap(pix);
1540 pixEndianTwoByteSwap(pix);
1544 if ((tiffdata = (l_uint32 *)CALLOC(w * h,
sizeof(l_uint32))) == NULL) {
1546 return (PIX *)ERROR_PTR(
"calloc fail for tiffdata", procName, NULL);
1548 if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata,
1549 ORIENTATION_TOPLEFT, 0)) {
1552 return (PIX *)ERROR_PTR(
"failed to read tiffdata", procName, NULL);
1554 line = pixGetData(pix);
1557 l_uint32* output_gpu=pixReadFromTiffKernel(tiffdata,w,h,wpl,line);
1558 pixSetData(pix, output_gpu);
1563 if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) {
1564 pixSetXRes(pix, xres);
1565 pixSetYRes(pix, yres);
1569 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
1570 comptype = getTiffCompressedFormat(tiffcomp);
1571 pixSetInputFormat(pix, comptype);
1573 if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
1575 if ((cmap = pixcmapCreate(bps)) == NULL) {
1577 return (PIX *)ERROR_PTR(
"cmap not made", procName, NULL);
1580 for (i = 0; i < ncolors; i++)
1581 pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
1583 pixSetColormap(pix, cmap);
1586 if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
1588 if (tiffcomp == COMPRESSION_CCITTFAX3 ||
1589 tiffcomp == COMPRESSION_CCITTFAX4 ||
1590 tiffcomp == COMPRESSION_CCITTRLE ||
1591 tiffcomp == COMPRESSION_CCITTRLEW) {
1592 photometry = PHOTOMETRIC_MINISWHITE;
1595 photometry = PHOTOMETRIC_MINISBLACK;
1597 if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
1598 (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
1599 pixInvert(pix, pix);
1602 if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
1603 if (orientation >= 1 && orientation <= 8) {
1604 struct tiff_transform *transform =
1605 &tiff_orientation_transforms[orientation - 1];
1606 if (transform->vflip) pixFlipTB(pix, pix);
1607 if (transform->hflip) pixFlipLR(pix, pix);
1608 if (transform->rotate) {
1610 pix = pixRotate90(oldpix, transform->rotate);
1611 pixDestroy(&oldpix);
1621 pixDilateCL_55(l_int32 wpl, l_int32 h)
1623 size_t globalThreads[2];
1627 size_t localThreads[2];
1630 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1631 globalThreads[0] = gsize;
1632 globalThreads[1] = GROUPSIZE_HMORY;
1633 localThreads[0] = GROUPSIZE_HMORX;
1634 localThreads[1] = GROUPSIZE_HMORY;
1636 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_5x5", &status );
1638 status = clSetKernelArg(rEnv.mpkKernel,
1642 status = clSetKernelArg(rEnv.mpkKernel,
1646 status = clSetKernelArg(rEnv.mpkKernel,
1649 (
const void *)&wpl);
1650 status = clSetKernelArg(rEnv.mpkKernel,
1655 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1666 pixtemp = pixsCLBuffer;
1667 pixsCLBuffer = pixdCLBuffer;
1668 pixdCLBuffer = pixtemp;
1671 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1672 globalThreads[0] = gsize;
1673 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1674 globalThreads[1] = gsize;
1675 localThreads[0] = GROUPSIZE_X;
1676 localThreads[1] = GROUPSIZE_Y;
1678 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer_5x5", &status );
1680 status = clSetKernelArg(rEnv.mpkKernel,
1684 status = clSetKernelArg(rEnv.mpkKernel,
1688 status = clSetKernelArg(rEnv.mpkKernel,
1691 (
const void *)&wpl);
1692 status = clSetKernelArg(rEnv.mpkKernel,
1696 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1711 pixErodeCL_55(l_int32 wpl, l_int32 h)
1713 size_t globalThreads[2];
1717 l_uint32 fwmask, lwmask;
1718 size_t localThreads[2];
1720 lwmask = lmask32[32 - 2];
1721 fwmask = rmask32[32 - 2];
1724 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1725 globalThreads[0] = gsize;
1726 globalThreads[1] = GROUPSIZE_HMORY;
1727 localThreads[0] = GROUPSIZE_HMORX;
1728 localThreads[1] = GROUPSIZE_HMORY;
1730 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_5x5", &status );
1732 status = clSetKernelArg(rEnv.mpkKernel,
1736 status = clSetKernelArg(rEnv.mpkKernel,
1740 status = clSetKernelArg(rEnv.mpkKernel,
1743 (
const void *)&wpl);
1744 status = clSetKernelArg(rEnv.mpkKernel,
1749 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1760 pixtemp = pixsCLBuffer;
1761 pixsCLBuffer = pixdCLBuffer;
1762 pixdCLBuffer = pixtemp;
1765 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1766 globalThreads[0] = gsize;
1767 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1768 globalThreads[1] = gsize;
1769 localThreads[0] = GROUPSIZE_X;
1770 localThreads[1] = GROUPSIZE_Y;
1772 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer_5x5", &status );
1774 status = clSetKernelArg(rEnv.mpkKernel,
1778 status = clSetKernelArg(rEnv.mpkKernel,
1782 status = clSetKernelArg(rEnv.mpkKernel,
1785 (
const void *)&wpl);
1786 status = clSetKernelArg(rEnv.mpkKernel,
1790 status = clSetKernelArg(rEnv.mpkKernel,
1793 (
const void *)&fwmask);
1794 status = clSetKernelArg(rEnv.mpkKernel,
1797 (
const void *)&lwmask);
1798 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1813 pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1815 l_int32 xp, yp, xn, yn;
1817 size_t globalThreads[2];
1821 size_t localThreads[2];
1824 OpenclDevice::SetKernelEnv( &rEnv );
1826 if (hsize == 5 && vsize == 5)
1829 status = pixDilateCL_55(wpl, h);
1833 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1835 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1838 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1839 globalThreads[0] = gsize;
1840 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1841 globalThreads[1] = gsize;
1842 localThreads[0] = GROUPSIZE_X;
1843 localThreads[1] = GROUPSIZE_Y;
1845 if (xp > 31 || xn > 31)
1848 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor", &status );
1850 status = clSetKernelArg(rEnv.mpkKernel,
1854 status = clSetKernelArg(rEnv.mpkKernel,
1858 status = clSetKernelArg(rEnv.mpkKernel,
1862 status = clSetKernelArg(rEnv.mpkKernel,
1866 status = clSetKernelArg(rEnv.mpkKernel,
1869 (
const void *)&wpl);
1870 status = clSetKernelArg(rEnv.mpkKernel,
1874 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1884 if (yp > 0 || yn > 0)
1886 pixtemp = pixsCLBuffer;
1887 pixsCLBuffer = pixdCLBuffer;
1888 pixdCLBuffer = pixtemp;
1891 else if (xp > 0 || xn > 0 )
1894 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_32word", &status );
1895 isEven = (xp != xn);
1897 status = clSetKernelArg(rEnv.mpkKernel,
1901 status = clSetKernelArg(rEnv.mpkKernel,
1905 status = clSetKernelArg(rEnv.mpkKernel,
1909 status = clSetKernelArg(rEnv.mpkKernel,
1912 (
const void *)&wpl);
1913 status = clSetKernelArg(rEnv.mpkKernel,
1917 status = clSetKernelArg(rEnv.mpkKernel,
1920 (
const void *)&isEven);
1921 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1931 if (yp > 0 || yn > 0)
1933 pixtemp = pixsCLBuffer;
1934 pixsCLBuffer = pixdCLBuffer;
1935 pixdCLBuffer = pixtemp;
1939 if (yp > 0 || yn > 0)
1941 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer", &status );
1943 status = clSetKernelArg(rEnv.mpkKernel,
1947 status = clSetKernelArg(rEnv.mpkKernel,
1951 status = clSetKernelArg(rEnv.mpkKernel,
1955 status = clSetKernelArg(rEnv.mpkKernel,
1958 (
const void *)&wpl);
1959 status = clSetKernelArg(rEnv.mpkKernel,
1963 status = clSetKernelArg(rEnv.mpkKernel,
1967 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
1984 pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h)
1987 l_int32 xp, yp, xn, yn;
1989 size_t globalThreads[2];
1990 size_t localThreads[2];
1994 char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
1995 l_uint32 rwmask, lwmask;
1998 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
2000 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
2002 OpenclDevice::SetKernelEnv( &rEnv );
2004 if (hsize == 5 && vsize == 5 && isAsymmetric)
2007 status = pixErodeCL_55(wpl, h);
2011 rwmask = rmask32[32 - (xp & 31)];
2012 lwmask = lmask32[32 - (xn & 31)];
2015 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2016 globalThreads[0] = gsize;
2017 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2018 globalThreads[1] = gsize;
2019 localThreads[0] = GROUPSIZE_X;
2020 localThreads[1] = GROUPSIZE_Y;
2023 if (xp > 31 || xn > 31 )
2026 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor", &status );
2028 status = clSetKernelArg(rEnv.mpkKernel,
2032 status = clSetKernelArg(rEnv.mpkKernel,
2036 status = clSetKernelArg(rEnv.mpkKernel,
2040 status = clSetKernelArg(rEnv.mpkKernel,
2044 status = clSetKernelArg(rEnv.mpkKernel,
2047 (
const void *)&wpl);
2048 status = clSetKernelArg(rEnv.mpkKernel,
2052 status = clSetKernelArg(rEnv.mpkKernel,
2054 sizeof(isAsymmetric),
2055 (
const void *)&isAsymmetric);
2056 status = clSetKernelArg(rEnv.mpkKernel,
2059 (
const void *)&rwmask);
2060 status = clSetKernelArg(rEnv.mpkKernel,
2063 (
const void *)&lwmask);
2064 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2074 if (yp > 0 || yn > 0)
2076 pixtemp = pixsCLBuffer;
2077 pixsCLBuffer = pixdCLBuffer;
2078 pixdCLBuffer = pixtemp;
2081 else if (xp > 0 || xn > 0)
2083 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_32word", &status );
2084 isEven = (xp != xn);
2086 status = clSetKernelArg(rEnv.mpkKernel,
2090 status = clSetKernelArg(rEnv.mpkKernel,
2094 status = clSetKernelArg(rEnv.mpkKernel,
2098 status = clSetKernelArg(rEnv.mpkKernel,
2101 (
const void *)&wpl);
2102 status = clSetKernelArg(rEnv.mpkKernel,
2106 status = clSetKernelArg(rEnv.mpkKernel,
2108 sizeof(isAsymmetric),
2109 (
const void *)&isAsymmetric);
2110 status = clSetKernelArg(rEnv.mpkKernel,
2113 (
const void *)&rwmask);
2114 status = clSetKernelArg(rEnv.mpkKernel,
2117 (
const void *)&lwmask);
2118 status = clSetKernelArg(rEnv.mpkKernel,
2121 (
const void *)&isEven);
2122 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2132 if (yp > 0 || yn > 0)
2134 pixtemp = pixsCLBuffer;
2135 pixsCLBuffer = pixdCLBuffer;
2136 pixdCLBuffer = pixtemp;
2141 if (yp > 0 || yn > 0)
2143 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer", &status );
2145 status = clSetKernelArg(rEnv.mpkKernel,
2149 status = clSetKernelArg(rEnv.mpkKernel,
2153 status = clSetKernelArg(rEnv.mpkKernel,
2157 status = clSetKernelArg(rEnv.mpkKernel,
2160 (
const void *)&wpl);
2161 status = clSetKernelArg(rEnv.mpkKernel,
2165 status = clSetKernelArg(rEnv.mpkKernel,
2167 sizeof(isAsymmetric),
2168 (
const void *)&isAsymmetric);
2169 status = clSetKernelArg(rEnv.mpkKernel,
2173 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2190 OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy =
false)
2194 wpl = pixGetWpl(pixs);
2195 h = pixGetHeight(pixs);
2197 clStatus = pixDilateCL(hsize, vsize, wpl, h);
2201 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ,
false);
2210 OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy =
false)
2214 wpl = pixGetWpl(pixs);
2215 h = pixGetHeight(pixs);
2217 clStatus = pixErodeCL(hsize, vsize, wpl, h);
2221 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2229 pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
2235 status = pixErodeCL(hsize, vsize, wpl, h);
2237 pixtemp = pixsCLBuffer;
2238 pixsCLBuffer = pixdCLBuffer;
2239 pixdCLBuffer = pixtemp;
2241 status = pixDilateCL(hsize, vsize, wpl, h);
2248 pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
2254 status = pixDilateCL(hsize, vsize, wpl, h);
2256 pixtemp = pixsCLBuffer;
2257 pixsCLBuffer = pixdCLBuffer;
2258 pixdCLBuffer = pixtemp;
2260 status = pixErodeCL(hsize, vsize, wpl, h);
2268 OpenclDevice::pixCloseBrickCL(PIX *pixd,
2272 bool reqDataCopy =
false)
2276 wpl = pixGetWpl(pixs);
2277 h = pixGetHeight(pixs);
2279 clStatus = pixCloseCL(hsize, vsize, wpl, h);
2283 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2292 OpenclDevice::pixOpenBrickCL(PIX *pixd,
2296 bool reqDataCopy =
false)
2300 wpl = pixGetWpl(pixs);
2301 h = pixGetHeight(pixs);
2303 clStatus = pixOpenCL(hsize, vsize, wpl, h);
2307 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2315 pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
2318 size_t globalThreads[2];
2320 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2322 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2323 globalThreads[0] = gsize;
2324 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2325 globalThreads[1] = gsize;
2327 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixOR", &status );
2329 status = clSetKernelArg(rEnv.mpkKernel,
2333 status = clSetKernelArg(rEnv.mpkKernel,
2337 status = clSetKernelArg(rEnv.mpkKernel,
2341 status = clSetKernelArg(rEnv.mpkKernel,
2344 (
const void *)&wpl);
2345 status = clSetKernelArg(rEnv.mpkKernel,
2349 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2364 pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
2367 size_t globalThreads[2];
2369 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2371 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2372 globalThreads[0] = gsize;
2373 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2374 globalThreads[1] = gsize;
2376 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixAND", &status );
2379 status = clSetKernelArg(rEnv.mpkKernel,
2383 status = clSetKernelArg(rEnv.mpkKernel,
2387 status = clSetKernelArg(rEnv.mpkKernel,
2391 status = clSetKernelArg(rEnv.mpkKernel,
2394 (
const void *)&wpl);
2395 status = clSetKernelArg(rEnv.mpkKernel,
2399 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2414 pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outBuffer = NULL)
2417 size_t globalThreads[2];
2419 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
2421 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
2422 globalThreads[0] = gsize;
2423 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
2424 globalThreads[1] = gsize;
2426 if (outBuffer != NULL)
2428 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixSubtract", &status );
2432 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixSubtract_inplace", &status );
2436 status = clSetKernelArg(rEnv.mpkKernel,
2440 status = clSetKernelArg(rEnv.mpkKernel,
2444 status = clSetKernelArg(rEnv.mpkKernel,
2447 (
const void *)&wpl);
2448 status = clSetKernelArg(rEnv.mpkKernel,
2452 if (outBuffer != NULL)
2454 status = clSetKernelArg(rEnv.mpkKernel,
2457 (
const void *)&outBuffer);
2459 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue,
2475 OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
bool reqDataCopy =
false)
2479 PROCNAME(
"pixSubtractCL");
2482 return (PIX *)ERROR_PTR(
"pixs1 not defined", procName, pixd);
2484 return (PIX *)ERROR_PTR(
"pixs2 not defined", procName, pixd);
2485 if (pixGetDepth(pixs1) != pixGetDepth(pixs2))
2486 return (PIX *)ERROR_PTR(
"depths of pixs* unequal", procName, pixd);
2488 #if EQUAL_SIZE_WARNING
2489 if (!pixSizesEqual(pixs1, pixs2))
2490 L_WARNING(
"pixs1 and pixs2 not equal sizes", procName);
2493 wpl = pixGetWpl(pixs1);
2494 h = pixGetHeight(pixs1);
2496 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2501 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs1, wpl*h, CL_MAP_READ);
2510 OpenclDevice::pixHollowCL(PIX *pixd,
2512 l_int32 close_hsize,
2513 l_int32 close_vsize,
2516 bool reqDataCopy =
false)
2521 wpl = pixGetWpl(pixs);
2522 h = pixGetHeight(pixs);
2525 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2529 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2532 pixtemp = pixsCLBuffer;
2533 pixsCLBuffer = pixdCLBuffer;
2534 pixdCLBuffer = pixtemp;
2536 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2539 pixtemp = pixsCLBuffer;
2540 pixsCLBuffer = pixdCLBuffer;
2541 pixdCLBuffer = pixdCLIntermediate;
2542 pixdCLIntermediate = pixtemp;
2544 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2549 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ);
2557 OpenclDevice::pixGetLinesCL(PIX *pixd,
2563 l_int32 close_hsize, l_int32 close_vsize,
2564 l_int32 open_hsize, l_int32 open_vsize,
2565 l_int32 line_hsize, l_int32 line_vsize)
2570 wpl = pixGetWpl(pixs);
2571 h = pixGetHeight(pixs);
2574 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2579 *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl*h, CL_MAP_READ,
true,
false);
2584 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2587 pixtemp = pixsCLBuffer;
2588 pixsCLBuffer = pixdCLBuffer;
2589 pixdCLBuffer = pixtemp;
2591 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2594 pixtemp = pixsCLBuffer;
2595 pixsCLBuffer = pixdCLBuffer;
2596 pixdCLBuffer = pixdCLIntermediate;
2597 pixdCLIntermediate = pixtemp;
2599 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2603 clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0,
sizeof(
int) * wpl*h, 0, NULL, NULL);
2605 pixtemp = pixsCLBuffer;
2606 pixsCLBuffer = pixdCLBuffer;
2607 pixdCLBuffer = pixtemp;
2611 clStatus = pixOpenCL(1, line_vsize, wpl, h);
2614 *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl*h, CL_MAP_READ,
true,
false);
2616 pixtemp = pixsCLBuffer;
2617 pixsCLBuffer = pixdCLIntermediate;
2618 pixdCLIntermediate = pixtemp;
2622 clStatus = pixOpenCL(line_hsize, 1, wpl, h);
2625 *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl*h, CL_MAP_READ,
true,
true);
2637 void OpenclDevice::HistogramRectOCL(
2638 const unsigned char* imageData,
2639 int bytes_per_pixel,
2646 int* histogramAllChannels)
2651 SetKernelEnv( &histKern );
2652 KernelEnv histRedKern;
2653 SetKernelEnv( &histRedKern );
2658 cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(
char), (
void *)imageData, &clStatus );
2659 CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
2662 int block_size = 256;
2664 clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL);
2665 CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer");
2667 int requestedOccupancy = 10;
2668 int numWorkGroups = numCUs * requestedOccupancy;
2669 int numThreads = block_size*numWorkGroups;
2670 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
2671 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
2672 size_t red_global_work_size[] = {
2673 static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
2676 int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
2678 cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize*bytes_per_pixel*
sizeof(
int), (
void *)histogramAllChannels, &clStatus );
2679 CHECK_OPENCL( clStatus,
"clCreateBuffer histogramBuffer");
2683 int tmpHistogramBins = kHistogramSize*bytes_per_pixel*histRed;
2685 cl_mem tmpHistogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins*
sizeof(cl_uint), NULL, &clStatus );
2686 CHECK_OPENCL( clStatus,
"clCreateBuffer tmpHistogramBuffer");
2689 int *zeroBuffer =
new int[1];
2691 cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
sizeof(cl_int), (
void *)zeroBuffer, &clStatus );
2692 CHECK_OPENCL( clStatus,
"clCreateBuffer atomicSyncBuffer");
2695 if (bytes_per_pixel == 1)
2697 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectOneChannel", &clStatus );
2698 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectOneChannel");
2700 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectOneChannelReduction", &clStatus );
2701 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectOneChannelReduction");
2703 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectAllChannels", &clStatus );
2704 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannels");
2706 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectAllChannelsReduction", &clStatus );
2707 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannelsReduction");
2713 ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, tmpHistogramBins*
sizeof(cl_uint), 0, NULL, NULL, &clStatus);
2714 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer tmpHistogramBuffer");
2716 memset(ptr, 0, tmpHistogramBins*
sizeof(cl_uint));
2717 clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, NULL, NULL);
2720 clStatus = clSetKernelArg( histKern.mpkKernel, 0,
sizeof(cl_mem), (
void *)&imageBuffer );
2721 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2722 cl_uint numPixels = width*height;
2723 clStatus = clSetKernelArg( histKern.mpkKernel, 1,
sizeof(cl_uint), (
void *)&numPixels );
2724 CHECK_OPENCL( clStatus,
"clSetKernelArg numPixels" );
2725 clStatus = clSetKernelArg( histKern.mpkKernel, 2,
sizeof(cl_mem), (
void *)&tmpHistogramBuffer );
2726 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2729 int n = numThreads/bytes_per_pixel;
2730 clStatus = clSetKernelArg( histRedKern.mpkKernel, 0,
sizeof(cl_int), (
void *)&n );
2731 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2732 clStatus = clSetKernelArg( histRedKern.mpkKernel, 1,
sizeof(cl_mem), (
void *)&tmpHistogramBuffer );
2733 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2734 clStatus = clSetKernelArg( histRedKern.mpkKernel, 2,
sizeof(cl_mem), (
void *)&histogramBuffer );
2735 CHECK_OPENCL( clStatus,
"clSetKernelArg histogramBuffer");
2739 clStatus = clEnqueueNDRangeKernel(
2740 histKern.mpkCmdQueue,
2742 1, NULL, global_work_size, local_work_size,
2744 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" );
2745 clFinish( histKern.mpkCmdQueue );
2748 clStatus = clEnqueueNDRangeKernel(
2749 histRedKern.mpkCmdQueue,
2750 histRedKern.mpkKernel,
2751 1, NULL, red_global_work_size, local_work_size,
2753 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
2754 clFinish( histRedKern.mpkCmdQueue );
2759 ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(
int), 0, NULL, NULL, &clStatus);
2760 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
2762 clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL);
2764 clReleaseMemObject(histogramBuffer);
2765 clReleaseMemObject(imageBuffer);
2776 void OpenclDevice::ThresholdRectToPixOCL(
2777 const
unsigned char* imageData,
2778 int bytes_per_pixel,
2780 const
int* thresholds,
2781 const
int* hi_values,
2790 *pix = pixCreate(width, height, 1);
2791 uinT32* pixData = pixGetData(*pix);
2792 int wpl = pixGetWpl(*pix);
2793 int pixSize = wpl*height*
sizeof(
uinT32);
2797 SetKernelEnv( &rEnv );
2800 int block_size = 256;
2802 clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(numCUs), &numCUs, NULL);
2803 CHECK_OPENCL( clStatus,
"clCreateBuffer imageBuffer");
2805 int requestedOccupancy = 10;
2806 int numWorkGroups = numCUs * requestedOccupancy;
2807 int numThreads = block_size*numWorkGroups;
2808 size_t local_work_size[] = {(size_t) block_size};
2809 size_t global_work_size[] = {(size_t) numThreads};
2815 cl_mem imageBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*
sizeof(
char), (
void *)imageData, &clStatus );
2816 CHECK_OPENCL( clStatus,
"clCreateBuffer imageBuffer");
2819 pixThBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, (
void *)pixData, &clStatus );
2820 CHECK_OPENCL( clStatus,
"clCreateBuffer pix");
2823 cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*
sizeof(
int), (
void *)thresholds, &clStatus );
2824 CHECK_OPENCL( clStatus,
"clCreateBuffer thresholdBuffer");
2825 cl_mem hiValuesBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*
sizeof(
int), (
void *)hi_values, &clStatus );
2826 CHECK_OPENCL( clStatus,
"clCreateBuffer hiValuesBuffer");
2829 if (bytes_per_pixel == 4) {
2830 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"kernel_ThresholdRectToPix", &clStatus );
2831 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_ThresholdRectToPix");
2833 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"kernel_ThresholdRectToPix_OneChan", &clStatus );
2834 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_ThresholdRectToPix_OneChan");
2838 clStatus = clSetKernelArg( rEnv.mpkKernel, 0,
sizeof(cl_mem), (
void *)&imageBuffer );
2839 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2840 cl_uint numPixels = width*height;
2841 clStatus = clSetKernelArg( rEnv.mpkKernel, 1,
sizeof(
int), (
void *)&height );
2842 CHECK_OPENCL( clStatus,
"clSetKernelArg height" );
2843 clStatus = clSetKernelArg( rEnv.mpkKernel, 2,
sizeof(
int), (
void *)&width );
2844 CHECK_OPENCL( clStatus,
"clSetKernelArg width" );
2845 clStatus = clSetKernelArg( rEnv.mpkKernel, 3,
sizeof(
int), (
void *)&wpl );
2846 CHECK_OPENCL( clStatus,
"clSetKernelArg wpl" );
2847 clStatus = clSetKernelArg( rEnv.mpkKernel, 4,
sizeof(cl_mem), (
void *)&thresholdsBuffer );
2848 CHECK_OPENCL( clStatus,
"clSetKernelArg thresholdsBuffer" );
2849 clStatus = clSetKernelArg( rEnv.mpkKernel, 5,
sizeof(cl_mem), (
void *)&hiValuesBuffer );
2850 CHECK_OPENCL( clStatus,
"clSetKernelArg hiValuesBuffer" );
2851 clStatus = clSetKernelArg( rEnv.mpkKernel, 6,
sizeof(cl_mem), (
void *)&pixThBuffer );
2852 CHECK_OPENCL( clStatus,
"clSetKernelArg pixThBuffer");
2856 clStatus = clEnqueueNDRangeKernel(
2859 1, NULL, global_work_size, local_work_size,
2861 CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix" );
2862 clFinish( rEnv.mpkCmdQueue );
2866 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus);
2867 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
2868 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL);
2870 clReleaseMemObject(imageBuffer);
2871 clReleaseMemObject(thresholdsBuffer);
2872 clReleaseMemObject(hiValuesBuffer);
2879 #if USE_DEVICE_SELECTION
2885 typedef struct _TessScoreEvaluationInputData {
2889 unsigned char *imageData;
2891 } TessScoreEvaluationInputData;
2893 void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input ) {
2898 int numChannels = 4;
2899 input->height = height;
2900 input->width = width;
2901 input->numChannels = numChannels;
2902 unsigned char (*imageData4)[4] = (
unsigned char (*)[4]) malloc(height*width*numChannels*
sizeof(
unsigned char));
2903 input->imageData = (
unsigned char *) &imageData4[0];
2906 unsigned char pixelWhite[4] = { 0, 0, 0, 255};
2907 unsigned char pixelBlack[4] = {255, 255, 255, 255};
2908 for (
int p = 0; p < height*width; p++) {
2910 imageData4[p][0] = pixelWhite[0];
2911 imageData4[p][1] = pixelWhite[1];
2912 imageData4[p][2] = pixelWhite[2];
2913 imageData4[p][3] = pixelWhite[3];
2916 int maxLineWidth = 64;
2919 for (
int i = 0; i < numLines; i++) {
2920 int lineWidth = rand()%maxLineWidth;
2921 int vertLinePos = lineWidth + rand()%(width-2*lineWidth);
2923 for (
int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
2924 for (
int col = 0; col < height; col++) {
2926 imageData4[row*width+col][0] = pixelBlack[0];
2927 imageData4[row*width+col][1] = pixelBlack[1];
2928 imageData4[row*width+col][2] = pixelBlack[2];
2929 imageData4[row*width+col][3] = pixelBlack[3];
2934 for (
int i = 0; i < numLines; i++) {
2935 int lineWidth = rand()%maxLineWidth;
2936 int horLinePos = lineWidth + rand()%(height-2*lineWidth);
2938 for (
int row = 0; row < width; row++) {
2939 for (
int col = horLinePos-lineWidth/2; col < horLinePos+lineWidth/2; col++) {
2942 imageData4[row*width+col][0] = pixelBlack[0];
2943 imageData4[row*width+col][1] = pixelBlack[1];
2944 imageData4[row*width+col][2] = pixelBlack[2];
2945 imageData4[row*width+col][3] = pixelBlack[3];
2950 float fractionBlack = 0.1;
2951 int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
2952 for (
int i = 0; i < numSpots; i++) {
2954 int lineWidth = rand()%maxLineWidth;
2955 int col = lineWidth + rand()%(width-2*lineWidth);
2956 int row = lineWidth + rand()%(height-2*lineWidth);
2958 for (
int r = row-lineWidth/2; r < row+lineWidth/2; r++) {
2959 for (
int c = col-lineWidth/2; c < col+lineWidth/2; c++) {
2962 imageData4[r*width+c][0] = pixelBlack[0];
2963 imageData4[r*width+c][1] = pixelBlack[1];
2964 imageData4[r*width+c][2] = pixelBlack[2];
2965 imageData4[r*width+c][3] = pixelBlack[3];
2970 input->pix = pixCreate(input->width, input->height, 1);
2973 typedef struct _TessDeviceScore {
2983 double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2987 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2988 QueryPerformanceFrequency(&freq);
2990 mach_timespec_t time_funct_start, time_funct_end;
2992 TIMESPEC time_funct_start, time_funct_end;
2995 l_uint32 *tiffdata = (l_uint32 *)input.imageData;
2998 if (type == DS_DEVICE_OPENCL_DEVICE) {
3000 QueryPerformanceCounter(&time_funct_start);
3002 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3005 OpenclDevice::gpuEnv = *env;
3006 int wpl = pixGetWpl(input.pix);
3007 OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, wpl, NULL);
3009 QueryPerformanceCounter(&time_funct_end);
3010 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3012 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3013 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3018 QueryPerformanceCounter(&time_funct_start);
3020 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3022 Pix *pix = pixCreate(input.width, input.height, 32);
3023 l_uint32 *pixData = pixGetData(pix);
3024 int wpl = pixGetWpl(pix);
3029 for (i = 0; i < input.height ; i++) {
3030 for (j = 0; j < input.width; j++) {
3032 l_uint32 tiffword = tiffdata[i * input.width + j];
3033 l_int32 rval = ((tiffword) & 0xff);
3034 l_int32 gval = (((tiffword) >> 8) & 0xff);
3035 l_int32 bval = (((tiffword) >> 16) & 0xff);
3036 l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8);
3037 pixData[idx] = value;
3042 QueryPerformanceCounter(&time_funct_end);
3043 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3045 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3046 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3057 double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
3061 LARGE_INTEGER freq, time_funct_start, time_funct_end;
3062 QueryPerformanceFrequency(&freq);
3064 mach_timespec_t time_funct_start, time_funct_end;
3066 TIMESPEC time_funct_start, time_funct_end;
3069 unsigned char pixelHi = (
unsigned char)255;
3073 int kHistogramSize = 256;
3074 int bytes_per_line = input.width*input.numChannels;
3075 int *histogramAllChannels =
new int[kHistogramSize*input.numChannels];
3078 if (type == DS_DEVICE_OPENCL_DEVICE) {
3080 QueryPerformanceCounter(&time_funct_start);
3082 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3085 OpenclDevice::gpuEnv = *env;
3086 int wpl = pixGetWpl(input.pix);
3087 OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, top, left, input.width, input.height, kHistogramSize, histogramAllChannels);
3090 QueryPerformanceCounter(&time_funct_end);
3091 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3093 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3094 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3100 QueryPerformanceCounter(&time_funct_start);
3102 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3104 for (
int ch = 0; ch < input.numChannels; ++ch) {
3106 left, top, input.width, input.height, histogram);
3109 QueryPerformanceCounter(&time_funct_end);
3110 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3112 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3113 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3120 delete[] histogramAllChannels;
3125 void ThresholdRectToPix_Native(
const unsigned char* imagedata,
3126 int bytes_per_pixel,
3128 const int* thresholds,
3129 const int* hi_values,
3133 int width = pixGetWidth(*pix);
3134 int height = pixGetHeight(*pix);
3136 *pix = pixCreate(width, height, 1);
3137 uinT32* pixdata = pixGetData(*pix);
3138 int wpl = pixGetWpl(*pix);
3139 const unsigned char* srcdata = imagedata + top * bytes_per_line +
3140 left * bytes_per_pixel;
3141 for (
int y = 0; y < height; ++y) {
3142 const uinT8* linedata = srcdata;
3143 uinT32* pixline = pixdata + y * wpl;
3144 for (
int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
3145 bool white_result =
true;
3146 for (
int ch = 0; ch < bytes_per_pixel; ++ch) {
3147 if (hi_values[ch] >= 0 &&
3148 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
3149 white_result =
false;
3154 CLEAR_DATA_BIT(pixline, x);
3156 SET_DATA_BIT(pixline, x);
3158 srcdata += bytes_per_line;
3162 double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
3166 LARGE_INTEGER freq, time_funct_start, time_funct_end;
3167 QueryPerformanceFrequency(&freq);
3169 mach_timespec_t time_funct_start, time_funct_end;
3171 TIMESPEC time_funct_start, time_funct_end;
3175 unsigned char pixelHi = (
unsigned char)255;
3176 int* thresholds =
new int[4];
3177 thresholds[0] = pixelHi/2;
3178 thresholds[1] = pixelHi/2;
3179 thresholds[2] = pixelHi/2;
3180 thresholds[3] = pixelHi/2;
3181 int *hi_values =
new int[4];
3182 thresholds[0] = pixelHi;
3183 thresholds[1] = pixelHi;
3184 thresholds[2] = pixelHi;
3185 thresholds[3] = pixelHi;
3189 int bytes_per_line = input.width*input.numChannels;
3192 if (type == DS_DEVICE_OPENCL_DEVICE) {
3194 QueryPerformanceCounter(&time_funct_start);
3196 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3199 OpenclDevice::gpuEnv = *env;
3200 int wpl = pixGetWpl(input.pix);
3201 OpenclDevice::ThresholdRectToPixOCL(input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, input.height, input.width, top, left);
3204 QueryPerformanceCounter(&time_funct_end);
3205 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3207 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3208 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3216 QueryPerformanceCounter(&time_funct_start);
3218 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3220 ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line,
3221 thresholds, hi_values, &input.pix );
3224 QueryPerformanceCounter(&time_funct_end);
3225 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3227 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3228 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3233 delete[] thresholds;
3238 double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
3242 LARGE_INTEGER freq, time_funct_start, time_funct_end;
3243 QueryPerformanceFrequency(&freq);
3245 mach_timespec_t time_funct_start, time_funct_end;
3247 TIMESPEC time_funct_start, time_funct_end;
3251 int resolution = 300;
3252 int wpl = pixGetWpl(input.pix);
3257 int closing_brick = max_line_width / 3;
3260 if (type == DS_DEVICE_OPENCL_DEVICE) {
3262 QueryPerformanceCounter(&time_funct_start);
3264 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3266 Pix *src_pix = input.pix;
3267 OpenclDevice::gpuEnv = *env;
3268 OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
3269 Pix *pix_vline =
NULL, *pix_hline =
NULL, *pix_closed =
NULL;
3270 OpenclDevice::pixGetLinesCL(NULL, input.pix, &pix_vline, &pix_hline, &pix_closed,
true, closing_brick, closing_brick, max_line_width, max_line_width, min_line_length, min_line_length);
3272 OpenclDevice::releaseMorphCLBuffers();
3275 QueryPerformanceCounter(&time_funct_end);
3276 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3278 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3279 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3283 QueryPerformanceCounter(&time_funct_start);
3285 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
3289 Pix *src_pix = input.pix;
3290 Pix *pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
3291 Pix *pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width);
3292 Pix *pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
3293 pixDestroy(&pix_solid);
3294 Pix *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
3295 Pix *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
3296 pixDestroy(&pix_hollow);
3299 QueryPerformanceCounter(&time_funct_end);
3300 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
3302 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
3303 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
3320 ds_status serializeScore( ds_device* device,
void **serializedScore,
unsigned int* serializedScoreSize ) {
3321 *serializedScoreSize =
sizeof(TessDeviceScore);
3322 *serializedScore = (
void *)
new unsigned char[*serializedScoreSize];
3323 memcpy(*serializedScore, device->score, *serializedScoreSize);
3328 ds_status deserializeScore( ds_device* device,
const unsigned char* serializedScore,
unsigned int serializedScoreSize ) {
3330 device->score =
new TessDeviceScore;
3331 memcpy(device->score, serializedScore, serializedScoreSize);
3338 ds_status evaluateScoreForDevice( ds_device *device,
void *inputData) {
3342 printf(
"\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
3344 if (device->type == DS_DEVICE_OPENCL_DEVICE) {
3347 populateGPUEnvFromDevice( env, device->oclDeviceID);
3348 env->mnFileCount = 0;
3349 env->mnKernelCount = 0UL;
3351 OpenclDevice::gpuEnv = *env;
3352 OpenclDevice::CompileKernelFile(env,
"");
3356 TessScoreEvaluationInputData *input = (TessScoreEvaluationInputData *)inputData;
3359 double composeRGBPixelTime = composeRGBPixelMicroBench( env, *input, device->type );
3362 double histogramRectTime = histogramRectMicroBench( env, *input, device->type );
3365 double thresholdRectToPixTime = thresholdRectToPixMicroBench( env, *input, device->type );
3368 double getLineMasksMorphTime = getLineMasksMorphMicroBench( env, *input, device->type );
3373 float composeRGBPixelWeight = 1.2f;
3374 float histogramRectWeight = 2.4f;
3375 float thresholdRectToPixWeight = 4.5f;
3376 float getLineMasksMorphWeight = 5.0f;
3378 float weightedTime =
3379 composeRGBPixelWeight * composeRGBPixelTime +
3380 histogramRectWeight * histogramRectTime +
3381 thresholdRectToPixWeight * thresholdRectToPixTime +
3382 getLineMasksMorphWeight * getLineMasksMorphTime
3384 device->score = (
void *)
new TessDeviceScore;
3385 ((TessDeviceScore *)device->score)->time = weightedTime;
3387 printf(
"[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
3388 printf(
"[DS]%25s: %f (w=%.1f)\n",
"composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight );
3389 printf(
"[DS]%25s: %f (w=%.1f)\n",
"HistogramRect", histogramRectTime, histogramRectWeight );
3390 printf(
"[DS]%25s: %f (w=%.1f)\n",
"ThresholdRectToPix", thresholdRectToPixTime, thresholdRectToPixWeight );
3391 printf(
"[DS]%25s: %f (w=%.1f)\n",
"getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight );
3392 printf(
"[DS]%25s: %f\n",
"Score", ((TessDeviceScore *)device->score)->time );
3397 ds_device OpenclDevice::getDeviceSelection( ) {
3399 if (!deviceIsSelected) {
3402 if( 1 == LoadOpencl() ) {
3407 ds_profile *profile;
3408 status = initDSProfile( &profile,
"v0.1" );
3411 char *fileName = "tesseract_opencl_profile_devices.dat";
3412 status = readProfileFromFile( profile, deserializeScore, fileName);
3413 if (status != DS_SUCCESS) {
3415 printf(
"[DS] Profile file not available (%s); performing profiling.\n", fileName);
3418 TessScoreEvaluationInputData input;
3419 populateTessScoreEvaluationInputData( &input );
3422 unsigned int numUpdates;
3423 status = profileDevices( profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (
void *)&input, &numUpdates );
3426 if ( status == DS_SUCCESS ) {
3427 status = writeProfileToFile( profile, serializeScore, fileName);
3429 if ( status == DS_SUCCESS ) {
3430 printf(
"[DS] Scores written to file (%s).\n", fileName);
3432 printf(
"[DS] Error saving scores to file (%s); scores not written to file.\n", fileName);
3435 printf(
"[DS] Unable to evaluate performance; scores not written to file.\n");
3441 printf("[DS] Profile read from file (%s).\n", fileName);
3446 float bestTime = FLT_MAX;
3447 int bestDeviceIdx = -1;
3448 for (
int d = 0; d < profile->numDevices; d++) {
3450 ds_device device = profile->devices[d];
3451 TessDeviceScore score = *(TessDeviceScore *)device.score;
3453 float time = score.time;
3454 printf(
"[DS] Device[%i] %i:%s score is %f\n", d+1, device.type, device.oclDeviceName, time);
3455 if (time < bestTime) {
3460 printf(
"[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native");
3464 bool overrided =
false;
3465 char *overrideDeviceStr = getenv(
"TESSERACT_OPENCL_DEVICE");
3466 if (overrideDeviceStr != NULL) {
3467 int overrideDeviceIdx = atoi(overrideDeviceStr);
3468 if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) {
3469 printf(
"[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx);
3470 bestDeviceIdx = overrideDeviceIdx - 1;
3473 printf(
"[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices);
3478 printf(
"[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native");
3480 selectedDevice = profile->devices[bestDeviceIdx];
3484 printf(
"[DS] OpenCL runtime not available.\n");
3485 selectedDevice.type = DS_DEVICE_NATIVE_CPU;
3486 selectedDevice.oclDeviceName =
"(null)";
3487 selectedDevice.score =
NULL;
3488 selectedDevice.oclDeviceID =
NULL;
3489 selectedDevice.oclDriverVersion =
NULL;
3491 deviceIsSelected =
true;
3496 return selectedDevice;
3501 bool OpenclDevice::selectedDeviceIsOpenCL() {
3502 #if USE_DEVICE_SELECTION
3503 ds_device device = getDeviceSelection();
3504 return (device.type == DS_DEVICE_OPENCL_DEVICE);
3510 bool OpenclDevice::selectedDeviceIsNativeCPU() {
3511 #if USE_DEVICE_SELECTION
3512 ds_device device = getDeviceSelection();
3513 return (device.type == DS_DEVICE_NATIVE_CPU);
#define PERF_COUNT_SUB(SUB)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
#define PERF_COUNT_START(FUNCT_NAME)
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.