tesseract  5.0.0-alpha-619-ge9db
oclkernels.h
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 
11 #ifndef TESSERACT_OPENCL_OCLKERNELS_H_
12 #define TESSERACT_OPENCL_OCLKERNELS_H_
13 
14 #ifndef USE_EXTERNAL_KERNEL
15 #define KERNEL(...) #__VA_ARGS__ "\n"
16 // Double precision is a default of spreadsheets
17 // cl_khr_fp64: Khronos extension
18 // cl_amd_fp64: AMD extension
19 // use build option outside to define fp_t
21 static const char* kernel_src = KERNEL(
22 \n#ifdef KHR_DP_EXTENSION\n
23 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
24 \n#elif AMD_DP_EXTENSION\n
25 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
26 \n#else\n
27 \n#endif\n
28 __kernel void composeRGBPixel(__global uint *tiffdata, int w, int h,int wpl, __global uint *output)
29 {
30  int i = get_global_id(1);
31  int j = get_global_id(0);
32  int tiffword,rval,gval,bval;
33 
34  //Ignore the excess
35  if ((i >= h) || (j >= w))
36  return;
37 
38  tiffword = tiffdata[i * w + j];
39  rval = ((tiffword) & 0xff);
40  gval = (((tiffword) >> 8) & 0xff);
41  bval = (((tiffword) >> 16) & 0xff);
42  output[i*wpl+j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
43 }
44 )
45 
46 KERNEL(
47 \n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword,
48  const int wpl, const int h)
49 {
50  const unsigned int row = get_global_id(1);
51  const unsigned int col = get_global_id(0);
52  const unsigned int pos = row * wpl + col;
53 
54  //Ignore the execss
55  if (row >= h || col >= wpl)
56  return;
57 
58  *(dword + pos) &= ~(*(sword + pos));
59 }\n
60 )
61 
62 KERNEL(
63 \n__kernel void morphoDilateHor_5x5(__global int *sword,__global int *dword,
64  const int wpl, const int h)
65 {
66  const unsigned int pos = get_global_id(0);
67  unsigned int prevword, nextword, currword,tempword;
68  unsigned int destword;
69  const int col = pos % wpl;
70 
71  //Ignore the execss
72  if (pos >= (wpl * h))
73  return;
74 
75 
76  currword = *(sword + pos);
77  destword = currword;
78 
79  //Handle boundary conditions
80  if(col==0)
81  prevword=0;
82  else
83  prevword = *(sword + pos - 1);
84 
85  if(col==(wpl - 1))
86  nextword=0;
87  else
88  nextword = *(sword + pos + 1);
89 
90  //Loop unrolled
91 
92  //1 bit to left and 1 bit to right
93  //Get the max value on LHS of every pixel
94  tempword = (prevword << (31)) | ((currword >> 1));
95  destword |= tempword;
96  //Get max value on RHS of every pixel
97  tempword = (currword << 1) | (nextword >> (31));
98  destword |= tempword;
99 
100  //2 bit to left and 2 bit to right
101  //Get the max value on LHS of every pixel
102  tempword = (prevword << (30)) | ((currword >> 2));
103  destword |= tempword;
104  //Get max value on RHS of every pixel
105  tempword = (currword << 2) | (nextword >> (30));
106  destword |= tempword;
107 
108 
109  *(dword + pos) = destword;
110 
111 }\n
112 )
113 
114 KERNEL(
115 \n__kernel void morphoDilateVer_5x5(__global int *sword,__global int *dword,
116  const int wpl, const int h)
117 {
118  const int col = get_global_id(0);
119  const int row = get_global_id(1);
120  const unsigned int pos = row * wpl + col;
121  unsigned int tempword;
122  unsigned int destword;
123  int i;
124 
125  //Ignore the execss
126  if (row >= h || col >= wpl)
127  return;
128 
129  destword = *(sword + pos);
130 
131  //2 words above
132  i = (row - 2) < 0 ? row : (row - 2);
133  tempword = *(sword + i*wpl + col);
134  destword |= tempword;
135 
136  //1 word above
137  i = (row - 1) < 0 ? row : (row - 1);
138  tempword = *(sword + i*wpl + col);
139  destword |= tempword;
140 
141  //1 word below
142  i = (row >= (h - 1)) ? row : (row + 1);
143  tempword = *(sword + i*wpl + col);
144  destword |= tempword;
145 
146  //2 words below
147  i = (row >= (h - 2)) ? row : (row + 2);
148  tempword = *(sword + i*wpl + col);
149  destword |= tempword;
150 
151  *(dword + pos) = destword;
152 }\n
153 )
154 
155 KERNEL(
156 \n__kernel void morphoDilateHor(__global int *sword,__global int *dword,const int xp, const int xn, const int wpl, const int h)
157 {
158  const int col = get_global_id(0);
159  const int row = get_global_id(1);
160  const unsigned int pos = row * wpl + col;
161  unsigned int parbitsxp, parbitsxn, nwords;
162  unsigned int destword, tempword, lastword, currword;
163  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
164  int i, j, siter, eiter;
165 
166  //Ignore the execss
167  if (pos >= (wpl*h) || (xn < 1 && xp < 1))
168  return;
169 
170  currword = *(sword + pos);
171  destword = currword;
172 
173  parbitsxp = xp & 31;
174  parbitsxn = xn & 31;
175  nwords = xp >> 5;
176 
177  if (parbitsxp > 0)
178  nwords += 1;
179  else
180  parbitsxp = 31;
181 
182  siter = (col - nwords);
183  eiter = (col + nwords);
184 
185  //Get prev word
186  if (col==0)
187  firstword = 0x0;
188  else
189  firstword = *(sword + pos - 1);
190 
191  //Get next word
192  if (col == (wpl - 1))
193  secondword = 0x0;
194  else
195  secondword = *(sword + pos + 1);
196 
197  //Last partial bits on either side
198  for (i = 1; i <= parbitsxp; i++)
199  {
200  //Get the max value on LHS of every pixel
201  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i));
202 
203  destword |= tempword;
204 
205  //Get max value on RHS of every pixel
206  tempword = (currword << i) | (secondword >> (32 - i));
207  destword |= tempword;
208  }
209 
210  //Return if halfwidth <= 1 word
211  if (nwords == 1)
212  {
213  if (xn == 32)
214  {
215  destword |= firstword;
216  }
217  if (xp == 32)
218  {
219  destword |= secondword;
220  }
221 
222  *(dword + pos) = destword;
223  return;
224  }
225 
226  if (siter < 0)
227  firstword = 0x0;
228  else
229  firstword = *(sword + row*wpl + siter);
230 
231  if (eiter >= wpl)
232  lastword = 0x0;
233  else
234  lastword = *(sword + row*wpl + eiter);
235 
236  for (i = 1; i < nwords; i++)
237  {
238  //Gets LHS words
239  if ((siter + i) < 0)
240  secondword = 0x0;
241  else
242  secondword = *(sword + row*wpl + siter + i);
243 
244  lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
245 
246  firstword = secondword;
247 
248  if ((siter + i + 1) < 0)
249  secondword = 0x0;
250  else
251  secondword = *(sword + row*wpl + siter + i + 1);
252 
253  lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
254 
255  //Gets RHS words
256  if ((eiter - i) >= wpl)
257  firstword = 0x0;
258  else
259  firstword = *(sword + row*wpl + eiter - i);
260 
261  rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
262 
263  lastword = firstword;
264  if ((eiter - i - 1) >= wpl)
265  firstword = 0x0;
266  else
267  firstword = *(sword + row*wpl + eiter - i - 1);
268 
269  rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
270 
271  for (j = 1; j < 32; j++)
272  {
273  //OR LHS full words
274  tempword = (lprevword << j) | (lnextword >> (32 - j));
275  destword |= tempword;
276 
277  //OR RHS full words
278  tempword = (rprevword << j) | (rnextword >> (32 - j));
279  destword |= tempword;
280  }
281 
282  destword |= lprevword;
283  destword |= lnextword;
284  destword |= rprevword;
285  destword |= rnextword;
286 
287  lastword = firstword;
288  firstword = secondword;
289  }
290 
291  *(dword + pos) = destword;
292 }\n
293 )
294 
295 KERNEL(
296 \n__kernel void morphoDilateHor_32word(__global int *sword,__global int *dword,
297  const int halfwidth,
298  const int wpl, const int h,
299  const char isEven)
300 {
301  const int col = get_global_id(0);
302  const int row = get_global_id(1);
303  const unsigned int pos = row * wpl + col;
304  unsigned int prevword, nextword, currword,tempword;
305  unsigned int destword;
306  int i;
307 
308  //Ignore the execss
309  if (pos >= (wpl * h))
310  return;
311 
312  currword = *(sword + pos);
313  destword = currword;
314 
315  //Handle boundary conditions
316  if(col==0)
317  prevword=0;
318  else
319  prevword = *(sword + pos - 1);
320 
321  if(col==(wpl - 1))
322  nextword=0;
323  else
324  nextword = *(sword + pos + 1);
325 
326  for (i = 1; i <= halfwidth; i++)
327  {
328  //Get the max value on LHS of every pixel
329  if (i == halfwidth && isEven)
330  {
331  tempword = 0x0;
332  }
333  else
334  {
335  tempword = (prevword << (32-i)) | ((currword >> i));
336  }
337 
338  destword |= tempword;
339 
340  //Get max value on RHS of every pixel
341  tempword = (currword << i) | (nextword >> (32 - i));
342 
343  destword |= tempword;
344  }
345 
346  *(dword + pos) = destword;
347 }\n
348 )
349 
350 KERNEL(
351 \n__kernel void morphoDilateVer(__global int *sword,__global int *dword,
352  const int yp,
353  const int wpl, const int h,
354  const int yn)
355 {
356  const int col = get_global_id(0);
357  const int row = get_global_id(1);
358  const unsigned int pos = row * wpl + col;
359  unsigned int tempword;
360  unsigned int destword;
361  int i, siter, eiter;
362 
363  //Ignore the execss
364  if (row >= h || col >= wpl)
365  return;
366 
367  destword = *(sword + pos);
368 
369  //Set start position and end position considering the boundary conditions
370  siter = (row - yn) < 0 ? 0 : (row - yn);
371  eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
372 
373  for (i = siter; i <= eiter; i++)
374  {
375  tempword = *(sword + i*wpl + col);
376 
377  destword |= tempword;
378  }
379 
380  *(dword + pos) = destword;
381 }\n
382 )
383 
384 KERNEL(
385 \n__kernel void morphoErodeHor_5x5(__global int *sword,__global int *dword,
386  const int wpl, const int h)
387 {
388  const unsigned int pos = get_global_id(0);
389  unsigned int prevword, nextword, currword,tempword;
390  unsigned int destword;
391  const int col = pos % wpl;
392 
393  //Ignore the execss
394  if (pos >= (wpl * h))
395  return;
396 
397  currword = *(sword + pos);
398  destword = currword;
399 
400  //Handle boundary conditions
401  if(col==0)
402  prevword=0xffffffff;
403  else
404  prevword = *(sword + pos - 1);
405 
406  if(col==(wpl - 1))
407  nextword=0xffffffff;
408  else
409  nextword = *(sword + pos + 1);
410 
411  //Loop unrolled
412 
413  //1 bit to left and 1 bit to right
414  //Get the min value on LHS of every pixel
415  tempword = (prevword << (31)) | ((currword >> 1));
416  destword &= tempword;
417  //Get min value on RHS of every pixel
418  tempword = (currword << 1) | (nextword >> (31));
419  destword &= tempword;
420 
421  //2 bit to left and 2 bit to right
422  //Get the min value on LHS of every pixel
423  tempword = (prevword << (30)) | ((currword >> 2));
424  destword &= tempword;
425  //Get min value on RHS of every pixel
426  tempword = (currword << 2) | (nextword >> (30));
427  destword &= tempword;
428 
429 
430  *(dword + pos) = destword;
431 
432 }\n
433 )
434 
435 KERNEL(
436 \n__kernel void morphoErodeVer_5x5(__global int *sword,__global int *dword,
437  const int wpl, const int h,
438  const int fwmask, const int lwmask)
439 {
440  const int col = get_global_id(0);
441  const int row = get_global_id(1);
442  const unsigned int pos = row * wpl + col;
443  unsigned int tempword;
444  unsigned int destword;
445  int i;
446 
447  //Ignore the execss
448  if (row >= h || col >= wpl)
449  return;
450 
451  destword = *(sword + pos);
452 
453  if (row < 2 || row >= (h - 2))
454  {
455  destword = 0x0;
456  }
457  else
458  {
459  //2 words above
460  //i = (row - 2) < 0 ? row : (row - 2);
461  i = (row - 2);
462  tempword = *(sword + i*wpl + col);
463  destword &= tempword;
464 
465  //1 word above
466  //i = (row - 1) < 0 ? row : (row - 1);
467  i = (row - 1);
468  tempword = *(sword + i*wpl + col);
469  destword &= tempword;
470 
471  //1 word below
472  //i = (row >= (h - 1)) ? row : (row + 1);
473  i = (row + 1);
474  tempword = *(sword + i*wpl + col);
475  destword &= tempword;
476 
477  //2 words below
478  //i = (row >= (h - 2)) ? row : (row + 2);
479  i = (row + 2);
480  tempword = *(sword + i*wpl + col);
481  destword &= tempword;
482 
483  if (col == 0)
484  {
485  destword &= fwmask;
486  }
487  if (col == (wpl - 1))
488  {
489  destword &= lwmask;
490  }
491  }
492 
493 
494  *(dword + pos) = destword;
495 }\n
496 )
497 
498 KERNEL(
499 \n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl,
500  const int h, const char isAsymmetric, const int rwmask, const int lwmask)
501 {
502  const int col = get_global_id(0);
503  const int row = get_global_id(1);
504  const unsigned int pos = row * wpl + col;
505  unsigned int parbitsxp, parbitsxn, nwords;
506  unsigned int destword, tempword, lastword, currword;
507  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
508  int i, j, siter, eiter;
509 
510  //Ignore the execss
511  if (pos >= (wpl*h) || (xn < 1 && xp < 1))
512  return;
513 
514  currword = *(sword + pos);
515  destword = currword;
516 
517  parbitsxp = xp & 31;
518  parbitsxn = xn & 31;
519  nwords = xp >> 5;
520 
521  if (parbitsxp > 0)
522  nwords += 1;
523  else
524  parbitsxp = 31;
525 
526  siter = (col - nwords);
527  eiter = (col + nwords);
528 
529  //Get prev word
530  if (col==0)
531  firstword = 0xffffffff;
532  else
533  firstword = *(sword + pos - 1);
534 
535  //Get next word
536  if (col == (wpl - 1))
537  secondword = 0xffffffff;
538  else
539  secondword = *(sword + pos + 1);
540 
541  //Last partial bits on either side
542  for (i = 1; i <= parbitsxp; i++)
543  {
544  //Get the max value on LHS of every pixel
545  tempword = (firstword << (32-i)) | ((currword >> i));
546  destword &= tempword;
547 
548  //Get max value on RHS of every pixel
549  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
550 
551  //tempword = (currword << i) | (secondword >> (32 - i));
552  destword &= tempword;
553  }
554 
555  //Return if halfwidth <= 1 word
556  if (nwords == 1)
557  {
558  if (xp == 32)
559  {
560  destword &= firstword;
561  }
562  if (xn == 32)
563  {
564  destword &= secondword;
565  }
566 
567  //Clear boundary pixels
568  if (isAsymmetric)
569  {
570  if (col == 0)
571  destword &= rwmask;
572  if (col == (wpl - 1))
573  destword &= lwmask;
574  }
575 
576  *(dword + pos) = destword;
577  return;
578  }
579 
580  if (siter < 0)
581  firstword = 0xffffffff;
582  else
583  firstword = *(sword + row*wpl + siter);
584 
585  if (eiter >= wpl)
586  lastword = 0xffffffff;
587  else
588  lastword = *(sword + row*wpl + eiter);
589 
590 
591  for (i = 1; i < nwords; i++)
592  {
593  //Gets LHS words
594  if ((siter + i) < 0)
595  secondword = 0xffffffff;
596  else
597  secondword = *(sword + row*wpl + siter + i);
598 
599  lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
600 
601  firstword = secondword;
602 
603  if ((siter + i + 1) < 0)
604  secondword = 0xffffffff;
605  else
606  secondword = *(sword + row*wpl + siter + i + 1);
607 
608  lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
609 
610  //Gets RHS words
611  if ((eiter - i) >= wpl)
612  firstword = 0xffffffff;
613  else
614  firstword = *(sword + row*wpl + eiter - i);
615 
616  rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
617 
618  lastword = firstword;
619  if ((eiter - i - 1) >= wpl)
620  firstword = 0xffffffff;
621  else
622  firstword = *(sword + row*wpl + eiter - i - 1);
623 
624  rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
625 
626  for (j = 0; j < 32; j++)
627  {
628  //OR LHS full words
629  tempword = (lprevword << j) | (lnextword >> (32 - j));
630  destword &= tempword;
631 
632  //OR RHS full words
633  tempword = (rprevword << j) | (rnextword >> (32 - j));
634  destword &= tempword;
635  }
636 
637  destword &= lprevword;
638  destword &= lnextword;
639  destword &= rprevword;
640  destword &= rnextword;
641 
642  lastword = firstword;
643  firstword = secondword;
644  }
645 
646  if (isAsymmetric)
647  {
648  //Clear boundary pixels
649  if (col < (nwords - 1))
650  destword = 0x0;
651  else if (col == (nwords - 1))
652  destword &= rwmask;
653  else if (col > (wpl - nwords))
654  destword = 0x0;
655  else if (col == (wpl - nwords))
656  destword &= lwmask;
657  }
658 
659  *(dword + pos) = destword;
660 }\n
661 )
662 
663 KERNEL(
664 \n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword,
665  const int halfwidth, const int wpl,
666  const int h, const char clearBoundPixH,
667  const int rwmask, const int lwmask,
668  const char isEven)
669 {
670  const int col = get_global_id(0);
671  const int row = get_global_id(1);
672  const unsigned int pos = row * wpl + col;
673  unsigned int prevword, nextword, currword,tempword, destword;
674  int i;
675 
676  //Ignore the execss
677  if (pos >= (wpl * h))
678  return;
679 
680  currword = *(sword + pos);
681  destword = currword;
682 
683  //Handle boundary conditions
684  if(col==0)
685  prevword=0xffffffff;
686  else
687  prevword = *(sword + pos - 1);
688 
689  if(col==(wpl - 1))
690  nextword=0xffffffff;
691  else
692  nextword = *(sword + pos + 1);
693 
694  for (i = 1; i <= halfwidth; i++)
695  {
696  //Get the min value on LHS of every pixel
697  tempword = (prevword << (32-i)) | ((currword >> i));
698 
699  destword &= tempword;
700 
701  //Get min value on RHS of every pixel
702  if (i == halfwidth && isEven)
703  {
704  tempword = 0xffffffff;
705  }
706  else
707  {
708  tempword = (currword << i) | (nextword >> (32 - i));
709  }
710 
711  destword &= tempword;
712  }
713 
714  if (clearBoundPixH)
715  {
716  if (col == 0)
717  {
718  destword &= rwmask;
719  }
720  else if (col == (wpl - 1))
721  {
722  destword &= lwmask;
723  }
724  }
725 
726  *(dword + pos) = destword;
727 }\n
728 )
729 
730 KERNEL(
731 \n__kernel void morphoErodeVer(__global int *sword,__global int *dword,
732  const int yp,
733  const int wpl, const int h,
734  const char clearBoundPixV, const int yn)
735 {
736  const int col = get_global_id(0);
737  const int row = get_global_id(1);
738  const unsigned int pos = row * wpl + col;
739  unsigned int tempword, destword;
740  int i, siter, eiter;
741 
742  //Ignore the execss
743  if (row >= h || col >= wpl)
744  return;
745 
746  destword = *(sword + pos);
747 
748  //Set start position and end position considering the boundary conditions
749  siter = (row - yp) < 0 ? 0 : (row - yp);
750  eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
751 
752  for (i = siter; i <= eiter; i++)
753  {
754  tempword = *(sword + i*wpl + col);
755 
756  destword &= tempword;
757  }
758 
759  //Clear boundary pixels
760  if (clearBoundPixV && ((row < yp) || ((h - row) <= yn)))
761  {
762  destword = 0x0;
763  }
764 
765  *(dword + pos) = destword;
766 }\n
767 )
768 
769 // HistogramRect Kernel: Accumulate
770 // assumes 4 channels, i.e., bytes_per_pixel = 4
771 // assumes number of pixels is multiple of 8
772 // data is laid out as
773 // ch0 ch1 ...
774 // bin0 bin1 bin2... bin0...
775 // rpt0,1,2...256 rpt0,1,2...
776 KERNEL(
777 \n#define HIST_REDUNDANCY 256\n
778 \n#define GROUP_SIZE 256\n
779 \n#define HIST_SIZE 256\n
780 \n#define NUM_CHANNELS 4\n
781 \n#define HR_UNROLL_SIZE 8 \n
782 \n#define HR_UNROLL_TYPE uchar8 \n
783 
784 __attribute__((reqd_work_group_size(256, 1, 1)))
785 __kernel
786 void kernel_HistogramRectAllChannels(
787  __global const uchar8 *data,
788  uint numPixels,
789  __global uint *histBuffer) {
790 
791  // declare variables
792  uchar8 pixels;
793  int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
794 
795  // for each pixel/channel, accumulate in global memory
796  for (uint pc = get_global_id(0); pc < numPixels*NUM_CHANNELS/HR_UNROLL_SIZE; pc += get_global_size(0)) {
797  pixels = data[pc];
798  // channel bin thread
799  atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s0*HIST_REDUNDANCY + threadOffset]); // ch0
800  atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s4*HIST_REDUNDANCY + threadOffset]); // ch0
801  atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s1*HIST_REDUNDANCY + threadOffset]); // ch1
802  atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s5*HIST_REDUNDANCY + threadOffset]); // ch1
803  atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s2*HIST_REDUNDANCY + threadOffset]); // ch2
804  atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s6*HIST_REDUNDANCY + threadOffset]); // ch2
805  atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s3*HIST_REDUNDANCY + threadOffset]); // ch3
806  atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s7*HIST_REDUNDANCY + threadOffset]); // ch3
807  }
808 }
809 )
810 
811 KERNEL(
812 // NUM_CHANNELS = 1
813 __attribute__((reqd_work_group_size(256, 1, 1)))
814 __kernel
815 void kernel_HistogramRectOneChannel(
816  __global const uchar8 *data,
817  uint numPixels,
818  __global uint *histBuffer) {
819 
820  // declare variables
821  uchar8 pixels;
822  int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
823 
824  // for each pixel/channel, accumulate in global memory
825  for (uint pc = get_global_id(0); pc < numPixels/HR_UNROLL_SIZE; pc += get_global_size(0)) {
826  pixels = data[pc];
827  // bin thread
828  atomic_inc(&histBuffer[pixels.s0*HIST_REDUNDANCY + threadOffset]);
829  atomic_inc(&histBuffer[pixels.s1*HIST_REDUNDANCY + threadOffset]);
830  atomic_inc(&histBuffer[pixels.s2*HIST_REDUNDANCY + threadOffset]);
831  atomic_inc(&histBuffer[pixels.s3*HIST_REDUNDANCY + threadOffset]);
832  atomic_inc(&histBuffer[pixels.s4*HIST_REDUNDANCY + threadOffset]);
833  atomic_inc(&histBuffer[pixels.s5*HIST_REDUNDANCY + threadOffset]);
834  atomic_inc(&histBuffer[pixels.s6*HIST_REDUNDANCY + threadOffset]);
835  atomic_inc(&histBuffer[pixels.s7*HIST_REDUNDANCY + threadOffset]);
836  }
837 }
838 )
839 
840 // HistogramRect Kernel: Reduction
841 // only supports 4 channels
842 // each work group handles a single channel of a single histogram bin
843 KERNEL(
844 __attribute__((reqd_work_group_size(256, 1, 1)))
845 __kernel
846 void kernel_HistogramRectAllChannelsReduction(
847  int n, // unused pixel redundancy
848  __global uint *histBuffer,
849  __global int* histResult) {
850 
851  // declare variables
852  int channel = get_group_id(0)/HIST_SIZE;
853  int bin = get_group_id(0)%HIST_SIZE;
854  int value = 0;
855 
856  // accumulate in register
857  for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
858  value += histBuffer[ channel*HIST_SIZE*HIST_REDUNDANCY+bin*HIST_REDUNDANCY+i];
859  }
860 
861  // reduction in local memory
862  __local int localHist[GROUP_SIZE];
863  localHist[get_local_id(0)] = value;
864  barrier(CLK_LOCAL_MEM_FENCE);
865  for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
866  if (get_local_id(0) < stride) {
867  value = localHist[ get_local_id(0)+stride];
868  }
869  barrier(CLK_LOCAL_MEM_FENCE);
870  if (get_local_id(0) < stride) {
871  localHist[ get_local_id(0)] += value;
872  }
873  barrier(CLK_LOCAL_MEM_FENCE);
874  }
875 
876  // write reduction to final result
877  if (get_local_id(0) == 0) {
878  histResult[get_group_id(0)] = localHist[0];
879  }
880 } // kernel_HistogramRectAllChannels
881 )
882 
883 
884 KERNEL(
885 // NUM_CHANNELS = 1
886 __attribute__((reqd_work_group_size(256, 1, 1)))
887 __kernel
888 void kernel_HistogramRectOneChannelReduction(
889  int n, // unused pixel redundancy
890  __global uint *histBuffer,
891  __global int* histResult) {
892 
893  // declare variables
894  // int channel = get_group_id(0)/HIST_SIZE;
895  int bin = get_group_id(0)%HIST_SIZE;
896  int value = 0;
897 
898  // accumulate in register
899  for (int i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
900  value += histBuffer[ bin*HIST_REDUNDANCY+i];
901  }
902 
903  // reduction in local memory
904  __local int localHist[GROUP_SIZE];
905  localHist[get_local_id(0)] = value;
906  barrier(CLK_LOCAL_MEM_FENCE);
907  for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
908  if (get_local_id(0) < stride) {
909  value = localHist[ get_local_id(0)+stride];
910  }
911  barrier(CLK_LOCAL_MEM_FENCE);
912  if (get_local_id(0) < stride) {
913  localHist[ get_local_id(0)] += value;
914  }
915  barrier(CLK_LOCAL_MEM_FENCE);
916  }
917 
918  // write reduction to final result
919  if (get_local_id(0) == 0) {
920  histResult[get_group_id(0)] = localHist[0];
921  }
922 } // kernel_HistogramRectOneChannelReduction
923 )
924 
925 // ThresholdRectToPix Kernel
926 // only supports 4 channels
927 // imageData is input image (24-bits/pixel)
928 // pix is output image (1-bit/pixel)
929 KERNEL(
930 \n#define CHAR_VEC_WIDTH 4 \n
931 \n#define PIXELS_PER_WORD 32 \n
932 \n#define PIXELS_PER_BURST 8 \n
933 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
934  typedef union {
935  uchar s[PIXELS_PER_BURST*NUM_CHANNELS];
936  uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH];
937  } charVec;
938 
939 __attribute__((reqd_work_group_size(256, 1, 1)))
940 __kernel
941 void kernel_ThresholdRectToPix(
942  __global const uchar4 *imageData,
943  int height,
944  int width,
945  int wpl, // words per line
946  __global int *thresholds,
947  __global int *hi_values,
948  __global int *pix) {
949 
950  // declare variables
951  int pThresholds[NUM_CHANNELS];
952  int pHi_Values[NUM_CHANNELS];
953  for (int i = 0; i < NUM_CHANNELS; i++) {
954  pThresholds[i] = thresholds[i];
955  pHi_Values[i] = hi_values[i];
956  }
957 
958  // for each word (32 pixels) in output image
959  for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) {
960  unsigned int word = 0; // all bits start at zero
961  // for each burst in word
962  for (int b = 0; b < BURSTS_PER_WORD; b++) {
963  // load burst
964  charVec pixels;
965  int offset = (w / wpl) * width;
966  offset += (w % wpl) * PIXELS_PER_WORD;
967  offset += b * PIXELS_PER_BURST;
968 
969  for (int i = 0; i < PIXELS_PER_BURST; ++i)
970  pixels.v[i] = imageData[offset + i];
971 
972  // for each pixel in burst
973  for (int p = 0; p < PIXELS_PER_BURST; p++) {
974  for (int c = 0; c < NUM_CHANNELS; c++) {
975  unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c];
976  if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
977  const uint kTopBit = 0x80000000;
978  word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
979  }
980  }
981  }
982  }
983  pix[w] = word;
984  }
985 }
986 
987 \n#define CHAR_VEC_WIDTH 8 \n
988 \n#define PIXELS_PER_WORD 32 \n
989 \n#define PIXELS_PER_BURST 8 \n
990 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
991  typedef union {
992  uchar s[PIXELS_PER_BURST*1];
993  uchar8 v[(PIXELS_PER_BURST*1)/CHAR_VEC_WIDTH];
994  } charVec1;
995 
996 __attribute__((reqd_work_group_size(256, 1, 1)))
997 __kernel
998 void kernel_ThresholdRectToPix_OneChan(
999  __global const uchar8 *imageData,
1000  int height,
1001  int width,
1002  int wpl, // words per line of output image
1003  __global int *thresholds,
1004  __global int *hi_values,
1005  __global int *pix) {
1006 
1007  // declare variables
1008  int pThresholds[1];
1009  int pHi_Values[1];
1010  for (int i = 0; i < 1; i++) {
1011  pThresholds[i] = thresholds[i];
1012  pHi_Values[i] = hi_values[i];
1013  }
1014 
1015  // for each word (32 pixels) in output image
1016  for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) {
1017  unsigned int word = 0; // all bits start at zero
1018 
1019  // for each burst in word
1020  for (int b = 0; b < BURSTS_PER_WORD; b++) {
1021 
1022  // load burst
1023  charVec1 pixels;
1024  // for each char8 in burst
1025  pixels.v[0] = imageData[
1026  w*BURSTS_PER_WORD
1027  + b
1028  + 0 ];
1029 
1030  // for each pixel in burst
1031  for (int p = 0; p < PIXELS_PER_BURST; p++) {
1032 
1033  //int littleEndianIdx = p ^ 3;
1034  //int bigEndianIdx = p;
1035  int idx =
1036 \n#ifdef __ENDIAN_LITTLE__\n
1037  p ^ 3;
1038 \n#else\n
1039  p;
1040 \n#endif\n
1041  unsigned char pixChan = pixels.s[idx];
1042  if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
1043  const uint kTopBit = 0x80000000;
1044  word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1045  }
1046  }
1047  }
1048  pix[w] = word;
1049  }
1050 }
1051 )
1052 
1053  ; // close char*
1054 
1055 #endif // USE_EXTERNAL_KERNEL
1056 #endif // TESSERACT_OPENCL_OCLKERNELS_H_
1057 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
uchar
unsigned char uchar
Definition: utfdef.h:8
KERNEL
#define KERNEL(...)
Definition: oclkernels.h:15
uint
unsigned int uint
Definition: utfdef.h:10