MagickCore  7.0.7
Convert, Edit, Or Compose Bitmap Images
accelerate-kernels-private.h
Go to the documentation of this file.
1 /*
2  Copyright 1999-2018 ImageMagick Studio LLC, a non-profit organization
3  dedicated to making software imaging solutions freely available.
4 
5  You may not use this file except in compliance with the License.
6  obtain a copy of the License at
7 
8  https://www.imagemagick.org/script/license.php
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  MagickCore private kernels for accelerated functions.
17 */
18 
19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
27 
28 /*
29  Define declarations.
30 */
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
37 
38 const char *accelerateKernels =
39 
40 /*
41  Define declarations.
42 */
43  OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
44  OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
45  OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
46  OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
47  OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
48  OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
49  OPENCL_DEFINE(SigmaRandom, (attenuate))
50  OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
51  OPENCL_DEFINE(MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
52  OPENCL_DEFINE(MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
53 
54 /*
55  Typedef declarations.
56 */
57  STRINGIFY(
58  typedef enum
59  {
61  CMYColorspace, /* negated linear RGB colorspace */
62  CMYKColorspace, /* CMY with Black separation */
63  GRAYColorspace, /* Single Channel greyscale (non-linear) image */
69  HSVColorspace, /* alias for HSB */
72  LCHColorspace, /* alias for LCHuv */
73  LCHabColorspace, /* Cylindrical (Polar) Lab */
74  LCHuvColorspace, /* Cylindrical (Polar) Luv */
81  RGBColorspace, /* Linear RGB colorspace */
82  scRGBColorspace, /* ??? */
83  sRGBColorspace, /* Default: non-linear sRGB colorspace */
86  XYZColorspace, /* IEEE Color Reference colorspace */
93  LinearGRAYColorspace /* Single Channel greyscale (linear) image */
95  )
96 
97  STRINGIFY(
98  typedef enum
99  {
173  )
174 
175  STRINGIFY(
176  typedef enum
177  {
183  } MagickFunction;
184  )
185 
186  STRINGIFY(
187  typedef enum
188  {
190  UniformNoise,
193  ImpulseNoise,
195  PoissonNoise,
197  } NoiseType;
198  )
199 
200  STRINGIFY(
201  typedef enum
202  {
214  )
215 
216  STRINGIFY(
217  typedef enum
218  {
236  )
237 
238  STRINGIFY(
239  typedef enum
240  {
241  UndefinedChannel = 0x0000,
242  RedChannel = 0x0001,
243  GrayChannel = 0x0001,
244  CyanChannel = 0x0001,
245  GreenChannel = 0x0002,
246  MagentaChannel = 0x0002,
247  BlueChannel = 0x0004,
248  YellowChannel = 0x0004,
249  BlackChannel = 0x0008,
250  AlphaChannel = 0x0010,
251  OpacityChannel = 0x0010,
252  IndexChannel = 0x0020, /* Color Index Table? */
253  ReadMaskChannel = 0x0040, /* Pixel is Not Readable? */
254  WriteMaskChannel = 0x0080, /* Pixel is Write Protected? */
255  MetaChannel = 0x0100, /* ???? */
256  CompositeChannels = 0x001F,
257  AllChannels = 0x7ffffff,
258  /*
259  Special purpose channel types.
260  FUTURE: are these needed any more - they are more like hacks
261  SyncChannels for example is NOT a real channel but a 'flag'
262  It really says -- "User has not defined channels"
263  Though it does have extra meaning in the "-auto-level" operator
264  */
265  TrueAlphaChannel = 0x0100, /* extract actual alpha channel from opacity */
266  RGBChannels = 0x0200, /* set alpha from grayscale mask in RGB */
267  GrayChannels = 0x0400,
268  SyncChannels = 0x20000, /* channels modified as a single unit */
270  } ChannelType; /* must correspond to PixelChannel */
271  )
272 
273 /*
274  Helper functions.
275 */
276 
277 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
278 
279  STRINGIFY(
280  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
281  {
282  return((CLQuantum) value);
283  }
284  )
285 
286 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
287 
288  STRINGIFY(
289  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
290  {
291  return((CLQuantum) (257.0f*value));
292  }
293  )
294 
295 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
296 
297  STRINGIFY(
298  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
299  {
300  return((CLQuantum) (16843009.0*value));
301  }
302  )
303 
304 OPENCL_ENDIF()
305 
306  STRINGIFY(
307  inline int ClampToCanvas(const int offset,const int range)
308  {
309  return clamp(offset, (int)0, range-1);
310  }
311  )
312 
313  STRINGIFY(
314  inline CLQuantum ClampToQuantum(const float value)
315  {
316  return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
317  }
318  )
319 
320  STRINGIFY(
321  inline uint ScaleQuantumToMap(CLQuantum value)
322  {
323  if (value >= (CLQuantum) MaxMap)
324  return ((uint)MaxMap);
325  else
326  return ((uint)value);
327  }
328  )
329 
330  STRINGIFY(
331  inline float PerceptibleReciprocal(const float x)
332  {
333  float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
334  return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
335  }
336  )
337 
338  STRINGIFY(
339  inline float RoundToUnity(const float value)
340  {
341  return clamp(value,0.0f,1.0f);
342  }
343  )
344 
345  STRINGIFY(
346 
347  inline unsigned int getPixelIndex(const unsigned int number_channels,
348  const unsigned int columns, const unsigned int x, const unsigned int y)
349  {
350  return (x * number_channels) + (y * columns * number_channels);
351  }
352 
353  inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
354  inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
355  inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
356  inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
357 
358  inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
359  inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
360  inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
361  inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
362 
363  inline CLQuantum getBlue(CLPixelType p) { return p.x; }
364  inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
365  inline float getBlueF4(float4 p) { return p.x; }
366  inline void setBlueF4(float4* p, float value) { (*p).x = value; }
367 
368  inline CLQuantum getGreen(CLPixelType p) { return p.y; }
369  inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
370  inline float getGreenF4(float4 p) { return p.y; }
371  inline void setGreenF4(float4* p, float value) { (*p).y = value; }
372 
373  inline CLQuantum getRed(CLPixelType p) { return p.z; }
374  inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
375  inline float getRedF4(float4 p) { return p.z; }
376  inline void setRedF4(float4* p, float value) { (*p).z = value; }
377 
378  inline CLQuantum getAlpha(CLPixelType p) { return p.w; }
379  inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
380  inline float getAlphaF4(float4 p) { return p.w; }
381  inline void setAlphaF4(float4* p, float value) { (*p).w = value; }
382 
383  inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
384  const ChannelType channel, float *red, float *green, float *blue, float *alpha)
385  {
386  if ((channel & RedChannel) != 0)
387  *red=getPixelRed(p);
388 
389  if (number_channels > 2)
390  {
391  if ((channel & GreenChannel) != 0)
392  *green=getPixelGreen(p);
393 
394  if ((channel & BlueChannel) != 0)
395  *blue=getPixelBlue(p);
396  }
397 
398  if (((number_channels == 4) || (number_channels == 2)) &&
399  ((channel & AlphaChannel) != 0))
400  *alpha=getPixelAlpha(p,number_channels);
401  }
402 
403  inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels,
404  const unsigned int columns, const unsigned int x, const unsigned int y)
405  {
406  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
407 
408  float4 pixel;
409 
410  pixel.x=getPixelRed(p);
411 
412  if (number_channels > 2)
413  {
414  pixel.y=getPixelGreen(p);
415  pixel.z=getPixelBlue(p);
416  }
417 
418  if ((number_channels == 4) || (number_channels == 2))
419  pixel.w=getPixelAlpha(p,number_channels);
420  return(pixel);
421  }
422 
423  inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
424  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
425  {
426  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
427 
428  float red = 0.0f;
429  float green = 0.0f;
430  float blue = 0.0f;
431  float alpha = 0.0f;
432 
433  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
434  return (float4)(red, green, blue, alpha);
435  }
436 
437  inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels,
438  const ChannelType channel, float red, float green, float blue, float alpha)
439  {
440  if ((channel & RedChannel) != 0)
441  setPixelRed(p,ClampToQuantum(red));
442 
443  if (number_channels > 2)
444  {
445  if ((channel & GreenChannel) != 0)
446  setPixelGreen(p,ClampToQuantum(green));
447 
448  if ((channel & BlueChannel) != 0)
449  setPixelBlue(p,ClampToQuantum(blue));
450  }
451 
452  if (((number_channels == 4) || (number_channels == 2)) &&
453  ((channel & AlphaChannel) != 0))
454  setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
455  }
456 
457  inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels,
458  const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel)
459  {
460  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
461 
462  setPixelRed(p,ClampToQuantum(pixel.x));
463 
464  if (number_channels > 2)
465  {
466  setPixelGreen(p,ClampToQuantum(pixel.y));
467  setPixelBlue(p,ClampToQuantum(pixel.z));
468  }
469 
470  if ((number_channels == 4) || (number_channels == 2))
471  setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w));
472  }
473 
474  inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
475  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
476  float4 pixel)
477  {
478  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
479  WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
480  }
481 
482  inline float GetPixelIntensity(const unsigned int colorspace,
483  const unsigned int method,float red,float green,float blue)
484  {
485  float intensity;
486 
487  if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace))
488  return red;
489 
490  switch (method)
491  {
493  {
494  intensity=(red+green+blue)/3.0;
495  break;
496  }
498  {
499  intensity=MagickMax(MagickMax(red,green),blue);
500  break;
501  }
503  {
504  intensity=(MagickMin(MagickMin(red,green),blue)+
505  MagickMax(MagickMax(red,green),blue))/2.0;
506  break;
507  }
509  {
510  intensity=(float) (((float) red*red+green*green+blue*blue)/
511  (3.0*QuantumRange));
512  break;
513  }
515  {
516  /*
517  if (image->colorspace == RGBColorspace)
518  {
519  red=EncodePixelGamma(red);
520  green=EncodePixelGamma(green);
521  blue=EncodePixelGamma(blue);
522  }
523  */
524  intensity=0.298839*red+0.586811*green+0.114350*blue;
525  break;
526  }
528  {
529  /*
530  if (image->colorspace == sRGBColorspace)
531  {
532  red=DecodePixelGamma(red);
533  green=DecodePixelGamma(green);
534  blue=DecodePixelGamma(blue);
535  }
536  */
537  intensity=0.298839*red+0.586811*green+0.114350*blue;
538  break;
539  }
541  default:
542  {
543  /*
544  if (image->colorspace == RGBColorspace)
545  {
546  red=EncodePixelGamma(red);
547  green=EncodePixelGamma(green);
548  blue=EncodePixelGamma(blue);
549  }
550  */
551  intensity=0.212656*red+0.715158*green+0.072186*blue;
552  break;
553  }
555  {
556  /*
557  if (image->colorspace == sRGBColorspace)
558  {
559  red=DecodePixelGamma(red);
560  green=DecodePixelGamma(green);
561  blue=DecodePixelGamma(blue);
562  }
563  */
564  intensity=0.212656*red+0.715158*green+0.072186*blue;
565  break;
566  }
568  {
569  intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
570  sqrt(3.0));
571  break;
572  }
573  }
574 
575  return intensity;
576  }
577 
578  inline int mirrorBottom(int value)
579  {
580  return (value < 0) ? - (value) : value;
581  }
582 
583  inline int mirrorTop(int value, int width)
584  {
585  return (value >= width) ? (2 * width - value - 1) : value;
586  }
587  )
588 
589 /*
590 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
591 % %
592 % %
593 % %
594 % A d d N o i s e %
595 % %
596 % %
597 % %
598 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
599 */
600 
601  STRINGIFY(
602  /*
603  Part of MWC64X by David Thomas, dt10@imperial.ac.uk
604  This is provided under BSD, full license is with the main package.
605  See http://www.doc.ic.ac.uk/~dt10/research
606  */
607 
608  // Pre: a<M, b<M
609  // Post: r=(a+b) mod M
610  ulong MWC_AddMod64(ulong a, ulong b, ulong M)
611  {
612  ulong v=a+b;
613  //if( (v>=M) || (v<a) )
614  if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
615  v=v-M;
616  return v;
617  }
618 
619  // Pre: a<M,b<M
620  // Post: r=(a*b) mod M
621  // This could be done more efficently, but it is portable, and should
622  // be easy to understand. It can be replaced with any of the better
623  // modular multiplication algorithms (for example if you know you have
624  // double precision available or something).
625  ulong MWC_MulMod64(ulong a, ulong b, ulong M)
626  {
627  ulong r=0;
628  while(a!=0){
629  if(a&1)
630  r=MWC_AddMod64(r,b,M);
631  b=MWC_AddMod64(b,b,M);
632  a=a>>1;
633  }
634  return r;
635  }
636 
637  // Pre: a<M, e>=0
638  // Post: r=(a^b) mod M
639  // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
640  // most architectures
641  ulong MWC_PowMod64(ulong a, ulong e, ulong M)
642  {
643  ulong sqr=a, acc=1;
644  while(e!=0){
645  if(e&1)
646  acc=MWC_MulMod64(acc,sqr,M);
647  sqr=MWC_MulMod64(sqr,sqr,M);
648  e=e>>1;
649  }
650  return acc;
651  }
652 
653  uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
654  {
655  ulong m=MWC_PowMod64(A, distance, M);
656  ulong x=curr.x*(ulong)A+curr.y;
657  x=MWC_MulMod64(x, m, M);
658  return (uint2)((uint)(x/A), (uint)(x%A));
659  }
660 
661  uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
662  {
663  // This is an arbitrary constant for starting LCG jumping from. I didn't
664  // want to start from 1, as then you end up with the two or three first values
665  // being a bit poor in ones - once you've decided that, one constant is as
666  // good as any another. There is no deep mathematical reason for it, I just
667  // generated a random number.
668  enum{ MWC_BASEID = 4077358422479273989UL };
669 
670  ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
671  ulong m=MWC_PowMod64(A, dist, M);
672 
673  ulong x=MWC_MulMod64(MWC_BASEID, m, M);
674  return (uint2)((uint)(x/A), (uint)(x%A));
675  }
676 
678  typedef struct{ uint x; uint c; } mwc64x_state_t;
679 
680  enum{ MWC64X_A = 4294883355U };
681  enum{ MWC64X_M = 18446383549859758079UL };
682 
683  void MWC64X_Step(mwc64x_state_t *s)
684  {
685  uint X=s->x, C=s->c;
686 
687  uint Xn=MWC64X_A*X+C;
688  uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
689  uint Cn=mad_hi(MWC64X_A,X,carry);
690 
691  s->x=Xn;
692  s->c=Cn;
693  }
694 
695  void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
696  {
697  uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
698  s->x=tmp.x;
699  s->c=tmp.y;
700  }
701 
702  void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
703  {
704  uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
705  s->x=tmp.x;
706  s->c=tmp.y;
707  }
708 
710  uint MWC64X_NextUint(mwc64x_state_t *s)
711  {
712  uint res=s->x ^ s->c;
713  MWC64X_Step(s);
714  return res;
715  }
716 
717  //
718  // End of MWC64X excerpt
719  //
720 
721  float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
722  {
723  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
724  }
725 
726  float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate)
727  {
728  float
729  alpha,
730  beta,
731  noise,
732  sigma;
733 
734  noise = 0.0f;
735  alpha=mwcReadPseudoRandomValue(r);
736  switch(noise_type)
737  {
738  case UniformNoise:
739  default:
740  {
741  noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
742  break;
743  }
744  case GaussianNoise:
745  {
746  float
747  gamma,
748  tau;
749 
750  if (alpha == 0.0f)
751  alpha=1.0f;
752  beta=mwcReadPseudoRandomValue(r);
753  gamma=sqrt(-2.0f*log(alpha));
754  sigma=gamma*cospi((2.0f*beta));
755  tau=gamma*sinpi((2.0f*beta));
756  noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau;
757  break;
758  }
759  case ImpulseNoise:
760  {
761  if (alpha < (SigmaImpulse/2.0f))
762  noise=0.0f;
763  else
764  if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
765  noise=QuantumRange;
766  else
767  noise=pixel;
768  break;
769  }
770  case LaplacianNoise:
771  {
772  if (alpha <= 0.5f)
773  {
774  if (alpha <= MagickEpsilon)
775  noise=(pixel-QuantumRange);
776  else
777  noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
778  0.5f);
779  break;
780  }
781  beta=1.0f-alpha;
782  if (beta <= (0.5f*MagickEpsilon))
783  noise=(pixel+QuantumRange);
784  else
785  noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
786  break;
787  }
789  {
790  sigma=1.0f;
791  if (alpha > MagickEpsilon)
792  sigma=sqrt(-2.0f*log(alpha));
793  beta=mwcReadPseudoRandomValue(r);
794  noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma*
795  cospi((2.0f*beta))/2.0f);
796  break;
797  }
798  case PoissonNoise:
799  {
800  float
801  poisson;
802  unsigned int i;
803  poisson=exp(-SigmaPoisson*QuantumScale*pixel);
804  for (i=0; alpha > poisson; i++)
805  {
806  beta=mwcReadPseudoRandomValue(r);
807  alpha*=beta;
808  }
809  noise=(QuantumRange*i/SigmaPoisson);
810  break;
811  }
812  case RandomNoise:
813  {
814  noise=(QuantumRange*SigmaRandom*alpha);
815  break;
816  }
817  }
818  return noise;
819  }
820 
821  __kernel
822  void AddNoise(const __global CLQuantum *image,
823  const unsigned int number_channels,const ChannelType channel,
824  const unsigned int length,const unsigned int pixelsPerWorkItem,
825  const NoiseType noise_type,const float attenuate,const unsigned int seed0,
826  const unsigned int seed1,const unsigned int numRandomNumbersPerPixel,
827  __global CLQuantum *filteredImage)
828  {
829  mwc64x_state_t rng;
830  rng.x = seed0;
831  rng.c = seed1;
832 
833  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
834  uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
835  MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
836 
837  uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
838  uint count = pixelsPerWorkItem;
839 
840  while (count > 0 && pos < length)
841  {
842  const __global CLQuantum *p = image + pos;
843  __global CLQuantum *q = filteredImage + pos;
844 
845  float red;
846  float green;
847  float blue;
848  float alpha;
849 
850  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
851 
852  if ((channel & RedChannel) != 0)
853  red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
854 
855  if (number_channels > 2)
856  {
857  if ((channel & GreenChannel) != 0)
858  green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
859 
860  if ((channel & BlueChannel) != 0)
861  blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
862  }
863 
864  if (((number_channels == 4) || (number_channels == 2)) &&
865  ((channel & AlphaChannel) != 0))
866  alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
867 
868  WriteChannels(q, number_channels, channel, red, green, blue, alpha);
869 
870  pos += (get_local_size(0) * number_channels);
871  count--;
872  }
873  }
874  )
875 
876 /*
877 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
878 % %
879 % %
880 % %
881 % B l u r %
882 % %
883 % %
884 % %
885 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
886 */
887 
888  STRINGIFY(
889  /*
890  Reduce image noise and reduce detail levels by row
891  */
892  __kernel void BlurRow(const __global CLQuantum *image,
893  const unsigned int number_channels,const ChannelType channel,
894  __constant float *filter,const unsigned int width,
895  const unsigned int imageColumns,const unsigned int imageRows,
896  __local float4 *temp,__global float4 *tempImage)
897  {
898  const int x = get_global_id(0);
899  const int y = get_global_id(1);
900 
901  const int columns = imageColumns;
902 
903  const unsigned int radius = (width-1)/2;
904  const int wsize = get_local_size(0);
905  const unsigned int loadSize = wsize+width;
906 
907  //group coordinate
908  const int groupX=get_local_size(0)*get_group_id(0);
909 
910  //parallel load and clamp
911  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
912  {
913  int cx = ClampToCanvas(i + groupX - radius, columns);
914  temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
915  }
916 
917  // barrier
918  barrier(CLK_LOCAL_MEM_FENCE);
919 
920  // only do the work if this is not a patched item
921  if (get_global_id(0) < columns)
922  {
923  // compute
924  float4 result = (float4) 0;
925 
926  int i = 0;
927 
928  for ( ; i+7 < width; )
929  {
930  for (int j=0; j < 8; j++)
931  result+=filter[i+j]*temp[i+j+get_local_id(0)];
932  i+=8;
933  }
934 
935  for ( ; i < width; i++)
936  result+=filter[i]*temp[i+get_local_id(0)];
937 
938  // write back to global
939  tempImage[y*columns+x] = result;
940  }
941  }
942  )
943 
944  STRINGIFY(
945  /*
946  Reduce image noise and reduce detail levels by line
947  */
948  __kernel void BlurColumn(const __global float4 *blurRowData,
949  const unsigned int number_channels,const ChannelType channel,
950  __constant float *filter,const unsigned int width,
951  const unsigned int imageColumns,const unsigned int imageRows,
952  __local float4 *temp,__global CLQuantum *filteredImage)
953  {
954  const int x = get_global_id(0);
955  const int y = get_global_id(1);
956 
957  const int columns = imageColumns;
958  const int rows = imageRows;
959 
960  unsigned int radius = (width-1)/2;
961  const int wsize = get_local_size(1);
962  const unsigned int loadSize = wsize+width;
963 
964  //group coordinate
965  const int groupX=get_local_size(0)*get_group_id(0);
966  const int groupY=get_local_size(1)*get_group_id(1);
967  //notice that get_local_size(0) is 1, so
968  //groupX=get_group_id(0);
969 
970  //parallel load and clamp
971  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
972  temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
973 
974  // barrier
975  barrier(CLK_LOCAL_MEM_FENCE);
976 
977  // only do the work if this is not a patched item
978  if (get_global_id(1) < rows)
979  {
980  // compute
981  float4 result = (float4) 0;
982 
983  int i = 0;
984 
985  for ( ; i+7 < width; )
986  {
987  for (int j=0; j < 8; j++)
988  result+=filter[i+j]*temp[i+j+get_local_id(1)];
989  i+=8;
990  }
991 
992  for ( ; i < width; i++)
993  result+=filter[i]*temp[i+get_local_id(1)];
994 
995  // write back to global
996  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
997  }
998  }
999  )
1000 
1001 /*
1002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1003 % %
1004 % %
1005 % %
1006 % C o n t r a s t %
1007 % %
1008 % %
1009 % %
1010 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1011 */
1012 
1013  STRINGIFY(
1014 
1015  inline float4 ConvertRGBToHSB(const float4 pixel)
1016  {
1017  float4 result=0.0f;
1018  result.w=pixel.w;
1019  float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z);
1020  if (tmax != 0.0f)
1021  {
1022  float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z);
1023  float delta=tmax-tmin;
1024 
1025  result.y=delta/tmax;
1026  result.z=QuantumScale*tmax;
1027  if (delta != 0.0f)
1028  {
1029  result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
1030  result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
1031  (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
1032  result.x/=6.0f;
1033  result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
1034  }
1035  }
1036  return(result);
1037  }
1038 
1039  inline float4 ConvertHSBToRGB(const float4 pixel)
1040  {
1041  float hue=pixel.x;
1042  float saturation=pixel.y;
1043  float brightness=pixel.z;
1044 
1045  float4 result=pixel;
1046 
1047  if (saturation == 0.0f)
1048  {
1049  result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness);
1050  }
1051  else
1052  {
1053  float h=6.0f*(hue-floor(hue));
1054  float f=h-floor(h);
1055  float p=brightness*(1.0f-saturation);
1056  float q=brightness*(1.0f-saturation*f);
1057  float t=brightness*(1.0f-(saturation*(1.0f-f)));
1058  int ih = (int)h;
1059 
1060  if (ih == 1)
1061  {
1062  result.x=ClampToQuantum(QuantumRange*q);
1063  result.y=ClampToQuantum(QuantumRange*brightness);
1064  result.z=ClampToQuantum(QuantumRange*p);
1065  }
1066  else if (ih == 2)
1067  {
1068  result.x=ClampToQuantum(QuantumRange*p);
1069  result.y=ClampToQuantum(QuantumRange*brightness);
1070  result.z=ClampToQuantum(QuantumRange*t);
1071  }
1072  else if (ih == 3)
1073  {
1074  result.x=ClampToQuantum(QuantumRange*p);
1075  result.y=ClampToQuantum(QuantumRange*q);
1076  result.z=ClampToQuantum(QuantumRange*brightness);
1077  }
1078  else if (ih == 4)
1079  {
1080  result.x=ClampToQuantum(QuantumRange*t);
1081  result.y=ClampToQuantum(QuantumRange*p);
1082  result.z=ClampToQuantum(QuantumRange*brightness);
1083  }
1084  else if (ih == 5)
1085  {
1086  result.x=ClampToQuantum(QuantumRange*brightness);
1087  result.y=ClampToQuantum(QuantumRange*p);
1088  result.z=ClampToQuantum(QuantumRange*q);
1089  }
1090  else
1091  {
1092  result.x=ClampToQuantum(QuantumRange*brightness);
1093  result.y=ClampToQuantum(QuantumRange*t);
1094  result.z=ClampToQuantum(QuantumRange*p);
1095  }
1096  }
1097  return(result);
1098  }
1099 
1100  __kernel void Contrast(__global CLQuantum *image,
1101  const unsigned int number_channels,const int sign)
1102  {
1103  const int x=get_global_id(0);
1104  const int y=get_global_id(1);
1105  const unsigned int columns=get_global_size(0);
1106 
1107  float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
1108  if (number_channels < 3)
1109  pixel.y=pixel.z=pixel.x;
1110 
1111  pixel=ConvertRGBToHSB(pixel);
1112  float brightness=pixel.z;
1113  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1114  brightness=clamp(brightness,0.0f,1.0f);
1115  pixel.z=brightness;
1116  pixel=ConvertHSBToRGB(pixel);
1117 
1118  WriteAllChannels(image,number_channels,columns,x,y,pixel);
1119  }
1120  )
1121 
1122 /*
1123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1124 % %
1125 % %
1126 % %
1127 % C o n t r a s t S t r e t c h %
1128 % %
1129 % %
1130 % %
1131 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1132 */
1133 
1134  STRINGIFY(
1135  /*
1136  */
1137  __kernel void Histogram(__global CLPixelType * restrict im,
1138  const ChannelType channel,
1139  const unsigned int colorspace,
1140  const unsigned int method,
1141  __global uint4 * restrict histogram)
1142  {
1143  const int x = get_global_id(0);
1144  const int y = get_global_id(1);
1145  const int columns = get_global_size(0);
1146  const int c = x + y * columns;
1147  if ((channel & SyncChannels) != 0)
1148  {
1149  float red=(float)getRed(im[c]);
1150  float green=(float)getGreen(im[c]);
1151  float blue=(float)getBlue(im[c]);
1152 
1153  float intensity = GetPixelIntensity(colorspace, method, red, green, blue);
1154  uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1155  atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
1156  }
1157  else
1158  {
1159  // for equalizing, we always need all channels?
1160  // otherwise something more
1161  }
1162  }
1163  )
1164 
1165  STRINGIFY(
1166  /*
1167  */
1168  __kernel void ContrastStretch(__global CLPixelType * restrict im,
1169  const ChannelType channel,
1170  __global CLPixelType * restrict stretch_map,
1171  const float4 white, const float4 black)
1172  {
1173  const int x = get_global_id(0);
1174  const int y = get_global_id(1);
1175  const int columns = get_global_size(0);
1176  const int c = x + y * columns;
1177 
1178  uint ePos;
1179  CLPixelType oValue, eValue;
1180  CLQuantum red, green, blue, alpha;
1181 
1182  //read from global
1183  oValue=im[c];
1184 
1185  if ((channel & RedChannel) != 0)
1186  {
1187  if (getRedF4(white) != getRedF4(black))
1188  {
1189  ePos = ScaleQuantumToMap(getRed(oValue));
1190  eValue = stretch_map[ePos];
1191  red = getRed(eValue);
1192  }
1193  }
1194 
1195  if ((channel & GreenChannel) != 0)
1196  {
1197  if (getGreenF4(white) != getGreenF4(black))
1198  {
1199  ePos = ScaleQuantumToMap(getGreen(oValue));
1200  eValue = stretch_map[ePos];
1201  green = getGreen(eValue);
1202  }
1203  }
1204 
1205  if ((channel & BlueChannel) != 0)
1206  {
1207  if (getBlueF4(white) != getBlueF4(black))
1208  {
1209  ePos = ScaleQuantumToMap(getBlue(oValue));
1210  eValue = stretch_map[ePos];
1211  blue = getBlue(eValue);
1212  }
1213  }
1214 
1215  if ((channel & AlphaChannel) != 0)
1216  {
1217  if (getAlphaF4(white) != getAlphaF4(black))
1218  {
1219  ePos = ScaleQuantumToMap(getAlpha(oValue));
1220  eValue = stretch_map[ePos];
1221  alpha = getAlpha(eValue);
1222  }
1223  }
1224 
1225  //write back
1226  im[c]=(CLPixelType)(blue, green, red, alpha);
1227 
1228  }
1229  )
1230 
1231 /*
1232 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1233 % %
1234 % %
1235 % %
1236 % C o n v o l v e %
1237 % %
1238 % %
1239 % %
1240 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1241 */
1242 
1243  STRINGIFY(
1244  __kernel
1245  void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
1246  const unsigned int imageWidth, const unsigned int imageHeight,
1247  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1248  const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
1249 
1250  int2 blockID;
1251  blockID.x = get_global_id(0) / get_local_size(0);
1252  blockID.y = get_global_id(1) / get_local_size(1);
1253 
1254  // image area processed by this workgroup
1255  int2 imageAreaOrg;
1256  imageAreaOrg.x = blockID.x * get_local_size(0);
1257  imageAreaOrg.y = blockID.y * get_local_size(1);
1258 
1259  int2 midFilterDimen;
1260  midFilterDimen.x = (filterWidth-1)/2;
1261  midFilterDimen.y = (filterHeight-1)/2;
1262 
1263  int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1264 
1265  // dimension of the local cache
1266  int2 cachedAreaDimen;
1267  cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1268  cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1269 
1270  // cache the pixels accessed by this workgroup in local memory
1271  int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1272  int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1273  int groupSize = get_local_size(0) * get_local_size(1);
1274  for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1275 
1276  int2 cachedAreaIndex;
1277  cachedAreaIndex.x = i % cachedAreaDimen.x;
1278  cachedAreaIndex.y = i / cachedAreaDimen.x;
1279 
1280  int2 imagePixelIndex;
1281  imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1282 
1283  // only support EdgeVirtualPixelMethod through ClampToCanvas
1284  // TODO: implement other virtual pixel method
1285  imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1286  imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1287 
1288  pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1289  }
1290 
1291  // cache the filter
1292  for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1293  filterCache[i] = filter[i];
1294  }
1295  barrier(CLK_LOCAL_MEM_FENCE);
1296 
1297 
1298  int2 imageIndex;
1299  imageIndex.x = imageAreaOrg.x + get_local_id(0);
1300  imageIndex.y = imageAreaOrg.y + get_local_id(1);
1301 
1302  // if out-of-range, stops here and quit
1303  if (imageIndex.x >= imageWidth
1304  || imageIndex.y >= imageHeight) {
1305  return;
1306  }
1307 
1308  int filterIndex = 0;
1309  float4 sum = (float4)0.0f;
1310  float gamma = 0.0f;
1311  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1312  int cacheIndexY = get_local_id(1);
1313  for (int j = 0; j < filterHeight; j++) {
1314  int cacheIndexX = get_local_id(0);
1315  for (int i = 0; i < filterWidth; i++) {
1316  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1317  float f = filterCache[filterIndex];
1318 
1319  sum.x += f * p.x;
1320  sum.y += f * p.y;
1321  sum.z += f * p.z;
1322  sum.w += f * p.w;
1323 
1324  gamma += f;
1325  filterIndex++;
1326  cacheIndexX++;
1327  }
1328  cacheIndexY++;
1329  }
1330  }
1331  else {
1332  int cacheIndexY = get_local_id(1);
1333  for (int j = 0; j < filterHeight; j++) {
1334  int cacheIndexX = get_local_id(0);
1335  for (int i = 0; i < filterWidth; i++) {
1336 
1337  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1338  float alpha = QuantumScale*p.w;
1339  float f = filterCache[filterIndex];
1340  float g = alpha * f;
1341 
1342  sum.x += g*p.x;
1343  sum.y += g*p.y;
1344  sum.z += g*p.z;
1345  sum.w += f*p.w;
1346 
1347  gamma += g;
1348  filterIndex++;
1349  cacheIndexX++;
1350  }
1351  cacheIndexY++;
1352  }
1353  gamma = PerceptibleReciprocal(gamma);
1354  sum.xyz = gamma*sum.xyz;
1355  }
1356  CLPixelType outputPixel;
1357  outputPixel.x = ClampToQuantum(sum.x);
1358  outputPixel.y = ClampToQuantum(sum.y);
1359  outputPixel.z = ClampToQuantum(sum.z);
1360  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1361 
1362  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1363  }
1364  )
1365 
1366  STRINGIFY(
1367  __kernel
1368  void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
1369  const uint imageWidth, const uint imageHeight,
1370  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1371  const uint matte, const ChannelType channel) {
1372 
1373  int2 imageIndex;
1374  imageIndex.x = get_global_id(0);
1375  imageIndex.y = get_global_id(1);
1376 
1377  /*
1378  unsigned int imageWidth = get_global_size(0);
1379  unsigned int imageHeight = get_global_size(1);
1380  */
1381  if (imageIndex.x >= imageWidth
1382  || imageIndex.y >= imageHeight)
1383  return;
1384 
1385  int2 midFilterDimen;
1386  midFilterDimen.x = (filterWidth-1)/2;
1387  midFilterDimen.y = (filterHeight-1)/2;
1388 
1389  int filterIndex = 0;
1390  float4 sum = (float4)0.0f;
1391  float gamma = 0.0f;
1392  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1393  for (int j = 0; j < filterHeight; j++) {
1394  int2 inputPixelIndex;
1395  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1396  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1397  for (int i = 0; i < filterWidth; i++) {
1398  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1399  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1400 
1401  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1402  float f = filter[filterIndex];
1403 
1404  sum.x += f * p.x;
1405  sum.y += f * p.y;
1406  sum.z += f * p.z;
1407  sum.w += f * p.w;
1408 
1409  gamma += f;
1410 
1411  filterIndex++;
1412  }
1413  }
1414  }
1415  else {
1416 
1417  for (int j = 0; j < filterHeight; j++) {
1418  int2 inputPixelIndex;
1419  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1420  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1421  for (int i = 0; i < filterWidth; i++) {
1422  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1423  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1424 
1425  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1426  float alpha = QuantumScale*p.w;
1427  float f = filter[filterIndex];
1428  float g = alpha * f;
1429 
1430  sum.x += g*p.x;
1431  sum.y += g*p.y;
1432  sum.z += g*p.z;
1433  sum.w += f*p.w;
1434 
1435  gamma += g;
1436 
1437 
1438  filterIndex++;
1439  }
1440  }
1441  gamma = PerceptibleReciprocal(gamma);
1442  sum.xyz = gamma*sum.xyz;
1443  }
1444 
1445  CLPixelType outputPixel;
1446  outputPixel.x = ClampToQuantum(sum.x);
1447  outputPixel.y = ClampToQuantum(sum.y);
1448  outputPixel.z = ClampToQuantum(sum.z);
1449  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1450 
1451  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1452  }
1453  )
1454 
1455 /*
1456 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1457 % %
1458 % %
1459 % %
1460 % D e s p e c k l e %
1461 % %
1462 % %
1463 % %
1464 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1465 */
1466 
1467  STRINGIFY(
1468 
1469  __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1470  , const unsigned int imageWidth, const unsigned int imageHeight
1471  , const int2 offset, const int polarity, const int matte) {
1472 
1473  int x = get_global_id(0);
1474  int y = get_global_id(1);
1475 
1476  CLPixelType v = inputImage[y*imageWidth+x];
1477 
1478  int2 neighbor;
1479  neighbor.y = y + offset.y;
1480  neighbor.x = x + offset.x;
1481 
1482  int2 clampedNeighbor;
1483  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1484  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1485 
1486  CLPixelType r = (clampedNeighbor.x == neighbor.x
1487  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1488  :(CLPixelType)0;
1489 
1490  int sv[4];
1491  sv[0] = (int)v.x;
1492  sv[1] = (int)v.y;
1493  sv[2] = (int)v.z;
1494  sv[3] = (int)v.w;
1495 
1496  int sr[4];
1497  sr[0] = (int)r.x;
1498  sr[1] = (int)r.y;
1499  sr[2] = (int)r.z;
1500  sr[3] = (int)r.w;
1501 
1502  if (polarity > 0) {
1503  \n #pragma unroll 4\n
1504  for (unsigned int i = 0; i < 4; i++) {
1505  sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1506  }
1507  }
1508  else {
1509  \n #pragma unroll 4\n
1510  for (unsigned int i = 0; i < 4; i++) {
1511  sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1512  }
1513 
1514  }
1515 
1516  v.x = (CLQuantum)sv[0];
1517  v.y = (CLQuantum)sv[1];
1518  v.z = (CLQuantum)sv[2];
1519 
1520  if (matte!=0)
1521  v.w = (CLQuantum)sv[3];
1522 
1523  outputImage[y*imageWidth+x] = v;
1524 
1525  }
1526 
1527 
1528  )
1529 
1530  STRINGIFY(
1531 
1532  __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1533  , const unsigned int imageWidth, const unsigned int imageHeight
1534  , const int2 offset, const int polarity, const int matte) {
1535 
1536  int x = get_global_id(0);
1537  int y = get_global_id(1);
1538 
1539  CLPixelType v = inputImage[y*imageWidth+x];
1540 
1541  int2 neighbor, clampedNeighbor;
1542 
1543  neighbor.y = y + offset.y;
1544  neighbor.x = x + offset.x;
1545  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1546  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1547 
1548  CLPixelType r = (clampedNeighbor.x == neighbor.x
1549  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1550  :(CLPixelType)0;
1551 
1552 
1553  neighbor.y = y - offset.y;
1554  neighbor.x = x - offset.x;
1555  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1556  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1557 
1558  CLPixelType s = (clampedNeighbor.x == neighbor.x
1559  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1560  :(CLPixelType)0;
1561 
1562 
1563  int sv[4];
1564  sv[0] = (int)v.x;
1565  sv[1] = (int)v.y;
1566  sv[2] = (int)v.z;
1567  sv[3] = (int)v.w;
1568 
1569  int sr[4];
1570  sr[0] = (int)r.x;
1571  sr[1] = (int)r.y;
1572  sr[2] = (int)r.z;
1573  sr[3] = (int)r.w;
1574 
1575  int ss[4];
1576  ss[0] = (int)s.x;
1577  ss[1] = (int)s.y;
1578  ss[2] = (int)s.z;
1579  ss[3] = (int)s.w;
1580 
1581  if (polarity > 0) {
1582  \n #pragma unroll 4\n
1583  for (unsigned int i = 0; i < 4; i++) {
1584  //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1585  //
1586  //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1587  //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1588  sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1589  }
1590  }
1591  else {
1592  \n #pragma unroll 4\n
1593  for (unsigned int i = 0; i < 4; i++) {
1594  //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1595  //
1596  //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1597  sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1598  }
1599  }
1600 
1601  v.x = (CLQuantum)sv[0];
1602  v.y = (CLQuantum)sv[1];
1603  v.z = (CLQuantum)sv[2];
1604 
1605  if (matte!=0)
1606  v.w = (CLQuantum)sv[3];
1607 
1608  outputImage[y*imageWidth+x] = v;
1609 
1610  }
1611  )
1612 
1613 /*
1614 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1615 % %
1616 % %
1617 % %
1618 % E q u a l i z e %
1619 % %
1620 % %
1621 % %
1622 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1623 */
1624 
1625  STRINGIFY(
1626  /*
1627  */
1628  __kernel void Equalize(__global CLPixelType * restrict im,
1629  const ChannelType channel,
1630  __global CLPixelType * restrict equalize_map,
1631  const float4 white, const float4 black)
1632  {
1633  const int x = get_global_id(0);
1634  const int y = get_global_id(1);
1635  const int columns = get_global_size(0);
1636  const int c = x + y * columns;
1637 
1638  uint ePos;
1639  CLPixelType oValue, eValue;
1640  CLQuantum red, green, blue, alpha;
1641 
1642  //read from global
1643  oValue=im[c];
1644 
1645  if ((channel & SyncChannels) != 0)
1646  {
1647  if (getRedF4(white) != getRedF4(black))
1648  {
1649  ePos = ScaleQuantumToMap(getRed(oValue));
1650  eValue = equalize_map[ePos];
1651  red = getRed(eValue);
1652  ePos = ScaleQuantumToMap(getGreen(oValue));
1653  eValue = equalize_map[ePos];
1654  green = getRed(eValue);
1655  ePos = ScaleQuantumToMap(getBlue(oValue));
1656  eValue = equalize_map[ePos];
1657  blue = getRed(eValue);
1658  ePos = ScaleQuantumToMap(getAlpha(oValue));
1659  eValue = equalize_map[ePos];
1660  alpha = getRed(eValue);
1661 
1662  //write back
1663  im[c]=(CLPixelType)(blue, green, red, alpha);
1664  }
1665 
1666  }
1667 
1668  // for equalizing, we always need all channels?
1669  // otherwise something more
1670 
1671  }
1672  )
1673 
1674 /*
1675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1676 % %
1677 % %
1678 % %
1679 % F u n c t i o n %
1680 % %
1681 % %
1682 % %
1683 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1684 */
1685 
1686  STRINGIFY(
1687  /*
1688  apply FunctionImageChannel(braightness-contrast)
1689  */
1690  CLQuantum ApplyFunction(float pixel,const MagickFunction function,
1691  const unsigned int number_parameters,__constant float *parameters)
1692  {
1693  float result = 0.0f;
1694 
1695  switch (function)
1696  {
1697  case PolynomialFunction:
1698  {
1699  for (unsigned int i=0; i < number_parameters; i++)
1700  result = result*QuantumScale*pixel + parameters[i];
1701  result *= QuantumRange;
1702  break;
1703  }
1704  case SinusoidFunction:
1705  {
1706  float freq,phase,ampl,bias;
1707  freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1708  phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1709  ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1710  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1711  result = QuantumRange*(ampl*sin(2.0f*MagickPI*
1712  (freq*QuantumScale*pixel + phase/360.0f)) + bias);
1713  break;
1714  }
1715  case ArcsinFunction:
1716  {
1717  float width,range,center,bias;
1718  width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1719  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1720  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1721  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1722 
1723  result = 2.0f/width*(QuantumScale*pixel - center);
1724  result = range/MagickPI*asin(result)+bias;
1725  result = ( result <= -1.0f ) ? bias - range/2.0f : result;
1726  result = ( result >= 1.0f ) ? bias + range/2.0f : result;
1727  result *= QuantumRange;
1728  break;
1729  }
1730  case ArctanFunction:
1731  {
1732  float slope,range,center,bias;
1733  slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1734  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1735  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1736  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1737  result = MagickPI*slope*(QuantumScale*pixel-center);
1738  result = QuantumRange*(range/MagickPI*atan(result) + bias);
1739  break;
1740  }
1741  case UndefinedFunction:
1742  break;
1743  }
1744  return(ClampToQuantum(result));
1745  }
1746  )
1747 
1748  STRINGIFY(
1749  /*
1750  Improve brightness / contrast of the image
1751  channel : define which channel is improved
1752  function : the function called to enchance the brightness contrast
1753  number_parameters : numbers of parameters
1754  parameters : the parameter
1755  */
1756  __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
1757  const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
1758  __constant float *parameters)
1759  {
1760  const unsigned int x = get_global_id(0);
1761  const unsigned int y = get_global_id(1);
1762  const unsigned int columns = get_global_size(0);
1763  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1764 
1765  float red;
1766  float green;
1767  float blue;
1768  float alpha;
1769 
1770  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
1771 
1772  if ((channel & RedChannel) != 0)
1773  red=ApplyFunction(red, function, number_parameters, parameters);
1774 
1775  if (number_channels > 2)
1776  {
1777  if ((channel & GreenChannel) != 0)
1778  green=ApplyFunction(green, function, number_parameters, parameters);
1779 
1780  if ((channel & BlueChannel) != 0)
1781  blue=ApplyFunction(blue, function, number_parameters, parameters);
1782  }
1783 
1784  if (((number_channels == 4) || (number_channels == 2)) &&
1785  ((channel & AlphaChannel) != 0))
1786  alpha=ApplyFunction(alpha, function, number_parameters, parameters);
1787 
1788  WriteChannels(p, number_channels, channel, red, green, blue, alpha);
1789  }
1790  )
1791 
1792 /*
1793 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1794 % %
1795 % %
1796 % %
1797 % G r a y s c a l e %
1798 % %
1799 % %
1800 % %
1801 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1802 */
1803 
1804  STRINGIFY(
1805  __kernel void Grayscale(__global CLQuantum *image,const int number_channels,
1806  const unsigned int colorspace,const unsigned int method)
1807  {
1808  const unsigned int x = get_global_id(0);
1809  const unsigned int y = get_global_id(1);
1810  const unsigned int columns = get_global_size(0);
1811  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1812 
1813  float
1814  blue,
1815  green,
1816  red;
1817 
1818  red=getPixelRed(p);
1819  green=getPixelGreen(p);
1820  blue=getPixelBlue(p);
1821 
1822  CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue));
1823 
1824  setPixelRed(p,intensity);
1825  setPixelGreen(p,intensity);
1826  setPixelBlue(p,intensity);
1827  }
1828  )
1829 
1830 /*
1831 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1832 % %
1833 % %
1834 % %
1835 % L o c a l C o n t r a s t %
1836 % %
1837 % %
1838 % %
1839 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1840 */
1841 
1842  STRINGIFY(
1843 
1844  __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
1845  const int radius,
1846  const int imageWidth,
1847  const int imageHeight)
1848  {
1849  const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
1850 
1851  int x = get_local_id(0);
1852  int y = get_global_id(1);
1853 
1854  if ((x >= imageWidth) || (y >= imageHeight))
1855  return;
1856 
1857  global CLPixelType *src = srcImage + y * imageWidth;
1858 
1859  for (int i = x; i < imageWidth; i += get_local_size(0)) {
1860  float sum = 0.0f;
1861  float weight = 1.0f;
1862 
1863  int j = i - radius;
1864  while ((j + 7) < i) {
1865  for (int k = 0; k < 8; ++k) // Unroll 8x
1866  sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
1867  weight += 8.0f;
1868  j+=8;
1869  }
1870  while (j < i) {
1871  sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
1872  weight += 1.0f;
1873  ++j;
1874  }
1875 
1876  while ((j + 7) < radius + i) {
1877  for (int k = 0; k < 8; ++k) // Unroll 8x
1878  sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
1879  weight -= 8.0f;
1880  j+=8;
1881  }
1882  while (j < radius + i) {
1883  sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
1884  weight -= 1.0f;
1885  ++j;
1886  }
1887 
1888  tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
1889  }
1890  }
1891  )
1892 
1893  STRINGIFY(
1894  __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
1895  const int radius,
1896  const float strength,
1897  const int imageWidth,
1898  const int imageHeight)
1899  {
1900  const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
1901 
1902  int x = get_global_id(0);
1903  int y = get_global_id(1);
1904 
1905  if ((x >= imageWidth) || (y >= imageHeight))
1906  return;
1907 
1908  global float *src = blurImage + x;
1909 
1910  float sum = 0.0f;
1911  float weight = 1.0f;
1912 
1913  int j = y - radius;
1914  while ((j + 7) < y) {
1915  for (int k = 0; k < 8; ++k) // Unroll 8x
1916  sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
1917  weight += 8.0f;
1918  j+=8;
1919  }
1920  while (j < y) {
1921  sum += weight * src[mirrorBottom(j) * imageWidth];
1922  weight += 1.0f;
1923  ++j;
1924  }
1925 
1926  while ((j + 7) < radius + y) {
1927  for (int k = 0; k < 8; ++k) // Unroll 8x
1928  sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
1929  weight -= 8.0f;
1930  j+=8;
1931  }
1932  while (j < radius + y) {
1933  sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
1934  weight -= 1.0f;
1935  ++j;
1936  }
1937 
1938  CLPixelType pixel = srcImage[x + y * imageWidth];
1939  float srcVal = dot(RGB, convert_float4(pixel));
1940  float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
1941  mult = (srcVal + mult) / srcVal;
1942 
1943  pixel.x = ClampToQuantum(pixel.x * mult);
1944  pixel.y = ClampToQuantum(pixel.y * mult);
1945  pixel.z = ClampToQuantum(pixel.z * mult);
1946 
1947  dstImage[x + y * imageWidth] = pixel;
1948  }
1949  )
1950 
1951 /*
1952 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1953 % %
1954 % %
1955 % %
1956 % M o d u l a t e %
1957 % %
1958 % %
1959 % %
1960 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1961 */
1962 
1963  STRINGIFY(
1964 
1965  inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1966  float *hue, float *saturation, float *lightness)
1967  {
1968  float
1969  c,
1970  tmax,
1971  tmin;
1972 
1973  /*
1974  Convert RGB to HSL colorspace.
1975  */
1976  tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
1977  tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
1978 
1979  c=tmax-tmin;
1980 
1981  *lightness=(tmax+tmin)/2.0;
1982  if (c <= 0.0)
1983  {
1984  *hue=0.0;
1985  *saturation=0.0;
1986  return;
1987  }
1988 
1989  if (tmax == (QuantumScale*red))
1990  {
1991  *hue=(QuantumScale*green-QuantumScale*blue)/c;
1992  if ((QuantumScale*green) < (QuantumScale*blue))
1993  *hue+=6.0;
1994  }
1995  else
1996  if (tmax == (QuantumScale*green))
1997  *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
1998  else
1999  *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
2000 
2001  *hue*=60.0/360.0;
2002  if (*lightness <= 0.5)
2003  *saturation=c/(2.0*(*lightness));
2004  else
2005  *saturation=c/(2.0-2.0*(*lightness));
2006  }
2007 
2008  inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
2009  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2010  {
2011  float
2012  b,
2013  c,
2014  g,
2015  h,
2016  tmin,
2017  r,
2018  x;
2019 
2020  /*
2021  Convert HSL to RGB colorspace.
2022  */
2023  h=hue*360.0;
2024  if (lightness <= 0.5)
2025  c=2.0*lightness*saturation;
2026  else
2027  c=(2.0-2.0*lightness)*saturation;
2028  tmin=lightness-0.5*c;
2029  h-=360.0*floor(h/360.0);
2030  h/=60.0;
2031  x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2032  switch ((int) floor(h) % 6)
2033  {
2034  case 0:
2035  {
2036  r=tmin+c;
2037  g=tmin+x;
2038  b=tmin;
2039  break;
2040  }
2041  case 1:
2042  {
2043  r=tmin+x;
2044  g=tmin+c;
2045  b=tmin;
2046  break;
2047  }
2048  case 2:
2049  {
2050  r=tmin;
2051  g=tmin+c;
2052  b=tmin+x;
2053  break;
2054  }
2055  case 3:
2056  {
2057  r=tmin;
2058  g=tmin+x;
2059  b=tmin+c;
2060  break;
2061  }
2062  case 4:
2063  {
2064  r=tmin+x;
2065  g=tmin;
2066  b=tmin+c;
2067  break;
2068  }
2069  case 5:
2070  {
2071  r=tmin+c;
2072  g=tmin;
2073  b=tmin+x;
2074  break;
2075  }
2076  default:
2077  {
2078  r=0.0;
2079  g=0.0;
2080  b=0.0;
2081  }
2082  }
2083  *red=ClampToQuantum(QuantumRange*r);
2084  *green=ClampToQuantum(QuantumRange*g);
2085  *blue=ClampToQuantum(QuantumRange*b);
2086  }
2087 
2088  inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
2089  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2090  {
2091  float
2092  hue,
2093  lightness,
2094  saturation;
2095 
2096  /*
2097  Increase or decrease color lightness, saturation, or hue.
2098  */
2099  ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
2100  hue+=0.5*(0.01*percent_hue-1.0);
2101  while (hue < 0.0)
2102  hue+=1.0;
2103  while (hue >= 1.0)
2104  hue-=1.0;
2105  saturation*=0.01*percent_saturation;
2106  lightness*=0.01*percent_lightness;
2107  ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
2108  }
2109 
2110  __kernel void Modulate(__global CLPixelType *im,
2111  const float percent_brightness,
2112  const float percent_hue,
2113  const float percent_saturation,
2114  const int colorspace)
2115  {
2116 
2117  const int x = get_global_id(0);
2118  const int y = get_global_id(1);
2119  const int columns = get_global_size(0);
2120  const int c = x + y * columns;
2121 
2122  CLPixelType pixel = im[c];
2123 
2124  CLQuantum
2125  blue,
2126  green,
2127  red;
2128 
2129  red=getRed(pixel);
2130  green=getGreen(pixel);
2131  blue=getBlue(pixel);
2132 
2133  switch (colorspace)
2134  {
2135  case HSLColorspace:
2136  default:
2137  {
2138  ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2139  &red, &green, &blue);
2140  }
2141 
2142  }
2143 
2144  CLPixelType filteredPixel;
2145 
2146  setRed(&filteredPixel, red);
2147  setGreen(&filteredPixel, green);
2148  setBlue(&filteredPixel, blue);
2149  filteredPixel.w = pixel.w;
2150 
2151  im[c] = filteredPixel;
2152  }
2153  )
2154 
2155 /*
2156 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2157 % %
2158 % %
2159 % %
2160 % M o t i o n B l u r %
2161 % %
2162 % %
2163 % %
2164 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2165 */
2166 
2167  STRINGIFY(
2168  __kernel
2169  void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2170  const unsigned int imageWidth, const unsigned int imageHeight,
2171  const __global float *filter, const unsigned int width, const __global int2* offset,
2172  const float4 bias,
2173  const ChannelType channel, const unsigned int matte) {
2174 
2175  int2 currentPixel;
2176  currentPixel.x = get_global_id(0);
2177  currentPixel.y = get_global_id(1);
2178 
2179  if (currentPixel.x >= imageWidth
2180  || currentPixel.y >= imageHeight)
2181  return;
2182 
2183  float4 pixel;
2184  pixel.x = (float)bias.x;
2185  pixel.y = (float)bias.y;
2186  pixel.z = (float)bias.z;
2187  pixel.w = (float)bias.w;
2188 
2189  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
2190 
2191  for (int i = 0; i < width; i++) {
2192  // only support EdgeVirtualPixelMethod through ClampToCanvas
2193  // TODO: implement other virtual pixel method
2194  int2 samplePixel = currentPixel + offset[i];
2195  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2196  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2197  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2198 
2199  pixel.x += (filter[i] * (float)samplePixelValue.x);
2200  pixel.y += (filter[i] * (float)samplePixelValue.y);
2201  pixel.z += (filter[i] * (float)samplePixelValue.z);
2202  pixel.w += (filter[i] * (float)samplePixelValue.w);
2203  }
2204 
2205  CLPixelType outputPixel;
2206  outputPixel.x = ClampToQuantum(pixel.x);
2207  outputPixel.y = ClampToQuantum(pixel.y);
2208  outputPixel.z = ClampToQuantum(pixel.z);
2209  outputPixel.w = ClampToQuantum(pixel.w);
2210  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2211  }
2212  else {
2213 
2214  float gamma = 0.0f;
2215  for (int i = 0; i < width; i++) {
2216  // only support EdgeVirtualPixelMethod through ClampToCanvas
2217  // TODO: implement other virtual pixel method
2218  int2 samplePixel = currentPixel + offset[i];
2219  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2220  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2221 
2222  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2223 
2224  float alpha = QuantumScale*samplePixelValue.w;
2225  float k = filter[i];
2226  pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2227  pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2228  pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2229 
2230  pixel.w += k * alpha * samplePixelValue.w;
2231 
2232  gamma+=k*alpha;
2233  }
2234  gamma = PerceptibleReciprocal(gamma);
2235  pixel.xyz = gamma*pixel.xyz;
2236 
2237  CLPixelType outputPixel;
2238  outputPixel.x = ClampToQuantum(pixel.x);
2239  outputPixel.y = ClampToQuantum(pixel.y);
2240  outputPixel.z = ClampToQuantum(pixel.z);
2241  outputPixel.w = ClampToQuantum(pixel.w);
2242  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2243  }
2244  }
2245  )
2246 
2247 /*
2248 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2249 % %
2250 % %
2251 % %
2252 % R e s i z e %
2253 % %
2254 % %
2255 % %
2256 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2257 */
2258 
2259  STRINGIFY(
2260  // Based on Box from resize.c
2261  float BoxResizeFilter(const float x)
2262  {
2263  return 1.0f;
2264  }
2265  )
2266 
2267  STRINGIFY(
2268  // Based on CubicBC from resize.c
2269  float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2270  {
2271  /*
2272  Cubic Filters using B,C determined values:
2273  Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2274  Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2275  Spline B = 1 C = 0 B-Spline Gaussian approximation
2276  Hermite B = 0 C = 0 B-Spline interpolator
2277 
2278  See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2279  Graphics Computer Graphics, Volume 22, Number 4, August 1988
2280  http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2281  Mitchell.pdf.
2282 
2283  Coefficents are determined from B,C values:
2284  P0 = ( 6 - 2*B )/6 = coeff[0]
2285  P1 = 0
2286  P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2287  P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2288  Q0 = ( 8*B +24*C )/6 = coeff[3]
2289  Q1 = ( -12*B -48*C )/6 = coeff[4]
2290  Q2 = ( 6*B +30*C )/6 = coeff[5]
2291  Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2292 
2293  which are used to define the filter:
2294 
2295  P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2296  Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2297 
2298  which ensures function is continuous in value and derivative (slope).
2299  */
2300  if (x < 1.0)
2301  return(resizeFilterCoefficients[0]+x*(x*
2302  (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2303  if (x < 2.0)
2304  return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2305  (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
2306  return(0.0);
2307  }
2308  )
2309 
2310  STRINGIFY(
2311  float Sinc(const float x)
2312  {
2313  if (x != 0.0f)
2314  {
2315  const float alpha=(float) (MagickPI*x);
2316  return sinpi(x)/alpha;
2317  }
2318  return(1.0f);
2319  }
2320  )
2321 
2322  STRINGIFY(
2323  float Triangle(const float x)
2324  {
2325  /*
2326  1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
2327  a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
2328  for Sinc().
2329  */
2330  return ((x<1.0f)?(1.0f-x):0.0f);
2331  }
2332  )
2333 
2334 
2335  STRINGIFY(
2336  float Hann(const float x)
2337  {
2338  /*
2339  Cosine window function:
2340  0.5+0.5*cos(pi*x).
2341  */
2342  const float cosine=cos((MagickPI*x));
2343  return(0.5f+0.5f*cosine);
2344  }
2345  )
2346 
2347  STRINGIFY(
2348  float Hamming(const float x)
2349  {
2350  /*
2351  Offset cosine window function:
2352  .54 + .46 cos(pi x).
2353  */
2354  const float cosine=cos((MagickPI*x));
2355  return(0.54f+0.46f*cosine);
2356  }
2357  )
2358 
2359  STRINGIFY(
2360  float Blackman(const float x)
2361  {
2362  /*
2363  Blackman: 2nd order cosine windowing function:
2364  0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2365 
2366  Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
2367  five flops.
2368  */
2369  const float cosine=cos((MagickPI*x));
2370  return(0.34f+cosine*(0.5f+cosine*0.16f));
2371  }
2372  )
2373 
2374  STRINGIFY(
2375  inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2376  {
2377  switch (filterType)
2378  {
2379  /* Call Sinc even for SincFast to get better precision on GPU
2380  and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2381  case SincWeightingFunction:
2383  return Sinc(x);
2385  return CubicBC(x,filterCoefficients);
2386  case BoxWeightingFunction:
2387  return BoxResizeFilter(x);
2389  return Triangle(x);
2390  case HannWeightingFunction:
2391  return Hann(x);
2393  return Hamming(x);
2395  return Blackman(x);
2396 
2397  default:
2398  return 0.0f;
2399  }
2400  }
2401  )
2402 
2403 
2404  STRINGIFY(
2405  inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2406  , const ResizeWeightingFunctionType resizeWindowType
2407  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2408  {
2409  float scale;
2410  float xBlur = fabs(x/resizeFilterBlur);
2411  if (resizeWindowSupport < MagickEpsilon
2412  || resizeWindowType == BoxWeightingFunction)
2413  {
2414  scale = 1.0f;
2415  }
2416  else
2417  {
2418  scale = resizeFilterScale;
2419  scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2420  }
2421  float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2422  return weight;
2423  }
2424 
2425  )
2426 
2427  ;
2428  const char *accelerateKernels2 =
2429 
2430  STRINGIFY(
2431 
2432  inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2433  return (numWorkItems/pixelPerWorkgroup);
2434  }
2435 
2436  // returns the index of the pixel for the current workitem to compute.
2437  // returns -1 if this workitem doesn't need to participate in any computation
2438  inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2439  const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2440  int pixelIndex = itemID/numWorkItemsPerPixel;
2441  pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
2442  return pixelIndex;
2443  }
2444 
2445  )
2446 
2447  STRINGIFY(
2448  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2449  void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2450  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2451  const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
2452  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2453  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2454  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2455  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2456  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2457  {
2458  // calculate the range of resized image pixels computed by this workgroup
2459  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2460  const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2461  const unsigned int actualNumPixelToCompute = stopX - startX;
2462 
2463  // calculate the range of input image pixels to cache
2464  float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2465  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2466  scale = PerceptibleReciprocal(scale);
2467 
2468  const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2469  const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2470 
2471  // cache the input pixels into local memory
2472  const unsigned int y = get_global_id(1);
2473  const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
2474  const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
2475  event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
2476  wait_group_events(1, &e);
2477 
2478  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2479  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2480  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2481  {
2482  const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2483  const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2484  const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2485 
2486  // determine which resized pixel computed by this workitem
2487  const unsigned int itemID = get_local_id(0);
2488  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2489 
2490  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2491 
2492  float4 filteredPixel = (float4)0.0f;
2493  float density = 0.0f;
2494  float gamma = 0.0f;
2495  // -1 means this workitem doesn't participate in the computation
2496  if (pixelIndex != -1)
2497  {
2498  // x coordinated of the resized pixel computed by this workitem
2499  const int x = chunkStartX + pixelIndex;
2500 
2501  // calculate how many steps required for this pixel
2502  const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2503  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2504  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
2505  const unsigned int n = stop - start;
2506 
2507  // calculate how many steps this workitem will contribute
2508  unsigned int numStepsPerWorkItem = n / numItems;
2509  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2510 
2511  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2512  if (startStep < n)
2513  {
2514  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2515 
2516  unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2517  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2518  {
2519  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2520  (ResizeWeightingFunctionType) resizeFilterType,
2521  (ResizeWeightingFunctionType) resizeWindowType,
2522  resizeFilterScale, resizeFilterWindowSupport,
2523  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2524 
2525  float4 cp = (float4)0.0f;
2526 
2527  __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
2528  cp.x = (float) *(p);
2529  if (number_channels > 2)
2530  {
2531  cp.y = (float) *(p + 1);
2532  cp.z = (float) *(p + 2);
2533  }
2534 
2535  if (alpha_index != 0)
2536  {
2537  cp.w = (float) *(p + alpha_index);
2538 
2539  float alpha = weight * QuantumScale * cp.w;
2540 
2541  filteredPixel.x += alpha * cp.x;
2542  filteredPixel.y += alpha * cp.y;
2543  filteredPixel.z += alpha * cp.z;
2544  filteredPixel.w += weight * cp.w;
2545  gamma += alpha;
2546  }
2547  else
2548  filteredPixel += ((float4) weight)*cp;
2549 
2550  density += weight;
2551  }
2552  }
2553  }
2554 
2555  // initialize the accumulators to zero
2556  if (itemID < actualNumPixelInThisChunk) {
2557  outputPixelCache[itemID] = (float4)0.0f;
2558  densityCache[itemID] = 0.0f;
2559  if (alpha_index != 0)
2560  gammaCache[itemID] = 0.0f;
2561  }
2562  barrier(CLK_LOCAL_MEM_FENCE);
2563 
2564  // accumulatte the filtered pixel value and the density
2565  for (unsigned int i = 0; i < numItems; i++) {
2566  if (pixelIndex != -1) {
2567  if (itemID%numItems == i) {
2568  outputPixelCache[pixelIndex]+=filteredPixel;
2569  densityCache[pixelIndex]+=density;
2570  if (alpha_index != 0)
2571  gammaCache[pixelIndex]+=gamma;
2572  }
2573  }
2574  barrier(CLK_LOCAL_MEM_FENCE);
2575  }
2576 
2577  if (itemID < actualNumPixelInThisChunk)
2578  {
2579  float4 filteredPixel = outputPixelCache[itemID];
2580 
2581  float gamma = 0.0f;
2582  if (alpha_index != 0)
2583  gamma = gammaCache[itemID];
2584 
2585  float density = densityCache[itemID];
2586  if ((density != 0.0f) && (density != 1.0f))
2587  {
2588  density = PerceptibleReciprocal(density);
2589  filteredPixel *= (float4) density;
2590  if (alpha_index != 0)
2591  gamma *= density;
2592  }
2593 
2594  if (alpha_index != 0)
2595  {
2596  gamma = PerceptibleReciprocal(gamma);
2597  filteredPixel.x *= gamma;
2598  filteredPixel.y *= gamma;
2599  filteredPixel.z *= gamma;
2600  }
2601 
2602  WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
2603  }
2604  }
2605  }
2606  )
2607 
2608 
2609  STRINGIFY(
2610  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2611  void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2612  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2613  const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
2614  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2615  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2616  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2617  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2618  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2619  {
2620  // calculate the range of resized image pixels computed by this workgroup
2621  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2622  const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2623  const unsigned int actualNumPixelToCompute = stopY - startY;
2624 
2625  // calculate the range of input image pixels to cache
2626  float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2627  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2628  scale = PerceptibleReciprocal(scale);
2629 
2630  const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2631  const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2632 
2633  // cache the input pixels into local memory
2634  const unsigned int x = get_global_id(0);
2635  unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
2636  unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
2637  unsigned int stride = inputColumns * number_channels;
2638  for (unsigned int i = 0; i < number_channels; i++)
2639  {
2640  event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
2641  wait_group_events(1,&e);
2642  }
2643 
2644  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2645  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2646  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2647  {
2648  const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2649  const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2650  const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2651 
2652  // determine which resized pixel computed by this workitem
2653  const unsigned int itemID = get_local_id(1);
2654  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2655 
2656  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2657 
2658  float4 filteredPixel = (float4)0.0f;
2659  float density = 0.0f;
2660  float gamma = 0.0f;
2661  // -1 means this workitem doesn't participate in the computation
2662  if (pixelIndex != -1)
2663  {
2664  // x coordinated of the resized pixel computed by this workitem
2665  const int y = chunkStartY + pixelIndex;
2666 
2667  // calculate how many steps required for this pixel
2668  const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2669  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2670  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
2671  const unsigned int n = stop - start;
2672 
2673  // calculate how many steps this workitem will contribute
2674  unsigned int numStepsPerWorkItem = n / numItems;
2675  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2676 
2677  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2678  if (startStep < n)
2679  {
2680  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2681 
2682  unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2683  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2684  {
2685  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2686  (ResizeWeightingFunctionType) resizeFilterType,
2687  (ResizeWeightingFunctionType) resizeWindowType,
2688  resizeFilterScale, resizeFilterWindowSupport,
2689  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2690 
2691  float4 cp = (float4)0.0f;
2692 
2693  __local CLQuantum *p = inputImageCache + cacheIndex;
2694  cp.x = (float) *(p);
2695  if (number_channels > 2)
2696  {
2697  cp.y = (float) *(p + rangeLength);
2698  cp.z = (float) *(p + (rangeLength * 2));
2699  }
2700 
2701  if (alpha_index != 0)
2702  {
2703  cp.w = (float) *(p + (rangeLength * alpha_index));
2704 
2705  float alpha = weight * QuantumScale * cp.w;
2706 
2707  filteredPixel.x += alpha * cp.x;
2708  filteredPixel.y += alpha * cp.y;
2709  filteredPixel.z += alpha * cp.z;
2710  filteredPixel.w += weight * cp.w;
2711  gamma += alpha;
2712  }
2713  else
2714  filteredPixel += ((float4) weight)*cp;
2715 
2716  density += weight;
2717  }
2718  }
2719  }
2720 
2721  // initialize the accumulators to zero
2722  if (itemID < actualNumPixelInThisChunk) {
2723  outputPixelCache[itemID] = (float4)0.0f;
2724  densityCache[itemID] = 0.0f;
2725  if (alpha_index != 0)
2726  gammaCache[itemID] = 0.0f;
2727  }
2728  barrier(CLK_LOCAL_MEM_FENCE);
2729 
2730  // accumulatte the filtered pixel value and the density
2731  for (unsigned int i = 0; i < numItems; i++) {
2732  if (pixelIndex != -1) {
2733  if (itemID%numItems == i) {
2734  outputPixelCache[pixelIndex]+=filteredPixel;
2735  densityCache[pixelIndex]+=density;
2736  if (alpha_index != 0)
2737  gammaCache[pixelIndex]+=gamma;
2738  }
2739  }
2740  barrier(CLK_LOCAL_MEM_FENCE);
2741  }
2742 
2743  if (itemID < actualNumPixelInThisChunk)
2744  {
2745  float4 filteredPixel = outputPixelCache[itemID];
2746 
2747  float gamma = 0.0f;
2748  if (alpha_index != 0)
2749  gamma = gammaCache[itemID];
2750 
2751  float density = densityCache[itemID];
2752  if ((density != 0.0f) && (density != 1.0f))
2753  {
2754  density = PerceptibleReciprocal(density);
2755  filteredPixel *= (float4) density;
2756  if (alpha_index != 0)
2757  gamma *= density;
2758  }
2759 
2760  if (alpha_index != 0)
2761  {
2762  gamma = PerceptibleReciprocal(gamma);
2763  filteredPixel.x *= gamma;
2764  filteredPixel.y *= gamma;
2765  filteredPixel.z *= gamma;
2766  }
2767 
2768  WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
2769  }
2770  }
2771  }
2772  )
2773 
2774 /*
2775 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2776 % %
2777 % %
2778 % %
2779 % R o t a t i o n a l B l u r %
2780 % %
2781 % %
2782 % %
2783 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2784 */
2785 
2786  STRINGIFY(
2787  __kernel void RotationalBlur(const __global CLQuantum *image,
2788  const unsigned int number_channels,const unsigned int channel,
2789  const float2 blurCenter,__constant float *cos_theta,
2790  __constant float *sin_theta,const unsigned int cossin_theta_size,
2791  __global CLQuantum *filteredImage)
2792  {
2793  const int x = get_global_id(0);
2794  const int y = get_global_id(1);
2795  const int columns = get_global_size(0);
2796  const int rows = get_global_size(1);
2797  unsigned int step = 1;
2798  float center_x = (float) x - blurCenter.x;
2799  float center_y = (float) y - blurCenter.y;
2800  float radius = hypot(center_x, center_y);
2801 
2802  float blur_radius = hypot(blurCenter.x, blurCenter.y);
2803 
2804  if (radius > MagickEpsilon)
2805  {
2806  step = (unsigned int) (blur_radius / radius);
2807  if (step == 0)
2808  step = 1;
2809  if (step >= cossin_theta_size)
2810  step = cossin_theta_size-1;
2811  }
2812 
2813  float4 result = 0.0f;
2814  float normalize = 0.0f;
2815  float gamma = 0.0f;
2816 
2817  for (unsigned int i=0; i<cossin_theta_size; i+=step)
2818  {
2819  int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
2820  int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
2821 
2822  float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
2823 
2824  if ((number_channels == 4) || (number_channels == 2))
2825  {
2826  float alpha = (float)(QuantumScale*pixel.w);
2827 
2828  gamma += alpha;
2829 
2830  result.x += alpha * pixel.x;
2831  result.y += alpha * pixel.y;
2832  result.z += alpha * pixel.z;
2833  result.w += pixel.w;
2834  }
2835  else
2836  result += pixel;
2837 
2838  normalize += 1.0f;
2839  }
2840 
2841  normalize = PerceptibleReciprocal(normalize);
2842 
2843  if ((number_channels == 4) || (number_channels == 2))
2844  {
2845  gamma = PerceptibleReciprocal(gamma);
2846  result.x *= gamma;
2847  result.y *= gamma;
2848  result.z *= gamma;
2849  result.w *= normalize;
2850  }
2851  else
2852  result *= normalize;
2853 
2854  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
2855  }
2856  )
2857 
2858 /*
2859 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2860 % %
2861 % %
2862 % %
2863 % U n s h a r p M a s k %
2864 % %
2865 % %
2866 % %
2867 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2868 */
2869 
2870  STRINGIFY(
2871  __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
2872  const __global float4 *blurRowData,const unsigned int number_channels,
2873  const ChannelType channel,const unsigned int columns,
2874  const unsigned int rows,__local float4* cachedData,
2875  __local float* cachedFilter,const __global float *filter,
2876  const unsigned int width,const float gain, const float threshold,
2877  __global CLQuantum *filteredImage)
2878  {
2879  const unsigned int radius = (width-1)/2;
2880 
2881  // cache the pixel shared by the workgroup
2882  const int groupX = get_group_id(0);
2883  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
2884  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
2885 
2886  if ((groupStartY >= 0) && (groupStopY < rows))
2887  {
2888  event_t e = async_work_group_strided_copy(cachedData,
2889  blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
2890  wait_group_events(1,&e);
2891  }
2892  else
2893  {
2894  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
2895  cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
2896 
2897  barrier(CLK_LOCAL_MEM_FENCE);
2898  }
2899  // cache the filter as well
2900  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
2901  wait_group_events(1,&e);
2902 
2903  // only do the work if this is not a patched item
2904  const int cy = get_global_id(1);
2905 
2906  if (cy < rows)
2907  {
2908  float4 blurredPixel = (float4) 0.0f;
2909 
2910  int i = 0;
2911 
2912  for ( ; i+7 < width; )
2913  {
2914  for (int j=0; j < 8; j++, i++)
2915  blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
2916  }
2917 
2918  for ( ; i < width; i++)
2919  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
2920 
2921  float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
2922  float4 outputPixel = inputImagePixel - blurredPixel;
2923 
2924  float quantumThreshold = QuantumRange*threshold;
2925 
2926  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
2927  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
2928 
2929  //write back
2930  WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
2931  }
2932  }
2933  )
2934 
2935  STRINGIFY(
2936  __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
2937  const ChannelType channel,__constant float *filter,const unsigned int width,
2938  const unsigned int columns,const unsigned int rows,__local float4 *pixels,
2939  const float gain,const float threshold,__global CLQuantum *filteredImage)
2940  {
2941  const unsigned int x = get_global_id(0);
2942  const unsigned int y = get_global_id(1);
2943 
2944  const unsigned int radius = (width - 1) / 2;
2945 
2946  int row = y - radius;
2947  int baseRow = get_group_id(1) * get_local_size(1) - radius;
2948  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
2949 
2950  while (row < endRow) {
2951  int srcy = (row < 0) ? -row : row; // mirror pad
2952  srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
2953 
2954  float4 value = 0.0f;
2955 
2956  int ix = x - radius;
2957  int i = 0;
2958 
2959  while (i + 7 < width) {
2960  for (int j = 0; j < 8; ++j) { // unrolled
2961  int srcx = ix + j;
2962  srcx = (srcx < 0) ? -srcx : srcx;
2963  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2964  value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2965  }
2966  ix += 8;
2967  i += 8;
2968  }
2969 
2970  while (i < width) {
2971  int srcx = (ix < 0) ? -ix : ix; // mirror pad
2972  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2973  value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2974  ++i;
2975  ++ix;
2976  }
2977  pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
2978  row += get_local_size(1);
2979  }
2980 
2981  barrier(CLK_LOCAL_MEM_FENCE);
2982 
2983  const int px = get_local_id(0);
2984  const int py = get_local_id(1);
2985  const int prp = get_local_size(0);
2986  float4 value = (float4)(0.0f);
2987 
2988  int i = 0;
2989  while (i + 7 < width) {
2990  for (int j = 0; j < 8; ++j) // unrolled
2991  value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
2992  i += 8;
2993  }
2994  while (i < width) {
2995  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
2996  ++i;
2997  }
2998 
2999  if ((x < columns) && (y < rows)) {
3000  float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
3001  float4 diff = srcPixel - value;
3002 
3003  float quantumThreshold = QuantumRange*threshold;
3004 
3005  int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3006  value = select(srcPixel + diff * gain, srcPixel, mask);
3007 
3008  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
3009  }
3010  }
3011  )
3012 
3013  STRINGIFY(
3014  __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
3015  void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
3016  const unsigned int number_channels,const unsigned int max_channels,
3017  const float threshold,const int passes,const unsigned int imageWidth,
3018  const unsigned int imageHeight)
3019  {
3020  const int pad = (1 << (passes - 1));
3021  const int tileSize = 64;
3022  const int tileRowPixels = 64;
3023  const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3024 
3025  CLQuantum stage[48]; // 16 * 3 (we only need 3 channels)
3026 
3027  local float buffer[64 * 64];
3028 
3029  int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3030  int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
3031 
3032  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3033  int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
3034  (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
3035 
3036  for (int channel = 0; channel < max_channels; ++channel)
3037  stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
3038  }
3039 
3040  for (int channel = 0; channel < max_channels; ++channel) {
3041  // Load LDS
3042  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3043  buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
3044 
3045  // Process
3046 
3047  float tmp[16];
3048  float accum[16];
3049  float pixel;
3050 
3051  for (int i = 0; i < 16; i++)
3052  accum[i]=0.0f;
3053 
3054  for (int pass = 0; pass < passes; ++pass) {
3055  const int radius = 1 << pass;
3056  const int x = get_local_id(0);
3057  const float thresh = threshold * noise[pass];
3058 
3059  // Apply horizontal hat
3060  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3061  const int offset = i * tileRowPixels;
3062  if (pass == 0)
3063  tmp[i / 4] = buffer[x + offset]; // snapshot input on first pass
3064  pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3065  barrier(CLK_LOCAL_MEM_FENCE);
3066  buffer[x + offset] = pixel;
3067  }
3068  barrier(CLK_LOCAL_MEM_FENCE);
3069 
3070  // Apply vertical hat
3071  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3072  pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3073  float delta = tmp[i / 4] - pixel;
3074  tmp[i / 4] = pixel; // hold output in tmp until all workitems are done
3075  if (delta < -thresh)
3076  delta += thresh;
3077  else if (delta > thresh)
3078  delta -= thresh;
3079  else
3080  delta = 0;
3081  accum[i / 4] += delta;
3082  }
3083  barrier(CLK_LOCAL_MEM_FENCE);
3084 
3085  if (pass < passes - 1)
3086  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3087  buffer[x + i * tileRowPixels] = tmp[i / 4]; // store lowpass for next pass
3088  else // last pass
3089  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3090  accum[i / 4] += tmp[i / 4]; // add the lowpass signal back to output
3091  barrier(CLK_LOCAL_MEM_FENCE);
3092  }
3093 
3094  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3095  stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]);
3096 
3097  barrier(CLK_LOCAL_MEM_FENCE);
3098  }
3099 
3100  // Write from stage to output
3101 
3102  if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3103  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3104  if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3105  int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
3106  for (int channel = 0; channel < max_channels; ++channel) {
3107  dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
3108  }
3109  }
3110  }
3111  }
3112  }
3113  )
3114 
3115  ;
3116 
3117 #endif // MAGICKCORE_OPENCL_SUPPORT
3118 
3119 #if defined(__cplusplus) || defined(c_plusplus)
3120 }
3121 #endif
3122 
3123 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
#define SigmaPoisson
MagickExport void ConvertRGBToHSL(const double red, const double green, const double blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1099
#define SigmaRandom
static double Hann(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:322
PixelIntensityMethod
Definition: pixel.h:94
static double Blackman(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:149
#define MagickPI
Definition: image-private.h:30
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, double *red, double *green, double *blue)
Definition: enhance.c:3035
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
#define MagickEpsilon
Definition: magick-type.h:110
#define SigmaLaplacian
MagickPrivate void ConvertRGBToHSB(const double, const double, const double, double *, double *, double *)
NoiseType
Definition: fx.h:27
#define SigmaUniform
static double PerceptibleReciprocal(const double x)
#define SigmaGaussian
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:919
static void Contrast(const int sign, double *red, double *green, double *blue)
Definition: enhance.c:841
#define SigmaMultiplicativeGaussian
#define TauGaussian
static double Triangle(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:543
#define QuantumScale
Definition: magick-type.h:113
ChannelType
Definition: pixel.h:33
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, double *red, double *green, double *blue)
Definition: gem.c:462
#define MaxMap
Definition: magick-type.h:75
static double RoundToUnity(const double value)
#define MagickMax(x, y)
Definition: image-private.h:26
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:84
static double Sinc(const double, const ResizeFilter *)
static double CubicBC(const double x, const ResizeFilter *resize_filter)
Definition: resize.c:207
ResizeWeightingFunctionType
static double Hamming(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:334
#define MagickMin(x, y)
Definition: image-private.h:27
ColorspaceType
Definition: colorspace.h:25
#define SigmaImpulse
CompositeOperator
Definition: composite.h:25
MagickPrivate void ConvertHSBToRGB(const double, const double, const double, double *, double *, double *)
MagickExport MagickRealType GetPixelIntensity(const Image *magick_restrict image, const Quantum *magick_restrict pixel)
Definition: pixel.c:2358
MagickFunction
Definition: statistic.h:121
#define QuantumRange
Definition: magick-type.h:83
Definition: fx.h:36