MagickCore  7.0.8
Convert, Edit, Or Compose Bitmap Images
accelerate-kernels-private.h
Go to the documentation of this file.
1 /*
2  Copyright 1999-2018 ImageMagick Studio LLC, a non-profit organization
3  dedicated to making software imaging solutions freely available.
4 
5  You may not use this file except in compliance with the License.
6  obtain a copy of the License at
7 
8  https://www.imagemagick.org/script/license.php
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  MagickCore private kernels for accelerated functions.
17 */
18 
19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
27 
28 /*
29  Define declarations.
30 */
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
37 
38 const char *accelerateKernels =
39 
40 /*
41  Define declarations.
42 */
43  OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
44  OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
45  OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
46  OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
47  OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
48  OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
49  OPENCL_DEFINE(SigmaRandom, (attenuate))
50  OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
51  OPENCL_DEFINE(MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
52  OPENCL_DEFINE(MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
53 
54 /*
55  Typedef declarations.
56 */
57  STRINGIFY(
58  typedef enum
59  {
61  CMYColorspace, /* negated linear RGB colorspace */
62  CMYKColorspace, /* CMY with Black separation */
63  GRAYColorspace, /* Single Channel greyscale (non-linear) image */
69  HSVColorspace, /* alias for HSB */
72  LCHColorspace, /* alias for LCHuv */
73  LCHabColorspace, /* Cylindrical (Polar) Lab */
74  LCHuvColorspace, /* Cylindrical (Polar) Luv */
81  RGBColorspace, /* Linear RGB colorspace */
82  scRGBColorspace, /* ??? */
83  sRGBColorspace, /* Default: non-linear sRGB colorspace */
86  XYZColorspace, /* IEEE Color Reference colorspace */
93  LinearGRAYColorspace /* Single Channel greyscale (linear) image */
95  )
96 
97  STRINGIFY(
98  typedef enum
99  {
173  )
174 
175  STRINGIFY(
176  typedef enum
177  {
183  } MagickFunction;
184  )
185 
186  STRINGIFY(
187  typedef enum
188  {
190  UniformNoise,
193  ImpulseNoise,
195  PoissonNoise,
197  } NoiseType;
198  )
199 
200  STRINGIFY(
201  typedef enum
202  {
214  )
215 
216  STRINGIFY(
217  typedef enum
218  {
236  )
237 
238  STRINGIFY(
239  typedef enum
240  {
241  UndefinedChannel = 0x0000,
242  RedChannel = 0x0001,
243  GrayChannel = 0x0001,
244  CyanChannel = 0x0001,
245  GreenChannel = 0x0002,
246  MagentaChannel = 0x0002,
247  BlueChannel = 0x0004,
248  YellowChannel = 0x0004,
249  BlackChannel = 0x0008,
250  AlphaChannel = 0x0010,
251  OpacityChannel = 0x0010,
252  IndexChannel = 0x0020, /* Color Index Table? */
253  ReadMaskChannel = 0x0040, /* Pixel is Not Readable? */
254  WriteMaskChannel = 0x0080, /* Pixel is Write Protected? */
255  MetaChannel = 0x0100, /* ???? */
256  CompositeChannels = 0x001F,
257  AllChannels = 0x7ffffff,
258  /*
259  Special purpose channel types.
260  FUTURE: are these needed any more - they are more like hacks
261  SyncChannels for example is NOT a real channel but a 'flag'
262  It really says -- "User has not defined channels"
263  Though it does have extra meaning in the "-auto-level" operator
264  */
265  TrueAlphaChannel = 0x0100, /* extract actual alpha channel from opacity */
266  RGBChannels = 0x0200, /* set alpha from grayscale mask in RGB */
267  GrayChannels = 0x0400,
268  SyncChannels = 0x20000, /* channels modified as a single unit */
270  } ChannelType; /* must correspond to PixelChannel */
271  )
272 
273 /*
274  Helper functions.
275 */
276 
277 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
278 
279  STRINGIFY(
280  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
281  {
282  return((CLQuantum) value);
283  }
284  )
285 
286 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
287 
288  STRINGIFY(
289  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
290  {
291  return((CLQuantum) (257.0f*value));
292  }
293  )
294 
295 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
296 
297  STRINGIFY(
298  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
299  {
300  return((CLQuantum) (16843009.0*value));
301  }
302  )
303 
304 OPENCL_ENDIF()
305 
306 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
307 
308  STRINGIFY(
309  inline CLQuantum ClampToQuantum(const float value)
310  {
311  return (CLQuantum) clamp(value, 0.0f, QuantumRange);
312  }
313  )
314 
315 OPENCL_ELSE()
316 
317  STRINGIFY(
318  inline CLQuantum ClampToQuantum(const float value)
319  {
320  return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
321  }
322  )
323 
324 OPENCL_ENDIF()
325 
326  STRINGIFY(
327  inline int ClampToCanvas(const int offset,const int range)
328  {
329  return clamp(offset, (int)0, range-1);
330  }
331  )
332 
333  STRINGIFY(
334  inline uint ScaleQuantumToMap(CLQuantum value)
335  {
336  if (value >= (CLQuantum) MaxMap)
337  return ((uint)MaxMap);
338  else
339  return ((uint)value);
340  }
341  )
342 
343  STRINGIFY(
344  inline float PerceptibleReciprocal(const float x)
345  {
346  float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
347  return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
348  }
349  )
350 
351  STRINGIFY(
352  inline float RoundToUnity(const float value)
353  {
354  return clamp(value,0.0f,1.0f);
355  }
356  )
357 
358  STRINGIFY(
359 
360  inline unsigned int getPixelIndex(const unsigned int number_channels,
361  const unsigned int columns, const unsigned int x, const unsigned int y)
362  {
363  return (x * number_channels) + (y * columns * number_channels);
364  }
365 
366  inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
367  inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
368  inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
369  inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
370 
371  inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
372  inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
373  inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
374  inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
375 
376  inline CLQuantum getBlue(CLPixelType p) { return p.x; }
377  inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
378  inline float getBlueF4(float4 p) { return p.x; }
379  inline void setBlueF4(float4* p, float value) { (*p).x = value; }
380 
381  inline CLQuantum getGreen(CLPixelType p) { return p.y; }
382  inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
383  inline float getGreenF4(float4 p) { return p.y; }
384  inline void setGreenF4(float4* p, float value) { (*p).y = value; }
385 
386  inline CLQuantum getRed(CLPixelType p) { return p.z; }
387  inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
388  inline float getRedF4(float4 p) { return p.z; }
389  inline void setRedF4(float4* p, float value) { (*p).z = value; }
390 
391  inline CLQuantum getAlpha(CLPixelType p) { return p.w; }
392  inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
393  inline float getAlphaF4(float4 p) { return p.w; }
394  inline void setAlphaF4(float4* p, float value) { (*p).w = value; }
395 
396  inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
397  const ChannelType channel, float *red, float *green, float *blue, float *alpha)
398  {
399  if ((channel & RedChannel) != 0)
400  *red=getPixelRed(p);
401 
402  if (number_channels > 2)
403  {
404  if ((channel & GreenChannel) != 0)
405  *green=getPixelGreen(p);
406 
407  if ((channel & BlueChannel) != 0)
408  *blue=getPixelBlue(p);
409  }
410 
411  if (((number_channels == 4) || (number_channels == 2)) &&
412  ((channel & AlphaChannel) != 0))
413  *alpha=getPixelAlpha(p,number_channels);
414  }
415 
416  inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels,
417  const unsigned int columns, const unsigned int x, const unsigned int y)
418  {
419  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
420 
421  float4 pixel;
422 
423  pixel.x=getPixelRed(p);
424 
425  if (number_channels > 2)
426  {
427  pixel.y=getPixelGreen(p);
428  pixel.z=getPixelBlue(p);
429  }
430 
431  if ((number_channels == 4) || (number_channels == 2))
432  pixel.w=getPixelAlpha(p,number_channels);
433  return(pixel);
434  }
435 
436  inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
437  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
438  {
439  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
440 
441  float red = 0.0f;
442  float green = 0.0f;
443  float blue = 0.0f;
444  float alpha = 0.0f;
445 
446  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
447  return (float4)(red, green, blue, alpha);
448  }
449 
450  inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels,
451  const ChannelType channel, float red, float green, float blue, float alpha)
452  {
453  if ((channel & RedChannel) != 0)
454  setPixelRed(p,ClampToQuantum(red));
455 
456  if (number_channels > 2)
457  {
458  if ((channel & GreenChannel) != 0)
459  setPixelGreen(p,ClampToQuantum(green));
460 
461  if ((channel & BlueChannel) != 0)
462  setPixelBlue(p,ClampToQuantum(blue));
463  }
464 
465  if (((number_channels == 4) || (number_channels == 2)) &&
466  ((channel & AlphaChannel) != 0))
467  setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
468  }
469 
470  inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels,
471  const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel)
472  {
473  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
474 
475  setPixelRed(p,ClampToQuantum(pixel.x));
476 
477  if (number_channels > 2)
478  {
479  setPixelGreen(p,ClampToQuantum(pixel.y));
480  setPixelBlue(p,ClampToQuantum(pixel.z));
481  }
482 
483  if ((number_channels == 4) || (number_channels == 2))
484  setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w));
485  }
486 
487  inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
488  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
489  float4 pixel)
490  {
491  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
492  WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
493  }
494 
495  inline float GetPixelIntensity(const unsigned int colorspace,
496  const unsigned int method,float red,float green,float blue)
497  {
498  float intensity;
499 
500  if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace))
501  return red;
502 
503  switch (method)
504  {
506  {
507  intensity=(red+green+blue)/3.0;
508  break;
509  }
511  {
512  intensity=MagickMax(MagickMax(red,green),blue);
513  break;
514  }
516  {
517  intensity=(MagickMin(MagickMin(red,green),blue)+
518  MagickMax(MagickMax(red,green),blue))/2.0;
519  break;
520  }
522  {
523  intensity=(float) (((float) red*red+green*green+blue*blue)/
524  (3.0*QuantumRange));
525  break;
526  }
528  {
529  /*
530  if (image->colorspace == RGBColorspace)
531  {
532  red=EncodePixelGamma(red);
533  green=EncodePixelGamma(green);
534  blue=EncodePixelGamma(blue);
535  }
536  */
537  intensity=0.298839*red+0.586811*green+0.114350*blue;
538  break;
539  }
541  {
542  /*
543  if (image->colorspace == sRGBColorspace)
544  {
545  red=DecodePixelGamma(red);
546  green=DecodePixelGamma(green);
547  blue=DecodePixelGamma(blue);
548  }
549  */
550  intensity=0.298839*red+0.586811*green+0.114350*blue;
551  break;
552  }
554  default:
555  {
556  /*
557  if (image->colorspace == RGBColorspace)
558  {
559  red=EncodePixelGamma(red);
560  green=EncodePixelGamma(green);
561  blue=EncodePixelGamma(blue);
562  }
563  */
564  intensity=0.212656*red+0.715158*green+0.072186*blue;
565  break;
566  }
568  {
569  /*
570  if (image->colorspace == sRGBColorspace)
571  {
572  red=DecodePixelGamma(red);
573  green=DecodePixelGamma(green);
574  blue=DecodePixelGamma(blue);
575  }
576  */
577  intensity=0.212656*red+0.715158*green+0.072186*blue;
578  break;
579  }
581  {
582  intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
583  sqrt(3.0));
584  break;
585  }
586  }
587 
588  return intensity;
589  }
590 
591  inline int mirrorBottom(int value)
592  {
593  return (value < 0) ? - (value) : value;
594  }
595 
596  inline int mirrorTop(int value, int width)
597  {
598  return (value >= width) ? (2 * width - value - 1) : value;
599  }
600  )
601 
602 /*
603 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
604 % %
605 % %
606 % %
607 % A d d N o i s e %
608 % %
609 % %
610 % %
611 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
612 */
613 
614  STRINGIFY(
615  /*
616  Part of MWC64X by David Thomas, dt10@imperial.ac.uk
617  This is provided under BSD, full license is with the main package.
618  See http://www.doc.ic.ac.uk/~dt10/research
619  */
620 
621  // Pre: a<M, b<M
622  // Post: r=(a+b) mod M
623  ulong MWC_AddMod64(ulong a, ulong b, ulong M)
624  {
625  ulong v=a+b;
626  //if( (v>=M) || (v<a) )
627  if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
628  v=v-M;
629  return v;
630  }
631 
632  // Pre: a<M,b<M
633  // Post: r=(a*b) mod M
634  // This could be done more efficently, but it is portable, and should
635  // be easy to understand. It can be replaced with any of the better
636  // modular multiplication algorithms (for example if you know you have
637  // double precision available or something).
638  ulong MWC_MulMod64(ulong a, ulong b, ulong M)
639  {
640  ulong r=0;
641  while(a!=0){
642  if(a&1)
643  r=MWC_AddMod64(r,b,M);
644  b=MWC_AddMod64(b,b,M);
645  a=a>>1;
646  }
647  return r;
648  }
649 
650  // Pre: a<M, e>=0
651  // Post: r=(a^b) mod M
652  // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
653  // most architectures
654  ulong MWC_PowMod64(ulong a, ulong e, ulong M)
655  {
656  ulong sqr=a, acc=1;
657  while(e!=0){
658  if(e&1)
659  acc=MWC_MulMod64(acc,sqr,M);
660  sqr=MWC_MulMod64(sqr,sqr,M);
661  e=e>>1;
662  }
663  return acc;
664  }
665 
666  uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
667  {
668  ulong m=MWC_PowMod64(A, distance, M);
669  ulong x=curr.x*(ulong)A+curr.y;
670  x=MWC_MulMod64(x, m, M);
671  return (uint2)((uint)(x/A), (uint)(x%A));
672  }
673 
674  uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
675  {
676  // This is an arbitrary constant for starting LCG jumping from. I didn't
677  // want to start from 1, as then you end up with the two or three first values
678  // being a bit poor in ones - once you've decided that, one constant is as
679  // good as any another. There is no deep mathematical reason for it, I just
680  // generated a random number.
681  enum{ MWC_BASEID = 4077358422479273989UL };
682 
683  ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
684  ulong m=MWC_PowMod64(A, dist, M);
685 
686  ulong x=MWC_MulMod64(MWC_BASEID, m, M);
687  return (uint2)((uint)(x/A), (uint)(x%A));
688  }
689 
691  typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
692 
693  void MWC64X_Step(mwc64x_state_t *s)
694  {
695  uint X=s->x, C=s->c;
696 
697  uint Xn=s->seed0*X+C;
698  uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
699  uint Cn=mad_hi(s->seed0,X,carry);
700 
701  s->x=Xn;
702  s->c=Cn;
703  }
704 
705  void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
706  {
707  uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
708  s->x=tmp.x;
709  s->c=tmp.y;
710  }
711 
712  void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
713  {
714  uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
715  s->x=tmp.x;
716  s->c=tmp.y;
717  }
718 
720  uint MWC64X_NextUint(mwc64x_state_t *s)
721  {
722  uint res=s->x ^ s->c;
723  MWC64X_Step(s);
724  return res;
725  }
726 
727  //
728  // End of MWC64X excerpt
729  //
730 
731  float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
732  {
733  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
734  }
735 
736  float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate)
737  {
738  float
739  alpha,
740  beta,
741  noise,
742  sigma;
743 
744  noise = 0.0f;
745  alpha=mwcReadPseudoRandomValue(r);
746  switch(noise_type)
747  {
748  case UniformNoise:
749  default:
750  {
751  noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
752  break;
753  }
754  case GaussianNoise:
755  {
756  float
757  gamma,
758  tau;
759 
760  if (alpha == 0.0f)
761  alpha=1.0f;
762  beta=mwcReadPseudoRandomValue(r);
763  gamma=sqrt(-2.0f*log(alpha));
764  sigma=gamma*cospi((2.0f*beta));
765  tau=gamma*sinpi((2.0f*beta));
766  noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau;
767  break;
768  }
769  case ImpulseNoise:
770  {
771  if (alpha < (SigmaImpulse/2.0f))
772  noise=0.0f;
773  else
774  if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
775  noise=QuantumRange;
776  else
777  noise=pixel;
778  break;
779  }
780  case LaplacianNoise:
781  {
782  if (alpha <= 0.5f)
783  {
784  if (alpha <= MagickEpsilon)
785  noise=(pixel-QuantumRange);
786  else
787  noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
788  0.5f);
789  break;
790  }
791  beta=1.0f-alpha;
792  if (beta <= (0.5f*MagickEpsilon))
793  noise=(pixel+QuantumRange);
794  else
795  noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
796  break;
797  }
799  {
800  sigma=1.0f;
801  if (alpha > MagickEpsilon)
802  sigma=sqrt(-2.0f*log(alpha));
803  beta=mwcReadPseudoRandomValue(r);
804  noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma*
805  cospi((2.0f*beta))/2.0f);
806  break;
807  }
808  case PoissonNoise:
809  {
810  float
811  poisson;
812  unsigned int i;
813  poisson=exp(-SigmaPoisson*QuantumScale*pixel);
814  for (i=0; alpha > poisson; i++)
815  {
816  beta=mwcReadPseudoRandomValue(r);
817  alpha*=beta;
818  }
819  noise=(QuantumRange*i/SigmaPoisson);
820  break;
821  }
822  case RandomNoise:
823  {
824  noise=(QuantumRange*SigmaRandom*alpha);
825  break;
826  }
827  }
828  return noise;
829  }
830 
831  __kernel
832  void AddNoise(const __global CLQuantum *image,
833  const unsigned int number_channels,const ChannelType channel,
834  const unsigned int length,const unsigned int pixelsPerWorkItem,
835  const NoiseType noise_type,const float attenuate,const unsigned int seed0,
836  const unsigned int seed1,const unsigned int numRandomNumbersPerPixel,
837  __global CLQuantum *filteredImage)
838  {
839  mwc64x_state_t rng;
840  rng.seed0 = seed0;
841  rng.seed1 = seed1;
842 
843  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
844  uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
845  MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
846 
847  uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
848  uint count = pixelsPerWorkItem;
849 
850  while (count > 0 && pos < length)
851  {
852  const __global CLQuantum *p = image + pos;
853  __global CLQuantum *q = filteredImage + pos;
854 
855  float red;
856  float green;
857  float blue;
858  float alpha;
859 
860  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
861 
862  if ((channel & RedChannel) != 0)
863  red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
864 
865  if (number_channels > 2)
866  {
867  if ((channel & GreenChannel) != 0)
868  green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
869 
870  if ((channel & BlueChannel) != 0)
871  blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
872  }
873 
874  if (((number_channels == 4) || (number_channels == 2)) &&
875  ((channel & AlphaChannel) != 0))
876  alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
877 
878  WriteChannels(q, number_channels, channel, red, green, blue, alpha);
879 
880  pos += (get_local_size(0) * number_channels);
881  count--;
882  }
883  }
884  )
885 
886 /*
887 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
888 % %
889 % %
890 % %
891 % B l u r %
892 % %
893 % %
894 % %
895 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
896 */
897 
898  STRINGIFY(
899  /*
900  Reduce image noise and reduce detail levels by row
901  */
902  __kernel void BlurRow(const __global CLQuantum *image,
903  const unsigned int number_channels,const ChannelType channel,
904  __constant float *filter,const unsigned int width,
905  const unsigned int imageColumns,const unsigned int imageRows,
906  __local float4 *temp,__global float4 *tempImage)
907  {
908  const int x = get_global_id(0);
909  const int y = get_global_id(1);
910 
911  const int columns = imageColumns;
912 
913  const unsigned int radius = (width-1)/2;
914  const int wsize = get_local_size(0);
915  const unsigned int loadSize = wsize+width;
916 
917  //group coordinate
918  const int groupX=get_local_size(0)*get_group_id(0);
919 
920  //parallel load and clamp
921  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
922  {
923  int cx = ClampToCanvas(i + groupX - radius, columns);
924  temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
925  }
926 
927  // barrier
928  barrier(CLK_LOCAL_MEM_FENCE);
929 
930  // only do the work if this is not a patched item
931  if (get_global_id(0) < columns)
932  {
933  // compute
934  float4 result = (float4) 0;
935 
936  int i = 0;
937 
938  for ( ; i+7 < width; )
939  {
940  for (int j=0; j < 8; j++)
941  result+=filter[i+j]*temp[i+j+get_local_id(0)];
942  i+=8;
943  }
944 
945  for ( ; i < width; i++)
946  result+=filter[i]*temp[i+get_local_id(0)];
947 
948  // write back to global
949  tempImage[y*columns+x] = result;
950  }
951  }
952  )
953 
954  STRINGIFY(
955  /*
956  Reduce image noise and reduce detail levels by line
957  */
958  __kernel void BlurColumn(const __global float4 *blurRowData,
959  const unsigned int number_channels,const ChannelType channel,
960  __constant float *filter,const unsigned int width,
961  const unsigned int imageColumns,const unsigned int imageRows,
962  __local float4 *temp,__global CLQuantum *filteredImage)
963  {
964  const int x = get_global_id(0);
965  const int y = get_global_id(1);
966 
967  const int columns = imageColumns;
968  const int rows = imageRows;
969 
970  unsigned int radius = (width-1)/2;
971  const int wsize = get_local_size(1);
972  const unsigned int loadSize = wsize+width;
973 
974  //group coordinate
975  const int groupX=get_local_size(0)*get_group_id(0);
976  const int groupY=get_local_size(1)*get_group_id(1);
977  //notice that get_local_size(0) is 1, so
978  //groupX=get_group_id(0);
979 
980  //parallel load and clamp
981  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
982  temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
983 
984  // barrier
985  barrier(CLK_LOCAL_MEM_FENCE);
986 
987  // only do the work if this is not a patched item
988  if (get_global_id(1) < rows)
989  {
990  // compute
991  float4 result = (float4) 0;
992 
993  int i = 0;
994 
995  for ( ; i+7 < width; )
996  {
997  for (int j=0; j < 8; j++)
998  result+=filter[i+j]*temp[i+j+get_local_id(1)];
999  i+=8;
1000  }
1001 
1002  for ( ; i < width; i++)
1003  result+=filter[i]*temp[i+get_local_id(1)];
1004 
1005  // write back to global
1006  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
1007  }
1008  }
1009  )
1010 
1011 /*
1012 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1013 % %
1014 % %
1015 % %
1016 % C o n t r a s t %
1017 % %
1018 % %
1019 % %
1020 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1021 */
1022 
1023  STRINGIFY(
1024 
1025  inline float4 ConvertRGBToHSB(const float4 pixel)
1026  {
1027  float4 result=0.0f;
1028  result.w=pixel.w;
1029  float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z);
1030  if (tmax != 0.0f)
1031  {
1032  float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z);
1033  float delta=tmax-tmin;
1034 
1035  result.y=delta/tmax;
1036  result.z=QuantumScale*tmax;
1037  if (delta != 0.0f)
1038  {
1039  result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
1040  result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
1041  (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
1042  result.x/=6.0f;
1043  result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
1044  }
1045  }
1046  return(result);
1047  }
1048 
1049  inline float4 ConvertHSBToRGB(const float4 pixel)
1050  {
1051  float hue=pixel.x;
1052  float saturation=pixel.y;
1053  float brightness=pixel.z;
1054 
1055  float4 result=pixel;
1056 
1057  if (saturation == 0.0f)
1058  {
1059  result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness);
1060  }
1061  else
1062  {
1063  float h=6.0f*(hue-floor(hue));
1064  float f=h-floor(h);
1065  float p=brightness*(1.0f-saturation);
1066  float q=brightness*(1.0f-saturation*f);
1067  float t=brightness*(1.0f-(saturation*(1.0f-f)));
1068  int ih = (int)h;
1069 
1070  if (ih == 1)
1071  {
1072  result.x=ClampToQuantum(QuantumRange*q);
1073  result.y=ClampToQuantum(QuantumRange*brightness);
1074  result.z=ClampToQuantum(QuantumRange*p);
1075  }
1076  else if (ih == 2)
1077  {
1078  result.x=ClampToQuantum(QuantumRange*p);
1079  result.y=ClampToQuantum(QuantumRange*brightness);
1080  result.z=ClampToQuantum(QuantumRange*t);
1081  }
1082  else if (ih == 3)
1083  {
1084  result.x=ClampToQuantum(QuantumRange*p);
1085  result.y=ClampToQuantum(QuantumRange*q);
1086  result.z=ClampToQuantum(QuantumRange*brightness);
1087  }
1088  else if (ih == 4)
1089  {
1090  result.x=ClampToQuantum(QuantumRange*t);
1091  result.y=ClampToQuantum(QuantumRange*p);
1092  result.z=ClampToQuantum(QuantumRange*brightness);
1093  }
1094  else if (ih == 5)
1095  {
1096  result.x=ClampToQuantum(QuantumRange*brightness);
1097  result.y=ClampToQuantum(QuantumRange*p);
1098  result.z=ClampToQuantum(QuantumRange*q);
1099  }
1100  else
1101  {
1102  result.x=ClampToQuantum(QuantumRange*brightness);
1103  result.y=ClampToQuantum(QuantumRange*t);
1104  result.z=ClampToQuantum(QuantumRange*p);
1105  }
1106  }
1107  return(result);
1108  }
1109 
1110  __kernel void Contrast(__global CLQuantum *image,
1111  const unsigned int number_channels,const int sign)
1112  {
1113  const int x=get_global_id(0);
1114  const int y=get_global_id(1);
1115  const unsigned int columns=get_global_size(0);
1116 
1117  float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
1118  if (number_channels < 3)
1119  pixel.y=pixel.z=pixel.x;
1120 
1121  pixel=ConvertRGBToHSB(pixel);
1122  float brightness=pixel.z;
1123  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1124  brightness=clamp(brightness,0.0f,1.0f);
1125  pixel.z=brightness;
1126  pixel=ConvertHSBToRGB(pixel);
1127 
1128  WriteAllChannels(image,number_channels,columns,x,y,pixel);
1129  }
1130  )
1131 
1132 /*
1133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1134 % %
1135 % %
1136 % %
1137 % C o n t r a s t S t r e t c h %
1138 % %
1139 % %
1140 % %
1141 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1142 */
1143 
1144  STRINGIFY(
1145  /*
1146  */
1147  __kernel void Histogram(__global CLPixelType * restrict im,
1148  const ChannelType channel,
1149  const unsigned int colorspace,
1150  const unsigned int method,
1151  __global uint4 * restrict histogram)
1152  {
1153  const int x = get_global_id(0);
1154  const int y = get_global_id(1);
1155  const int columns = get_global_size(0);
1156  const int c = x + y * columns;
1157  if ((channel & SyncChannels) != 0)
1158  {
1159  float red=(float)getRed(im[c]);
1160  float green=(float)getGreen(im[c]);
1161  float blue=(float)getBlue(im[c]);
1162 
1163  float intensity = GetPixelIntensity(colorspace, method, red, green, blue);
1164  uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1165  atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
1166  }
1167  else
1168  {
1169  // for equalizing, we always need all channels?
1170  // otherwise something more
1171  }
1172  }
1173  )
1174 
1175  STRINGIFY(
1176  /*
1177  */
1178  __kernel void ContrastStretch(__global CLPixelType * restrict im,
1179  const ChannelType channel,
1180  __global CLPixelType * restrict stretch_map,
1181  const float4 white, const float4 black)
1182  {
1183  const int x = get_global_id(0);
1184  const int y = get_global_id(1);
1185  const int columns = get_global_size(0);
1186  const int c = x + y * columns;
1187 
1188  uint ePos;
1189  CLPixelType oValue, eValue;
1190  CLQuantum red, green, blue, alpha;
1191 
1192  //read from global
1193  oValue=im[c];
1194 
1195  if ((channel & RedChannel) != 0)
1196  {
1197  if (getRedF4(white) != getRedF4(black))
1198  {
1199  ePos = ScaleQuantumToMap(getRed(oValue));
1200  eValue = stretch_map[ePos];
1201  red = getRed(eValue);
1202  }
1203  }
1204 
1205  if ((channel & GreenChannel) != 0)
1206  {
1207  if (getGreenF4(white) != getGreenF4(black))
1208  {
1209  ePos = ScaleQuantumToMap(getGreen(oValue));
1210  eValue = stretch_map[ePos];
1211  green = getGreen(eValue);
1212  }
1213  }
1214 
1215  if ((channel & BlueChannel) != 0)
1216  {
1217  if (getBlueF4(white) != getBlueF4(black))
1218  {
1219  ePos = ScaleQuantumToMap(getBlue(oValue));
1220  eValue = stretch_map[ePos];
1221  blue = getBlue(eValue);
1222  }
1223  }
1224 
1225  if ((channel & AlphaChannel) != 0)
1226  {
1227  if (getAlphaF4(white) != getAlphaF4(black))
1228  {
1229  ePos = ScaleQuantumToMap(getAlpha(oValue));
1230  eValue = stretch_map[ePos];
1231  alpha = getAlpha(eValue);
1232  }
1233  }
1234 
1235  //write back
1236  im[c]=(CLPixelType)(blue, green, red, alpha);
1237 
1238  }
1239  )
1240 
1241 /*
1242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1243 % %
1244 % %
1245 % %
1246 % C o n v o l v e %
1247 % %
1248 % %
1249 % %
1250 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1251 */
1252 
1253  STRINGIFY(
1254  __kernel
1255  void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
1256  const unsigned int imageWidth, const unsigned int imageHeight,
1257  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1258  const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
1259 
1260  int2 blockID;
1261  blockID.x = get_global_id(0) / get_local_size(0);
1262  blockID.y = get_global_id(1) / get_local_size(1);
1263 
1264  // image area processed by this workgroup
1265  int2 imageAreaOrg;
1266  imageAreaOrg.x = blockID.x * get_local_size(0);
1267  imageAreaOrg.y = blockID.y * get_local_size(1);
1268 
1269  int2 midFilterDimen;
1270  midFilterDimen.x = (filterWidth-1)/2;
1271  midFilterDimen.y = (filterHeight-1)/2;
1272 
1273  int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1274 
1275  // dimension of the local cache
1276  int2 cachedAreaDimen;
1277  cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1278  cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1279 
1280  // cache the pixels accessed by this workgroup in local memory
1281  int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1282  int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1283  int groupSize = get_local_size(0) * get_local_size(1);
1284  for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1285 
1286  int2 cachedAreaIndex;
1287  cachedAreaIndex.x = i % cachedAreaDimen.x;
1288  cachedAreaIndex.y = i / cachedAreaDimen.x;
1289 
1290  int2 imagePixelIndex;
1291  imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1292 
1293  // only support EdgeVirtualPixelMethod through ClampToCanvas
1294  // TODO: implement other virtual pixel method
1295  imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1296  imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1297 
1298  pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1299  }
1300 
1301  // cache the filter
1302  for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1303  filterCache[i] = filter[i];
1304  }
1305  barrier(CLK_LOCAL_MEM_FENCE);
1306 
1307 
1308  int2 imageIndex;
1309  imageIndex.x = imageAreaOrg.x + get_local_id(0);
1310  imageIndex.y = imageAreaOrg.y + get_local_id(1);
1311 
1312  // if out-of-range, stops here and quit
1313  if (imageIndex.x >= imageWidth
1314  || imageIndex.y >= imageHeight) {
1315  return;
1316  }
1317 
1318  int filterIndex = 0;
1319  float4 sum = (float4)0.0f;
1320  float gamma = 0.0f;
1321  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1322  int cacheIndexY = get_local_id(1);
1323  for (int j = 0; j < filterHeight; j++) {
1324  int cacheIndexX = get_local_id(0);
1325  for (int i = 0; i < filterWidth; i++) {
1326  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1327  float f = filterCache[filterIndex];
1328 
1329  sum.x += f * p.x;
1330  sum.y += f * p.y;
1331  sum.z += f * p.z;
1332  sum.w += f * p.w;
1333 
1334  gamma += f;
1335  filterIndex++;
1336  cacheIndexX++;
1337  }
1338  cacheIndexY++;
1339  }
1340  }
1341  else {
1342  int cacheIndexY = get_local_id(1);
1343  for (int j = 0; j < filterHeight; j++) {
1344  int cacheIndexX = get_local_id(0);
1345  for (int i = 0; i < filterWidth; i++) {
1346 
1347  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1348  float alpha = QuantumScale*p.w;
1349  float f = filterCache[filterIndex];
1350  float g = alpha * f;
1351 
1352  sum.x += g*p.x;
1353  sum.y += g*p.y;
1354  sum.z += g*p.z;
1355  sum.w += f*p.w;
1356 
1357  gamma += g;
1358  filterIndex++;
1359  cacheIndexX++;
1360  }
1361  cacheIndexY++;
1362  }
1363  gamma = PerceptibleReciprocal(gamma);
1364  sum.xyz = gamma*sum.xyz;
1365  }
1366  CLPixelType outputPixel;
1367  outputPixel.x = ClampToQuantum(sum.x);
1368  outputPixel.y = ClampToQuantum(sum.y);
1369  outputPixel.z = ClampToQuantum(sum.z);
1370  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1371 
1372  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1373  }
1374  )
1375 
1376  STRINGIFY(
1377  __kernel
1378  void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
1379  const uint imageWidth, const uint imageHeight,
1380  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1381  const uint matte, const ChannelType channel) {
1382 
1383  int2 imageIndex;
1384  imageIndex.x = get_global_id(0);
1385  imageIndex.y = get_global_id(1);
1386 
1387  /*
1388  unsigned int imageWidth = get_global_size(0);
1389  unsigned int imageHeight = get_global_size(1);
1390  */
1391  if (imageIndex.x >= imageWidth
1392  || imageIndex.y >= imageHeight)
1393  return;
1394 
1395  int2 midFilterDimen;
1396  midFilterDimen.x = (filterWidth-1)/2;
1397  midFilterDimen.y = (filterHeight-1)/2;
1398 
1399  int filterIndex = 0;
1400  float4 sum = (float4)0.0f;
1401  float gamma = 0.0f;
1402  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1403  for (int j = 0; j < filterHeight; j++) {
1404  int2 inputPixelIndex;
1405  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1406  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1407  for (int i = 0; i < filterWidth; i++) {
1408  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1409  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1410 
1411  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1412  float f = filter[filterIndex];
1413 
1414  sum.x += f * p.x;
1415  sum.y += f * p.y;
1416  sum.z += f * p.z;
1417  sum.w += f * p.w;
1418 
1419  gamma += f;
1420 
1421  filterIndex++;
1422  }
1423  }
1424  }
1425  else {
1426 
1427  for (int j = 0; j < filterHeight; j++) {
1428  int2 inputPixelIndex;
1429  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1430  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1431  for (int i = 0; i < filterWidth; i++) {
1432  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1433  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1434 
1435  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1436  float alpha = QuantumScale*p.w;
1437  float f = filter[filterIndex];
1438  float g = alpha * f;
1439 
1440  sum.x += g*p.x;
1441  sum.y += g*p.y;
1442  sum.z += g*p.z;
1443  sum.w += f*p.w;
1444 
1445  gamma += g;
1446 
1447 
1448  filterIndex++;
1449  }
1450  }
1451  gamma = PerceptibleReciprocal(gamma);
1452  sum.xyz = gamma*sum.xyz;
1453  }
1454 
1455  CLPixelType outputPixel;
1456  outputPixel.x = ClampToQuantum(sum.x);
1457  outputPixel.y = ClampToQuantum(sum.y);
1458  outputPixel.z = ClampToQuantum(sum.z);
1459  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1460 
1461  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1462  }
1463  )
1464 
1465 /*
1466 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1467 % %
1468 % %
1469 % %
1470 % D e s p e c k l e %
1471 % %
1472 % %
1473 % %
1474 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1475 */
1476 
1477  STRINGIFY(
1478 
1479  __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1480  , const unsigned int imageWidth, const unsigned int imageHeight
1481  , const int2 offset, const int polarity, const int matte) {
1482 
1483  int x = get_global_id(0);
1484  int y = get_global_id(1);
1485 
1486  CLPixelType v = inputImage[y*imageWidth+x];
1487 
1488  int2 neighbor;
1489  neighbor.y = y + offset.y;
1490  neighbor.x = x + offset.x;
1491 
1492  int2 clampedNeighbor;
1493  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1494  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1495 
1496  CLPixelType r = (clampedNeighbor.x == neighbor.x
1497  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1498  :(CLPixelType)0;
1499 
1500  int sv[4];
1501  sv[0] = (int)v.x;
1502  sv[1] = (int)v.y;
1503  sv[2] = (int)v.z;
1504  sv[3] = (int)v.w;
1505 
1506  int sr[4];
1507  sr[0] = (int)r.x;
1508  sr[1] = (int)r.y;
1509  sr[2] = (int)r.z;
1510  sr[3] = (int)r.w;
1511 
1512  if (polarity > 0) {
1513  \n #pragma unroll 4\n
1514  for (unsigned int i = 0; i < 4; i++) {
1515  sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1516  }
1517  }
1518  else {
1519  \n #pragma unroll 4\n
1520  for (unsigned int i = 0; i < 4; i++) {
1521  sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1522  }
1523 
1524  }
1525 
1526  v.x = (CLQuantum)sv[0];
1527  v.y = (CLQuantum)sv[1];
1528  v.z = (CLQuantum)sv[2];
1529 
1530  if (matte!=0)
1531  v.w = (CLQuantum)sv[3];
1532 
1533  outputImage[y*imageWidth+x] = v;
1534 
1535  }
1536 
1537 
1538  )
1539 
1540  STRINGIFY(
1541 
1542  __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1543  , const unsigned int imageWidth, const unsigned int imageHeight
1544  , const int2 offset, const int polarity, const int matte) {
1545 
1546  int x = get_global_id(0);
1547  int y = get_global_id(1);
1548 
1549  CLPixelType v = inputImage[y*imageWidth+x];
1550 
1551  int2 neighbor, clampedNeighbor;
1552 
1553  neighbor.y = y + offset.y;
1554  neighbor.x = x + offset.x;
1555  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1556  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1557 
1558  CLPixelType r = (clampedNeighbor.x == neighbor.x
1559  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1560  :(CLPixelType)0;
1561 
1562 
1563  neighbor.y = y - offset.y;
1564  neighbor.x = x - offset.x;
1565  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1566  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1567 
1568  CLPixelType s = (clampedNeighbor.x == neighbor.x
1569  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1570  :(CLPixelType)0;
1571 
1572 
1573  int sv[4];
1574  sv[0] = (int)v.x;
1575  sv[1] = (int)v.y;
1576  sv[2] = (int)v.z;
1577  sv[3] = (int)v.w;
1578 
1579  int sr[4];
1580  sr[0] = (int)r.x;
1581  sr[1] = (int)r.y;
1582  sr[2] = (int)r.z;
1583  sr[3] = (int)r.w;
1584 
1585  int ss[4];
1586  ss[0] = (int)s.x;
1587  ss[1] = (int)s.y;
1588  ss[2] = (int)s.z;
1589  ss[3] = (int)s.w;
1590 
1591  if (polarity > 0) {
1592  \n #pragma unroll 4\n
1593  for (unsigned int i = 0; i < 4; i++) {
1594  //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1595  //
1596  //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1597  //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1598  sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1599  }
1600  }
1601  else {
1602  \n #pragma unroll 4\n
1603  for (unsigned int i = 0; i < 4; i++) {
1604  //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1605  //
1606  //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1607  sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1608  }
1609  }
1610 
1611  v.x = (CLQuantum)sv[0];
1612  v.y = (CLQuantum)sv[1];
1613  v.z = (CLQuantum)sv[2];
1614 
1615  if (matte!=0)
1616  v.w = (CLQuantum)sv[3];
1617 
1618  outputImage[y*imageWidth+x] = v;
1619 
1620  }
1621  )
1622 
1623 /*
1624 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1625 % %
1626 % %
1627 % %
1628 % E q u a l i z e %
1629 % %
1630 % %
1631 % %
1632 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1633 */
1634 
1635  STRINGIFY(
1636  /*
1637  */
1638  __kernel void Equalize(__global CLPixelType * restrict im,
1639  const ChannelType channel,
1640  __global CLPixelType * restrict equalize_map,
1641  const float4 white, const float4 black)
1642  {
1643  const int x = get_global_id(0);
1644  const int y = get_global_id(1);
1645  const int columns = get_global_size(0);
1646  const int c = x + y * columns;
1647 
1648  uint ePos;
1649  CLPixelType oValue, eValue;
1650  CLQuantum red, green, blue, alpha;
1651 
1652  //read from global
1653  oValue=im[c];
1654 
1655  if ((channel & SyncChannels) != 0)
1656  {
1657  if (getRedF4(white) != getRedF4(black))
1658  {
1659  ePos = ScaleQuantumToMap(getRed(oValue));
1660  eValue = equalize_map[ePos];
1661  red = getRed(eValue);
1662  ePos = ScaleQuantumToMap(getGreen(oValue));
1663  eValue = equalize_map[ePos];
1664  green = getRed(eValue);
1665  ePos = ScaleQuantumToMap(getBlue(oValue));
1666  eValue = equalize_map[ePos];
1667  blue = getRed(eValue);
1668  ePos = ScaleQuantumToMap(getAlpha(oValue));
1669  eValue = equalize_map[ePos];
1670  alpha = getRed(eValue);
1671 
1672  //write back
1673  im[c]=(CLPixelType)(blue, green, red, alpha);
1674  }
1675 
1676  }
1677 
1678  // for equalizing, we always need all channels?
1679  // otherwise something more
1680 
1681  }
1682  )
1683 
1684 /*
1685 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1686 % %
1687 % %
1688 % %
1689 % F u n c t i o n %
1690 % %
1691 % %
1692 % %
1693 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1694 */
1695 
1696  STRINGIFY(
1697  /*
1698  apply FunctionImageChannel(braightness-contrast)
1699  */
1700  CLQuantum ApplyFunction(float pixel,const MagickFunction function,
1701  const unsigned int number_parameters,__constant float *parameters)
1702  {
1703  float result = 0.0f;
1704 
1705  switch (function)
1706  {
1707  case PolynomialFunction:
1708  {
1709  for (unsigned int i=0; i < number_parameters; i++)
1710  result = result*QuantumScale*pixel + parameters[i];
1711  result *= QuantumRange;
1712  break;
1713  }
1714  case SinusoidFunction:
1715  {
1716  float freq,phase,ampl,bias;
1717  freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1718  phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1719  ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1720  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1721  result = QuantumRange*(ampl*sin(2.0f*MagickPI*
1722  (freq*QuantumScale*pixel + phase/360.0f)) + bias);
1723  break;
1724  }
1725  case ArcsinFunction:
1726  {
1727  float width,range,center,bias;
1728  width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1729  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1730  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1731  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1732 
1733  result = 2.0f/width*(QuantumScale*pixel - center);
1734  result = range/MagickPI*asin(result)+bias;
1735  result = ( result <= -1.0f ) ? bias - range/2.0f : result;
1736  result = ( result >= 1.0f ) ? bias + range/2.0f : result;
1737  result *= QuantumRange;
1738  break;
1739  }
1740  case ArctanFunction:
1741  {
1742  float slope,range,center,bias;
1743  slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1744  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1745  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1746  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1747  result = MagickPI*slope*(QuantumScale*pixel-center);
1748  result = QuantumRange*(range/MagickPI*atan(result) + bias);
1749  break;
1750  }
1751  case UndefinedFunction:
1752  break;
1753  }
1754  return(ClampToQuantum(result));
1755  }
1756  )
1757 
1758  STRINGIFY(
1759  /*
1760  Improve brightness / contrast of the image
1761  channel : define which channel is improved
1762  function : the function called to enchance the brightness contrast
1763  number_parameters : numbers of parameters
1764  parameters : the parameter
1765  */
1766  __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
1767  const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
1768  __constant float *parameters)
1769  {
1770  const unsigned int x = get_global_id(0);
1771  const unsigned int y = get_global_id(1);
1772  const unsigned int columns = get_global_size(0);
1773  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1774 
1775  float red;
1776  float green;
1777  float blue;
1778  float alpha;
1779 
1780  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
1781 
1782  if ((channel & RedChannel) != 0)
1783  red=ApplyFunction(red, function, number_parameters, parameters);
1784 
1785  if (number_channels > 2)
1786  {
1787  if ((channel & GreenChannel) != 0)
1788  green=ApplyFunction(green, function, number_parameters, parameters);
1789 
1790  if ((channel & BlueChannel) != 0)
1791  blue=ApplyFunction(blue, function, number_parameters, parameters);
1792  }
1793 
1794  if (((number_channels == 4) || (number_channels == 2)) &&
1795  ((channel & AlphaChannel) != 0))
1796  alpha=ApplyFunction(alpha, function, number_parameters, parameters);
1797 
1798  WriteChannels(p, number_channels, channel, red, green, blue, alpha);
1799  }
1800  )
1801 
1802 /*
1803 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1804 % %
1805 % %
1806 % %
1807 % G r a y s c a l e %
1808 % %
1809 % %
1810 % %
1811 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1812 */
1813 
1814  STRINGIFY(
1815  __kernel void Grayscale(__global CLQuantum *image,const int number_channels,
1816  const unsigned int colorspace,const unsigned int method)
1817  {
1818  const unsigned int x = get_global_id(0);
1819  const unsigned int y = get_global_id(1);
1820  const unsigned int columns = get_global_size(0);
1821  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1822 
1823  float
1824  blue,
1825  green,
1826  red;
1827 
1828  red=getPixelRed(p);
1829  green=getPixelGreen(p);
1830  blue=getPixelBlue(p);
1831 
1832  CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue));
1833 
1834  setPixelRed(p,intensity);
1835  setPixelGreen(p,intensity);
1836  setPixelBlue(p,intensity);
1837  }
1838  )
1839 
1840 /*
1841 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1842 % %
1843 % %
1844 % %
1845 % L o c a l C o n t r a s t %
1846 % %
1847 % %
1848 % %
1849 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1850 */
1851 
1852  STRINGIFY(
1853 
1854  __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
1855  const int radius,
1856  const int imageWidth,
1857  const int imageHeight)
1858  {
1859  const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
1860 
1861  int x = get_local_id(0);
1862  int y = get_global_id(1);
1863 
1864  if ((x >= imageWidth) || (y >= imageHeight))
1865  return;
1866 
1867  global CLPixelType *src = srcImage + y * imageWidth;
1868 
1869  for (int i = x; i < imageWidth; i += get_local_size(0)) {
1870  float sum = 0.0f;
1871  float weight = 1.0f;
1872 
1873  int j = i - radius;
1874  while ((j + 7) < i) {
1875  for (int k = 0; k < 8; ++k) // Unroll 8x
1876  sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
1877  weight += 8.0f;
1878  j+=8;
1879  }
1880  while (j < i) {
1881  sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
1882  weight += 1.0f;
1883  ++j;
1884  }
1885 
1886  while ((j + 7) < radius + i) {
1887  for (int k = 0; k < 8; ++k) // Unroll 8x
1888  sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
1889  weight -= 8.0f;
1890  j+=8;
1891  }
1892  while (j < radius + i) {
1893  sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
1894  weight -= 1.0f;
1895  ++j;
1896  }
1897 
1898  tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
1899  }
1900  }
1901  )
1902 
1903  STRINGIFY(
1904  __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
1905  const int radius,
1906  const float strength,
1907  const int imageWidth,
1908  const int imageHeight)
1909  {
1910  const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
1911 
1912  int x = get_global_id(0);
1913  int y = get_global_id(1);
1914 
1915  if ((x >= imageWidth) || (y >= imageHeight))
1916  return;
1917 
1918  global float *src = blurImage + x;
1919 
1920  float sum = 0.0f;
1921  float weight = 1.0f;
1922 
1923  int j = y - radius;
1924  while ((j + 7) < y) {
1925  for (int k = 0; k < 8; ++k) // Unroll 8x
1926  sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
1927  weight += 8.0f;
1928  j+=8;
1929  }
1930  while (j < y) {
1931  sum += weight * src[mirrorBottom(j) * imageWidth];
1932  weight += 1.0f;
1933  ++j;
1934  }
1935 
1936  while ((j + 7) < radius + y) {
1937  for (int k = 0; k < 8; ++k) // Unroll 8x
1938  sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
1939  weight -= 8.0f;
1940  j+=8;
1941  }
1942  while (j < radius + y) {
1943  sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
1944  weight -= 1.0f;
1945  ++j;
1946  }
1947 
1948  CLPixelType pixel = srcImage[x + y * imageWidth];
1949  float srcVal = dot(RGB, convert_float4(pixel));
1950  float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
1951  mult = (srcVal + mult) / srcVal;
1952 
1953  pixel.x = ClampToQuantum(pixel.x * mult);
1954  pixel.y = ClampToQuantum(pixel.y * mult);
1955  pixel.z = ClampToQuantum(pixel.z * mult);
1956 
1957  dstImage[x + y * imageWidth] = pixel;
1958  }
1959  )
1960 
1961 /*
1962 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1963 % %
1964 % %
1965 % %
1966 % M o d u l a t e %
1967 % %
1968 % %
1969 % %
1970 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1971 */
1972 
1973  STRINGIFY(
1974 
1975  inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1976  float *hue, float *saturation, float *lightness)
1977  {
1978  float
1979  c,
1980  tmax,
1981  tmin;
1982 
1983  /*
1984  Convert RGB to HSL colorspace.
1985  */
1986  tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
1987  tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
1988 
1989  c=tmax-tmin;
1990 
1991  *lightness=(tmax+tmin)/2.0;
1992  if (c <= 0.0)
1993  {
1994  *hue=0.0;
1995  *saturation=0.0;
1996  return;
1997  }
1998 
1999  if (tmax == (QuantumScale*red))
2000  {
2001  *hue=(QuantumScale*green-QuantumScale*blue)/c;
2002  if ((QuantumScale*green) < (QuantumScale*blue))
2003  *hue+=6.0;
2004  }
2005  else
2006  if (tmax == (QuantumScale*green))
2007  *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
2008  else
2009  *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
2010 
2011  *hue*=60.0/360.0;
2012  if (*lightness <= 0.5)
2013  *saturation=c/(2.0*(*lightness));
2014  else
2015  *saturation=c/(2.0-2.0*(*lightness));
2016  }
2017 
2018  inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
2019  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2020  {
2021  float
2022  b,
2023  c,
2024  g,
2025  h,
2026  tmin,
2027  r,
2028  x;
2029 
2030  /*
2031  Convert HSL to RGB colorspace.
2032  */
2033  h=hue*360.0;
2034  if (lightness <= 0.5)
2035  c=2.0*lightness*saturation;
2036  else
2037  c=(2.0-2.0*lightness)*saturation;
2038  tmin=lightness-0.5*c;
2039  h-=360.0*floor(h/360.0);
2040  h/=60.0;
2041  x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2042  switch ((int) floor(h) % 6)
2043  {
2044  case 0:
2045  {
2046  r=tmin+c;
2047  g=tmin+x;
2048  b=tmin;
2049  break;
2050  }
2051  case 1:
2052  {
2053  r=tmin+x;
2054  g=tmin+c;
2055  b=tmin;
2056  break;
2057  }
2058  case 2:
2059  {
2060  r=tmin;
2061  g=tmin+c;
2062  b=tmin+x;
2063  break;
2064  }
2065  case 3:
2066  {
2067  r=tmin;
2068  g=tmin+x;
2069  b=tmin+c;
2070  break;
2071  }
2072  case 4:
2073  {
2074  r=tmin+x;
2075  g=tmin;
2076  b=tmin+c;
2077  break;
2078  }
2079  case 5:
2080  {
2081  r=tmin+c;
2082  g=tmin;
2083  b=tmin+x;
2084  break;
2085  }
2086  default:
2087  {
2088  r=0.0;
2089  g=0.0;
2090  b=0.0;
2091  }
2092  }
2093  *red=ClampToQuantum(QuantumRange*r);
2094  *green=ClampToQuantum(QuantumRange*g);
2095  *blue=ClampToQuantum(QuantumRange*b);
2096  }
2097 
2098  inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
2099  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2100  {
2101  float
2102  hue,
2103  lightness,
2104  saturation;
2105 
2106  /*
2107  Increase or decrease color lightness, saturation, or hue.
2108  */
2109  ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
2110  hue+=0.5*(0.01*percent_hue-1.0);
2111  while (hue < 0.0)
2112  hue+=1.0;
2113  while (hue >= 1.0)
2114  hue-=1.0;
2115  saturation*=0.01*percent_saturation;
2116  lightness*=0.01*percent_lightness;
2117  ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
2118  }
2119 
2120  __kernel void Modulate(__global CLPixelType *im,
2121  const float percent_brightness,
2122  const float percent_hue,
2123  const float percent_saturation,
2124  const int colorspace)
2125  {
2126 
2127  const int x = get_global_id(0);
2128  const int y = get_global_id(1);
2129  const int columns = get_global_size(0);
2130  const int c = x + y * columns;
2131 
2132  CLPixelType pixel = im[c];
2133 
2134  CLQuantum
2135  blue,
2136  green,
2137  red;
2138 
2139  red=getRed(pixel);
2140  green=getGreen(pixel);
2141  blue=getBlue(pixel);
2142 
2143  switch (colorspace)
2144  {
2145  case HSLColorspace:
2146  default:
2147  {
2148  ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2149  &red, &green, &blue);
2150  }
2151 
2152  }
2153 
2154  CLPixelType filteredPixel;
2155 
2156  setRed(&filteredPixel, red);
2157  setGreen(&filteredPixel, green);
2158  setBlue(&filteredPixel, blue);
2159  filteredPixel.w = pixel.w;
2160 
2161  im[c] = filteredPixel;
2162  }
2163  )
2164 
2165 /*
2166 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2167 % %
2168 % %
2169 % %
2170 % M o t i o n B l u r %
2171 % %
2172 % %
2173 % %
2174 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2175 */
2176 
2177  STRINGIFY(
2178  __kernel
2179  void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2180  const unsigned int imageWidth, const unsigned int imageHeight,
2181  const __global float *filter, const unsigned int width, const __global int2* offset,
2182  const float4 bias,
2183  const ChannelType channel, const unsigned int matte) {
2184 
2185  int2 currentPixel;
2186  currentPixel.x = get_global_id(0);
2187  currentPixel.y = get_global_id(1);
2188 
2189  if (currentPixel.x >= imageWidth
2190  || currentPixel.y >= imageHeight)
2191  return;
2192 
2193  float4 pixel;
2194  pixel.x = (float)bias.x;
2195  pixel.y = (float)bias.y;
2196  pixel.z = (float)bias.z;
2197  pixel.w = (float)bias.w;
2198 
2199  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
2200 
2201  for (int i = 0; i < width; i++) {
2202  // only support EdgeVirtualPixelMethod through ClampToCanvas
2203  // TODO: implement other virtual pixel method
2204  int2 samplePixel = currentPixel + offset[i];
2205  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2206  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2207  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2208 
2209  pixel.x += (filter[i] * (float)samplePixelValue.x);
2210  pixel.y += (filter[i] * (float)samplePixelValue.y);
2211  pixel.z += (filter[i] * (float)samplePixelValue.z);
2212  pixel.w += (filter[i] * (float)samplePixelValue.w);
2213  }
2214 
2215  CLPixelType outputPixel;
2216  outputPixel.x = ClampToQuantum(pixel.x);
2217  outputPixel.y = ClampToQuantum(pixel.y);
2218  outputPixel.z = ClampToQuantum(pixel.z);
2219  outputPixel.w = ClampToQuantum(pixel.w);
2220  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2221  }
2222  else {
2223 
2224  float gamma = 0.0f;
2225  for (int i = 0; i < width; i++) {
2226  // only support EdgeVirtualPixelMethod through ClampToCanvas
2227  // TODO: implement other virtual pixel method
2228  int2 samplePixel = currentPixel + offset[i];
2229  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2230  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2231 
2232  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2233 
2234  float alpha = QuantumScale*samplePixelValue.w;
2235  float k = filter[i];
2236  pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2237  pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2238  pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2239 
2240  pixel.w += k * alpha * samplePixelValue.w;
2241 
2242  gamma+=k*alpha;
2243  }
2244  gamma = PerceptibleReciprocal(gamma);
2245  pixel.xyz = gamma*pixel.xyz;
2246 
2247  CLPixelType outputPixel;
2248  outputPixel.x = ClampToQuantum(pixel.x);
2249  outputPixel.y = ClampToQuantum(pixel.y);
2250  outputPixel.z = ClampToQuantum(pixel.z);
2251  outputPixel.w = ClampToQuantum(pixel.w);
2252  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2253  }
2254  }
2255  )
2256 
2257 /*
2258 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2259 % %
2260 % %
2261 % %
2262 % R e s i z e %
2263 % %
2264 % %
2265 % %
2266 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2267 */
2268 
2269  STRINGIFY(
2270  // Based on Box from resize.c
2271  float BoxResizeFilter(const float x)
2272  {
2273  return 1.0f;
2274  }
2275  )
2276 
2277  STRINGIFY(
2278  // Based on CubicBC from resize.c
2279  float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2280  {
2281  /*
2282  Cubic Filters using B,C determined values:
2283  Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2284  Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2285  Spline B = 1 C = 0 B-Spline Gaussian approximation
2286  Hermite B = 0 C = 0 B-Spline interpolator
2287 
2288  See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2289  Graphics Computer Graphics, Volume 22, Number 4, August 1988
2290  http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2291  Mitchell.pdf.
2292 
2293  Coefficents are determined from B,C values:
2294  P0 = ( 6 - 2*B )/6 = coeff[0]
2295  P1 = 0
2296  P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2297  P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2298  Q0 = ( 8*B +24*C )/6 = coeff[3]
2299  Q1 = ( -12*B -48*C )/6 = coeff[4]
2300  Q2 = ( 6*B +30*C )/6 = coeff[5]
2301  Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2302 
2303  which are used to define the filter:
2304 
2305  P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2306  Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2307 
2308  which ensures function is continuous in value and derivative (slope).
2309  */
2310  if (x < 1.0)
2311  return(resizeFilterCoefficients[0]+x*(x*
2312  (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2313  if (x < 2.0)
2314  return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2315  (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
2316  return(0.0);
2317  }
2318  )
2319 
2320  STRINGIFY(
2321  float Sinc(const float x)
2322  {
2323  if (x != 0.0f)
2324  {
2325  const float alpha=(float) (MagickPI*x);
2326  return sinpi(x)/alpha;
2327  }
2328  return(1.0f);
2329  }
2330  )
2331 
2332  STRINGIFY(
2333  float Triangle(const float x)
2334  {
2335  /*
2336  1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
2337  a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
2338  for Sinc().
2339  */
2340  return ((x<1.0f)?(1.0f-x):0.0f);
2341  }
2342  )
2343 
2344 
2345  STRINGIFY(
2346  float Hann(const float x)
2347  {
2348  /*
2349  Cosine window function:
2350  0.5+0.5*cos(pi*x).
2351  */
2352  const float cosine=cos((MagickPI*x));
2353  return(0.5f+0.5f*cosine);
2354  }
2355  )
2356 
2357  STRINGIFY(
2358  float Hamming(const float x)
2359  {
2360  /*
2361  Offset cosine window function:
2362  .54 + .46 cos(pi x).
2363  */
2364  const float cosine=cos((MagickPI*x));
2365  return(0.54f+0.46f*cosine);
2366  }
2367  )
2368 
2369  STRINGIFY(
2370  float Blackman(const float x)
2371  {
2372  /*
2373  Blackman: 2nd order cosine windowing function:
2374  0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2375 
2376  Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
2377  five flops.
2378  */
2379  const float cosine=cos((MagickPI*x));
2380  return(0.34f+cosine*(0.5f+cosine*0.16f));
2381  }
2382  )
2383 
2384  STRINGIFY(
2385  inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2386  {
2387  switch (filterType)
2388  {
2389  /* Call Sinc even for SincFast to get better precision on GPU
2390  and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2391  case SincWeightingFunction:
2393  return Sinc(x);
2395  return CubicBC(x,filterCoefficients);
2396  case BoxWeightingFunction:
2397  return BoxResizeFilter(x);
2399  return Triangle(x);
2400  case HannWeightingFunction:
2401  return Hann(x);
2403  return Hamming(x);
2405  return Blackman(x);
2406 
2407  default:
2408  return 0.0f;
2409  }
2410  }
2411  )
2412 
2413 
2414  STRINGIFY(
2415  inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2416  , const ResizeWeightingFunctionType resizeWindowType
2417  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2418  {
2419  float scale;
2420  float xBlur = fabs(x/resizeFilterBlur);
2421  if (resizeWindowSupport < MagickEpsilon
2422  || resizeWindowType == BoxWeightingFunction)
2423  {
2424  scale = 1.0f;
2425  }
2426  else
2427  {
2428  scale = resizeFilterScale;
2429  scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2430  }
2431  float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2432  return weight;
2433  }
2434 
2435  )
2436 
2437  ;
2438  const char *accelerateKernels2 =
2439 
2440  STRINGIFY(
2441 
2442  inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2443  return (numWorkItems/pixelPerWorkgroup);
2444  }
2445 
2446  // returns the index of the pixel for the current workitem to compute.
2447  // returns -1 if this workitem doesn't need to participate in any computation
2448  inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2449  const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2450  int pixelIndex = itemID/numWorkItemsPerPixel;
2451  pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
2452  return pixelIndex;
2453  }
2454 
2455  )
2456 
2457  STRINGIFY(
2458  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2459  void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2460  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2461  const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
2462  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2463  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2464  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2465  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2466  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2467  {
2468  // calculate the range of resized image pixels computed by this workgroup
2469  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2470  const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2471  const unsigned int actualNumPixelToCompute = stopX - startX;
2472 
2473  // calculate the range of input image pixels to cache
2474  float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2475  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2476  scale = PerceptibleReciprocal(scale);
2477 
2478  const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2479  const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2480 
2481  // cache the input pixels into local memory
2482  const unsigned int y = get_global_id(1);
2483  const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
2484  const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
2485  event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
2486  wait_group_events(1, &e);
2487 
2488  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2489  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2490  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2491  {
2492  const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2493  const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2494  const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2495 
2496  // determine which resized pixel computed by this workitem
2497  const unsigned int itemID = get_local_id(0);
2498  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2499 
2500  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2501 
2502  float4 filteredPixel = (float4)0.0f;
2503  float density = 0.0f;
2504  float gamma = 0.0f;
2505  // -1 means this workitem doesn't participate in the computation
2506  if (pixelIndex != -1)
2507  {
2508  // x coordinated of the resized pixel computed by this workitem
2509  const int x = chunkStartX + pixelIndex;
2510 
2511  // calculate how many steps required for this pixel
2512  const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2513  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2514  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
2515  const unsigned int n = stop - start;
2516 
2517  // calculate how many steps this workitem will contribute
2518  unsigned int numStepsPerWorkItem = n / numItems;
2519  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2520 
2521  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2522  if (startStep < n)
2523  {
2524  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2525 
2526  unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2527  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2528  {
2529  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2530  (ResizeWeightingFunctionType) resizeFilterType,
2531  (ResizeWeightingFunctionType) resizeWindowType,
2532  resizeFilterScale, resizeFilterWindowSupport,
2533  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2534 
2535  float4 cp = (float4)0.0f;
2536 
2537  __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
2538  cp.x = (float) *(p);
2539  if (number_channels > 2)
2540  {
2541  cp.y = (float) *(p + 1);
2542  cp.z = (float) *(p + 2);
2543  }
2544 
2545  if (alpha_index != 0)
2546  {
2547  cp.w = (float) *(p + alpha_index);
2548 
2549  float alpha = weight * QuantumScale * cp.w;
2550 
2551  filteredPixel.x += alpha * cp.x;
2552  filteredPixel.y += alpha * cp.y;
2553  filteredPixel.z += alpha * cp.z;
2554  filteredPixel.w += weight * cp.w;
2555  gamma += alpha;
2556  }
2557  else
2558  filteredPixel += ((float4) weight)*cp;
2559 
2560  density += weight;
2561  }
2562  }
2563  }
2564 
2565  // initialize the accumulators to zero
2566  if (itemID < actualNumPixelInThisChunk) {
2567  outputPixelCache[itemID] = (float4)0.0f;
2568  densityCache[itemID] = 0.0f;
2569  if (alpha_index != 0)
2570  gammaCache[itemID] = 0.0f;
2571  }
2572  barrier(CLK_LOCAL_MEM_FENCE);
2573 
2574  // accumulatte the filtered pixel value and the density
2575  for (unsigned int i = 0; i < numItems; i++) {
2576  if (pixelIndex != -1) {
2577  if (itemID%numItems == i) {
2578  outputPixelCache[pixelIndex]+=filteredPixel;
2579  densityCache[pixelIndex]+=density;
2580  if (alpha_index != 0)
2581  gammaCache[pixelIndex]+=gamma;
2582  }
2583  }
2584  barrier(CLK_LOCAL_MEM_FENCE);
2585  }
2586 
2587  if (itemID < actualNumPixelInThisChunk)
2588  {
2589  float4 filteredPixel = outputPixelCache[itemID];
2590 
2591  float gamma = 0.0f;
2592  if (alpha_index != 0)
2593  gamma = gammaCache[itemID];
2594 
2595  float density = densityCache[itemID];
2596  if ((density != 0.0f) && (density != 1.0f))
2597  {
2598  density = PerceptibleReciprocal(density);
2599  filteredPixel *= (float4) density;
2600  if (alpha_index != 0)
2601  gamma *= density;
2602  }
2603 
2604  if (alpha_index != 0)
2605  {
2606  gamma = PerceptibleReciprocal(gamma);
2607  filteredPixel.x *= gamma;
2608  filteredPixel.y *= gamma;
2609  filteredPixel.z *= gamma;
2610  }
2611 
2612  WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
2613  }
2614  }
2615  }
2616  )
2617 
2618 
2619  STRINGIFY(
2620  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2621  void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2622  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2623  const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
2624  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2625  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2626  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2627  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2628  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2629  {
2630  // calculate the range of resized image pixels computed by this workgroup
2631  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2632  const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2633  const unsigned int actualNumPixelToCompute = stopY - startY;
2634 
2635  // calculate the range of input image pixels to cache
2636  float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2637  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2638  scale = PerceptibleReciprocal(scale);
2639 
2640  const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2641  const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2642 
2643  // cache the input pixels into local memory
2644  const unsigned int x = get_global_id(0);
2645  unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
2646  unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
2647  unsigned int stride = inputColumns * number_channels;
2648  for (unsigned int i = 0; i < number_channels; i++)
2649  {
2650  event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
2651  wait_group_events(1,&e);
2652  }
2653 
2654  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2655  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2656  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2657  {
2658  const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2659  const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2660  const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2661 
2662  // determine which resized pixel computed by this workitem
2663  const unsigned int itemID = get_local_id(1);
2664  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2665 
2666  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2667 
2668  float4 filteredPixel = (float4)0.0f;
2669  float density = 0.0f;
2670  float gamma = 0.0f;
2671  // -1 means this workitem doesn't participate in the computation
2672  if (pixelIndex != -1)
2673  {
2674  // x coordinated of the resized pixel computed by this workitem
2675  const int y = chunkStartY + pixelIndex;
2676 
2677  // calculate how many steps required for this pixel
2678  const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2679  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2680  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
2681  const unsigned int n = stop - start;
2682 
2683  // calculate how many steps this workitem will contribute
2684  unsigned int numStepsPerWorkItem = n / numItems;
2685  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2686 
2687  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2688  if (startStep < n)
2689  {
2690  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2691 
2692  unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2693  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2694  {
2695  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2696  (ResizeWeightingFunctionType) resizeFilterType,
2697  (ResizeWeightingFunctionType) resizeWindowType,
2698  resizeFilterScale, resizeFilterWindowSupport,
2699  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2700 
2701  float4 cp = (float4)0.0f;
2702 
2703  __local CLQuantum *p = inputImageCache + cacheIndex;
2704  cp.x = (float) *(p);
2705  if (number_channels > 2)
2706  {
2707  cp.y = (float) *(p + rangeLength);
2708  cp.z = (float) *(p + (rangeLength * 2));
2709  }
2710 
2711  if (alpha_index != 0)
2712  {
2713  cp.w = (float) *(p + (rangeLength * alpha_index));
2714 
2715  float alpha = weight * QuantumScale * cp.w;
2716 
2717  filteredPixel.x += alpha * cp.x;
2718  filteredPixel.y += alpha * cp.y;
2719  filteredPixel.z += alpha * cp.z;
2720  filteredPixel.w += weight * cp.w;
2721  gamma += alpha;
2722  }
2723  else
2724  filteredPixel += ((float4) weight)*cp;
2725 
2726  density += weight;
2727  }
2728  }
2729  }
2730 
2731  // initialize the accumulators to zero
2732  if (itemID < actualNumPixelInThisChunk) {
2733  outputPixelCache[itemID] = (float4)0.0f;
2734  densityCache[itemID] = 0.0f;
2735  if (alpha_index != 0)
2736  gammaCache[itemID] = 0.0f;
2737  }
2738  barrier(CLK_LOCAL_MEM_FENCE);
2739 
2740  // accumulatte the filtered pixel value and the density
2741  for (unsigned int i = 0; i < numItems; i++) {
2742  if (pixelIndex != -1) {
2743  if (itemID%numItems == i) {
2744  outputPixelCache[pixelIndex]+=filteredPixel;
2745  densityCache[pixelIndex]+=density;
2746  if (alpha_index != 0)
2747  gammaCache[pixelIndex]+=gamma;
2748  }
2749  }
2750  barrier(CLK_LOCAL_MEM_FENCE);
2751  }
2752 
2753  if (itemID < actualNumPixelInThisChunk)
2754  {
2755  float4 filteredPixel = outputPixelCache[itemID];
2756 
2757  float gamma = 0.0f;
2758  if (alpha_index != 0)
2759  gamma = gammaCache[itemID];
2760 
2761  float density = densityCache[itemID];
2762  if ((density != 0.0f) && (density != 1.0f))
2763  {
2764  density = PerceptibleReciprocal(density);
2765  filteredPixel *= (float4) density;
2766  if (alpha_index != 0)
2767  gamma *= density;
2768  }
2769 
2770  if (alpha_index != 0)
2771  {
2772  gamma = PerceptibleReciprocal(gamma);
2773  filteredPixel.x *= gamma;
2774  filteredPixel.y *= gamma;
2775  filteredPixel.z *= gamma;
2776  }
2777 
2778  WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
2779  }
2780  }
2781  }
2782  )
2783 
2784 /*
2785 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2786 % %
2787 % %
2788 % %
2789 % R o t a t i o n a l B l u r %
2790 % %
2791 % %
2792 % %
2793 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2794 */
2795 
2796  STRINGIFY(
2797  __kernel void RotationalBlur(const __global CLQuantum *image,
2798  const unsigned int number_channels,const unsigned int channel,
2799  const float2 blurCenter,__constant float *cos_theta,
2800  __constant float *sin_theta,const unsigned int cossin_theta_size,
2801  __global CLQuantum *filteredImage)
2802  {
2803  const int x = get_global_id(0);
2804  const int y = get_global_id(1);
2805  const int columns = get_global_size(0);
2806  const int rows = get_global_size(1);
2807  unsigned int step = 1;
2808  float center_x = (float) x - blurCenter.x;
2809  float center_y = (float) y - blurCenter.y;
2810  float radius = hypot(center_x, center_y);
2811 
2812  float blur_radius = hypot(blurCenter.x, blurCenter.y);
2813 
2814  if (radius > MagickEpsilon)
2815  {
2816  step = (unsigned int) (blur_radius / radius);
2817  if (step == 0)
2818  step = 1;
2819  if (step >= cossin_theta_size)
2820  step = cossin_theta_size-1;
2821  }
2822 
2823  float4 result = 0.0f;
2824  float normalize = 0.0f;
2825  float gamma = 0.0f;
2826 
2827  for (unsigned int i=0; i<cossin_theta_size; i+=step)
2828  {
2829  int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
2830  int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
2831 
2832  float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
2833 
2834  if ((number_channels == 4) || (number_channels == 2))
2835  {
2836  float alpha = (float)(QuantumScale*pixel.w);
2837 
2838  gamma += alpha;
2839 
2840  result.x += alpha * pixel.x;
2841  result.y += alpha * pixel.y;
2842  result.z += alpha * pixel.z;
2843  result.w += pixel.w;
2844  }
2845  else
2846  result += pixel;
2847 
2848  normalize += 1.0f;
2849  }
2850 
2851  normalize = PerceptibleReciprocal(normalize);
2852 
2853  if ((number_channels == 4) || (number_channels == 2))
2854  {
2855  gamma = PerceptibleReciprocal(gamma);
2856  result.x *= gamma;
2857  result.y *= gamma;
2858  result.z *= gamma;
2859  result.w *= normalize;
2860  }
2861  else
2862  result *= normalize;
2863 
2864  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
2865  }
2866  )
2867 
2868 /*
2869 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2870 % %
2871 % %
2872 % %
2873 % U n s h a r p M a s k %
2874 % %
2875 % %
2876 % %
2877 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2878 */
2879 
2880  STRINGIFY(
2881  __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
2882  const __global float4 *blurRowData,const unsigned int number_channels,
2883  const ChannelType channel,const unsigned int columns,
2884  const unsigned int rows,__local float4* cachedData,
2885  __local float* cachedFilter,const __global float *filter,
2886  const unsigned int width,const float gain, const float threshold,
2887  __global CLQuantum *filteredImage)
2888  {
2889  const unsigned int radius = (width-1)/2;
2890 
2891  // cache the pixel shared by the workgroup
2892  const int groupX = get_group_id(0);
2893  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
2894  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
2895 
2896  if ((groupStartY >= 0) && (groupStopY < rows))
2897  {
2898  event_t e = async_work_group_strided_copy(cachedData,
2899  blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
2900  wait_group_events(1,&e);
2901  }
2902  else
2903  {
2904  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
2905  cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
2906 
2907  barrier(CLK_LOCAL_MEM_FENCE);
2908  }
2909  // cache the filter as well
2910  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
2911  wait_group_events(1,&e);
2912 
2913  // only do the work if this is not a patched item
2914  const int cy = get_global_id(1);
2915 
2916  if (cy < rows)
2917  {
2918  float4 blurredPixel = (float4) 0.0f;
2919 
2920  int i = 0;
2921 
2922  for ( ; i+7 < width; )
2923  {
2924  for (int j=0; j < 8; j++, i++)
2925  blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
2926  }
2927 
2928  for ( ; i < width; i++)
2929  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
2930 
2931  float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
2932  float4 outputPixel = inputImagePixel - blurredPixel;
2933 
2934  float quantumThreshold = QuantumRange*threshold;
2935 
2936  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
2937  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
2938 
2939  //write back
2940  WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
2941  }
2942  }
2943  )
2944 
2945  STRINGIFY(
2946  __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
2947  const ChannelType channel,__constant float *filter,const unsigned int width,
2948  const unsigned int columns,const unsigned int rows,__local float4 *pixels,
2949  const float gain,const float threshold,__global CLQuantum *filteredImage)
2950  {
2951  const unsigned int x = get_global_id(0);
2952  const unsigned int y = get_global_id(1);
2953 
2954  const unsigned int radius = (width - 1) / 2;
2955 
2956  int row = y - radius;
2957  int baseRow = get_group_id(1) * get_local_size(1) - radius;
2958  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
2959 
2960  while (row < endRow) {
2961  int srcy = (row < 0) ? -row : row; // mirror pad
2962  srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
2963 
2964  float4 value = 0.0f;
2965 
2966  int ix = x - radius;
2967  int i = 0;
2968 
2969  while (i + 7 < width) {
2970  for (int j = 0; j < 8; ++j) { // unrolled
2971  int srcx = ix + j;
2972  srcx = (srcx < 0) ? -srcx : srcx;
2973  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2974  value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2975  }
2976  ix += 8;
2977  i += 8;
2978  }
2979 
2980  while (i < width) {
2981  int srcx = (ix < 0) ? -ix : ix; // mirror pad
2982  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2983  value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2984  ++i;
2985  ++ix;
2986  }
2987  pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
2988  row += get_local_size(1);
2989  }
2990 
2991  barrier(CLK_LOCAL_MEM_FENCE);
2992 
2993  const int px = get_local_id(0);
2994  const int py = get_local_id(1);
2995  const int prp = get_local_size(0);
2996  float4 value = (float4)(0.0f);
2997 
2998  int i = 0;
2999  while (i + 7 < width) {
3000  for (int j = 0; j < 8; ++j) // unrolled
3001  value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
3002  i += 8;
3003  }
3004  while (i < width) {
3005  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3006  ++i;
3007  }
3008 
3009  if ((x < columns) && (y < rows)) {
3010  float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
3011  float4 diff = srcPixel - value;
3012 
3013  float quantumThreshold = QuantumRange*threshold;
3014 
3015  int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3016  value = select(srcPixel + diff * gain, srcPixel, mask);
3017 
3018  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
3019  }
3020  }
3021  )
3022 
3023  STRINGIFY(
3024  __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
3025  void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
3026  const unsigned int number_channels,const unsigned int max_channels,
3027  const float threshold,const int passes,const unsigned int imageWidth,
3028  const unsigned int imageHeight)
3029  {
3030  const int pad = (1 << (passes - 1));
3031  const int tileSize = 64;
3032  const int tileRowPixels = 64;
3033  const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3034 
3035  CLQuantum stage[48]; // 16 * 3 (we only need 3 channels)
3036 
3037  local float buffer[64 * 64];
3038 
3039  int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3040  int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
3041 
3042  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3043  int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
3044  (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
3045 
3046  for (int channel = 0; channel < max_channels; ++channel)
3047  stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
3048  }
3049 
3050  for (int channel = 0; channel < max_channels; ++channel) {
3051  // Load LDS
3052  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3053  buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
3054 
3055  // Process
3056 
3057  float tmp[16];
3058  float accum[16];
3059  float pixel;
3060 
3061  for (int i = 0; i < 16; i++)
3062  accum[i]=0.0f;
3063 
3064  for (int pass = 0; pass < passes; ++pass) {
3065  const int radius = 1 << pass;
3066  const int x = get_local_id(0);
3067  const float thresh = threshold * noise[pass];
3068 
3069  // Apply horizontal hat
3070  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3071  const int offset = i * tileRowPixels;
3072  if (pass == 0)
3073  tmp[i / 4] = buffer[x + offset]; // snapshot input on first pass
3074  pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3075  barrier(CLK_LOCAL_MEM_FENCE);
3076  buffer[x + offset] = pixel;
3077  }
3078  barrier(CLK_LOCAL_MEM_FENCE);
3079 
3080  // Apply vertical hat
3081  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3082  pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3083  float delta = tmp[i / 4] - pixel;
3084  tmp[i / 4] = pixel; // hold output in tmp until all workitems are done
3085  if (delta < -thresh)
3086  delta += thresh;
3087  else if (delta > thresh)
3088  delta -= thresh;
3089  else
3090  delta = 0;
3091  accum[i / 4] += delta;
3092  }
3093  barrier(CLK_LOCAL_MEM_FENCE);
3094 
3095  if (pass < passes - 1)
3096  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3097  buffer[x + i * tileRowPixels] = tmp[i / 4]; // store lowpass for next pass
3098  else // last pass
3099  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3100  accum[i / 4] += tmp[i / 4]; // add the lowpass signal back to output
3101  barrier(CLK_LOCAL_MEM_FENCE);
3102  }
3103 
3104  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3105  stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]);
3106 
3107  barrier(CLK_LOCAL_MEM_FENCE);
3108  }
3109 
3110  // Write from stage to output
3111 
3112  if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3113  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3114  if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3115  int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
3116  for (int channel = 0; channel < max_channels; ++channel) {
3117  dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
3118  }
3119  }
3120  }
3121  }
3122  }
3123  )
3124 
3125  ;
3126 
3127 #endif // MAGICKCORE_OPENCL_SUPPORT
3128 
3129 #if defined(__cplusplus) || defined(c_plusplus)
3130 }
3131 #endif
3132 
3133 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
#define SigmaPoisson
MagickExport void ConvertRGBToHSL(const double red, const double green, const double blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1099
#define SigmaRandom
static double Hann(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:322
PixelIntensityMethod
Definition: pixel.h:96
static double Blackman(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:149
#define MagickPI
Definition: image-private.h:30
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, double *red, double *green, double *blue)
Definition: enhance.c:2973
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
#define MagickEpsilon
Definition: magick-type.h:110
#define SigmaLaplacian
MagickPrivate void ConvertRGBToHSB(const double, const double, const double, double *, double *, double *)
NoiseType
Definition: fx.h:27
#define SigmaUniform
static double PerceptibleReciprocal(const double x)
#define SigmaGaussian
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:908
static void Contrast(const int sign, double *red, double *green, double *blue)
Definition: enhance.c:836
#define SigmaMultiplicativeGaussian
#define TauGaussian
static double Triangle(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:543
#define QuantumScale
Definition: magick-type.h:113
ChannelType
Definition: pixel.h:33
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, double *red, double *green, double *blue)
Definition: gem.c:462
#define MaxMap
Definition: magick-type.h:75
static double RoundToUnity(const double value)
#define MagickMax(x, y)
Definition: image-private.h:26
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:84
static double Sinc(const double, const ResizeFilter *)
static double CubicBC(const double x, const ResizeFilter *resize_filter)
Definition: resize.c:207
ResizeWeightingFunctionType
static double Hamming(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:334
#define MagickMin(x, y)
Definition: image-private.h:27
ColorspaceType
Definition: colorspace.h:25
#define SigmaImpulse
CompositeOperator
Definition: composite.h:25
MagickPrivate void ConvertHSBToRGB(const double, const double, const double, double *, double *, double *)
MagickExport MagickRealType GetPixelIntensity(const Image *magick_restrict image, const Quantum *magick_restrict pixel)
Definition: pixel.c:2358
MagickFunction
Definition: statistic.h:121
#define QuantumRange
Definition: magick-type.h:83
Definition: fx.h:36