MagickCore  7.0.7
Convert, Edit, Or Compose Bitmap Images
accelerate-kernels-private.h
Go to the documentation of this file.
1 /*
2  Copyright 1999-2018 ImageMagick Studio LLC, a non-profit organization
3  dedicated to making software imaging solutions freely available.
4 
5  You may not use this file except in compliance with the License.
6  obtain a copy of the License at
7 
8  https://www.imagemagick.org/script/license.php
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  MagickCore private kernels for accelerated functions.
17 */
18 
19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
27 
28 /*
29  Define declarations.
30 */
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
37 
38 const char *accelerateKernels =
39 
40 /*
41  Define declarations.
42 */
43  OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
44  OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
45  OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
46  OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
47  OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
48  OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
49  OPENCL_DEFINE(SigmaRandom, (attenuate))
50  OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
51  OPENCL_DEFINE(MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
52  OPENCL_DEFINE(MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
53 
54 /*
55  Typedef declarations.
56 */
57  STRINGIFY(
58  typedef enum
59  {
61  RGBColorspace, /* Linear RGB colorspace */
62  GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */
72  CMYKColorspace, /* negared linear RGB with black separated */
73  sRGBColorspace, /* Default: non-lienar sRGB colorspace */
77  Rec601LumaColorspace,
79  Rec709LumaColorspace,
82  CMYColorspace, /* negated linear RGB colorspace */
85  LCHColorspace, /* alias for LCHuv */
87  LCHabColorspace, /* Cylindrical (Polar) Lab */
88  LCHuvColorspace, /* Cylindrical (Polar) Luv */
91  HSVColorspace, /* alias for HSB */
95  )
96 
97  STRINGIFY(
98  typedef enum
99  {
117  CopyOpacityCompositeOp,
155  /* These are new operators, added after the above was last sorted.
156  * The list should be re-sorted only when a new library version is
157  * created.
158  */
173  )
174 
175  STRINGIFY(
176  typedef enum
177  {
183  } MagickFunction;
184  )
185 
186  STRINGIFY(
187  typedef enum
188  {
190  UniformNoise,
193  ImpulseNoise,
195  PoissonNoise,
197  } NoiseType;
198  )
199 
200  STRINGIFY(
201  typedef enum
202  {
214  )
215 
216  STRINGIFY(
217  typedef enum {
230  WelshWeightingFunction,
235  )
236 
237  STRINGIFY(
238  typedef enum
239  {
240  UndefinedChannel = 0x0000,
241  RedChannel = 0x0001,
242  GrayChannel = 0x0001,
243  CyanChannel = 0x0001,
244  GreenChannel = 0x0002,
245  MagentaChannel = 0x0002,
246  BlueChannel = 0x0004,
247  YellowChannel = 0x0004,
248  BlackChannel = 0x0008,
249  AlphaChannel = 0x0010,
250  OpacityChannel = 0x0010,
251  IndexChannel = 0x0020, /* Color Index Table? */
252  ReadMaskChannel = 0x0040, /* Pixel is Not Readable? */
253  WriteMaskChannel = 0x0080, /* Pixel is Write Protected? */
254  MetaChannel = 0x0100, /* ???? */
255  CompositeChannels = 0x001F,
256  AllChannels = 0x7ffffff,
257  /*
258  Special purpose channel types.
259  FUTURE: are these needed any more - they are more like hacks
260  SyncChannels for example is NOT a real channel but a 'flag'
261  It really says -- "User has not defined channels"
262  Though it does have extra meaning in the "-auto-level" operator
263  */
264  TrueAlphaChannel = 0x0100, /* extract actual alpha channel from opacity */
265  RGBChannels = 0x0200, /* set alpha from grayscale mask in RGB */
266  GrayChannels = 0x0400,
267  SyncChannels = 0x20000, /* channels modified as a single unit */
269  } ChannelType; /* must correspond to PixelChannel */
270  )
271 
272 /*
273  Helper functions.
274 */
275 
276 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
277 
278  STRINGIFY(
279  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
280  {
281  return((CLQuantum) value);
282  }
283  )
284 
285 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
286 
287  STRINGIFY(
288  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
289  {
290  return((CLQuantum) (257.0f*value));
291  }
292  )
293 
294 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
295 
296  STRINGIFY(
297  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
298  {
299  return((CLQuantum) (16843009.0*value));
300  }
301  )
302 
303 OPENCL_ENDIF()
304 
305  STRINGIFY(
306  inline int ClampToCanvas(const int offset,const int range)
307  {
308  return clamp(offset, (int)0, range-1);
309  }
310  )
311 
312  STRINGIFY(
313  inline CLQuantum ClampToQuantum(const float value)
314  {
315  return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
316  }
317  )
318 
319  STRINGIFY(
320  inline uint ScaleQuantumToMap(CLQuantum value)
321  {
322  if (value >= (CLQuantum) MaxMap)
323  return ((uint)MaxMap);
324  else
325  return ((uint)value);
326  }
327  )
328 
329  STRINGIFY(
330  inline float PerceptibleReciprocal(const float x)
331  {
332  float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
333  return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
334  }
335  )
336 
337  STRINGIFY(
338  inline float RoundToUnity(const float value)
339  {
340  return clamp(value,0.0f,1.0f);
341  }
342  )
343 
344  STRINGIFY(
345 
346  inline unsigned int getPixelIndex(const unsigned int number_channels,
347  const unsigned int columns, const unsigned int x, const unsigned int y)
348  {
349  return (x * number_channels) + (y * columns * number_channels);
350  }
351 
352  inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
353  inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
354  inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
355  inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
356 
357  inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
358  inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
359  inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
360  inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
361 
362  inline CLQuantum getBlue(CLPixelType p) { return p.x; }
363  inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
364  inline float getBlueF4(float4 p) { return p.x; }
365  inline void setBlueF4(float4* p, float value) { (*p).x = value; }
366 
367  inline CLQuantum getGreen(CLPixelType p) { return p.y; }
368  inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
369  inline float getGreenF4(float4 p) { return p.y; }
370  inline void setGreenF4(float4* p, float value) { (*p).y = value; }
371 
372  inline CLQuantum getRed(CLPixelType p) { return p.z; }
373  inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
374  inline float getRedF4(float4 p) { return p.z; }
375  inline void setRedF4(float4* p, float value) { (*p).z = value; }
376 
377  inline CLQuantum getAlpha(CLPixelType p) { return p.w; }
378  inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
379  inline float getAlphaF4(float4 p) { return p.w; }
380  inline void setAlphaF4(float4* p, float value) { (*p).w = value; }
381 
382  inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
383  const ChannelType channel, float *red, float *green, float *blue, float *alpha)
384  {
385  if ((channel & RedChannel) != 0)
386  *red=getPixelRed(p);
387 
388  if (number_channels > 2)
389  {
390  if ((channel & GreenChannel) != 0)
391  *green=getPixelGreen(p);
392 
393  if ((channel & BlueChannel) != 0)
394  *blue=getPixelBlue(p);
395  }
396 
397  if (((number_channels == 4) || (number_channels == 2)) &&
398  ((channel & AlphaChannel) != 0))
399  *alpha=getPixelAlpha(p,number_channels);
400  }
401 
402  inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels,
403  const unsigned int columns, const unsigned int x, const unsigned int y)
404  {
405  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
406 
407  float4 pixel;
408 
409  pixel.x=getPixelRed(p);
410 
411  if (number_channels > 2)
412  {
413  pixel.y=getPixelGreen(p);
414  pixel.z=getPixelBlue(p);
415  }
416 
417  if ((number_channels == 4) || (number_channels == 2))
418  pixel.w=getPixelAlpha(p,number_channels);
419  return(pixel);
420  }
421 
422  inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
423  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
424  {
425  const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
426 
427  float red = 0.0f;
428  float green = 0.0f;
429  float blue = 0.0f;
430  float alpha = 0.0f;
431 
432  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
433  return (float4)(red, green, blue, alpha);
434  }
435 
436  inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels,
437  const ChannelType channel, float red, float green, float blue, float alpha)
438  {
439  if ((channel & RedChannel) != 0)
440  setPixelRed(p,ClampToQuantum(red));
441 
442  if (number_channels > 2)
443  {
444  if ((channel & GreenChannel) != 0)
445  setPixelGreen(p,ClampToQuantum(green));
446 
447  if ((channel & BlueChannel) != 0)
448  setPixelBlue(p,ClampToQuantum(blue));
449  }
450 
451  if (((number_channels == 4) || (number_channels == 2)) &&
452  ((channel & AlphaChannel) != 0))
453  setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
454  }
455 
456  inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels,
457  const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel)
458  {
459  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
460 
461  setPixelRed(p,ClampToQuantum(pixel.x));
462 
463  if (number_channels > 2)
464  {
465  setPixelGreen(p,ClampToQuantum(pixel.y));
466  setPixelBlue(p,ClampToQuantum(pixel.z));
467  }
468 
469  if ((number_channels == 4) || (number_channels == 2))
470  setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w));
471  }
472 
473  inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
474  const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
475  float4 pixel)
476  {
477  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
478  WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
479  }
480 
481  inline float GetPixelIntensity(const unsigned int colorspace,
482  const unsigned int method,float red,float green,float blue)
483  {
484  float intensity;
485 
486  if (colorspace == GRAYColorspace)
487  return red;
488 
489  switch (method)
490  {
492  {
493  intensity=(red+green+blue)/3.0;
494  break;
495  }
497  {
498  intensity=MagickMax(MagickMax(red,green),blue);
499  break;
500  }
502  {
503  intensity=(MagickMin(MagickMin(red,green),blue)+
504  MagickMax(MagickMax(red,green),blue))/2.0;
505  break;
506  }
508  {
509  intensity=(float) (((float) red*red+green*green+blue*blue)/
510  (3.0*QuantumRange));
511  break;
512  }
514  {
515  /*
516  if (image->colorspace == RGBColorspace)
517  {
518  red=EncodePixelGamma(red);
519  green=EncodePixelGamma(green);
520  blue=EncodePixelGamma(blue);
521  }
522  */
523  intensity=0.298839*red+0.586811*green+0.114350*blue;
524  break;
525  }
527  {
528  /*
529  if (image->colorspace == sRGBColorspace)
530  {
531  red=DecodePixelGamma(red);
532  green=DecodePixelGamma(green);
533  blue=DecodePixelGamma(blue);
534  }
535  */
536  intensity=0.298839*red+0.586811*green+0.114350*blue;
537  break;
538  }
540  default:
541  {
542  /*
543  if (image->colorspace == RGBColorspace)
544  {
545  red=EncodePixelGamma(red);
546  green=EncodePixelGamma(green);
547  blue=EncodePixelGamma(blue);
548  }
549  */
550  intensity=0.212656*red+0.715158*green+0.072186*blue;
551  break;
552  }
554  {
555  /*
556  if (image->colorspace == sRGBColorspace)
557  {
558  red=DecodePixelGamma(red);
559  green=DecodePixelGamma(green);
560  blue=DecodePixelGamma(blue);
561  }
562  */
563  intensity=0.212656*red+0.715158*green+0.072186*blue;
564  break;
565  }
567  {
568  intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
569  sqrt(3.0));
570  break;
571  }
572  }
573 
574  return intensity;
575  }
576 
577  inline int mirrorBottom(int value)
578  {
579  return (value < 0) ? - (value) : value;
580  }
581 
582  inline int mirrorTop(int value, int width)
583  {
584  return (value >= width) ? (2 * width - value - 1) : value;
585  }
586  )
587 
588 /*
589 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
590 % %
591 % %
592 % %
593 % A d d N o i s e %
594 % %
595 % %
596 % %
597 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
598 */
599 
600  STRINGIFY(
601  /*
602  Part of MWC64X by David Thomas, dt10@imperial.ac.uk
603  This is provided under BSD, full license is with the main package.
604  See http://www.doc.ic.ac.uk/~dt10/research
605  */
606 
607  // Pre: a<M, b<M
608  // Post: r=(a+b) mod M
609  ulong MWC_AddMod64(ulong a, ulong b, ulong M)
610  {
611  ulong v=a+b;
612  //if( (v>=M) || (v<a) )
613  if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
614  v=v-M;
615  return v;
616  }
617 
618  // Pre: a<M,b<M
619  // Post: r=(a*b) mod M
620  // This could be done more efficently, but it is portable, and should
621  // be easy to understand. It can be replaced with any of the better
622  // modular multiplication algorithms (for example if you know you have
623  // double precision available or something).
624  ulong MWC_MulMod64(ulong a, ulong b, ulong M)
625  {
626  ulong r=0;
627  while(a!=0){
628  if(a&1)
629  r=MWC_AddMod64(r,b,M);
630  b=MWC_AddMod64(b,b,M);
631  a=a>>1;
632  }
633  return r;
634  }
635 
636  // Pre: a<M, e>=0
637  // Post: r=(a^b) mod M
638  // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
639  // most architectures
640  ulong MWC_PowMod64(ulong a, ulong e, ulong M)
641  {
642  ulong sqr=a, acc=1;
643  while(e!=0){
644  if(e&1)
645  acc=MWC_MulMod64(acc,sqr,M);
646  sqr=MWC_MulMod64(sqr,sqr,M);
647  e=e>>1;
648  }
649  return acc;
650  }
651 
652  uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
653  {
654  ulong m=MWC_PowMod64(A, distance, M);
655  ulong x=curr.x*(ulong)A+curr.y;
656  x=MWC_MulMod64(x, m, M);
657  return (uint2)((uint)(x/A), (uint)(x%A));
658  }
659 
660  uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
661  {
662  // This is an arbitrary constant for starting LCG jumping from. I didn't
663  // want to start from 1, as then you end up with the two or three first values
664  // being a bit poor in ones - once you've decided that, one constant is as
665  // good as any another. There is no deep mathematical reason for it, I just
666  // generated a random number.
667  enum{ MWC_BASEID = 4077358422479273989UL };
668 
669  ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
670  ulong m=MWC_PowMod64(A, dist, M);
671 
672  ulong x=MWC_MulMod64(MWC_BASEID, m, M);
673  return (uint2)((uint)(x/A), (uint)(x%A));
674  }
675 
677  typedef struct{ uint x; uint c; } mwc64x_state_t;
678 
679  enum{ MWC64X_A = 4294883355U };
680  enum{ MWC64X_M = 18446383549859758079UL };
681 
682  void MWC64X_Step(mwc64x_state_t *s)
683  {
684  uint X=s->x, C=s->c;
685 
686  uint Xn=MWC64X_A*X+C;
687  uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
688  uint Cn=mad_hi(MWC64X_A,X,carry);
689 
690  s->x=Xn;
691  s->c=Cn;
692  }
693 
694  void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
695  {
696  uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
697  s->x=tmp.x;
698  s->c=tmp.y;
699  }
700 
701  void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
702  {
703  uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
704  s->x=tmp.x;
705  s->c=tmp.y;
706  }
707 
709  uint MWC64X_NextUint(mwc64x_state_t *s)
710  {
711  uint res=s->x ^ s->c;
712  MWC64X_Step(s);
713  return res;
714  }
715 
716  //
717  // End of MWC64X excerpt
718  //
719 
720  float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
721  {
722  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
723  }
724 
725  float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate)
726  {
727  float
728  alpha,
729  beta,
730  noise,
731  sigma;
732 
733  noise = 0.0f;
734  alpha=mwcReadPseudoRandomValue(r);
735  switch(noise_type)
736  {
737  case UniformNoise:
738  default:
739  {
740  noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
741  break;
742  }
743  case GaussianNoise:
744  {
745  float
746  gamma,
747  tau;
748 
749  if (alpha == 0.0f)
750  alpha=1.0f;
751  beta=mwcReadPseudoRandomValue(r);
752  gamma=sqrt(-2.0f*log(alpha));
753  sigma=gamma*cospi((2.0f*beta));
754  tau=gamma*sinpi((2.0f*beta));
755  noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau;
756  break;
757  }
758  case ImpulseNoise:
759  {
760  if (alpha < (SigmaImpulse/2.0f))
761  noise=0.0f;
762  else
763  if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
764  noise=QuantumRange;
765  else
766  noise=pixel;
767  break;
768  }
769  case LaplacianNoise:
770  {
771  if (alpha <= 0.5f)
772  {
773  if (alpha <= MagickEpsilon)
774  noise=(pixel-QuantumRange);
775  else
776  noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
777  0.5f);
778  break;
779  }
780  beta=1.0f-alpha;
781  if (beta <= (0.5f*MagickEpsilon))
782  noise=(pixel+QuantumRange);
783  else
784  noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
785  break;
786  }
788  {
789  sigma=1.0f;
790  if (alpha > MagickEpsilon)
791  sigma=sqrt(-2.0f*log(alpha));
792  beta=mwcReadPseudoRandomValue(r);
793  noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma*
794  cospi((2.0f*beta))/2.0f);
795  break;
796  }
797  case PoissonNoise:
798  {
799  float
800  poisson;
801  unsigned int i;
802  poisson=exp(-SigmaPoisson*QuantumScale*pixel);
803  for (i=0; alpha > poisson; i++)
804  {
805  beta=mwcReadPseudoRandomValue(r);
806  alpha*=beta;
807  }
808  noise=(QuantumRange*i/SigmaPoisson);
809  break;
810  }
811  case RandomNoise:
812  {
813  noise=(QuantumRange*SigmaRandom*alpha);
814  break;
815  }
816  }
817  return noise;
818  }
819 
820  __kernel
821  void AddNoise(const __global CLQuantum *image,
822  const unsigned int number_channels,const ChannelType channel,
823  const unsigned int length,const unsigned int pixelsPerWorkItem,
824  const NoiseType noise_type,const float attenuate,const unsigned int seed0,
825  const unsigned int seed1,const unsigned int numRandomNumbersPerPixel,
826  __global CLQuantum *filteredImage)
827  {
828  mwc64x_state_t rng;
829  rng.x = seed0;
830  rng.c = seed1;
831 
832  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
833  uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
834  MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
835 
836  uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
837  uint count = pixelsPerWorkItem;
838 
839  while (count > 0 && pos < length)
840  {
841  const __global CLQuantum *p = image + pos;
842  __global CLQuantum *q = filteredImage + pos;
843 
844  float red;
845  float green;
846  float blue;
847  float alpha;
848 
849  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
850 
851  if ((channel & RedChannel) != 0)
852  red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
853 
854  if (number_channels > 2)
855  {
856  if ((channel & GreenChannel) != 0)
857  green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
858 
859  if ((channel & BlueChannel) != 0)
860  blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
861  }
862 
863  if (((number_channels == 4) || (number_channels == 2)) &&
864  ((channel & AlphaChannel) != 0))
865  alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
866 
867  WriteChannels(q, number_channels, channel, red, green, blue, alpha);
868 
869  pos += (get_local_size(0) * number_channels);
870  count--;
871  }
872  }
873  )
874 
875 /*
876 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
877 % %
878 % %
879 % %
880 % B l u r %
881 % %
882 % %
883 % %
884 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
885 */
886 
887  STRINGIFY(
888  /*
889  Reduce image noise and reduce detail levels by row
890  */
891  __kernel void BlurRow(const __global CLQuantum *image,
892  const unsigned int number_channels,const ChannelType channel,
893  __constant float *filter,const unsigned int width,
894  const unsigned int imageColumns,const unsigned int imageRows,
895  __local float4 *temp,__global float4 *tempImage)
896  {
897  const int x = get_global_id(0);
898  const int y = get_global_id(1);
899 
900  const int columns = imageColumns;
901 
902  const unsigned int radius = (width-1)/2;
903  const int wsize = get_local_size(0);
904  const unsigned int loadSize = wsize+width;
905 
906  //group coordinate
907  const int groupX=get_local_size(0)*get_group_id(0);
908 
909  //parallel load and clamp
910  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
911  {
912  int cx = ClampToCanvas(i + groupX - radius, columns);
913  temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
914  }
915 
916  // barrier
917  barrier(CLK_LOCAL_MEM_FENCE);
918 
919  // only do the work if this is not a patched item
920  if (get_global_id(0) < columns)
921  {
922  // compute
923  float4 result = (float4) 0;
924 
925  int i = 0;
926 
927  for ( ; i+7 < width; )
928  {
929  for (int j=0; j < 8; j++)
930  result+=filter[i+j]*temp[i+j+get_local_id(0)];
931  i+=8;
932  }
933 
934  for ( ; i < width; i++)
935  result+=filter[i]*temp[i+get_local_id(0)];
936 
937  // write back to global
938  tempImage[y*columns+x] = result;
939  }
940  }
941  )
942 
943  STRINGIFY(
944  /*
945  Reduce image noise and reduce detail levels by line
946  */
947  __kernel void BlurColumn(const __global float4 *blurRowData,
948  const unsigned int number_channels,const ChannelType channel,
949  __constant float *filter,const unsigned int width,
950  const unsigned int imageColumns,const unsigned int imageRows,
951  __local float4 *temp,__global CLQuantum *filteredImage)
952  {
953  const int x = get_global_id(0);
954  const int y = get_global_id(1);
955 
956  const int columns = imageColumns;
957  const int rows = imageRows;
958 
959  unsigned int radius = (width-1)/2;
960  const int wsize = get_local_size(1);
961  const unsigned int loadSize = wsize+width;
962 
963  //group coordinate
964  const int groupX=get_local_size(0)*get_group_id(0);
965  const int groupY=get_local_size(1)*get_group_id(1);
966  //notice that get_local_size(0) is 1, so
967  //groupX=get_group_id(0);
968 
969  //parallel load and clamp
970  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
971  temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
972 
973  // barrier
974  barrier(CLK_LOCAL_MEM_FENCE);
975 
976  // only do the work if this is not a patched item
977  if (get_global_id(1) < rows)
978  {
979  // compute
980  float4 result = (float4) 0;
981 
982  int i = 0;
983 
984  for ( ; i+7 < width; )
985  {
986  for (int j=0; j < 8; j++)
987  result+=filter[i+j]*temp[i+j+get_local_id(1)];
988  i+=8;
989  }
990 
991  for ( ; i < width; i++)
992  result+=filter[i]*temp[i+get_local_id(1)];
993 
994  // write back to global
995  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
996  }
997  }
998  )
999 
1000 /*
1001 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1002 % %
1003 % %
1004 % %
1005 % C o n t r a s t %
1006 % %
1007 % %
1008 % %
1009 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1010 */
1011 
1012  STRINGIFY(
1013 
1014  inline float4 ConvertRGBToHSB(const float4 pixel)
1015  {
1016  float4 result=0.0f;
1017  result.w=pixel.w;
1018  float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z);
1019  if (tmax != 0.0f)
1020  {
1021  float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z);
1022  float delta=tmax-tmin;
1023 
1024  result.y=delta/tmax;
1025  result.z=QuantumScale*tmax;
1026  if (delta != 0.0f)
1027  {
1028  result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
1029  result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
1030  (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
1031  result.x/=6.0f;
1032  result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
1033  }
1034  }
1035  return(result);
1036  }
1037 
1038  inline float4 ConvertHSBToRGB(const float4 pixel)
1039  {
1040  float hue=pixel.x;
1041  float saturation=pixel.y;
1042  float brightness=pixel.z;
1043 
1044  float4 result=pixel;
1045 
1046  if (saturation == 0.0f)
1047  {
1048  result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness);
1049  }
1050  else
1051  {
1052  float h=6.0f*(hue-floor(hue));
1053  float f=h-floor(h);
1054  float p=brightness*(1.0f-saturation);
1055  float q=brightness*(1.0f-saturation*f);
1056  float t=brightness*(1.0f-(saturation*(1.0f-f)));
1057  int ih = (int)h;
1058 
1059  if (ih == 1)
1060  {
1061  result.x=ClampToQuantum(QuantumRange*q);
1062  result.y=ClampToQuantum(QuantumRange*brightness);
1063  result.z=ClampToQuantum(QuantumRange*p);
1064  }
1065  else if (ih == 2)
1066  {
1067  result.x=ClampToQuantum(QuantumRange*p);
1068  result.y=ClampToQuantum(QuantumRange*brightness);
1069  result.z=ClampToQuantum(QuantumRange*t);
1070  }
1071  else if (ih == 3)
1072  {
1073  result.x=ClampToQuantum(QuantumRange*p);
1074  result.y=ClampToQuantum(QuantumRange*q);
1075  result.z=ClampToQuantum(QuantumRange*brightness);
1076  }
1077  else if (ih == 4)
1078  {
1079  result.x=ClampToQuantum(QuantumRange*t);
1080  result.y=ClampToQuantum(QuantumRange*p);
1081  result.z=ClampToQuantum(QuantumRange*brightness);
1082  }
1083  else if (ih == 5)
1084  {
1085  result.x=ClampToQuantum(QuantumRange*brightness);
1086  result.y=ClampToQuantum(QuantumRange*p);
1087  result.z=ClampToQuantum(QuantumRange*q);
1088  }
1089  else
1090  {
1091  result.x=ClampToQuantum(QuantumRange*brightness);
1092  result.y=ClampToQuantum(QuantumRange*t);
1093  result.z=ClampToQuantum(QuantumRange*p);
1094  }
1095  }
1096  return(result);
1097  }
1098 
1099  __kernel void Contrast(__global CLQuantum *image,
1100  const unsigned int number_channels,const int sign)
1101  {
1102  const int x=get_global_id(0);
1103  const int y=get_global_id(1);
1104  const unsigned int columns=get_global_size(0);
1105 
1106  float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
1107  if (number_channels < 3)
1108  pixel.y=pixel.z=pixel.x;
1109 
1110  pixel=ConvertRGBToHSB(pixel);
1111  float brightness=pixel.z;
1112  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1113  brightness=clamp(brightness,0.0f,1.0f);
1114  pixel.z=brightness;
1115  pixel=ConvertHSBToRGB(pixel);
1116 
1117  WriteAllChannels(image,number_channels,columns,x,y,pixel);
1118  }
1119  )
1120 
1121 /*
1122 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1123 % %
1124 % %
1125 % %
1126 % C o n t r a s t S t r e t c h %
1127 % %
1128 % %
1129 % %
1130 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1131 */
1132 
1133  STRINGIFY(
1134  /*
1135  */
1136  __kernel void Histogram(__global CLPixelType * restrict im,
1137  const ChannelType channel,
1138  const unsigned int colorspace,
1139  const unsigned int method,
1140  __global uint4 * restrict histogram)
1141  {
1142  const int x = get_global_id(0);
1143  const int y = get_global_id(1);
1144  const int columns = get_global_size(0);
1145  const int c = x + y * columns;
1146  if ((channel & SyncChannels) != 0)
1147  {
1148  float red=(float)getRed(im[c]);
1149  float green=(float)getGreen(im[c]);
1150  float blue=(float)getBlue(im[c]);
1151 
1152  float intensity = GetPixelIntensity(colorspace, method, red, green, blue);
1153  uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1154  atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
1155  }
1156  else
1157  {
1158  // for equalizing, we always need all channels?
1159  // otherwise something more
1160  }
1161  }
1162  )
1163 
1164  STRINGIFY(
1165  /*
1166  */
1167  __kernel void ContrastStretch(__global CLPixelType * restrict im,
1168  const ChannelType channel,
1169  __global CLPixelType * restrict stretch_map,
1170  const float4 white, const float4 black)
1171  {
1172  const int x = get_global_id(0);
1173  const int y = get_global_id(1);
1174  const int columns = get_global_size(0);
1175  const int c = x + y * columns;
1176 
1177  uint ePos;
1178  CLPixelType oValue, eValue;
1179  CLQuantum red, green, blue, alpha;
1180 
1181  //read from global
1182  oValue=im[c];
1183 
1184  if ((channel & RedChannel) != 0)
1185  {
1186  if (getRedF4(white) != getRedF4(black))
1187  {
1188  ePos = ScaleQuantumToMap(getRed(oValue));
1189  eValue = stretch_map[ePos];
1190  red = getRed(eValue);
1191  }
1192  }
1193 
1194  if ((channel & GreenChannel) != 0)
1195  {
1196  if (getGreenF4(white) != getGreenF4(black))
1197  {
1198  ePos = ScaleQuantumToMap(getGreen(oValue));
1199  eValue = stretch_map[ePos];
1200  green = getGreen(eValue);
1201  }
1202  }
1203 
1204  if ((channel & BlueChannel) != 0)
1205  {
1206  if (getBlueF4(white) != getBlueF4(black))
1207  {
1208  ePos = ScaleQuantumToMap(getBlue(oValue));
1209  eValue = stretch_map[ePos];
1210  blue = getBlue(eValue);
1211  }
1212  }
1213 
1214  if ((channel & AlphaChannel) != 0)
1215  {
1216  if (getAlphaF4(white) != getAlphaF4(black))
1217  {
1218  ePos = ScaleQuantumToMap(getAlpha(oValue));
1219  eValue = stretch_map[ePos];
1220  alpha = getAlpha(eValue);
1221  }
1222  }
1223 
1224  //write back
1225  im[c]=(CLPixelType)(blue, green, red, alpha);
1226 
1227  }
1228  )
1229 
1230 /*
1231 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1232 % %
1233 % %
1234 % %
1235 % C o n v o l v e %
1236 % %
1237 % %
1238 % %
1239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1240 */
1241 
1242  STRINGIFY(
1243  __kernel
1244  void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
1245  const unsigned int imageWidth, const unsigned int imageHeight,
1246  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1247  const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
1248 
1249  int2 blockID;
1250  blockID.x = get_global_id(0) / get_local_size(0);
1251  blockID.y = get_global_id(1) / get_local_size(1);
1252 
1253  // image area processed by this workgroup
1254  int2 imageAreaOrg;
1255  imageAreaOrg.x = blockID.x * get_local_size(0);
1256  imageAreaOrg.y = blockID.y * get_local_size(1);
1257 
1258  int2 midFilterDimen;
1259  midFilterDimen.x = (filterWidth-1)/2;
1260  midFilterDimen.y = (filterHeight-1)/2;
1261 
1262  int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1263 
1264  // dimension of the local cache
1265  int2 cachedAreaDimen;
1266  cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1267  cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1268 
1269  // cache the pixels accessed by this workgroup in local memory
1270  int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1271  int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1272  int groupSize = get_local_size(0) * get_local_size(1);
1273  for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1274 
1275  int2 cachedAreaIndex;
1276  cachedAreaIndex.x = i % cachedAreaDimen.x;
1277  cachedAreaIndex.y = i / cachedAreaDimen.x;
1278 
1279  int2 imagePixelIndex;
1280  imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1281 
1282  // only support EdgeVirtualPixelMethod through ClampToCanvas
1283  // TODO: implement other virtual pixel method
1284  imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1285  imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1286 
1287  pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1288  }
1289 
1290  // cache the filter
1291  for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1292  filterCache[i] = filter[i];
1293  }
1294  barrier(CLK_LOCAL_MEM_FENCE);
1295 
1296 
1297  int2 imageIndex;
1298  imageIndex.x = imageAreaOrg.x + get_local_id(0);
1299  imageIndex.y = imageAreaOrg.y + get_local_id(1);
1300 
1301  // if out-of-range, stops here and quit
1302  if (imageIndex.x >= imageWidth
1303  || imageIndex.y >= imageHeight) {
1304  return;
1305  }
1306 
1307  int filterIndex = 0;
1308  float4 sum = (float4)0.0f;
1309  float gamma = 0.0f;
1310  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1311  int cacheIndexY = get_local_id(1);
1312  for (int j = 0; j < filterHeight; j++) {
1313  int cacheIndexX = get_local_id(0);
1314  for (int i = 0; i < filterWidth; i++) {
1315  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1316  float f = filterCache[filterIndex];
1317 
1318  sum.x += f * p.x;
1319  sum.y += f * p.y;
1320  sum.z += f * p.z;
1321  sum.w += f * p.w;
1322 
1323  gamma += f;
1324  filterIndex++;
1325  cacheIndexX++;
1326  }
1327  cacheIndexY++;
1328  }
1329  }
1330  else {
1331  int cacheIndexY = get_local_id(1);
1332  for (int j = 0; j < filterHeight; j++) {
1333  int cacheIndexX = get_local_id(0);
1334  for (int i = 0; i < filterWidth; i++) {
1335 
1336  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1337  float alpha = QuantumScale*p.w;
1338  float f = filterCache[filterIndex];
1339  float g = alpha * f;
1340 
1341  sum.x += g*p.x;
1342  sum.y += g*p.y;
1343  sum.z += g*p.z;
1344  sum.w += f*p.w;
1345 
1346  gamma += g;
1347  filterIndex++;
1348  cacheIndexX++;
1349  }
1350  cacheIndexY++;
1351  }
1352  gamma = PerceptibleReciprocal(gamma);
1353  sum.xyz = gamma*sum.xyz;
1354  }
1355  CLPixelType outputPixel;
1356  outputPixel.x = ClampToQuantum(sum.x);
1357  outputPixel.y = ClampToQuantum(sum.y);
1358  outputPixel.z = ClampToQuantum(sum.z);
1359  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1360 
1361  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1362  }
1363  )
1364 
1365  STRINGIFY(
1366  __kernel
1367  void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
1368  const uint imageWidth, const uint imageHeight,
1369  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1370  const uint matte, const ChannelType channel) {
1371 
1372  int2 imageIndex;
1373  imageIndex.x = get_global_id(0);
1374  imageIndex.y = get_global_id(1);
1375 
1376  /*
1377  unsigned int imageWidth = get_global_size(0);
1378  unsigned int imageHeight = get_global_size(1);
1379  */
1380  if (imageIndex.x >= imageWidth
1381  || imageIndex.y >= imageHeight)
1382  return;
1383 
1384  int2 midFilterDimen;
1385  midFilterDimen.x = (filterWidth-1)/2;
1386  midFilterDimen.y = (filterHeight-1)/2;
1387 
1388  int filterIndex = 0;
1389  float4 sum = (float4)0.0f;
1390  float gamma = 0.0f;
1391  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1392  for (int j = 0; j < filterHeight; j++) {
1393  int2 inputPixelIndex;
1394  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1395  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1396  for (int i = 0; i < filterWidth; i++) {
1397  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1398  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1399 
1400  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1401  float f = filter[filterIndex];
1402 
1403  sum.x += f * p.x;
1404  sum.y += f * p.y;
1405  sum.z += f * p.z;
1406  sum.w += f * p.w;
1407 
1408  gamma += f;
1409 
1410  filterIndex++;
1411  }
1412  }
1413  }
1414  else {
1415 
1416  for (int j = 0; j < filterHeight; j++) {
1417  int2 inputPixelIndex;
1418  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1419  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1420  for (int i = 0; i < filterWidth; i++) {
1421  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1422  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1423 
1424  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1425  float alpha = QuantumScale*p.w;
1426  float f = filter[filterIndex];
1427  float g = alpha * f;
1428 
1429  sum.x += g*p.x;
1430  sum.y += g*p.y;
1431  sum.z += g*p.z;
1432  sum.w += f*p.w;
1433 
1434  gamma += g;
1435 
1436 
1437  filterIndex++;
1438  }
1439  }
1440  gamma = PerceptibleReciprocal(gamma);
1441  sum.xyz = gamma*sum.xyz;
1442  }
1443 
1444  CLPixelType outputPixel;
1445  outputPixel.x = ClampToQuantum(sum.x);
1446  outputPixel.y = ClampToQuantum(sum.y);
1447  outputPixel.z = ClampToQuantum(sum.z);
1448  outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1449 
1450  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1451  }
1452  )
1453 
1454 /*
1455 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1456 % %
1457 % %
1458 % %
1459 % D e s p e c k l e %
1460 % %
1461 % %
1462 % %
1463 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1464 */
1465 
1466  STRINGIFY(
1467 
1468  __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1469  , const unsigned int imageWidth, const unsigned int imageHeight
1470  , const int2 offset, const int polarity, const int matte) {
1471 
1472  int x = get_global_id(0);
1473  int y = get_global_id(1);
1474 
1475  CLPixelType v = inputImage[y*imageWidth+x];
1476 
1477  int2 neighbor;
1478  neighbor.y = y + offset.y;
1479  neighbor.x = x + offset.x;
1480 
1481  int2 clampedNeighbor;
1482  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1483  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1484 
1485  CLPixelType r = (clampedNeighbor.x == neighbor.x
1486  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1487  :(CLPixelType)0;
1488 
1489  int sv[4];
1490  sv[0] = (int)v.x;
1491  sv[1] = (int)v.y;
1492  sv[2] = (int)v.z;
1493  sv[3] = (int)v.w;
1494 
1495  int sr[4];
1496  sr[0] = (int)r.x;
1497  sr[1] = (int)r.y;
1498  sr[2] = (int)r.z;
1499  sr[3] = (int)r.w;
1500 
1501  if (polarity > 0) {
1502  \n #pragma unroll 4\n
1503  for (unsigned int i = 0; i < 4; i++) {
1504  sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1505  }
1506  }
1507  else {
1508  \n #pragma unroll 4\n
1509  for (unsigned int i = 0; i < 4; i++) {
1510  sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1511  }
1512 
1513  }
1514 
1515  v.x = (CLQuantum)sv[0];
1516  v.y = (CLQuantum)sv[1];
1517  v.z = (CLQuantum)sv[2];
1518 
1519  if (matte!=0)
1520  v.w = (CLQuantum)sv[3];
1521 
1522  outputImage[y*imageWidth+x] = v;
1523 
1524  }
1525 
1526 
1527  )
1528 
1529  STRINGIFY(
1530 
1531  __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1532  , const unsigned int imageWidth, const unsigned int imageHeight
1533  , const int2 offset, const int polarity, const int matte) {
1534 
1535  int x = get_global_id(0);
1536  int y = get_global_id(1);
1537 
1538  CLPixelType v = inputImage[y*imageWidth+x];
1539 
1540  int2 neighbor, clampedNeighbor;
1541 
1542  neighbor.y = y + offset.y;
1543  neighbor.x = x + offset.x;
1544  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1545  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1546 
1547  CLPixelType r = (clampedNeighbor.x == neighbor.x
1548  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1549  :(CLPixelType)0;
1550 
1551 
1552  neighbor.y = y - offset.y;
1553  neighbor.x = x - offset.x;
1554  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1555  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1556 
1557  CLPixelType s = (clampedNeighbor.x == neighbor.x
1558  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1559  :(CLPixelType)0;
1560 
1561 
1562  int sv[4];
1563  sv[0] = (int)v.x;
1564  sv[1] = (int)v.y;
1565  sv[2] = (int)v.z;
1566  sv[3] = (int)v.w;
1567 
1568  int sr[4];
1569  sr[0] = (int)r.x;
1570  sr[1] = (int)r.y;
1571  sr[2] = (int)r.z;
1572  sr[3] = (int)r.w;
1573 
1574  int ss[4];
1575  ss[0] = (int)s.x;
1576  ss[1] = (int)s.y;
1577  ss[2] = (int)s.z;
1578  ss[3] = (int)s.w;
1579 
1580  if (polarity > 0) {
1581  \n #pragma unroll 4\n
1582  for (unsigned int i = 0; i < 4; i++) {
1583  //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1584  //
1585  //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1586  //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1587  sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1588  }
1589  }
1590  else {
1591  \n #pragma unroll 4\n
1592  for (unsigned int i = 0; i < 4; i++) {
1593  //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1594  //
1595  //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1596  sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1597  }
1598  }
1599 
1600  v.x = (CLQuantum)sv[0];
1601  v.y = (CLQuantum)sv[1];
1602  v.z = (CLQuantum)sv[2];
1603 
1604  if (matte!=0)
1605  v.w = (CLQuantum)sv[3];
1606 
1607  outputImage[y*imageWidth+x] = v;
1608 
1609  }
1610  )
1611 
1612 /*
1613 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1614 % %
1615 % %
1616 % %
1617 % E q u a l i z e %
1618 % %
1619 % %
1620 % %
1621 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1622 */
1623 
1624  STRINGIFY(
1625  /*
1626  */
1627  __kernel void Equalize(__global CLPixelType * restrict im,
1628  const ChannelType channel,
1629  __global CLPixelType * restrict equalize_map,
1630  const float4 white, const float4 black)
1631  {
1632  const int x = get_global_id(0);
1633  const int y = get_global_id(1);
1634  const int columns = get_global_size(0);
1635  const int c = x + y * columns;
1636 
1637  uint ePos;
1638  CLPixelType oValue, eValue;
1639  CLQuantum red, green, blue, alpha;
1640 
1641  //read from global
1642  oValue=im[c];
1643 
1644  if ((channel & SyncChannels) != 0)
1645  {
1646  if (getRedF4(white) != getRedF4(black))
1647  {
1648  ePos = ScaleQuantumToMap(getRed(oValue));
1649  eValue = equalize_map[ePos];
1650  red = getRed(eValue);
1651  ePos = ScaleQuantumToMap(getGreen(oValue));
1652  eValue = equalize_map[ePos];
1653  green = getRed(eValue);
1654  ePos = ScaleQuantumToMap(getBlue(oValue));
1655  eValue = equalize_map[ePos];
1656  blue = getRed(eValue);
1657  ePos = ScaleQuantumToMap(getAlpha(oValue));
1658  eValue = equalize_map[ePos];
1659  alpha = getRed(eValue);
1660 
1661  //write back
1662  im[c]=(CLPixelType)(blue, green, red, alpha);
1663  }
1664 
1665  }
1666 
1667  // for equalizing, we always need all channels?
1668  // otherwise something more
1669 
1670  }
1671  )
1672 
1673 /*
1674 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1675 % %
1676 % %
1677 % %
1678 % F u n c t i o n %
1679 % %
1680 % %
1681 % %
1682 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1683 */
1684 
1685  STRINGIFY(
1686  /*
1687  apply FunctionImageChannel(braightness-contrast)
1688  */
1689  CLQuantum ApplyFunction(float pixel,const MagickFunction function,
1690  const unsigned int number_parameters,__constant float *parameters)
1691  {
1692  float result = 0.0f;
1693 
1694  switch (function)
1695  {
1696  case PolynomialFunction:
1697  {
1698  for (unsigned int i=0; i < number_parameters; i++)
1699  result = result*QuantumScale*pixel + parameters[i];
1700  result *= QuantumRange;
1701  break;
1702  }
1703  case SinusoidFunction:
1704  {
1705  float freq,phase,ampl,bias;
1706  freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1707  phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1708  ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1709  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1710  result = QuantumRange*(ampl*sin(2.0f*MagickPI*
1711  (freq*QuantumScale*pixel + phase/360.0f)) + bias);
1712  break;
1713  }
1714  case ArcsinFunction:
1715  {
1716  float width,range,center,bias;
1717  width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1718  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1719  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1720  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1721 
1722  result = 2.0f/width*(QuantumScale*pixel - center);
1723  result = range/MagickPI*asin(result)+bias;
1724  result = ( result <= -1.0f ) ? bias - range/2.0f : result;
1725  result = ( result >= 1.0f ) ? bias + range/2.0f : result;
1726  result *= QuantumRange;
1727  break;
1728  }
1729  case ArctanFunction:
1730  {
1731  float slope,range,center,bias;
1732  slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1733  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1734  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1735  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1736  result = MagickPI*slope*(QuantumScale*pixel-center);
1737  result = QuantumRange*(range/MagickPI*atan(result) + bias);
1738  break;
1739  }
1740  case UndefinedFunction:
1741  break;
1742  }
1743  return(ClampToQuantum(result));
1744  }
1745  )
1746 
1747  STRINGIFY(
1748  /*
1749  Improve brightness / contrast of the image
1750  channel : define which channel is improved
1751  function : the function called to enchance the brightness contrast
1752  number_parameters : numbers of parameters
1753  parameters : the parameter
1754  */
1755  __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
1756  const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
1757  __constant float *parameters)
1758  {
1759  const unsigned int x = get_global_id(0);
1760  const unsigned int y = get_global_id(1);
1761  const unsigned int columns = get_global_size(0);
1762  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1763 
1764  float red;
1765  float green;
1766  float blue;
1767  float alpha;
1768 
1769  ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
1770 
1771  if ((channel & RedChannel) != 0)
1772  red=ApplyFunction(red, function, number_parameters, parameters);
1773 
1774  if (number_channels > 2)
1775  {
1776  if ((channel & GreenChannel) != 0)
1777  green=ApplyFunction(green, function, number_parameters, parameters);
1778 
1779  if ((channel & BlueChannel) != 0)
1780  blue=ApplyFunction(blue, function, number_parameters, parameters);
1781  }
1782 
1783  if (((number_channels == 4) || (number_channels == 2)) &&
1784  ((channel & AlphaChannel) != 0))
1785  alpha=ApplyFunction(alpha, function, number_parameters, parameters);
1786 
1787  WriteChannels(p, number_channels, channel, red, green, blue, alpha);
1788  }
1789  )
1790 
1791 /*
1792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1793 % %
1794 % %
1795 % %
1796 % G r a y s c a l e %
1797 % %
1798 % %
1799 % %
1800 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1801 */
1802 
1803  STRINGIFY(
1804  __kernel void Grayscale(__global CLQuantum *image,const int number_channels,
1805  const unsigned int colorspace,const unsigned int method)
1806  {
1807  const unsigned int x = get_global_id(0);
1808  const unsigned int y = get_global_id(1);
1809  const unsigned int columns = get_global_size(0);
1810  __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1811 
1812  float
1813  blue,
1814  green,
1815  red;
1816 
1817  red=getPixelRed(p);
1818  green=getPixelGreen(p);
1819  blue=getPixelBlue(p);
1820 
1821  CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue));
1822 
1823  setPixelRed(p,intensity);
1824  setPixelGreen(p,intensity);
1825  setPixelBlue(p,intensity);
1826  }
1827  )
1828 
1829 /*
1830 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1831 % %
1832 % %
1833 % %
1834 % L o c a l C o n t r a s t %
1835 % %
1836 % %
1837 % %
1838 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1839 */
1840 
1841  STRINGIFY(
1842 
1843  __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
1844  const int radius,
1845  const int imageWidth,
1846  const int imageHeight)
1847  {
1848  const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
1849 
1850  int x = get_local_id(0);
1851  int y = get_global_id(1);
1852 
1853  if ((x >= imageWidth) || (y >= imageHeight))
1854  return;
1855 
1856  global CLPixelType *src = srcImage + y * imageWidth;
1857 
1858  for (int i = x; i < imageWidth; i += get_local_size(0)) {
1859  float sum = 0.0f;
1860  float weight = 1.0f;
1861 
1862  int j = i - radius;
1863  while ((j + 7) < i) {
1864  for (int k = 0; k < 8; ++k) // Unroll 8x
1865  sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
1866  weight += 8.0f;
1867  j+=8;
1868  }
1869  while (j < i) {
1870  sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
1871  weight += 1.0f;
1872  ++j;
1873  }
1874 
1875  while ((j + 7) < radius + i) {
1876  for (int k = 0; k < 8; ++k) // Unroll 8x
1877  sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
1878  weight -= 8.0f;
1879  j+=8;
1880  }
1881  while (j < radius + i) {
1882  sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
1883  weight -= 1.0f;
1884  ++j;
1885  }
1886 
1887  tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
1888  }
1889  }
1890  )
1891 
1892  STRINGIFY(
1893  __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
1894  const int radius,
1895  const float strength,
1896  const int imageWidth,
1897  const int imageHeight)
1898  {
1899  const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
1900 
1901  int x = get_global_id(0);
1902  int y = get_global_id(1);
1903 
1904  if ((x >= imageWidth) || (y >= imageHeight))
1905  return;
1906 
1907  global float *src = blurImage + x;
1908 
1909  float sum = 0.0f;
1910  float weight = 1.0f;
1911 
1912  int j = y - radius;
1913  while ((j + 7) < y) {
1914  for (int k = 0; k < 8; ++k) // Unroll 8x
1915  sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
1916  weight += 8.0f;
1917  j+=8;
1918  }
1919  while (j < y) {
1920  sum += weight * src[mirrorBottom(j) * imageWidth];
1921  weight += 1.0f;
1922  ++j;
1923  }
1924 
1925  while ((j + 7) < radius + y) {
1926  for (int k = 0; k < 8; ++k) // Unroll 8x
1927  sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
1928  weight -= 8.0f;
1929  j+=8;
1930  }
1931  while (j < radius + y) {
1932  sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
1933  weight -= 1.0f;
1934  ++j;
1935  }
1936 
1937  CLPixelType pixel = srcImage[x + y * imageWidth];
1938  float srcVal = dot(RGB, convert_float4(pixel));
1939  float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
1940  mult = (srcVal + mult) / srcVal;
1941 
1942  pixel.x = ClampToQuantum(pixel.x * mult);
1943  pixel.y = ClampToQuantum(pixel.y * mult);
1944  pixel.z = ClampToQuantum(pixel.z * mult);
1945 
1946  dstImage[x + y * imageWidth] = pixel;
1947  }
1948  )
1949 
1950 /*
1951 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1952 % %
1953 % %
1954 % %
1955 % M o d u l a t e %
1956 % %
1957 % %
1958 % %
1959 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1960 */
1961 
1962  STRINGIFY(
1963 
1964  inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1965  float *hue, float *saturation, float *lightness)
1966  {
1967  float
1968  c,
1969  tmax,
1970  tmin;
1971 
1972  /*
1973  Convert RGB to HSL colorspace.
1974  */
1975  tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
1976  tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
1977 
1978  c=tmax-tmin;
1979 
1980  *lightness=(tmax+tmin)/2.0;
1981  if (c <= 0.0)
1982  {
1983  *hue=0.0;
1984  *saturation=0.0;
1985  return;
1986  }
1987 
1988  if (tmax == (QuantumScale*red))
1989  {
1990  *hue=(QuantumScale*green-QuantumScale*blue)/c;
1991  if ((QuantumScale*green) < (QuantumScale*blue))
1992  *hue+=6.0;
1993  }
1994  else
1995  if (tmax == (QuantumScale*green))
1996  *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
1997  else
1998  *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
1999 
2000  *hue*=60.0/360.0;
2001  if (*lightness <= 0.5)
2002  *saturation=c/(2.0*(*lightness));
2003  else
2004  *saturation=c/(2.0-2.0*(*lightness));
2005  }
2006 
2007  inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
2008  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2009  {
2010  float
2011  b,
2012  c,
2013  g,
2014  h,
2015  tmin,
2016  r,
2017  x;
2018 
2019  /*
2020  Convert HSL to RGB colorspace.
2021  */
2022  h=hue*360.0;
2023  if (lightness <= 0.5)
2024  c=2.0*lightness*saturation;
2025  else
2026  c=(2.0-2.0*lightness)*saturation;
2027  tmin=lightness-0.5*c;
2028  h-=360.0*floor(h/360.0);
2029  h/=60.0;
2030  x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2031  switch ((int) floor(h) % 6)
2032  {
2033  case 0:
2034  {
2035  r=tmin+c;
2036  g=tmin+x;
2037  b=tmin;
2038  break;
2039  }
2040  case 1:
2041  {
2042  r=tmin+x;
2043  g=tmin+c;
2044  b=tmin;
2045  break;
2046  }
2047  case 2:
2048  {
2049  r=tmin;
2050  g=tmin+c;
2051  b=tmin+x;
2052  break;
2053  }
2054  case 3:
2055  {
2056  r=tmin;
2057  g=tmin+x;
2058  b=tmin+c;
2059  break;
2060  }
2061  case 4:
2062  {
2063  r=tmin+x;
2064  g=tmin;
2065  b=tmin+c;
2066  break;
2067  }
2068  case 5:
2069  {
2070  r=tmin+c;
2071  g=tmin;
2072  b=tmin+x;
2073  break;
2074  }
2075  default:
2076  {
2077  r=0.0;
2078  g=0.0;
2079  b=0.0;
2080  }
2081  }
2082  *red=ClampToQuantum(QuantumRange*r);
2083  *green=ClampToQuantum(QuantumRange*g);
2084  *blue=ClampToQuantum(QuantumRange*b);
2085  }
2086 
2087  inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
2088  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2089  {
2090  float
2091  hue,
2092  lightness,
2093  saturation;
2094 
2095  /*
2096  Increase or decrease color lightness, saturation, or hue.
2097  */
2098  ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
2099  hue+=0.5*(0.01*percent_hue-1.0);
2100  while (hue < 0.0)
2101  hue+=1.0;
2102  while (hue >= 1.0)
2103  hue-=1.0;
2104  saturation*=0.01*percent_saturation;
2105  lightness*=0.01*percent_lightness;
2106  ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
2107  }
2108 
2109  __kernel void Modulate(__global CLPixelType *im,
2110  const float percent_brightness,
2111  const float percent_hue,
2112  const float percent_saturation,
2113  const int colorspace)
2114  {
2115 
2116  const int x = get_global_id(0);
2117  const int y = get_global_id(1);
2118  const int columns = get_global_size(0);
2119  const int c = x + y * columns;
2120 
2121  CLPixelType pixel = im[c];
2122 
2123  CLQuantum
2124  blue,
2125  green,
2126  red;
2127 
2128  red=getRed(pixel);
2129  green=getGreen(pixel);
2130  blue=getBlue(pixel);
2131 
2132  switch (colorspace)
2133  {
2134  case HSLColorspace:
2135  default:
2136  {
2137  ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2138  &red, &green, &blue);
2139  }
2140 
2141  }
2142 
2143  CLPixelType filteredPixel;
2144 
2145  setRed(&filteredPixel, red);
2146  setGreen(&filteredPixel, green);
2147  setBlue(&filteredPixel, blue);
2148  filteredPixel.w = pixel.w;
2149 
2150  im[c] = filteredPixel;
2151  }
2152  )
2153 
2154 /*
2155 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2156 % %
2157 % %
2158 % %
2159 % M o t i o n B l u r %
2160 % %
2161 % %
2162 % %
2163 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2164 */
2165 
2166  STRINGIFY(
2167  __kernel
2168  void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2169  const unsigned int imageWidth, const unsigned int imageHeight,
2170  const __global float *filter, const unsigned int width, const __global int2* offset,
2171  const float4 bias,
2172  const ChannelType channel, const unsigned int matte) {
2173 
2174  int2 currentPixel;
2175  currentPixel.x = get_global_id(0);
2176  currentPixel.y = get_global_id(1);
2177 
2178  if (currentPixel.x >= imageWidth
2179  || currentPixel.y >= imageHeight)
2180  return;
2181 
2182  float4 pixel;
2183  pixel.x = (float)bias.x;
2184  pixel.y = (float)bias.y;
2185  pixel.z = (float)bias.z;
2186  pixel.w = (float)bias.w;
2187 
2188  if (((channel & AlphaChannel) == 0) || (matte == 0)) {
2189 
2190  for (int i = 0; i < width; i++) {
2191  // only support EdgeVirtualPixelMethod through ClampToCanvas
2192  // TODO: implement other virtual pixel method
2193  int2 samplePixel = currentPixel + offset[i];
2194  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2195  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2196  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2197 
2198  pixel.x += (filter[i] * (float)samplePixelValue.x);
2199  pixel.y += (filter[i] * (float)samplePixelValue.y);
2200  pixel.z += (filter[i] * (float)samplePixelValue.z);
2201  pixel.w += (filter[i] * (float)samplePixelValue.w);
2202  }
2203 
2204  CLPixelType outputPixel;
2205  outputPixel.x = ClampToQuantum(pixel.x);
2206  outputPixel.y = ClampToQuantum(pixel.y);
2207  outputPixel.z = ClampToQuantum(pixel.z);
2208  outputPixel.w = ClampToQuantum(pixel.w);
2209  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2210  }
2211  else {
2212 
2213  float gamma = 0.0f;
2214  for (int i = 0; i < width; i++) {
2215  // only support EdgeVirtualPixelMethod through ClampToCanvas
2216  // TODO: implement other virtual pixel method
2217  int2 samplePixel = currentPixel + offset[i];
2218  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2219  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2220 
2221  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2222 
2223  float alpha = QuantumScale*samplePixelValue.w;
2224  float k = filter[i];
2225  pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2226  pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2227  pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2228 
2229  pixel.w += k * alpha * samplePixelValue.w;
2230 
2231  gamma+=k*alpha;
2232  }
2233  gamma = PerceptibleReciprocal(gamma);
2234  pixel.xyz = gamma*pixel.xyz;
2235 
2236  CLPixelType outputPixel;
2237  outputPixel.x = ClampToQuantum(pixel.x);
2238  outputPixel.y = ClampToQuantum(pixel.y);
2239  outputPixel.z = ClampToQuantum(pixel.z);
2240  outputPixel.w = ClampToQuantum(pixel.w);
2241  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2242  }
2243  }
2244  )
2245 
2246 /*
2247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2248 % %
2249 % %
2250 % %
2251 % R e s i z e %
2252 % %
2253 % %
2254 % %
2255 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2256 */
2257 
2258  STRINGIFY(
2259  // Based on Box from resize.c
2260  float BoxResizeFilter(const float x)
2261  {
2262  return 1.0f;
2263  }
2264  )
2265 
2266  STRINGIFY(
2267  // Based on CubicBC from resize.c
2268  float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2269  {
2270  /*
2271  Cubic Filters using B,C determined values:
2272  Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2273  Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2274  Spline B = 1 C = 0 B-Spline Gaussian approximation
2275  Hermite B = 0 C = 0 B-Spline interpolator
2276 
2277  See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2278  Graphics Computer Graphics, Volume 22, Number 4, August 1988
2279  http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2280  Mitchell.pdf.
2281 
2282  Coefficents are determined from B,C values:
2283  P0 = ( 6 - 2*B )/6 = coeff[0]
2284  P1 = 0
2285  P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2286  P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2287  Q0 = ( 8*B +24*C )/6 = coeff[3]
2288  Q1 = ( -12*B -48*C )/6 = coeff[4]
2289  Q2 = ( 6*B +30*C )/6 = coeff[5]
2290  Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2291 
2292  which are used to define the filter:
2293 
2294  P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2295  Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2296 
2297  which ensures function is continuous in value and derivative (slope).
2298  */
2299  if (x < 1.0)
2300  return(resizeFilterCoefficients[0]+x*(x*
2301  (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2302  if (x < 2.0)
2303  return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2304  (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
2305  return(0.0);
2306  }
2307  )
2308 
2309  STRINGIFY(
2310  float Sinc(const float x)
2311  {
2312  if (x != 0.0f)
2313  {
2314  const float alpha=(float) (MagickPI*x);
2315  return sinpi(x)/alpha;
2316  }
2317  return(1.0f);
2318  }
2319  )
2320 
2321  STRINGIFY(
2322  float Triangle(const float x)
2323  {
2324  /*
2325  1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
2326  a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
2327  for Sinc().
2328  */
2329  return ((x<1.0f)?(1.0f-x):0.0f);
2330  }
2331  )
2332 
2333 
2334  STRINGIFY(
2335  float Hann(const float x)
2336  {
2337  /*
2338  Cosine window function:
2339  0.5+0.5*cos(pi*x).
2340  */
2341  const float cosine=cos((MagickPI*x));
2342  return(0.5f+0.5f*cosine);
2343  }
2344  )
2345 
2346  STRINGIFY(
2347  float Hamming(const float x)
2348  {
2349  /*
2350  Offset cosine window function:
2351  .54 + .46 cos(pi x).
2352  */
2353  const float cosine=cos((MagickPI*x));
2354  return(0.54f+0.46f*cosine);
2355  }
2356  )
2357 
2358  STRINGIFY(
2359  float Blackman(const float x)
2360  {
2361  /*
2362  Blackman: 2nd order cosine windowing function:
2363  0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2364 
2365  Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
2366  five flops.
2367  */
2368  const float cosine=cos((MagickPI*x));
2369  return(0.34f+cosine*(0.5f+cosine*0.16f));
2370  }
2371  )
2372 
2373  STRINGIFY(
2374  inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2375  {
2376  switch (filterType)
2377  {
2378  /* Call Sinc even for SincFast to get better precision on GPU
2379  and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2380  case SincWeightingFunction:
2382  return Sinc(x);
2384  return CubicBC(x,filterCoefficients);
2385  case BoxWeightingFunction:
2386  return BoxResizeFilter(x);
2388  return Triangle(x);
2389  case HannWeightingFunction:
2390  return Hann(x);
2392  return Hamming(x);
2394  return Blackman(x);
2395 
2396  default:
2397  return 0.0f;
2398  }
2399  }
2400  )
2401 
2402 
2403  STRINGIFY(
2404  inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2405  , const ResizeWeightingFunctionType resizeWindowType
2406  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2407  {
2408  float scale;
2409  float xBlur = fabs(x/resizeFilterBlur);
2410  if (resizeWindowSupport < MagickEpsilon
2411  || resizeWindowType == BoxWeightingFunction)
2412  {
2413  scale = 1.0f;
2414  }
2415  else
2416  {
2417  scale = resizeFilterScale;
2418  scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2419  }
2420  float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2421  return weight;
2422  }
2423 
2424  )
2425 
2426  ;
2427  const char *accelerateKernels2 =
2428 
2429  STRINGIFY(
2430 
2431  inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2432  return (numWorkItems/pixelPerWorkgroup);
2433  }
2434 
2435  // returns the index of the pixel for the current workitem to compute.
2436  // returns -1 if this workitem doesn't need to participate in any computation
2437  inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2438  const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2439  int pixelIndex = itemID/numWorkItemsPerPixel;
2440  pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
2441  return pixelIndex;
2442  }
2443 
2444  )
2445 
2446  STRINGIFY(
2447  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2448  void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2449  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2450  const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
2451  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2452  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2453  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2454  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2455  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2456  {
2457  // calculate the range of resized image pixels computed by this workgroup
2458  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2459  const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2460  const unsigned int actualNumPixelToCompute = stopX - startX;
2461 
2462  // calculate the range of input image pixels to cache
2463  float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2464  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2465  scale = PerceptibleReciprocal(scale);
2466 
2467  const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2468  const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2469 
2470  // cache the input pixels into local memory
2471  const unsigned int y = get_global_id(1);
2472  const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
2473  const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
2474  event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
2475  wait_group_events(1, &e);
2476 
2477  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2478  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2479  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2480  {
2481  const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2482  const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2483  const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2484 
2485  // determine which resized pixel computed by this workitem
2486  const unsigned int itemID = get_local_id(0);
2487  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2488 
2489  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2490 
2491  float4 filteredPixel = (float4)0.0f;
2492  float density = 0.0f;
2493  float gamma = 0.0f;
2494  // -1 means this workitem doesn't participate in the computation
2495  if (pixelIndex != -1)
2496  {
2497  // x coordinated of the resized pixel computed by this workitem
2498  const int x = chunkStartX + pixelIndex;
2499 
2500  // calculate how many steps required for this pixel
2501  const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2502  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2503  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
2504  const unsigned int n = stop - start;
2505 
2506  // calculate how many steps this workitem will contribute
2507  unsigned int numStepsPerWorkItem = n / numItems;
2508  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2509 
2510  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2511  if (startStep < n)
2512  {
2513  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2514 
2515  unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2516  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2517  {
2518  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2519  (ResizeWeightingFunctionType) resizeFilterType,
2520  (ResizeWeightingFunctionType) resizeWindowType,
2521  resizeFilterScale, resizeFilterWindowSupport,
2522  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2523 
2524  float4 cp = (float4)0.0f;
2525 
2526  __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
2527  cp.x = (float) *(p);
2528  if (number_channels > 2)
2529  {
2530  cp.y = (float) *(p + 1);
2531  cp.z = (float) *(p + 2);
2532  }
2533 
2534  if (alpha_index != 0)
2535  {
2536  cp.w = (float) *(p + alpha_index);
2537 
2538  float alpha = weight * QuantumScale * cp.w;
2539 
2540  filteredPixel.x += alpha * cp.x;
2541  filteredPixel.y += alpha * cp.y;
2542  filteredPixel.z += alpha * cp.z;
2543  filteredPixel.w += weight * cp.w;
2544  gamma += alpha;
2545  }
2546  else
2547  filteredPixel += ((float4) weight)*cp;
2548 
2549  density += weight;
2550  }
2551  }
2552  }
2553 
2554  // initialize the accumulators to zero
2555  if (itemID < actualNumPixelInThisChunk) {
2556  outputPixelCache[itemID] = (float4)0.0f;
2557  densityCache[itemID] = 0.0f;
2558  if (alpha_index != 0)
2559  gammaCache[itemID] = 0.0f;
2560  }
2561  barrier(CLK_LOCAL_MEM_FENCE);
2562 
2563  // accumulatte the filtered pixel value and the density
2564  for (unsigned int i = 0; i < numItems; i++) {
2565  if (pixelIndex != -1) {
2566  if (itemID%numItems == i) {
2567  outputPixelCache[pixelIndex]+=filteredPixel;
2568  densityCache[pixelIndex]+=density;
2569  if (alpha_index != 0)
2570  gammaCache[pixelIndex]+=gamma;
2571  }
2572  }
2573  barrier(CLK_LOCAL_MEM_FENCE);
2574  }
2575 
2576  if (itemID < actualNumPixelInThisChunk)
2577  {
2578  float4 filteredPixel = outputPixelCache[itemID];
2579 
2580  float gamma = 0.0f;
2581  if (alpha_index != 0)
2582  gamma = gammaCache[itemID];
2583 
2584  float density = densityCache[itemID];
2585  if ((density != 0.0f) && (density != 1.0f))
2586  {
2587  density = PerceptibleReciprocal(density);
2588  filteredPixel *= (float4) density;
2589  if (alpha_index != 0)
2590  gamma *= density;
2591  }
2592 
2593  if (alpha_index != 0)
2594  {
2595  gamma = PerceptibleReciprocal(gamma);
2596  filteredPixel.x *= gamma;
2597  filteredPixel.y *= gamma;
2598  filteredPixel.z *= gamma;
2599  }
2600 
2601  WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
2602  }
2603  }
2604  }
2605  )
2606 
2607 
2608  STRINGIFY(
2609  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2610  void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
2611  const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
2612  const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
2613  const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
2614  const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
2615  const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
2616  const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
2617  __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
2618  {
2619  // calculate the range of resized image pixels computed by this workgroup
2620  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2621  const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2622  const unsigned int actualNumPixelToCompute = stopY - startY;
2623 
2624  // calculate the range of input image pixels to cache
2625  float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2626  const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2627  scale = PerceptibleReciprocal(scale);
2628 
2629  const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2630  const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2631 
2632  // cache the input pixels into local memory
2633  const unsigned int x = get_global_id(0);
2634  unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
2635  unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
2636  unsigned int stride = inputColumns * number_channels;
2637  for (unsigned int i = 0; i < number_channels; i++)
2638  {
2639  event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
2640  wait_group_events(1,&e);
2641  }
2642 
2643  unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2644  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2645  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2646  {
2647  const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2648  const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2649  const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2650 
2651  // determine which resized pixel computed by this workitem
2652  const unsigned int itemID = get_local_id(1);
2653  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2654 
2655  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2656 
2657  float4 filteredPixel = (float4)0.0f;
2658  float density = 0.0f;
2659  float gamma = 0.0f;
2660  // -1 means this workitem doesn't participate in the computation
2661  if (pixelIndex != -1)
2662  {
2663  // x coordinated of the resized pixel computed by this workitem
2664  const int y = chunkStartY + pixelIndex;
2665 
2666  // calculate how many steps required for this pixel
2667  const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2668  const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2669  const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
2670  const unsigned int n = stop - start;
2671 
2672  // calculate how many steps this workitem will contribute
2673  unsigned int numStepsPerWorkItem = n / numItems;
2674  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2675 
2676  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2677  if (startStep < n)
2678  {
2679  const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2680 
2681  unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2682  for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2683  {
2684  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2685  (ResizeWeightingFunctionType) resizeFilterType,
2686  (ResizeWeightingFunctionType) resizeWindowType,
2687  resizeFilterScale, resizeFilterWindowSupport,
2688  resizeFilterBlur, scale*(start + i - bisect + 0.5));
2689 
2690  float4 cp = (float4)0.0f;
2691 
2692  __local CLQuantum *p = inputImageCache + cacheIndex;
2693  cp.x = (float) *(p);
2694  if (number_channels > 2)
2695  {
2696  cp.y = (float) *(p + rangeLength);
2697  cp.z = (float) *(p + (rangeLength * 2));
2698  }
2699 
2700  if (alpha_index != 0)
2701  {
2702  cp.w = (float) *(p + (rangeLength * alpha_index));
2703 
2704  float alpha = weight * QuantumScale * cp.w;
2705 
2706  filteredPixel.x += alpha * cp.x;
2707  filteredPixel.y += alpha * cp.y;
2708  filteredPixel.z += alpha * cp.z;
2709  filteredPixel.w += weight * cp.w;
2710  gamma += alpha;
2711  }
2712  else
2713  filteredPixel += ((float4) weight)*cp;
2714 
2715  density += weight;
2716  }
2717  }
2718  }
2719 
2720  // initialize the accumulators to zero
2721  if (itemID < actualNumPixelInThisChunk) {
2722  outputPixelCache[itemID] = (float4)0.0f;
2723  densityCache[itemID] = 0.0f;
2724  if (alpha_index != 0)
2725  gammaCache[itemID] = 0.0f;
2726  }
2727  barrier(CLK_LOCAL_MEM_FENCE);
2728 
2729  // accumulatte the filtered pixel value and the density
2730  for (unsigned int i = 0; i < numItems; i++) {
2731  if (pixelIndex != -1) {
2732  if (itemID%numItems == i) {
2733  outputPixelCache[pixelIndex]+=filteredPixel;
2734  densityCache[pixelIndex]+=density;
2735  if (alpha_index != 0)
2736  gammaCache[pixelIndex]+=gamma;
2737  }
2738  }
2739  barrier(CLK_LOCAL_MEM_FENCE);
2740  }
2741 
2742  if (itemID < actualNumPixelInThisChunk)
2743  {
2744  float4 filteredPixel = outputPixelCache[itemID];
2745 
2746  float gamma = 0.0f;
2747  if (alpha_index != 0)
2748  gamma = gammaCache[itemID];
2749 
2750  float density = densityCache[itemID];
2751  if ((density != 0.0f) && (density != 1.0f))
2752  {
2753  density = PerceptibleReciprocal(density);
2754  filteredPixel *= (float4) density;
2755  if (alpha_index != 0)
2756  gamma *= density;
2757  }
2758 
2759  if (alpha_index != 0)
2760  {
2761  gamma = PerceptibleReciprocal(gamma);
2762  filteredPixel.x *= gamma;
2763  filteredPixel.y *= gamma;
2764  filteredPixel.z *= gamma;
2765  }
2766 
2767  WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
2768  }
2769  }
2770  }
2771  )
2772 
2773 /*
2774 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2775 % %
2776 % %
2777 % %
2778 % R o t a t i o n a l B l u r %
2779 % %
2780 % %
2781 % %
2782 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2783 */
2784 
2785  STRINGIFY(
2786  __kernel void RotationalBlur(const __global CLQuantum *image,
2787  const unsigned int number_channels,const unsigned int channel,
2788  const float2 blurCenter,__constant float *cos_theta,
2789  __constant float *sin_theta,const unsigned int cossin_theta_size,
2790  __global CLQuantum *filteredImage)
2791  {
2792  const int x = get_global_id(0);
2793  const int y = get_global_id(1);
2794  const int columns = get_global_size(0);
2795  const int rows = get_global_size(1);
2796  unsigned int step = 1;
2797  float center_x = (float) x - blurCenter.x;
2798  float center_y = (float) y - blurCenter.y;
2799  float radius = hypot(center_x, center_y);
2800 
2801  float blur_radius = hypot(blurCenter.x, blurCenter.y);
2802 
2803  if (radius > MagickEpsilon)
2804  {
2805  step = (unsigned int) (blur_radius / radius);
2806  if (step == 0)
2807  step = 1;
2808  if (step >= cossin_theta_size)
2809  step = cossin_theta_size-1;
2810  }
2811 
2812  float4 result = 0.0f;
2813  float normalize = 0.0f;
2814  float gamma = 0.0f;
2815 
2816  for (unsigned int i=0; i<cossin_theta_size; i+=step)
2817  {
2818  int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
2819  int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
2820 
2821  float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
2822 
2823  if ((number_channels == 4) || (number_channels == 2))
2824  {
2825  float alpha = (float)(QuantumScale*pixel.w);
2826 
2827  gamma += alpha;
2828 
2829  result.x += alpha * pixel.x;
2830  result.y += alpha * pixel.y;
2831  result.z += alpha * pixel.z;
2832  result.w += pixel.w;
2833  }
2834  else
2835  result += pixel;
2836 
2837  normalize += 1.0f;
2838  }
2839 
2840  normalize = PerceptibleReciprocal(normalize);
2841 
2842  if ((number_channels == 4) || (number_channels == 2))
2843  {
2844  gamma = PerceptibleReciprocal(gamma);
2845  result.x *= gamma;
2846  result.y *= gamma;
2847  result.z *= gamma;
2848  result.w *= normalize;
2849  }
2850  else
2851  result *= normalize;
2852 
2853  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
2854  }
2855  )
2856 
2857 /*
2858 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2859 % %
2860 % %
2861 % %
2862 % U n s h a r p M a s k %
2863 % %
2864 % %
2865 % %
2866 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2867 */
2868 
2869  STRINGIFY(
2870  __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
2871  const __global float4 *blurRowData,const unsigned int number_channels,
2872  const ChannelType channel,const unsigned int columns,
2873  const unsigned int rows,__local float4* cachedData,
2874  __local float* cachedFilter,const __global float *filter,
2875  const unsigned int width,const float gain, const float threshold,
2876  __global CLQuantum *filteredImage)
2877  {
2878  const unsigned int radius = (width-1)/2;
2879 
2880  // cache the pixel shared by the workgroup
2881  const int groupX = get_group_id(0);
2882  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
2883  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
2884 
2885  if ((groupStartY >= 0) && (groupStopY < rows))
2886  {
2887  event_t e = async_work_group_strided_copy(cachedData,
2888  blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
2889  wait_group_events(1,&e);
2890  }
2891  else
2892  {
2893  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
2894  cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
2895 
2896  barrier(CLK_LOCAL_MEM_FENCE);
2897  }
2898  // cache the filter as well
2899  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
2900  wait_group_events(1,&e);
2901 
2902  // only do the work if this is not a patched item
2903  const int cy = get_global_id(1);
2904 
2905  if (cy < rows)
2906  {
2907  float4 blurredPixel = (float4) 0.0f;
2908 
2909  int i = 0;
2910 
2911  for ( ; i+7 < width; )
2912  {
2913  for (int j=0; j < 8; j++, i++)
2914  blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
2915  }
2916 
2917  for ( ; i < width; i++)
2918  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
2919 
2920  float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
2921  float4 outputPixel = inputImagePixel - blurredPixel;
2922 
2923  float quantumThreshold = QuantumRange*threshold;
2924 
2925  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
2926  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
2927 
2928  //write back
2929  WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
2930  }
2931  }
2932  )
2933 
2934  STRINGIFY(
2935  __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
2936  const ChannelType channel,__constant float *filter,const unsigned int width,
2937  const unsigned int columns,const unsigned int rows,__local float4 *pixels,
2938  const float gain,const float threshold,__global CLQuantum *filteredImage)
2939  {
2940  const unsigned int x = get_global_id(0);
2941  const unsigned int y = get_global_id(1);
2942 
2943  const unsigned int radius = (width - 1) / 2;
2944 
2945  int row = y - radius;
2946  int baseRow = get_group_id(1) * get_local_size(1) - radius;
2947  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
2948 
2949  while (row < endRow) {
2950  int srcy = (row < 0) ? -row : row; // mirror pad
2951  srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
2952 
2953  float4 value = 0.0f;
2954 
2955  int ix = x - radius;
2956  int i = 0;
2957 
2958  while (i + 7 < width) {
2959  for (int j = 0; j < 8; ++j) { // unrolled
2960  int srcx = ix + j;
2961  srcx = (srcx < 0) ? -srcx : srcx;
2962  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2963  value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2964  }
2965  ix += 8;
2966  i += 8;
2967  }
2968 
2969  while (i < width) {
2970  int srcx = (ix < 0) ? -ix : ix; // mirror pad
2971  srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2972  value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
2973  ++i;
2974  ++ix;
2975  }
2976  pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
2977  row += get_local_size(1);
2978  }
2979 
2980  barrier(CLK_LOCAL_MEM_FENCE);
2981 
2982  const int px = get_local_id(0);
2983  const int py = get_local_id(1);
2984  const int prp = get_local_size(0);
2985  float4 value = (float4)(0.0f);
2986 
2987  int i = 0;
2988  while (i + 7 < width) {
2989  for (int j = 0; j < 8; ++j) // unrolled
2990  value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
2991  i += 8;
2992  }
2993  while (i < width) {
2994  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
2995  ++i;
2996  }
2997 
2998  if ((x < columns) && (y < rows)) {
2999  float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
3000  float4 diff = srcPixel - value;
3001 
3002  float quantumThreshold = QuantumRange*threshold;
3003 
3004  int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3005  value = select(srcPixel + diff * gain, srcPixel, mask);
3006 
3007  WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
3008  }
3009  }
3010  )
3011 
3012  STRINGIFY(
3013  __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
3014  void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
3015  const unsigned int number_channels,const unsigned int max_channels,
3016  const float threshold,const int passes,const unsigned int imageWidth,
3017  const unsigned int imageHeight)
3018  {
3019  const int pad = (1 << (passes - 1));
3020  const int tileSize = 64;
3021  const int tileRowPixels = 64;
3022  const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3023 
3024  CLQuantum stage[48]; // 16 * 3 (we only need 3 channels)
3025 
3026  local float buffer[64 * 64];
3027 
3028  int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3029  int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
3030 
3031  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3032  int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
3033  (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
3034 
3035  for (int channel = 0; channel < max_channels; ++channel)
3036  stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
3037  }
3038 
3039  for (int channel = 0; channel < max_channels; ++channel) {
3040  // Load LDS
3041  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3042  buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
3043 
3044  // Process
3045 
3046  float tmp[16];
3047  float accum[16];
3048  float pixel;
3049 
3050  for (int i = 0; i < 16; i++)
3051  accum[i]=0.0f;
3052 
3053  for (int pass = 0; pass < passes; ++pass) {
3054  const int radius = 1 << pass;
3055  const int x = get_local_id(0);
3056  const float thresh = threshold * noise[pass];
3057 
3058  // Apply horizontal hat
3059  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3060  const int offset = i * tileRowPixels;
3061  if (pass == 0)
3062  tmp[i / 4] = buffer[x + offset]; // snapshot input on first pass
3063  pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3064  barrier(CLK_LOCAL_MEM_FENCE);
3065  buffer[x + offset] = pixel;
3066  }
3067  barrier(CLK_LOCAL_MEM_FENCE);
3068 
3069  // Apply vertical hat
3070  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3071  pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3072  float delta = tmp[i / 4] - pixel;
3073  tmp[i / 4] = pixel; // hold output in tmp until all workitems are done
3074  if (delta < -thresh)
3075  delta += thresh;
3076  else if (delta > thresh)
3077  delta -= thresh;
3078  else
3079  delta = 0;
3080  accum[i / 4] += delta;
3081  }
3082  barrier(CLK_LOCAL_MEM_FENCE);
3083 
3084  if (pass < passes - 1)
3085  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3086  buffer[x + i * tileRowPixels] = tmp[i / 4]; // store lowpass for next pass
3087  else // last pass
3088  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3089  accum[i / 4] += tmp[i / 4]; // add the lowpass signal back to output
3090  barrier(CLK_LOCAL_MEM_FENCE);
3091  }
3092 
3093  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3094  stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]);
3095 
3096  barrier(CLK_LOCAL_MEM_FENCE);
3097  }
3098 
3099  // Write from stage to output
3100 
3101  if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3102  for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3103  if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3104  int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
3105  for (int channel = 0; channel < max_channels; ++channel) {
3106  dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
3107  }
3108  }
3109  }
3110  }
3111  }
3112  )
3113 
3114  ;
3115 
3116 #endif // MAGICKCORE_OPENCL_SUPPORT
3117 
3118 #if defined(__cplusplus) || defined(c_plusplus)
3119 }
3120 #endif
3121 
3122 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
#define SigmaPoisson
MagickExport void ConvertRGBToHSL(const double red, const double green, const double blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1099
#define SigmaRandom
static double Hann(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:322
PixelIntensityMethod
Definition: pixel.h:94
static double Blackman(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:149
#define MagickPI
Definition: image-private.h:30
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, double *red, double *green, double *blue)
Definition: enhance.c:3016
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
#define MagickEpsilon
Definition: magick-type.h:110
#define SigmaLaplacian
MagickPrivate void ConvertRGBToHSB(const double, const double, const double, double *, double *, double *)
NoiseType
Definition: fx.h:27
#define SigmaUniform
static double PerceptibleReciprocal(const double x)
#define SigmaGaussian
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:919
static void Contrast(const int sign, double *red, double *green, double *blue)
Definition: enhance.c:839
#define SigmaMultiplicativeGaussian
#define TauGaussian
static double Triangle(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:543
#define QuantumScale
Definition: magick-type.h:113
ChannelType
Definition: pixel.h:33
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, double *red, double *green, double *blue)
Definition: gem.c:462
#define MaxMap
Definition: magick-type.h:75
static double RoundToUnity(const double value)
#define MagickMax(x, y)
Definition: image-private.h:26
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:84
static double Sinc(const double, const ResizeFilter *)
static double CubicBC(const double x, const ResizeFilter *resize_filter)
Definition: resize.c:207
ResizeWeightingFunctionType
static double Hamming(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:334
#define MagickMin(x, y)
Definition: image-private.h:27
ColorspaceType
Definition: colorspace.h:25
#define SigmaImpulse
CompositeOperator
Definition: composite.h:25
MagickPrivate void ConvertHSBToRGB(const double, const double, const double, double *, double *, double *)
MagickExport MagickRealType GetPixelIntensity(const Image *magick_restrict image, const Quantum *magick_restrict pixel)
Definition: pixel.c:2356
MagickFunction
Definition: statistic.h:121
#define QuantumRange
Definition: magick-type.h:83
Definition: fx.h:36