api/MagickCore/token_8c_source.html

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

%                    TTTTT   OOO   K   K  EEEEE  N   N                        %

%                      T    O   O  K  K   E      NN  N                        %

%                      T    O   O  KKK    EEE    N N N                        %

%                      T    O   O  K  K   E      N  NN                        %

%                      T     OOO   K   K  EEEEE  N   N                        %

%                                                                             %

%                                                                             %

%                         MagickCore Token Methods                            %

%                                                                             %

%                             Software Design                                 %

%                                  Cristy                                     %

%                              January 1993                                   %

%                                                                             %

%                                                                             %

%  Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization         %

%  dedicated to making software imaging solutions freely available.           %

%                                                                             %

%  You may not use this file except in compliance with the License.  You may  %

%  obtain a copy of the License at                                            %

%                                                                             %

%    https://imagemagick.org/license/                                         %

%                                                                             %

%  Unless required by applicable law or agreed to in writing, software        %

%  distributed under the License is distributed on an "AS IS" BASIS,          %

%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %

%  See the License for the specific language governing permissions and        %

%  limitations under the License.                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%

%

*/

␌

/*

  Include declarations.

*/

#include "MagickCore/studio.h"

#include "MagickCore/exception.h"

#include "MagickCore/exception-private.h"

#include "MagickCore/image.h"

#include "MagickCore/image-private.h"

#include "MagickCore/locale-private.h"

#include "MagickCore/memory_.h"

#include "MagickCore/memory-private.h"

#include "MagickCore/string_.h"

#include "MagickCore/string-private.h"

#include "MagickCore/token.h"

#include "MagickCore/token-private.h"

#include "MagickCore/utility.h"

#include "MagickCore/utility-private.h"

␌

/*

  Typedef declarations.

*/


/*
  Scanner state shared by Tokenizer() and StoreToken().
*/
struct _TokenInfo
{
  int state;              /* current scanner state, one of the IN_* values */
  MagickStatusType flag;  /* caller flags; StoreToken() reads the low two bits */
  ssize_t offset;         /* index in the token buffer of the next stored char */
  char quote;             /* quote character that opened the current token */
  size_t signature;       /* MagickCoreSignature while the structure is live */
};


␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

%   A c q u i r e T o k e n I n f o                                           %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  AcquireTokenInfo() allocates the TokenInfo structure.

%

%  The format of the AcquireTokenInfo method is:

%

%      TokenInfo *AcquireTokenInfo()

%

*/

MagickExport TokenInfo *AcquireTokenInfo(void)
{
  TokenInfo
    *token_info;

  token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
  /*
    Zero every field so state, flag, offset and quote never hold
    indeterminate values if they are inspected before Tokenizer() assigns
    them; only then stamp the signature that DestroyTokenInfo() asserts on.
  */
  (void) memset(token_info,0,sizeof(*token_info));
  token_info->signature=MagickCoreSignature;
  return(token_info);
}

␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

%   D e s t r o y T o k e n I n f o                                           %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  DestroyTokenInfo() deallocates memory associated with a TokenInfo

%  structure.

%

%  The format of the DestroyTokenInfo method is:

%

%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)

%

%  A description of each parameter follows:

%

%    o token_info: Specifies a pointer to a TokenInfo structure.

%

*/

MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
{
  /*
    Refuse anything that is not a live TokenInfo.
  */
  assert(token_info != (TokenInfo *) NULL);
  assert(token_info->signature == MagickCoreSignature);
  if (IsEventLogging() != MagickFalse)
    {
      (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
    }
  /*
    Invalidate the signature before releasing the memory so a stale pointer
    fails the signature assertion above.  The caller receives whatever
    RelinquishMagickMemory() hands back.
  */
  token_info->signature=(~MagickCoreSignature);
  return((TokenInfo *) RelinquishMagickMemory(token_info));
}

␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

+   G e t N e x t T o k e n                                                   %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  GetNextToken() gets a token from the token stream.  A token is defined as

%  a sequence of characters delimited by whitespace (e.g. clip-path), a

%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in

%  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these

%  separator characters: ':', '=', ',', and ';'.  GetNextToken() returns the

%  length of the consumed token.

%

%  The format of the GetNextToken method is:

%

%      size_t GetNextToken(const char *magick_restrict start,

%        const char **magick_restrict end,const size_t extent,

%        char *magick_restrict token)

%

%  A description of each parameter follows:

%

%    o start: the start of the token sequence.

%

%    o end: point to the end of the token sequence.

%

%    o extent: maximum extent of the token.

%

%    o token: copy the token to this buffer.

%

*/

/*
  Implementation notes: at most extent-1 characters are stored in token,
  which is always NUL terminated.  The return value is (p-start+1), where p
  is the position reached after skipping the whitespace that follows the
  token; the same position is stored in *end when end is not NULL.
*/
MagickExport magick_hot_spot size_t GetNextToken(

  const char *magick_restrict start,const char **magick_restrict end,

  const size_t extent,char *magick_restrict token)

{

  char

    *magick_restrict q;


  const char

    *magick_restrict p;


  double

    value;


  ssize_t

    i;


  assert(start != (const char *) NULL);

  assert(token != (char *) NULL);

  i=0;

  p=start;
  /* Skip leading whitespace. */
  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))

    p++;

  switch (*p)

  {

    case '\0':

      break;

    case '"':

    case '\'':

    case '`':

    case '{':

    {

      char

        escape;

      /*
        Quoted token: pick the closing delimiter.  A backtick is closed by a
        single quote and '{' by '}'.  The delimiters are not copied.
      */
      switch (*p)

      {

        case '"': escape='"'; break;

        case '\'': escape='\''; break;

        case '`': escape='\''; break;

        case '{': escape='}'; break;

        default: escape=(*p); break;

      }

      for (p++; *p != '\0'; p++)

      {
        /* A backslash before the delimiter or another backslash is dropped. */
        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))

          p++;

        else

          if (*p == escape)

            {
              /* Closing delimiter: step past it and finish the token. */
              p++;

              break;

            }

        if (i < (ssize_t) (extent-1))

          token[i++]=(*p);
        /* Stop scanning once extent-1 input characters have been examined. */
        if ((size_t) (p-start) >= (extent-1))

          break;

      }

      break;

    }

    case '/':

    {
      /* A slash forms the token "/", or "/>" or "//" with the next char. */
      if (i < (ssize_t) (extent-1))

        token[i++]=(*p);

      p++;

      if ((*p == '>') || (*p == '/'))

        {

          if (i < (ssize_t) (extent-1))

            token[i++]=(*p);

          p++;

        }

      break;

    }

    default:

    {
      /* Try a number first; q marks where StringToDouble() stopped parsing. */
      value=StringToDouble(p,&q);

      (void) value;

      if ((p != q) && (*p != ','))

        {

          for ( ; (p < q) && (*p != ','); p++)

          {

            if (i < (ssize_t) (extent-1))

              token[i++]=(*p);

            if ((size_t) (p-start) >= (extent-1))

              break;

          }
          /* A '%' directly after the number is part of the token. */
          if (*p == '%')

            {

              if (i < (ssize_t) (extent-1))

                token[i++]=(*p);

              p++;

            }

          break;

        }
      /*
        A character that is not a letter, the directory separator, '#' or '<'
        is returned as a one-character token.
      */
      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&

          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))

        {

          if (i < (ssize_t) (extent-1))

            token[i++]=(*p);

          p++;

          break;

        }
      /* Word token: runs to unescaped whitespace or one of = , : ; */
      for ( ; *p != '\0'; p++)

      {

        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||

            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))

          break;
        /* A '<' ends the token unless it is the token's first character. */
        if ((i > 0) && (*p == '<'))

          break;

        if (i < (ssize_t) (extent-1))

          token[i++]=(*p);
        /* A '>' is copied and then ends the token; p stays on the '>'. */
        if (*p == '>')

          break;

        if (*p == '(')

          {
            /* Copy through the first ')' not preceded by a backslash. */
            for (p++; *p != '\0'; p++)

            {

              if (i < (ssize_t) (extent-1))

                token[i++]=(*p);

              if ((*p == ')') && (*(p-1) != '\\'))

                break;

              if ((size_t) (p-start) >= (extent-1))

                break;

            }

            if (*p == '\0')

              break;

          }

        if ((size_t) (p-start) >= (extent-1))

          break;

      }

      break;

    }

  }

  token[i]='\0';
  /*
    Reduce "url(#name)" to "name": cut at the last ')' and shift the text
    after the 5-character prefix, including its terminator, to the front.
  */
  if (LocaleNCompare(token,"url(#",5) == 0)

    {

      q=strrchr(token,')');

      if (q != (char *) NULL)

        {

          *q='\0';

          (void) memmove(token,token+5,(size_t) (q-token-4));

        }

    }
  /* Skip whitespace after the token so *end is where the next scan starts. */
  while (isspace((int) ((unsigned char) *p)) != 0)

    p++;

  if (end != (const char **) NULL)

    *end=(const char *) p;

  return((size_t) (p-start+1));

}

␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

%   G l o b E x p r e s s i o n                                               %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  GlobExpression() returns MagickTrue if the expression matches the pattern.

%

%  The format of the GlobExpression function is:

%

%      MagickBooleanType GlobExpression(const char *magick_restrict expression,

%        const char *magick_restrict pattern,

%        const MagickBooleanType case_insensitive)

%

%  A description of each parameter follows:

%

%    o expression: Specifies a pointer to a text string containing a file name.

%

%    o pattern: Specifies a pointer to a text string containing a pattern.

%

%    o case_insensitive: set to MagickTrue to ignore the case when matching

%      an expression.

%

*/


/*
  GlobExpression_() is the recursive worker behind GlobExpression().

  It walks expression and pattern one UTF-8 sequence at a time and supports
  '*' (any run of characters), '?' (any single character), '[...]' sets and
  ranges, '{a,b}' alternatives and, except on non-Cygwin Windows builds, a
  backslash that makes the next pattern character literal.

    o expression: the text to test.
    o pattern: the glob pattern; NULL or empty matches everything.
    o case_insensitive: when not MagickFalse, literal characters are compared
      after LocaleToLowercase().  Bracket sets always compare code points
      exactly.
    o depth: current recursion depth.  Beyond MagickMaxRecursionDepth errno
      is set to EOVERFLOW and MagickFalse is returned.

  Returns MagickTrue when the whole expression matches the whole pattern.
*/
static MagickBooleanType GlobExpression_(const char *magick_restrict expression,
  const char *magick_restrict pattern,const MagickBooleanType case_insensitive,
  const size_t depth)
{
  if (depth > MagickMaxRecursionDepth)
    {
      errno=EOVERFLOW;
      return(MagickFalse);
    }
  /*
    Empty pattern or single '*' always matches.
  */
  if (pattern == (const char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if ((GetUTFCode(pattern) == '*') &&
      (GetUTFCode(pattern+GetUTFOctets(pattern)) == 0))
    return(MagickTrue);
  if ((strchr(pattern,'{') == NULL) &&
      (strchr(pattern,'*') == NULL) &&
      (strchr(pattern,'?') == NULL))
    {
      char
        path[MagickPathExtent]= { 0 };

      /*
        If no glob characters exist, ensure no subimage specifier.
      */
      GetPathComponent(pattern,SubimagePath,path);
      if (*path != '\0')
        return(MagickFalse);
    }
  while (GetUTFCode(pattern) != 0)
  {
    int
      ecode = GetUTFCode(expression),
      pcode = GetUTFCode(pattern);

    /*
      Expression exhausted: only '*' and '{' can still match the empty
      remainder; anything else is settled by the final test below.
    */
    if ((ecode == 0) && (pcode != '*') && (pcode != '{'))
      break;
    switch (pcode)
    {
      case '*':
      {
        do
        {
          /*
            Skip consecutive '*'.
          */
          pattern+=GetUTFOctets(pattern);
        }
        while (GetUTFCode(pattern) == '*');
        while (1)
        {
          /*
            Try to match the rest of the pattern at each position of the
            expression, including its end.
          */
          if (GlobExpression_(expression,pattern,case_insensitive,depth+1) != MagickFalse)
            return(MagickTrue);
          if (GetUTFCode(expression) == 0)
            break;
          expression+=GetUTFOctets(expression);
        }
        return(MagickFalse);
      }
      case '?':
      {
        if (ecode == 0)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '[':
      {
        const char
          *p = pattern+GetUTFOctets(pattern),
          *q = pattern+GetUTFOctets(pattern);

        MagickBooleanType
          matched = MagickFalse;

        if (ecode == 0)
          return(MagickFalse);
        /*
          Find the closing ']'; p then walks the set members in [p,q).
        */
        while ((GetUTFCode(q) != 0) && (GetUTFCode(q) != ']'))
          q+=GetUTFOctets(q);
        if (GetUTFCode(q) == 0)
          return(MagickFalse);  /* malformed */
        while (p < q)
        {
          const char
            *next;

          int
            code = GetUTFCode(p);

          size_t
            octets = GetUTFOctets(p);

          if (code == '\\')
            {
              /*
                Escaped member: use the character after the backslash.
              */
              p+=octets;
              code=GetUTFCode(p);
              octets=GetUTFOctets(p);
            }
          next=p+octets;
          if ((next < q) && (GetUTFCode(next) == '-'))
            {
              int
                ncode;

              /*
                Range "code-ncode"; the upper bound may itself be escaped.
              */
              next+=GetUTFOctets(next);
              ncode=GetUTFCode(next);
              if (ncode == '\\')
                {
                  next+=GetUTFOctets(next);
                  ncode=GetUTFCode(next);
                }
              if ((ecode >= code) && (ecode <= ncode))
                matched=MagickTrue;
              p=next+GetUTFOctets(next);
            }
          else
            {
              if (ecode == code)
                matched=MagickTrue;
              p+=octets;
            }
        }
        /*
          No member of the set matched the expression character.
        */
        if (matched == MagickFalse)
          return(MagickFalse);
        pattern=q+GetUTFOctets(q);  /* skip ']' */
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        char
          *a,
          *alternative;

        const char
          *p,
          *q;

        size_t
          remaining = MagickPathExtent;

        pattern+=GetUTFOctets(pattern);  /* Skip '{' */
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        /*
          Locate the closing brace; q is the pattern text that follows it and
          is appended to every alternative.
        */
        p=pattern;
        while ((GetUTFCode(p) != 0) && (GetUTFCode(p) != '}'))
        {
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
          if (GetUTFCode(p) == '\\')
            {
              p+=GetUTFOctets(p);
              if (GetUTFCode(p) == 0)
                break;
            }
#endif
          p+=GetUTFOctets(p);
        }
        if (GetUTFCode(p) != '}')
          return(MagickFalse);  /* malformed */
        q=p+GetUTFOctets(p);
        alternative=AcquireString(pattern);
        a=alternative;
        /*
          NOTE(review): the loop below ends an alternative at the first ','
          or '}' without looking for a preceding backslash, unlike the scan
          above.
        */
        while (1)
        {
          int
            code = GetUTFCode(pattern);

          size_t
            octets;

          if ((code == 0) || (code == ',') || (code == '}'))
            {
              char
                *subpattern;

              MagickBooleanType
                match;

              /*
                Try alternative as a full sub-pattern.
              */
              *a='\0';
              subpattern=AcquireString(alternative);
              if (ConcatenateString(&subpattern,q) == MagickFalse)
                {
                  subpattern=DestroyString(subpattern);
                  alternative=DestroyString(alternative);
                  return(MagickFalse);
                }
              match=GlobExpression_(expression,subpattern,case_insensitive,
                depth+1);
              subpattern=DestroyString(subpattern);
              if (match != MagickFalse)
                {
                  alternative=DestroyString(alternative);
                  return(MagickTrue);
                }
              /*
                Reset buffer for next alternative.
              */
              a=alternative;
              remaining=MagickPathExtent;
              if (code == ',')
                {
                  pattern+=GetUTFOctets(pattern);  /* skip ',' */
                  continue;
                }
              break;  /* '}' or end */
            }
          /*
            Copy UTF-8 sequence into alternative; give up when a single
            alternative would not fit in MagickPathExtent bytes.
          */
          octets=GetUTFOctets(pattern);
          if ((octets == 0) || (octets >= remaining))
            break;
          (void) memcpy(a,pattern,octets);
          a+=octets;
          remaining-=octets;
          pattern+=octets;
        }
        alternative=DestroyString(alternative);
        return(MagickFalse);
      }
#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
      case '\\':
      {
        /*
          Skip the backslash; the character it escapes is compared literally
          by the default case.
        */
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          return(MagickFalse);
        magick_fallthrough;
      }
#endif
      default:
      {
        /*
          Re-read the pattern code point here: when control arrives from the
          backslash case, pcode still holds '\\' and pattern already points
          at the escaped character.
        */
        int
          ec = ecode,
          pc = GetUTFCode(pattern);

        if (ecode == 0)
          return(MagickFalse);
        if (case_insensitive != MagickFalse)
          {
            pc=LocaleToLowercase(pc);
            ec=LocaleToLowercase(ec);
          }
        if (pc != ec)
          return(MagickFalse);
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
    }
  }
  /*
    Trailing '*' match the empty string; a match requires both strings to be
    fully consumed.
  */
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  return(((GetUTFCode(expression) == 0) &&
          (GetUTFCode(pattern) == 0)) ? MagickTrue : MagickFalse);
}


MagickExport MagickBooleanType GlobExpression(
  const char *magick_restrict expression,const char *magick_restrict pattern,
  const MagickBooleanType case_insensitive)
{
  MagickBooleanType
    status;

  /*
    Public entry point: start the recursive matcher at depth zero.
  */
  status=GlobExpression_(expression,pattern,case_insensitive,0);
  return(status);
}

␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

+     I s G l o b                                                             %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  IsGlob() returns MagickTrue if the path specification contains a globbing

%  pattern.

%

%  The format of the IsGlob method is:

%

%      MagickBooleanType IsGlob(const char *path)

%

%  A description of each parameter follows:

%

%    o path: the path.

%

*/

MagickPrivate MagickBooleanType IsGlob(const char *path)
{
  const char
    *cursor;

  /*
    A path that IsPathAccessible() accepts is never treated as a glob, even
    if it contains glob characters.
  */
  if (IsPathAccessible(path) != MagickFalse)
    return(MagickFalse);
  /*
    Otherwise any one of * ? { } [ ] marks the path as a glob pattern.
  */
  for (cursor=path; *cursor != '\0'; cursor++)
    if ((*cursor == '*') || (*cursor == '?') || (*cursor == '{') ||
        (*cursor == '}') || (*cursor == '[') || (*cursor == ']'))
      return(MagickTrue);
  return(MagickFalse);
}

␌

/*

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%                                                                             %

%                                                                             %

%                                                                             %

%   T o k e n i z e r                                                         %

%                                                                             %

%                                                                             %

%                                                                             %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%

%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens

%  one at a time from a string of characters.  The characters used for white

%  space, for break characters, and for quotes can be specified.  Also,

%  characters in the string can be preceded by a specifiable escape character

%  which removes any special meaning the character may have.

%

%  Here is some terminology:

%

%    o token: A single unit of information in the form of a group of

%      characters.

%

%    o white space: Space that gets ignored (except within quotes or when

%      escaped), like blanks and tabs. in addition, white space terminates a

%      non-quoted token.

%

%    o break set: One or more characters that separates non-quoted tokens.

%      Commas are a common break character. The usage of break characters to

%      signal the end of a token is the same as that of white space, except

%      multiple break characters with nothing or only white space between

%      generate a null token for each two break characters together.

%

%      For example, if blank is set to be the white space and comma is set to

%      be the break character, the line

%

%        A, B, C ,  , DEF

%

%        ... consists of 5 tokens:

%

%        1)  "A"

%        2)  "B"

%        3)  "C"

%        4)  "" (the null string)

%        5)  "DEF"

%

%    o Quote character: A character that, when surrounding a group of other

%      characters, causes the group of characters to be treated as a single

%      token, no matter how many white spaces or break characters exist in

%      the group. Also, a token always terminates after the closing quote.

%      For example, if ' is the quote character, blank is white space, and

%      comma is the break character, the following string

%

%        A, ' B, CD'EF GHI

%

%        ... consists of 4 tokens:

%

%        1)  "A"

%        2)  " B, CD" (note the blanks & comma)

%        3)  "EF"

%        4)  "GHI"

%

%      The quote characters themselves do not appear in the resultant

%      tokens.  The double quotes are delimiters i use here for

%      documentation purposes only.

%

%    o Escape character: A character which itself is ignored but which

%      causes the next character to be used as is.  ^ and \ are often used

%      as escape characters. An escape in the last position of the string

%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,

%      and non-escape) character. For example, assume white space, break

%      character, and quote are the same as in the above examples, and

%      further, assume that ^ is the escape character. Then, in the string

%

%        ABC, ' DEF ^' GH' I ^ J K^ L ^

%

%        ... there are 7 tokens:

%

%        1)  "ABC"

%        2)  " DEF ' GH"

%        3)  "I"

%        4)  " "     (a lone blank)

%        5)  "J"

%        6)  "K L"

%        7)  "^"     (passed as is at end of line)

%

%  The format of the Tokenizer method is:

%

%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,

%        const size_t max_token_length,const char *line,const char *white,

%        const char *break_set,const char *quote,const char escape,

%        char *breaker,int *next,char *quoted)

%

%  A description of each parameter follows:

%

%    o flag: right now, only the low order 2 bits are used.

%

%        1 => convert non-quoted tokens to upper case

%        2 => convert non-quoted tokens to lower case

%        0 => do not convert non-quoted tokens

%

%    o token: a character string containing the returned next token

%

%    o max_token_length: the maximum size of "token".  Characters beyond

%      "max_token_length" are truncated.

%

%    o string: the string to be parsed.

%

%    o white: a string of the valid white spaces.  example:

%

%        char whitesp[]={" \t"};

%

%      blank and tab will be valid white space.

%

%    o break: a string of the valid break characters. example:

%

%        char breakch[]={";,"};

%

%      semicolon and comma will be valid break characters.

%

%    o quote: a string of the valid quote characters. An example would be

%

%        char quotech[]={"'\""};

%

%      (this causes single and double quotes to be valid) Note that a

%      token starting with one of these characters needs the same quote

%      character to terminate it.

%

%      for example:

%

%        "ABC '

%

%      is unterminated, but

%

%        "DEF" and 'GHI'

%

%      are properly terminated.  Note that different quote characters

%      can appear on the same line; only for a given token do the quote

%      characters have to be the same.

%

%    o escape: the escape character (NOT a string ... only one

%      allowed). Use zero if none is desired.

%

%    o breaker: the break character used to terminate the current

%      token.  If the token was quoted, this will be the quote used.  If

%      the token is the last one on the line, this will be zero.

%

%    o next: this variable points to the first character of the

%      next token.  it gets reset by "tokenizer" as it steps through the

%      string.  Set it to 0 upon initialization, and leave it alone

%      after that.  You can change it if you want to jump around in the

%      string or re-parse from the beginning, but be careful.

%

%    o quoted: set to MagickTrue if the token was quoted and MagickFalse

%      if not.  You may need this information (for example:  in C, a

%      string with quotes around it is a character string, while one

%      without is an identifier).

%

%    o result: 0 if we haven't reached EOS (end of string), and 1

%      if we have.

%

*/


/* Scanner states kept in TokenInfo.state by Tokenizer(). */
#define IN_WHITE 0
/* IN_WHITE: initial state, no token character has been stored yet. */
#define IN_TOKEN 1
/* IN_TOKEN: inside an unquoted token. */
#define IN_QUOTE 2
/* IN_QUOTE: inside a quoted token; break and white characters are stored. */
#define IN_OZONE 3
/* IN_OZONE: after a closing quote, or after white space that followed an
   unquoted token; the next ordinary, quote or escape character ends the
   token. */


/*
  Return the zero-based position of the first character in string equal to
  c, or -1 when string does not contain it.  The terminating NUL is never
  examined, so searching for 0 yields -1.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}


/*
  Append character c to the token buffer at token_info->offset and advance
  the offset.  The character is dropped when the offset is negative or the
  buffer already holds max_token_length-1 characters.  Outside a quoted
  token the low two bits of token_info->flag select upper case (1) or lower
  case (2) conversion.
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  int
    stored = c;

  if (token_info->offset < 0)
    return;
  if ((size_t) token_info->offset >= (max_token_length-1))
    return;
  if (token_info->state != IN_QUOTE)
    {
      if ((token_info->flag & 0x03) == 1)
        stored=LocaleToUppercase(c);
      else
        if ((token_info->flag & 0x03) == 2)
          stored=LocaleToLowercase(c);
    }
  string[token_info->offset++]=(char) stored;
}


/*
  Implementation notes: parsing starts at line[*next] and *next is advanced
  as characters are consumed.  Returns 1 without touching token when
  line[*next] is already the end of the string, otherwise 0 with token NUL
  terminated.  Each character is classified in this order: break character,
  quote character, white space, escape character, ordinary character.
*/
MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,

  char *token,const size_t max_token_length,const char *line,const char *white,

  const char *break_set,const char *quote,const char escape,char *breaker,

  int *next,char *quoted)

{

  int

    c;


  ssize_t

    i;


  *breaker='\0';

  *quoted='\0';

  if (line[*next] == '\0')

    return(1);

  token_info->state=IN_WHITE;

  token_info->quote=(char) MagickFalse;

  token_info->flag=flag;

  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)

  {

    c=(int) line[*next];

    i=sindex(c,break_set);

    if (i >= 0)

      {
        /* Break character: ends the token unless it is inside quotes. */
        switch (token_info->state)

        {

          case IN_WHITE:

          case IN_TOKEN:

          case IN_OZONE:

          {
            /* Consume the break character and report it to the caller. */
            (*next)++;

            *breaker=break_set[i];

            token[token_info->offset]='\0';

            return(0);

          }

          case IN_QUOTE:

          {

            StoreToken(token_info,token,max_token_length,c);

            break;

          }

        }

        continue;

      }

    i=sindex(c,quote);

    if (i >= 0)

      {
        /* Quote character. */
        switch (token_info->state)

        {

          case IN_WHITE:

          {
            /* Opening quote: remember which quote must close the token. */
            token_info->state=IN_QUOTE;

            token_info->quote=quote[i];

            *quoted=(char) MagickTrue;

            break;

          }

          case IN_QUOTE:

          {
            /* A different quote character is data; the same one closes. */
            if (quote[i] != token_info->quote)

              StoreToken(token_info,token,max_token_length,c);

            else

              {

                token_info->state=IN_OZONE;

                token_info->quote='\0';

              }

            break;

          }

          case IN_TOKEN:

          case IN_OZONE:

          {
            /* End the token; *next is left on the quote for the next call. */
            *breaker=(char) c;

            token[token_info->offset]='\0';

            return(0);

          }

        }

        continue;

      }

    i=sindex(c,white);

    if (i >= 0)

      {
        /* White space: skipped, ends an unquoted token, kept inside quotes. */
        switch (token_info->state)

        {

          case IN_WHITE:

          case IN_OZONE:

            break;

          case IN_TOKEN:

          {

            token_info->state=IN_OZONE;

            break;

          }

          case IN_QUOTE:

          {

            StoreToken(token_info,token,max_token_length,c);

            break;

          }

        }

        continue;

      }

    if (c == (int) escape)

      {
        /* An escape that is the last character of the line is stored as is. */
        if (line[(*next)+1] == '\0')

          {

            *breaker='\0';

            StoreToken(token_info,token,max_token_length,c);

            (*next)++;

            token[token_info->offset]='\0';

            return(0);

          }

        switch (token_info->state)

        {

          case IN_WHITE:

          {
            /*
              Step back so the loop increment lands on the escape again,
              this time in the IN_TOKEN state.
            */
            (*next)--;

            token_info->state=IN_TOKEN;

            break;

          }

          case IN_TOKEN:

          case IN_QUOTE:

          {
            /* Store the character after the escape without classifying it. */
            (*next)++;

            c=(int) line[*next];

            StoreToken(token_info,token,max_token_length,c);

            break;

          }

          case IN_OZONE:

          {
            /* The escape starts the next token; leave *next pointing at it. */
            token[token_info->offset]='\0';

            return(0);

          }

        }

        continue;

      }
    /* Ordinary character. */
    switch (token_info->state)

    {

      case IN_WHITE:

      {

        token_info->state=IN_TOKEN;

        StoreToken(token_info,token,max_token_length,c);

        break;

      }

      case IN_TOKEN:

      case IN_QUOTE:

      {

        StoreToken(token_info,token,max_token_length,c);

        break;

      }

      case IN_OZONE:

      {
        /* First character of the next token; do not consume it. */
        token[token_info->offset]='\0';

        return(0);

      }

    }

  }
  /* End of line reached: terminate whatever was collected. */
  token[token_info->offset]='\0';

  return(0);

}

_TokenInfo
Definition token.c:62