MagickCore 7.1.2
Convert, Edit, Or Compose Bitmap Images
Loading...
Searching...
No Matches
token.c
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% Cristy %
17% January 1993 %
18% %
19% %
20% Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization %
21% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% https://imagemagick.org/license/ %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "MagickCore/studio.h"
44#include "MagickCore/exception.h"
45#include "MagickCore/exception-private.h"
46#include "MagickCore/image.h"
47#include "MagickCore/image-private.h"
48#include "MagickCore/locale-private.h"
49#include "MagickCore/memory_.h"
50#include "MagickCore/memory-private.h"
51#include "MagickCore/string_.h"
52#include "MagickCore/string-private.h"
53#include "MagickCore/token.h"
54#include "MagickCore/token-private.h"
55#include "MagickCore/utility.h"
56#include "MagickCore/utility-private.h"
57
58/*
59 Typedef declarations.
60*/
62{
63 int
64 state;
65
66 MagickStatusType
67 flag;
68
69 ssize_t
70 offset;
71
72 char
73 quote;
74
75 size_t
76 signature;
77};
78
79/*
80%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
81% %
82% %
83% %
84% A c q u i r e T o k e n I n f o %
85% %
86% %
87% %
88%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
89%
90% AcquireTokenInfo() allocates the TokenInfo structure.
91%
92% The format of the AcquireTokenInfo method is:
93%
94% TokenInfo *AcquireTokenInfo()
95%
96*/
97MagickExport TokenInfo *AcquireTokenInfo(void)
98{
99 TokenInfo
100 *token_info;
101
102 token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
103 token_info->signature=MagickCoreSignature;
104 return(token_info);
105}
106
107/*
108%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109% %
110% %
111% %
112% D e s t r o y T o k e n I n f o %
113% %
114% %
115% %
116%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117%
118% DestroyTokenInfo() deallocates memory associated with a TokenInfo
119% structure.
120%
121% The format of the DestroyTokenInfo method is:
122%
123% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124%
125% A description of each parameter follows:
126%
127% o token_info: Specifies a pointer to a TokenInfo structure.
128%
129*/
130MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131{
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickCoreSignature);
134 if (IsEventLogging() != MagickFalse)
135 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
136 token_info->signature=(~MagickCoreSignature);
137 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138 return(token_info);
139}
140
141/*
142%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143% %
144% %
145% %
146+ G e t N e x t T o k e n %
147% %
148% %
149% %
150%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151%
152% GetNextToken() gets a token from the token stream. A token is defined as
153% a sequence of characters delimited by whitespace (e.g. clip-path), a
154% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155% parentheses (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
156% separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
157% length of the consumed token.
158%
159% The format of the GetNextToken method is:
160%
161% size_t GetNextToken(const char *magick_restrict start,
162% const char **magick_restrict end,const size_t extent,
163% char *magick_restrict token)
164%
165% A description of each parameter follows:
166%
167% o start: the start of the token sequence.
168%
169% o end: point to the end of the token sequence.
170%
171% o extent: maximum extent of the token.
172%
173% o token: copy the token to this buffer.
174%
175*/
176MagickExport magick_hot_spot size_t GetNextToken(
177 const char *magick_restrict start,const char **magick_restrict end,
178 const size_t extent,char *magick_restrict token)
179{
180 char
181 *magick_restrict q;
182
183 const char
184 *magick_restrict p;
185
186 double
187 value;
188
189 ssize_t
190 i;
191
192 assert(start != (const char *) NULL);
193 assert(token != (char *) NULL);
194 i=0;
195 p=start;
196 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197 p++;
198 switch (*p)
199 {
200 case '\0':
201 break;
202 case '"':
203 case '\'':
204 case '`':
205 case '{':
206 {
207 char
208 escape;
209
210 switch (*p)
211 {
212 case '"': escape='"'; break;
213 case '\'': escape='\''; break;
214 case '`': escape='\''; break;
215 case '{': escape='}'; break;
216 default: escape=(*p); break;
217 }
218 for (p++; *p != '\0'; p++)
219 {
220 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221 p++;
222 else
223 if (*p == escape)
224 {
225 p++;
226 break;
227 }
228 if (i < (ssize_t) (extent-1))
229 token[i++]=(*p);
230 if ((size_t) (p-start) >= (extent-1))
231 break;
232 }
233 break;
234 }
235 case '/':
236 {
237 if (i < (ssize_t) (extent-1))
238 token[i++]=(*p);
239 p++;
240 if ((*p == '>') || (*p == '/'))
241 {
242 if (i < (ssize_t) (extent-1))
243 token[i++]=(*p);
244 p++;
245 }
246 break;
247 }
248 default:
249 {
250 value=StringToDouble(p,&q);
251 (void) value;
252 if ((p != q) && (*p != ','))
253 {
254 for ( ; (p < q) && (*p != ','); p++)
255 {
256 if (i < (ssize_t) (extent-1))
257 token[i++]=(*p);
258 if ((size_t) (p-start) >= (extent-1))
259 break;
260 }
261 if (*p == '%')
262 {
263 if (i < (ssize_t) (extent-1))
264 token[i++]=(*p);
265 p++;
266 }
267 break;
268 }
269 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
270 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
271 {
272 if (i < (ssize_t) (extent-1))
273 token[i++]=(*p);
274 p++;
275 break;
276 }
277 for ( ; *p != '\0'; p++)
278 {
279 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
280 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
281 break;
282 if ((i > 0) && (*p == '<'))
283 break;
284 if (i < (ssize_t) (extent-1))
285 token[i++]=(*p);
286 if (*p == '>')
287 break;
288 if (*p == '(')
289 {
290 for (p++; *p != '\0'; p++)
291 {
292 if (i < (ssize_t) (extent-1))
293 token[i++]=(*p);
294 if ((*p == ')') && (*(p-1) != '\\'))
295 break;
296 if ((size_t) (p-start) >= (extent-1))
297 break;
298 }
299 if (*p == '\0')
300 break;
301 }
302 if ((size_t) (p-start) >= (extent-1))
303 break;
304 }
305 break;
306 }
307 }
308 token[i]='\0';
309 if (LocaleNCompare(token,"url(#",5) == 0)
310 {
311 q=strrchr(token,')');
312 if (q != (char *) NULL)
313 {
314 *q='\0';
315 (void) memmove(token,token+5,(size_t) (q-token-4));
316 }
317 }
318 while (isspace((int) ((unsigned char) *p)) != 0)
319 p++;
320 if (end != (const char **) NULL)
321 *end=(const char *) p;
322 return((size_t) (p-start+1));
323}
324
325/*
326%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
327% %
328% %
329% %
330% G l o b E x p r e s s i o n %
331% %
332% %
333% %
334%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335%
336% GlobExpression() returns MagickTrue if the expression matches the pattern.
337%
338% The format of the GlobExpression function is:
339%
340% MagickBooleanType GlobExpression(const char *magick_restrict expression,
341% const char *magick_restrict pattern,
342% const MagickBooleanType case_insensitive)
343%
344% A description of each parameter follows:
345%
346% o expression: Specifies a pointer to a text string containing a file name.
347%
348% o pattern: Specifies a pointer to a text string containing a pattern.
349%
350% o case_insensitive: set to MagickTrue to ignore the case when matching
351% an expression.
352%
353*/
354
355static MagickBooleanType GlobExpression_(const char *magick_restrict expression,
356 const char *magick_restrict pattern,const MagickBooleanType case_insensitive,
357 const size_t depth)
358{
359 if (depth > MagickMaxRecursionDepth)
360 {
361 errno=EOVERFLOW;
362 return(MagickFalse);
363 }
364 /*
365 Empty pattern or single '*' always matches.
366 */
367 if (pattern == (const char *) NULL)
368 return(MagickTrue);
369 if (GetUTFCode(pattern) == 0)
370 return(MagickTrue);
371 if ((GetUTFCode(pattern) == '*') &&
372 (GetUTFCode(pattern+GetUTFOctets(pattern)) == 0))
373 return(MagickTrue);
374 if ((strchr(pattern,'{') == NULL) &&
375 (strchr(pattern,'*') == NULL) &&
376 (strchr(pattern,'?') == NULL))
377 {
378 char
379 path[MagickPathExtent]= { 0 };
380
381 /*
382 If no glob characters exist, ensure no subimage specifier.
383 */
384 GetPathComponent(pattern,SubimagePath,path);
385 if (*path != '\0')
386 return(MagickFalse);
387 }
388 while (GetUTFCode(pattern) != 0)
389 {
390 int
391 ecode = GetUTFCode(expression),
392 pcode = GetUTFCode(pattern);
393
394 if ((ecode == 0) && (pcode != '*') && (pcode != '{'))
395 break;
396 switch (pcode)
397 {
398 case '*':
399 {
400 do
401 {
402 /*
403 Skip consecutive '*'.
404 */
405 pattern+=GetUTFOctets(pattern);
406 }
407 while (GetUTFCode(pattern) == '*');
408 while (1)
409 {
410 /*
411 Try to match at each position.
412 */
413 if (GlobExpression_(expression,pattern,case_insensitive,depth+1) != MagickFalse)
414 {
415 /*
416 Consume rest of expression and pattern.
417 */
418 while (GetUTFCode(expression) != 0)
419 expression+=GetUTFOctets(expression);
420 while (GetUTFCode(pattern) != 0)
421 pattern+=GetUTFOctets(pattern);
422 return(MagickTrue);
423 }
424 if (GetUTFCode(expression) == 0)
425 break;
426 expression+=GetUTFOctets(expression);
427 }
428 return(MagickFalse);
429 }
430 case '?':
431 {
432 if (ecode == 0)
433 return(MagickFalse);
434 pattern+=GetUTFOctets(pattern);
435 expression+=GetUTFOctets(expression);
436 break;
437 }
438 case '[':
439 {
440 const char
441 *p = pattern+GetUTFOctets(pattern),
442 *q = pattern+GetUTFOctets(pattern);
443
444 MagickBooleanType
445 matched = MagickFalse;
446
447 if (ecode == 0)
448 return(MagickFalse);
449 while ((GetUTFCode(q) != 0) && (GetUTFCode(q) != ']'))
450 q+=GetUTFOctets(q);
451 if (GetUTFCode(q) == 0)
452 return(MagickFalse); /* malformed */
453 while (p < q)
454 {
455 const char
456 *next;
457
458 int
459 code = GetUTFCode(p);
460
461 size_t
462 octets = GetUTFOctets(p);
463
464 if (code == '\\')
465 {
466 p+=octets;
467 code=GetUTFCode(p);
468 octets=GetUTFOctets(p);
469 }
470 next=p+octets;
471 if ((next < q) && (GetUTFCode(next) == '-'))
472 {
473 int
474 ncode;
475
476 next+=GetUTFOctets(next);
477 ncode=GetUTFCode(next);
478 if (ncode == '\\')
479 {
480 next+=GetUTFOctets(next);
481 ncode=GetUTFCode(next);
482 }
483 if ((ecode >= code) && (ecode <= ncode))
484 matched=MagickTrue;
485 p=next+GetUTFOctets(next);
486 }
487 else
488 {
489 if (ecode == code)
490 matched=MagickTrue;
491 p+=octets;
492 }
493 }
494 /*
495 Skip consecutive '*'.
496 */
497 if (matched == MagickFalse)
498 return(MagickFalse);
499 pattern=q+GetUTFOctets(q); /* skip ']' */
500 expression+=GetUTFOctets(expression);
501 break;
502 }
503 case '{':
504 {
505 char
506 *a,
507 *alternative;
508
509 const char
510 *p,
511 *q;
512
513 size_t
514 remaining = MagickPathExtent;
515
516 pattern+=GetUTFOctets(pattern); /* Skip '{' */
517 if (GetUTFCode(pattern) == 0)
518 return(MagickFalse);
519 /*
520 End of brace expression: append remaining pattern.
521 */
522 p=pattern;
523 while ((GetUTFCode(p) != 0) && (GetUTFCode(p) != '}'))
524 {
525#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
526 if (GetUTFCode(p) == '\\')
527 {
528 p+=GetUTFOctets(p);
529 if (GetUTFCode(p) == 0)
530 break;
531 }
532#endif
533 p+=GetUTFOctets(p);
534 }
535 if (GetUTFCode(p) != '}')
536 return(MagickFalse); /* malformed */
537 q=p+GetUTFOctets(p);
538 alternative=AcquireString(pattern);
539 a=alternative;
540 while (1)
541 {
542 int
543 code = GetUTFCode(pattern);
544
545 size_t
546 octets;
547
548 if ((code == 0) || (code == ',') || (code == '}'))
549 {
550 char
551 *subpattern;
552
553 MagickBooleanType
554 match;
555
556 /*
557 Try alternative as a full sub-pattern.
558 */
559 *a='\0';
560 subpattern=AcquireString(alternative);
561 if (ConcatenateString(&subpattern,q) == MagickFalse)
562 {
563 subpattern=DestroyString(subpattern);
564 alternative=DestroyString(alternative);
565 return(MagickFalse);
566 }
567 match=GlobExpression_(expression,subpattern,case_insensitive,
568 depth+1);
569 subpattern=DestroyString(subpattern);
570 if (match != MagickFalse)
571 {
572 /*
573 Consume rest of expression and pattern.
574 */
575 while (GetUTFCode(expression) != 0)
576 expression+=GetUTFOctets(expression);
577 pattern=q;
578 while (GetUTFCode(pattern) != 0)
579 pattern+=GetUTFOctets(pattern);
580 alternative=DestroyString(alternative);
581 return(MagickTrue);
582 }
583 /*
584 Reset buffer for next alternative.
585 */
586 a=alternative;
587 remaining=MagickPathExtent;
588 if (code == ',')
589 {
590 pattern+=GetUTFOctets(pattern); /* skip ',' */
591 continue;
592 }
593 break; /* '}' or end */
594 }
595 /*
596 Copy UTF-8 sequence into alternative.
597 */
598 octets=GetUTFOctets(pattern);
599 if ((octets == 0) || (octets >= remaining))
600 break;
601 (void) memcpy(a,pattern,octets);
602 a+=octets;
603 remaining-=octets;
604 pattern+=octets;
605 }
606 alternative=DestroyString(alternative);
607 return(MagickFalse);
608 }
609#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
610 case '\\':
611 {
612 pattern+=GetUTFOctets(pattern);
613 if (GetUTFCode(pattern) == 0)
614 return(MagickFalse);
615 magick_fallthrough;
616 }
617#endif
618 default:
619 {
620 int
621 ec = ecode,
622 pc = pcode;
623
624 if (ecode == 0)
625 return(MagickFalse);
626 if (case_insensitive != MagickFalse)
627 {
628 pc=LocaleToLowercase(pc);
629 ec=LocaleToLowercase(ec);
630 }
631 if (pc != ec)
632 return(MagickFalse);
633 pattern+=GetUTFOctets(pattern);
634 expression+=GetUTFOctets(expression);
635 break;
636 }
637 }
638 }
639 while (GetUTFCode(pattern) == '*')
640 pattern+=GetUTFOctets(pattern);
641 return(((GetUTFCode(expression) == 0) &&
642 (GetUTFCode(pattern) == 0)) ? MagickTrue : MagickFalse);
643}
644
645MagickExport MagickBooleanType GlobExpression(
646 const char *magick_restrict expression,const char *magick_restrict pattern,
647 const MagickBooleanType case_insensitive)
648{
649 return(GlobExpression_(expression,pattern,case_insensitive,0));
650}
651
652/*
653%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
654% %
655% %
656% %
657+ I s G l o b %
658% %
659% %
660% %
661%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
662%
663% IsGlob() returns MagickTrue if the path specification contains a globbing
664% pattern.
665%
666% The format of the IsGlob method is:
667%
668% MagickBooleanType IsGlob(const char *path)
669%
670% A description of each parameter follows:
671%
672% o path: the path.
673%
674*/
675MagickPrivate MagickBooleanType IsGlob(const char *path)
676{
677 MagickBooleanType
678 status = MagickFalse;
679
680 const char
681 *p;
682
683 if (IsPathAccessible(path) != MagickFalse)
684 return(MagickFalse);
685 for (p=path; *p != '\0'; p++)
686 {
687 switch (*p)
688 {
689 case '*':
690 case '?':
691 case '{':
692 case '}':
693 case '[':
694 case ']':
695 {
696 status=MagickTrue;
697 break;
698 }
699 default:
700 break;
701 }
702 }
703 return(status);
704}
705
706/*
707%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
708% %
709% %
710% %
711% T o k e n i z e r %
712% %
713% %
714% %
715%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
716%
717% Tokenizer() is a generalized, finite state token parser. It extracts tokens
718% one at a time from a string of characters. The characters used for white
719% space, for break characters, and for quotes can be specified. Also,
720% characters in the string can be preceded by a specifiable escape character
721% which removes any special meaning the character may have.
722%
723% Here is some terminology:
724%
725% o token: A single unit of information in the form of a group of
726% characters.
727%
728% o white space: Space that gets ignored (except within quotes or when
729% escaped), like blanks and tabs. In addition, white space terminates a
730% non-quoted token.
731%
732% o break set: One or more characters that separates non-quoted tokens.
733% Commas are a common break character. The usage of break characters to
734% signal the end of a token is the same as that of white space, except
735% multiple break characters with nothing or only white space between
736% generate a null token for each two break characters together.
737%
738% For example, if blank is set to be the white space and comma is set to
739% be the break character, the line
740%
741% A, B, C , , DEF
742%
743% ... consists of 5 tokens:
744%
745% 1) "A"
746% 2) "B"
747% 3) "C"
748% 4) "" (the null string)
749% 5) "DEF"
750%
751% o Quote character: A character that, when surrounding a group of other
752% characters, causes the group of characters to be treated as a single
753% token, no matter how many white spaces or break characters exist in
754% the group. Also, a token always terminates after the closing quote.
755% For example, if ' is the quote character, blank is white space, and
756% comma is the break character, the following string
757%
758% A, ' B, CD'EF GHI
759%
760% ... consists of 4 tokens:
761%
762% 1) "A"
763% 2) " B, CD" (note the blanks & comma)
764% 3) "EF"
765% 4) "GHI"
766%
767% The quote characters themselves do not appear in the resultant
768% tokens. The double quotes are delimiters I use here for
769% documentation purposes only.
770%
771% o Escape character: A character which itself is ignored but which
772% causes the next character to be used as is. ^ and \ are often used
773% as escape characters. An escape in the last position of the string
774% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
775% and non-escape) character. For example, assume white space, break
776% character, and quote are the same as in the above examples, and
777% further, assume that ^ is the escape character. Then, in the string
778%
779% ABC, ' DEF ^' GH' I ^ J K^ L ^
780%
781% ... there are 7 tokens:
782%
783% 1) "ABC"
784% 2) " DEF ' GH"
785% 3) "I"
786% 4) " " (a lone blank)
787% 5) "J"
788% 6) "K L"
789% 7) "^" (passed as is at end of line)
790%
791% The format of the Tokenizer method is:
792%
793% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
794% const size_t max_token_length,const char *line,const char *white,
795% const char *break_set,const char *quote,const char escape,
796% char *breaker,int *next,char *quoted)
797%
798% A description of each parameter follows:
799%
800% o flag: right now, only the low order 2 bits are used.
801%
802% 1 => convert non-quoted tokens to upper case
803% 2 => convert non-quoted tokens to lower case
804% 0 => do not convert non-quoted tokens
805%
806% o token: a character string containing the returned next token
807%
808% o max_token_length: the maximum size of "token". Characters beyond
809% "max_token_length" are truncated.
810%
811% o line: the string to be parsed.
812%
813% o white: a string of the valid white spaces. example:
814%
815% char whitesp[]={" \t"};
816%
817% blank and tab will be valid white space.
818%
819% o break: a string of the valid break characters. example:
820%
821% char breakch[]={";,"};
822%
823% semicolon and comma will be valid break characters.
824%
825% o quote: a string of the valid quote characters. An example would be
826%
827% char quotes[]={"'\""};
828%
829% (this causes single and double quotes to be valid) Note that a
830% token starting with one of these characters needs the same quote
831% character to terminate it.
832%
833% for example:
834%
835% "ABC '
836%
837% is unterminated, but
838%
839% "DEF" and 'GHI'
840%
841% are properly terminated. Note that different quote characters
842% can appear on the same line; only for a given token do the quote
843% characters have to be the same.
844%
845% o escape: the escape character (NOT a string ... only one
846% allowed). Use zero if none is desired.
847%
848% o breaker: the break character used to terminate the current
849% token. If the token was quoted, this will be the quote used. If
850% the token is the last one on the line, this will be zero.
851%
852% o next: this variable points to the first character of the
853% next token. it gets reset by "tokenizer" as it steps through the
854% string. Set it to 0 upon initialization, and leave it alone
855% after that. You can change it if you want to jump around in the
856% string or re-parse from the beginning, but be careful.
857%
858% o quoted: set to MagickTrue if the token was quoted and MagickFalse
859% if not. You may need this information (for example: in C, a
860% string with quotes around it is a character string, while one
861% without is an identifier).
862%
863% o result: 0 if we haven't reached EOS (end of string), and 1
864% if we have.
865%
866*/
867
/*
  Scanner states used by Tokenizer() and StoreToken().
*/
enum
{
  IN_WHITE = 0,  /* initial state: no token character seen yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token ended; skipping white space up to the next token */
};
872
/*
  sindex() returns the index of the first occurrence of c in string, or -1
  when c does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
883
884static void StoreToken(TokenInfo *token_info,char *string,
885 size_t max_token_length,int c)
886{
887 ssize_t
888 i;
889
890 if ((token_info->offset < 0) ||
891 ((size_t) token_info->offset >= (max_token_length-1)))
892 return;
893 i=token_info->offset++;
894 string[i]=(char) c;
895 if (token_info->state == IN_QUOTE)
896 return;
897 switch (token_info->flag & 0x03)
898 {
899 case 1:
900 {
901 string[i]=(char) LocaleToUppercase(c);
902 break;
903 }
904 case 2:
905 {
906 string[i]=(char) LocaleToLowercase(c);
907 break;
908 }
909 default:
910 break;
911 }
912}
913
914MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
915 char *token,const size_t max_token_length,const char *line,const char *white,
916 const char *break_set,const char *quote,const char escape,char *breaker,
917 int *next,char *quoted)
918{
919 int
920 c;
921
922 ssize_t
923 i;
924
925 *breaker='\0';
926 *quoted='\0';
927 if (line[*next] == '\0')
928 return(1);
929 token_info->state=IN_WHITE;
930 token_info->quote=(char) MagickFalse;
931 token_info->flag=flag;
932 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
933 {
934 c=(int) line[*next];
935 i=sindex(c,break_set);
936 if (i >= 0)
937 {
938 switch (token_info->state)
939 {
940 case IN_WHITE:
941 case IN_TOKEN:
942 case IN_OZONE:
943 {
944 (*next)++;
945 *breaker=break_set[i];
946 token[token_info->offset]='\0';
947 return(0);
948 }
949 case IN_QUOTE:
950 {
951 StoreToken(token_info,token,max_token_length,c);
952 break;
953 }
954 }
955 continue;
956 }
957 i=sindex(c,quote);
958 if (i >= 0)
959 {
960 switch (token_info->state)
961 {
962 case IN_WHITE:
963 {
964 token_info->state=IN_QUOTE;
965 token_info->quote=quote[i];
966 *quoted=(char) MagickTrue;
967 break;
968 }
969 case IN_QUOTE:
970 {
971 if (quote[i] != token_info->quote)
972 StoreToken(token_info,token,max_token_length,c);
973 else
974 {
975 token_info->state=IN_OZONE;
976 token_info->quote='\0';
977 }
978 break;
979 }
980 case IN_TOKEN:
981 case IN_OZONE:
982 {
983 *breaker=(char) c;
984 token[token_info->offset]='\0';
985 return(0);
986 }
987 }
988 continue;
989 }
990 i=sindex(c,white);
991 if (i >= 0)
992 {
993 switch (token_info->state)
994 {
995 case IN_WHITE:
996 case IN_OZONE:
997 break;
998 case IN_TOKEN:
999 {
1000 token_info->state=IN_OZONE;
1001 break;
1002 }
1003 case IN_QUOTE:
1004 {
1005 StoreToken(token_info,token,max_token_length,c);
1006 break;
1007 }
1008 }
1009 continue;
1010 }
1011 if (c == (int) escape)
1012 {
1013 if (line[(*next)+1] == '\0')
1014 {
1015 *breaker='\0';
1016 StoreToken(token_info,token,max_token_length,c);
1017 (*next)++;
1018 token[token_info->offset]='\0';
1019 return(0);
1020 }
1021 switch (token_info->state)
1022 {
1023 case IN_WHITE:
1024 {
1025 (*next)--;
1026 token_info->state=IN_TOKEN;
1027 break;
1028 }
1029 case IN_TOKEN:
1030 case IN_QUOTE:
1031 {
1032 (*next)++;
1033 c=(int) line[*next];
1034 StoreToken(token_info,token,max_token_length,c);
1035 break;
1036 }
1037 case IN_OZONE:
1038 {
1039 token[token_info->offset]='\0';
1040 return(0);
1041 }
1042 }
1043 continue;
1044 }
1045 switch (token_info->state)
1046 {
1047 case IN_WHITE:
1048 {
1049 token_info->state=IN_TOKEN;
1050 StoreToken(token_info,token,max_token_length,c);
1051 break;
1052 }
1053 case IN_TOKEN:
1054 case IN_QUOTE:
1055 {
1056 StoreToken(token_info,token,max_token_length,c);
1057 break;
1058 }
1059 case IN_OZONE:
1060 {
1061 token[token_info->offset]='\0';
1062 return(0);
1063 }
1064 }
1065 }
1066 token[token_info->offset]='\0';
1067 return(0);
1068}