/* Formatted output to strings.
   Copyright (C) 1999-2000, 2002-2003, 2006-2008 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/* This file can be parametrized with the following macros:
     CHAR_T             The element type of the format string.
     CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                        in the format string are ASCII.
     DIRECTIVE          Structure denoting a format directive.
                        Depends on CHAR_T.
     DIRECTIVES         Structure denoting the set of format directives of a
                        format string.  Depends on CHAR_T.
     PRINTF_PARSE       Function that parses a format string.
                        Depends on CHAR_T.
     STATIC             Set to 'static' to declare the function static.
     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */

31 #ifndef PRINTF_PARSE
32 # include <config.h>
33 #endif
35 /* Specification. */
36 #ifndef PRINTF_PARSE
37 # include "printf-parse.h"
38 #endif
40 /* Default parameters. */
41 #ifndef PRINTF_PARSE
42 # define PRINTF_PARSE printf_parse
43 # define CHAR_T char
44 # define DIRECTIVE char_directive
45 # define DIRECTIVES char_directives
46 #endif
48 /* Get size_t, NULL. */
49 #include <stddef.h>
51 /* Get intmax_t. */
52 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
53 # if HAVE_STDINT_H_WITH_UINTMAX
54 # include <stdint.h>
55 # endif
56 # if HAVE_INTTYPES_H_WITH_UINTMAX
57 # include <inttypes.h>
58 # endif
59 #else
60 # include <stdint.h>
61 #endif
63 /* malloc(), realloc(), free(). */
64 #include <stdlib.h>
66 /* errno. */
67 #include <errno.h>
69 /* Checked size_t computations. */
70 #include "xsize.h"
72 #if CHAR_T_ONLY_ASCII
73 /* c_isascii(). */
74 # include "c-ctype.h"
75 #endif
77 #ifdef STATIC
78 STATIC
79 #endif
80 int
81 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
82 {
83 const CHAR_T *cp = format; /* pointer into format */
84 size_t arg_posn = 0; /* number of regular arguments consumed */
85 size_t d_allocated; /* allocated elements of d->dir */
86 size_t a_allocated; /* allocated elements of a->arg */
87 size_t max_width_length = 0;
88 size_t max_precision_length = 0;
90 d->count = 0;
91 d_allocated = 1;
92 d->dir = (DIRECTIVE *) malloc (d_allocated * sizeof (DIRECTIVE));
93 if (d->dir == NULL)
94 /* Out of memory. */
95 goto out_of_memory_1;
97 a->count = 0;
98 a_allocated = 0;
99 a->arg = NULL;
101 #define REGISTER_ARG(_index_,_type_) \
102 { \
103 size_t n = (_index_); \
104 if (n >= a_allocated) \
105 { \
106 size_t memory_size; \
107 argument *memory; \
108 \
109 a_allocated = xtimes (a_allocated, 2); \
110 if (a_allocated <= n) \
111 a_allocated = xsum (n, 1); \
112 memory_size = xtimes (a_allocated, sizeof (argument)); \
113 if (size_overflow_p (memory_size)) \
114 /* Overflow, would lead to out of memory. */ \
115 goto out_of_memory; \
116 memory = (argument *) (a->arg \
117 ? realloc (a->arg, memory_size) \
118 : malloc (memory_size)); \
119 if (memory == NULL) \
120 /* Out of memory. */ \
121 goto out_of_memory; \
122 a->arg = memory; \
123 } \
124 while (a->count <= n) \
125 a->arg[a->count++].type = TYPE_NONE; \
126 if (a->arg[n].type == TYPE_NONE) \
127 a->arg[n].type = (_type_); \
128 else if (a->arg[n].type != (_type_)) \
129 /* Ambiguous type for positional argument. */ \
130 goto error; \
131 }
133 while (*cp != '\0')
134 {
135 CHAR_T c = *cp++;
136 if (c == '%')
137 {
138 size_t arg_index = ARG_NONE;
139 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
141 /* Initialize the next directive. */
142 dp->dir_start = cp - 1;
143 dp->flags = 0;
144 dp->width_start = NULL;
145 dp->width_end = NULL;
146 dp->width_arg_index = ARG_NONE;
147 dp->precision_start = NULL;
148 dp->precision_end = NULL;
149 dp->precision_arg_index = ARG_NONE;
150 dp->arg_index = ARG_NONE;
152 /* Test for positional argument. */
153 if (*cp >= '0' && *cp <= '9')
154 {
155 const CHAR_T *np;
157 for (np = cp; *np >= '0' && *np <= '9'; np++)
158 ;
159 if (*np == '$')
160 {
161 size_t n = 0;
163 for (np = cp; *np >= '0' && *np <= '9'; np++)
164 n = xsum (xtimes (n, 10), *np - '0');
165 if (n == 0)
166 /* Positional argument 0. */
167 goto error;
168 if (size_overflow_p (n))
169 /* n too large, would lead to out of memory later. */
170 goto error;
171 arg_index = n - 1;
172 cp = np + 1;
173 }
174 }
176 /* Read the flags. */
177 for (;;)
178 {
179 if (*cp == '\'')
180 {
181 dp->flags |= FLAG_GROUP;
182 cp++;
183 }
184 else if (*cp == '-')
185 {
186 dp->flags |= FLAG_LEFT;
187 cp++;
188 }
189 else if (*cp == '+')
190 {
191 dp->flags |= FLAG_SHOWSIGN;
192 cp++;
193 }
194 else if (*cp == ' ')
195 {
196 dp->flags |= FLAG_SPACE;
197 cp++;
198 }
199 else if (*cp == '#')
200 {
201 dp->flags |= FLAG_ALT;
202 cp++;
203 }
204 else if (*cp == '0')
205 {
206 dp->flags |= FLAG_ZERO;
207 cp++;
208 }
209 else
210 break;
211 }
213 /* Parse the field width. */
214 if (*cp == '*')
215 {
216 dp->width_start = cp;
217 cp++;
218 dp->width_end = cp;
219 if (max_width_length < 1)
220 max_width_length = 1;
222 /* Test for positional argument. */
223 if (*cp >= '0' && *cp <= '9')
224 {
225 const CHAR_T *np;
227 for (np = cp; *np >= '0' && *np <= '9'; np++)
228 ;
229 if (*np == '$')
230 {
231 size_t n = 0;
233 for (np = cp; *np >= '0' && *np <= '9'; np++)
234 n = xsum (xtimes (n, 10), *np - '0');
235 if (n == 0)
236 /* Positional argument 0. */
237 goto error;
238 if (size_overflow_p (n))
239 /* n too large, would lead to out of memory later. */
240 goto error;
241 dp->width_arg_index = n - 1;
242 cp = np + 1;
243 }
244 }
245 if (dp->width_arg_index == ARG_NONE)
246 {
247 dp->width_arg_index = arg_posn++;
248 if (dp->width_arg_index == ARG_NONE)
249 /* arg_posn wrapped around. */
250 goto error;
251 }
252 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
253 }
254 else if (*cp >= '0' && *cp <= '9')
255 {
256 size_t width_length;
258 dp->width_start = cp;
259 for (; *cp >= '0' && *cp <= '9'; cp++)
260 ;
261 dp->width_end = cp;
262 width_length = dp->width_end - dp->width_start;
263 if (max_width_length < width_length)
264 max_width_length = width_length;
265 }
267 /* Parse the precision. */
268 if (*cp == '.')
269 {
270 cp++;
271 if (*cp == '*')
272 {
273 dp->precision_start = cp - 1;
274 cp++;
275 dp->precision_end = cp;
276 if (max_precision_length < 2)
277 max_precision_length = 2;
279 /* Test for positional argument. */
280 if (*cp >= '0' && *cp <= '9')
281 {
282 const CHAR_T *np;
284 for (np = cp; *np >= '0' && *np <= '9'; np++)
285 ;
286 if (*np == '$')
287 {
288 size_t n = 0;
290 for (np = cp; *np >= '0' && *np <= '9'; np++)
291 n = xsum (xtimes (n, 10), *np - '0');
292 if (n == 0)
293 /* Positional argument 0. */
294 goto error;
295 if (size_overflow_p (n))
296 /* n too large, would lead to out of memory
297 later. */
298 goto error;
299 dp->precision_arg_index = n - 1;
300 cp = np + 1;
301 }
302 }
303 if (dp->precision_arg_index == ARG_NONE)
304 {
305 dp->precision_arg_index = arg_posn++;
306 if (dp->precision_arg_index == ARG_NONE)
307 /* arg_posn wrapped around. */
308 goto error;
309 }
310 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
311 }
312 else
313 {
314 size_t precision_length;
316 dp->precision_start = cp - 1;
317 for (; *cp >= '0' && *cp <= '9'; cp++)
318 ;
319 dp->precision_end = cp;
320 precision_length = dp->precision_end - dp->precision_start;
321 if (max_precision_length < precision_length)
322 max_precision_length = precision_length;
323 }
324 }
326 {
327 arg_type type;
329 /* Parse argument type/size specifiers. */
330 {
331 int flags = 0;
333 for (;;)
334 {
335 if (*cp == 'h')
336 {
337 flags |= (1 << (flags & 1));
338 cp++;
339 }
340 else if (*cp == 'L')
341 {
342 flags |= 4;
343 cp++;
344 }
345 else if (*cp == 'l')
346 {
347 flags += 8;
348 cp++;
349 }
350 else if (*cp == 'j')
351 {
352 if (sizeof (intmax_t) > sizeof (long))
353 {
354 /* intmax_t = long long */
355 flags += 16;
356 }
357 else if (sizeof (intmax_t) > sizeof (int))
358 {
359 /* intmax_t = long */
360 flags += 8;
361 }
362 cp++;
363 }
364 else if (*cp == 'z' || *cp == 'Z')
365 {
366 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
367 because the warning facility in gcc-2.95.2 understands
368 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
369 if (sizeof (size_t) > sizeof (long))
370 {
371 /* size_t = long long */
372 flags += 16;
373 }
374 else if (sizeof (size_t) > sizeof (int))
375 {
376 /* size_t = long */
377 flags += 8;
378 }
379 cp++;
380 }
381 else if (*cp == 't')
382 {
383 if (sizeof (ptrdiff_t) > sizeof (long))
384 {
385 /* ptrdiff_t = long long */
386 flags += 16;
387 }
388 else if (sizeof (ptrdiff_t) > sizeof (int))
389 {
390 /* ptrdiff_t = long */
391 flags += 8;
392 }
393 cp++;
394 }
395 #if defined __APPLE__ && defined __MACH__
396 /* On MacOS X 10.3, PRIdMAX is defined as "qd".
397 We cannot change it to "lld" because PRIdMAX must also
398 be understood by the system's printf routines. */
399 else if (*cp == 'q')
400 {
401 if (64 / 8 > sizeof (long))
402 {
403 /* int64_t = long long */
404 flags += 16;
405 }
406 else
407 {
408 /* int64_t = long */
409 flags += 8;
410 }
411 cp++;
412 }
413 #endif
414 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
415 /* On native Win32, PRIdMAX is defined as "I64d".
416 We cannot change it to "lld" because PRIdMAX must also
417 be understood by the system's printf routines. */
418 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
419 {
420 if (64 / 8 > sizeof (long))
421 {
422 /* __int64 = long long */
423 flags += 16;
424 }
425 else
426 {
427 /* __int64 = long */
428 flags += 8;
429 }
430 cp += 3;
431 }
432 #endif
433 else
434 break;
435 }
437 /* Read the conversion character. */
438 c = *cp++;
439 switch (c)
440 {
441 case 'd': case 'i':
442 #if HAVE_LONG_LONG_INT
443 /* If 'long long' exists and is larger than 'long': */
444 if (flags >= 16 || (flags & 4))
445 type = TYPE_LONGLONGINT;
446 else
447 #endif
448 /* If 'long long' exists and is the same as 'long', we parse
449 "lld" into TYPE_LONGINT. */
450 if (flags >= 8)
451 type = TYPE_LONGINT;
452 else if (flags & 2)
453 type = TYPE_SCHAR;
454 else if (flags & 1)
455 type = TYPE_SHORT;
456 else
457 type = TYPE_INT;
458 break;
459 case 'o': case 'u': case 'x': case 'X':
460 #if HAVE_LONG_LONG_INT
461 /* If 'long long' exists and is larger than 'long': */
462 if (flags >= 16 || (flags & 4))
463 type = TYPE_ULONGLONGINT;
464 else
465 #endif
466 /* If 'unsigned long long' exists and is the same as
467 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
468 if (flags >= 8)
469 type = TYPE_ULONGINT;
470 else if (flags & 2)
471 type = TYPE_UCHAR;
472 else if (flags & 1)
473 type = TYPE_USHORT;
474 else
475 type = TYPE_UINT;
476 break;
477 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
478 case 'a': case 'A':
479 if (flags >= 16 || (flags & 4))
480 type = TYPE_LONGDOUBLE;
481 else
482 type = TYPE_DOUBLE;
483 break;
484 case 'c':
485 if (flags >= 8)
486 #if HAVE_WINT_T
487 type = TYPE_WIDE_CHAR;
488 #else
489 goto error;
490 #endif
491 else
492 type = TYPE_CHAR;
493 break;
494 #if HAVE_WINT_T
495 case 'C':
496 type = TYPE_WIDE_CHAR;
497 c = 'c';
498 break;
499 #endif
500 case 's':
501 if (flags >= 8)
502 #if HAVE_WCHAR_T
503 type = TYPE_WIDE_STRING;
504 #else
505 goto error;
506 #endif
507 else
508 type = TYPE_STRING;
509 break;
510 #if HAVE_WCHAR_T
511 case 'S':
512 type = TYPE_WIDE_STRING;
513 c = 's';
514 break;
515 #endif
516 case 'p':
517 type = TYPE_POINTER;
518 break;
519 case 'n':
520 #if HAVE_LONG_LONG_INT
521 /* If 'long long' exists and is larger than 'long': */
522 if (flags >= 16 || (flags & 4))
523 type = TYPE_COUNT_LONGLONGINT_POINTER;
524 else
525 #endif
526 /* If 'long long' exists and is the same as 'long', we parse
527 "lln" into TYPE_COUNT_LONGINT_POINTER. */
528 if (flags >= 8)
529 type = TYPE_COUNT_LONGINT_POINTER;
530 else if (flags & 2)
531 type = TYPE_COUNT_SCHAR_POINTER;
532 else if (flags & 1)
533 type = TYPE_COUNT_SHORT_POINTER;
534 else
535 type = TYPE_COUNT_INT_POINTER;
536 break;
537 #if ENABLE_UNISTDIO
538 /* The unistdio extensions. */
539 case 'U':
540 if (flags >= 16)
541 type = TYPE_U32_STRING;
542 else if (flags >= 8)
543 type = TYPE_U16_STRING;
544 else
545 type = TYPE_U8_STRING;
546 break;
547 #endif
548 case '%':
549 type = TYPE_NONE;
550 break;
551 default:
552 /* Unknown conversion character. */
553 goto error;
554 }
555 }
557 if (type != TYPE_NONE)
558 {
559 dp->arg_index = arg_index;
560 if (dp->arg_index == ARG_NONE)
561 {
562 dp->arg_index = arg_posn++;
563 if (dp->arg_index == ARG_NONE)
564 /* arg_posn wrapped around. */
565 goto error;
566 }
567 REGISTER_ARG (dp->arg_index, type);
568 }
569 dp->conversion = c;
570 dp->dir_end = cp;
571 }
573 d->count++;
574 if (d->count >= d_allocated)
575 {
576 size_t memory_size;
577 DIRECTIVE *memory;
579 d_allocated = xtimes (d_allocated, 2);
580 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
581 if (size_overflow_p (memory_size))
582 /* Overflow, would lead to out of memory. */
583 goto out_of_memory;
584 memory = (DIRECTIVE *) realloc (d->dir, memory_size);
585 if (memory == NULL)
586 /* Out of memory. */
587 goto out_of_memory;
588 d->dir = memory;
589 }
590 }
591 #if CHAR_T_ONLY_ASCII
592 else if (!c_isascii (c))
593 {
594 /* Non-ASCII character. Not supported. */
595 goto error;
596 }
597 #endif
598 }
599 d->dir[d->count].dir_start = cp;
601 d->max_width_length = max_width_length;
602 d->max_precision_length = max_precision_length;
603 return 0;
605 error:
606 if (a->arg)
607 free (a->arg);
608 if (d->dir)
609 free (d->dir);
610 errno = EINVAL;
611 return -1;
613 out_of_memory:
614 if (a->arg)
615 free (a->arg);
616 if (d->dir)
617 free (d->dir);
618 out_of_memory_1:
619 errno = ENOMEM;
620 return -1;
621 }
/* Remove the parametrization macros described at the top of this file
   (presumably so that the file can be included again with different
   parameters - confirm against the including files).  */
#undef PRINTF_PARSE
#undef DIRECTIVES
#undef DIRECTIVE
#undef CHAR_T_ONLY_ASCII
#undef CHAR_T