1 /* -*- Mode: C; indent-tabs-mode:nil; c-basic-offset: 8-*- */
3 /*
4 * This file is part of The Croco Library
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2.1 of the GNU Lesser General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 *
20 * Author: Dodji Seketeli
21 * See the COPYRIGHTS file for copyrights information.
22 */
24 /**
25 *@file
26 *The definition of the #CRTknzr (tokenizer)
27 *class.
28 */
30 #include "string.h"
31 #include "cr-tknzr.h"
32 #include "cr-doc-handler.h"
/**
 *The private data of the tokenizer.
 *It is accessed throughout this file via PRIVATE (a_this).
 */
struct _CRTknzrPriv {
        /**The parser input stream of bytes*/
        CRInput *input;

        /**
         *A cache where tknzr_unget_token()
         *puts back the token. tknzr_get_next_token()
         *first looks in this cache, and if and
         *only if it's empty, fetches the next token
         *from the input stream.
         */
        CRToken *token_cache;

        /**
         *The position of the end of the previous token
         *or char fetched.
         */
        CRInputPos prev_pos;

        /**
         *NOTE(review): presumably the SAC document handler
         *attached to this tokenizer; it is not used in the
         *part of the file reviewed here -- confirm.
         */
        CRDocHandler *sac_handler;

        /**
         *The reference count of the current instance
         *of #CRTknzr. Is manipulated by cr_tknzr_ref()
         *and cr_tknzr_unref().
         */
        glong ref_count;
};
/**
 *Shorthand that expands to the 'priv' member of an object.
 *@param obj a pointer to an object that has a 'priv' member.
 */
#define PRIVATE(obj) ((obj)->priv)
/**
 *Tells whether a character is an ASCII decimal digit.
 *Beware: the argument can be evaluated twice, so do not
 *pass an expression that has side effects.
 *@param a_char the character to test.
 *@return TRUE if a_char lies in ['0'-'9'], FALSE otherwise.
 */
#define IS_NUM(a_char) \
        (('0' <= (a_char) && '9' >= (a_char)) ? TRUE : FALSE)
/**
 *Checks if 'status' equals CR_OK. If not, goto the 'error' label
 *of the calling function.
 *
 *The expansion is deliberately a bare if-block (and not a
 *do/while (0) statement) so that it expands correctly whether or
 *not the invocation is followed by a semicolon.
 *
 *@param status the status (of type enum CRStatus) to test. Must be
 *a modifiable lvalue because it may be overwritten.
 *@param is_exception if it evaluates to FALSE, 'status' is
 *overwritten with CR_PARSING_ERROR before jumping. Otherwise
 *the current value of 'status' is left untouched. The argument is
 *parenthesized so that a compound expression can be passed safely.
 */
#define CHECK_PARSING_STATUS(status, is_exception) \
if ((status) != CR_OK) \
{ \
        if ((is_exception) == FALSE) \
        { \
                (status) = CR_PARSING_ERROR ; \
        } \
        goto error ; \
}
/**
 *Peeks the next char from the input stream of the current tokenizer
 *and stores the result of cr_tknzr_peek_char() into the 'status'
 *variable of the calling function.
 *If the peek fails, jumps to the "error:" label of the calling
 *function, leaving 'status' set to the error code.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_to_char a pointer to the char where to store the
 *char peeked.
 */
#define PEEK_NEXT_CHAR(a_tknzr, a_to_char) \
{ \
        status = cr_tknzr_peek_char ((a_tknzr), (a_to_char)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Reads the next char from the input stream of the current tokenizer
 *and stores the result of cr_tknzr_read_char() into the 'status'
 *variable of the calling function.
 *In case of error, jumps to the "error:" label located in the
 *function where this macro is called.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param to_char a pointer to the guint32 char where to store
 *the character read.
 */
#define READ_NEXT_CHAR(a_tknzr, to_char) \
{ \
        status = cr_tknzr_read_char ((a_tknzr), (to_char)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Gets information about the current position in
 *the input of the tokenizer, and stores the result of
 *cr_input_get_cur_pos() into the 'status' variable of the
 *calling function.
 *In case of failure, this macro returns from the
 *calling function with that status code (of type enum #CRStatus).
 *
 *NOTE(review): the failure test relies on g_return_val_if_fail(),
 *which GLib documents as being compiled out when G_DISABLE_CHECKS
 *is defined -- confirm this is acceptable for release builds.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_pos out parameter. A pointer to the position
 *(CRInputPos) to fill with the current input position.
 */
#define RECORD_INITIAL_POS(a_tknzr, a_pos) \
{ \
        status = cr_input_get_cur_pos (PRIVATE \
                                       (a_tknzr)->input, (a_pos)) ; \
        g_return_val_if_fail (status == CR_OK, status) ; \
}
/**
 *Gets the address of the current byte inside the
 *tokenizer input, storing the result of
 *cr_input_get_cur_byte_addr() into the 'status' variable of the
 *calling function.
 *If it fails, jumps to the "error:" label of the calling function.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_addr out parameter. A pointer (guchar**)
 *to where the address must be put.
 */
#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr) \
{ \
        status = cr_input_get_cur_byte_addr \
                (PRIVATE (a_tknzr)->input, (a_addr)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Peeks a byte from the tokenizer input at
 *a given offset from the current position, storing the result
 *of cr_tknzr_peek_byte() into the 'status' variable of the
 *calling function.
 *If it fails, goto the "error:" label.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_offset the offset of the byte to peek, the
 *current byte having the offset '0'.
 *@param a_byte_ptr out parameter a pointer (guchar*) to
 *where the peeked byte is to be stored.
 */
#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr) \
{ \
        status = cr_tknzr_peek_byte ((a_tknzr), \
                                     (a_offset), \
                                     (a_byte_ptr)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Shorthand for cr_input_peek_byte2(); the three arguments are
 *forwarded unchanged and the macro evaluates to the value
 *of that call.
 *@param a_input first argument of cr_input_peek_byte2().
 *@param a_n second argument of cr_input_peek_byte2().
 *@param a_eof third argument of cr_input_peek_byte2().
 */
#define BYTE(a_input, a_n, a_eof) \
        cr_input_peek_byte2 (a_input, a_n, a_eof)
/**
 *Reads a byte from the tokenizer input
 *stream, storing the result of cr_input_read_byte() into
 *the 'status' variable of the calling function.
 *If it fails, goto the "error" label.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_byte_ptr the guchar * where to put the read byte.
 */
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr) \
{ \
        status = \
        cr_input_read_byte (PRIVATE (a_tknzr)->input, (a_byte_ptr)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Skips a given number of bytes in the
 *tokenizer input. Doesn't update line and column number.
 *The result of cr_input_seek_index() is stored into the
 *'status' variable of the calling function.
 *In case of error, jumps to the "error:" label
 *of the surrounding function.
 *
 *The expansion is one brace-enclosed block, so both statements stay
 *together even when the macro is the body of an unbraced if/for.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_bytes the number of bytes to skip.
 */
#define SKIP_BYTES(a_tknzr, a_nb_bytes) \
{ \
        status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \
                                      CR_SEEK_CUR, (a_nb_bytes)) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Skip utf8 encoded characters.
 *Updates line and column numbers.
 *The result of cr_input_consume_chars() is stored into the
 *'status' variable of the calling function; on failure the
 *macro jumps to the "error:" label of that function.
 *
 *The internal counter has a name unlikely to clash with a
 *variable of the caller, so an expression that mentions a caller
 *variable called 'nb_chars' can be passed safely.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_chars the number of chars to skip. It is converted
 *to gulong.
 */
#define SKIP_CHARS(a_tknzr, a_nb_chars) \
{ \
        gulong skip_chars_count_ = (a_nb_chars) ; \
        status = cr_input_consume_chars \
                (PRIVATE (a_tknzr)->input, 0, &skip_chars_count_) ; \
        CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Tests a condition. If the condition does not hold, sets the
 *'status' variable of the calling function to CR_PARSING_ERROR
 *and jumps to the 'error' label of that function.
 *@param condition the condition that must hold.
 */
#define ENSURE_PARSING_COND(condition) \
if (! (condition)) \
{ \
        status = CR_PARSING_ERROR ; \
        goto error ; \
}
/*
 *Forward declarations of file-local parsing routines.
 *Each one returns an enum CRStatus.
 */
static enum CRStatus cr_tknzr_parse_nl (CRTknzr * a_this,
                                        guchar ** a_start,
                                        guchar ** a_end,
                                        CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_w (CRTknzr * a_this,
                                       guchar ** a_start,
                                       guchar ** a_end,
                                       CRParsingLocation *a_location) ;

static enum CRStatus cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
                                                    guint32 * a_unicode,
                                                    CRParsingLocation *a_location) ;

static enum CRStatus cr_tknzr_parse_escape (CRTknzr * a_this,
                                            guint32 * a_esc_code,
                                            CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_string (CRTknzr * a_this,
                                            CRString ** a_str);

static enum CRStatus cr_tknzr_parse_comment (CRTknzr * a_this,
                                             CRString ** a_comment);

static enum CRStatus cr_tknzr_parse_nmstart (CRTknzr * a_this,
                                             guint32 * a_char,
                                             CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_num (CRTknzr * a_this,
                                         CRNum ** a_num);
244 /**********************************
245 *PRIVATE methods
246 **********************************/
248 /**
249 *Parses a "w" as defined by the css spec at [4.1.1]:
250 * w ::= [ \t\r\n\f]*
251 *
252 *@param a_this the current instance of #CRTknzr.
253 *@param a_start out param. Upon successfull completion, points
254 *to the beginning of the parsed white space, points to NULL otherwise.
255 *Can also point to NULL is there is no white space actually.
256 *@param a_end out param. Upon successfull completion, points
257 *to the end of the parsed white space, points to NULL otherwise.
258 *Can also point to NULL is there is no white space actually.
259 */
260 static enum CRStatus
261 cr_tknzr_parse_w (CRTknzr * a_this,
262 guchar ** a_start,
263 guchar ** a_end,
264 CRParsingLocation *a_location)
265 {
266 guint32 cur_char = 0;
267 CRInputPos init_pos;
268 enum CRStatus status = CR_OK;
270 g_return_val_if_fail (a_this && PRIVATE (a_this)
271 && PRIVATE (a_this)->input
272 && a_start && a_end,
273 CR_BAD_PARAM_ERROR);
275 RECORD_INITIAL_POS (a_this, &init_pos);
277 *a_start = NULL;
278 *a_end = NULL;
280 READ_NEXT_CHAR (a_this, &cur_char);
282 if (cr_utils_is_white_space (cur_char) == FALSE) {
283 status = CR_PARSING_ERROR;
284 goto error;
285 }
286 if (a_location) {
287 cr_tknzr_get_parsing_location (a_this,
288 a_location) ;
289 }
290 RECORD_CUR_BYTE_ADDR (a_this, a_start);
291 *a_end = *a_start;
293 for (;;) {
294 gboolean is_eof = FALSE;
296 cr_input_get_end_of_file (PRIVATE (a_this)->input, &is_eof);
297 if (is_eof)
298 break;
300 status = cr_tknzr_peek_char (a_this, &cur_char);
301 if (status == CR_END_OF_INPUT_ERROR) {
302 status = CR_OK;
303 break;
304 } else if (status != CR_OK) {
305 goto error;
306 }
308 if (cr_utils_is_white_space (cur_char) == TRUE) {
309 READ_NEXT_CHAR (a_this, &cur_char);
310 RECORD_CUR_BYTE_ADDR (a_this, a_end);
311 } else {
312 break;
313 }
314 }
316 return CR_OK;
318 error:
319 cr_tknzr_set_cur_pos (a_this, &init_pos);
321 return status;
322 }
324 /**
325 *Parses a newline as defined in the css2 spec:
326 * nl ::= \n|\r\n|\r|\f
327 *
328 *@param a_this the "this pointer" of the current instance of #CRTknzr.
329 *@param a_start a pointer to the first character of the successfully
330 *parsed string.
331 *@param a_end a pointer to the last character of the successfully parsed
332 *string.
333 *@result CR_OK uppon successfull completion, an error code otherwise.
334 */
335 static enum CRStatus
336 cr_tknzr_parse_nl (CRTknzr * a_this,
337 guchar ** a_start,
338 guchar ** a_end,
339 CRParsingLocation *a_location)
340 {
341 CRInputPos init_pos;
342 guchar next_chars[2] = { 0 };
343 enum CRStatus status = CR_PARSING_ERROR;
345 g_return_val_if_fail (a_this && PRIVATE (a_this)
346 && a_start && a_end, CR_BAD_PARAM_ERROR);
348 RECORD_INITIAL_POS (a_this, &init_pos);
350 PEEK_BYTE (a_this, 1, &next_chars[0]);
351 PEEK_BYTE (a_this, 2, &next_chars[1]);
353 if ((next_chars[0] == '\r' && next_chars[1] == '\n')) {
354 SKIP_BYTES (a_this, 1);
355 if (a_location) {
356 cr_tknzr_get_parsing_location
357 (a_this, a_location) ;
358 }
359 SKIP_CHARS (a_this, 1);
361 RECORD_CUR_BYTE_ADDR (a_this, a_end);
363 status = CR_OK;
364 } else if (next_chars[0] == '\n'
365 || next_chars[0] == '\r' || next_chars[0] == '\f') {
366 SKIP_CHARS (a_this, 1);
367 if (a_location) {
368 cr_tknzr_get_parsing_location
369 (a_this, a_location) ;
370 }
371 RECORD_CUR_BYTE_ADDR (a_this, a_start);
372 *a_end = *a_start;
373 status = CR_OK;
374 } else {
375 status = CR_PARSING_ERROR;
376 goto error;
377 }
378 return CR_OK ;
380 error:
381 cr_tknzr_set_cur_pos (a_this, &init_pos) ;
382 return status;
383 }
385 /**
386 *Go ahead in the parser input, skipping all the spaces.
387 *If the next char if not a white space, this function does nothing.
388 *In any cases, it stops when it encounters a non white space character.
389 *
390 *@param a_this the current instance of #CRTknzr.
391 *@return CR_OK upon successfull completion, an error code otherwise.
392 */
393 static enum CRStatus
394 cr_tknzr_try_to_skip_spaces (CRTknzr * a_this)
395 {
396 enum CRStatus status = CR_ERROR;
397 guint32 cur_char = 0;
399 g_return_val_if_fail (a_this && PRIVATE (a_this)
400 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
402 status = cr_input_peek_char (PRIVATE (a_this)->input, &cur_char);
404 if (status != CR_OK) {
405 if (status == CR_END_OF_INPUT_ERROR)
406 return CR_OK;
407 return status;
408 }
410 if (cr_utils_is_white_space (cur_char) == TRUE) {
411 gulong nb_chars = -1; /*consume all spaces */
413 status = cr_input_consume_white_spaces
414 (PRIVATE (a_this)->input, &nb_chars);
415 }
417 return status;
418 }
420 /**
421 *Parses a "comment" as defined in the css spec at [4.1.1]:
422 *COMMENT ::= \/\*[^*]*\*+([^/][^*]*\*+)*\/ .
423 *This complex regexp is just to say that comments start
424 *with the two chars '/''*' and ends with the two chars '*''/'.
425 *It also means that comments cannot be nested.
426 *So based on that, I've just tried to implement the parsing function
427 *simply and in a straight forward manner.
428 */
429 static enum CRStatus
430 cr_tknzr_parse_comment (CRTknzr * a_this,
431 CRString ** a_comment)
432 {
433 enum CRStatus status = CR_OK;
434 CRInputPos init_pos;
435 guint32 cur_char = 0, next_char= 0;
436 CRString *comment = NULL;
437 CRParsingLocation loc = {0,0,0} ;
439 g_return_val_if_fail (a_this && PRIVATE (a_this)
440 && PRIVATE (a_this)->input,
441 CR_BAD_PARAM_ERROR);
443 RECORD_INITIAL_POS (a_this, &init_pos);
444 READ_NEXT_CHAR (a_this, &cur_char) ;
445 ENSURE_PARSING_COND (cur_char == '/');
446 cr_tknzr_get_parsing_location (a_this, &loc) ;
448 READ_NEXT_CHAR (a_this, &cur_char);
449 ENSURE_PARSING_COND (cur_char == '*');
450 comment = cr_string_new ();
451 for (;;) {
452 READ_NEXT_CHAR (a_this, &cur_char);
454 /*make sure there are no nested comments */
455 if (cur_char == '/') {
456 READ_NEXT_CHAR (a_this, &cur_char);
457 ENSURE_PARSING_COND (cur_char != '*');
458 g_string_append_c (comment->stryng, '/');
459 g_string_append_unichar (comment->stryng,
460 cur_char);
461 continue;
462 }
464 /*Detect the end of the comments region */
465 if (cur_char == '*') {
466 PEEK_NEXT_CHAR (a_this, &next_char);
468 if (next_char == '/') {
469 /*
470 *end of comments region
471 *Now, call the right SAC callback.
472 */
473 SKIP_CHARS (a_this, 1) ;
474 status = CR_OK;
475 break;
476 } else {
477 g_string_append_c (comment->stryng,
478 '*');
479 }
480 }
481 g_string_append_unichar (comment->stryng, cur_char);
482 }
484 if (status == CR_OK) {
485 cr_parsing_location_copy (&comment->location,
486 &loc) ;
487 *a_comment = comment;
488 return CR_OK;
489 }
490 error:
492 if (comment) {
493 cr_string_destroy (comment);
494 comment = NULL;
495 }
497 cr_tknzr_set_cur_pos (a_this, &init_pos);
499 return status;
500 }
502 /**
503 *Parses an 'unicode' escape sequence defined
504 *in css spec at chap 4.1.1:
505 *unicode ::= \\[0-9a-f]{1,6}[ \n\r\t\f]?
506 *@param a_this the current instance of #CRTknzr.
507 *@param a_start out parameter. A pointer to the start
508 *of the unicode escape sequence. Must *NOT* be deleted by
509 *the caller.
510 *@param a_end out parameter. A pointer to the last character
511 *of the unicode escape sequence. Must *NOT* be deleted by the caller.
512 *@return CR_OK if parsing succeded, an error code otherwise.
513 *Error code can be either CR_PARSING_ERROR if the string
514 *parsed just doesn't
515 *respect the production or another error if a
516 *lower level error occured.
517 */
518 static enum CRStatus
519 cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
520 guint32 * a_unicode,
521 CRParsingLocation *a_location)
522 {
523 guint32 cur_char;
524 CRInputPos init_pos;
525 glong occur = 0;
526 guint32 unicode = 0;
527 guchar *tmp_char_ptr1 = NULL,
528 *tmp_char_ptr2 = NULL;
529 enum CRStatus status = CR_OK;
531 g_return_val_if_fail (a_this && PRIVATE (a_this)
532 && a_unicode, CR_BAD_PARAM_ERROR);
534 /*first, let's backup the current position pointer */
535 RECORD_INITIAL_POS (a_this, &init_pos);
537 READ_NEXT_CHAR (a_this, &cur_char);
539 if (cur_char != '\\') {
540 status = CR_PARSING_ERROR;
541 goto error;
542 }
543 if (a_location) {
544 cr_tknzr_get_parsing_location
545 (a_this, a_location) ;
546 }
547 PEEK_NEXT_CHAR (a_this, &cur_char);
549 for (occur = 0, unicode = 0; ((cur_char >= '0' && cur_char <= '9')
550 || (cur_char >= 'a' && cur_char <= 'f')
551 || (cur_char >= 'A' && cur_char <= 'F'))
552 && occur < 6; occur++) {
553 gint cur_char_val = 0;
555 READ_NEXT_CHAR (a_this, &cur_char);
557 if ((cur_char >= '0' && cur_char <= '9')) {
558 cur_char_val = (cur_char - '0');
559 } else if ((cur_char >= 'a' && cur_char <= 'f')) {
560 cur_char_val = 10 + (cur_char - 'a');
561 } else if ((cur_char >= 'A' && cur_char <= 'F')) {
562 cur_char_val = 10 + (cur_char - 'A');
563 }
565 unicode = unicode * 10 + cur_char_val;
567 PEEK_NEXT_CHAR (a_this, &cur_char);
568 }
570 if (occur == 5) {
571 /*
572 *the unicode escape is 6 digit length
573 */
575 /*
576 *parse one space that may
577 *appear just after the unicode
578 *escape.
579 */
580 cr_tknzr_parse_w (a_this, &tmp_char_ptr1,
581 &tmp_char_ptr2, NULL);
582 status = CR_OK;
583 } else {
584 /*
585 *The unicode escape is less than
586 *6 digit length. The character
587 *that comes right after the escape
588 *must be a white space.
589 */
590 status = cr_tknzr_parse_w (a_this, &tmp_char_ptr1,
591 &tmp_char_ptr2, NULL);
592 }
594 if (status == CR_OK) {
595 *a_unicode = unicode;
596 return CR_OK;
597 }
599 error:
600 /*
601 *restore the initial position pointer backuped at
602 *the beginning of this function.
603 */
604 cr_tknzr_set_cur_pos (a_this, &init_pos);
606 return status;
607 }
609 /**
610 *parses an escape sequence as defined by the css spec:
611 *escape ::= {unicode}|\\[ -~\200-\4177777]
612 *@param a_this the current instance of #CRTknzr .
613 */
614 static enum CRStatus
615 cr_tknzr_parse_escape (CRTknzr * a_this, guint32 * a_esc_code,
616 CRParsingLocation *a_location)
617 {
618 enum CRStatus status = CR_OK;
619 guint32 cur_char = 0;
620 CRInputPos init_pos;
621 guchar next_chars[2];
623 g_return_val_if_fail (a_this && PRIVATE (a_this)
624 && a_esc_code, CR_BAD_PARAM_ERROR);
626 RECORD_INITIAL_POS (a_this, &init_pos);
628 PEEK_BYTE (a_this, 1, &next_chars[0]);
629 PEEK_BYTE (a_this, 2, &next_chars[1]);
631 if (next_chars[0] != '\\') {
632 status = CR_PARSING_ERROR;
633 goto error;
634 }
636 if ((next_chars[1] >= '0' && next_chars[1] <= '9')
637 || (next_chars[1] >= 'a' && next_chars[1] <= 'f')
638 || (next_chars[1] >= 'A' && next_chars[1] <= 'F')) {
639 status = cr_tknzr_parse_unicode_escape (a_this, a_esc_code,
640 a_location);
641 } else {
642 /*consume the '\' char */
643 READ_NEXT_CHAR (a_this, &cur_char);
644 if (a_location) {
645 cr_tknzr_get_parsing_location (a_this,
646 a_location) ;
647 }
648 /*then read the char after the '\' */
649 READ_NEXT_CHAR (a_this, &cur_char);
651 if (cur_char != ' ' && (cur_char < 200 || cur_char > 4177777)) {
652 status = CR_PARSING_ERROR;
653 goto error;
654 }
655 *a_esc_code = cur_char;
657 }
658 if (status == CR_OK) {
659 return CR_OK;
660 }
661 error:
662 cr_tknzr_set_cur_pos (a_this, &init_pos);
663 return status;
664 }
666 /**
667 *Parses a string type as defined in css spec [4.1.1]:
668 *
669 *string ::= {string1}|{string2}
670 *string1 ::= \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
671 *string2 ::= \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
672 *
673 *@param a_this the current instance of #CRTknzr.
674 *@param a_start out parameter. Upon successfull completion,
675 *points to the beginning of the string, points to an undefined value
676 *otherwise.
677 *@param a_end out parameter. Upon successfull completion, points to
678 *the beginning of the string, points to an undefined value otherwise.
679 *@return CR_OK upon successfull completion, an error code otherwise.
680 */
681 static enum CRStatus
682 cr_tknzr_parse_string (CRTknzr * a_this, CRString ** a_str)
683 {
684 guint32 cur_char = 0,
685 delim = 0;
686 CRInputPos init_pos;
687 enum CRStatus status = CR_OK;
688 CRString *str = NULL;
690 g_return_val_if_fail (a_this && PRIVATE (a_this)
691 && PRIVATE (a_this)->input
692 && a_str, CR_BAD_PARAM_ERROR);
694 RECORD_INITIAL_POS (a_this, &init_pos);
695 READ_NEXT_CHAR (a_this, &cur_char);
697 if (cur_char == '"')
698 delim = '"';
699 else if (cur_char == '\'')
700 delim = '\'';
701 else {
702 status = CR_PARSING_ERROR;
703 goto error;
704 }
705 str = cr_string_new ();
706 if (str) {
707 cr_tknzr_get_parsing_location
708 (a_this, &str->location) ;
709 }
710 for (;;) {
711 guchar next_chars[2] = { 0 };
713 PEEK_BYTE (a_this, 1, &next_chars[0]);
715 if (next_chars[0] == '\\') {
716 guchar *tmp_char_ptr1 = NULL,
717 *tmp_char_ptr2 = NULL;
718 guint32 esc_code = 0;
720 PEEK_BYTE (a_this, 2, &next_chars[1]);
722 if (next_chars[1] == '\'' || next_chars[1] == '"') {
723 g_string_append_unichar (str->stryng,
724 next_chars[1]);
725 SKIP_BYTES (a_this, 2);
726 status = CR_OK;
727 } else {
728 status = cr_tknzr_parse_escape
729 (a_this, &esc_code, NULL);
731 if (status == CR_OK) {
732 g_string_append_unichar
733 (str->stryng,
734 esc_code);
735 }
736 }
738 if (status != CR_OK) {
739 /*
740 *consume the '\' char, and try to parse
741 *a newline.
742 */
743 READ_NEXT_CHAR (a_this, &cur_char);
745 status = cr_tknzr_parse_nl
746 (a_this, &tmp_char_ptr1,
747 &tmp_char_ptr2, NULL);
748 }
750 CHECK_PARSING_STATUS (status, FALSE);
751 } else if (strchr ("\t !#$%&", next_chars[0])
752 || (next_chars[0] >= '(' && next_chars[0] <= '~')) {
753 READ_NEXT_CHAR (a_this, &cur_char);
754 g_string_append_unichar (str->stryng,
755 cur_char);
756 status = CR_OK;
757 }
759 else if (cr_utils_is_nonascii (next_chars[0])) {
760 READ_NEXT_CHAR (a_this, &cur_char);
761 g_string_append_unichar (str->stryng, cur_char);
762 } else if (next_chars[0] == delim) {
763 READ_NEXT_CHAR (a_this, &cur_char);
764 break;
765 } else {
766 status = CR_PARSING_ERROR;
767 goto error;
768 }
769 }
771 if (status == CR_OK) {
772 if (*a_str == NULL) {
773 *a_str = str;
774 str = NULL;
775 } else {
776 (*a_str)->stryng = g_string_append_len
777 ((*a_str)->stryng,
778 str->stryng->str,
779 str->stryng->len);
780 cr_string_destroy (str);
781 }
782 return CR_OK;
783 }
785 error:
787 if (str) {
788 cr_string_destroy (str) ;
789 str = NULL;
790 }
791 cr_tknzr_set_cur_pos (a_this, &init_pos);
792 return status;
793 }
795 /**
796 *Parses the an nmstart as defined by the css2 spec [4.1.1]:
797 * nmstart [a-zA-Z]|{nonascii}|{escape}
798 *
799 *@param a_this the current instance of #CRTknzr.
800 *@param a_start out param. A pointer to the starting point of
801 *the token.
802 *@param a_end out param. A pointer to the ending point of the
803 *token.
804 *@param a_char out param. The actual parsed nmchar.
805 *@return CR_OK upon successfull completion,
806 *an error code otherwise.
807 */
808 static enum CRStatus
809 cr_tknzr_parse_nmstart (CRTknzr * a_this,
810 guint32 * a_char,
811 CRParsingLocation *a_location)
812 {
813 CRInputPos init_pos;
814 enum CRStatus status = CR_OK;
815 guint32 cur_char = 0,
816 next_char = 0;
818 g_return_val_if_fail (a_this && PRIVATE (a_this)
819 && PRIVATE (a_this)->input
820 && a_char, CR_BAD_PARAM_ERROR);
822 RECORD_INITIAL_POS (a_this, &init_pos);
824 PEEK_NEXT_CHAR (a_this, &next_char);
826 if (next_char == '\\') {
827 status = cr_tknzr_parse_escape (a_this, a_char,
828 a_location);
830 if (status != CR_OK)
831 goto error;
833 } else if (cr_utils_is_nonascii (next_char) == TRUE
834 || ((next_char >= 'a') && (next_char <= 'z'))
835 || ((next_char >= 'A') && (next_char <= 'Z'))
836 ) {
837 READ_NEXT_CHAR (a_this, &cur_char);
838 if (a_location) {
839 cr_tknzr_get_parsing_location (a_this,
840 a_location) ;
841 }
842 *a_char = cur_char;
843 status = CR_OK;
844 } else {
845 status = CR_PARSING_ERROR;
846 goto error;
847 }
849 return CR_OK;
851 error:
852 cr_tknzr_set_cur_pos (a_this, &init_pos);
854 return status;
856 }
858 /**
859 *Parses an nmchar as described in the css spec at
860 *chap 4.1.1:
861 *nmchar ::= [a-z0-9-]|{nonascii}|{escape}
862 *
863 *Humm, I have added the possibility for nmchar to
864 *contain upper case letters.
865 *
866 *@param a_this the current instance of #CRTknzr.
867 *@param a_start out param. A pointer to the starting point of
868 *the token.
869 *@param a_end out param. A pointer to the ending point of the
870 *token.
871 *@param a_char out param. The actual parsed nmchar.
872 *@return CR_OK upon successfull completion,
873 *an error code otherwise.
874 */
875 static enum CRStatus
876 cr_tknzr_parse_nmchar (CRTknzr * a_this, guint32 * a_char,
877 CRParsingLocation *a_location)
878 {
879 guint32 cur_char = 0,
880 next_char = 0;
881 enum CRStatus status = CR_OK;
882 CRInputPos init_pos;
884 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
885 CR_BAD_PARAM_ERROR);
887 RECORD_INITIAL_POS (a_this, &init_pos);
889 status = cr_input_peek_char (PRIVATE (a_this)->input,
890 &next_char) ;
891 if (status != CR_OK)
892 goto error;
894 if (next_char == '\\') {
895 status = cr_tknzr_parse_escape (a_this, a_char,
896 a_location);
898 if (status != CR_OK)
899 goto error;
901 } else if (cr_utils_is_nonascii (next_char) == TRUE
902 || ((next_char >= 'a') && (next_char <= 'z'))
903 || ((next_char >= 'A') && (next_char <= 'Z'))
904 || ((next_char >= '0') && (next_char <= '9'))
905 || (next_char == '-')
906 || (next_char == '_') /*'_' not allowed by the spec. */
907 ) {
908 READ_NEXT_CHAR (a_this, &cur_char);
909 *a_char = cur_char;
910 status = CR_OK;
911 if (a_location) {
912 cr_tknzr_get_parsing_location
913 (a_this, a_location) ;
914 }
915 } else {
916 status = CR_PARSING_ERROR;
917 goto error;
918 }
919 return CR_OK;
921 error:
922 cr_tknzr_set_cur_pos (a_this, &init_pos);
923 return status;
924 }
926 /**
927 *Parses an "ident" as defined in css spec [4.1.1]:
928 *ident ::= {nmstart}{nmchar}*
929 *
930 *Actually parses it using the css3 grammar:
931 *ident ::= -?{nmstart}{nmchar}*
932 *@param a_this the currens instance of #CRTknzr.
933 *
934 *@param a_str a pointer to parsed ident. If *a_str is NULL,
935 *this function allocates a new instance of CRString. If not,
936 *the function just appends the parsed string to the one passed.
937 *In both cases it is up to the caller to free *a_str.
938 *
939 *@return CR_OK upon successfull completion, an error code
940 *otherwise.
941 */
942 static enum CRStatus
943 cr_tknzr_parse_ident (CRTknzr * a_this, CRString ** a_str)
944 {
945 guint32 tmp_char = 0;
946 CRString *stringue = NULL ;
947 CRInputPos init_pos;
948 enum CRStatus status = CR_OK;
949 gboolean location_is_set = FALSE ;
951 g_return_val_if_fail (a_this && PRIVATE (a_this)
952 && PRIVATE (a_this)->input
953 && a_str, CR_BAD_PARAM_ERROR);
955 RECORD_INITIAL_POS (a_this, &init_pos);
956 PEEK_NEXT_CHAR (a_this, &tmp_char) ;
957 stringue = cr_string_new () ;
958 g_return_val_if_fail (stringue,
959 CR_OUT_OF_MEMORY_ERROR) ;
961 if (tmp_char == '-') {
962 READ_NEXT_CHAR (a_this, &tmp_char) ;
963 cr_tknzr_get_parsing_location
964 (a_this, &stringue->location) ;
965 location_is_set = TRUE ;
966 g_string_append_unichar (stringue->stryng,
967 tmp_char) ;
968 }
969 status = cr_tknzr_parse_nmstart (a_this, &tmp_char, NULL);
970 if (status != CR_OK) {
971 status = CR_PARSING_ERROR;
972 goto end ;
973 }
974 if (location_is_set == FALSE) {
975 cr_tknzr_get_parsing_location
976 (a_this, &stringue->location) ;
977 location_is_set = TRUE ;
978 }
979 g_string_append_unichar (stringue->stryng, tmp_char);
980 for (;;) {
981 status = cr_tknzr_parse_nmchar (a_this,
982 &tmp_char,
983 NULL);
984 if (status != CR_OK) {
985 status = CR_OK ;
986 break;
987 }
988 g_string_append_unichar (stringue->stryng, tmp_char);
989 }
990 if (status == CR_OK) {
991 if (!*a_str) {
992 *a_str = stringue ;
994 } else {
995 g_string_append_len ((*a_str)->stryng,
996 stringue->stryng->str,
997 stringue->stryng->len) ;
998 cr_string_destroy (stringue) ;
999 }
1000 stringue = NULL ;
1001 }
1003 error:
1004 end:
1005 if (stringue) {
1006 cr_string_destroy (stringue) ;
1007 stringue = NULL ;
1008 }
1009 if (status != CR_OK ) {
1010 cr_tknzr_set_cur_pos (a_this, &init_pos) ;
1011 }
1012 return status ;
1013 }
1016 /**
1017 *Parses a "name" as defined by css spec [4.1.1]:
1018 *name ::= {nmchar}+
1019 *
1020 *@param a_this the current instance of #CRTknzr.
1021 *
1022 *@param a_str out parameter. A pointer to the successfully parsed
1023 *name. If *a_str is set to NULL, this function allocates a new instance
1024 *of CRString. If not, it just appends the parsed name to the passed *a_str.
1025 *In both cases, it is up to the caller to free *a_str.
1026 *
1027 *@return CR_OK upon successfull completion, an error code otherwise.
1028 */
1029 static enum CRStatus
1030 cr_tknzr_parse_name (CRTknzr * a_this,
1031 CRString ** a_str)
1032 {
1033 guint32 tmp_char = 0;
1034 CRInputPos init_pos;
1035 enum CRStatus status = CR_OK;
1036 gboolean str_needs_free = FALSE,
1037 is_first_nmchar=TRUE ;
1038 glong i = 0;
1039 CRParsingLocation loc = {0,0,0} ;
1041 g_return_val_if_fail (a_this && PRIVATE (a_this)
1042 && PRIVATE (a_this)->input
1043 && a_str,
1044 CR_BAD_PARAM_ERROR) ;
1046 RECORD_INITIAL_POS (a_this, &init_pos);
1048 if (*a_str == NULL) {
1049 *a_str = cr_string_new ();
1050 str_needs_free = TRUE;
1051 }
1052 for (i = 0;; i++) {
1053 if (is_first_nmchar == TRUE) {
1054 status = cr_tknzr_parse_nmchar
1055 (a_this, &tmp_char,
1056 &loc) ;
1057 is_first_nmchar = FALSE ;
1058 } else {
1059 status = cr_tknzr_parse_nmchar
1060 (a_this, &tmp_char, NULL) ;
1061 }
1062 if (status != CR_OK)
1063 break;
1064 g_string_append_unichar ((*a_str)->stryng,
1065 tmp_char);
1066 }
1067 if (i > 0) {
1068 cr_parsing_location_copy
1069 (&(*a_str)->location, &loc) ;
1070 return CR_OK;
1071 }
1072 if (str_needs_free == TRUE && *a_str) {
1073 cr_string_destroy (*a_str);
1074 *a_str = NULL;
1075 }
1076 cr_tknzr_set_cur_pos (a_this, &init_pos);
1077 return CR_PARSING_ERROR;
1078 }
1080 /**
1081 *Parses a "hash" as defined by the css spec in [4.1.1]:
1082 *HASH ::= #{name}
1083 */
1084 static enum CRStatus
1085 cr_tknzr_parse_hash (CRTknzr * a_this, CRString ** a_str)
1086 {
1087 guint32 cur_char = 0;
1088 CRInputPos init_pos;
1089 enum CRStatus status = CR_OK;
1090 gboolean str_needs_free = FALSE;
1091 CRParsingLocation loc = {0,0,0} ;
1093 g_return_val_if_fail (a_this && PRIVATE (a_this)
1094 && PRIVATE (a_this)->input,
1095 CR_BAD_PARAM_ERROR);
1097 RECORD_INITIAL_POS (a_this, &init_pos);
1098 READ_NEXT_CHAR (a_this, &cur_char);
1099 if (cur_char != '#') {
1100 status = CR_PARSING_ERROR;
1101 goto error;
1102 }
1103 if (*a_str == NULL) {
1104 *a_str = cr_string_new ();
1105 str_needs_free = TRUE;
1106 }
1107 cr_tknzr_get_parsing_location (a_this,
1108 &loc) ;
1109 status = cr_tknzr_parse_name (a_this, a_str);
1110 cr_parsing_location_copy (&(*a_str)->location, &loc) ;
1111 if (status != CR_OK) {
1112 goto error;
1113 }
1114 return CR_OK;
1116 error:
1117 if (str_needs_free == TRUE && *a_str) {
1118 cr_string_destroy (*a_str);
1119 *a_str = NULL;
1120 }
1122 cr_tknzr_set_cur_pos (a_this, &init_pos);
1123 return status;
1124 }
1126 /**
1127 *Parses an uri as defined by the css spec [4.1.1]:
1128 * URI ::= url\({w}{string}{w}\)
1129 * |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
1130 *
1131 *@param a_this the current instance of #CRTknzr.
1132 *@param a_str the successfully parsed url.
1133 *@return CR_OK upon successfull completion, an error code otherwise.
1134 */
1135 static enum CRStatus
1136 cr_tknzr_parse_uri (CRTknzr * a_this,
1137 CRString ** a_str)
1138 {
1139 guint32 cur_char = 0;
1140 CRInputPos init_pos;
1141 enum CRStatus status = CR_PARSING_ERROR;
1142 guchar tab[4] = { 0 }, *tmp_ptr1 = NULL, *tmp_ptr2 = NULL;
1143 CRString *str = NULL;
1144 CRParsingLocation location = {0,0,0} ;
1146 g_return_val_if_fail (a_this
1147 && PRIVATE (a_this)
1148 && PRIVATE (a_this)->input
1149 && a_str,
1150 CR_BAD_PARAM_ERROR);
1152 RECORD_INITIAL_POS (a_this, &init_pos);
1154 PEEK_BYTE (a_this, 1, &tab[0]);
1155 PEEK_BYTE (a_this, 2, &tab[1]);
1156 PEEK_BYTE (a_this, 3, &tab[2]);
1157 PEEK_BYTE (a_this, 4, &tab[3]);
1159 if (tab[0] != 'u' || tab[1] != 'r' || tab[2] != 'l' || tab[3] != '(') {
1160 status = CR_PARSING_ERROR;
1161 goto error;
1162 }
1163 /*
1164 *Here, we want to skip 4 bytes ('u''r''l''(').
1165 *But we also need to keep track of the parsing location
1166 *of the 'u'. So, we skip 1 byte, we record the parsing
1167 *location, then we skip the 3 remaining bytes.
1168 */
1169 SKIP_CHARS (a_this, 1);
1170 cr_tknzr_get_parsing_location (a_this, &location) ;
1171 SKIP_CHARS (a_this, 3);
1172 cr_tknzr_try_to_skip_spaces (a_this);
1173 status = cr_tknzr_parse_string (a_this, a_str);
1175 if (status == CR_OK) {
1176 guint32 next_char = 0;
1177 status = cr_tknzr_parse_w (a_this, &tmp_ptr1,
1178 &tmp_ptr2, NULL);
1179 cr_tknzr_try_to_skip_spaces (a_this);
1180 PEEK_NEXT_CHAR (a_this, &next_char);
1181 if (next_char == ')') {
1182 READ_NEXT_CHAR (a_this, &cur_char);
1183 status = CR_OK;
1184 } else {
1185 status = CR_PARSING_ERROR;
1186 }
1187 }
1188 if (status != CR_OK) {
1189 str = cr_string_new ();
1190 for (;;) {
1191 guint32 next_char = 0;
1192 PEEK_NEXT_CHAR (a_this, &next_char);
1193 if (strchr ("!#$%&", next_char)
1194 || (next_char >= '*' && next_char <= '~')
1195 || (cr_utils_is_nonascii (next_char) == TRUE)) {
1196 READ_NEXT_CHAR (a_this, &cur_char);
1197 g_string_append_unichar
1198 (str->stryng, cur_char);
1199 status = CR_OK;
1200 } else {
1201 guint32 esc_code = 0;
1202 status = cr_tknzr_parse_escape
1203 (a_this, &esc_code, NULL);
1204 if (status == CR_OK) {
1205 g_string_append_unichar
1206 (str->stryng,
1207 esc_code);
1208 } else {
1209 status = CR_OK;
1210 break;
1211 }
1212 }
1213 }
1214 cr_tknzr_try_to_skip_spaces (a_this);
1215 READ_NEXT_CHAR (a_this, &cur_char);
1216 if (cur_char == ')') {
1217 status = CR_OK;
1218 } else {
1219 status = CR_PARSING_ERROR;
1220 goto error;
1221 }
1222 if (str) {
1223 if (*a_str == NULL) {
1224 *a_str = str;
1225 str = NULL;
1226 } else {
1227 g_string_append_len
1228 ((*a_str)->stryng,
1229 str->stryng->str,
1230 str->stryng->len);
1231 cr_string_destroy (str);
1232 }
1233 }
1234 }
1236 cr_parsing_location_copy
1237 (&(*a_str)->location,
1238 &location) ;
1239 return CR_OK ;
1240 error:
1241 if (str) {
1242 cr_string_destroy (str);
1243 str = NULL;
1244 }
1245 cr_tknzr_set_cur_pos (a_this, &init_pos);
1246 return status;
1247 }
1249 /**
1250 *parses an RGB as defined in the css2 spec.
1251 *rgb: rgb '('S*{num}%?S* ',' {num}#?S*,S*{num}#?S*')'
1252 *
1253 *@param a_this the "this pointer" of the current instance of
1254 *@param a_rgb out parameter the parsed rgb.
1255 *@return CR_OK upon successfull completion, an error code otherwise.
1256 */
1257 static enum CRStatus
1258 cr_tknzr_parse_rgb (CRTknzr * a_this, CRRgb ** a_rgb)
1259 {
1260 enum CRStatus status = CR_OK;
1261 CRInputPos init_pos;
1262 CRNum *num = NULL;
1263 guchar next_bytes[3] = { 0 }, cur_byte = 0;
1264 glong red = 0,
1265 green = 0,
1266 blue = 0,
1267 i = 0;
1268 gboolean is_percentage = FALSE;
1269 CRParsingLocation location = {0,0,0} ;
1271 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1273 RECORD_INITIAL_POS (a_this, &init_pos);
1275 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1276 PEEK_BYTE (a_this, 2, &next_bytes[1]);
1277 PEEK_BYTE (a_this, 3, &next_bytes[2]);
1279 if (((next_bytes[0] == 'r') || (next_bytes[0] == 'R'))
1280 && ((next_bytes[1] == 'g') || (next_bytes[1] == 'G'))
1281 && ((next_bytes[2] == 'b') || (next_bytes[2] == 'B'))) {
1282 SKIP_CHARS (a_this, 1);
1283 cr_tknzr_get_parsing_location (a_this, &location) ;
1284 SKIP_CHARS (a_this, 2);
1285 } else {
1286 status = CR_PARSING_ERROR;
1287 goto error;
1288 }
1289 READ_NEXT_BYTE (a_this, &cur_byte);
1290 ENSURE_PARSING_COND (cur_byte == '(');
1292 cr_tknzr_try_to_skip_spaces (a_this);
1293 status = cr_tknzr_parse_num (a_this, &num);
1294 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));
1296 red = (glong)num->val;
1297 cr_num_destroy (num);
1298 num = NULL;
1300 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1301 if (next_bytes[0] == '%') {
1302 SKIP_CHARS (a_this, 1);
1303 is_percentage = TRUE;
1304 }
1305 cr_tknzr_try_to_skip_spaces (a_this);
1307 for (i = 0; i < 2; i++) {
1308 READ_NEXT_BYTE (a_this, &cur_byte);
1309 ENSURE_PARSING_COND (cur_byte == ',');
1311 cr_tknzr_try_to_skip_spaces (a_this);
1312 status = cr_tknzr_parse_num (a_this, &num);
1313 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));
1315 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1316 if (next_bytes[0] == '%') {
1317 SKIP_CHARS (a_this, 1);
1318 is_percentage = 1;
1319 }
1321 if (i == 0) {
1322 green = (glong)num->val;
1323 } else if (i == 1) {
1324 blue = (glong)num->val;
1325 }
1327 if (num) {
1328 cr_num_destroy (num);
1329 num = NULL;
1330 }
1331 cr_tknzr_try_to_skip_spaces (a_this);
1332 }
1334 READ_NEXT_BYTE (a_this, &cur_byte);
1335 if (*a_rgb == NULL) {
1336 *a_rgb = cr_rgb_new_with_vals (red, green, blue,
1337 is_percentage);
1339 if (*a_rgb == NULL) {
1340 status = CR_ERROR;
1341 goto error;
1342 }
1343 status = CR_OK;
1344 } else {
1345 (*a_rgb)->red = red;
1346 (*a_rgb)->green = green;
1347 (*a_rgb)->blue = blue;
1348 (*a_rgb)->is_percentage = is_percentage;
1350 status = CR_OK;
1351 }
1353 if (status == CR_OK) {
1354 if (a_rgb && *a_rgb) {
1355 cr_parsing_location_copy
1356 (&(*a_rgb)->location,
1357 &location) ;
1358 }
1359 return CR_OK;
1360 }
1362 error:
1363 if (num) {
1364 cr_num_destroy (num);
1365 num = NULL;
1366 }
1368 cr_tknzr_set_cur_pos (a_this, &init_pos);
1369 return CR_OK;
1370 }
1372 /**
1373 *Parses a atkeyword as defined by the css spec in [4.1.1]:
1374 *ATKEYWORD ::= @{ident}
1375 *
1376 *@param a_this the "this pointer" of the current instance of
1377 *#CRTknzr.
1378 *
1379 *@param a_str out parameter. The parsed atkeyword. If *a_str is
1380 *set to NULL this function allocates a new instance of CRString and
1381 *sets it to the parsed atkeyword. If not, this function just appends
1382 *the parsed atkeyword to the end of *a_str. In both cases it is up to
1383 *the caller to free *a_str.
1384 *
1385 *@return CR_OK upon successfull completion, an error code otherwise.
1386 */
1387 static enum CRStatus
1388 cr_tknzr_parse_atkeyword (CRTknzr * a_this,
1389 CRString ** a_str)
1390 {
1391 guint32 cur_char = 0;
1392 CRInputPos init_pos;
1393 gboolean str_needs_free = FALSE;
1394 enum CRStatus status = CR_OK;
1396 g_return_val_if_fail (a_this && PRIVATE (a_this)
1397 && PRIVATE (a_this)->input
1398 && a_str, CR_BAD_PARAM_ERROR);
1400 RECORD_INITIAL_POS (a_this, &init_pos);
1402 READ_NEXT_CHAR (a_this, &cur_char);
1404 if (cur_char != '@') {
1405 status = CR_PARSING_ERROR;
1406 goto error;
1407 }
1409 if (*a_str == NULL) {
1410 *a_str = cr_string_new ();
1411 str_needs_free = TRUE;
1412 }
1413 status = cr_tknzr_parse_ident (a_this, a_str);
1414 if (status != CR_OK) {
1415 goto error;
1416 }
1417 return CR_OK;
1418 error:
1420 if (str_needs_free == TRUE && *a_str) {
1421 cr_string_destroy (*a_str);
1422 *a_str = NULL;
1423 }
1424 cr_tknzr_set_cur_pos (a_this, &init_pos);
1425 return status;
1426 }
1428 static enum CRStatus
1429 cr_tknzr_parse_important (CRTknzr * a_this,
1430 CRParsingLocation *a_location)
1431 {
1432 guint32 cur_char = 0;
1433 CRInputPos init_pos;
1434 enum CRStatus status = CR_OK;
1436 g_return_val_if_fail (a_this && PRIVATE (a_this)
1437 && PRIVATE (a_this)->input,
1438 CR_BAD_PARAM_ERROR);
1440 RECORD_INITIAL_POS (a_this, &init_pos);
1441 READ_NEXT_CHAR (a_this, &cur_char);
1442 ENSURE_PARSING_COND (cur_char == '!');
1443 if (a_location) {
1444 cr_tknzr_get_parsing_location (a_this,
1445 a_location) ;
1446 }
1447 cr_tknzr_try_to_skip_spaces (a_this);
1449 if (BYTE (PRIVATE (a_this)->input, 1, NULL) == 'i'
1450 && BYTE (PRIVATE (a_this)->input, 2, NULL) == 'm'
1451 && BYTE (PRIVATE (a_this)->input, 3, NULL) == 'p'
1452 && BYTE (PRIVATE (a_this)->input, 4, NULL) == 'o'
1453 && BYTE (PRIVATE (a_this)->input, 5, NULL) == 'r'
1454 && BYTE (PRIVATE (a_this)->input, 6, NULL) == 't'
1455 && BYTE (PRIVATE (a_this)->input, 7, NULL) == 'a'
1456 && BYTE (PRIVATE (a_this)->input, 8, NULL) == 'n'
1457 && BYTE (PRIVATE (a_this)->input, 9, NULL) == 't') {
1458 SKIP_BYTES (a_this, 9);
1459 if (a_location) {
1460 cr_tknzr_get_parsing_location (a_this,
1461 a_location) ;
1462 }
1463 return CR_OK;
1464 } else {
1465 status = CR_PARSING_ERROR;
1466 }
1468 error:
1469 cr_tknzr_set_cur_pos (a_this, &init_pos);
1471 return status;
1472 }
1474 /**
1475 *Parses a num as defined in the css spec [4.1.1]:
1476 *[0-9]+|[0-9]*\.[0-9]+
1477 *@param a_this the current instance of #CRTknzr.
1478 *@param a_num out parameter. The parsed number.
1479 *@return CR_OK upon successfull completion,
1480 *an error code otherwise.
1481 */
1482 static enum CRStatus
1483 cr_tknzr_parse_num (CRTknzr * a_this,
1484 CRNum ** a_num)
1485 {
1486 enum CRStatus status = CR_PARSING_ERROR;
1487 enum CRNumType val_type = NUM_GENERIC;
1488 gboolean parsing_dec, /* true iff seen decimal point. */
1489 parsed; /* true iff the substring seen so far is a valid CSS
1490 number, i.e. `[0-9]+|[0-9]*\.[0-9]+'. */
1491 guint32 cur_char = 0,
1492 next_char = 0;
1493 gdouble numerator, denominator = 1;
1494 CRInputPos init_pos;
1495 CRParsingLocation location = {0,0,0} ;
1497 g_return_val_if_fail (a_this && PRIVATE (a_this)
1498 && PRIVATE (a_this)->input,
1499 CR_BAD_PARAM_ERROR);
1501 RECORD_INITIAL_POS (a_this, &init_pos);
1502 READ_NEXT_CHAR (a_this, &cur_char);
1503 if (IS_NUM (cur_char)) {
1504 numerator = (cur_char - '0');
1505 parsing_dec = FALSE;
1506 parsed = TRUE;
1507 } else if (cur_char == '.') {
1508 numerator = 0;
1509 parsing_dec = TRUE;
1510 parsed = FALSE;
1511 } else {
1512 status = CR_PARSING_ERROR;
1513 goto error;
1514 }
1515 cr_tknzr_get_parsing_location (a_this, &location) ;
1517 for (;;) {
1518 status = cr_tknzr_peek_char (a_this, &next_char);
1519 if (status != CR_OK) {
1520 if (status == CR_END_OF_INPUT_ERROR)
1521 status = CR_OK;
1522 break;
1523 }
1524 if (next_char == '.') {
1525 if (parsing_dec) {
1526 status = CR_PARSING_ERROR;
1527 goto error;
1528 }
1530 READ_NEXT_CHAR (a_this, &cur_char);
1531 parsing_dec = TRUE;
1532 parsed = FALSE; /* In CSS, there must be at least
1533 one digit after `.'. */
1534 } else if (IS_NUM (next_char)) {
1535 READ_NEXT_CHAR (a_this, &cur_char);
1536 parsed = TRUE;
1538 numerator = numerator * 10 + (cur_char - '0');
1539 if (parsing_dec) {
1540 denominator *= 10;
1541 }
1542 } else {
1543 break;
1544 }
1545 }
1547 if (!parsed) {
1548 status = CR_PARSING_ERROR;
1549 }
1551 /*
1552 *Now, set the output param values.
1553 */
1554 if (status == CR_OK) {
1555 gdouble val = numerator / denominator;
1556 if (*a_num == NULL) {
1557 *a_num = cr_num_new_with_val (val, val_type);
1559 if (*a_num == NULL) {
1560 status = CR_ERROR;
1561 goto error;
1562 }
1563 } else {
1564 (*a_num)->val = val;
1565 (*a_num)->type = val_type;
1566 }
1567 cr_parsing_location_copy (&(*a_num)->location,
1568 &location) ;
1569 return CR_OK;
1570 }
1572 error:
1574 cr_tknzr_set_cur_pos (a_this, &init_pos);
1576 return status;
1577 }
1579 /*********************************************
1580 *PUBLIC methods
1581 ********************************************/
1583 CRTknzr *
1584 cr_tknzr_new (CRInput * a_input)
1585 {
1586 CRTknzr *result = (CRTknzr *)g_try_malloc (sizeof (CRTknzr));
1588 if (result == NULL) {
1589 cr_utils_trace_info ("Out of memory");
1590 return NULL;
1591 }
1593 memset (result, 0, sizeof (CRTknzr));
1595 result->priv = (CRTknzrPriv *)g_try_malloc (sizeof (CRTknzrPriv));
1597 if (result->priv == NULL) {
1598 cr_utils_trace_info ("Out of memory");
1600 if (result) {
1601 g_free (result);
1602 result = NULL;
1603 }
1605 return NULL;
1606 }
1607 memset (result->priv, 0, sizeof (CRTknzrPriv));
1608 if (a_input)
1609 cr_tknzr_set_input (result, a_input);
1610 return result;
1611 }
1613 CRTknzr *
1614 cr_tknzr_new_from_buf (guchar * a_buf, gulong a_len,
1615 enum CREncoding a_enc,
1616 gboolean a_free_at_destroy)
1617 {
1618 CRTknzr *result = NULL;
1619 CRInput *input = NULL;
1621 input = cr_input_new_from_buf (a_buf, a_len, a_enc,
1622 a_free_at_destroy);
1624 g_return_val_if_fail (input != NULL, NULL);
1626 result = cr_tknzr_new (input);
1628 return result;
1629 }
1631 CRTknzr *
1632 cr_tknzr_new_from_uri (const guchar * a_file_uri,
1633 enum CREncoding a_enc)
1634 {
1635 CRTknzr *result = NULL;
1636 CRInput *input = cr_input_new_from_uri ((gchar *)a_file_uri, a_enc);
1637 g_return_val_if_fail (input != NULL, NULL);
1639 result = cr_tknzr_new (input);
1641 return result;
1642 }
1644 void
1645 cr_tknzr_ref (CRTknzr * a_this)
1646 {
1647 g_return_if_fail (a_this && PRIVATE (a_this));
1649 PRIVATE (a_this)->ref_count++;
1650 }
1652 gboolean
1653 cr_tknzr_unref (CRTknzr * a_this)
1654 {
1655 g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
1657 if (PRIVATE (a_this)->ref_count > 0) {
1658 PRIVATE (a_this)->ref_count--;
1659 }
1661 if (PRIVATE (a_this)->ref_count == 0) {
1662 cr_tknzr_destroy (a_this);
1663 return TRUE;
1664 }
1666 return FALSE;
1667 }
1669 enum CRStatus
1670 cr_tknzr_set_input (CRTknzr * a_this, CRInput * a_input)
1671 {
1672 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1674 if (PRIVATE (a_this)->input) {
1675 cr_input_unref (PRIVATE (a_this)->input);
1676 }
1678 PRIVATE (a_this)->input = a_input;
1680 cr_input_ref (PRIVATE (a_this)->input);
1682 return CR_OK;
1683 }
1685 enum CRStatus
1686 cr_tknzr_get_input (CRTknzr * a_this, CRInput ** a_input)
1687 {
1688 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1690 *a_input = PRIVATE (a_this)->input;
1692 return CR_OK;
1693 }
1695 /*********************************
1696 *Tokenizer input handling routines
1697 *********************************/
1699 /**
1700 *Reads the next byte from the parser input stream.
1701 *@param a_this the "this pointer" of the current instance of
1702 *#CRParser.
1703 *@param a_byte out parameter the place where to store the byte
1704 *read.
1705 *@return CR_OK upon successfull completion, an error
1706 *code otherwise.
1707 */
1708 enum CRStatus
1709 cr_tknzr_read_byte (CRTknzr * a_this, guchar * a_byte)
1710 {
1711 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1713 return cr_input_read_byte (PRIVATE (a_this)->input, a_byte);
1715 }
1717 /**
1718 *Reads the next char from the parser input stream.
1719 *@param a_this the current instance of #CRTknzr.
1720 *@param a_char out parameter. The read char.
1721 *@return CR_OK upon successfull completion, an error code
1722 *otherwise.
1723 */
1724 enum CRStatus
1725 cr_tknzr_read_char (CRTknzr * a_this, guint32 * a_char)
1726 {
1727 g_return_val_if_fail (a_this && PRIVATE (a_this)
1728 && PRIVATE (a_this)->input
1729 && a_char, CR_BAD_PARAM_ERROR);
1731 if (PRIVATE (a_this)->token_cache) {
1732 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1733 &PRIVATE (a_this)->prev_pos);
1734 cr_token_destroy (PRIVATE (a_this)->token_cache);
1735 PRIVATE (a_this)->token_cache = NULL;
1736 }
1738 return cr_input_read_char (PRIVATE (a_this)->input, a_char);
1739 }
1741 /**
1742 *Peeks a char from the parser input stream.
1743 *To "peek a char" means reads the next char without consuming it.
1744 *Subsequent calls to this function return the same char.
1745 *@param a_this the current instance of #CRTknzr.
1746 *@param a_char out parameter. The peeked char uppon successfull completion.
1747 *@return CR_OK upon successfull completion, an error code otherwise.
1748 */
1749 enum CRStatus
1750 cr_tknzr_peek_char (CRTknzr * a_this, guint32 * a_char)
1751 {
1752 g_return_val_if_fail (a_this && PRIVATE (a_this)
1753 && PRIVATE (a_this)->input
1754 && a_char, CR_BAD_PARAM_ERROR);
1756 if (PRIVATE (a_this)->token_cache) {
1757 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1758 &PRIVATE (a_this)->prev_pos);
1759 cr_token_destroy (PRIVATE (a_this)->token_cache);
1760 PRIVATE (a_this)->token_cache = NULL;
1761 }
1763 return cr_input_peek_char (PRIVATE (a_this)->input, a_char);
1764 }
1766 /**
1767 *Peeks a byte ahead at a given postion in the parser input stream.
1768 *@param a_this the current instance of #CRTknzr.
1769 *@param a_offset the offset of the peeked byte starting from the current
1770 *byte in the parser input stream.
1771 *@param a_byte out parameter. The peeked byte upon
1772 *successfull completion.
1773 *@return CR_OK upon successfull completion, an error code otherwise.
1774 */
1775 enum CRStatus
1776 cr_tknzr_peek_byte (CRTknzr * a_this, gulong a_offset, guchar * a_byte)
1777 {
1778 g_return_val_if_fail (a_this && PRIVATE (a_this)
1779 && PRIVATE (a_this)->input && a_byte,
1780 CR_BAD_PARAM_ERROR);
1782 if (PRIVATE (a_this)->token_cache) {
1783 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1784 &PRIVATE (a_this)->prev_pos);
1785 cr_token_destroy (PRIVATE (a_this)->token_cache);
1786 PRIVATE (a_this)->token_cache = NULL;
1787 }
1789 return cr_input_peek_byte (PRIVATE (a_this)->input,
1790 CR_SEEK_CUR, a_offset, a_byte);
1791 }
1793 /**
1794 *Same as cr_tknzr_peek_byte() but this api returns the byte peeked.
1795 *@param a_this the current instance of #CRTknzr.
1796 *@param a_offset the offset of the peeked byte starting from the current
1797 *byte in the parser input stream.
1798 *@param a_eof out parameter. If not NULL, is set to TRUE if we reached end of
1799 *file, FALE otherwise. If the caller sets it to NULL, this parameter
1800 *is just ignored.
1801 *@return the peeked byte.
1802 */
1803 guchar
1804 cr_tknzr_peek_byte2 (CRTknzr * a_this, gulong a_offset, gboolean * a_eof)
1805 {
1806 g_return_val_if_fail (a_this && PRIVATE (a_this)
1807 && PRIVATE (a_this)->input, 0);
1809 return cr_input_peek_byte2 (PRIVATE (a_this)->input, a_offset, a_eof);
1810 }
1812 /**
1813 *Gets the number of bytes left in the topmost input stream
1814 *associated to this parser.
1815 *@param a_this the current instance of #CRTknzr
1816 *@return the number of bytes left or -1 in case of error.
1817 */
1818 glong
1819 cr_tknzr_get_nb_bytes_left (CRTknzr * a_this)
1820 {
1821 g_return_val_if_fail (a_this && PRIVATE (a_this)
1822 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1824 if (PRIVATE (a_this)->token_cache) {
1825 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1826 &PRIVATE (a_this)->prev_pos);
1827 cr_token_destroy (PRIVATE (a_this)->token_cache);
1828 PRIVATE (a_this)->token_cache = NULL;
1829 }
1831 return cr_input_get_nb_bytes_left (PRIVATE (a_this)->input);
1832 }
1834 enum CRStatus
1835 cr_tknzr_get_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
1836 {
1837 g_return_val_if_fail (a_this && PRIVATE (a_this)
1838 && PRIVATE (a_this)->input
1839 && a_pos, CR_BAD_PARAM_ERROR);
1841 if (PRIVATE (a_this)->token_cache) {
1842 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1843 &PRIVATE (a_this)->prev_pos);
1844 cr_token_destroy (PRIVATE (a_this)->token_cache);
1845 PRIVATE (a_this)->token_cache = NULL;
1846 }
1848 return cr_input_get_cur_pos (PRIVATE (a_this)->input, a_pos);
1849 }
1851 enum CRStatus
1852 cr_tknzr_get_parsing_location (CRTknzr *a_this,
1853 CRParsingLocation *a_loc)
1854 {
1855 g_return_val_if_fail (a_this
1856 && PRIVATE (a_this)
1857 && a_loc,
1858 CR_BAD_PARAM_ERROR) ;
1860 return cr_input_get_parsing_location
1861 (PRIVATE (a_this)->input, a_loc) ;
1862 }
1864 enum CRStatus
1865 cr_tknzr_get_cur_byte_addr (CRTknzr * a_this, guchar ** a_addr)
1866 {
1867 g_return_val_if_fail (a_this && PRIVATE (a_this)
1868 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1869 if (PRIVATE (a_this)->token_cache) {
1870 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1871 &PRIVATE (a_this)->prev_pos);
1872 cr_token_destroy (PRIVATE (a_this)->token_cache);
1873 PRIVATE (a_this)->token_cache = NULL;
1874 }
1876 return cr_input_get_cur_byte_addr (PRIVATE (a_this)->input, a_addr);
1877 }
1879 enum CRStatus
1880 cr_tknzr_seek_index (CRTknzr * a_this, enum CRSeekPos a_origin, gint a_pos)
1881 {
1882 g_return_val_if_fail (a_this && PRIVATE (a_this)
1883 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1885 if (PRIVATE (a_this)->token_cache) {
1886 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1887 &PRIVATE (a_this)->prev_pos);
1888 cr_token_destroy (PRIVATE (a_this)->token_cache);
1889 PRIVATE (a_this)->token_cache = NULL;
1890 }
1892 return cr_input_seek_index (PRIVATE (a_this)->input, a_origin, a_pos);
1893 }
1895 enum CRStatus
1896 cr_tknzr_consume_chars (CRTknzr * a_this, guint32 a_char, glong * a_nb_char)
1897 {
1898 g_return_val_if_fail (a_this && PRIVATE (a_this)
1899 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1901 if (PRIVATE (a_this)->token_cache) {
1902 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1903 &PRIVATE (a_this)->prev_pos);
1904 cr_token_destroy (PRIVATE (a_this)->token_cache);
1905 PRIVATE (a_this)->token_cache = NULL;
1906 }
1908 return cr_input_consume_chars (PRIVATE (a_this)->input,
1909 a_char, (gulong *)a_nb_char);
1910 }
1912 enum CRStatus
1913 cr_tknzr_set_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
1914 {
1915 g_return_val_if_fail (a_this && PRIVATE (a_this)
1916 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1918 if (PRIVATE (a_this)->token_cache) {
1919 cr_token_destroy (PRIVATE (a_this)->token_cache);
1920 PRIVATE (a_this)->token_cache = NULL;
1921 }
1923 return cr_input_set_cur_pos (PRIVATE (a_this)->input, a_pos);
1924 }
1926 enum CRStatus
1927 cr_tknzr_unget_token (CRTknzr * a_this, CRToken * a_token)
1928 {
1929 g_return_val_if_fail (a_this && PRIVATE (a_this)
1930 && PRIVATE (a_this)->token_cache == NULL,
1931 CR_BAD_PARAM_ERROR);
1933 PRIVATE (a_this)->token_cache = a_token;
1935 return CR_OK;
1936 }
1938 /**
1939 *Returns the next token of the input stream.
1940 *This method is really central. Each parsing
1941 *method calls it.
1942 *@param a_this the current tokenizer.
1943 *@param a_tk out parameter. The returned token.
1944 *for the sake of mem leak avoidance, *a_tk must
1945 *be NULL.
1946 *@param CR_OK upon successfull completion, an error code
1947 *otherwise.
1948 */
1949 enum CRStatus
1950 cr_tknzr_get_next_token (CRTknzr * a_this, CRToken ** a_tk)
1951 {
1952 enum CRStatus status = CR_OK;
1953 CRToken *token = NULL;
1954 CRInputPos init_pos;
1955 guint32 next_char = 0;
1956 guchar next_bytes[4] = { 0 };
1957 gboolean reached_eof = FALSE;
1958 CRInput *input = NULL;
1959 CRString *str = NULL;
1960 CRRgb *rgb = NULL;
1961 CRParsingLocation location = {0,0,0} ;
1963 g_return_val_if_fail (a_this && PRIVATE (a_this)
1964 && a_tk && *a_tk == NULL
1965 && PRIVATE (a_this)->input,
1966 CR_BAD_PARAM_ERROR);
1968 if (PRIVATE (a_this)->token_cache) {
1969 *a_tk = PRIVATE (a_this)->token_cache;
1970 PRIVATE (a_this)->token_cache = NULL;
1971 return CR_OK;
1972 }
1974 RECORD_INITIAL_POS (a_this, &init_pos);
1976 status = cr_input_get_end_of_file
1977 (PRIVATE (a_this)->input, &reached_eof);
1978 ENSURE_PARSING_COND (status == CR_OK);
1980 if (reached_eof == TRUE) {
1981 status = CR_END_OF_INPUT_ERROR;
1982 goto error;
1983 }
1985 input = PRIVATE (a_this)->input;
1987 PEEK_NEXT_CHAR (a_this, &next_char);
1988 token = cr_token_new ();
1989 ENSURE_PARSING_COND (token);
1991 switch (next_char) {
1992 case '@':
1993 {
1994 if (BYTE (input, 2, NULL) == 'f'
1995 && BYTE (input, 3, NULL) == 'o'
1996 && BYTE (input, 4, NULL) == 'n'
1997 && BYTE (input, 5, NULL) == 't'
1998 && BYTE (input, 6, NULL) == '-'
1999 && BYTE (input, 7, NULL) == 'f'
2000 && BYTE (input, 8, NULL) == 'a'
2001 && BYTE (input, 9, NULL) == 'c'
2002 && BYTE (input, 10, NULL) == 'e') {
2003 SKIP_CHARS (a_this, 1);
2004 cr_tknzr_get_parsing_location
2005 (a_this, &location) ;
2006 SKIP_CHARS (a_this, 9);
2007 status = cr_token_set_font_face_sym (token);
2008 CHECK_PARSING_STATUS (status, TRUE);
2009 cr_parsing_location_copy (&token->location,
2010 &location) ;
2011 goto done;
2012 }
2014 if (BYTE (input, 2, NULL) == 'c'
2015 && BYTE (input, 3, NULL) == 'h'
2016 && BYTE (input, 4, NULL) == 'a'
2017 && BYTE (input, 5, NULL) == 'r'
2018 && BYTE (input, 6, NULL) == 's'
2019 && BYTE (input, 7, NULL) == 'e'
2020 && BYTE (input, 8, NULL) == 't') {
2021 SKIP_CHARS (a_this, 1);
2022 cr_tknzr_get_parsing_location
2023 (a_this, &location) ;
2024 SKIP_CHARS (a_this, 7);
2025 status = cr_token_set_charset_sym (token);
2026 CHECK_PARSING_STATUS (status, TRUE);
2027 cr_parsing_location_copy (&token->location,
2028 &location) ;
2029 goto done;
2030 }
2032 if (BYTE (input, 2, NULL) == 'i'
2033 && BYTE (input, 3, NULL) == 'm'
2034 && BYTE (input, 4, NULL) == 'p'
2035 && BYTE (input, 5, NULL) == 'o'
2036 && BYTE (input, 6, NULL) == 'r'
2037 && BYTE (input, 7, NULL) == 't') {
2038 SKIP_CHARS (a_this, 1);
2039 cr_tknzr_get_parsing_location
2040 (a_this, &location) ;
2041 SKIP_CHARS (a_this, 6);
2042 status = cr_token_set_import_sym (token);
2043 CHECK_PARSING_STATUS (status, TRUE);
2044 cr_parsing_location_copy (&token->location,
2045 &location) ;
2046 goto done;
2047 }
2049 if (BYTE (input, 2, NULL) == 'm'
2050 && BYTE (input, 3, NULL) == 'e'
2051 && BYTE (input, 4, NULL) == 'd'
2052 && BYTE (input, 5, NULL) == 'i'
2053 && BYTE (input, 6, NULL) == 'a') {
2054 SKIP_CHARS (a_this, 1);
2055 cr_tknzr_get_parsing_location (a_this,
2056 &location) ;
2057 SKIP_CHARS (a_this, 5);
2058 status = cr_token_set_media_sym (token);
2059 CHECK_PARSING_STATUS (status, TRUE);
2060 cr_parsing_location_copy (&token->location,
2061 &location) ;
2062 goto done;
2063 }
2065 if (BYTE (input, 2, NULL) == 'p'
2066 && BYTE (input, 3, NULL) == 'a'
2067 && BYTE (input, 4, NULL) == 'g'
2068 && BYTE (input, 5, NULL) == 'e') {
2069 SKIP_CHARS (a_this, 1);
2070 cr_tknzr_get_parsing_location (a_this,
2071 &location) ;
2072 SKIP_CHARS (a_this, 4);
2073 status = cr_token_set_page_sym (token);
2074 CHECK_PARSING_STATUS (status, TRUE);
2075 cr_parsing_location_copy (&token->location,
2076 &location) ;
2077 goto done;
2078 }
2079 status = cr_tknzr_parse_atkeyword (a_this, &str);
2080 if (status == CR_OK) {
2081 status = cr_token_set_atkeyword (token, str);
2082 CHECK_PARSING_STATUS (status, TRUE);
2083 if (str) {
2084 cr_parsing_location_copy (&token->location,
2085 &str->location) ;
2086 }
2087 goto done;
2088 }
2089 }
2090 break;
2092 case 'u':
2094 if (BYTE (input, 2, NULL) == 'r'
2095 && BYTE (input, 3, NULL) == 'l'
2096 && BYTE (input, 4, NULL) == '(') {
2097 CRString *str = NULL;
2099 status = cr_tknzr_parse_uri (a_this, &str);
2100 if (status == CR_OK) {
2101 status = cr_token_set_uri (token, str);
2102 CHECK_PARSING_STATUS (status, TRUE);
2103 if (str) {
2104 cr_parsing_location_copy (&token->location,
2105 &str->location) ;
2106 }
2107 goto done;
2108 }
2109 } else {
2110 status = cr_tknzr_parse_ident (a_this, &str);
2111 if (status == CR_OK && str) {
2112 status = cr_token_set_ident (token, str);
2113 CHECK_PARSING_STATUS (status, TRUE);
2114 if (str) {
2115 cr_parsing_location_copy (&token->location,
2116 &str->location) ;
2117 }
2118 goto done;
2119 }
2120 }
2121 break;
2123 case 'r':
2124 if (BYTE (input, 2, NULL) == 'g'
2125 && BYTE (input, 3, NULL) == 'b'
2126 && BYTE (input, 4, NULL) == '(') {
2127 status = cr_tknzr_parse_rgb (a_this, &rgb);
2128 if (status == CR_OK && rgb) {
2129 status = cr_token_set_rgb (token, rgb);
2130 CHECK_PARSING_STATUS (status, TRUE);
2131 if (rgb) {
2132 cr_parsing_location_copy (&token->location,
2133 &rgb->location) ;
2134 }
2135 rgb = NULL;
2136 goto done;
2137 }
2139 } else {
2140 status = cr_tknzr_parse_ident (a_this, &str);
2141 if (status == CR_OK) {
2142 status = cr_token_set_ident (token, str);
2143 CHECK_PARSING_STATUS (status, TRUE);
2144 if (str) {
2145 cr_parsing_location_copy (&token->location,
2146 &str->location) ;
2147 }
2148 str = NULL;
2149 goto done;
2150 }
2151 }
2152 break;
2154 case '<':
2155 if (BYTE (input, 2, NULL) == '-'
2156 && BYTE (input, 3, NULL) == '-') {
2157 SKIP_CHARS (a_this, 1);
2158 cr_tknzr_get_parsing_location (a_this,
2159 &location) ;
2160 SKIP_CHARS (a_this, 2);
2161 status = cr_token_set_cdo (token);
2162 CHECK_PARSING_STATUS (status, TRUE);
2163 cr_parsing_location_copy (&token->location,
2164 &location) ;
2165 goto done;
2166 }
2167 break;
2169 case '-':
2170 if (BYTE (input, 2, NULL) == '-'
2171 && BYTE (input, 3, NULL) == '>') {
2172 SKIP_CHARS (a_this, 1);
2173 cr_tknzr_get_parsing_location (a_this,
2174 &location) ;
2175 SKIP_CHARS (a_this, 2);
2176 status = cr_token_set_cdc (token);
2177 CHECK_PARSING_STATUS (status, TRUE);
2178 cr_parsing_location_copy (&token->location,
2179 &location) ;
2180 goto done;
2181 } else {
2182 status = cr_tknzr_parse_ident
2183 (a_this, &str);
2184 if (status == CR_OK) {
2185 cr_token_set_ident
2186 (token, str);
2187 if (str) {
2188 cr_parsing_location_copy (&token->location,
2189 &str->location) ;
2190 }
2191 goto done;
2192 }
2193 }
2194 break;
2196 case '~':
2197 if (BYTE (input, 2, NULL) == '=') {
2198 SKIP_CHARS (a_this, 1);
2199 cr_tknzr_get_parsing_location (a_this,
2200 &location) ;
2201 SKIP_CHARS (a_this, 1);
2202 status = cr_token_set_includes (token);
2203 CHECK_PARSING_STATUS (status, TRUE);
2204 cr_parsing_location_copy (&token->location,
2205 &location) ;
2206 goto done;
2207 }
2208 break;
2210 case '|':
2211 if (BYTE (input, 2, NULL) == '=') {
2212 SKIP_CHARS (a_this, 1);
2213 cr_tknzr_get_parsing_location (a_this,
2214 &location) ;
2215 SKIP_CHARS (a_this, 1);
2216 status = cr_token_set_dashmatch (token);
2217 CHECK_PARSING_STATUS (status, TRUE);
2218 cr_parsing_location_copy (&token->location,
2219 &location) ;
2220 goto done;
2221 }
2222 break;
2224 case '/':
2225 if (BYTE (input, 2, NULL) == '*') {
2226 status = cr_tknzr_parse_comment (a_this, &str);
2228 if (status == CR_OK) {
2229 status = cr_token_set_comment (token, str);
2230 str = NULL;
2231 CHECK_PARSING_STATUS (status, TRUE);
2232 if (str) {
2233 cr_parsing_location_copy (&token->location,
2234 &str->location) ;
2235 }
2236 goto done;
2237 }
2238 }
2239 break ;
2241 case ';':
2242 SKIP_CHARS (a_this, 1);
2243 cr_tknzr_get_parsing_location (a_this,
2244 &location) ;
2245 status = cr_token_set_semicolon (token);
2246 CHECK_PARSING_STATUS (status, TRUE);
2247 cr_parsing_location_copy (&token->location,
2248 &location) ;
2249 goto done;
2251 case '{':
2252 SKIP_CHARS (a_this, 1);
2253 cr_tknzr_get_parsing_location (a_this,
2254 &location) ;
2255 status = cr_token_set_cbo (token);
2256 CHECK_PARSING_STATUS (status, TRUE);
2257 cr_tknzr_get_parsing_location (a_this,
2258 &location) ;
2259 goto done;
2261 case '}':
2262 SKIP_CHARS (a_this, 1);
2263 cr_tknzr_get_parsing_location (a_this,
2264 &location) ;
2265 status = cr_token_set_cbc (token);
2266 CHECK_PARSING_STATUS (status, TRUE);
2267 cr_parsing_location_copy (&token->location,
2268 &location) ;
2269 goto done;
2271 case '(':
2272 SKIP_CHARS (a_this, 1);
2273 cr_tknzr_get_parsing_location (a_this,
2274 &location) ;
2275 status = cr_token_set_po (token);
2276 CHECK_PARSING_STATUS (status, TRUE);
2277 cr_parsing_location_copy (&token->location,
2278 &location) ;
2279 goto done;
2281 case ')':
2282 SKIP_CHARS (a_this, 1);
2283 cr_tknzr_get_parsing_location (a_this,
2284 &location) ;
2285 status = cr_token_set_pc (token);
2286 CHECK_PARSING_STATUS (status, TRUE);
2287 cr_parsing_location_copy (&token->location,
2288 &location) ;
2289 goto done;
2291 case '[':
2292 SKIP_CHARS (a_this, 1);
2293 cr_tknzr_get_parsing_location (a_this,
2294 &location) ;
2295 status = cr_token_set_bo (token);
2296 CHECK_PARSING_STATUS (status, TRUE);
2297 cr_parsing_location_copy (&token->location,
2298 &location) ;
2299 goto done;
2301 case ']':
2302 SKIP_CHARS (a_this, 1);
2303 cr_tknzr_get_parsing_location (a_this,
2304 &location) ;
2305 status = cr_token_set_bc (token);
2306 CHECK_PARSING_STATUS (status, TRUE);
2307 cr_parsing_location_copy (&token->location,
2308 &location) ;
2309 goto done;
2311 case ' ':
2312 case '\t':
2313 case '\n':
2314 case '\f':
2315 case '\r':
2316 {
2317 guchar *start = NULL,
2318 *end = NULL;
2320 status = cr_tknzr_parse_w (a_this, &start,
2321 &end, &location);
2322 if (status == CR_OK) {
2323 status = cr_token_set_s (token);
2324 CHECK_PARSING_STATUS (status, TRUE);
2325 cr_tknzr_get_parsing_location (a_this,
2326 &location) ;
2327 goto done;
2328 }
2329 }
2330 break;
2332 case '#':
2333 {
2334 status = cr_tknzr_parse_hash (a_this, &str);
2335 if (status == CR_OK && str) {
2336 status = cr_token_set_hash (token, str);
2337 CHECK_PARSING_STATUS (status, TRUE);
2338 if (str) {
2339 cr_parsing_location_copy (&token->location,
2340 &str->location) ;
2341 }
2342 str = NULL;
2343 goto done;
2344 }
2345 }
2346 break;
2348 case '\'':
2349 case '"':
2350 status = cr_tknzr_parse_string (a_this, &str);
2351 if (status == CR_OK && str) {
2352 status = cr_token_set_string (token, str);
2353 CHECK_PARSING_STATUS (status, TRUE);
2354 if (str) {
2355 cr_parsing_location_copy (&token->location,
2356 &str->location) ;
2357 }
2358 str = NULL;
2359 goto done;
2360 }
2361 break;
2363 case '!':
2364 status = cr_tknzr_parse_important (a_this, &location);
2365 if (status == CR_OK) {
2366 status = cr_token_set_important_sym (token);
2367 CHECK_PARSING_STATUS (status, TRUE);
2368 cr_parsing_location_copy (&token->location,
2369 &location) ;
2370 goto done;
2371 }
2372 break;
2374 case '0':
2375 case '1':
2376 case '2':
2377 case '3':
2378 case '4':
2379 case '5':
2380 case '6':
2381 case '7':
2382 case '8':
2383 case '9':
2384 case '.':
2385 {
2386 CRNum *num = NULL;
2388 status = cr_tknzr_parse_num (a_this, &num);
2389 if (status == CR_OK && num) {
2390 next_bytes[0] = BYTE (input, 1, NULL);
2391 next_bytes[1] = BYTE (input, 2, NULL);
2392 next_bytes[2] = BYTE (input, 3, NULL);
2393 next_bytes[3] = BYTE (input, 4, NULL);
2395 if (next_bytes[0] == 'e'
2396 && next_bytes[1] == 'm') {
2397 num->type = NUM_LENGTH_EM;
2398 status = cr_token_set_ems (token,
2399 num);
2400 num = NULL;
2401 SKIP_CHARS (a_this, 2);
2402 } else if (next_bytes[0] == 'e'
2403 && next_bytes[1] == 'x') {
2404 num->type = NUM_LENGTH_EX;
2405 status = cr_token_set_exs (token,
2406 num);
2407 num = NULL;
2408 SKIP_CHARS (a_this, 2);
2409 } else if (next_bytes[0] == 'p'
2410 && next_bytes[1] == 'x') {
2411 num->type = NUM_LENGTH_PX;
2412 status = cr_token_set_length
2413 (token, num, LENGTH_PX_ET);
2414 num = NULL;
2415 SKIP_CHARS (a_this, 2);
2416 } else if (next_bytes[0] == 'c'
2417 && next_bytes[1] == 'm') {
2418 num->type = NUM_LENGTH_CM;
2419 status = cr_token_set_length
2420 (token, num, LENGTH_CM_ET);
2421 num = NULL;
2422 SKIP_CHARS (a_this, 2);
2423 } else if (next_bytes[0] == 'm'
2424 && next_bytes[1] == 'm') {
2425 num->type = NUM_LENGTH_MM;
2426 status = cr_token_set_length
2427 (token, num, LENGTH_MM_ET);
2428 num = NULL;
2429 SKIP_CHARS (a_this, 2);
2430 } else if (next_bytes[0] == 'i'
2431 && next_bytes[1] == 'n') {
2432 num->type = NUM_LENGTH_IN;
2433 status = cr_token_set_length
2434 (token, num, LENGTH_IN_ET);
2435 num = NULL;
2436 SKIP_CHARS (a_this, 2);
2437 } else if (next_bytes[0] == 'p'
2438 && next_bytes[1] == 't') {
2439 num->type = NUM_LENGTH_PT;
2440 status = cr_token_set_length
2441 (token, num, LENGTH_PT_ET);
2442 num = NULL;
2443 SKIP_CHARS (a_this, 2);
2444 } else if (next_bytes[0] == 'p'
2445 && next_bytes[1] == 'c') {
2446 num->type = NUM_LENGTH_PC;
2447 status = cr_token_set_length
2448 (token, num, LENGTH_PC_ET);
2449 num = NULL;
2450 SKIP_CHARS (a_this, 2);
2451 } else if (next_bytes[0] == 'd'
2452 && next_bytes[1] == 'e'
2453 && next_bytes[2] == 'g') {
2454 num->type = NUM_ANGLE_DEG;
2455 status = cr_token_set_angle
2456 (token, num, ANGLE_DEG_ET);
2457 num = NULL;
2458 SKIP_CHARS (a_this, 3);
2459 } else if (next_bytes[0] == 'r'
2460 && next_bytes[1] == 'a'
2461 && next_bytes[2] == 'd') {
2462 num->type = NUM_ANGLE_RAD;
2463 status = cr_token_set_angle
2464 (token, num, ANGLE_RAD_ET);
2465 num = NULL;
2466 SKIP_CHARS (a_this, 3);
2467 } else if (next_bytes[0] == 'g'
2468 && next_bytes[1] == 'r'
2469 && next_bytes[2] == 'a'
2470 && next_bytes[3] == 'd') {
2471 num->type = NUM_ANGLE_GRAD;
2472 status = cr_token_set_angle
2473 (token, num, ANGLE_GRAD_ET);
2474 num = NULL;
2475 SKIP_CHARS (a_this, 4);
2476 } else if (next_bytes[0] == 'm'
2477 && next_bytes[1] == 's') {
2478 num->type = NUM_TIME_MS;
2479 status = cr_token_set_time
2480 (token, num, TIME_MS_ET);
2481 num = NULL;
2482 SKIP_CHARS (a_this, 2);
2483 } else if (next_bytes[0] == 's') {
2484 num->type = NUM_TIME_S;
2485 status = cr_token_set_time
2486 (token, num, TIME_S_ET);
2487 num = NULL;
2488 SKIP_CHARS (a_this, 1);
2489 } else if (next_bytes[0] == 'H'
2490 && next_bytes[1] == 'z') {
2491 num->type = NUM_FREQ_HZ;
2492 status = cr_token_set_freq
2493 (token, num, FREQ_HZ_ET);
2494 num = NULL;
2495 SKIP_CHARS (a_this, 2);
2496 } else if (next_bytes[0] == 'k'
2497 && next_bytes[1] == 'H'
2498 && next_bytes[2] == 'z') {
2499 num->type = NUM_FREQ_KHZ;
2500 status = cr_token_set_freq
2501 (token, num, FREQ_KHZ_ET);
2502 num = NULL;
2503 SKIP_CHARS (a_this, 3);
2504 } else if (next_bytes[0] == '%') {
2505 num->type = NUM_PERCENTAGE;
2506 status = cr_token_set_percentage
2507 (token, num);
2508 num = NULL;
2509 SKIP_CHARS (a_this, 1);
2510 } else {
2511 status = cr_tknzr_parse_ident (a_this,
2512 &str);
2513 if (status == CR_OK && str) {
2514 num->type = NUM_UNKNOWN_TYPE;
2515 status = cr_token_set_dimen
2516 (token, num, str);
2517 num = NULL;
2518 CHECK_PARSING_STATUS (status,
2519 TRUE);
2520 str = NULL;
2521 } else {
2522 status = cr_token_set_number
2523 (token, num);
2524 num = NULL;
2525 CHECK_PARSING_STATUS (status, CR_OK);
2526 str = NULL;
2527 }
2528 }
2529 if (token && token->u.num) {
2530 cr_parsing_location_copy (&token->location,
2531 &token->u.num->location) ;
2532 } else {
2533 status = CR_ERROR ;
2534 }
2535 goto done ;
2536 }
2537 }
2538 break;
2540 default:
2541 /*process the fallback cases here */
2543 if (next_char == '\\'
2544 || (cr_utils_is_nonascii (next_bytes[0]) == TRUE)
2545 || ((next_char >= 'a') && (next_char <= 'z'))
2546 || ((next_char >= 'A') && (next_char <= 'Z'))) {
2547 status = cr_tknzr_parse_ident (a_this, &str);
2548 if (status == CR_OK && str) {
2549 guint32 next_c = 0;
2551 status = cr_input_peek_char
2552 (PRIVATE (a_this)->input, &next_c);
2554 if (status == CR_OK && next_c == '(') {
2556 SKIP_CHARS (a_this, 1);
2557 status = cr_token_set_function
2558 (token, str);
2559 CHECK_PARSING_STATUS (status, TRUE);
2560 /*ownership is transferred
2561 *to token by cr_token_set_function.
2562 */
2563 if (str) {
2564 cr_parsing_location_copy (&token->location,
2565 &str->location) ;
2566 }
2567 str = NULL;
2568 } else {
2569 status = cr_token_set_ident (token,
2570 str);
2571 CHECK_PARSING_STATUS (status, TRUE);
2572 if (str) {
2573 cr_parsing_location_copy (&token->location,
2574 &str->location) ;
2575 }
2576 str = NULL;
2577 }
2578 goto done;
2579 } else {
2580 if (str) {
2581 cr_string_destroy (str);
2582 str = NULL;
2583 }
2584 }
2585 }
2586 break;
2587 }
2589 READ_NEXT_CHAR (a_this, &next_char);
2590 cr_tknzr_get_parsing_location (a_this,
2591 &location) ;
2592 status = cr_token_set_delim (token, next_char);
2593 CHECK_PARSING_STATUS (status, TRUE);
2594 cr_parsing_location_copy (&token->location,
2595 &location) ;
2596 done:
2598 if (status == CR_OK && token) {
2599 *a_tk = token;
2600 /*
2601 *store the previous position input stream pos.
2602 */
2603 memmove (&PRIVATE (a_this)->prev_pos,
2604 &init_pos, sizeof (CRInputPos));
2605 return CR_OK;
2606 }
2608 error:
2609 if (token) {
2610 cr_token_destroy (token);
2611 token = NULL;
2612 }
2614 if (str) {
2615 cr_string_destroy (str);
2616 str = NULL;
2617 }
2618 cr_tknzr_set_cur_pos (a_this, &init_pos);
2619 return status;
2621 }
2623 enum CRStatus
2624 cr_tknzr_parse_token (CRTknzr * a_this, enum CRTokenType a_type,
2625 enum CRTokenExtraType a_et, gpointer a_res,
2626 gpointer a_extra_res)
2627 {
2628 enum CRStatus status = CR_OK;
2629 CRToken *token = NULL;
2631 g_return_val_if_fail (a_this && PRIVATE (a_this)
2632 && PRIVATE (a_this)->input
2633 && a_res, CR_BAD_PARAM_ERROR);
2635 status = cr_tknzr_get_next_token (a_this, &token);
2636 if (status != CR_OK)
2637 return status;
2638 if (token == NULL)
2639 return CR_PARSING_ERROR;
2641 if (token->type == a_type) {
2642 switch (a_type) {
2643 case NO_TK:
2644 case S_TK:
2645 case CDO_TK:
2646 case CDC_TK:
2647 case INCLUDES_TK:
2648 case DASHMATCH_TK:
2649 case IMPORT_SYM_TK:
2650 case PAGE_SYM_TK:
2651 case MEDIA_SYM_TK:
2652 case FONT_FACE_SYM_TK:
2653 case CHARSET_SYM_TK:
2654 case IMPORTANT_SYM_TK:
2655 status = CR_OK;
2656 break;
2658 case STRING_TK:
2659 case IDENT_TK:
2660 case HASH_TK:
2661 case ATKEYWORD_TK:
2662 case FUNCTION_TK:
2663 case COMMENT_TK:
2664 case URI_TK:
2665 *((CRString **) a_res) = token->u.str;
2666 token->u.str = NULL;
2667 status = CR_OK;
2668 break;
2670 case EMS_TK:
2671 case EXS_TK:
2672 case PERCENTAGE_TK:
2673 case NUMBER_TK:
2674 *((CRNum **) a_res) = token->u.num;
2675 token->u.num = NULL;
2676 status = CR_OK;
2677 break;
2679 case LENGTH_TK:
2680 case ANGLE_TK:
2681 case TIME_TK:
2682 case FREQ_TK:
2683 if (token->extra_type == a_et) {
2684 *((CRNum **) a_res) = token->u.num;
2685 token->u.num = NULL;
2686 status = CR_OK;
2687 }
2688 break;
2690 case DIMEN_TK:
2691 *((CRNum **) a_res) = token->u.num;
2692 if (a_extra_res == NULL) {
2693 status = CR_BAD_PARAM_ERROR;
2694 goto error;
2695 }
2697 *((CRString **) a_extra_res) = token->dimen;
2698 token->u.num = NULL;
2699 token->dimen = NULL;
2700 status = CR_OK;
2701 break;
2703 case DELIM_TK:
2704 *((guint32 *) a_res) = token->u.unichar;
2705 status = CR_OK;
2706 break;
2708 case UNICODERANGE_TK:
2709 default:
2710 status = CR_PARSING_ERROR;
2711 break;
2712 }
2714 cr_token_destroy (token);
2715 token = NULL;
2716 } else {
2717 cr_tknzr_unget_token (a_this, token);
2718 token = NULL;
2719 status = CR_PARSING_ERROR;
2720 }
2722 return status;
2724 error:
2726 if (token) {
2727 cr_tknzr_unget_token (a_this, token);
2728 token = NULL;
2729 }
2731 return status;
2732 }
2734 void
2735 cr_tknzr_destroy (CRTknzr * a_this)
2736 {
2737 g_return_if_fail (a_this);
2739 if (PRIVATE (a_this) && PRIVATE (a_this)->input) {
2740 if (cr_input_unref (PRIVATE (a_this)->input)
2741 == TRUE) {
2742 PRIVATE (a_this)->input = NULL;
2743 }
2744 }
2746 if (PRIVATE (a_this)->token_cache) {
2747 cr_token_destroy (PRIVATE (a_this)->token_cache);
2748 PRIVATE (a_this)->token_cache = NULL;
2749 }
2751 if (PRIVATE (a_this)) {
2752 g_free (PRIVATE (a_this));
2753 PRIVATE (a_this) = NULL;
2754 }
2756 g_free (a_this);
2757 }