1 /* -*- Mode: C; indent-tabs-mode:nil; c-basic-offset: 8-*- */
3 /*
4 * This file is part of The Croco Library
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2.1 of the GNU Lesser General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 *
20 * Author: Dodji Seketeli
21 * See the COPYRIGHTS file for copyrights information.
22 */
24 /**
25 *@file
26 *The definition of the #CRTknzr (tokenizer)
27 *class.
28 */
30 #include "string.h"
31 #include "cr-tknzr.h"
32 #include "cr-doc-handler.h"
34 struct _CRTknzrPriv {
35 /**The parser input stream of bytes*/
36 CRInput *input;
38 /**
39 *A cache where tknzr_unget_token()
40 *puts back the token. tknzr_get_next_token()
41 *first look in this cache, and if and
42 *only if it's empty, fetches the next token
43 *from the input stream.
44 */
45 CRToken *token_cache;
47 /**
48 *The position of the end of the previous token
49 *or char fetched.
50 */
51 CRInputPos prev_pos;
53 CRDocHandler *sac_handler;
55 /**
56 *The reference count of the current instance
57 *of #CRTknzr. Is manipulated by cr_tknzr_ref()
58 *and cr_tknzr_unref().
59 */
60 glong ref_count;
61 };
/**
 *Gives access to the 'priv' member of the pointed-to object.
 *@param obj a pointer to an object that has a 'priv' member.
 */
#define PRIVATE(obj) ((obj)->priv)
/**
 *Tests whether a character is an ASCII decimal digit ([0-9]).
 *Evaluates to a non zero value (1) for a digit, to 0 otherwise.
 *The argument is evaluated twice: do not pass an expression
 *that has side effects.
 *@param a_char the char to test.
 */
#define IS_NUM(a_char) ((a_char) >= '0' && (a_char) <= '9')
/**
 *Jumps to the 'error' label of the enclosing function when
 *'status' differs from CR_OK.
 *
 *The expansion is a bare 'if' block (not a do/while(0) wrapper)
 *and is used elsewhere in this file both with and without a
 *trailing semicolon: invoke it only inside a braced block.
 *The enclosing function must define an 'error:' label.
 *
 *@param status the status (an lvalue of type enum CRStatus) to test.
 *@param is_exception if FALSE, 'status' is overwritten with
 *CR_PARSING_ERROR before the jump. If TRUE, 'status' keeps
 *the value it had.
 */
#define CHECK_PARSING_STATUS(status, is_exception)      \
        if ((status) != CR_OK) {                        \
                if (is_exception == FALSE) {            \
                        status = CR_PARSING_ERROR ;     \
                }                                       \
                goto error ;                            \
        }
/**
 *Peeks the next char of the tokenizer without consuming it.
 *Stores the result of cr_tknzr_peek_char() in the local variable
 *'status' of the calling function and, when it is not CR_OK,
 *jumps to the 'error' label with that status unchanged.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_to_char a pointer to the char where to store the
 *char peeked.
 */
#define PEEK_NEXT_CHAR(a_tknzr, a_to_char)                              \
        {                                                               \
                status = cr_tknzr_peek_char (a_tknzr, a_to_char) ;      \
                CHECK_PARSING_STATUS (status, TRUE)                     \
        }
/**
 *Reads (consumes) the next char of the tokenizer.
 *Stores the result of cr_tknzr_read_char() in the local variable
 *'status' of the calling function and, when it is not CR_OK,
 *jumps to the "error:" label of that function.
 *The expansion is two statements that are not wrapped in a
 *block: invoke it only inside braces.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param to_char a pointer to the guint32 char where to store
 *the character read.
 */
#define READ_NEXT_CHAR(a_tknzr, to_char)                        \
        status = cr_tknzr_read_char (a_tknzr, to_char) ;        \
        CHECK_PARSING_STATUS (status, TRUE)
/**
 *Saves the current position of the tokenizer input.
 *Stores the result of cr_input_get_cur_pos() in the local
 *variable 'status' of the calling function. In case of failure
 *this macro makes the calling function return that status
 *(through g_return_val_if_fail()); it does not jump to 'error'.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_pos out parameter. A pointer to the position structure
 *to fill; it is handed as is to cr_input_get_cur_pos().
 */
#define RECORD_INITIAL_POS(a_tknzr, a_pos)                                  \
        status = cr_input_get_cur_pos (PRIVATE (a_tknzr)->input, a_pos) ;   \
        g_return_val_if_fail (status == CR_OK, status)
/**
 *Gets the address of the current byte inside the tokenizer
 *input, by calling cr_input_get_cur_byte_addr().
 *In case of failure, jumps to the "error:" label of the calling
 *function with the failure code left in 'status'.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_addr out parameter. A pointer (guchar**) to where the
 *address must be put.
 */
#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr)                            \
        status = cr_input_get_cur_byte_addr (PRIVATE (a_tknzr)->input,   \
                                             a_addr) ;                   \
        CHECK_PARSING_STATUS (status, TRUE)
/**
 *Peeks a byte of the tokenizer input at a given offset from
 *the current position, by calling cr_tknzr_peek_byte().
 *If it fails, jumps to the "error:" label of the calling function.
 *Note that the expansion already ends with a semicolon.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_offset the offset of the byte to peek, the
 *current byte having the offset '0'.
 *@param a_byte_ptr out parameter. A pointer (guchar*) to
 *where the peeked byte is to be stored.
 */
#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr)                          \
        status = cr_tknzr_peek_byte (a_tknzr, a_offset, a_byte_ptr) ;     \
        CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Shorthand for cr_input_peek_byte2(); the three arguments are
 *forwarded unchanged.
 */
#define BYTE(a_input, a_n, a_eof) cr_input_peek_byte2 (a_input, a_n, a_eof)
/**
 *Reads a byte from the tokenizer input stream, by calling
 *cr_input_read_byte().
 *If it fails, jumps to the "error" label of the calling function.
 *Note that the expansion already ends with a semicolon.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_byte_ptr the guchar * where to put the byte read.
 */
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr)                                \
        status = cr_input_read_byte (PRIVATE (a_tknzr)->input,             \
                                     a_byte_ptr) ;                         \
        CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Skips a given number of bytes in the tokenizer input, by seeking
 *relatively to the current index (cr_input_seek_index() with
 *CR_SEEK_CUR). According to the original note, line and column
 *numbers are not updated.
 *In case of error, jumps to the "error:" label of the
 *surrounding function.
 *Note that the expansion already ends with a semicolon.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_bytes the number of bytes to skip.
 */
#define SKIP_BYTES(a_tknzr, a_nb_bytes)                                   \
        status = cr_input_seek_index (PRIVATE (a_tknzr)->input,           \
                                      CR_SEEK_CUR, a_nb_bytes) ;          \
        CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Skips utf8 encoded characters, by calling
 *cr_input_consume_chars(). According to the original note,
 *line and column numbers are updated.
 *In case of error, jumps to the "error:" label of the
 *surrounding function.
 *NOTE(review): the 0 passed as second argument presumably means
 *"whatever the character is" -- confirm against
 *cr_input_consume_chars().
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_chars the number of chars to skip. Must be of
 *type glong.
 */
#define SKIP_CHARS(a_tknzr, a_nb_chars)                                   \
        {                                                                 \
                glong nb_chars = a_nb_chars ;                             \
                status = cr_input_consume_chars (PRIVATE (a_tknzr)->input,\
                                                 0, &nb_chars) ;          \
                CHECK_PARSING_STATUS (status, TRUE) ;                     \
        }
/**
 *Asserts a parsing condition. When the condition is false, sets
 *the local variable 'status' to CR_PARSING_ERROR and jumps to
 *the 'error' label of the calling function.
 *@param condition the condition to test.
 */
#define ENSURE_PARSING_COND(condition)          \
        if (! (condition)) {                    \
                status = CR_PARSING_ERROR;      \
                goto error ;                    \
        }
213 static enum CRStatus cr_tknzr_parse_nl (CRTknzr * a_this,
214 guchar ** a_start,
215 guchar ** a_end,
216 CRParsingLocation *a_location);
218 static enum CRStatus cr_tknzr_parse_w (CRTknzr * a_this,
219 guchar ** a_start,
220 guchar ** a_end,
221 CRParsingLocation *a_location) ;
223 static enum CRStatus cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
224 guint32 * a_unicode,
225 CRParsingLocation *a_location) ;
227 static enum CRStatus cr_tknzr_parse_escape (CRTknzr * a_this,
228 guint32 * a_esc_code,
229 CRParsingLocation *a_location);
231 static enum CRStatus cr_tknzr_parse_string (CRTknzr * a_this,
232 CRString ** a_str);
234 static enum CRStatus cr_tknzr_parse_comment (CRTknzr * a_this,
235 CRString ** a_comment);
237 static enum CRStatus cr_tknzr_parse_nmstart (CRTknzr * a_this,
238 guint32 * a_char,
239 CRParsingLocation *a_location);
241 static enum CRStatus cr_tknzr_parse_num (CRTknzr * a_this,
242 CRNum ** a_num);
244 /**********************************
245 *PRIVATE methods
246 **********************************/
248 /**
249 *Parses a "w" as defined by the css spec at [4.1.1]:
250 * w ::= [ \t\r\n\f]*
251 *
252 *@param a_this the current instance of #CRTknzr.
253 *@param a_start out param. Upon successfull completion, points
254 *to the beginning of the parsed white space, points to NULL otherwise.
255 *Can also point to NULL is there is no white space actually.
256 *@param a_end out param. Upon successfull completion, points
257 *to the end of the parsed white space, points to NULL otherwise.
258 *Can also point to NULL is there is no white space actually.
259 */
260 static enum CRStatus
261 cr_tknzr_parse_w (CRTknzr * a_this,
262 guchar ** a_start,
263 guchar ** a_end,
264 CRParsingLocation *a_location)
265 {
266 guint32 cur_char = 0;
267 CRInputPos init_pos;
268 enum CRStatus status = CR_OK;
270 g_return_val_if_fail (a_this && PRIVATE (a_this)
271 && PRIVATE (a_this)->input
272 && a_start && a_end,
273 CR_BAD_PARAM_ERROR);
275 RECORD_INITIAL_POS (a_this, &init_pos);
277 *a_start = NULL;
278 *a_end = NULL;
280 READ_NEXT_CHAR (a_this, &cur_char);
282 if (cr_utils_is_white_space (cur_char) == FALSE) {
283 status = CR_PARSING_ERROR;
284 goto error;
285 }
286 if (a_location) {
287 cr_tknzr_get_parsing_location (a_this,
288 a_location) ;
289 }
290 RECORD_CUR_BYTE_ADDR (a_this, a_start);
291 *a_end = *a_start;
293 for (;;) {
294 gboolean is_eof = FALSE;
296 cr_input_get_end_of_file (PRIVATE (a_this)->input, &is_eof);
297 if (is_eof)
298 break;
300 status = cr_tknzr_peek_char (a_this, &cur_char);
301 if (status == CR_END_OF_INPUT_ERROR) {
302 status = CR_OK;
303 break;
304 } else if (status != CR_OK) {
305 goto error;
306 }
308 if (cr_utils_is_white_space (cur_char) == TRUE) {
309 READ_NEXT_CHAR (a_this, &cur_char);
310 RECORD_CUR_BYTE_ADDR (a_this, a_end);
311 } else {
312 break;
313 }
314 }
316 return CR_OK;
318 error:
319 cr_tknzr_set_cur_pos (a_this, &init_pos);
321 return status;
322 }
324 /**
325 *Parses a newline as defined in the css2 spec:
326 * nl ::= \n|\r\n|\r|\f
327 *
328 *@param a_this the "this pointer" of the current instance of #CRTknzr.
329 *@param a_start a pointer to the first character of the successfully
330 *parsed string.
331 *@param a_end a pointer to the last character of the successfully parsed
332 *string.
333 *@result CR_OK uppon successfull completion, an error code otherwise.
334 */
335 static enum CRStatus
336 cr_tknzr_parse_nl (CRTknzr * a_this,
337 guchar ** a_start,
338 guchar ** a_end,
339 CRParsingLocation *a_location)
340 {
341 CRInputPos init_pos;
342 guchar next_chars[2] = { 0 };
343 enum CRStatus status = CR_PARSING_ERROR;
345 g_return_val_if_fail (a_this && PRIVATE (a_this)
346 && a_start && a_end, CR_BAD_PARAM_ERROR);
348 RECORD_INITIAL_POS (a_this, &init_pos);
350 PEEK_BYTE (a_this, 1, &next_chars[0]);
351 PEEK_BYTE (a_this, 2, &next_chars[1]);
353 if ((next_chars[0] == '\r' && next_chars[1] == '\n')) {
354 SKIP_BYTES (a_this, 1);
355 if (a_location) {
356 cr_tknzr_get_parsing_location
357 (a_this, a_location) ;
358 }
359 SKIP_CHARS (a_this, 1);
361 RECORD_CUR_BYTE_ADDR (a_this, a_end);
363 status = CR_OK;
364 } else if (next_chars[0] == '\n'
365 || next_chars[0] == '\r' || next_chars[0] == '\f') {
366 SKIP_CHARS (a_this, 1);
367 if (a_location) {
368 cr_tknzr_get_parsing_location
369 (a_this, a_location) ;
370 }
371 RECORD_CUR_BYTE_ADDR (a_this, a_start);
372 *a_end = *a_start;
373 status = CR_OK;
374 } else {
375 status = CR_PARSING_ERROR;
376 goto error;
377 }
378 return CR_OK ;
380 error:
381 cr_tknzr_set_cur_pos (a_this, &init_pos) ;
382 return status;
383 }
385 /**
386 *Go ahead in the parser input, skipping all the spaces.
387 *If the next char if not a white space, this function does nothing.
388 *In any cases, it stops when it encounters a non white space character.
389 *
390 *@param a_this the current instance of #CRTknzr.
391 *@return CR_OK upon successfull completion, an error code otherwise.
392 */
393 static enum CRStatus
394 cr_tknzr_try_to_skip_spaces (CRTknzr * a_this)
395 {
396 enum CRStatus status = CR_ERROR;
397 guint32 cur_char = 0;
399 g_return_val_if_fail (a_this && PRIVATE (a_this)
400 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
402 status = cr_input_peek_char (PRIVATE (a_this)->input, &cur_char);
404 if (status != CR_OK) {
405 if (status == CR_END_OF_INPUT_ERROR)
406 return CR_OK;
407 return status;
408 }
410 if (cr_utils_is_white_space (cur_char) == TRUE) {
411 glong nb_chars = -1; /*consume all spaces */
413 status = cr_input_consume_white_spaces
414 (PRIVATE (a_this)->input, &nb_chars);
415 }
417 return status;
418 }
420 /**
421 *Parses a "comment" as defined in the css spec at [4.1.1]:
422 *COMMENT ::= \/\*[^*]*\*+([^/][^*]*\*+)*\/ .
423 *This complex regexp is just to say that comments start
424 *with the two chars '/''*' and ends with the two chars '*''/'.
425 *It also means that comments cannot be nested.
426 *So based on that, I've just tried to implement the parsing function
427 *simply and in a straight forward manner.
428 */
429 static enum CRStatus
430 cr_tknzr_parse_comment (CRTknzr * a_this,
431 CRString ** a_comment)
432 {
433 enum CRStatus status = CR_OK;
434 CRInputPos init_pos;
435 guint32 cur_char = 0, next_char= 0;
436 CRString *comment = NULL;
437 CRParsingLocation loc = {0,0,0} ;
439 g_return_val_if_fail (a_this && PRIVATE (a_this)
440 && PRIVATE (a_this)->input,
441 CR_BAD_PARAM_ERROR);
443 RECORD_INITIAL_POS (a_this, &init_pos);
444 READ_NEXT_CHAR (a_this, &cur_char) ;
445 ENSURE_PARSING_COND (cur_char == '/');
446 cr_tknzr_get_parsing_location (a_this, &loc) ;
448 READ_NEXT_CHAR (a_this, &cur_char);
449 ENSURE_PARSING_COND (cur_char == '*');
450 comment = cr_string_new ();
451 for (;;) {
452 READ_NEXT_CHAR (a_this, &cur_char);
454 /*make sure there are no nested comments */
455 if (cur_char == '/') {
456 READ_NEXT_CHAR (a_this, &cur_char);
457 ENSURE_PARSING_COND (cur_char != '*');
458 g_string_append_c (comment->stryng, '/');
459 g_string_append_unichar (comment->stryng,
460 cur_char);
461 continue;
462 }
464 /*Detect the end of the comments region */
465 if (cur_char == '*') {
466 PEEK_NEXT_CHAR (a_this, &next_char);
468 if (next_char == '/') {
469 /*
470 *end of comments region
471 *Now, call the right SAC callback.
472 */
473 SKIP_CHARS (a_this, 1) ;
474 status = CR_OK;
475 break;
476 } else {
477 g_string_append_c (comment->stryng,
478 '*');
479 }
480 }
481 g_string_append_unichar (comment->stryng, cur_char);
482 }
484 if (status == CR_OK) {
485 cr_parsing_location_copy (&comment->location,
486 &loc) ;
487 *a_comment = comment;
488 return CR_OK;
489 }
490 error:
492 if (comment) {
493 cr_string_destroy (comment);
494 comment = NULL;
495 }
497 cr_tknzr_set_cur_pos (a_this, &init_pos);
499 return status;
500 }
502 /**
503 *Parses an 'unicode' escape sequence defined
504 *in css spec at chap 4.1.1:
505 *unicode ::= \\[0-9a-f]{1,6}[ \n\r\t\f]?
506 *@param a_this the current instance of #CRTknzr.
507 *@param a_start out parameter. A pointer to the start
508 *of the unicode escape sequence. Must *NOT* be deleted by
509 *the caller.
510 *@param a_end out parameter. A pointer to the last character
511 *of the unicode escape sequence. Must *NOT* be deleted by the caller.
512 *@return CR_OK if parsing succeded, an error code otherwise.
513 *Error code can be either CR_PARSING_ERROR if the string
514 *parsed just doesn't
515 *respect the production or another error if a
516 *lower level error occured.
517 */
518 static enum CRStatus
519 cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
520 guint32 * a_unicode,
521 CRParsingLocation *a_location)
522 {
523 guint32 cur_char;
524 CRInputPos init_pos;
525 glong occur = 0;
526 guint32 unicode = 0;
527 guchar *tmp_char_ptr1 = NULL,
528 *tmp_char_ptr2 = NULL;
529 enum CRStatus status = CR_OK;
531 g_return_val_if_fail (a_this && PRIVATE (a_this)
532 && a_unicode, CR_BAD_PARAM_ERROR);
534 /*first, let's backup the current position pointer */
535 RECORD_INITIAL_POS (a_this, &init_pos);
537 READ_NEXT_CHAR (a_this, &cur_char);
539 if (cur_char != '\\') {
540 status = CR_PARSING_ERROR;
541 goto error;
542 }
543 if (a_location) {
544 cr_tknzr_get_parsing_location
545 (a_this, a_location) ;
546 }
547 PEEK_NEXT_CHAR (a_this, &cur_char);
549 for (occur = 0, unicode = 0; ((cur_char >= '0' && cur_char <= '9')
550 || (cur_char >= 'a' && cur_char <= 'f')
551 || (cur_char >= 'A' && cur_char <= 'F'))
552 && occur < 6; occur++) {
553 gint cur_char_val = 0;
555 READ_NEXT_CHAR (a_this, &cur_char);
557 if ((cur_char >= '0' && cur_char <= '9')) {
558 cur_char_val = (cur_char - '0');
559 } else if ((cur_char >= 'a' && cur_char <= 'f')) {
560 cur_char_val = 10 + (cur_char - 'a');
561 } else if ((cur_char >= 'A' && cur_char <= 'F')) {
562 cur_char_val = 10 + (cur_char - 'A');
563 }
565 unicode = unicode * 10 + cur_char_val;
567 PEEK_NEXT_CHAR (a_this, &cur_char);
568 }
570 if (occur == 5) {
571 /*
572 *the unicode escape is 6 digit length
573 */
575 /*
576 *parse one space that may
577 *appear just after the unicode
578 *escape.
579 */
580 cr_tknzr_parse_w (a_this, &tmp_char_ptr1,
581 &tmp_char_ptr2, NULL);
582 status = CR_OK;
583 } else {
584 /*
585 *The unicode escape is less than
586 *6 digit length. The character
587 *that comes right after the escape
588 *must be a white space.
589 */
590 status = cr_tknzr_parse_w (a_this, &tmp_char_ptr1,
591 &tmp_char_ptr2, NULL);
592 }
594 if (status == CR_OK) {
595 *a_unicode = unicode;
596 return CR_OK;
597 }
599 error:
600 /*
601 *restore the initial position pointer backuped at
602 *the beginning of this function.
603 */
604 cr_tknzr_set_cur_pos (a_this, &init_pos);
606 return status;
607 }
609 /**
610 *parses an escape sequence as defined by the css spec:
611 *escape ::= {unicode}|\\[ -~\200-\4177777]
612 *@param a_this the current instance of #CRTknzr .
613 */
614 static enum CRStatus
615 cr_tknzr_parse_escape (CRTknzr * a_this, guint32 * a_esc_code,
616 CRParsingLocation *a_location)
617 {
618 enum CRStatus status = CR_OK;
619 guint32 cur_char = 0;
620 CRInputPos init_pos;
621 guchar next_chars[2];
623 g_return_val_if_fail (a_this && PRIVATE (a_this)
624 && a_esc_code, CR_BAD_PARAM_ERROR);
626 RECORD_INITIAL_POS (a_this, &init_pos);
628 PEEK_BYTE (a_this, 1, &next_chars[0]);
629 PEEK_BYTE (a_this, 2, &next_chars[1]);
631 if (next_chars[0] != '\\') {
632 status = CR_PARSING_ERROR;
633 goto error;
634 }
636 if ((next_chars[1] >= '0' && next_chars[1] <= '9')
637 || (next_chars[1] >= 'a' && next_chars[1] <= 'f')
638 || (next_chars[1] >= 'A' && next_chars[1] <= 'F')) {
639 status = cr_tknzr_parse_unicode_escape (a_this, a_esc_code,
640 a_location);
641 } else {
642 /*consume the '\' char */
643 READ_NEXT_CHAR (a_this, &cur_char);
644 if (a_location) {
645 cr_tknzr_get_parsing_location (a_this,
646 a_location) ;
647 }
648 /*then read the char after the '\' */
649 READ_NEXT_CHAR (a_this, &cur_char);
651 if (cur_char != ' ' && (cur_char < 200 || cur_char > 4177777)) {
652 status = CR_PARSING_ERROR;
653 goto error;
654 }
655 *a_esc_code = cur_char;
657 }
658 if (status == CR_OK) {
659 return CR_OK;
660 }
661 error:
662 cr_tknzr_set_cur_pos (a_this, &init_pos);
663 return status;
664 }
666 /**
667 *Parses a string type as defined in css spec [4.1.1]:
668 *
669 *string ::= {string1}|{string2}
670 *string1 ::= \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
671 *string2 ::= \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
672 *
673 *@param a_this the current instance of #CRTknzr.
674 *@param a_start out parameter. Upon successfull completion,
675 *points to the beginning of the string, points to an undefined value
676 *otherwise.
677 *@param a_end out parameter. Upon successfull completion, points to
678 *the beginning of the string, points to an undefined value otherwise.
679 *@return CR_OK upon successfull completion, an error code otherwise.
680 */
681 static enum CRStatus
682 cr_tknzr_parse_string (CRTknzr * a_this, CRString ** a_str)
683 {
684 guint32 cur_char = 0,
685 delim = 0;
686 CRInputPos init_pos;
687 enum CRStatus status = CR_OK;
688 CRString *str = NULL;
690 g_return_val_if_fail (a_this && PRIVATE (a_this)
691 && PRIVATE (a_this)->input
692 && a_str, CR_BAD_PARAM_ERROR);
694 RECORD_INITIAL_POS (a_this, &init_pos);
695 READ_NEXT_CHAR (a_this, &cur_char);
697 if (cur_char == '"')
698 delim = '"';
699 else if (cur_char == '\'')
700 delim = '\'';
701 else {
702 status = CR_PARSING_ERROR;
703 goto error;
704 }
705 str = cr_string_new ();
706 if (str) {
707 cr_tknzr_get_parsing_location
708 (a_this, &str->location) ;
709 }
710 for (;;) {
711 guchar next_chars[2] = { 0 };
713 PEEK_BYTE (a_this, 1, &next_chars[0]);
714 PEEK_BYTE (a_this, 2, &next_chars[1]);
716 if (next_chars[0] == '\\') {
717 guchar *tmp_char_ptr1 = NULL,
718 *tmp_char_ptr2 = NULL;
719 guint32 esc_code = 0;
721 if (next_chars[1] == '\'' || next_chars[1] == '"') {
722 g_string_append_unichar (str->stryng,
723 next_chars[1]);
724 SKIP_BYTES (a_this, 2);
725 status = CR_OK;
726 } else {
727 status = cr_tknzr_parse_escape
728 (a_this, &esc_code, NULL);
730 if (status == CR_OK) {
731 g_string_append_unichar
732 (str->stryng,
733 esc_code);
734 }
735 }
737 if (status != CR_OK) {
738 /*
739 *consume the '\' char, and try to parse
740 *a newline.
741 */
742 READ_NEXT_CHAR (a_this, &cur_char);
744 status = cr_tknzr_parse_nl
745 (a_this, &tmp_char_ptr1,
746 &tmp_char_ptr2, NULL);
747 }
749 CHECK_PARSING_STATUS (status, FALSE);
750 } else if (strchr ("\t !#$%&", next_chars[0])
751 || (next_chars[0] >= '(' && next_chars[0] <= '~')) {
752 READ_NEXT_CHAR (a_this, &cur_char);
753 g_string_append_unichar (str->stryng,
754 cur_char);
755 status = CR_OK;
756 }
758 else if (cr_utils_is_nonascii (next_chars[0])) {
759 READ_NEXT_CHAR (a_this, &cur_char);
760 g_string_append_unichar (str->stryng, cur_char);
761 } else if (next_chars[0] == delim) {
762 READ_NEXT_CHAR (a_this, &cur_char);
763 break;
764 } else {
765 status = CR_PARSING_ERROR;
766 goto error;
767 }
768 }
770 if (status == CR_OK) {
771 if (*a_str == NULL) {
772 *a_str = str;
773 str = NULL;
774 } else {
775 (*a_str)->stryng = g_string_append_len
776 ((*a_str)->stryng,
777 str->stryng->str,
778 str->stryng->len);
779 cr_string_destroy (str);
780 }
781 return CR_OK;
782 }
784 error:
786 if (str) {
787 cr_string_destroy (str) ;
788 str = NULL;
789 }
790 cr_tknzr_set_cur_pos (a_this, &init_pos);
791 return status;
792 }
794 /**
795 *Parses the an nmstart as defined by the css2 spec [4.1.1]:
796 * nmstart [a-zA-Z]|{nonascii}|{escape}
797 *
798 *@param a_this the current instance of #CRTknzr.
799 *@param a_start out param. A pointer to the starting point of
800 *the token.
801 *@param a_end out param. A pointer to the ending point of the
802 *token.
803 *@param a_char out param. The actual parsed nmchar.
804 *@return CR_OK upon successfull completion,
805 *an error code otherwise.
806 */
807 static enum CRStatus
808 cr_tknzr_parse_nmstart (CRTknzr * a_this,
809 guint32 * a_char,
810 CRParsingLocation *a_location)
811 {
812 CRInputPos init_pos;
813 enum CRStatus status = CR_OK;
814 guint32 cur_char = 0,
815 next_char = 0;
817 g_return_val_if_fail (a_this && PRIVATE (a_this)
818 && PRIVATE (a_this)->input
819 && a_char, CR_BAD_PARAM_ERROR);
821 RECORD_INITIAL_POS (a_this, &init_pos);
823 PEEK_NEXT_CHAR (a_this, &next_char);
825 if (next_char == '\\') {
826 status = cr_tknzr_parse_escape (a_this, a_char,
827 a_location);
829 if (status != CR_OK)
830 goto error;
832 } else if (cr_utils_is_nonascii (next_char) == TRUE
833 || ((next_char >= 'a') && (next_char <= 'z'))
834 || ((next_char >= 'A') && (next_char <= 'Z'))
835 ) {
836 READ_NEXT_CHAR (a_this, &cur_char);
837 if (a_location) {
838 cr_tknzr_get_parsing_location (a_this,
839 a_location) ;
840 }
841 *a_char = cur_char;
842 status = CR_OK;
843 } else {
844 status = CR_PARSING_ERROR;
845 goto error;
846 }
848 return CR_OK;
850 error:
851 cr_tknzr_set_cur_pos (a_this, &init_pos);
853 return status;
855 }
857 /**
858 *Parses an nmchar as described in the css spec at
859 *chap 4.1.1:
860 *nmchar ::= [a-z0-9-]|{nonascii}|{escape}
861 *
862 *Humm, I have added the possibility for nmchar to
863 *contain upper case letters.
864 *
865 *@param a_this the current instance of #CRTknzr.
866 *@param a_start out param. A pointer to the starting point of
867 *the token.
868 *@param a_end out param. A pointer to the ending point of the
869 *token.
870 *@param a_char out param. The actual parsed nmchar.
871 *@return CR_OK upon successfull completion,
872 *an error code otherwise.
873 */
874 static enum CRStatus
875 cr_tknzr_parse_nmchar (CRTknzr * a_this, guint32 * a_char,
876 CRParsingLocation *a_location)
877 {
878 guint32 cur_char = 0,
879 next_char = 0;
880 enum CRStatus status = CR_OK;
881 CRInputPos init_pos;
883 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
884 CR_BAD_PARAM_ERROR);
886 RECORD_INITIAL_POS (a_this, &init_pos);
888 status = cr_input_peek_char (PRIVATE (a_this)->input,
889 &next_char) ;
890 if (status != CR_OK)
891 goto error;
893 if (next_char == '\\') {
894 status = cr_tknzr_parse_escape (a_this, a_char,
895 a_location);
897 if (status != CR_OK)
898 goto error;
900 } else if (cr_utils_is_nonascii (next_char) == TRUE
901 || ((next_char >= 'a') && (next_char <= 'z'))
902 || ((next_char >= 'A') && (next_char <= 'Z'))
903 || ((next_char >= '0') && (next_char <= '9'))
904 || (next_char == '-')
905 || (next_char == '_') /*'_' not allowed by the spec. */
906 ) {
907 READ_NEXT_CHAR (a_this, &cur_char);
908 *a_char = cur_char;
909 status = CR_OK;
910 if (a_location) {
911 cr_tknzr_get_parsing_location
912 (a_this, a_location) ;
913 }
914 } else {
915 status = CR_PARSING_ERROR;
916 goto error;
917 }
918 return CR_OK;
920 error:
921 cr_tknzr_set_cur_pos (a_this, &init_pos);
922 return status;
923 }
925 /**
926 *Parses an "ident" as defined in css spec [4.1.1]:
927 *ident ::= {nmstart}{nmchar}*
928 *
929 *Actually parses it using the css3 grammar:
930 *ident ::= -?{nmstart}{nmchar}*
931 *@param a_this the currens instance of #CRTknzr.
932 *
933 *@param a_str a pointer to parsed ident. If *a_str is NULL,
934 *this function allocates a new instance of CRString. If not,
935 *the function just appends the parsed string to the one passed.
936 *In both cases it is up to the caller to free *a_str.
937 *
938 *@return CR_OK upon successfull completion, an error code
939 *otherwise.
940 */
941 static enum CRStatus
942 cr_tknzr_parse_ident (CRTknzr * a_this, CRString ** a_str)
943 {
944 guint32 tmp_char = 0;
945 CRString *stringue = NULL ;
946 CRInputPos init_pos;
947 enum CRStatus status = CR_OK;
948 gboolean location_is_set = FALSE ;
950 g_return_val_if_fail (a_this && PRIVATE (a_this)
951 && PRIVATE (a_this)->input
952 && a_str, CR_BAD_PARAM_ERROR);
954 RECORD_INITIAL_POS (a_this, &init_pos);
955 PEEK_NEXT_CHAR (a_this, &tmp_char) ;
956 stringue = cr_string_new () ;
957 g_return_val_if_fail (stringue,
958 CR_OUT_OF_MEMORY_ERROR) ;
960 if (tmp_char == '-') {
961 READ_NEXT_CHAR (a_this, &tmp_char) ;
962 cr_tknzr_get_parsing_location
963 (a_this, &stringue->location) ;
964 location_is_set = TRUE ;
965 g_string_append_unichar (stringue->stryng,
966 tmp_char) ;
967 }
968 status = cr_tknzr_parse_nmstart (a_this, &tmp_char, NULL);
969 if (status != CR_OK) {
970 status = CR_PARSING_ERROR;
971 goto end ;
972 }
973 if (location_is_set == FALSE) {
974 cr_tknzr_get_parsing_location
975 (a_this, &stringue->location) ;
976 location_is_set = TRUE ;
977 }
978 g_string_append_unichar (stringue->stryng, tmp_char);
979 for (;;) {
980 status = cr_tknzr_parse_nmchar (a_this,
981 &tmp_char,
982 NULL);
983 if (status != CR_OK) {
984 status = CR_OK ;
985 break;
986 }
987 g_string_append_unichar (stringue->stryng, tmp_char);
988 }
989 if (status == CR_OK) {
990 if (!*a_str) {
991 *a_str = stringue ;
993 } else {
994 g_string_append_len ((*a_str)->stryng,
995 stringue->stryng->str,
996 stringue->stryng->len) ;
997 cr_string_destroy (stringue) ;
998 }
999 stringue = NULL ;
1000 }
1002 error:
1003 end:
1004 if (stringue) {
1005 cr_string_destroy (stringue) ;
1006 stringue = NULL ;
1007 }
1008 if (status != CR_OK ) {
1009 cr_tknzr_set_cur_pos (a_this, &init_pos) ;
1010 }
1011 return status ;
1012 }
1015 /**
1016 *Parses a "name" as defined by css spec [4.1.1]:
1017 *name ::= {nmchar}+
1018 *
1019 *@param a_this the current instance of #CRTknzr.
1020 *
1021 *@param a_str out parameter. A pointer to the successfully parsed
1022 *name. If *a_str is set to NULL, this function allocates a new instance
1023 *of CRString. If not, it just appends the parsed name to the passed *a_str.
1024 *In both cases, it is up to the caller to free *a_str.
1025 *
1026 *@return CR_OK upon successfull completion, an error code otherwise.
1027 */
1028 static enum CRStatus
1029 cr_tknzr_parse_name (CRTknzr * a_this,
1030 CRString ** a_str)
1031 {
1032 guint32 tmp_char = 0;
1033 CRInputPos init_pos;
1034 enum CRStatus status = CR_OK;
1035 gboolean str_needs_free = FALSE,
1036 is_first_nmchar=TRUE ;
1037 glong i = 0;
1038 CRParsingLocation loc = {0,0,0} ;
1040 g_return_val_if_fail (a_this && PRIVATE (a_this)
1041 && PRIVATE (a_this)->input
1042 && a_str,
1043 CR_BAD_PARAM_ERROR) ;
1045 RECORD_INITIAL_POS (a_this, &init_pos);
1047 if (*a_str == NULL) {
1048 *a_str = cr_string_new ();
1049 str_needs_free = TRUE;
1050 }
1051 for (i = 0;; i++) {
1052 if (is_first_nmchar == TRUE) {
1053 status = cr_tknzr_parse_nmchar
1054 (a_this, &tmp_char,
1055 &loc) ;
1056 is_first_nmchar = FALSE ;
1057 } else {
1058 status = cr_tknzr_parse_nmchar
1059 (a_this, &tmp_char, NULL) ;
1060 }
1061 if (status != CR_OK)
1062 break;
1063 g_string_append_unichar ((*a_str)->stryng,
1064 tmp_char);
1065 }
1066 if (i > 0) {
1067 cr_parsing_location_copy
1068 (&(*a_str)->location, &loc) ;
1069 return CR_OK;
1070 }
1071 if (str_needs_free == TRUE && *a_str) {
1072 cr_string_destroy (*a_str);
1073 *a_str = NULL;
1074 }
1075 cr_tknzr_set_cur_pos (a_this, &init_pos);
1076 return CR_PARSING_ERROR;
1077 }
1079 /**
1080 *Parses a "hash" as defined by the css spec in [4.1.1]:
1081 *HASH ::= #{name}
1082 */
1083 static enum CRStatus
1084 cr_tknzr_parse_hash (CRTknzr * a_this, CRString ** a_str)
1085 {
1086 guint32 cur_char = 0;
1087 CRInputPos init_pos;
1088 enum CRStatus status = CR_OK;
1089 gboolean str_needs_free = FALSE;
1090 CRParsingLocation loc = {0,0,0} ;
1092 g_return_val_if_fail (a_this && PRIVATE (a_this)
1093 && PRIVATE (a_this)->input,
1094 CR_BAD_PARAM_ERROR);
1096 RECORD_INITIAL_POS (a_this, &init_pos);
1097 READ_NEXT_CHAR (a_this, &cur_char);
1098 if (cur_char != '#') {
1099 status = CR_PARSING_ERROR;
1100 goto error;
1101 }
1102 if (*a_str == NULL) {
1103 *a_str = cr_string_new ();
1104 str_needs_free = TRUE;
1105 }
1106 cr_tknzr_get_parsing_location (a_this,
1107 &loc) ;
1108 status = cr_tknzr_parse_name (a_this, a_str);
1109 cr_parsing_location_copy (&(*a_str)->location, &loc) ;
1110 if (status != CR_OK) {
1111 goto error;
1112 }
1113 return CR_OK;
1115 error:
1116 if (str_needs_free == TRUE && *a_str) {
1117 cr_string_destroy (*a_str);
1118 *a_str = NULL;
1119 }
1121 cr_tknzr_set_cur_pos (a_this, &init_pos);
1122 return status;
1123 }
1125 /**
1126 *Parses an uri as defined by the css spec [4.1.1]:
1127 * URI ::= url\({w}{string}{w}\)
1128 * |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
1129 *
1130 *@param a_this the current instance of #CRTknzr.
1131 *@param a_str the successfully parsed url.
1132 *@return CR_OK upon successfull completion, an error code otherwise.
1133 */
1134 static enum CRStatus
1135 cr_tknzr_parse_uri (CRTknzr * a_this,
1136 CRString ** a_str)
1137 {
1138 guint32 cur_char = 0;
1139 CRInputPos init_pos;
1140 enum CRStatus status = CR_PARSING_ERROR;
1141 guchar tab[4] = { 0 }, *tmp_ptr1 = NULL, *tmp_ptr2 = NULL;
1142 CRString *str = NULL;
1143 CRParsingLocation location = {0,0,0} ;
1145 g_return_val_if_fail (a_this
1146 && PRIVATE (a_this)
1147 && PRIVATE (a_this)->input
1148 && a_str,
1149 CR_BAD_PARAM_ERROR);
1151 RECORD_INITIAL_POS (a_this, &init_pos);
1153 PEEK_BYTE (a_this, 1, &tab[0]);
1154 PEEK_BYTE (a_this, 2, &tab[1]);
1155 PEEK_BYTE (a_this, 3, &tab[2]);
1156 PEEK_BYTE (a_this, 4, &tab[3]);
1158 if (tab[0] != 'u' || tab[1] != 'r' || tab[2] != 'l' || tab[3] != '(') {
1159 status = CR_PARSING_ERROR;
1160 goto error;
1161 }
1162 /*
1163 *Here, we want to skip 4 bytes ('u''r''l''(').
1164 *But we also need to keep track of the parsing location
1165 *of the 'u'. So, we skip 1 byte, we record the parsing
1166 *location, then we skip the 3 remaining bytes.
1167 */
1168 SKIP_CHARS (a_this, 1);
1169 cr_tknzr_get_parsing_location (a_this, &location) ;
1170 SKIP_CHARS (a_this, 3);
1171 cr_tknzr_try_to_skip_spaces (a_this);
1172 status = cr_tknzr_parse_string (a_this, a_str);
1174 if (status == CR_OK) {
1175 guint32 next_char = 0;
1176 status = cr_tknzr_parse_w (a_this, &tmp_ptr1,
1177 &tmp_ptr2, NULL);
1178 cr_tknzr_try_to_skip_spaces (a_this);
1179 PEEK_NEXT_CHAR (a_this, &next_char);
1180 if (next_char == ')') {
1181 READ_NEXT_CHAR (a_this, &cur_char);
1182 status = CR_OK;
1183 } else {
1184 status = CR_PARSING_ERROR;
1185 }
1186 }
1187 if (status != CR_OK) {
1188 str = cr_string_new ();
1189 for (;;) {
1190 guint32 next_char = 0;
1191 PEEK_NEXT_CHAR (a_this, &next_char);
1192 if (strchr ("!#$%&", next_char)
1193 || (next_char >= '*' && next_char <= '~')
1194 || (cr_utils_is_nonascii (next_char) == TRUE)) {
1195 READ_NEXT_CHAR (a_this, &cur_char);
1196 g_string_append_unichar
1197 (str->stryng, cur_char);
1198 status = CR_OK;
1199 } else {
1200 guint32 esc_code = 0;
1201 status = cr_tknzr_parse_escape
1202 (a_this, &esc_code, NULL);
1203 if (status == CR_OK) {
1204 g_string_append_unichar
1205 (str->stryng,
1206 esc_code);
1207 } else {
1208 status = CR_OK;
1209 break;
1210 }
1211 }
1212 }
1213 cr_tknzr_try_to_skip_spaces (a_this);
1214 READ_NEXT_CHAR (a_this, &cur_char);
1215 if (cur_char == ')') {
1216 status = CR_OK;
1217 } else {
1218 status = CR_PARSING_ERROR;
1219 goto error;
1220 }
1221 if (str) {
1222 if (*a_str == NULL) {
1223 *a_str = str;
1224 str = NULL;
1225 } else {
1226 g_string_append_len
1227 ((*a_str)->stryng,
1228 str->stryng->str,
1229 str->stryng->len);
1230 cr_string_destroy (str);
1231 }
1232 }
1233 }
1235 cr_parsing_location_copy
1236 (&(*a_str)->location,
1237 &location) ;
1238 return CR_OK ;
1239 error:
1240 if (str) {
1241 cr_string_destroy (str);
1242 str = NULL;
1243 }
1244 cr_tknzr_set_cur_pos (a_this, &init_pos);
1245 return status;
1246 }
1248 /**
1249 *parses an RGB as defined in the css2 spec.
1250 *rgb: rgb '('S*{num}%?S* ',' {num}#?S*,S*{num}#?S*')'
1251 *
1252 *@param a_this the "this pointer" of the current instance of
1253 *@param a_rgb out parameter the parsed rgb.
1254 *@return CR_OK upon successfull completion, an error code otherwise.
1255 */
1256 static enum CRStatus
1257 cr_tknzr_parse_rgb (CRTknzr * a_this, CRRgb ** a_rgb)
1258 {
1259 enum CRStatus status = CR_OK;
1260 CRInputPos init_pos;
1261 CRNum *num = NULL;
1262 guchar next_bytes[3] = { 0 }, cur_byte = 0;
1263 glong red = 0,
1264 green = 0,
1265 blue = 0,
1266 i = 0;
1267 gboolean is_percentage = FALSE;
1268 CRParsingLocation location = {0,0,0} ;
1270 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1272 RECORD_INITIAL_POS (a_this, &init_pos);
1274 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1275 PEEK_BYTE (a_this, 2, &next_bytes[1]);
1276 PEEK_BYTE (a_this, 3, &next_bytes[2]);
1278 if (((next_bytes[0] == 'r') || (next_bytes[0] == 'R'))
1279 && ((next_bytes[1] == 'g') || (next_bytes[1] == 'G'))
1280 && ((next_bytes[2] == 'b') || (next_bytes[2] == 'B'))) {
1281 SKIP_CHARS (a_this, 1);
1282 cr_tknzr_get_parsing_location (a_this, &location) ;
1283 SKIP_CHARS (a_this, 2);
1284 } else {
1285 status = CR_PARSING_ERROR;
1286 goto error;
1287 }
1288 READ_NEXT_BYTE (a_this, &cur_byte);
1289 ENSURE_PARSING_COND (cur_byte == '(');
1291 cr_tknzr_try_to_skip_spaces (a_this);
1292 status = cr_tknzr_parse_num (a_this, &num);
1293 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));
1295 red = num->val;
1296 cr_num_destroy (num);
1297 num = NULL;
1299 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1300 if (next_bytes[0] == '%') {
1301 SKIP_CHARS (a_this, 1);
1302 is_percentage = TRUE;
1303 }
1304 cr_tknzr_try_to_skip_spaces (a_this);
1306 for (i = 0; i < 2; i++) {
1307 READ_NEXT_BYTE (a_this, &cur_byte);
1308 ENSURE_PARSING_COND (cur_byte == ',');
1310 cr_tknzr_try_to_skip_spaces (a_this);
1311 status = cr_tknzr_parse_num (a_this, &num);
1312 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));
1314 PEEK_BYTE (a_this, 1, &next_bytes[0]);
1315 if (next_bytes[0] == '%') {
1316 SKIP_CHARS (a_this, 1);
1317 is_percentage = 1;
1318 }
1320 if (i == 0) {
1321 green = num->val;
1322 } else if (i == 1) {
1323 blue = num->val;
1324 }
1326 if (num) {
1327 cr_num_destroy (num);
1328 num = NULL;
1329 }
1330 cr_tknzr_try_to_skip_spaces (a_this);
1331 }
1333 READ_NEXT_BYTE (a_this, &cur_byte);
1334 if (*a_rgb == NULL) {
1335 *a_rgb = cr_rgb_new_with_vals (red, green, blue,
1336 is_percentage);
1338 if (*a_rgb == NULL) {
1339 status = CR_ERROR;
1340 goto error;
1341 }
1342 status = CR_OK;
1343 } else {
1344 (*a_rgb)->red = red;
1345 (*a_rgb)->green = green;
1346 (*a_rgb)->blue = blue;
1347 (*a_rgb)->is_percentage = is_percentage;
1349 status = CR_OK;
1350 }
1352 if (status == CR_OK) {
1353 if (a_rgb && *a_rgb) {
1354 cr_parsing_location_copy
1355 (&(*a_rgb)->location,
1356 &location) ;
1357 }
1358 return CR_OK;
1359 }
1361 error:
1362 if (num) {
1363 cr_num_destroy (num);
1364 num = NULL;
1365 }
1367 cr_tknzr_set_cur_pos (a_this, &init_pos);
1368 return CR_OK;
1369 }
1371 /**
1372 *Parses a atkeyword as defined by the css spec in [4.1.1]:
1373 *ATKEYWORD ::= @{ident}
1374 *
1375 *@param a_this the "this pointer" of the current instance of
1376 *#CRTknzr.
1377 *
1378 *@param a_str out parameter. The parsed atkeyword. If *a_str is
1379 *set to NULL this function allocates a new instance of CRString and
1380 *sets it to the parsed atkeyword. If not, this function just appends
1381 *the parsed atkeyword to the end of *a_str. In both cases it is up to
1382 *the caller to free *a_str.
1383 *
1384 *@return CR_OK upon successfull completion, an error code otherwise.
1385 */
1386 static enum CRStatus
1387 cr_tknzr_parse_atkeyword (CRTknzr * a_this,
1388 CRString ** a_str)
1389 {
1390 guint32 cur_char = 0;
1391 CRInputPos init_pos;
1392 gboolean str_needs_free = FALSE;
1393 enum CRStatus status = CR_OK;
1395 g_return_val_if_fail (a_this && PRIVATE (a_this)
1396 && PRIVATE (a_this)->input
1397 && a_str, CR_BAD_PARAM_ERROR);
1399 RECORD_INITIAL_POS (a_this, &init_pos);
1401 READ_NEXT_CHAR (a_this, &cur_char);
1403 if (cur_char != '@') {
1404 status = CR_PARSING_ERROR;
1405 goto error;
1406 }
1408 if (*a_str == NULL) {
1409 *a_str = cr_string_new ();
1410 str_needs_free = TRUE;
1411 }
1412 status = cr_tknzr_parse_ident (a_this, a_str);
1413 if (status != CR_OK) {
1414 goto error;
1415 }
1416 return CR_OK;
1417 error:
1419 if (str_needs_free == TRUE && *a_str) {
1420 cr_string_destroy (*a_str);
1421 *a_str = NULL;
1422 }
1423 cr_tknzr_set_cur_pos (a_this, &init_pos);
1424 return status;
1425 }
1427 static enum CRStatus
1428 cr_tknzr_parse_important (CRTknzr * a_this,
1429 CRParsingLocation *a_location)
1430 {
1431 guint32 cur_char = 0;
1432 CRInputPos init_pos;
1433 enum CRStatus status = CR_OK;
1435 g_return_val_if_fail (a_this && PRIVATE (a_this)
1436 && PRIVATE (a_this)->input,
1437 CR_BAD_PARAM_ERROR);
1439 RECORD_INITIAL_POS (a_this, &init_pos);
1440 READ_NEXT_CHAR (a_this, &cur_char);
1441 ENSURE_PARSING_COND (cur_char == '!');
1442 if (a_location) {
1443 cr_tknzr_get_parsing_location (a_this,
1444 a_location) ;
1445 }
1446 cr_tknzr_try_to_skip_spaces (a_this);
1448 if (BYTE (PRIVATE (a_this)->input, 1, NULL) == 'i'
1449 && BYTE (PRIVATE (a_this)->input, 2, NULL) == 'm'
1450 && BYTE (PRIVATE (a_this)->input, 3, NULL) == 'p'
1451 && BYTE (PRIVATE (a_this)->input, 4, NULL) == 'o'
1452 && BYTE (PRIVATE (a_this)->input, 5, NULL) == 'r'
1453 && BYTE (PRIVATE (a_this)->input, 6, NULL) == 't'
1454 && BYTE (PRIVATE (a_this)->input, 7, NULL) == 'a'
1455 && BYTE (PRIVATE (a_this)->input, 8, NULL) == 'n'
1456 && BYTE (PRIVATE (a_this)->input, 9, NULL) == 't') {
1457 SKIP_BYTES (a_this, 9);
1458 if (a_location) {
1459 cr_tknzr_get_parsing_location (a_this,
1460 a_location) ;
1461 }
1462 return CR_OK;
1463 } else {
1464 status = CR_PARSING_ERROR;
1465 }
1467 error:
1468 cr_tknzr_set_cur_pos (a_this, &init_pos);
1470 return status;
1471 }
1473 /**
1474 *Parses a num as defined in the css spec [4.1.1]:
1475 *[0-9]+|[0-9]*\.[0-9]+
1476 *@param a_this the current instance of #CRTknzr.
1477 *@param a_num out parameter. The parsed number.
1478 *@return CR_OK upon successfull completion,
1479 *an error code otherwise.
1480 */
1481 static enum CRStatus
1482 cr_tknzr_parse_num (CRTknzr * a_this,
1483 CRNum ** a_num)
1484 {
1485 enum CRStatus status = CR_PARSING_ERROR;
1486 enum CRNumType val_type = NUM_GENERIC;
1487 gboolean parsing_dec, /* true iff seen decimal point. */
1488 parsed; /* true iff the substring seen so far is a valid CSS
1489 number, i.e. `[0-9]+|[0-9]*\.[0-9]+'. */
1490 guint32 cur_char = 0,
1491 next_char = 0;
1492 gdouble numerator, denominator = 1;
1493 CRInputPos init_pos;
1494 CRParsingLocation location = {0,0,0} ;
1496 g_return_val_if_fail (a_this && PRIVATE (a_this)
1497 && PRIVATE (a_this)->input,
1498 CR_BAD_PARAM_ERROR);
1500 RECORD_INITIAL_POS (a_this, &init_pos);
1501 READ_NEXT_CHAR (a_this, &cur_char);
1502 if (IS_NUM (cur_char)) {
1503 numerator = (cur_char - '0');
1504 parsing_dec = FALSE;
1505 parsed = TRUE;
1506 } else if (cur_char == '.') {
1507 numerator = 0;
1508 parsing_dec = TRUE;
1509 parsed = FALSE;
1510 } else {
1511 status = CR_PARSING_ERROR;
1512 goto error;
1513 }
1514 cr_tknzr_get_parsing_location (a_this, &location) ;
1516 for (;;) {
1517 status = cr_tknzr_peek_char (a_this, &next_char);
1518 if (status != CR_OK) {
1519 if (status == CR_END_OF_INPUT_ERROR)
1520 status = CR_OK;
1521 break;
1522 }
1523 if (next_char == '.') {
1524 if (parsing_dec) {
1525 status = CR_PARSING_ERROR;
1526 goto error;
1527 }
1529 READ_NEXT_CHAR (a_this, &cur_char);
1530 parsing_dec = TRUE;
1531 parsed = FALSE; /* In CSS, there must be at least
1532 one digit after `.'. */
1533 } else if (IS_NUM (next_char)) {
1534 READ_NEXT_CHAR (a_this, &cur_char);
1535 parsed = TRUE;
1537 numerator = numerator * 10 + (cur_char - '0');
1538 if (parsing_dec) {
1539 denominator *= 10;
1540 }
1541 } else {
1542 break;
1543 }
1544 }
1546 if (!parsed) {
1547 status = CR_PARSING_ERROR;
1548 }
1550 /*
1551 *Now, set the output param values.
1552 */
1553 if (status == CR_OK) {
1554 gdouble val = numerator / denominator;
1555 if (*a_num == NULL) {
1556 *a_num = cr_num_new_with_val (val, val_type);
1558 if (*a_num == NULL) {
1559 status = CR_ERROR;
1560 goto error;
1561 }
1562 } else {
1563 (*a_num)->val = val;
1564 (*a_num)->type = val_type;
1565 }
1566 cr_parsing_location_copy (&(*a_num)->location,
1567 &location) ;
1568 return CR_OK;
1569 }
1571 error:
1573 cr_tknzr_set_cur_pos (a_this, &init_pos);
1575 return status;
1576 }
1578 /*********************************************
1579 *PUBLIC methods
1580 ********************************************/
1582 CRTknzr *
1583 cr_tknzr_new (CRInput * a_input)
1584 {
1585 CRTknzr *result = NULL;
1587 result = g_try_malloc (sizeof (CRTknzr));
1589 if (result == NULL) {
1590 cr_utils_trace_info ("Out of memory");
1591 return NULL;
1592 }
1594 memset (result, 0, sizeof (CRTknzr));
1596 result->priv = g_try_malloc (sizeof (CRTknzrPriv));
1598 if (result->priv == NULL) {
1599 cr_utils_trace_info ("Out of memory");
1601 if (result) {
1602 g_free (result);
1603 result = NULL;
1604 }
1606 return NULL;
1607 }
1608 memset (result->priv, 0, sizeof (CRTknzrPriv));
1609 if (a_input)
1610 cr_tknzr_set_input (result, a_input);
1611 return result;
1612 }
1614 CRTknzr *
1615 cr_tknzr_new_from_buf (guchar * a_buf, gulong a_len,
1616 enum CREncoding a_enc,
1617 gboolean a_free_at_destroy)
1618 {
1619 CRTknzr *result = NULL;
1620 CRInput *input = NULL;
1622 input = cr_input_new_from_buf (a_buf, a_len, a_enc,
1623 a_free_at_destroy);
1625 g_return_val_if_fail (input != NULL, NULL);
1627 result = cr_tknzr_new (input);
1629 return result;
1630 }
1632 CRTknzr *
1633 cr_tknzr_new_from_uri (const guchar * a_file_uri,
1634 enum CREncoding a_enc)
1635 {
1636 CRTknzr *result = NULL;
1637 CRInput *input = NULL;
1639 input = cr_input_new_from_uri (a_file_uri, a_enc);
1640 g_return_val_if_fail (input != NULL, NULL);
1642 result = cr_tknzr_new (input);
1644 return result;
1645 }
1647 void
1648 cr_tknzr_ref (CRTknzr * a_this)
1649 {
1650 g_return_if_fail (a_this && PRIVATE (a_this));
1652 PRIVATE (a_this)->ref_count++;
1653 }
1655 gboolean
1656 cr_tknzr_unref (CRTknzr * a_this)
1657 {
1658 g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
1660 if (PRIVATE (a_this)->ref_count > 0) {
1661 PRIVATE (a_this)->ref_count--;
1662 }
1664 if (PRIVATE (a_this)->ref_count == 0) {
1665 cr_tknzr_destroy (a_this);
1666 return TRUE;
1667 }
1669 return FALSE;
1670 }
1672 enum CRStatus
1673 cr_tknzr_set_input (CRTknzr * a_this, CRInput * a_input)
1674 {
1675 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1677 if (PRIVATE (a_this)->input) {
1678 cr_input_unref (PRIVATE (a_this)->input);
1679 }
1681 PRIVATE (a_this)->input = a_input;
1683 cr_input_ref (PRIVATE (a_this)->input);
1685 return CR_OK;
1686 }
1688 enum CRStatus
1689 cr_tknzr_get_input (CRTknzr * a_this, CRInput ** a_input)
1690 {
1691 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1693 *a_input = PRIVATE (a_this)->input;
1695 return CR_OK;
1696 }
1698 /*********************************
1699 *Tokenizer input handling routines
1700 *********************************/
1702 /**
1703 *Reads the next byte from the parser input stream.
1704 *@param a_this the "this pointer" of the current instance of
1705 *#CRParser.
1706 *@param a_byte out parameter the place where to store the byte
1707 *read.
1708 *@return CR_OK upon successfull completion, an error
1709 *code otherwise.
1710 */
1711 enum CRStatus
1712 cr_tknzr_read_byte (CRTknzr * a_this, guchar * a_byte)
1713 {
1714 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1716 return cr_input_read_byte (PRIVATE (a_this)->input, a_byte);
1718 }
1720 /**
1721 *Reads the next char from the parser input stream.
1722 *@param a_this the current instance of #CRTknzr.
1723 *@param a_char out parameter. The read char.
1724 *@return CR_OK upon successfull completion, an error code
1725 *otherwise.
1726 */
1727 enum CRStatus
1728 cr_tknzr_read_char (CRTknzr * a_this, guint32 * a_char)
1729 {
1730 g_return_val_if_fail (a_this && PRIVATE (a_this)
1731 && PRIVATE (a_this)->input
1732 && a_char, CR_BAD_PARAM_ERROR);
1734 if (PRIVATE (a_this)->token_cache) {
1735 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1736 &PRIVATE (a_this)->prev_pos);
1737 cr_token_destroy (PRIVATE (a_this)->token_cache);
1738 PRIVATE (a_this)->token_cache = NULL;
1739 }
1741 return cr_input_read_char (PRIVATE (a_this)->input, a_char);
1742 }
1744 /**
1745 *Peeks a char from the parser input stream.
1746 *To "peek a char" means reads the next char without consuming it.
1747 *Subsequent calls to this function return the same char.
1748 *@param a_this the current instance of #CRTknzr.
1749 *@param a_char out parameter. The peeked char uppon successfull completion.
1750 *@return CR_OK upon successfull completion, an error code otherwise.
1751 */
1752 enum CRStatus
1753 cr_tknzr_peek_char (CRTknzr * a_this, guint32 * a_char)
1754 {
1755 g_return_val_if_fail (a_this && PRIVATE (a_this)
1756 && PRIVATE (a_this)->input
1757 && a_char, CR_BAD_PARAM_ERROR);
1759 if (PRIVATE (a_this)->token_cache) {
1760 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1761 &PRIVATE (a_this)->prev_pos);
1762 cr_token_destroy (PRIVATE (a_this)->token_cache);
1763 PRIVATE (a_this)->token_cache = NULL;
1764 }
1766 return cr_input_peek_char (PRIVATE (a_this)->input, a_char);
1767 }
1769 /**
1770 *Peeks a byte ahead at a given postion in the parser input stream.
1771 *@param a_this the current instance of #CRTknzr.
1772 *@param a_offset the offset of the peeked byte starting from the current
1773 *byte in the parser input stream.
1774 *@param a_byte out parameter. The peeked byte upon
1775 *successfull completion.
1776 *@return CR_OK upon successfull completion, an error code otherwise.
1777 */
1778 enum CRStatus
1779 cr_tknzr_peek_byte (CRTknzr * a_this, gulong a_offset, guchar * a_byte)
1780 {
1781 g_return_val_if_fail (a_this && PRIVATE (a_this)
1782 && PRIVATE (a_this)->input && a_byte,
1783 CR_BAD_PARAM_ERROR);
1785 if (PRIVATE (a_this)->token_cache) {
1786 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1787 &PRIVATE (a_this)->prev_pos);
1788 cr_token_destroy (PRIVATE (a_this)->token_cache);
1789 PRIVATE (a_this)->token_cache = NULL;
1790 }
1792 return cr_input_peek_byte (PRIVATE (a_this)->input,
1793 CR_SEEK_CUR, a_offset, a_byte);
1794 }
1796 /**
1797 *Same as cr_tknzr_peek_byte() but this api returns the byte peeked.
1798 *@param a_this the current instance of #CRTknzr.
1799 *@param a_offset the offset of the peeked byte starting from the current
1800 *byte in the parser input stream.
1801 *@param a_eof out parameter. If not NULL, is set to TRUE if we reached end of
1802 *file, FALE otherwise. If the caller sets it to NULL, this parameter
1803 *is just ignored.
1804 *@return the peeked byte.
1805 */
1806 guchar
1807 cr_tknzr_peek_byte2 (CRTknzr * a_this, gulong a_offset, gboolean * a_eof)
1808 {
1809 g_return_val_if_fail (a_this && PRIVATE (a_this)
1810 && PRIVATE (a_this)->input, 0);
1812 return cr_input_peek_byte2 (PRIVATE (a_this)->input, a_offset, a_eof);
1813 }
1815 /**
1816 *Gets the number of bytes left in the topmost input stream
1817 *associated to this parser.
1818 *@param a_this the current instance of #CRTknzr
1819 *@return the number of bytes left or -1 in case of error.
1820 */
1821 glong
1822 cr_tknzr_get_nb_bytes_left (CRTknzr * a_this)
1823 {
1824 g_return_val_if_fail (a_this && PRIVATE (a_this)
1825 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1827 if (PRIVATE (a_this)->token_cache) {
1828 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1829 &PRIVATE (a_this)->prev_pos);
1830 cr_token_destroy (PRIVATE (a_this)->token_cache);
1831 PRIVATE (a_this)->token_cache = NULL;
1832 }
1834 return cr_input_get_nb_bytes_left (PRIVATE (a_this)->input);
1835 }
1837 enum CRStatus
1838 cr_tknzr_get_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
1839 {
1840 g_return_val_if_fail (a_this && PRIVATE (a_this)
1841 && PRIVATE (a_this)->input
1842 && a_pos, CR_BAD_PARAM_ERROR);
1844 if (PRIVATE (a_this)->token_cache) {
1845 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1846 &PRIVATE (a_this)->prev_pos);
1847 cr_token_destroy (PRIVATE (a_this)->token_cache);
1848 PRIVATE (a_this)->token_cache = NULL;
1849 }
1851 return cr_input_get_cur_pos (PRIVATE (a_this)->input, a_pos);
1852 }
1854 enum CRStatus
1855 cr_tknzr_get_parsing_location (CRTknzr *a_this,
1856 CRParsingLocation *a_loc)
1857 {
1858 g_return_val_if_fail (a_this
1859 && PRIVATE (a_this)
1860 && a_loc,
1861 CR_BAD_PARAM_ERROR) ;
1863 return cr_input_get_parsing_location
1864 (PRIVATE (a_this)->input, a_loc) ;
1865 }
1867 enum CRStatus
1868 cr_tknzr_get_cur_byte_addr (CRTknzr * a_this, guchar ** a_addr)
1869 {
1870 g_return_val_if_fail (a_this && PRIVATE (a_this)
1871 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1872 if (PRIVATE (a_this)->token_cache) {
1873 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1874 &PRIVATE (a_this)->prev_pos);
1875 cr_token_destroy (PRIVATE (a_this)->token_cache);
1876 PRIVATE (a_this)->token_cache = NULL;
1877 }
1879 return cr_input_get_cur_byte_addr (PRIVATE (a_this)->input, a_addr);
1880 }
1882 enum CRStatus
1883 cr_tknzr_seek_index (CRTknzr * a_this, enum CRSeekPos a_origin, gint a_pos)
1884 {
1885 g_return_val_if_fail (a_this && PRIVATE (a_this)
1886 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1888 if (PRIVATE (a_this)->token_cache) {
1889 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1890 &PRIVATE (a_this)->prev_pos);
1891 cr_token_destroy (PRIVATE (a_this)->token_cache);
1892 PRIVATE (a_this)->token_cache = NULL;
1893 }
1895 return cr_input_seek_index (PRIVATE (a_this)->input, a_origin, a_pos);
1896 }
1898 enum CRStatus
1899 cr_tknzr_consume_chars (CRTknzr * a_this, guint32 a_char, glong * a_nb_char)
1900 {
1901 g_return_val_if_fail (a_this && PRIVATE (a_this)
1902 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1904 if (PRIVATE (a_this)->token_cache) {
1905 cr_input_set_cur_pos (PRIVATE (a_this)->input,
1906 &PRIVATE (a_this)->prev_pos);
1907 cr_token_destroy (PRIVATE (a_this)->token_cache);
1908 PRIVATE (a_this)->token_cache = NULL;
1909 }
1911 return cr_input_consume_chars (PRIVATE (a_this)->input,
1912 a_char, a_nb_char);
1913 }
1915 enum CRStatus
1916 cr_tknzr_set_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
1917 {
1918 g_return_val_if_fail (a_this && PRIVATE (a_this)
1919 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);
1921 if (PRIVATE (a_this)->token_cache) {
1922 cr_token_destroy (PRIVATE (a_this)->token_cache);
1923 PRIVATE (a_this)->token_cache = NULL;
1924 }
1926 return cr_input_set_cur_pos (PRIVATE (a_this)->input, a_pos);
1927 }
1929 enum CRStatus
1930 cr_tknzr_unget_token (CRTknzr * a_this, CRToken * a_token)
1931 {
1932 g_return_val_if_fail (a_this && PRIVATE (a_this)
1933 && PRIVATE (a_this)->token_cache == NULL,
1934 CR_BAD_PARAM_ERROR);
1936 PRIVATE (a_this)->token_cache = a_token;
1938 return CR_OK;
1939 }
1941 /**
1942 *Returns the next token of the input stream.
1943 *This method is really central. Each parsing
1944 *method calls it.
1945 *@param a_this the current tokenizer.
1946 *@param a_tk out parameter. The returned token.
1947 *for the sake of mem leak avoidance, *a_tk must
1948 *be NULL.
 *@return CR_OK upon successful completion, an error code
 *otherwise.
1951 */
1952 enum CRStatus
1953 cr_tknzr_get_next_token (CRTknzr * a_this, CRToken ** a_tk)
1954 {
1955 enum CRStatus status = CR_OK;
1956 CRToken *token = NULL;
1957 CRInputPos init_pos;
1958 guint32 next_char = 0;
1959 guchar next_bytes[4] = { 0 };
1960 gboolean reached_eof = FALSE;
1961 CRInput *input = NULL;
1962 CRString *str = NULL;
1963 CRRgb *rgb = NULL;
1964 CRParsingLocation location = {0,0,0} ;
1966 g_return_val_if_fail (a_this && PRIVATE (a_this)
1967 && a_tk && *a_tk == NULL
1968 && PRIVATE (a_this)->input,
1969 CR_BAD_PARAM_ERROR);
1971 if (PRIVATE (a_this)->token_cache) {
1972 *a_tk = PRIVATE (a_this)->token_cache;
1973 PRIVATE (a_this)->token_cache = NULL;
1974 return CR_OK;
1975 }
1977 RECORD_INITIAL_POS (a_this, &init_pos);
1979 status = cr_input_get_end_of_file
1980 (PRIVATE (a_this)->input, &reached_eof);
1981 ENSURE_PARSING_COND (status == CR_OK);
1983 if (reached_eof == TRUE) {
1984 status = CR_END_OF_INPUT_ERROR;
1985 goto error;
1986 }
1988 input = PRIVATE (a_this)->input;
1990 PEEK_NEXT_CHAR (a_this, &next_char);
1991 token = cr_token_new ();
1992 ENSURE_PARSING_COND (token);
1994 switch (next_char) {
1995 case '@':
1996 {
1997 if (BYTE (input, 2, NULL) == 'f'
1998 && BYTE (input, 3, NULL) == 'o'
1999 && BYTE (input, 4, NULL) == 'n'
2000 && BYTE (input, 5, NULL) == 't'
2001 && BYTE (input, 6, NULL) == '-'
2002 && BYTE (input, 7, NULL) == 'f'
2003 && BYTE (input, 8, NULL) == 'a'
2004 && BYTE (input, 9, NULL) == 'c'
2005 && BYTE (input, 10, NULL) == 'e') {
2006 SKIP_CHARS (a_this, 1);
2007 cr_tknzr_get_parsing_location
2008 (a_this, &location) ;
2009 SKIP_CHARS (a_this, 9);
2010 status = cr_token_set_font_face_sym (token);
2011 CHECK_PARSING_STATUS (status, TRUE);
2012 cr_parsing_location_copy (&token->location,
2013 &location) ;
2014 goto done;
2015 }
2017 if (BYTE (input, 2, NULL) == 'c'
2018 && BYTE (input, 3, NULL) == 'h'
2019 && BYTE (input, 4, NULL) == 'a'
2020 && BYTE (input, 5, NULL) == 'r'
2021 && BYTE (input, 6, NULL) == 's'
2022 && BYTE (input, 7, NULL) == 'e'
2023 && BYTE (input, 8, NULL) == 't') {
2024 SKIP_CHARS (a_this, 1);
2025 cr_tknzr_get_parsing_location
2026 (a_this, &location) ;
2027 SKIP_CHARS (a_this, 7);
2028 status = cr_token_set_charset_sym (token);
2029 CHECK_PARSING_STATUS (status, TRUE);
2030 cr_parsing_location_copy (&token->location,
2031 &location) ;
2032 goto done;
2033 }
2035 if (BYTE (input, 2, NULL) == 'i'
2036 && BYTE (input, 3, NULL) == 'm'
2037 && BYTE (input, 4, NULL) == 'p'
2038 && BYTE (input, 5, NULL) == 'o'
2039 && BYTE (input, 6, NULL) == 'r'
2040 && BYTE (input, 7, NULL) == 't') {
2041 SKIP_CHARS (a_this, 1);
2042 cr_tknzr_get_parsing_location
2043 (a_this, &location) ;
2044 SKIP_CHARS (a_this, 6);
2045 status = cr_token_set_import_sym (token);
2046 CHECK_PARSING_STATUS (status, TRUE);
2047 cr_parsing_location_copy (&token->location,
2048 &location) ;
2049 goto done;
2050 }
2052 if (BYTE (input, 2, NULL) == 'm'
2053 && BYTE (input, 3, NULL) == 'e'
2054 && BYTE (input, 4, NULL) == 'd'
2055 && BYTE (input, 5, NULL) == 'i'
2056 && BYTE (input, 6, NULL) == 'a') {
2057 SKIP_CHARS (a_this, 1);
2058 cr_tknzr_get_parsing_location (a_this,
2059 &location) ;
2060 SKIP_CHARS (a_this, 5);
2061 status = cr_token_set_media_sym (token);
2062 CHECK_PARSING_STATUS (status, TRUE);
2063 cr_parsing_location_copy (&token->location,
2064 &location) ;
2065 goto done;
2066 }
2068 if (BYTE (input, 2, NULL) == 'p'
2069 && BYTE (input, 3, NULL) == 'a'
2070 && BYTE (input, 4, NULL) == 'g'
2071 && BYTE (input, 5, NULL) == 'e') {
2072 SKIP_CHARS (a_this, 1);
2073 cr_tknzr_get_parsing_location (a_this,
2074 &location) ;
2075 SKIP_CHARS (a_this, 4);
2076 status = cr_token_set_page_sym (token);
2077 CHECK_PARSING_STATUS (status, TRUE);
2078 cr_parsing_location_copy (&token->location,
2079 &location) ;
2080 goto done;
2081 }
2082 status = cr_tknzr_parse_atkeyword (a_this, &str);
2083 if (status == CR_OK) {
2084 status = cr_token_set_atkeyword (token, str);
2085 CHECK_PARSING_STATUS (status, TRUE);
2086 if (str) {
2087 cr_parsing_location_copy (&token->location,
2088 &str->location) ;
2089 }
2090 goto done;
2091 }
2092 }
2093 break;
2095 case 'u':
2097 if (BYTE (input, 2, NULL) == 'r'
2098 && BYTE (input, 3, NULL) == 'l'
2099 && BYTE (input, 4, NULL) == '(') {
2100 CRString *str = NULL;
2102 status = cr_tknzr_parse_uri (a_this, &str);
2103 if (status == CR_OK) {
2104 status = cr_token_set_uri (token, str);
2105 CHECK_PARSING_STATUS (status, TRUE);
2106 if (str) {
2107 cr_parsing_location_copy (&token->location,
2108 &str->location) ;
2109 }
2110 goto done;
2111 }
2112 } else {
2113 status = cr_tknzr_parse_ident (a_this, &str);
2114 if (status == CR_OK && str) {
2115 status = cr_token_set_ident (token, str);
2116 CHECK_PARSING_STATUS (status, TRUE);
2117 if (str) {
2118 cr_parsing_location_copy (&token->location,
2119 &str->location) ;
2120 }
2121 goto done;
2122 }
2123 }
2124 break;
2126 case 'r':
2127 if (BYTE (input, 2, NULL) == 'g'
2128 && BYTE (input, 3, NULL) == 'b'
2129 && BYTE (input, 4, NULL) == '(') {
2130 status = cr_tknzr_parse_rgb (a_this, &rgb);
2131 if (status == CR_OK && rgb) {
2132 status = cr_token_set_rgb (token, rgb);
2133 CHECK_PARSING_STATUS (status, TRUE);
2134 if (rgb) {
2135 cr_parsing_location_copy (&token->location,
2136 &rgb->location) ;
2137 }
2138 rgb = NULL;
2139 goto done;
2140 }
2142 } else {
2143 status = cr_tknzr_parse_ident (a_this, &str);
2144 if (status == CR_OK) {
2145 status = cr_token_set_ident (token, str);
2146 CHECK_PARSING_STATUS (status, TRUE);
2147 if (str) {
2148 cr_parsing_location_copy (&token->location,
2149 &str->location) ;
2150 }
2151 str = NULL;
2152 goto done;
2153 }
2154 }
2155 break;
2157 case '<':
2158 if (BYTE (input, 2, NULL) == '-'
2159 && BYTE (input, 3, NULL) == '-') {
2160 SKIP_CHARS (a_this, 1);
2161 cr_tknzr_get_parsing_location (a_this,
2162 &location) ;
2163 SKIP_CHARS (a_this, 2);
2164 status = cr_token_set_cdo (token);
2165 CHECK_PARSING_STATUS (status, TRUE);
2166 cr_parsing_location_copy (&token->location,
2167 &location) ;
2168 goto done;
2169 }
2170 break;
2172 case '-':
2173 if (BYTE (input, 2, NULL) == '-'
2174 && BYTE (input, 3, NULL) == '>') {
2175 SKIP_CHARS (a_this, 1);
2176 cr_tknzr_get_parsing_location (a_this,
2177 &location) ;
2178 SKIP_CHARS (a_this, 2);
2179 status = cr_token_set_cdc (token);
2180 CHECK_PARSING_STATUS (status, TRUE);
2181 cr_parsing_location_copy (&token->location,
2182 &location) ;
2183 goto done;
2184 } else {
2185 status = cr_tknzr_parse_ident
2186 (a_this, &str);
2187 if (status == CR_OK) {
2188 cr_token_set_ident
2189 (token, str);
2190 if (str) {
2191 cr_parsing_location_copy (&token->location,
2192 &str->location) ;
2193 }
2194 goto done;
2195 }
2196 }
2197 break;
2199 case '~':
2200 if (BYTE (input, 2, NULL) == '=') {
2201 SKIP_CHARS (a_this, 1);
2202 cr_tknzr_get_parsing_location (a_this,
2203 &location) ;
2204 SKIP_CHARS (a_this, 1);
2205 status = cr_token_set_includes (token);
2206 CHECK_PARSING_STATUS (status, TRUE);
2207 cr_parsing_location_copy (&token->location,
2208 &location) ;
2209 goto done;
2210 }
2211 break;
2213 case '|':
2214 if (BYTE (input, 2, NULL) == '=') {
2215 SKIP_CHARS (a_this, 1);
2216 cr_tknzr_get_parsing_location (a_this,
2217 &location) ;
2218 SKIP_CHARS (a_this, 1);
2219 status = cr_token_set_dashmatch (token);
2220 CHECK_PARSING_STATUS (status, TRUE);
2221 cr_parsing_location_copy (&token->location,
2222 &location) ;
2223 goto done;
2224 }
2225 break;
2227 case '/':
2228 if (BYTE (input, 2, NULL) == '*') {
2229 status = cr_tknzr_parse_comment (a_this, &str);
2231 if (status == CR_OK) {
2232 status = cr_token_set_comment (token, str);
2233 str = NULL;
2234 CHECK_PARSING_STATUS (status, TRUE);
2235 if (str) {
2236 cr_parsing_location_copy (&token->location,
2237 &str->location) ;
2238 }
2239 goto done;
2240 }
2241 }
2242 break ;
2244 case ';':
2245 SKIP_CHARS (a_this, 1);
2246 cr_tknzr_get_parsing_location (a_this,
2247 &location) ;
2248 status = cr_token_set_semicolon (token);
2249 CHECK_PARSING_STATUS (status, TRUE);
2250 cr_parsing_location_copy (&token->location,
2251 &location) ;
2252 goto done;
2254 case '{':
2255 SKIP_CHARS (a_this, 1);
2256 cr_tknzr_get_parsing_location (a_this,
2257 &location) ;
2258 status = cr_token_set_cbo (token);
2259 CHECK_PARSING_STATUS (status, TRUE);
2260 cr_tknzr_get_parsing_location (a_this,
2261 &location) ;
2262 goto done;
2264 case '}':
2265 SKIP_CHARS (a_this, 1);
2266 cr_tknzr_get_parsing_location (a_this,
2267 &location) ;
2268 status = cr_token_set_cbc (token);
2269 CHECK_PARSING_STATUS (status, TRUE);
2270 cr_parsing_location_copy (&token->location,
2271 &location) ;
2272 goto done;
2274 case '(':
2275 SKIP_CHARS (a_this, 1);
2276 cr_tknzr_get_parsing_location (a_this,
2277 &location) ;
2278 status = cr_token_set_po (token);
2279 CHECK_PARSING_STATUS (status, TRUE);
2280 cr_parsing_location_copy (&token->location,
2281 &location) ;
2282 goto done;
2284 case ')':
2285 SKIP_CHARS (a_this, 1);
2286 cr_tknzr_get_parsing_location (a_this,
2287 &location) ;
2288 status = cr_token_set_pc (token);
2289 CHECK_PARSING_STATUS (status, TRUE);
2290 cr_parsing_location_copy (&token->location,
2291 &location) ;
2292 goto done;
2294 case '[':
2295 SKIP_CHARS (a_this, 1);
2296 cr_tknzr_get_parsing_location (a_this,
2297 &location) ;
2298 status = cr_token_set_bo (token);
2299 CHECK_PARSING_STATUS (status, TRUE);
2300 cr_parsing_location_copy (&token->location,
2301 &location) ;
2302 goto done;
2304 case ']':
2305 SKIP_CHARS (a_this, 1);
2306 cr_tknzr_get_parsing_location (a_this,
2307 &location) ;
2308 status = cr_token_set_bc (token);
2309 CHECK_PARSING_STATUS (status, TRUE);
2310 cr_parsing_location_copy (&token->location,
2311 &location) ;
2312 goto done;
2314 case ' ':
2315 case '\t':
2316 case '\n':
2317 case '\f':
2318 case '\r':
2319 {
2320 guchar *start = NULL,
2321 *end = NULL;
2323 status = cr_tknzr_parse_w (a_this, &start,
2324 &end, &location);
2325 if (status == CR_OK) {
2326 status = cr_token_set_s (token);
2327 CHECK_PARSING_STATUS (status, TRUE);
2328 cr_tknzr_get_parsing_location (a_this,
2329 &location) ;
2330 goto done;
2331 }
2332 }
2333 break;
2335 case '#':
2336 {
2337 status = cr_tknzr_parse_hash (a_this, &str);
2338 if (status == CR_OK && str) {
2339 status = cr_token_set_hash (token, str);
2340 CHECK_PARSING_STATUS (status, TRUE);
2341 if (str) {
2342 cr_parsing_location_copy (&token->location,
2343 &str->location) ;
2344 }
2345 str = NULL;
2346 goto done;
2347 }
2348 }
2349 break;
2351 case '\'':
2352 case '"':
2353 status = cr_tknzr_parse_string (a_this, &str);
2354 if (status == CR_OK && str) {
2355 status = cr_token_set_string (token, str);
2356 CHECK_PARSING_STATUS (status, TRUE);
2357 if (str) {
2358 cr_parsing_location_copy (&token->location,
2359 &str->location) ;
2360 }
2361 str = NULL;
2362 goto done;
2363 }
2364 break;
2366 case '!':
2367 status = cr_tknzr_parse_important (a_this, &location);
2368 if (status == CR_OK) {
2369 status = cr_token_set_important_sym (token);
2370 CHECK_PARSING_STATUS (status, TRUE);
2371 cr_parsing_location_copy (&token->location,
2372 &location) ;
2373 goto done;
2374 }
2375 break;
2377 case '0':
2378 case '1':
2379 case '2':
2380 case '3':
2381 case '4':
2382 case '5':
2383 case '6':
2384 case '7':
2385 case '8':
2386 case '9':
2387 case '.':
2388 {
2389 CRNum *num = NULL;
2391 status = cr_tknzr_parse_num (a_this, &num);
2392 if (status == CR_OK && num) {
2393 next_bytes[0] = BYTE (input, 1, NULL);
2394 next_bytes[1] = BYTE (input, 2, NULL);
2395 next_bytes[2] = BYTE (input, 3, NULL);
2396 next_bytes[3] = BYTE (input, 3, NULL);
2398 if (next_bytes[0] == 'e'
2399 && next_bytes[1] == 'm') {
2400 num->type = NUM_LENGTH_EM;
2401 status = cr_token_set_ems (token,
2402 num);
2403 num = NULL;
2404 SKIP_CHARS (a_this, 2);
2405 } else if (next_bytes[0] == 'e'
2406 && next_bytes[1] == 'x') {
2407 num->type = NUM_LENGTH_EX;
2408 status = cr_token_set_exs (token,
2409 num);
2410 num = NULL;
2411 SKIP_CHARS (a_this, 2);
2412 } else if (next_bytes[0] == 'p'
2413 && next_bytes[1] == 'x') {
2414 num->type = NUM_LENGTH_PX;
2415 status = cr_token_set_length
2416 (token, num, LENGTH_PX_ET);
2417 num = NULL;
2418 SKIP_CHARS (a_this, 2);
2419 } else if (next_bytes[0] == 'c'
2420 && next_bytes[1] == 'm') {
2421 num->type = NUM_LENGTH_CM;
2422 status = cr_token_set_length
2423 (token, num, LENGTH_CM_ET);
2424 num = NULL;
2425 SKIP_CHARS (a_this, 2);
2426 } else if (next_bytes[0] == 'm'
2427 && next_bytes[1] == 'm') {
2428 num->type = NUM_LENGTH_MM;
2429 status = cr_token_set_length
2430 (token, num, LENGTH_MM_ET);
2431 num = NULL;
2432 SKIP_CHARS (a_this, 2);
2433 } else if (next_bytes[0] == 'i'
2434 && next_bytes[1] == 'n') {
2435 num->type = NUM_LENGTH_IN;
2436 status = cr_token_set_length
2437 (token, num, LENGTH_IN_ET);
2438 num = NULL;
2439 SKIP_CHARS (a_this, 2);
2440 } else if (next_bytes[0] == 'p'
2441 && next_bytes[1] == 't') {
2442 num->type = NUM_LENGTH_PT;
2443 status = cr_token_set_length
2444 (token, num, LENGTH_PT_ET);
2445 num = NULL;
2446 SKIP_CHARS (a_this, 2);
2447 } else if (next_bytes[0] == 'p'
2448 && next_bytes[1] == 'c') {
2449 num->type = NUM_LENGTH_PC;
2450 status = cr_token_set_length
2451 (token, num, LENGTH_PC_ET);
2452 num = NULL;
2453 SKIP_CHARS (a_this, 2);
2454 } else if (next_bytes[0] == 'd'
2455 && next_bytes[1] == 'e'
2456 && next_bytes[2] == 'g') {
2457 num->type = NUM_ANGLE_DEG;
2458 status = cr_token_set_angle
2459 (token, num, ANGLE_DEG_ET);
2460 num = NULL;
2461 SKIP_CHARS (a_this, 3);
2462 } else if (next_bytes[0] == 'r'
2463 && next_bytes[1] == 'a'
2464 && next_bytes[2] == 'd') {
2465 num->type = NUM_ANGLE_RAD;
2466 status = cr_token_set_angle
2467 (token, num, ANGLE_RAD_ET);
2468 num = NULL;
2469 SKIP_CHARS (a_this, 3);
2470 } else if (next_bytes[0] == 'g'
2471 && next_bytes[1] == 'r'
2472 && next_bytes[2] == 'a'
2473 && next_bytes[3] == 'd') {
2474 num->type = NUM_ANGLE_GRAD;
2475 status = cr_token_set_angle
2476 (token, num, ANGLE_GRAD_ET);
2477 num = NULL;
2478 SKIP_CHARS (a_this, 4);
2479 } else if (next_bytes[0] == 'm'
2480 && next_bytes[1] == 's') {
2481 num->type = NUM_TIME_MS;
2482 status = cr_token_set_time
2483 (token, num, TIME_MS_ET);
2484 num = NULL;
2485 SKIP_CHARS (a_this, 2);
2486 } else if (next_bytes[0] == 's') {
2487 num->type = NUM_TIME_S;
2488 status = cr_token_set_time
2489 (token, num, TIME_S_ET);
2490 num = NULL;
2491 SKIP_CHARS (a_this, 1);
2492 } else if (next_bytes[0] == 'H'
2493 && next_bytes[1] == 'z') {
2494 num->type = NUM_FREQ_HZ;
2495 status = cr_token_set_freq
2496 (token, num, FREQ_HZ_ET);
2497 num = NULL;
2498 SKIP_CHARS (a_this, 2);
2499 } else if (next_bytes[0] == 'k'
2500 && next_bytes[1] == 'H'
2501 && next_bytes[2] == 'z') {
2502 num->type = NUM_FREQ_KHZ;
2503 status = cr_token_set_freq
2504 (token, num, FREQ_KHZ_ET);
2505 num = NULL;
2506 SKIP_CHARS (a_this, 3);
2507 } else if (next_bytes[0] == '%') {
2508 num->type = NUM_PERCENTAGE;
2509 status = cr_token_set_percentage
2510 (token, num);
2511 num = NULL;
2512 SKIP_CHARS (a_this, 1);
2513 } else {
2514 status = cr_tknzr_parse_ident (a_this,
2515 &str);
2516 if (status == CR_OK && str) {
2517 num->type = NUM_UNKNOWN_TYPE;
2518 status = cr_token_set_dimen
2519 (token, num, str);
2520 num = NULL;
2521 CHECK_PARSING_STATUS (status,
2522 TRUE);
2523 str = NULL;
2524 } else {
2525 status = cr_token_set_number
2526 (token, num);
2527 num = NULL;
2528 CHECK_PARSING_STATUS (status, CR_OK);
2529 str = NULL;
2530 }
2531 }
2532 if (token && token->u.num) {
2533 cr_parsing_location_copy (&token->location,
2534 &token->u.num->location) ;
2535 } else {
2536 status = CR_ERROR ;
2537 }
2538 goto done ;
2539 }
2540 }
2541 break;
2543 default:
2544 /*process the fallback cases here */
2546 if (next_char == '\\'
2547 || (cr_utils_is_nonascii (next_bytes[0]) == TRUE)
2548 || ((next_char >= 'a') && (next_char <= 'z'))
2549 || ((next_char >= 'A') && (next_char <= 'Z'))) {
2550 status = cr_tknzr_parse_ident (a_this, &str);
2551 if (status == CR_OK && str) {
2552 guint32 next_c = 0;
2554 status = cr_input_peek_char
2555 (PRIVATE (a_this)->input, &next_c);
2557 if (status == CR_OK && next_c == '(') {
2559 SKIP_CHARS (a_this, 1);
2560 status = cr_token_set_function
2561 (token, str);
2562 CHECK_PARSING_STATUS (status, TRUE);
2563 /*ownership is transfered
2564 *to token by cr_token_set_function.
2565 */
2566 if (str) {
2567 cr_parsing_location_copy (&token->location,
2568 &str->location) ;
2569 }
2570 str = NULL;
2571 } else {
2572 status = cr_token_set_ident (token,
2573 str);
2574 CHECK_PARSING_STATUS (status, TRUE);
2575 if (str) {
2576 cr_parsing_location_copy (&token->location,
2577 &str->location) ;
2578 }
2579 str = NULL;
2580 }
2581 goto done;
2582 } else {
2583 if (str) {
2584 cr_string_destroy (str);
2585 str = NULL;
2586 }
2587 }
2588 }
2589 break;
2590 }
2592 READ_NEXT_CHAR (a_this, &next_char);
2593 cr_tknzr_get_parsing_location (a_this,
2594 &location) ;
2595 status = cr_token_set_delim (token, next_char);
2596 CHECK_PARSING_STATUS (status, TRUE);
2597 cr_parsing_location_copy (&token->location,
2598 &location) ;
2599 done:
2601 if (status == CR_OK && token) {
2602 *a_tk = token;
2603 /*
2604 *store the previous position input stream pos.
2605 */
2606 memmove (&PRIVATE (a_this)->prev_pos,
2607 &init_pos, sizeof (CRInputPos));
2608 return CR_OK;
2609 }
2611 error:
2612 if (token) {
2613 cr_token_destroy (token);
2614 token = NULL;
2615 }
2617 if (str) {
2618 cr_string_destroy (str);
2619 str = NULL;
2620 }
2621 cr_tknzr_set_cur_pos (a_this, &init_pos);
2622 return status;
2624 }
2626 enum CRStatus
2627 cr_tknzr_parse_token (CRTknzr * a_this, enum CRTokenType a_type,
2628 enum CRTokenExtraType a_et, gpointer a_res,
2629 gpointer a_extra_res)
2630 {
2631 enum CRStatus status = CR_OK;
2632 CRToken *token = NULL;
2634 g_return_val_if_fail (a_this && PRIVATE (a_this)
2635 && PRIVATE (a_this)->input
2636 && a_res, CR_BAD_PARAM_ERROR);
2638 status = cr_tknzr_get_next_token (a_this, &token);
2639 if (status != CR_OK)
2640 return status;
2641 if (token == NULL)
2642 return CR_PARSING_ERROR;
2644 if (token->type == a_type) {
2645 switch (a_type) {
2646 case NO_TK:
2647 case S_TK:
2648 case CDO_TK:
2649 case CDC_TK:
2650 case INCLUDES_TK:
2651 case DASHMATCH_TK:
2652 case IMPORT_SYM_TK:
2653 case PAGE_SYM_TK:
2654 case MEDIA_SYM_TK:
2655 case FONT_FACE_SYM_TK:
2656 case CHARSET_SYM_TK:
2657 case IMPORTANT_SYM_TK:
2658 status = CR_OK;
2659 break;
2661 case STRING_TK:
2662 case IDENT_TK:
2663 case HASH_TK:
2664 case ATKEYWORD_TK:
2665 case FUNCTION_TK:
2666 case COMMENT_TK:
2667 case URI_TK:
2668 *((CRString **) a_res) = token->u.str;
2669 token->u.str = NULL;
2670 status = CR_OK;
2671 break;
2673 case EMS_TK:
2674 case EXS_TK:
2675 case PERCENTAGE_TK:
2676 case NUMBER_TK:
2677 *((CRNum **) a_res) = token->u.num;
2678 token->u.num = NULL;
2679 status = CR_OK;
2680 break;
2682 case LENGTH_TK:
2683 case ANGLE_TK:
2684 case TIME_TK:
2685 case FREQ_TK:
2686 if (token->extra_type == a_et) {
2687 *((CRNum **) a_res) = token->u.num;
2688 token->u.num = NULL;
2689 status = CR_OK;
2690 }
2691 break;
2693 case DIMEN_TK:
2694 *((CRNum **) a_res) = token->u.num;
2695 if (a_extra_res == NULL) {
2696 status = CR_BAD_PARAM_ERROR;
2697 goto error;
2698 }
2700 *((CRString **) a_extra_res) = token->dimen;
2701 token->u.num = NULL;
2702 token->dimen = NULL;
2703 status = CR_OK;
2704 break;
2706 case DELIM_TK:
2707 *((guint32 *) a_res) = token->u.unichar;
2708 status = CR_OK;
2709 break;
2711 case UNICODERANGE_TK:
2712 default:
2713 status = CR_PARSING_ERROR;
2714 break;
2715 }
2717 cr_token_destroy (token);
2718 token = NULL;
2719 } else {
2720 cr_tknzr_unget_token (a_this, token);
2721 token = NULL;
2722 status = CR_PARSING_ERROR;
2723 }
2725 return status;
2727 error:
2729 if (token) {
2730 cr_tknzr_unget_token (a_this, token);
2731 token = NULL;
2732 }
2734 return status;
2735 }
2737 void
2738 cr_tknzr_destroy (CRTknzr * a_this)
2739 {
2740 g_return_if_fail (a_this);
2742 if (PRIVATE (a_this) && PRIVATE (a_this)->input) {
2743 if (cr_input_unref (PRIVATE (a_this)->input)
2744 == TRUE) {
2745 PRIVATE (a_this)->input = NULL;
2746 }
2747 }
2749 if (PRIVATE (a_this)->token_cache) {
2750 cr_token_destroy (PRIVATE (a_this)->token_cache);
2751 PRIVATE (a_this)->token_cache = NULL;
2752 }
2754 if (PRIVATE (a_this)) {
2755 g_free (PRIVATE (a_this));
2756 PRIVATE (a_this) = NULL;
2757 }
2759 g_free (a_this);
2760 }