1 /*
2 * lm2latex - src/l2l_scanner.l
3 * Copyright (C) 2010 Sebastian Harl <sh@tokkee.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
28 /*
29 * A Linux-Magazin markup to LaTeX converter -- LM markup scanner.
30 */
32 %{
33 #if HAVE_CONFIG_H
34 # include "config.h"
35 #endif
37 #include <assert.h>
39 #include <ctype.h>
41 #include <stdarg.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
/*
 * Growable string buffer.
 *
 * data: heap storage of the string (NULL as long as nothing was allocated)
 * size: number of characters that fit into data, not counting the
 *       terminating NUL byte
 * len:  number of characters currently stored
 */
typedef struct {
	char   *data;
	size_t  size;
	size_t  len;
} l2l_strbuf_t;

/* static initializer for an empty buffer (field order: data, size, len) */
#define L2L_STRBUF_INIT { NULL, 0, 0 }

/* number of characters that can still be stored without growing the buffer */
#define L2L_STRBUF_FREE(buf) ((buf)->size - (buf)->len)

/* address of the position following the last stored character */
#define L2L_STRBUF_END(buf) (&(buf)->data[(buf)->len])

/* drop the stored text but keep the allocated storage for reuse */
#define L2L_STRBUF_CLEAR(buf) ((buf)->len = 0)
57 static int
58 l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...);
60 static void
61 l2l_strbuf_chomp(l2l_strbuf_t *buf);
63 static void
64 l2l_strbuf_destroy(l2l_strbuf_t *buf);
/*
 * Format the variadic arguments printf-style.  The result is written to yyout
 * when `strbuf' is NULL and appended to `strbuf' otherwise; a failing append
 * aborts the scanner through YY_FATAL_ERROR().
 *
 * Note: `strbuf' is evaluated more than once.
 */
#define L2L_YY_APPEND_OR_ECHO(strbuf, ...) \
	do { \
		if (! (strbuf)) \
			fprintf(yyout, __VA_ARGS__); \
		else if (l2l_strbuf_append((strbuf), __VA_ARGS__)) \
			YY_FATAL_ERROR("ERROR: internal error"); \
	} while (0)
/*
 * Guard for the header tags: abort the scanner with a fatal error unless
 * l2l_scanner_state is still L2L_HEADER.
 */
#define L2L_STATE_HEADER_OR_ABORT \
	do { \
		if (l2l_scanner_state == L2L_HEADER) \
			break; \
		YY_FATAL_ERROR("ERROR: unexpected header definition"); \
	} while (0)
/* true if the current match (yytext) starts with the string `str' */
#define L2L_PATTERN_IS(str) (strncmp(yytext, (str), strlen((str))) == 0)

/* true if the current match starts with one of the six header tags */
#define L2L_PATTERN_IS_HEADER \
	(L2L_PATTERN_IS("@R:") \
		|| L2L_PATTERN_IS("@SW:") \
		|| L2L_PATTERN_IS("@D:") \
		|| L2L_PATTERN_IS("@T:") \
		|| L2L_PATTERN_IS("@A:") \
		|| L2L_PATTERN_IS("@V:"))
87 %}
/* keep track of the current line in yylineno; do not call yywrap() at EOF */
%option yylineno noyywrap

/* optimize for "fast scanning"; create an 8-bit scanner */
%option fast 8bit

/* generate a reentrant scanner in its own namespace */
%option reentrant
%option prefix="l2l_yy" outfile="lex.yy.c"

%option verbose warn

/* inclusive start condition: rules without a start condition apply as well */
%s box

/* exclusive start conditions: only rules naming the condition apply */
%x caption box_caption command headertxt itemize listing title url

/* a markup comment: "@#:" and the rest of the line */
COMMENT		@#:.*
WHITESPACE	[ \t]

/* any of the known "@<tag>:" commands */
ANY_COMMAND	@(R|SW|D|T|V|A|L|ZT|LI|B|Bi|IT|IL|IE|KT|KL|KE|TT|TH|TL|TE):
116 %%
%{
	/*
	 * State shared by the rule actions below.
	 */
	enum {
		/* values of l2l_scanner_state */
		L2L_HEADER,
		L2L_BODY,
		L2L_FIGURE,

		/* values of l2l_quote_state */
		L2L_UNQUOTED_TEXT,
		L2L_QUOTED_TEXT,
	};

	int l2l_scanner_state = L2L_HEADER;
	int l2l_quote_state   = L2L_UNQUOTED_TEXT;

	/* text formatting: set while the respective markup is open */
	_Bool l2l_italic = 0, l2l_bold = 0, l2l_code = 0;
	_Bool l2l_sup = 0, l2l_sub = 0;
	_Bool l2l_li_italic = 0;

	/* title page information */
	l2l_strbuf_t l2l_category = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_keyword  = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_title    = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_headline = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_author   = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_abstract = L2L_STRBUF_INIT;

	/* temp. buffers */
	l2l_strbuf_t l2l_caption = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_ititle  = L2L_STRBUF_INIT;

	/* buffer currently collecting text; NULL means "write to yyout" */
	l2l_strbuf_t *l2l_buf_ptr = NULL;

	/* start condition to return to when a nested construct ends */
	int l2l_last_yystart = INITIAL;
%}
{COMMENT}	{
		/* replace the leading "@#:" by "%" and keep the rest of the line */
		fputc('%', yyout);
		fputs(yytext + strlen("@#:"), yyout);
	}
<INITIAL,box,caption,box_caption,command,headertxt,itemize,title>{
	/*
	 * Inline markup shared by all states that carry running text.  Output
	 * goes through L2L_YY_APPEND_OR_ECHO(): it is appended to *l2l_buf_ptr
	 * while a buffer is selected and written to yyout otherwise.
	 */

	/* LaTeX reserved characters */
	[#$%&_{}]	{ L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s", yytext); }
	[\^~]		{ L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s{}", yytext); }
	[\\]		{ L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "$\\backslash$"); }

	/* special characters */
	/* '---' and '--' are handled by LaTeX */
	\*\*		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "~");

	:\*			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\,");

	["]			{
		/* double quotes alternate between an opening and a closing mark */
		if (l2l_quote_state == L2L_UNQUOTED_TEXT) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"`");
			l2l_quote_state = L2L_QUOTED_TEXT;
		}
		else {
			assert(l2l_quote_state == L2L_QUOTED_TEXT);
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"'");
			l2l_quote_state = L2L_UNQUOTED_TEXT;
		}
	}

	/*
	 * text formatting: each tag toggles its format, i.e. the first
	 * occurrence opens a group and the next one closes it again
	 */
	\<[iI]>		{
		if (l2l_italic) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_italic = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\itshape ");
			l2l_italic = 1;
		}
	}

	\<[bB]>		{
		if (l2l_bold) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_bold = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\bfseries ");
			l2l_bold = 1;
		}
	}

	\<[cC]>		{
		if (l2l_code) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_code = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\ttfamily ");
			l2l_code = 1;
		}
	}

	\<[\+]>		{
		/* superscript */
		if (l2l_sup) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
			l2l_sup = 0;
		}
		else {
			/* the backslash has to be doubled: "\r" would be a carriage
			 * return inside a C string literal */
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
					"\\ensuremath{^{\\rmfamily ");
			l2l_sup = 1;
		}
	}

	\<[\-]>		{
		/* subscript */
		if (l2l_sub) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
			l2l_sub = 0;
		}
		else {
			/* see the superscript rule about the doubled backslash */
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
					"\\ensuremath{_{\\rmfamily ");
			l2l_sub = 1;
		}
	}

	\[[0-9]+\]	{
		/* bracketed numbers ("[1]") are highlighted */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
				"{\\bfseries\\textcolor{DarkBlue}{%s}}", yytext);
	}
}
\<[uU]>	{
		/* opening tag: everything up to the next tag is the URL */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s", "\\url{");
		BEGIN(url);
	}

<url>\<[uU]>	{
		/* closing tag: return to the state the URL was found in */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s", "}");
		BEGIN(l2l_last_yystart);
	}

<url>.|\n	{
		/* the URL itself is copied without any escaping */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s", yytext);
	}
(Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+	{
		/* references like "Abbildung 1" or "Listing 2" are colored; the
		 * matched text (including its whitespace) is kept as is */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
				"\\textcolor{DarkBlue}{%s}",
				yytext);
	}
264 /*
265 * header
266 */
@(R|SW|D|T|A|V):{WHITESPACE}*	{
		/*
		 * Header tags.  Each of them is accepted only while the scanner is
		 * still in the header; it selects the buffer collecting the text
		 * that follows and switches to the headertxt state:
		 *
		 *   @R:  -> l2l_category    @SW: -> l2l_keyword
		 *   @D:  -> l2l_title       @T:  -> l2l_headline
		 *   @A:  -> l2l_author      @V:  -> l2l_abstract
		 */
		L2L_STATE_HEADER_OR_ABORT;

		if (L2L_PATTERN_IS("@R:"))
			l2l_buf_ptr = &l2l_category;
		else if (L2L_PATTERN_IS("@SW:"))
			l2l_buf_ptr = &l2l_keyword;
		else if (L2L_PATTERN_IS("@D:"))
			l2l_buf_ptr = &l2l_title;
		else if (L2L_PATTERN_IS("@T:"))
			l2l_buf_ptr = &l2l_headline;
		else if (L2L_PATTERN_IS("@A:"))
			l2l_buf_ptr = &l2l_author;
		else
			l2l_buf_ptr = &l2l_abstract;

		BEGIN(headertxt);
	}
<headertxt>{ANY_COMMAND}	{
		/*
		 * A command ends the text of the current header field.  If the
		 * command is not another header tag, the header is complete and
		 * the LaTeX title page is written.  The command itself is pushed
		 * back and scanned again in the previous start condition.
		 */
		if (l2l_buf_ptr) {
			l2l_strbuf_chomp(l2l_buf_ptr);
			l2l_buf_ptr = NULL;
		}

		if (! L2L_PATTERN_IS_HEADER) {
			const _Bool have_tags = l2l_category.len || l2l_keyword.len;
			const _Bool have_heading = l2l_title.len || l2l_headline.len;

			assert(l2l_scanner_state == L2L_HEADER);

			if (have_tags || have_heading) {
				fputs("\\title{", yyout);

				/* "[category -> keyword]" line above the title */
				if (have_tags) {
					fputs("{\\scriptsize [", yyout);
					if (l2l_category.len)
						fputs(l2l_category.data, yyout);
					if (l2l_category.len && l2l_keyword.len)
						fputs(" $\\rightarrow$ ", yyout);
					if (l2l_keyword.len)
						fputs(l2l_keyword.data, yyout);
					fputs("]}\\\\\n\\vspace{5mm}\n", yyout);
				}

				/* l2l_title is printed small above l2l_headline if both
				 * are available */
				if (l2l_headline.len) {
					if (l2l_title.len)
						fprintf(yyout, "\\normalsize %s\\\\\n\\LARGE ",
								l2l_title.data);
					fputs(l2l_headline.data, yyout);
				}
				else if (l2l_title.len) {
					fputs(l2l_title.data, yyout);
				}

				fputs("}\n", yyout);
			}

			if (l2l_author.len)
				fprintf(yyout, "\\author{%s}\n", l2l_author.data);
			fputs("\\date{\\today}\n\n", yyout);

			fputs("\\begin{document}\n\n", yyout);
			l2l_scanner_state = L2L_BODY;

			fputs("\\twocolumn[\n"
					"\\begin{@twocolumnfalse}\n"
					"\\maketitle\n\n", yyout);

			if (l2l_abstract.len)
				fprintf(yyout, "\\begin{abstract}\n%s\n"
						"\\end{abstract}\n\n", l2l_abstract.data);

			fputs("\\rule{\\textwidth}{1pt}\n"
					"\\begin{center}\n"
					"\\begin{minipage}{.75\\textwidth}\n"
					"\\tableofcontents\n"
					"\\end{minipage}\n"
					"\\end{center}\n"
					"\\rule{\\textwidth}{1pt}\n"
					"\\end{@twocolumnfalse}\n]\n\n", yyout);
		}

		yyless(0);
		BEGIN(l2l_last_yystart);
	}
<headertxt>{COMMENT}	{ /* comments inside a header field are dropped */ }

<headertxt>.|\n	{
		/* collect the text of the current header field; it is ignored if
		 * no buffer is selected */
		if (l2l_buf_ptr && l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	}
379 /*
380 * body
381 */
\n\n{WHITESPACE}*\*{WHITESPACE}*	{
		/* a "*" following an empty line starts a list */
		fputs("\n\n\\begin{itemize}\n\\item ", yyout);
		BEGIN(itemize);
	}

<itemize>({WHITESPACE}|\n)*\n\n	|
<itemize>({WHITESPACE}|\n)*{ANY_COMMAND}	{
		/* an empty line or a command ends the list; the matched text is
		 * pushed back and scanned again in the previous start condition */
		fputs("\n\\end{itemize}", yyout);
		yyless(0);
		BEGIN(l2l_last_yystart);
	}

	/* this will produce a longer match and, thus, take precedence over
	 * the pattern above matching an empty line after the itemize */
<itemize>({WHITESPACE}|\n)*\n{WHITESPACE}*\*{WHITESPACE}*	{
		/* another "*" at the start of a line: next item */
		fputs("\n\\item ", yyout);
	}
<itemize>({WHITESPACE}|\n)*\n\n({WHITESPACE}{2,}|\t)	ECHO;

<itemize>.	ECHO;
@ZT:{WHITESPACE}*	{
		/* the text up to the next command becomes a subsection heading */
		fputs("\\subsection{", yyout);
		BEGIN(command);
	}

<command>({WHITESPACE}|\n)+{ANY_COMMAND}	{
		/* close the heading; the command is pushed back and scanned
		 * again in the previous start condition */
		fputs("}", yyout);
		yyless(0);
		BEGIN(l2l_last_yystart);
	}

<command>.	ECHO;
@LI:{WHITESPACE}*\n?	{
		/* \xb7 = MIDDLE DOT (iso-8859-1 and iso-8859-15) */
		fputs("\\begin{lstlisting}[escapechar=\xb7]\n", yyout);
		BEGIN(listing);
	}

<listing>\n*{ANY_COMMAND}	{
		/* any command ends the listing; it is pushed back and scanned
		 * again in the previous start condition */
		fputs("\n\\end{lstlisting}", yyout);
		yyless(0);
		BEGIN(l2l_last_yystart);
	}

<listing>\<§§I>	{
		/* toggle italics inside the listing using the escape character:
		 * the first tag emits the escape character followed by \itshape,
		 * the next one emits the escape character only */
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s",
				l2l_li_italic ? "\xb7" : "\xb7\\itshape ");
		l2l_li_italic = ! l2l_li_italic;
	}

<listing>§§\n	/* ignore => lstlisting will insert appropriate arrows */;

<listing>.	ECHO;
444 /*
445 * images/boxes
446 */
@(B|KT):{WHITESPACE}*((Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+:)?	{
		/*
		 * Caption of an image (@B:) or title of a box (@KT:).  The text is
		 * collected in l2l_caption; an optional "Abbildung <n>:" or
		 * "Listing <n>:" label is put in front of it in bold face.
		 */
		const _Bool is_box = L2L_PATTERN_IS("@KT:");
		const char *label;

		/* the pattern does not allow for any other tag */
		assert(is_box || L2L_PATTERN_IS("@B:"));

		/* open the environment unless some other rule did so already */
		if (l2l_scanner_state == L2L_FIGURE) {
			l2l_scanner_state = L2L_BODY;
		}
		else {
			fputs(is_box ? "\\begin{l2lbox}\n" : "\\begin{figure}\n", yyout);
			l2l_scanner_state = L2L_FIGURE;
		}

		label = strstr(yytext, "Abbildung");
		if (! label)
			label = strstr(yytext, "Listing");

		if (label && l2l_strbuf_append(&l2l_caption, "{\\bfseries %s}", label))
			YY_FATAL_ERROR("ERROR: internal error");

		l2l_buf_ptr = &l2l_caption;
		if (is_box)
			BEGIN(box_caption);
		else
			BEGIN(caption);
	}
@Bi:.*\n	{
		/*
		 * Image file: the rest of the line (without leading whitespace) is
		 * used as the file name.  The figure environment is opened here
		 * unless that was done before; in the latter case it is closed
		 * after the image.
		 */
		char *filename;

		/* cut off the newline terminating the match */
		yytext[yyleng - 1] = '\0';

		filename = yytext + strlen("@Bi:");
		/* isspace() requires a value representable as unsigned char (or
		 * EOF); a plain char may be negative for 8-bit input */
		while (isspace((unsigned char)*filename))
			++filename;

		if (l2l_scanner_state != L2L_FIGURE) {
			fprintf(yyout, "\\begin{figure}\n");
			l2l_scanner_state = L2L_FIGURE;
		}
		else {
			l2l_scanner_state = L2L_BODY;
		}

		fprintf(yyout, "\\includegraphics[width=\\columnwidth]{%s}\n", filename);

		if (l2l_scanner_state == L2L_BODY)
			fprintf(yyout, "\\end{figure}\n");
	}
@KL:	{
		/* box body: open the box unless that was done before and make
		 * `box' the start condition nested constructs return to */
		if (l2l_scanner_state == L2L_FIGURE) {
			l2l_scanner_state = L2L_BODY;
		}
		else {
			fputs("\\begin{l2lbox}\n", yyout);
			l2l_scanner_state = L2L_FIGURE;
		}

		l2l_last_yystart = box;
		BEGIN(box);
	}

<box>@(KE|L):{WHITESPACE}*	{
		/* end of the box: back to regular body text */
		fputs("\\end{l2lbox}", yyout);
		l2l_scanner_state = L2L_BODY;
		l2l_last_yystart = INITIAL;
		BEGIN(INITIAL);
	}
<caption,box_caption>{ANY_COMMAND}	{
		/*
		 * A command ends the caption: write the collected text and push
		 * the command back to have it scanned again.
		 */
		if (l2l_buf_ptr) {
			l2l_strbuf_chomp(l2l_buf_ptr);
			l2l_buf_ptr = NULL;
		}

		if (l2l_caption.len) {
			fprintf(yyout, "\\caption*{%s}\n", l2l_caption.data);
			L2L_STRBUF_CLEAR(&l2l_caption);
		}

		/* a box is closed by its own end tag */
		if ((YY_START != box_caption) && (l2l_scanner_state == L2L_BODY))
			fputs("\\end{figure}\n\n", yyout);

		yyless(0);
		switch (YY_START) {
			case caption:
				BEGIN(l2l_last_yystart);
				break;
			case box_caption:
				/* the body of the box follows its title */
				l2l_last_yystart = box;
				BEGIN(box);
				break;
			default:
				assert(0);
		}
	}

<caption,box_caption>{COMMENT}	{ /* comments inside a caption are dropped */ }

<caption,box_caption>.|\n	{
		/* collect the caption text; it is ignored if no buffer is
		 * selected */
		if (l2l_buf_ptr && l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	}
564 /*
565 * infos
566 */
@IT:{WHITESPACE}*	{
		/* info title: collect the text up to the next command */
		l2l_buf_ptr = &l2l_ititle;
		BEGIN(title);
	}

<title>{ANY_COMMAND}	{
		/* a command ends the title: write it as an unnumbered subsection
		 * below a rule and push the command back to scan it again */
		if (l2l_buf_ptr != NULL) {
			l2l_strbuf_chomp(l2l_buf_ptr);
			l2l_buf_ptr = NULL;
		}

		if (l2l_ititle.len > 0) {
			fprintf(yyout,
					"\\subsection*{\\rule{\\columnwidth}{1pt}\\newline %s}\n",
					l2l_ititle.data);
			L2L_STRBUF_CLEAR(&l2l_ititle);
		}

		yyless(0);
		BEGIN(l2l_last_yystart);
	}

<title>{COMMENT}	{ /* comments inside a title are dropped */ }

<title>.|\n	{
		/* collect the title text; it is ignored if no buffer is selected */
		if (l2l_buf_ptr && l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	}

@IL:	{ /* the tag is dropped; the text following it is handled as usual */ }

@IE:	{ /* the tag is dropped; the text following it is handled as usual */ }
602 /*
603 * tables
604 */
@TT:	{ /* TODO */ }

@TH:	{ /* TODO */ }

@TL:	{ /* TODO */ }

@TE:	{ /* TODO */ }

	/* let all other rules that match "@L:" have a higher precedence */
@L:{WHITESPACE}*	{ /* nothing to do: the tag is dropped */ }
<<EOF>>	{
		/*
		 * End of input: finish the document and release all buffers.  The
		 * input is incomplete (fatal error) unless the scanner is in the
		 * document body.
		 */
		if (l2l_scanner_state == L2L_BODY) {
			fprintf(yyout, "\\end{document}\n");
		}
		else
			YY_FATAL_ERROR("ERROR: incomplete document");

		/* title page information */
		l2l_strbuf_destroy(&l2l_category);
		l2l_strbuf_destroy(&l2l_keyword);
		l2l_strbuf_destroy(&l2l_title);
		l2l_strbuf_destroy(&l2l_headline);
		l2l_strbuf_destroy(&l2l_author);
		l2l_strbuf_destroy(&l2l_abstract);

		/* temp. buffers */
		l2l_strbuf_destroy(&l2l_caption);
		l2l_strbuf_destroy(&l2l_ititle);

		yyterminate();
	}
637 %%
639 static int
640 l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...)
641 {
642 size_t str_len;
643 va_list ap;
645 int status;
647 if ((! buf) || (! fmt))
648 return 0;
650 va_start(ap, fmt);
651 status = vsnprintf(NULL, 0, fmt, ap);
652 va_end(ap);
654 if (status <= 0)
655 return -1;
657 str_len = (size_t)status;
659 if (str_len > L2L_STRBUF_FREE(buf)) {
660 char *new_data;
661 size_t new_size = 2 * buf->size;
663 if (str_len > buf->size)
664 new_size += str_len;
666 new_data = realloc(buf->data, new_size + 1);
667 if (! new_data) {
668 l2l_strbuf_destroy(buf);
669 return -1;
670 }
672 buf->data = new_data;
673 buf->size = new_size;
674 }
675 assert(buf->len + str_len <= buf->size);
677 va_start(ap, fmt);
678 status = vsnprintf(L2L_STRBUF_END(buf), L2L_STRBUF_FREE(buf) + 1, fmt, ap);
679 va_end(ap);
681 if (status <= 0)
682 return -1;
684 assert((size_t)status == str_len);
685 buf->len += str_len;
686 buf->data[buf->len] = '\0';
687 return 0;
688 } /* l2l_strbuf_append */
690 static void
691 l2l_strbuf_chomp(l2l_strbuf_t *buf)
692 {
693 if (! buf)
694 return;
696 while (buf->len && isspace((int)buf->data[buf->len - 1])) {
697 buf->data[buf->len - 1] = '\0';
698 --buf->len;
699 }
700 } /* l2l_strbuf_chomp */
702 static void
703 l2l_strbuf_destroy(l2l_strbuf_t *buf)
704 {
705 if (! buf)
706 return;
708 if (buf->data)
709 free(buf->data);
710 memset(buf, 0, sizeof(*buf));
711 } /* l2l_strbuf_destroy */
713 /* vim: set tw=78 sw=4 ts=4 noexpandtab : */