Code

3cf3c8ed0c6157011d7fc5b9acb809ea29480672
[lm2latex.git] / src / l2l_scanner.l
1 /*
2  * lm2latex - src/l2l_scanner.l
3  * Copyright (C) 2010 Sebastian Harl <sh@tokkee.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
28 /*
29  * A Linux-Magazin markup to LaTeX converter -- LM markup scanner.
30  */
32 %{
33 #if HAVE_CONFIG_H
34 #       include "config.h"
35 #endif
37 #include <assert.h>
39 #include <ctype.h>
41 #include <stdarg.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
/*
 * Growable string buffer used to collect text that is written out later.
 */
typedef struct {
        char  *data; /* heap storage; NULL as long as nothing has been appended */
        size_t size; /* capacity in bytes, not counting the terminating NUL */
        size_t len;  /* number of bytes currently in use */
} l2l_strbuf_t;

/* static initializer for a buffer without any storage */
#define L2L_STRBUF_INIT { NULL, 0, 0 }
/* number of bytes that still fit without growing the buffer */
#define L2L_STRBUF_FREE(buf) ((buf)->size - (buf)->len)
/* position at which the next append starts writing */
#define L2L_STRBUF_END(buf) ((buf)->data + (buf)->len)
/* drop the contents; the allocated storage is kept */
#define L2L_STRBUF_CLEAR(buf) ((buf)->len = 0)
/* append printf-style formatted text; returns 0 on success, -1 on error */
static int
l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...);

/* remove trailing whitespace from the buffer */
static void
l2l_strbuf_chomp(l2l_strbuf_t *buf);

/* free the storage and reset all fields to zero */
static void
l2l_strbuf_destroy(l2l_strbuf_t *buf);
/*
 * Write printf-style formatted text either to the given string buffer or,
 * if the buffer pointer is NULL, directly to yyout. A failed append aborts
 * the scanner. Note that 'strbuf' is evaluated more than once.
 */
#define L2L_YY_APPEND_OR_ECHO(strbuf, ...) \
        do { \
                if ((strbuf)) { \
                        if (l2l_strbuf_append((strbuf), __VA_ARGS__)) \
                                YY_FATAL_ERROR("ERROR: internal error"); \
                } \
                else \
                        fprintf(yyout, __VA_ARGS__); \
        } while (0)
/*
 * Abort the scanner unless it is still processing the document header.
 * Reads l2l_scanner_state, so it may only be used inside rule actions.
 */
#define L2L_STATE_HEADER_OR_ABORT \
        do { \
                if (l2l_scanner_state != L2L_HEADER) \
                        YY_FATAL_ERROR("ERROR: unexpected header definition"); \
        } while (0)
/* true if the current match (yytext) starts with the string 'str' */
#define L2L_PATTERN_IS(str) (! strncmp(yytext, (str), strlen(str)))
/* true if the current match starts with one of the header commands */
#define L2L_PATTERN_IS_HEADER \
        (L2L_PATTERN_IS("@R:") || L2L_PATTERN_IS("@SW:") \
                || L2L_PATTERN_IS("@D:") || L2L_PATTERN_IS("@T:") \
                || L2L_PATTERN_IS("@A:") || L2L_PATTERN_IS("@V:"))
%}
87 %}
/* keep track of line numbers; no yywrap() is needed */
%option yylineno
%option noyywrap

/* optimize for "fast scanning"; create an 8-bit scanner */
%option fast 8bit

/* generate a reentrant scanner in its own namespace */
%option reentrant
%option prefix="l2l_yy" outfile="lex.yy.c"

%option verbose warn

/* inclusive start condition: rules without a start condition stay active */
%s box

/* exclusive start conditions: only rules naming the condition are active */
%x caption box_caption
%x command
%x headertxt
%x itemize
%x listing
%x title
%x url

/* a markup comment: "@#:" up to the end of the line */
COMMENT @#:.*
WHITESPACE [ \t]

/* any of the markup commands known to the scanner */
ANY_COMMAND @(R|SW|D|T|V|A|L|ZT|LI|B|Bi|IT|IL|IE|KT|KL|KE|TT|TH|TL|TE):
116 %%
%{
        /*
         * State used by the rule actions below; this block precedes the
         * first rule.
         */
        enum {
                /* values of l2l_scanner_state */
                L2L_HEADER,
                L2L_BODY,
                L2L_FIGURE,

                /* values of l2l_quote_state */
                L2L_UNQUOTED_TEXT,
                L2L_QUOTED_TEXT,
        };

        int l2l_scanner_state = L2L_HEADER;
        int l2l_quote_state   = L2L_UNQUOTED_TEXT;

        /* text formatting */
        /* each flag is set while the corresponding markup tag is open */
        _Bool l2l_italic      = 0;
        _Bool l2l_bold        = 0;
        _Bool l2l_code        = 0;
        _Bool l2l_sup         = 0;
        _Bool l2l_sub         = 0;
        _Bool l2l_li_italic   = 0;

        /* title page information */
        l2l_strbuf_t l2l_category = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_keyword  = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_title    = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_headline = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_author   = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_abstract = L2L_STRBUF_INIT;

        /* temp. buffers */
        l2l_strbuf_t l2l_caption  = L2L_STRBUF_INIT;
        l2l_strbuf_t l2l_ititle   = L2L_STRBUF_INIT;

        /* buffer that currently collects text; NULL means "write to yyout" */
        l2l_strbuf_t *l2l_buf_ptr = NULL;

        /* start condition to return to when a nested construct ends */
        int l2l_last_yystart = INITIAL;
%}
{COMMENT} {
        /* emit the comment text behind a '%', dropping the "@#:" prefix */
        fprintf(yyout, "%%%s", yytext + strlen("@#:"));
}
<INITIAL,box,caption,box_caption,command,headertxt,itemize,title>{
        /*
         * Rules shared by all start conditions that contain running text.
         * Output goes to l2l_buf_ptr if set and to yyout otherwise.
         */

        /* LaTeX reserved characters */
        [#$%&_{}] { L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s", yytext); }
        [\^~]     { L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s{}", yytext); }
        [\\]      { L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "$\\backslash$"); }

        /* special characters */
        /* '---' and '--' are handled by LaTeX */
        \*\* L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "~");

        :\* L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\,");

        ["] {
                /* alternate between the opening and the closing quote sequence */
                if (l2l_quote_state == L2L_UNQUOTED_TEXT) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"`");
                        l2l_quote_state = L2L_QUOTED_TEXT;
                }
                else {
                        assert(l2l_quote_state == L2L_QUOTED_TEXT);
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"'");
                        l2l_quote_state = L2L_UNQUOTED_TEXT;
                }
        }

        /* text formatting */
        /* each tag is a toggle: it opens a group when the style is off and
         * closes the group when the style is on */
        \<[iI]> {
                if (l2l_italic) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
                        l2l_italic = 0;
                }
                else {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\itshape ");
                        l2l_italic = 1;
                }
        }

        \<[bB]> {
                if (l2l_bold) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
                        l2l_bold = 0;
                }
                else {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\bfseries ");
                        l2l_bold = 1;
                }
        }

        \<[cC]> {
                if (l2l_code) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
                        l2l_code = 0;
                }
                else {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\ttfamily ");
                        l2l_code = 1;
                }
        }

        \<[\+]> {
                if (l2l_sup) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
                        l2l_sup = 0;
                }
                else {
                        /* the backslash has to be doubled: "\r" would be a
                         * carriage return in a C string literal */
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
                                        "\\ensuremath{^{\\rmfamily ");
                        l2l_sup = 1;
                }
        }

        \<[\-]> {
                if (l2l_sub) {
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
                        l2l_sub = 0;
                }
                else {
                        /* see above: "\\rmfamily", not "\rmfamily" */
                        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
                                        "\\ensuremath{_{\\rmfamily ");
                        l2l_sub = 1;
                }
        }

        /* bracketed numbers, e.g. "[1]", are printed bold and colored */
        \[[0-9]+\] {
                L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
                                "{\\bfseries\\textcolor{DarkBlue}{%s}}", yytext);
        }
}
\<[uU]> {
        /* opening URL tag: the URL text is copied unmodified (see <url>) */
        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\url{");
        BEGIN(url);
}

<url>\<[uU]> {
        /* closing URL tag: return to the surrounding start condition */
        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
        BEGIN(l2l_last_yystart);
}

<url>.|\n {
        /* no escaping inside of URLs */
        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s", yytext);
}
(Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+ {
        /* references to figures ("Abbildung") and listings are colored */
        L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\textcolor{DarkBlue}{%s}", yytext);
}
264         /*
265          * header
266          */
@R:{WHITESPACE}* {
        /* each header command selects the buffer that collects the text
         * following it; header commands are only valid before the body */
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_category;
        BEGIN(headertxt);
}

@SW:{WHITESPACE}* {
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_keyword;
        BEGIN(headertxt);
}

@D:{WHITESPACE}* {
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_title;
        BEGIN(headertxt);
}

@T:{WHITESPACE}* {
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_headline;
        BEGIN(headertxt);
}

@A:{WHITESPACE}* {
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_author;
        BEGIN(headertxt);
}

@V:{WHITESPACE}* {
        L2L_STATE_HEADER_OR_ABORT;
        l2l_buf_ptr = &l2l_abstract;
        BEGIN(headertxt);
}
<headertxt>{ANY_COMMAND} {
        /* the next command ends the current header field */
        if (l2l_buf_ptr) {
                l2l_strbuf_chomp(l2l_buf_ptr);
                l2l_buf_ptr = NULL;
        }

        /* the first command that is not a header command ends the header:
         * write the title page and switch to the document body */
        if (! L2L_PATTERN_IS_HEADER) {
                assert(l2l_scanner_state == L2L_HEADER);

                if (l2l_category.len || l2l_keyword.len
                                || l2l_title.len || l2l_headline.len) {
                        fprintf(yyout, "\\title{");

                        /* "[category -> keyword]" line above the title */
                        if (l2l_category.len || l2l_keyword.len) {
                                fprintf(yyout, "{\\scriptsize [");
                                if (l2l_category.len) {
                                        fprintf(yyout, "%s", l2l_category.data);
                                        if (l2l_keyword.len)
                                                fprintf(yyout, " $\\rightarrow$ ");
                                }
                                if (l2l_keyword.len)
                                        fprintf(yyout, "%s", l2l_keyword.data);
                                fprintf(yyout, "]}\\\\\n\\vspace{5mm}\n");
                        }

                        /* if there is a headline, the title is printed in
                         * normal size above it; else the title stands alone */
                        if (l2l_headline.len) {
                                if (l2l_title.len)
                                        fprintf(yyout, "\\normalsize %s\\\\\n\\LARGE ",
                                                        l2l_title.data);
                                fprintf(yyout, "%s", l2l_headline.data);
                        }
                        else if (l2l_title.len)
                                fprintf(yyout, "%s", l2l_title.data);

                        fprintf(yyout, "}\n");
                }

                if (l2l_author.len)
                        fprintf(yyout, "\\author{%s}\n", l2l_author.data);
                fprintf(yyout, "\\date{\\today}\n\n");

                fprintf(yyout, "\\begin{document}\n\n");
                l2l_scanner_state = L2L_BODY;

                fprintf(yyout, "\\maketitle\n\n");

                if (l2l_abstract.len)
                        fprintf(yyout, "\\begin{abstract}\n%s\n"
                                        "\\end{abstract}\n\n", l2l_abstract.data);

                fprintf(yyout, "\\vspace{10mm}\\tableofcontents\n\n");
                fprintf(yyout, "\\twocolumn\n\n");
        }

        /* push the command back, so that its own rule handles it */
        yyless(0);
        BEGIN(l2l_last_yystart);
}
<headertxt>{COMMENT} { /* ignore */ }

<headertxt>.|\n {
        /* collect everything else in the buffer of the current header field */
        if (l2l_buf_ptr)
                if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
                        YY_FATAL_ERROR("ERROR: internal error");
        /* else ignore */
}
371         /*
372          * body
373          */
\n\n{WHITESPACE}*\*{WHITESPACE}* {
        /* an empty line followed by '*' starts an itemize environment */
        fprintf(yyout, "\n\n\\begin{itemize}\n\\item ");
        BEGIN(itemize);
}

<itemize>({WHITESPACE}|\n)*\n\n |
<itemize>({WHITESPACE}|\n)*{ANY_COMMAND} {
        /* an empty line or any command ends the environment; the matched
         * text is pushed back and scanned again in the previous state */
        fprintf(yyout, "\n\\end{itemize}");
        yyless(0);
        BEGIN(l2l_last_yystart);
}

        /* this will produce a longer match and, thus, take precedence over
         * the pattern above matching an empty line after the itemize */
<itemize>({WHITESPACE}|\n)*\n{WHITESPACE}*\*{WHITESPACE}* {
        fprintf(yyout, "\n\\item ");
}

<itemize>({WHITESPACE}|\n)*\n\n({WHITESPACE}{2,}|\t) ECHO;

<itemize>. ECHO;
@ZT:{WHITESPACE}* {
        /* the text up to the next command becomes the subsection title */
        fprintf(yyout, "\\subsection{");
        BEGIN(command);
}

<command>({WHITESPACE}|\n)+{ANY_COMMAND} {
        /* close the title; the match is pushed back and scanned again */
        fprintf(yyout, "}");
        yyless(0);
        BEGIN(l2l_last_yystart);
}

<command>. ECHO;
@LI:{WHITESPACE}*\n? {
        /* \xb7 = MIDDLE DOT (iso-8859-1 and iso-8859-15) */
        fprintf(yyout, "\\begin{lstlisting}[escapechar=\xb7]\n");
        BEGIN(listing);
}

<listing>\n*{ANY_COMMAND} {
        /* any command ends the listing; the match is scanned again */
        fprintf(yyout, "\n\\end{lstlisting}");
        yyless(0);
        BEGIN(l2l_last_yystart);
}

<listing>\<§§I> {
        /* toggle italics inside the listing using the escape character */
        if (l2l_li_italic) {
                L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\xb7");
                l2l_li_italic = 0;
        }
        else {
                L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\xb7\\itshape ");
                l2l_li_italic = 1;
        }
}

<listing>§§\n /* ignore => lstlisting will insert appropriate arrows */;

<listing>. ECHO;
436         /*
437          * images/boxes
438          */
@(B|KT):{WHITESPACE}*((Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+:)? {
        /* "@B:" starts the caption of an image, "@KT:" the caption of a box */
        _Bool is_image = 0;
        _Bool is_box   = 0;

        char *label    = NULL;

        if (! strncmp(yytext, "@B:", 3))
                is_image = 1;
        else if (! strncmp(yytext, "@KT:", 4))
                is_box = 1;

        /* open the environment unless one is open already */
        if (l2l_scanner_state != L2L_FIGURE) {
                if (is_box)
                        fprintf(yyout, "\\begin{l2lbox}\n");
                else
                        fprintf(yyout, "\\begin{figure}\n");
                l2l_scanner_state = L2L_FIGURE;
        }
        else {
                l2l_scanner_state = L2L_BODY;
        }

        /* an optional "Abbildung <n>:" / "Listing <n>:" label is put in
         * bold face in front of the caption */
        label = strstr(yytext, "Abbildung");
        if (! label)
                label = strstr(yytext, "Listing");

        if (label)
                if (l2l_strbuf_append(&l2l_caption, "{\\bfseries %s}", label))
                        YY_FATAL_ERROR("ERROR: internal error");

        /* the caption text itself is collected in l2l_caption */
        l2l_buf_ptr = &l2l_caption;
        if (is_image)
                BEGIN(caption);
        else if (is_box)
                BEGIN(box_caption);
        else
                assert(0);
}
@Bi:.*\n {
        char *filename;

        /* cut off the newline at the end of the match */
        yytext[yyleng - 1] = '\0';

        /* the filename follows the command, skipping leading whitespace;
         * the cast is required because the <ctype.h> functions are only
         * defined for values representable as unsigned char (and EOF) */
        filename = yytext + strlen("@Bi:");
        while (isspace((unsigned char)*filename))
                ++filename;

        if (l2l_scanner_state != L2L_FIGURE) {
                fprintf(yyout, "\\begin{figure}\n");
                l2l_scanner_state = L2L_FIGURE;
        }
        else {
                l2l_scanner_state = L2L_BODY;
        }

        fprintf(yyout, "\\includegraphics[width=\\columnwidth]{%s}\n", filename);

        /* the figure was open already, so the image completes it */
        if (l2l_scanner_state == L2L_BODY)
                fprintf(yyout, "\\end{figure}\n");
}
@KL: {
        /* start of the box text; open the box unless it is open already */
        if (l2l_scanner_state != L2L_FIGURE) {
                fprintf(yyout, "\\begin{l2lbox}\n");
                l2l_scanner_state = L2L_FIGURE;
        }
        else {
                l2l_scanner_state = L2L_BODY;
        }

        l2l_last_yystart = box;
        BEGIN(box);
}

<box>@(KE|L):{WHITESPACE}* {
        /* end of the box: continue with the regular document body */
        fprintf(yyout, "\\end{l2lbox}");
        l2l_scanner_state = L2L_BODY;
        l2l_last_yystart = INITIAL;
        BEGIN(l2l_last_yystart);
}
<caption,box_caption>{ANY_COMMAND} {
        /* the next command ends the caption */
        if (l2l_buf_ptr) {
                l2l_strbuf_chomp(l2l_buf_ptr);
                l2l_buf_ptr = NULL;
        }

        if (l2l_caption.len) {
                fprintf(yyout, "\\caption*{%s}\n", l2l_caption.data);
                L2L_STRBUF_CLEAR(&l2l_caption);
        }

        /* boxes are closed by their own end command */
        if ((l2l_scanner_state == L2L_BODY) && (YY_START != box_caption))
                fprintf(yyout, "\\end{figure}\n\n");

        /* push the command back, so that its own rule handles it */
        yyless(0);
        if (YY_START == caption)
                BEGIN(l2l_last_yystart);
        else if (YY_START == box_caption) {
                l2l_last_yystart = box;
                BEGIN(box);
        }
        else
                assert(0);
}

<caption,box_caption>{COMMENT} { /* ignore */ }

<caption,box_caption>.|\n {
        /* collect the caption text */
        if (l2l_buf_ptr)
                if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
                        YY_FATAL_ERROR("ERROR: internal error");
        /* else ignore */
}
556         /*
557          * infos
558          */
@IT:{WHITESPACE}* {
        /* the info title is collected in l2l_ititle */
        l2l_buf_ptr = &l2l_ititle;
        BEGIN(title);
}

<title>{ANY_COMMAND} {
        /* the next command ends the info title */
        if (l2l_buf_ptr) {
                l2l_strbuf_chomp(l2l_buf_ptr);
                l2l_buf_ptr = NULL;
        }

        if (l2l_ititle.len) {
                fprintf(yyout, "\\subsection*{\\rule{\\columnwidth}{1pt}"
                                "\\newline %s}\n", l2l_ititle.data);
                L2L_STRBUF_CLEAR(&l2l_ititle);
        }

        /* push the command back, so that its own rule handles it */
        yyless(0);
        BEGIN(l2l_last_yystart);
}

<title>{COMMENT} { /* ignore */ }

<title>.|\n {
        /* collect the title text */
        if (l2l_buf_ptr)
                if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
                        YY_FATAL_ERROR("ERROR: internal error");
        /* else ignore */
}

@IL: { /* ignore */ }

@IE: { /* ignore */ }
594         /*
595          * tables
596          */
@TT: /* TODO */;

@TH: /* TODO */;

@TL: /* TODO */;

@TE: /* TODO */;

        /* let all other rules that match "@L:" have a higher precedence */
@L:{WHITESPACE}* { /* nothing to do */ }
<<EOF>> {
        /* the document is complete only if the scanner is back in the body */
        if (l2l_scanner_state == L2L_BODY) {
                fprintf(yyout, "\\end{document}\n");
        }
        else
                YY_FATAL_ERROR("ERROR: incomplete document");

        /* title page information */
        l2l_strbuf_destroy(&l2l_category);
        l2l_strbuf_destroy(&l2l_keyword);
        l2l_strbuf_destroy(&l2l_title);
        l2l_strbuf_destroy(&l2l_headline);
        l2l_strbuf_destroy(&l2l_author);
        l2l_strbuf_destroy(&l2l_abstract);

        /* temp. buffers */
        l2l_strbuf_destroy(&l2l_caption);
        l2l_strbuf_destroy(&l2l_ititle);

        yyterminate();
}
629 %%
631 static int
632 l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...)
634         size_t  str_len;
635         va_list ap;
637         int status;
639         if ((! buf) || (! fmt))
640                 return 0;
642         va_start(ap, fmt);
643         status = vsnprintf(NULL, 0, fmt, ap);
644         va_end(ap);
646         if (status <= 0)
647                 return -1;
649         str_len = (size_t)status;
651         if (str_len > L2L_STRBUF_FREE(buf)) {
652                 char  *new_data;
653                 size_t new_size = 2 * buf->size;
655                 if (str_len > buf->size)
656                         new_size += str_len;
658                 new_data = realloc(buf->data, new_size + 1);
659                 if (! new_data) {
660                         l2l_strbuf_destroy(buf);
661                         return -1;
662                 }
664                 buf->data = new_data;
665                 buf->size = new_size;
666         }
667         assert(buf->len + str_len <= buf->size);
669         va_start(ap, fmt);
670         status = vsnprintf(L2L_STRBUF_END(buf), L2L_STRBUF_FREE(buf) + 1, fmt, ap);
671         va_end(ap);
673         if (status <= 0)
674                 return -1;
676         assert((size_t)status == str_len);
677         buf->len += str_len;
678         buf->data[buf->len] = '\0';
679         return 0;
680 } /* l2l_strbuf_append */
682 static void
683 l2l_strbuf_chomp(l2l_strbuf_t *buf)
685         if (! buf)
686                 return;
688         while (buf->len && isspace((int)buf->data[buf->len - 1])) {
689                 buf->data[buf->len - 1] = '\0';
690                 --buf->len;
691         }
692 } /* l2l_strbuf_chomp */
694 static void
695 l2l_strbuf_destroy(l2l_strbuf_t *buf)
697         if (! buf)
698                 return;
700         if (buf->data)
701                 free(buf->data);
702         memset(buf, 0, sizeof(*buf));
703 } /* l2l_strbuf_destroy */
705 /* vim: set tw=78 sw=4 ts=4 noexpandtab : */