/* * lm2latex - src/l2l_scanner.l * Copyright (C) 2010 Sebastian Harl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * A Linux-Magazin markup to LaTeX converter -- LM markup scanner. 
*/

%{
#if HAVE_CONFIG_H
#	include "config.h"
#endif

/*
 * NOTE(review): the header names below were missing from the directives in
 * the reviewed copy (six bare "#include" lines).  They are reconstructed from
 * the standard-library names this file uses (assert, isspace, va_list,
 * fprintf/vsnprintf, realloc/free, strncmp/strlen/strstr/memset) -- confirm
 * against upstream.
 */
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Growable string buffer.
 *   data - heap storage (NULL until the first non-empty append)
 *   size - capacity in bytes, not counting the terminating NUL
 *   len  - number of bytes currently stored
 */
typedef struct {
	char  *data;
	size_t size;
	size_t len;
} l2l_strbuf_t;
#define L2L_STRBUF_INIT { NULL, 0, 0 }

/* unused capacity / pointer to the write position / reset without freeing */
#define L2L_STRBUF_FREE(buf) ((buf)->size - (buf)->len)
#define L2L_STRBUF_END(buf) ((buf)->data + (buf)->len)
#define L2L_STRBUF_CLEAR(buf) ((buf)->len = 0)

static int l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...);
static void l2l_strbuf_chomp(l2l_strbuf_t *buf);
static void l2l_strbuf_destroy(l2l_strbuf_t *buf);

/*
 * printf-style output helper: if `strbuf' is non-NULL the formatted text is
 * appended to that buffer (a failed append is a fatal scanner error),
 * otherwise it is written to yyout.
 */
#define L2L_YY_APPEND_OR_ECHO(strbuf, ...) \
	do { \
		if ((strbuf)) { \
			if (l2l_strbuf_append((strbuf), __VA_ARGS__)) \
				YY_FATAL_ERROR("ERROR: internal error"); \
		} \
		else \
			fprintf(yyout, __VA_ARGS__); \
	} while (0)

/* abort the scanner unless it is still in the L2L_HEADER state */
#define L2L_STATE_HEADER_OR_ABORT \
	do { \
		if (l2l_scanner_state != L2L_HEADER) \
			YY_FATAL_ERROR("ERROR: unexpected header definition"); \
	} while (0)

/* true if the current match (yytext) starts with `str' */
#define L2L_PATTERN_IS(str) (! strncmp(yytext, (str), strlen(str)))
/* true if the current match starts with one of the six header commands */
#define L2L_PATTERN_IS_HEADER \
	(L2L_PATTERN_IS("@R:") || L2L_PATTERN_IS("@SW:") \
		|| L2L_PATTERN_IS("@D:") || L2L_PATTERN_IS("@T:") \
		|| L2L_PATTERN_IS("@A:") || L2L_PATTERN_IS("@V:"))
%}

%option yylineno
%option noyywrap

/* optimize for "fast scanning"; create an 8-bit scanner */
%option fast 8bit

/* generate a reentrant scanner in its own namespace */
%option reentrant
%option prefix="l2l_yy" outfile="lex.yy.c"

%option verbose warn

%s box
%x caption box_caption
%x command
%x headertxt
%x itemize
%x listing
%x title
%x url

COMMENT @#:.*
WHITESPACE [ \t]
ANY_COMMAND @(R|SW|D|T|V|A|L|ZT|LI|B|Bi|IT|IL|IE|KT|KL|KE|TT|TH|TL|TE):

%%

%{
	/* values shared by l2l_scanner_state and l2l_quote_state */
	enum {
		L2L_HEADER,
		L2L_BODY,
		L2L_FIGURE,

		L2L_UNQUOTED_TEXT,
		L2L_QUOTED_TEXT,
	};

	int l2l_scanner_state = L2L_HEADER;
	int l2l_quote_state   = L2L_UNQUOTED_TEXT;

	/* text formatting: each flag is set while the matching group is open */
	_Bool l2l_italic = 0;
	_Bool l2l_bold   = 0;
	_Bool l2l_code   = 0;
	_Bool l2l_sup    = 0;
	_Bool l2l_sub    = 0;

	_Bool l2l_li_italic = 0;

	/* title page information */
	l2l_strbuf_t l2l_category = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_keyword  = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_title    = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_headline = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_author   = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_abstract = L2L_STRBUF_INIT;

	/* temp. buffers */
	l2l_strbuf_t l2l_caption = L2L_STRBUF_INIT;
	l2l_strbuf_t l2l_ititle  = L2L_STRBUF_INIT;

	/* buffer currently collecting text; NULL means "write to yyout" */
	l2l_strbuf_t *l2l_buf_ptr = NULL;

	/* start condition that rules return to when a sub-state ends */
	int l2l_last_yystart = INITIAL;
%}

	/*
	 * NOTE(review): in the reviewed copy several rules below appear to have
	 * lost their <start-condition> prefix (the same pattern shows up twice
	 * in a row with different actions, and a rule group opens with a bare
	 * brace).  The rule text is kept exactly as found; restore the prefixes
	 * from upstream before building.
	 */

{COMMENT} {
	/* turn an "@#:" comment into a LaTeX "%" comment */
	fprintf(yyout, "%%%s", yytext + strlen("@#:"));
}

{
	/* NOTE(review): start-condition list in front of this group is missing */

	/* LaTeX reserved characters */
	[#$%&_{}] {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s", yytext);
	}
	[\^~] {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\%s{}", yytext);
	}
	[\\] {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "$\\backslash$");
	}

	/* special characters */
	/* '---' and '--' are handled by LaTeX */
	\*\* L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "~");
	:\* L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\,");

	["] {
		/* alternate between opening ("`) and closing ("') quote output */
		if (l2l_quote_state == L2L_UNQUOTED_TEXT) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"`");
			l2l_quote_state = L2L_QUOTED_TEXT;
		}
		else {
			assert(l2l_quote_state == L2L_QUOTED_TEXT);
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\"'");
			l2l_quote_state = L2L_UNQUOTED_TEXT;
		}
	}

	/* text formatting: every tag toggles, i.e. the same tag opens and
	 * closes the group */
	\<[iI]> {
		if (l2l_italic) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_italic = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\itshape ");
			l2l_italic = 1;
		}
	}
	\<[bB]> {
		if (l2l_bold) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_bold = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\bfseries ");
			l2l_bold = 1;
		}
	}
	\<[cC]> {
		if (l2l_code) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
			l2l_code = 0;
		}
		else {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "{\\ttfamily ");
			l2l_code = 1;
		}
	}
	\<[\+]> {
		if (l2l_sup) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
			l2l_sup = 0;
		}
		else {
			/* "\\rmfamily": a single backslash here would be the C
			 * escape \r (carriage return) followed by "mfamily" */
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
					"\\ensuremath{^{\\rmfamily ");
			l2l_sup = 1;
		}
	}
	\<[\-]> {
		if (l2l_sub) {
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}}");
			l2l_sub = 0;
		}
		else {
			/* see the superscript rule for why the backslash is doubled */
			L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
					"\\ensuremath{_{\\rmfamily ");
			l2l_sub = 1;
		}
	}

	/* bracketed numbers such as "[1]" are printed bold and coloured */
	\[[0-9]+\] {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr,
				"{\\bfseries\\textcolor{DarkBlue}{%s}}", yytext);
	}
}

\<[uU]> {
	L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\url{");
	BEGIN(url);
}
	/* NOTE(review): the next two rules presumably belong to <url> */
\<[uU]> {
	L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "}");
	BEGIN(l2l_last_yystart);
}
.|\n {
	L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "%s", yytext);
}

(Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+ {
	/* references like "Abbildung 1" / "Listing 2" are coloured */
	L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\\textcolor{DarkBlue}{%s}", yytext);
}

	/*
	 * header
	 *
	 * Each header command selects the buffer that collects the following
	 * text and switches to the headertxt start condition.
	 */

@R:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_category;
	BEGIN(headertxt);
}

@SW:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_keyword;
	BEGIN(headertxt);
}

@D:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_title;
	BEGIN(headertxt);
}

@T:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_headline;
	BEGIN(headertxt);
}

@A:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_author;
	BEGIN(headertxt);
}

@V:{WHITESPACE}* {
	L2L_STATE_HEADER_OR_ABORT;
	l2l_buf_ptr = &l2l_abstract;
	BEGIN(headertxt);
}

	/* NOTE(review): the next three rules presumably belong to <headertxt> */
{ANY_COMMAND} {
	/* the next command ends the header field that was being collected */
	if (l2l_buf_ptr) {
		l2l_strbuf_chomp(l2l_buf_ptr);
		l2l_buf_ptr = NULL;
	}

	/* the first non-header command ends the header: emit the title page */
	if (! L2L_PATTERN_IS_HEADER) {
		assert(l2l_scanner_state == L2L_HEADER);

		if (l2l_category.len || l2l_keyword.len
				|| l2l_title.len || l2l_headline.len) {
			fprintf(yyout, "\\title{");
			if (l2l_category.len || l2l_keyword.len) {
				fprintf(yyout, "{\\scriptsize [");
				if (l2l_category.len) {
					fprintf(yyout, "%s", l2l_category.data);
					if (l2l_keyword.len)
						fprintf(yyout, " $\\rightarrow$ ");
				}
				if (l2l_keyword.len)
					fprintf(yyout, "%s", l2l_keyword.data);
				fprintf(yyout, "]}\\\\\n\\vspace{5mm}\n");
			}

			if (l2l_headline.len) {
				if (l2l_title.len)
					fprintf(yyout, "\\normalsize %s\\\\\n\\LARGE ",
							l2l_title.data);
				fprintf(yyout, "%s", l2l_headline.data);
			}
			else if (l2l_title.len)
				fprintf(yyout, "%s", l2l_title.data);
			fprintf(yyout, "}\n");
		}

		if (l2l_author.len)
			fprintf(yyout, "\\author{%s}\n", l2l_author.data);
		fprintf(yyout, "\\date{\\today}\n\n");

		fprintf(yyout, "\\begin{document}\n\n");
		l2l_scanner_state = L2L_BODY;

		fprintf(yyout, "\\twocolumn[\n"
				"\\begin{@twocolumnfalse}\n"
				"\\maketitle\n\n");
		if (l2l_abstract.len)
			fprintf(yyout, "\\begin{abstract}\n%s\n"
					"\\end{abstract}\n\n", l2l_abstract.data);
		fprintf(yyout, "\\rule{\\textwidth}{1pt}\n"
				"\\begin{center}\n"
				"\\begin{minipage}{.75\\textwidth}\n"
				"\\tableofcontents\n"
				"\\end{minipage}\n"
				"\\end{center}\n"
				"\\rule{\\textwidth}{1pt}\n"
				"\\end{@twocolumnfalse}\n]\n\n");
	}

	/* push the command back so that its own rule handles it */
	yyless(0);
	BEGIN(l2l_last_yystart);
}
{COMMENT} { /* ignore */ }
.|\n {
	if (l2l_buf_ptr)
		if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	/* else ignore */
}

	/*
	 * body
	 */

\n\n{WHITESPACE}*\*{WHITESPACE}* {
	/* an empty line followed by "*" opens an itemize environment */
	fprintf(yyout, "\n\n\\begin{itemize}\n\\item ");
	BEGIN(itemize);
}
	/* NOTE(review): the following rules presumably belong to <itemize> */
({WHITESPACE}|\n)*\n\n |
({WHITESPACE}|\n)*{ANY_COMMAND} {
	fprintf(yyout, "\n\\end{itemize}");
	yyless(0);
	BEGIN(l2l_last_yystart);
}
	/* this will produce a longer match and, thus, take precedence over
	 * the pattern above matching an empty line after the itemize */
({WHITESPACE}|\n)*\n{WHITESPACE}*\*{WHITESPACE}* {
	fprintf(yyout, "\n\\item ");
}
({WHITESPACE}|\n)*\n\n({WHITESPACE}{2,}|\t) ECHO;
. ECHO;
	/*
	 * NOTE(review): in the reviewed copy many rules in this part appear to
	 * have lost their <start-condition> prefix (only the two <title> rules
	 * and <<EOF>> still carry one).  The two rules above presumably belong
	 * to <itemize>.  The rule text is kept exactly as found; restore the
	 * prefixes from upstream before building.
	 */

@ZT:{WHITESPACE}* {
	fprintf(yyout, "\\subsection{");
	BEGIN(command);
}
	/* NOTE(review): the next two rules presumably belong to <command> */
({WHITESPACE}|\n)+{ANY_COMMAND} {
	/* the next command closes the \subsection{ argument */
	fprintf(yyout, "}");
	yyless(0);
	BEGIN(l2l_last_yystart);
}
. ECHO;

@LI:{WHITESPACE}*\n? {
	/* \xb7 = MIDDLE DOT (iso-8859-1 and iso-8859-15) */
	fprintf(yyout, "\\begin{lstlisting}[escapechar=\xb7]\n");
	BEGIN(listing);
}
	/* NOTE(review): the next four rules presumably belong to <listing> */
\n*{ANY_COMMAND} {
	fprintf(yyout, "\n\\end{lstlisting}");
	yyless(0);
	BEGIN(l2l_last_yystart);
}
\<§§I> {
	/* toggles italics; the text is wrapped in the listing escape char */
	if (l2l_li_italic) {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\xb7");
		l2l_li_italic = 0;
	}
	else {
		L2L_YY_APPEND_OR_ECHO(l2l_buf_ptr, "\xb7\\itshape ");
		l2l_li_italic = 1;
	}
}
§§\n /* ignore => lstlisting will insert appropriate arrows */;
. ECHO;

	/*
	 * images/boxes
	 */

@(B|KT):{WHITESPACE}*((Abbildung|Listing)({WHITESPACE}|\n)+[0-9]+:)? {
	_Bool is_image = 0;
	_Bool is_box   = 0;

	char *label = NULL;

	if (! strncmp(yytext, "@B:", 3))
		is_image = 1;
	else if (! strncmp(yytext, "@KT:", 4))
		is_box = 1;

	/* open the environment unless one is already open; in that case this
	 * command is its last part and the state goes back to L2L_BODY */
	if (l2l_scanner_state != L2L_FIGURE) {
		if (is_box)
			fprintf(yyout, "\\begin{l2lbox}\n");
		else
			fprintf(yyout, "\\begin{figure}\n");
		l2l_scanner_state = L2L_FIGURE;
	}
	else {
		l2l_scanner_state = L2L_BODY;
	}

	/* an optional "Abbildung N:" / "Listing N:" label starts the caption
	 * in bold */
	label = strstr(yytext, "Abbildung");
	if (! label)
		label = strstr(yytext, "Listing");

	if (label)
		if (l2l_strbuf_append(&l2l_caption, "{\\bfseries %s}", label))
			YY_FATAL_ERROR("ERROR: internal error");

	/* the text that follows is collected as the caption */
	l2l_buf_ptr = &l2l_caption;
	if (is_image)
		BEGIN(caption);
	else if (is_box)
		BEGIN(box_caption);
	else
		assert(0);
}

@Bi:.*\n {
	char *filename;

	/* cut off the trailing newline, then skip "@Bi:" and any whitespace */
	yytext[yyleng - 1] = '\0';
	filename = yytext + strlen("@Bi:");
	/* <ctype.h> functions need an unsigned char value; a plain (possibly
	 * negative) char is undefined behaviour for bytes >= 0x80 */
	while (isspace((unsigned char)*filename))
		++filename;

	if (l2l_scanner_state != L2L_FIGURE) {
		fprintf(yyout, "\\begin{figure}\n");
		l2l_scanner_state = L2L_FIGURE;
	}
	else {
		l2l_scanner_state = L2L_BODY;
	}

	fprintf(yyout, "\\includegraphics[width=\\columnwidth]{%s}\n", filename);

	/* the figure was already open, so the image was its last part */
	if (l2l_scanner_state == L2L_BODY)
		fprintf(yyout, "\\end{figure}\n");
}

@KL: {
	if (l2l_scanner_state != L2L_FIGURE) {
		fprintf(yyout, "\\begin{l2lbox}\n");
		l2l_scanner_state = L2L_FIGURE;
	}
	else {
		l2l_scanner_state = L2L_BODY;
	}

	/* sub-states entered from inside the box return to <box> */
	l2l_last_yystart = box;
	BEGIN(box);
}

	/* NOTE(review): this rule presumably belongs to <box> */
@(KE|L):{WHITESPACE}* {
	fprintf(yyout, "\\end{l2lbox}");
	l2l_scanner_state = L2L_BODY;

	l2l_last_yystart = INITIAL;
	BEGIN(l2l_last_yystart);
}

	/* NOTE(review): the next three rules presumably belong to
	 * <caption,box_caption> */
{ANY_COMMAND} {
	/* the next command ends the caption text */
	if (l2l_buf_ptr) {
		l2l_strbuf_chomp(l2l_buf_ptr);
		l2l_buf_ptr = NULL;
	}

	if (l2l_caption.len) {
		fprintf(yyout, "\\caption*{%s}\n", l2l_caption.data);
		L2L_STRBUF_CLEAR(&l2l_caption);
	}

	if ((l2l_scanner_state == L2L_BODY) && (YY_START != box_caption))
		fprintf(yyout, "\\end{figure}\n\n");

	/* push the command back so that its own rule handles it */
	yyless(0);

	if (YY_START == caption)
		BEGIN(l2l_last_yystart);
	else if (YY_START == box_caption) {
		l2l_last_yystart = box;
		BEGIN(box);
	}
	else
		assert(0);
}
{COMMENT} { /* ignore */ }
.|\n {
	if (l2l_buf_ptr)
		if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	/* else ignore */
}

	/*
	 * infos
	 */

@IT:{WHITESPACE}* {
	l2l_buf_ptr = &l2l_ititle;
	BEGIN(title);
}
	/* NOTE(review): this rule presumably belongs to <title> */
{ANY_COMMAND} {
	/* the next command ends the info title */
	if (l2l_buf_ptr) {
		l2l_strbuf_chomp(l2l_buf_ptr);
		l2l_buf_ptr = NULL;
	}

	if (l2l_ititle.len) {
		fprintf(yyout, "\\subsection*{\\rule{\\columnwidth}{1pt}"
				"\\newline %s}\n", l2l_ititle.data);
		L2L_STRBUF_CLEAR(&l2l_ititle);
	}

	yyless(0);
	BEGIN(l2l_last_yystart);
}
<title>{COMMENT} { /* ignore */ }
<title>.|\n {
	if (l2l_buf_ptr)
		if (l2l_strbuf_append(l2l_buf_ptr, "%s", yytext))
			YY_FATAL_ERROR("ERROR: internal error");
	/* else ignore */
}

@IL: { /* ignore */ }
@IE: { /* ignore */ }

	/*
	 * tables
	 */

@TT: /* TODO */;
@TH: /* TODO */;
@TL: /* TODO */;
@TE: /* TODO */;

	/* let all other rules that match "@L:" have a higher precedence */
@L:{WHITESPACE}* { /* nothing to do */ }

<<EOF>> {
	/* a document that never left the header (or ends inside a figure)
	 * is incomplete */
	if (l2l_scanner_state == L2L_BODY) {
		fprintf(yyout, "\\end{document}\n");
	}
	else
		YY_FATAL_ERROR("ERROR: incomplete document");

	/* release every buffer exactly once, including the info title */
	l2l_strbuf_destroy(&l2l_category);
	l2l_strbuf_destroy(&l2l_keyword);
	l2l_strbuf_destroy(&l2l_title);
	l2l_strbuf_destroy(&l2l_headline);
	l2l_strbuf_destroy(&l2l_author);
	l2l_strbuf_destroy(&l2l_abstract);

	l2l_strbuf_destroy(&l2l_caption);
	l2l_strbuf_destroy(&l2l_ititle);

	yyterminate();
}

%%

/*
 * l2l_strbuf_append:
 * Append printf-style formatted text to `buf', growing the storage as
 * needed.  The stored data is always NUL-terminated after a successful,
 * non-empty append.
 *
 * Returns 0 on success (also when `buf' or `fmt' is NULL, or when the
 * formatted text is empty -- nothing is appended then) and -1 on error.  If
 * growing the buffer fails, the buffer is destroyed before returning -1.
 */
static int
l2l_strbuf_append(l2l_strbuf_t *buf, const char *fmt, ...)
{
	size_t str_len;

	va_list ap;
	int status;

	if ((! buf) || (! fmt))
		return 0;

	/* first pass: only determine the length of the formatted text */
	va_start(ap, fmt);
	status = vsnprintf(NULL, 0, fmt, ap);
	va_end(ap);

	if (status < 0)
		return -1;
	/* an empty result is not an error; returning here also avoids
	 * writing through a NULL data pointer of a never-filled buffer */
	if (status == 0)
		return 0;

	str_len = (size_t)status;

	if (str_len > L2L_STRBUF_FREE(buf)) {
		char *new_data;

		/* double the capacity; add the text length on top if doubling
		 * alone might not be enough */
		size_t new_size = 2 * buf->size;
		if (str_len > buf->size)
			new_size += str_len;

		/* +1: `size' does not count the terminating NUL */
		new_data = realloc(buf->data, new_size + 1);
		if (! new_data) {
			l2l_strbuf_destroy(buf);
			return -1;
		}

		buf->data = new_data;
		buf->size = new_size;
	}

	assert(buf->len + str_len <= buf->size);

	/* second pass: format directly behind the existing content */
	va_start(ap, fmt);
	status = vsnprintf(L2L_STRBUF_END(buf),
			L2L_STRBUF_FREE(buf) + 1, fmt, ap);
	va_end(ap);

	if (status < 0)
		return -1;

	assert((size_t)status == str_len);

	buf->len += str_len;
	buf->data[buf->len] = '\0';
	return 0;
} /* l2l_strbuf_append */

/*
 * l2l_strbuf_chomp:
 * Remove all trailing whitespace (as classified by isspace()) from `buf'.
 * A NULL `buf' is ignored.
 */
static void
l2l_strbuf_chomp(l2l_strbuf_t *buf)
{
	if (! buf)
		return;

	/* cast to unsigned char: passing a negative plain char to isspace()
	 * is undefined behaviour, and 8-bit input contains bytes >= 0x80 */
	while (buf->len && isspace((unsigned char)buf->data[buf->len - 1])) {
		buf->data[buf->len - 1] = '\0';
		--buf->len;
	}
} /* l2l_strbuf_chomp */

/*
 * l2l_strbuf_destroy:
 * Free the storage of `buf' and reset it to the empty state, so that it may
 * be destroyed again or reused.  A NULL `buf' is ignored.
 */
static void
l2l_strbuf_destroy(l2l_strbuf_t *buf)
{
	if (! buf)
		return;

	/* free(NULL) is a no-op, no guard needed */
	free(buf->data);
	memset(buf, 0, sizeof(*buf));
} /* l2l_strbuf_destroy */

/* vim: set tw=78 sw=4 ts=4 noexpandtab : */