src/rrd_afm_data.h

   1 /****************************************************************************
   2  * RRDtool 1.2rc4  Copyright by Tobi Oetiker, 1997-2005
   3  ****************************************************************************
   4  * rrd_afm_data.h  Encoded afm (Adobe Font Metrics) for selected fonts.
   5  ****************************************************************************/
   6
   7 #ifndef  RRD_AFM_DATA_H
   8 #define RRD_AFM_DATA_H
   9
  10 /*
  11 Description of data structures:
  12
  13   Ideally, the struct should be a list of fonts, and each font
  14   is a list of character-info.
  15   Each character has a structure:
  16     struct charinfo {
  17       char16 thechar;
  18       int width;
  19       struct {
  20         char16 nextchar;
  21         int deltawidth;
  22       } kernings[];
  23       struct {
  24         char16 nextchar;
  25         char16 resultingchar;
  26       } ligatures[];
  27     }
  28
  29     The data for typical fonts makes this a very sparse data structure.
  30     For most fonts, only the letter "f" has ligatures.
  31     All fonts have all (or almost all) of the characters 32-126,
  32     most fonts have all 161-255,
  33     and all fonts have very few 256-65535.
  34     Most kerning pairs have both chars 32-126.
  35
  36     The most basic design decision is to have all this data as
  37     const C globals all set up by array/struct initialisers
  38     so runtime setup overhead is minimal.
  39     The opposite approach would be to parse and load
  40     this info at runtime, but for rrdtool I have preferred
  41     speed over flexibility, as the same few fonts will be used
  42     zillions of times.
  43
  44     So the idea is to rewrite the above structure into
  45     something which:
  46     1) uses/wastes minimal memory
  47     2) is fast for most characters
  48     3) supports at least Iso-Latin-1, prefer full unicode.
  49     4) doesn't need full precision in char width
  50         (we can afford to lose 0.2% as rrdtool only needs to calculate
  51         overall layout of elements, not positioning individual
  52         characters)
  53     5) can be written as constant initialisers to C structs/arrays
  54        so we don't have runtime overhead starting rrdtool.
  55     6) can be easily generated by some script so it is easy
  56        to select a set of fonts and have the C data updated.
  57        So adding/removing fonts is a matter of a recompile.
  58
  59 Implementation design:
  60     All character structs are sorted by unicode value. Info for
  61     characters below 32 is discarded and the chars are treated
  62     as a space. Missing characters in the 32-126 range are
  63     substituted with default values so we can use direct array
  64     access for those. For characters above 126, binary search
  65     is used (not yet: linear for now, but it uses a good guess for
  66     most Latin-1 characters).
  67
  68     Ligature handling can be discarded as ligatures have very small
  69     effects on string width. The width of the "fi" ligature
  70     is the same (or very close) to the width of "f" plus the width
  71     of "i".
  72     If implemented, it can be a simple list (global for the font,
  73     not for each character) because all fonts I've seen that have
  74     ligatures have at most 3: "fi", "fl", "ffl" and no others.
  75
  76     Most characters have fewer than 10 kern pairs, a few have 10-20,
  77     and extremely few have 20-30. This is implemented as a simple
  78     linear search, with characters 256-65535 encoded using a prefix
  79     so most kern pairs only take 2 bytes:
  80     an unsigned 8 bit char value and a signed 8 bit kern width.
  81     Using a non-packed format would enable binary search, but
  82     would use almost twice as much memory for a yet unknown
  83     gain in speed.
  84
  85     Character widths are stored as unsigned bytes. Width of
  86     one character is font-size * bytevalue * 6 / 1000, since
  87     AFM specifies widths as integers with 1000 representing 1 * font-size.
  88     Kerning delta widths have the same scaling factor, but the value
  89     is a signed byte as many kerning widths are negative and smaller
  90     than the average character width.
  91
  92     Kerning info is stored in a shared packed int8 array
  93     to reduce the number of structs and memory usage.
  94     This sets the maximum number of kerning pairs to
  95     approx 15000.
  96       The font I have seen with most kern pairs is
  97       "Bodoni Old Face BE Bold Italic Oldstyle Figures"
  98       which has 1718 pairs for 62 chars.
  99       Typical fonts have 100-150 pairs.
 100     Each character then needs only a 16 bit index
 101     into this shared table.
 102     The format of each sub-array is:
 103       count ( unicode deltawidth )
 104     with the (...) repeated count times.
 105     The count and the unicode are packed because a lot of
 106     entries are less than 256, and most are below 400.
 107     Therefore an escape sequence is used.
 108     If the value is >= 510
 109       1, high-8bits, low-8bits
 110     else if the value is >= 254
 111       0, value minus 254
 112     else
 113       value plus 1
 114     An index of zero is treated as a NULL pointer,
 115     and the first byte in a shared array is
 116     therefore not used (and filled with a dummy value).
 117     The array is only created if non-empty.
 118         No entries can be zero (they are redundant),
 119         and no subarray can be empty (as the index pointer
 120         then is 0 meaning no sub array).
 121         The deltawidth is stored as a non-escaped signed byte.
 122
 123     So for each character needed info is:
 124       width: unsigned 8 bit int.
 125       kerning-subarray-index: unsigned 16 bit int.
 126
 127     The first 126-32+1 entries are for the characters
 128     32-126. If any is missing, a dummy entry is created.
 129     For characters 127-65535 a font-global
 130     array of struct {unicode, char-index} is
 131     used for binary search (not yet: linear for now).
 132
 133     Ligatures can be implemented as a font-global
 134     array of struct {
 135       unicode char1, char2, resultingchar;
 136     }
 137
 138     Font-global info is stored in a struct afm_fontinfo (see below).
 139
 140     The highchars_index and ligatures structures are flattened
 141     to a simple array to avoid accidental padding between
 142     structs if the structsize is problematic for some platforms.
 143
 144     All fonts are stored in an array of this struct,
 145     sorted by fullname for binary search (not yet sorted).
 146
 147     The .afm files are compiled by a perl script which creates
 148     rrd_afm_data.c
 149     The only thing rrd_afm_data.c contains is this compiled data.
 150
 151     Compiled on Mac OS X the size of rrd_afm_data.o
 152     is 67 Kb for the standard 14 postscript fonts,
 153     and 490 Kb for a set of 276 Adobe fonts.
 154 */
 155
 156 typedef unsigned char  afm_uint8;   /* unsigned byte; the design notes store character widths as unsigned bytes */
 157 typedef signed   char  afm_sint8;   /* signed byte; the design notes describe kerning delta widths as signed bytes */
 158 typedef unsigned short afm_uint16;  /* meant as an unsigned 16 bit value; NOTE(review): relies on short being 16 bits wide - confirm for all target platforms */
 159 typedef signed   short afm_sint16;  /* signed counterpart of afm_uint16 (same width assumption) */
 160 typedef unsigned short afm_unicode; /* one character code; same underlying type as afm_uint16 */
 161
 162 typedef const afm_uint8   afm_cuint8;   /* const-qualified afm_uint8; used for the read-only tables in afm_fontinfo */
 163 typedef const afm_sint8   afm_csint8;   /* const-qualified afm_sint8 */
 164 typedef const afm_uint16  afm_cuint16;  /* const-qualified afm_uint16 */
 165 typedef const afm_sint16  afm_csint16;  /* const-qualified afm_sint16 */
 166 typedef const afm_unicode afm_cunicode; /* const-qualified afm_unicode */
 167
 168 typedef struct afm_fontinfo { /* font-global info for one font; every table is reached through a pointer to const data */
 169   const char   *fullname; /* e.g. "Futura Bold Oblique" */
 170   const char   *postscript_name; /* e.g. "Futura-BoldOblique" */
 171   afm_cuint8   *widths; /* character widths as unsigned bytes, scaled as described in the design notes; per the notes the leading entries cover characters 32-126 */
 172   afm_csint16  *kerning_index; /* per-character index into kerning_data; the notes treat index 0 as "no sub-array". NOTE(review): the notes call this index unsigned 16 bit but it is declared signed - confirm */
 173   afm_cuint8   *kerning_data; /* shared packed byte array of kerning sub-arrays, each "count ( unicode deltawidth )"; first byte is an unused dummy per the notes */
 174   afm_cuint16  *highchars_index; /* flattened {unicode, char-index} entries used to look up characters above 126 */
 175   afm_cuint16   highchars_count; /* size of highchars_index; presumably counted in entries rather than raw array elements - TODO confirm against the lookup code */
 176   afm_cunicode *ligatures; /* flattened {char1, char2, resultingchar} entries, per the design notes */
 177   afm_cuint16   ligatures_count; /* size of ligatures; presumably counted in entries rather than raw array elements - TODO confirm */
 178 }      afm_fontinfo;
 179
 180 typedef struct old_afm_fontinfo { /* NOTE(review): looks like an earlier font-info layout; nothing else in this header refers to it - confirm whether it is still used */
 181   const char *fontname, *fullname; /* two name strings for the font; exact naming convention not shown in this header */
 182   const unsigned short *charinfo, *intarray; /* two read-only unsigned short tables; their encoding is not described in this header */
 183   const unsigned short charinfocount; /* presumably the number of entries in charinfo - verify against any remaining user */
 184   const unsigned short fixedpitch; /* presumably non-zero for fixed-pitch fonts - verify against any remaining user */
 185 } old_afm_fontinfo;
 186
 187 extern const afm_fontinfo afm_fontinfolist[]; /* array of all compiled-in fonts; per the design notes the data lives in the generated rrd_afm_data.c and is not yet sorted by fullname */
 188 extern const int afm_fontinfo_count; /* presumably the number of entries in afm_fontinfolist - confirm against rrd_afm_data.c */
 189
 190 #endif