Code

Translations. Spanish translation update by Lucas Vieites (Bug #664501).
[inkscape.git] / src / dom / ucd.h
1 /**
2  * Phoebe DOM Implementation.
3  *
4  * This is a C++ approximation of the W3C DOM model, which follows
5  * fairly closely the specifications in the various .idl files, copies of
6  * which are provided for reference.  Most important is this one:
7  *
8  * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
9  * 
10  * More thorough explanations of the various classes and their algorithms
11  * can be found there.
12  *     
13  *
14  * Authors:
15  *   Bob Jamison
16  *
17  * Copyright (C) 2006-2008 Bob Jamison
18  *
19  *  This library is free software; you can redistribute it and/or
20  *  modify it under the terms of the GNU Lesser General Public
21  *  License as published by the Free Software Foundation; either
22  *  version 2.1 of the License, or (at your option) any later version.
23  *
24  *  This library is distributed in the hope that it will be useful,
25  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
26  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27  *  Lesser General Public License for more details.
28  *
29  *  You should have received a copy of the GNU Lesser General Public
30  *  License along with this library; if not, write to the Free Software
31  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
32  *  
33  */
34 #ifndef __UCD_H__
35 #define __UCD_H__
38 /************************************************
39 ** Unicode character classification
40 ************************************************/
/**
 * Unicode General Category of a code point.
 *
 * The two-letter abbreviation after each enumerator is the General
 * Category code used by the Unicode Character Database.  The numeric
 * values are spelled out explicitly and must not be renumbered.
 */
typedef enum UniCharType
{
    UNI_UNASSIGNED                =  0,  /* Cn: other, not assigned       */
    UNI_UPPERCASE_LETTER          =  1,  /* Lu: letter, uppercase         */
    UNI_LOWERCASE_LETTER          =  2,  /* Ll: letter, lowercase         */
    UNI_TITLECASE_LETTER          =  3,  /* Lt: letter, titlecase         */
    UNI_MODIFIER_LETTER           =  4,  /* Lm: letter, modifier          */
    UNI_OTHER_LETTER              =  5,  /* Lo: letter, other             */
    UNI_NON_SPACING_MARK          =  6,  /* Mn: mark, nonspacing          */
    UNI_ENCLOSING_MARK            =  7,  /* Me: mark, enclosing           */
    UNI_COMBINING_SPACING_MARK    =  8,  /* Mc: mark, spacing combining   */
    UNI_DECIMAL_DIGIT_NUMBER      =  9,  /* Nd: number, decimal digit     */
    UNI_LETTER_NUMBER             = 10,  /* Nl: number, letter            */
    UNI_OTHER_NUMBER              = 11,  /* No: number, other             */
    UNI_SPACE_SEPARATOR           = 12,  /* Zs: separator, space          */
    UNI_LINE_SEPARATOR            = 13,  /* Zl: separator, line           */
    UNI_PARAGRAPH_SEPARATOR       = 14,  /* Zp: separator, paragraph      */
    UNI_CONTROL                   = 15,  /* Cc: other, control            */
    UNI_FORMAT                    = 16,  /* Cf: other, format             */
    UNI_UNUSED_RESERVE            = 17,  /* xx: placeholder, not a standard category code */
    UNI_PRIVATE_USE               = 18,  /* Co: other, private use        */
    UNI_SURROGATE                 = 19,  /* Cs: other, surrogate          */
    UNI_DASH_PUNCTUATION          = 20,  /* Pd: punctuation, dash         */
    UNI_START_PUNCTUATION         = 21,  /* Ps: punctuation, open         */
    UNI_END_PUNCTUATION           = 22,  /* Pe: punctuation, close        */
    UNI_CONNECTOR_PUNCTUATION     = 23,  /* Pc: punctuation, connector    */
    UNI_OTHER_PUNCTUATION         = 24,  /* Po: punctuation, other        */
    UNI_MATH_SYMBOL               = 25,  /* Sm: symbol, math              */
    UNI_CURRENCY_SYMBOL           = 26,  /* Sc: symbol, currency          */
    UNI_MODIFIER_SYMBOL           = 27,  /* Sk: symbol, modifier          */
    UNI_OTHER_SYMBOL              = 28,  /* So: symbol, other             */
    UNI_INITIAL_QUOTE_PUNCTUATION = 29,  /* Pi: punctuation, initial quote */
    UNI_FINAL_QUOTE_PUNCTUATION   = 30   /* Pf: punctuation, final quote  */
} UnicodeCharType;
/**
 * Look up the raw property-table entry for a Unicode code point.
 * @param ch the Unicode code point to look up
 * @return the raw UCD property table entry for ch
 */
unsigned int uni_code(int ch);
/**
 * Get the Unicode General Category of this character.
 * @param ch the Unicode code point to classify
 * @return the General Category, as one of the 'enum UniCharType'
 *         (UnicodeCharType) values declared in this header
 */
unsigned int uni_type(int ch);
/**
 * Test whether a Unicode code point is lower case.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is lower case), else 0
 */
int uni_is_lower(int ch);
/**
 * Test whether a Unicode code point is upper case.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is upper case), else 0
 */
int uni_is_upper(int ch);
/**
 * Test whether a Unicode code point is title case.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is title case), else 0
 */
int uni_is_title(int ch);
/**
 * Test whether a Unicode code point is a numeric digit.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is a numeric digit), else 0
 */
int uni_is_digit(int ch);
/**
 * Test whether a Unicode code point is defined in the database.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is defined), else 0
 */
int uni_is_defined(int ch);
/**
 * Test whether a Unicode code point is a letter.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is a letter), else 0
 */
int uni_is_letter(int ch);
/**
 * Test whether a Unicode code point is a letter or a digit.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is a letter or a digit), else 0
 */
int uni_is_letter_or_digit(int ch);
/**
 * Test whether a Unicode code point is considered to be a space.
 * @param ch the Unicode code point to test
 * @return 1 if the test succeeds (ch is considered a space), else 0
 */
int uni_is_space(int ch);
160 /************************************************
161 ** Unicode case conversion
162 ************************************************/
/**
 * Map a code point to its lower case equivalent.
 * A code point with no lower case mapping is returned unchanged.
 * @param ch the Unicode code point to convert
 * @return the lower case mapping of ch, or ch itself if there is none
 */
int uni_to_lower(int ch);
/**
 * Map a code point to its upper case equivalent.
 * A code point with no upper case mapping is returned unchanged.
 * @param ch the Unicode code point to convert
 * @return the upper case mapping of ch, or ch itself if there is none
 */
int uni_to_upper(int ch);
/**
 * Map a code point to its title case equivalent.
 * A code point with no title case mapping is returned unchanged.
 * @param ch the Unicode code point to convert
 * @return the title case mapping of ch, or ch itself if there is none
 */
int uni_to_title(int ch);
189 /************************************************
190 ** Unicode blocks
191 ************************************************/
/**
 * Describes one Unicode code point block: the range it covers
 * and its name.
 */
typedef struct
{
    /**
     * Low end of the block range
     */
    unsigned long low;
    /**
     * High end of the block range
     */
    unsigned long high;
    /**
     * Name string for the block
     */
    const char    *name;
} UcdBlockData;
/**
 * Find the Unicode block that contains the given code point.
 * @param ch the Unicode code point to search for
 * @return the block number, one of the UCD_BLOCK_* enumerators of
 *         UnicodeBlocks declared in this header; UCD_BLOCK_NO_BLOCK
 *         if no block is found
 */
int uni_block(int ch);
225 /**
226  * Return the Unicode block data for the enumerated block number.
227  * @param nr the Unicode block number
228  * @return the block data if found, else NULL
229  */
230 UcdBlockData *uni_block_data(int blockNr);
234            
/**
 * The Unicode codepoint blocks as defined in Blocks.txt.
 * Block list has 171 entries
 *
 * The comment before each enumerator gives the code point range of
 * the block.  Each enumerator is assigned its documented block index
 * explicitly, so that UCD_BLOCK_NO_BLOCK equals the documented entry
 * count (171) instead of depending on how many enumerators precede it.
 *
 * NOTE(review): only even-numbered indices (plus 171) appear in this
 * listing.  Confirm the explicit values against the block table that
 * uni_block() / uni_block_data() use.
 */
typedef enum
{
    /* 000000 - 00007f */  UCD_BLOCK_BASIC_LATIN = 0,
    /* 000100 - 00017f */  UCD_BLOCK_LATIN_EXTENDED_A = 2,
    /* 000250 - 0002af */  UCD_BLOCK_IPA_EXTENSIONS = 4,
    /* 000300 - 00036f */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS = 6,
    /* 000400 - 0004ff */  UCD_BLOCK_CYRILLIC = 8,
    /* 000530 - 00058f */  UCD_BLOCK_ARMENIAN = 10,
    /* 000600 - 0006ff */  UCD_BLOCK_ARABIC = 12,
    /* 000750 - 00077f */  UCD_BLOCK_ARABIC_SUPPLEMENT = 14,
    /* 0007c0 - 0007ff */  UCD_BLOCK_NKO = 16,
    /* 000980 - 0009ff */  UCD_BLOCK_BENGALI = 18,
    /* 000a80 - 000aff */  UCD_BLOCK_GUJARATI = 20,
    /* 000b80 - 000bff */  UCD_BLOCK_TAMIL = 22,
    /* 000c80 - 000cff */  UCD_BLOCK_KANNADA = 24,
    /* 000d80 - 000dff */  UCD_BLOCK_SINHALA = 26,
    /* 000e80 - 000eff */  UCD_BLOCK_LAO = 28,
    /* 001000 - 00109f */  UCD_BLOCK_MYANMAR = 30,
    /* 001100 - 0011ff */  UCD_BLOCK_HANGUL_JAMO = 32,
    /* 001380 - 00139f */  UCD_BLOCK_ETHIOPIC_SUPPLEMENT = 34,
    /* 001400 - 00167f */  UCD_BLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 36,
    /* 0016a0 - 0016ff */  UCD_BLOCK_RUNIC = 38,
    /* 001720 - 00173f */  UCD_BLOCK_HANUNOO = 40,
    /* 001760 - 00177f */  UCD_BLOCK_TAGBANWA = 42,
    /* 001800 - 0018af */  UCD_BLOCK_MONGOLIAN = 44,
    /* 001950 - 00197f */  UCD_BLOCK_TAI_LE = 46,
    /* 0019e0 - 0019ff */  UCD_BLOCK_KHMER_SYMBOLS = 48,
    /* 001b00 - 001b7f */  UCD_BLOCK_BALINESE = 50,
    /* 001c00 - 001c4f */  UCD_BLOCK_LEPCHA = 52,
    /* 001d00 - 001d7f */  UCD_BLOCK_PHONETIC_EXTENSIONS = 54,
    /* 001dc0 - 001dff */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 56,
    /* 001f00 - 001fff */  UCD_BLOCK_GREEK_EXTENDED = 58,
    /* 002070 - 00209f */  UCD_BLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 60,
    /* 0020d0 - 0020ff */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = 62,
    /* 002150 - 00218f */  UCD_BLOCK_NUMBER_FORMS = 64,
    /* 002200 - 0022ff */  UCD_BLOCK_MATHEMATICAL_OPERATORS = 66,
    /* 002400 - 00243f */  UCD_BLOCK_CONTROL_PICTURES = 68,
    /* 002460 - 0024ff */  UCD_BLOCK_ENCLOSED_ALPHANUMERICS = 70,
    /* 002580 - 00259f */  UCD_BLOCK_BLOCK_ELEMENTS = 72,
    /* 002600 - 0026ff */  UCD_BLOCK_MISCELLANEOUS_SYMBOLS = 74,
    /* 0027c0 - 0027ef */  UCD_BLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 76,
    /* 002800 - 0028ff */  UCD_BLOCK_BRAILLE_PATTERNS = 78,
    /* 002980 - 0029ff */  UCD_BLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 80,
    /* 002b00 - 002bff */  UCD_BLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 82,
    /* 002c60 - 002c7f */  UCD_BLOCK_LATIN_EXTENDED_C = 84,
    /* 002d00 - 002d2f */  UCD_BLOCK_GEORGIAN_SUPPLEMENT = 86,
    /* 002d80 - 002ddf */  UCD_BLOCK_ETHIOPIC_EXTENDED = 88,
    /* 002e00 - 002e7f */  UCD_BLOCK_SUPPLEMENTAL_PUNCTUATION = 90,
    /* 002f00 - 002fdf */  UCD_BLOCK_KANGXI_RADICALS = 92,
    /* 003000 - 00303f */  UCD_BLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 94,
    /* 0030a0 - 0030ff */  UCD_BLOCK_KATAKANA = 96,
    /* 003130 - 00318f */  UCD_BLOCK_HANGUL_COMPATIBILITY_JAMO = 98,
    /* 0031a0 - 0031bf */  UCD_BLOCK_BOPOMOFO_EXTENDED = 100,
    /* 0031f0 - 0031ff */  UCD_BLOCK_KATAKANA_PHONETIC_EXTENSIONS = 102,
    /* 003300 - 0033ff */  UCD_BLOCK_CJK_COMPATIBILITY = 104,
    /* 004dc0 - 004dff */  UCD_BLOCK_YIJING_HEXAGRAM_SYMBOLS = 106,
    /* 00a000 - 00a48f */  UCD_BLOCK_YI_SYLLABLES = 108,
    /* 00a500 - 00a63f */  UCD_BLOCK_VAI = 110,
    /* 00a700 - 00a71f */  UCD_BLOCK_MODIFIER_TONE_LETTERS = 112,
    /* 00a800 - 00a82f */  UCD_BLOCK_SYLOTI_NAGRI = 114,
    /* 00a880 - 00a8df */  UCD_BLOCK_SAURASHTRA = 116,
    /* 00a930 - 00a95f */  UCD_BLOCK_REJANG = 118,
    /* 00ac00 - 00d7af */  UCD_BLOCK_HANGUL_SYLLABLES = 120,
    /* 00db80 - 00dbff */  UCD_BLOCK_HIGH_PRIVATE_USE_SURROGATES = 122,
    /* 00e000 - 00f8ff */  UCD_BLOCK_PRIVATE_USE_AREA = 124,
    /* 00fb00 - 00fb4f */  UCD_BLOCK_ALPHABETIC_PRESENTATION_FORMS = 126,
    /* 00fe00 - 00fe0f */  UCD_BLOCK_VARIATION_SELECTORS = 128,
    /* 00fe20 - 00fe2f */  UCD_BLOCK_COMBINING_HALF_MARKS = 130,
    /* 00fe50 - 00fe6f */  UCD_BLOCK_SMALL_FORM_VARIANTS = 132,
    /* 00ff00 - 00ffef */  UCD_BLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 134,
    /* 010000 - 01007f */  UCD_BLOCK_LINEAR_B_SYLLABARY = 136,
    /* 010100 - 01013f */  UCD_BLOCK_AEGEAN_NUMBERS = 138,
    /* 010190 - 0101cf */  UCD_BLOCK_ANCIENT_SYMBOLS = 140,
    /* 010280 - 01029f */  UCD_BLOCK_LYCIAN = 142,
    /* 010300 - 01032f */  UCD_BLOCK_OLD_ITALIC = 144,
    /* 010380 - 01039f */  UCD_BLOCK_UGARITIC = 146,
    /* 010400 - 01044f */  UCD_BLOCK_DESERET = 148,
    /* 010480 - 0104af */  UCD_BLOCK_OSMANYA = 150,
    /* 010900 - 01091f */  UCD_BLOCK_PHOENICIAN = 152,
    /* 010a00 - 010a5f */  UCD_BLOCK_KHAROSHTHI = 154,
    /* 012400 - 01247f */  UCD_BLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 156,
    /* 01d100 - 01d1ff */  UCD_BLOCK_MUSICAL_SYMBOLS = 158,
    /* 01d300 - 01d35f */  UCD_BLOCK_TAI_XUAN_JING_SYMBOLS = 160,
    /* 01d400 - 01d7ff */  UCD_BLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 162,
    /* 01f030 - 01f09f */  UCD_BLOCK_DOMINO_TILES = 164,
    /* 02f800 - 02fa1f */  UCD_BLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 166,
    /* 0e0100 - 0e01ef */  UCD_BLOCK_VARIATION_SELECTORS_SUPPLEMENT = 168,
    /* 100000 - 10ffff */  UCD_BLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 170,
    /* 000000 - 10ffff */  UCD_BLOCK_NO_BLOCK = 171
} UnicodeBlocks;
331 #endif /* __UCD_H__ */