X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=src%2Fdom%2Fucd.cpp;h=3747334d2b0861f682993c12dbfe47714520e02d;hb=aa42d0d7b4bbf2f20b5f3329d00c4a80ad1598ec;hp=862c6de2882c9fb19908755fdc3d579389ce5680;hpb=6a0dbead204c95e6f0a9df7f8f0b4c094b67f722;p=inkscape.git diff --git a/src/dom/ucd.cpp b/src/dom/ucd.cpp index 862c6de28..3747334d2 100644 --- a/src/dom/ucd.cpp +++ b/src/dom/ucd.cpp @@ -1,11 +1,11 @@ /* - * Generated by UcdReader at:Tue May 06 16:19:25 CDT 2008 + * Generated by UcdReader at:Mon Jun 30 13:52:41 GMT-06:00 2008 * block table size:2 (4 bytes) * plane table size:6 (64 bytes) * * * This table reflects information gleaned from the Unicode Character - * Database, version 5.1.0. All codepoints are included. + * Database. All codepoints are included. * * You might notice that this table is larger than some other implementations. * This is because we included the entire UCD set of codepoints, rather than @@ -71,6 +71,189 @@ +/** + * Look for comments in ucd.h + */ +#include "ucd.h" + + +static UcdBlockData ucd_blocks[] = +{ + { 0x000000, 0x00007f, "Basic Latin" }, + { 0x000080, 0x0000ff, "Latin-1 Supplement" }, + { 0x000100, 0x00017f, "Latin Extended-A" }, + { 0x000180, 0x00024f, "Latin Extended-B" }, + { 0x000250, 0x0002af, "IPA Extensions" }, + { 0x0002b0, 0x0002ff, "Spacing Modifier Letters" }, + { 0x000300, 0x00036f, "Combining Diacritical Marks" }, + { 0x000370, 0x0003ff, "Greek and Coptic" }, + { 0x000400, 0x0004ff, "Cyrillic" }, + { 0x000500, 0x00052f, "Cyrillic Supplement" }, + { 0x000530, 0x00058f, "Armenian" }, + { 0x000590, 0x0005ff, "Hebrew" }, + { 0x000600, 0x0006ff, "Arabic" }, + { 0x000700, 0x00074f, "Syriac" }, + { 0x000750, 0x00077f, "Arabic Supplement" }, + { 0x000780, 0x0007bf, "Thaana" }, + { 0x0007c0, 0x0007ff, "NKo" }, + { 0x000900, 0x00097f, "Devanagari" }, + { 0x000980, 0x0009ff, "Bengali" }, + { 0x000a00, 0x000a7f, "Gurmukhi" }, + { 0x000a80, 0x000aff, "Gujarati" }, + { 0x000b00, 0x000b7f, "Oriya" }, + { 0x000b80, 0x000bff, "Tamil" }, + { 0x000c00, 0x000c7f, "Telugu" }, + { 0x000c80, 0x000cff, "Kannada" }, + { 0x000d00, 0x000d7f, "Malayalam" }, + { 0x000d80, 0x000dff, "Sinhala" }, + { 0x000e00, 0x000e7f, "Thai" }, + { 0x000e80, 0x000eff, "Lao" }, + { 0x000f00, 0x000fff, "Tibetan" }, + { 0x001000, 0x00109f, "Myanmar" }, + { 0x0010a0, 0x0010ff, "Georgian" }, + { 0x001100, 0x0011ff, "Hangul Jamo" }, + { 0x001200, 0x00137f, "Ethiopic" }, + { 0x001380, 0x00139f, "Ethiopic Supplement" }, + { 0x0013a0, 0x0013ff, "Cherokee" }, + { 0x001400, 0x00167f, "Unified Canadian Aboriginal Syllabics" }, + { 0x001680, 0x00169f, "Ogham" }, + { 0x0016a0, 0x0016ff, "Runic" }, + { 0x001700, 0x00171f, "Tagalog" }, + { 0x001720, 0x00173f, "Hanunoo" }, + { 0x001740, 0x00175f, "Buhid" }, + { 0x001760, 0x00177f, "Tagbanwa" }, + { 0x001780, 0x0017ff, "Khmer" }, + { 0x001800, 0x0018af, "Mongolian" }, + { 0x001900, 0x00194f, "Limbu" }, + { 0x001950, 0x00197f, "Tai Le" }, + { 0x001980, 0x0019df, "New Tai Lue" }, + { 0x0019e0, 0x0019ff, "Khmer Symbols" }, + { 0x001a00, 0x001a1f, "Buginese" }, + { 0x001b00, 0x001b7f, "Balinese" }, + { 0x001b80, 0x001bbf, "Sundanese" }, + { 0x001c00, 0x001c4f, "Lepcha" }, + { 0x001c50, 0x001c7f, "Ol Chiki" }, + { 0x001d00, 0x001d7f, "Phonetic Extensions" }, + { 0x001d80, 0x001dbf, "Phonetic Extensions Supplement" }, + { 0x001dc0, 0x001dff, "Combining Diacritical Marks Supplement" }, + { 0x001e00, 0x001eff, "Latin Extended Additional" }, + { 0x001f00, 0x001fff, "Greek Extended" }, + { 0x002000, 0x00206f, "General Punctuation" }, + { 0x002070, 0x00209f, "Superscripts and Subscripts" }, + { 0x0020a0, 0x0020cf, "Currency Symbols" }, + { 0x0020d0, 0x0020ff, "Combining Diacritical Marks for Symbols" }, + { 0x002100, 0x00214f, "Letterlike Symbols" }, + { 0x002150, 0x00218f, "Number Forms" }, + { 0x002190, 0x0021ff, "Arrows" }, + { 0x002200, 0x0022ff, "Mathematical Operators" }, + { 0x002300, 0x0023ff, "Miscellaneous Technical" }, + { 0x002400, 0x00243f, "Control Pictures" }, + { 0x002440, 0x00245f, "Optical Character Recognition" }, + { 0x002460, 0x0024ff, "Enclosed Alphanumerics" }, + { 0x002500, 0x00257f, "Box Drawing" }, + { 0x002580, 0x00259f, "Block Elements" }, + { 0x0025a0, 0x0025ff, "Geometric Shapes" }, + { 0x002600, 0x0026ff, "Miscellaneous Symbols" }, + { 0x002700, 0x0027bf, "Dingbats" }, + { 0x0027c0, 0x0027ef, "Miscellaneous Mathematical Symbols-A" }, + { 0x0027f0, 0x0027ff, "Supplemental Arrows-A" }, + { 0x002800, 0x0028ff, "Braille Patterns" }, + { 0x002900, 0x00297f, "Supplemental Arrows-B" }, + { 0x002980, 0x0029ff, "Miscellaneous Mathematical Symbols-B" }, + { 0x002a00, 0x002aff, "Supplemental Mathematical Operators" }, + { 0x002b00, 0x002bff, "Miscellaneous Symbols and Arrows" }, + { 0x002c00, 0x002c5f, "Glagolitic" }, + { 0x002c60, 0x002c7f, "Latin Extended-C" }, + { 0x002c80, 0x002cff, "Coptic" }, + { 0x002d00, 0x002d2f, "Georgian Supplement" }, + { 0x002d30, 0x002d7f, "Tifinagh" }, + { 0x002d80, 0x002ddf, "Ethiopic Extended" }, + { 0x002de0, 0x002dff, "Cyrillic Extended-A" }, + { 0x002e00, 0x002e7f, "Supplemental Punctuation" }, + { 0x002e80, 0x002eff, "CJK Radicals Supplement" }, + { 0x002f00, 0x002fdf, "Kangxi Radicals" }, + { 0x002ff0, 0x002fff, "Ideographic Description Characters" }, + { 0x003000, 0x00303f, "CJK Symbols and Punctuation" }, + { 0x003040, 0x00309f, "Hiragana" }, + { 0x0030a0, 0x0030ff, "Katakana" }, + { 0x003100, 0x00312f, "Bopomofo" }, + { 0x003130, 0x00318f, "Hangul Compatibility Jamo" }, + { 0x003190, 0x00319f, "Kanbun" }, + { 0x0031a0, 0x0031bf, "Bopomofo Extended" }, + { 0x0031c0, 0x0031ef, "CJK Strokes" }, + { 0x0031f0, 0x0031ff, "Katakana Phonetic Extensions" }, + { 0x003200, 0x0032ff, "Enclosed CJK Letters and Months" }, + { 0x003300, 0x0033ff, "CJK Compatibility" }, + { 0x003400, 0x004dbf, "CJK Unified Ideographs Extension A" }, + { 0x004dc0, 0x004dff, "Yijing Hexagram Symbols" }, + { 0x004e00, 0x009fff, "CJK Unified Ideographs" }, + { 0x00a000, 0x00a48f, "Yi Syllables" }, + { 0x00a490, 0x00a4cf, "Yi Radicals" }, + { 0x00a500, 0x00a63f, "Vai" }, + { 0x00a640, 0x00a69f, "Cyrillic Extended-B" }, + { 0x00a700, 0x00a71f, "Modifier Tone Letters" }, + { 0x00a720, 0x00a7ff, "Latin Extended-D" }, + { 0x00a800, 0x00a82f, "Syloti Nagri" }, + { 0x00a840, 0x00a87f, "Phags-pa" }, + { 0x00a880, 0x00a8df, "Saurashtra" }, + { 0x00a900, 0x00a92f, "Kayah Li" }, + { 0x00a930, 0x00a95f, "Rejang" }, + { 0x00aa00, 0x00aa5f, "Cham" }, + { 0x00ac00, 0x00d7af, "Hangul Syllables" }, + { 0x00d800, 0x00db7f, "High Surrogates" }, + { 0x00db80, 0x00dbff, "High Private Use Surrogates" }, + { 0x00dc00, 0x00dfff, "Low Surrogates" }, + { 0x00e000, 0x00f8ff, "Private Use Area" }, + { 0x00f900, 0x00faff, "CJK Compatibility Ideographs" }, + { 0x00fb00, 0x00fb4f, "Alphabetic Presentation Forms" }, + { 0x00fb50, 0x00fdff, "Arabic Presentation Forms-A" }, + { 0x00fe00, 0x00fe0f, "Variation Selectors" }, + { 0x00fe10, 0x00fe1f, "Vertical Forms" }, + { 0x00fe20, 0x00fe2f, "Combining Half Marks" }, + { 0x00fe30, 0x00fe4f, "CJK Compatibility Forms" }, + { 0x00fe50, 0x00fe6f, "Small Form Variants" }, + { 0x00fe70, 0x00feff, "Arabic Presentation Forms-B" }, + { 0x00ff00, 0x00ffef, "Halfwidth and Fullwidth Forms" }, + { 0x00fff0, 0x00ffff, "Specials" }, + { 0x010000, 0x01007f, "Linear B Syllabary" }, + { 0x010080, 0x0100ff, "Linear B Ideograms" }, + { 0x010100, 0x01013f, "Aegean Numbers" }, + { 0x010140, 0x01018f, "Ancient Greek Numbers" }, + { 0x010190, 0x0101cf, "Ancient Symbols" }, + { 0x0101d0, 0x0101ff, "Phaistos Disc" }, + { 0x010280, 0x01029f, "Lycian" }, + { 0x0102a0, 0x0102df, "Carian" }, + { 0x010300, 0x01032f, "Old Italic" }, + { 0x010330, 0x01034f, "Gothic" }, + { 0x010380, 0x01039f, "Ugaritic" }, + { 0x0103a0, 0x0103df, "Old Persian" }, + { 0x010400, 0x01044f, "Deseret" }, + { 0x010450, 0x01047f, "Shavian" }, + { 0x010480, 0x0104af, "Osmanya" }, + { 0x010800, 0x01083f, "Cypriot Syllabary" }, + { 0x010900, 0x01091f, "Phoenician" }, + { 0x010920, 0x01093f, "Lydian" }, + { 0x010a00, 0x010a5f, "Kharoshthi" }, + { 0x012000, 0x0123ff, "Cuneiform" }, + { 0x012400, 0x01247f, "Cuneiform Numbers and Punctuation" }, + { 0x01d000, 0x01d0ff, "Byzantine Musical Symbols" }, + { 0x01d100, 0x01d1ff, "Musical Symbols" }, + { 0x01d200, 0x01d24f, "Ancient Greek Musical Notation" }, + { 0x01d300, 0x01d35f, "Tai Xuan Jing Symbols" }, + { 0x01d360, 0x01d37f, "Counting Rod Numerals" }, + { 0x01d400, 0x01d7ff, "Mathematical Alphanumeric Symbols" }, + { 0x01f000, 0x01f02f, "Mahjong Tiles" }, + { 0x01f030, 0x01f09f, "Domino Tiles" }, + { 0x020000, 0x02a6df, "CJK Unified Ideographs Extension B" }, + { 0x02f800, 0x02fa1f, "CJK Compatibility Ideographs Supplement" }, + { 0x0e0000, 0x0e007f, "Tags" }, + { 0x0e0100, 0x0e01ef, "Variation Selectors Supplement" }, + { 0x0f0000, 0x0fffff, "Supplementary Private Use Area-A" }, + { 0x100000, 0x10ffff, "Supplementary Private Use Area-B" }, + { 0x000000, 0x10FFFF, (char*)0 } /* No_Block */ +}; + + /** * Plane table has 4352 entries */ @@ -1497,11 +1680,11 @@ static unsigned int prop[] = 0x00067001, 0x00057002, 0x00007002, 0x00007005, /* 268 */ 0x00067001, 0x00057002, 0x00007002, 0x07217002, /* 272 */ 0x00007005, 0x00007005, 0x00007005, 0x00007005, /* 276 */ - 0x000a7001, 0x0007f003, 0x00097002, 0x000a7001, /* 280 */ - 0x0007f003, 0x00097002, 0x000a7001, 0x0007f003, /* 284 */ - 0x00097002, 0x00067001, 0x00057002, 0x00067001, /* 288 */ + 0x000af001, 0x0007f003, 0x0009f002, 0x000af001, /* 280 */ + 0x0007f003, 0x0009f002, 0x000af001, 0x0007f003, /* 284 */ + 0x0009f002, 0x00067001, 0x00057002, 0x00067001, /* 288 */ 0x00057002, 0x013d7002, 0x00067001, 0x00057002, /* 292 */ - 0x07fd7002, 0x000a7001, 0x0007f003, 0x00097002, /* 296 */ + 0x07fd7002, 0x000af001, 0x0007f003, 0x0009f002, /* 296 */ 0x00067001, 0x00057002, 0x067e7001, 0x07227001, /* 300 */ 0x05fa7001, 0x00007002, 0x00067001, 0x00057002, /* 304 */ 0x00007002, 0x00007002, 0x00007002, 0x00007002, /* 308 */ @@ -2082,11 +2265,17 @@ static unsigned int prop[] = #define UNI_CODE(ch) (prop[block[plane[(ch>>8)&8191]+((ch>>2)&63)]+(ch&3)]) +/** + * Get type part of code + + */ +#define UNI_CODE_TO_TYPE(ch) (ch & 0x1f) + /** * Fetch the category type */ -#define UNI_TYPE(ch) (UNI_CODE(ch) & 0x1f) +#define UNI_TYPE(ch) (UNI_CODE_TO_TYPE(UNI_CODE(ch))) /** * Fetch the digit offset @@ -2238,10 +2427,6 @@ static CaseMapEntry caseMap[] = -/** - * Look for comments in ucd.h - */ -#include "ucd.h" @@ -2282,28 +2467,28 @@ int uni_is_defined(int ch) int uni_is_letter(int ch) { - int c = UNI_CODE(ch); + int c = UNI_TYPE(ch); return (c>=UNI_UPPERCASE_LETTER && c<=UNI_OTHER_LETTER); } int uni_is_letter_or_digit(int ch) { - int c = UNI_CODE(ch); + int c = UNI_TYPE(ch); return ((c>=UNI_UPPERCASE_LETTER && c<=UNI_OTHER_LETTER) || c==UNI_DECIMAL_DIGIT_NUMBER); } int uni_is_space(int ch) { - int c = UNI_CODE(ch); + int c = UNI_TYPE(ch); return (c==UNI_SPACE_SEPARATOR || c==UNI_LINE_SEPARATOR - || c==UNI_PARAGRAPH_SEPARATOR); + || c==UNI_PARAGRAPH_SEPARATOR || (ch>= 0x09 && ch <= 0x0d)); } int uni_to_lower(int ch) { int c = UNI_CODE(ch); - if (c == UNI_LOWERCASE_LETTER) + if (UNI_CODE_TO_TYPE(c) == UNI_LOWERCASE_LETTER) return ch; ch -= (c>>18) & 0x1ff; return ch; @@ -2312,7 +2497,7 @@ int uni_to_lower(int ch) int uni_to_upper(int ch) { int c = UNI_CODE(ch); - if (c == UNI_UPPERCASE_LETTER) + if (UNI_CODE_TO_TYPE(c) == UNI_UPPERCASE_LETTER) return ch; ch += (c>>18) & 0x1ff; return ch; @@ -2321,12 +2506,30 @@ int uni_to_upper(int ch) int uni_to_title(int ch) { int c = UNI_CODE(ch); - if (c == UNI_TITLECASE_LETTER) + if (UNI_CODE_TO_TYPE(c) == UNI_TITLECASE_LETTER) return ch; ch += (c>>18) & 0x1ff; return ch; } +int uni_block(int ch) +{ + int ret; + UcdBlockData *entry; + for (entry = ucd_blocks, ret=0 ; entry->name ; entry++, ret++) + if (ch >= entry->low && ch <= entry->high) + return ret; + return UCD_BLOCK_NO_BLOCK; +} + +UcdBlockData *uni_block_data(int blockNr) +{ + if (blockNr >= 0 && blockNr < UCD_BLOCK_NO_BLOCK) + return &(ucd_blocks[blockNr]); + else + return (UcdBlockData *)0; +} +