Code

added unicode block (codepoint ranges) lookups
authorishmal <ishmal@users.sourceforge.net>
Tue, 24 Jun 2008 18:09:01 +0000 (18:09 +0000)
committerishmal <ishmal@users.sourceforge.net>
Tue, 24 Jun 2008 18:09:01 +0000 (18:09 +0000)
src/dom/ucd.cpp
src/dom/ucd.h

index 862c6de2882c9fb19908755fdc3d579389ce5680..602c716306f177af1e2f351eaf85b9453edfbdde 100644 (file)
@@ -1,11 +1,11 @@
 /*
- * Generated by UcdReader at:Tue May 06 16:19:25 CDT 2008
+ * Generated by UcdReader at:Tue Jun 24 11:58:24 GMT-06:00 2008
  * block table size:2 (4 bytes)
  * plane table size:6 (64 bytes)
  *
  *
  *  This table reflects information gleaned from the Unicode Character
- *  Database, version 5.1.0.  All codepoints are included.
+ *  Database.  All codepoints are included.
  *
  *  You might notice that this table is larger than some other implementations.
  *  This is because we included the entire UCD set of codepoints, rather than
 
 
 
+/**
+ * Look for comments in ucd.h
+ */
+#include "ucd.h"
+
+/**
+ * Table of Unicode blocks, one { low, high, name } entry per block,
+ * listed in ascending codepoint order.  "low" and "high" are the
+ * inclusive ends of the block's codepoint range.
+ *
+ * uni_block() returns the index of the matching entry in this array
+ * and uni_block_data() indexes the array with that number, so the
+ * order of the entries must stay in step with the UCD_BLOCK_*
+ * enumeration in ucd.h.
+ *
+ * The ranges do not cover every codepoint (for example nothing is
+ * listed between 0x0007ff and 0x000900).  The final entry has a null
+ * name; uni_block() stops scanning when it reaches it.
+ */
+static UcdBlockData ucd_blocks[] =
+{
+    { 0x000000, 0x00007f, "Basic Latin" },
+    { 0x000080, 0x0000ff, "Latin-1 Supplement" },
+    { 0x000100, 0x00017f, "Latin Extended-A" },
+    { 0x000180, 0x00024f, "Latin Extended-B" },
+    { 0x000250, 0x0002af, "IPA Extensions" },
+    { 0x0002b0, 0x0002ff, "Spacing Modifier Letters" },
+    { 0x000300, 0x00036f, "Combining Diacritical Marks" },
+    { 0x000370, 0x0003ff, "Greek and Coptic" },
+    { 0x000400, 0x0004ff, "Cyrillic" },
+    { 0x000500, 0x00052f, "Cyrillic Supplement" },
+    { 0x000530, 0x00058f, "Armenian" },
+    { 0x000590, 0x0005ff, "Hebrew" },
+    { 0x000600, 0x0006ff, "Arabic" },
+    { 0x000700, 0x00074f, "Syriac" },
+    { 0x000750, 0x00077f, "Arabic Supplement" },
+    { 0x000780, 0x0007bf, "Thaana" },
+    { 0x0007c0, 0x0007ff, "NKo" },
+    { 0x000900, 0x00097f, "Devanagari" },
+    { 0x000980, 0x0009ff, "Bengali" },
+    { 0x000a00, 0x000a7f, "Gurmukhi" },
+    { 0x000a80, 0x000aff, "Gujarati" },
+    { 0x000b00, 0x000b7f, "Oriya" },
+    { 0x000b80, 0x000bff, "Tamil" },
+    { 0x000c00, 0x000c7f, "Telugu" },
+    { 0x000c80, 0x000cff, "Kannada" },
+    { 0x000d00, 0x000d7f, "Malayalam" },
+    { 0x000d80, 0x000dff, "Sinhala" },
+    { 0x000e00, 0x000e7f, "Thai" },
+    { 0x000e80, 0x000eff, "Lao" },
+    { 0x000f00, 0x000fff, "Tibetan" },
+    { 0x001000, 0x00109f, "Myanmar" },
+    { 0x0010a0, 0x0010ff, "Georgian" },
+    { 0x001100, 0x0011ff, "Hangul Jamo" },
+    { 0x001200, 0x00137f, "Ethiopic" },
+    { 0x001380, 0x00139f, "Ethiopic Supplement" },
+    { 0x0013a0, 0x0013ff, "Cherokee" },
+    { 0x001400, 0x00167f, "Unified Canadian Aboriginal Syllabics" },
+    { 0x001680, 0x00169f, "Ogham" },
+    { 0x0016a0, 0x0016ff, "Runic" },
+    { 0x001700, 0x00171f, "Tagalog" },
+    { 0x001720, 0x00173f, "Hanunoo" },
+    { 0x001740, 0x00175f, "Buhid" },
+    { 0x001760, 0x00177f, "Tagbanwa" },
+    { 0x001780, 0x0017ff, "Khmer" },
+    { 0x001800, 0x0018af, "Mongolian" },
+    { 0x001900, 0x00194f, "Limbu" },
+    { 0x001950, 0x00197f, "Tai Le" },
+    { 0x001980, 0x0019df, "New Tai Lue" },
+    { 0x0019e0, 0x0019ff, "Khmer Symbols" },
+    { 0x001a00, 0x001a1f, "Buginese" },
+    { 0x001b00, 0x001b7f, "Balinese" },
+    { 0x001b80, 0x001bbf, "Sundanese" },
+    { 0x001c00, 0x001c4f, "Lepcha" },
+    { 0x001c50, 0x001c7f, "Ol Chiki" },
+    { 0x001d00, 0x001d7f, "Phonetic Extensions" },
+    { 0x001d80, 0x001dbf, "Phonetic Extensions Supplement" },
+    { 0x001dc0, 0x001dff, "Combining Diacritical Marks Supplement" },
+    { 0x001e00, 0x001eff, "Latin Extended Additional" },
+    { 0x001f00, 0x001fff, "Greek Extended" },
+    { 0x002000, 0x00206f, "General Punctuation" },
+    { 0x002070, 0x00209f, "Superscripts and Subscripts" },
+    { 0x0020a0, 0x0020cf, "Currency Symbols" },
+    { 0x0020d0, 0x0020ff, "Combining Diacritical Marks for Symbols" },
+    { 0x002100, 0x00214f, "Letterlike Symbols" },
+    { 0x002150, 0x00218f, "Number Forms" },
+    { 0x002190, 0x0021ff, "Arrows" },
+    { 0x002200, 0x0022ff, "Mathematical Operators" },
+    { 0x002300, 0x0023ff, "Miscellaneous Technical" },
+    { 0x002400, 0x00243f, "Control Pictures" },
+    { 0x002440, 0x00245f, "Optical Character Recognition" },
+    { 0x002460, 0x0024ff, "Enclosed Alphanumerics" },
+    { 0x002500, 0x00257f, "Box Drawing" },
+    { 0x002580, 0x00259f, "Block Elements" },
+    { 0x0025a0, 0x0025ff, "Geometric Shapes" },
+    { 0x002600, 0x0026ff, "Miscellaneous Symbols" },
+    { 0x002700, 0x0027bf, "Dingbats" },
+    { 0x0027c0, 0x0027ef, "Miscellaneous Mathematical Symbols-A" },
+    { 0x0027f0, 0x0027ff, "Supplemental Arrows-A" },
+    { 0x002800, 0x0028ff, "Braille Patterns" },
+    { 0x002900, 0x00297f, "Supplemental Arrows-B" },
+    { 0x002980, 0x0029ff, "Miscellaneous Mathematical Symbols-B" },
+    { 0x002a00, 0x002aff, "Supplemental Mathematical Operators" },
+    { 0x002b00, 0x002bff, "Miscellaneous Symbols and Arrows" },
+    { 0x002c00, 0x002c5f, "Glagolitic" },
+    { 0x002c60, 0x002c7f, "Latin Extended-C" },
+    { 0x002c80, 0x002cff, "Coptic" },
+    { 0x002d00, 0x002d2f, "Georgian Supplement" },
+    { 0x002d30, 0x002d7f, "Tifinagh" },
+    { 0x002d80, 0x002ddf, "Ethiopic Extended" },
+    { 0x002de0, 0x002dff, "Cyrillic Extended-A" },
+    { 0x002e00, 0x002e7f, "Supplemental Punctuation" },
+    { 0x002e80, 0x002eff, "CJK Radicals Supplement" },
+    { 0x002f00, 0x002fdf, "Kangxi Radicals" },
+    { 0x002ff0, 0x002fff, "Ideographic Description Characters" },
+    { 0x003000, 0x00303f, "CJK Symbols and Punctuation" },
+    { 0x003040, 0x00309f, "Hiragana" },
+    { 0x0030a0, 0x0030ff, "Katakana" },
+    { 0x003100, 0x00312f, "Bopomofo" },
+    { 0x003130, 0x00318f, "Hangul Compatibility Jamo" },
+    { 0x003190, 0x00319f, "Kanbun" },
+    { 0x0031a0, 0x0031bf, "Bopomofo Extended" },
+    { 0x0031c0, 0x0031ef, "CJK Strokes" },
+    { 0x0031f0, 0x0031ff, "Katakana Phonetic Extensions" },
+    { 0x003200, 0x0032ff, "Enclosed CJK Letters and Months" },
+    { 0x003300, 0x0033ff, "CJK Compatibility" },
+    { 0x003400, 0x004dbf, "CJK Unified Ideographs Extension A" },
+    { 0x004dc0, 0x004dff, "Yijing Hexagram Symbols" },
+    { 0x004e00, 0x009fff, "CJK Unified Ideographs" },
+    { 0x00a000, 0x00a48f, "Yi Syllables" },
+    { 0x00a490, 0x00a4cf, "Yi Radicals" },
+    { 0x00a500, 0x00a63f, "Vai" },
+    { 0x00a640, 0x00a69f, "Cyrillic Extended-B" },
+    { 0x00a700, 0x00a71f, "Modifier Tone Letters" },
+    { 0x00a720, 0x00a7ff, "Latin Extended-D" },
+    { 0x00a800, 0x00a82f, "Syloti Nagri" },
+    { 0x00a840, 0x00a87f, "Phags-pa" },
+    { 0x00a880, 0x00a8df, "Saurashtra" },
+    { 0x00a900, 0x00a92f, "Kayah Li" },
+    { 0x00a930, 0x00a95f, "Rejang" },
+    { 0x00aa00, 0x00aa5f, "Cham" },
+    { 0x00ac00, 0x00d7af, "Hangul Syllables" },
+    { 0x00d800, 0x00db7f, "High Surrogates" },
+    { 0x00db80, 0x00dbff, "High Private Use Surrogates" },
+    { 0x00dc00, 0x00dfff, "Low Surrogates" },
+    { 0x00e000, 0x00f8ff, "Private Use Area" },
+    { 0x00f900, 0x00faff, "CJK Compatibility Ideographs" },
+    { 0x00fb00, 0x00fb4f, "Alphabetic Presentation Forms" },
+    { 0x00fb50, 0x00fdff, "Arabic Presentation Forms-A" },
+    { 0x00fe00, 0x00fe0f, "Variation Selectors" },
+    { 0x00fe10, 0x00fe1f, "Vertical Forms" },
+    { 0x00fe20, 0x00fe2f, "Combining Half Marks" },
+    { 0x00fe30, 0x00fe4f, "CJK Compatibility Forms" },
+    { 0x00fe50, 0x00fe6f, "Small Form Variants" },
+    { 0x00fe70, 0x00feff, "Arabic Presentation Forms-B" },
+    { 0x00ff00, 0x00ffef, "Halfwidth and Fullwidth Forms" },
+    { 0x00fff0, 0x00ffff, "Specials" },
+    { 0x010000, 0x01007f, "Linear B Syllabary" },
+    { 0x010080, 0x0100ff, "Linear B Ideograms" },
+    { 0x010100, 0x01013f, "Aegean Numbers" },
+    { 0x010140, 0x01018f, "Ancient Greek Numbers" },
+    { 0x010190, 0x0101cf, "Ancient Symbols" },
+    { 0x0101d0, 0x0101ff, "Phaistos Disc" },
+    { 0x010280, 0x01029f, "Lycian" },
+    { 0x0102a0, 0x0102df, "Carian" },
+    { 0x010300, 0x01032f, "Old Italic" },
+    { 0x010330, 0x01034f, "Gothic" },
+    { 0x010380, 0x01039f, "Ugaritic" },
+    { 0x0103a0, 0x0103df, "Old Persian" },
+    { 0x010400, 0x01044f, "Deseret" },
+    { 0x010450, 0x01047f, "Shavian" },
+    { 0x010480, 0x0104af, "Osmanya" },
+    { 0x010800, 0x01083f, "Cypriot Syllabary" },
+    { 0x010900, 0x01091f, "Phoenician" },
+    { 0x010920, 0x01093f, "Lydian" },
+    { 0x010a00, 0x010a5f, "Kharoshthi" },
+    { 0x012000, 0x0123ff, "Cuneiform" },
+    { 0x012400, 0x01247f, "Cuneiform Numbers and Punctuation" },
+    { 0x01d000, 0x01d0ff, "Byzantine Musical Symbols" },
+    { 0x01d100, 0x01d1ff, "Musical Symbols" },
+    { 0x01d200, 0x01d24f, "Ancient Greek Musical Notation" },
+    { 0x01d300, 0x01d35f, "Tai Xuan Jing Symbols" },
+    { 0x01d360, 0x01d37f, "Counting Rod Numerals" },
+    { 0x01d400, 0x01d7ff, "Mathematical Alphanumeric Symbols" },
+    { 0x01f000, 0x01f02f, "Mahjong Tiles" },
+    { 0x01f030, 0x01f09f, "Domino Tiles" },
+    { 0x020000, 0x02a6df, "CJK Unified Ideographs Extension B" },
+    { 0x02f800, 0x02fa1f, "CJK Compatibility Ideographs Supplement" },
+    { 0x0e0000, 0x0e007f, "Tags" },
+    { 0x0e0100, 0x0e01ef, "Variation Selectors Supplement" },
+    { 0x0f0000, 0x0fffff, "Supplementary Private Use Area-A" },
+    { 0x100000, 0x10ffff, "Supplementary Private Use Area-B" },
+    { 0x000000, 0x10FFFF, (char*)0 } /* No_Block */
+};
+
+
 /**
  * Plane table has 4352 entries
  */
@@ -1497,11 +1680,11 @@ static unsigned int prop[] =
   0x00067001,  0x00057002,  0x00007002,  0x00007005, /* 268 */
   0x00067001,  0x00057002,  0x00007002,  0x07217002, /* 272 */
   0x00007005,  0x00007005,  0x00007005,  0x00007005, /* 276 */
-  0x000a7001,  0x0007f003,  0x00097002,  0x000a7001, /* 280 */
-  0x0007f003,  0x00097002,  0x000a7001,  0x0007f003, /* 284 */
-  0x00097002,  0x00067001,  0x00057002,  0x00067001, /* 288 */
+  0x000af001,  0x0007f003,  0x0009f002,  0x000af001, /* 280 */
+  0x0007f003,  0x0009f002,  0x000af001,  0x0007f003, /* 284 */
+  0x0009f002,  0x00067001,  0x00057002,  0x00067001, /* 288 */
   0x00057002,  0x013d7002,  0x00067001,  0x00057002, /* 292 */
-  0x07fd7002,  0x000a7001,  0x0007f003,  0x00097002, /* 296 */
+  0x07fd7002,  0x000af001,  0x0007f003,  0x0009f002, /* 296 */
   0x00067001,  0x00057002,  0x067e7001,  0x07227001, /* 300 */
   0x05fa7001,  0x00007002,  0x00067001,  0x00057002, /* 304 */
   0x00007002,  0x00007002,  0x00007002,  0x00007002, /* 308 */
@@ -2238,10 +2421,6 @@ static CaseMapEntry caseMap[] =
 
 
 
-/**
- * Look for comments in ucd.h
- */
-#include "ucd.h"
 
 
 
@@ -2327,6 +2506,24 @@ int uni_to_title(int ch)
     return ch;
 }
 
+/**
+ * Linear search of ucd_blocks for the block containing a codepoint.
+ * @param ch the Unicode codepoint to search
+ * @return index of the first table entry whose [low, high] range
+ *         contains ch, or UCD_BLOCK_NO_BLOCK when the scan reaches
+ *         the null-named terminator entry without a match
+ */
+int uni_block(int ch)
+{
+    /* Same conversion the range comparisons would apply implicitly: a
+       negative ch becomes a value above every "high" bound in the table,
+       so it never matches. */
+    const unsigned long codepoint = (unsigned long)ch;
+    int blockNr = 0;
+
+    while (ucd_blocks[blockNr].name)
+    {
+        const UcdBlockData *block = &ucd_blocks[blockNr];
+        if (block->low <= codepoint && codepoint <= block->high)
+            return blockNr;
+        blockNr++;
+    }
+    return UCD_BLOCK_NO_BLOCK;
+}
+
+/**
+ * Fetch the ucd_blocks entry for a block number.
+ * @param blockNr the block number, as returned by uni_block()
+ * @return pointer to the table entry, or a null pointer when blockNr
+ *         is negative or is UCD_BLOCK_NO_BLOCK or greater
+ */
+UcdBlockData *uni_block_data(int blockNr)
+{
+    /* Reject anything outside 0 .. UCD_BLOCK_NO_BLOCK-1 up front */
+    if (blockNr < 0 || blockNr >= UCD_BLOCK_NO_BLOCK)
+        return (UcdBlockData *)0;
+
+    return ucd_blocks + blockNr;
+}
+
 
 
 
index 92c0a7525c0055f31560ad3f20608cef8d09e117..d8b301d476d2961314430ffaf6f1b00f673b877b 100644 (file)
@@ -1,16 +1,7 @@
 /**
  *
- * Phoebe DOM Implementation.
+ * Inkscape Unicode Character Database (UCD) 5.1.0. Utility 
  *
- * This is a C++ approximation of the W3C DOM model, which follows
- * fairly closely the specifications in the various .idl files, copies of
- * which are provided for reference.  Most important is this one:
- *
- * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
- * 
- * More thorough explanations of the various classes and their algorithms
- * can be found there.
- *     
  *
  * Authors:
  *   Bob Jamison
@@ -20,7 +11,7 @@
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Lesser General Public
  *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
+ *  version 3 of the License, or (at your option) any later version.
  *
  *  This library is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  You should have received a copy of the GNU Lesser General Public
  *  License along with this library; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- *  
- * =======================================================================
- * NOTES:   
- * 
- * This code is from another project, also by Bob Jamison, so no permissions
- * were required.  :-)   It has been tested on 32 and 64 bits. 
- *  
- * This table contains the codepoints from the Unicode Character Database (UCD),
- * version 5.1.0. 
  *
  */
 #ifndef __UCD_H__
 #define __UCD_H__
 
 
+/************************************************
+** Unicode character classification
+************************************************/
+
 
 /**
  * Enumerated Unicode general category types
@@ -82,7 +68,7 @@ typedef enum UniCharType
     UNI_OTHER_SYMBOL              = 28,  /* So */
     UNI_INITIAL_QUOTE_PUNCTUATION = 29,  /* Pi */
     UNI_FINAL_QUOTE_PUNCTUATION   = 30   /* Pf */
-} UniCharType;
+} UnicodeCharType;
 
 
 /**
@@ -162,6 +148,11 @@ int uni_is_letter_or_digit(int ch);
  */
 int uni_is_space(int ch);
 
+
+/************************************************
+** Unicode case conversion
+************************************************/
+
 /**
  * Convert the given codepoint to its lower case mapping.
  * If there is none, return the codepoint.
@@ -187,7 +178,148 @@ int uni_to_upper(int ch);
 int uni_to_title(int ch);
 
 
+/************************************************
+** Unicode blocks
+************************************************/
+
+
+
+/**
+ * Used to hold the information for a Unicode codepoint
+ * block
+ */
+typedef struct
+{
+    /**
+     * Low end of the block range
+     */
+    unsigned long low;
+    /**
+     * High end of the block range
+     */
+    unsigned long high;
+    /**
+     * Name string for the block
+     */
+    const char    *name;
+} UcdBlockData;
+
+
+/**
+ * Return the number of the Unicode block (see the UCD_BLOCK_*
+ * enumeration below) that contains the given codepoint.
+ * If not found, return UCD_BLOCK_NO_BLOCK.
+ * @param ch the Unicode codepoint to search
+ * @return the block number
+ */
+int uni_block(int ch);
+
+
+/**
+ * Return the Unicode block data for the enumerated block number.
+ * @param blockNr the Unicode block number
+ * @return the block data if found, else NULL.  NULL is also returned
+ *         for UCD_BLOCK_NO_BLOCK and for any out-of-range number.
+ */
+UcdBlockData *uni_block_data(int blockNr);
+
 
-#endif /* __UCD_H__ */
 
            
+/**
+ * The Unicode codepoint blocks as defined in Blocks.txt.
+ * Block list has 171 entries
+ */
+typedef enum
+{
+    /*   0, 000000 - 00007f */  UCD_BLOCK_BASIC_LATIN,
+    /*   2, 000100 - 00017f */  UCD_BLOCK_LATIN_EXTENDED_A,
+    /*   4, 000250 - 0002af */  UCD_BLOCK_IPA_EXTENSIONS,
+    /*   6, 000300 - 00036f */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS,
+    /*   8, 000400 - 0004ff */  UCD_BLOCK_CYRILLIC,
+    /*  10, 000530 - 00058f */  UCD_BLOCK_ARMENIAN,
+    /*  12, 000600 - 0006ff */  UCD_BLOCK_ARABIC,
+    /*  14, 000750 - 00077f */  UCD_BLOCK_ARABIC_SUPPLEMENT,
+    /*  16, 0007c0 - 0007ff */  UCD_BLOCK_NKO,
+    /*  18, 000980 - 0009ff */  UCD_BLOCK_BENGALI,
+    /*  20, 000a80 - 000aff */  UCD_BLOCK_GUJARATI,
+    /*  22, 000b80 - 000bff */  UCD_BLOCK_TAMIL,
+    /*  24, 000c80 - 000cff */  UCD_BLOCK_KANNADA,
+    /*  26, 000d80 - 000dff */  UCD_BLOCK_SINHALA,
+    /*  28, 000e80 - 000eff */  UCD_BLOCK_LAO,
+    /*  30, 001000 - 00109f */  UCD_BLOCK_MYANMAR,
+    /*  32, 001100 - 0011ff */  UCD_BLOCK_HANGUL_JAMO,
+    /*  34, 001380 - 00139f */  UCD_BLOCK_ETHIOPIC_SUPPLEMENT,
+    /*  36, 001400 - 00167f */  UCD_BLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
+    /*  38, 0016a0 - 0016ff */  UCD_BLOCK_RUNIC,
+    /*  40, 001720 - 00173f */  UCD_BLOCK_HANUNOO,
+    /*  42, 001760 - 00177f */  UCD_BLOCK_TAGBANWA,
+    /*  44, 001800 - 0018af */  UCD_BLOCK_MONGOLIAN,
+    /*  46, 001950 - 00197f */  UCD_BLOCK_TAI_LE,
+    /*  48, 0019e0 - 0019ff */  UCD_BLOCK_KHMER_SYMBOLS,
+    /*  50, 001b00 - 001b7f */  UCD_BLOCK_BALINESE,
+    /*  52, 001c00 - 001c4f */  UCD_BLOCK_LEPCHA,
+    /*  54, 001d00 - 001d7f */  UCD_BLOCK_PHONETIC_EXTENSIONS,
+    /*  56, 001dc0 - 001dff */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
+    /*  58, 001f00 - 001fff */  UCD_BLOCK_GREEK_EXTENDED,
+    /*  60, 002070 - 00209f */  UCD_BLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS,
+    /*  62, 0020d0 - 0020ff */  UCD_BLOCK_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,
+    /*  64, 002150 - 00218f */  UCD_BLOCK_NUMBER_FORMS,
+    /*  66, 002200 - 0022ff */  UCD_BLOCK_MATHEMATICAL_OPERATORS,
+    /*  68, 002400 - 00243f */  UCD_BLOCK_CONTROL_PICTURES,
+    /*  70, 002460 - 0024ff */  UCD_BLOCK_ENCLOSED_ALPHANUMERICS,
+    /*  72, 002580 - 00259f */  UCD_BLOCK_BLOCK_ELEMENTS,
+    /*  74, 002600 - 0026ff */  UCD_BLOCK_MISCELLANEOUS_SYMBOLS,
+    /*  76, 0027c0 - 0027ef */  UCD_BLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
+    /*  78, 002800 - 0028ff */  UCD_BLOCK_BRAILLE_PATTERNS,
+    /*  80, 002980 - 0029ff */  UCD_BLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
+    /*  82, 002b00 - 002bff */  UCD_BLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS,
+    /*  84, 002c60 - 002c7f */  UCD_BLOCK_LATIN_EXTENDED_C,
+    /*  86, 002d00 - 002d2f */  UCD_BLOCK_GEORGIAN_SUPPLEMENT,
+    /*  88, 002d80 - 002ddf */  UCD_BLOCK_ETHIOPIC_EXTENDED,
+    /*  90, 002e00 - 002e7f */  UCD_BLOCK_SUPPLEMENTAL_PUNCTUATION,
+    /*  92, 002f00 - 002fdf */  UCD_BLOCK_KANGXI_RADICALS,
+    /*  94, 003000 - 00303f */  UCD_BLOCK_CJK_SYMBOLS_AND_PUNCTUATION,
+    /*  96, 0030a0 - 0030ff */  UCD_BLOCK_KATAKANA,
+    /*  98, 003130 - 00318f */  UCD_BLOCK_HANGUL_COMPATIBILITY_JAMO,
+    /* 100, 0031a0 - 0031bf */  UCD_BLOCK_BOPOMOFO_EXTENDED,
+    /* 102, 0031f0 - 0031ff */  UCD_BLOCK_KATAKANA_PHONETIC_EXTENSIONS,
+    /* 104, 003300 - 0033ff */  UCD_BLOCK_CJK_COMPATIBILITY,
+    /* 106, 004dc0 - 004dff */  UCD_BLOCK_YIJING_HEXAGRAM_SYMBOLS,
+    /* 108, 00a000 - 00a48f */  UCD_BLOCK_YI_SYLLABLES,
+    /* 110, 00a500 - 00a63f */  UCD_BLOCK_VAI,
+    /* 112, 00a700 - 00a71f */  UCD_BLOCK_MODIFIER_TONE_LETTERS,
+    /* 114, 00a800 - 00a82f */  UCD_BLOCK_SYLOTI_NAGRI,
+    /* 116, 00a880 - 00a8df */  UCD_BLOCK_SAURASHTRA,
+    /* 118, 00a930 - 00a95f */  UCD_BLOCK_REJANG,
+    /* 120, 00ac00 - 00d7af */  UCD_BLOCK_HANGUL_SYLLABLES,
+    /* 122, 00db80 - 00dbff */  UCD_BLOCK_HIGH_PRIVATE_USE_SURROGATES,
+    /* 124, 00e000 - 00f8ff */  UCD_BLOCK_PRIVATE_USE_AREA,
+    /* 126, 00fb00 - 00fb4f */  UCD_BLOCK_ALPHABETIC_PRESENTATION_FORMS,
+    /* 128, 00fe00 - 00fe0f */  UCD_BLOCK_VARIATION_SELECTORS,
+    /* 130, 00fe20 - 00fe2f */  UCD_BLOCK_COMBINING_HALF_MARKS,
+    /* 132, 00fe50 - 00fe6f */  UCD_BLOCK_SMALL_FORM_VARIANTS,
+    /* 134, 00ff00 - 00ffef */  UCD_BLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS,
+    /* 136, 010000 - 01007f */  UCD_BLOCK_LINEAR_B_SYLLABARY,
+    /* 138, 010100 - 01013f */  UCD_BLOCK_AEGEAN_NUMBERS,
+    /* 140, 010190 - 0101cf */  UCD_BLOCK_ANCIENT_SYMBOLS,
+    /* 142, 010280 - 01029f */  UCD_BLOCK_LYCIAN,
+    /* 144, 010300 - 01032f */  UCD_BLOCK_OLD_ITALIC,
+    /* 146, 010380 - 01039f */  UCD_BLOCK_UGARITIC,
+    /* 148, 010400 - 01044f */  UCD_BLOCK_DESERET,
+    /* 150, 010480 - 0104af */  UCD_BLOCK_OSMANYA,
+    /* 152, 010900 - 01091f */  UCD_BLOCK_PHOENICIAN,
+    /* 154, 010a00 - 010a5f */  UCD_BLOCK_KHAROSHTHI,
+    /* 156, 012400 - 01247f */  UCD_BLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION,
+    /* 158, 01d100 - 01d1ff */  UCD_BLOCK_MUSICAL_SYMBOLS,
+    /* 160, 01d300 - 01d35f */  UCD_BLOCK_TAI_XUAN_JING_SYMBOLS,
+    /* 162, 01d400 - 01d7ff */  UCD_BLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
+    /* 164, 01f030 - 01f09f */  UCD_BLOCK_DOMINO_TILES,
+    /* 166, 02f800 - 02fa1f */  UCD_BLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
+    /* 168, 0e0100 - 0e01ef */  UCD_BLOCK_VARIATION_SELECTORS_SUPPLEMENT,
+    /* 170, 100000 - 10ffff */  UCD_BLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B,
+    /* 171, 000000 - 10ffff */  UCD_BLOCK_NO_BLOCK
+} UnicodeBlocks;
+
+
+#endif /* __UCD_H__ */
+
+