ICU 49.1.1
49.1.1
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1997-2011, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File UCHAR.H 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 04/02/97 aliu Creation. 00013 * 03/29/99 helena Updated for C APIs. 00014 * 4/15/99 Madhu Updated for C Implementation and Javadoc 00015 * 5/20/99 Madhu Added the function u_getVersion() 00016 * 8/19/1999 srl Upgraded scripts to Unicode 3.0 00017 * 8/27/1999 schererm UCharDirection constants: U_... 00018 * 11/11/1999 weiv added u_isalnum(), cleaned comments 00019 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). 00020 ****************************************************************************** 00021 */ 00022 00023 #ifndef UCHAR_H 00024 #define UCHAR_H 00025 00026 #include "unicode/utypes.h" 00027 00028 U_CDECL_BEGIN 00029 00030 /*==========================================================================*/ 00031 /* Unicode version number */ 00032 /*==========================================================================*/ 00042 #define U_UNICODE_VERSION "6.1" 00043 00124 #define UCHAR_MIN_VALUE 0 00125 00134 #define UCHAR_MAX_VALUE 0x10ffff 00135 00140 #define U_MASK(x) ((uint32_t)1<<(x)) 00141 00161 typedef enum UProperty { 00162 /* 00163 * Note: UProperty constants are parsed by preparseucd.py. 00164 * It matches lines like 00165 * UCHAR_<Unicode property name>=<integer>, 00166 */ 00167 00168 /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that 00169 debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, 00170 rather than UCHAR_BINARY_START. Likewise for other *_START 00171 identifiers. 
*/ 00172 00175 UCHAR_ALPHABETIC=0, 00177 UCHAR_BINARY_START=UCHAR_ALPHABETIC, 00179 UCHAR_ASCII_HEX_DIGIT=1, 00183 UCHAR_BIDI_CONTROL=2, 00188 UCHAR_BIDI_MIRRORED=3, 00190 UCHAR_DASH=4, 00194 UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, 00197 UCHAR_DEPRECATED=6, 00200 UCHAR_DIACRITIC=7, 00204 UCHAR_EXTENDER=8, 00208 UCHAR_FULL_COMPOSITION_EXCLUSION=9, 00212 UCHAR_GRAPHEME_BASE=10, 00216 UCHAR_GRAPHEME_EXTEND=11, 00219 UCHAR_GRAPHEME_LINK=12, 00222 UCHAR_HEX_DIGIT=13, 00225 UCHAR_HYPHEN=14, 00230 UCHAR_ID_CONTINUE=15, 00234 UCHAR_ID_START=16, 00237 UCHAR_IDEOGRAPHIC=17, 00241 UCHAR_IDS_BINARY_OPERATOR=18, 00245 UCHAR_IDS_TRINARY_OPERATOR=19, 00248 UCHAR_JOIN_CONTROL=20, 00252 UCHAR_LOGICAL_ORDER_EXCEPTION=21, 00255 UCHAR_LOWERCASE=22, 00257 UCHAR_MATH=23, 00261 UCHAR_NONCHARACTER_CODE_POINT=24, 00263 UCHAR_QUOTATION_MARK=25, 00267 UCHAR_RADICAL=26, 00272 UCHAR_SOFT_DOTTED=27, 00276 UCHAR_TERMINAL_PUNCTUATION=28, 00280 UCHAR_UNIFIED_IDEOGRAPH=29, 00283 UCHAR_UPPERCASE=30, 00287 UCHAR_WHITE_SPACE=31, 00291 UCHAR_XID_CONTINUE=32, 00294 UCHAR_XID_START=33, 00298 UCHAR_CASE_SENSITIVE=34, 00303 UCHAR_S_TERM=35, 00309 UCHAR_VARIATION_SELECTOR=36, 00316 UCHAR_NFD_INERT=37, 00323 UCHAR_NFKD_INERT=38, 00330 UCHAR_NFC_INERT=39, 00337 UCHAR_NFKC_INERT=40, 00348 UCHAR_SEGMENT_STARTER=41, 00353 UCHAR_PATTERN_SYNTAX=42, 00358 UCHAR_PATTERN_WHITE_SPACE=43, 00363 UCHAR_POSIX_ALNUM=44, 00368 UCHAR_POSIX_BLANK=45, 00373 UCHAR_POSIX_GRAPH=46, 00378 UCHAR_POSIX_PRINT=47, 00383 UCHAR_POSIX_XDIGIT=48, 00385 UCHAR_CASED=49, 00387 UCHAR_CASE_IGNORABLE=50, 00389 UCHAR_CHANGES_WHEN_LOWERCASED=51, 00391 UCHAR_CHANGES_WHEN_UPPERCASED=52, 00393 UCHAR_CHANGES_WHEN_TITLECASED=53, 00395 UCHAR_CHANGES_WHEN_CASEFOLDED=54, 00397 UCHAR_CHANGES_WHEN_CASEMAPPED=55, 00399 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, 00401 UCHAR_BINARY_LIMIT=57, 00402 00405 UCHAR_BIDI_CLASS=0x1000, 00407 UCHAR_INT_START=UCHAR_BIDI_CLASS, 00410 UCHAR_BLOCK=0x1001, 00413 UCHAR_CANONICAL_COMBINING_CLASS=0x1002, 00416 
UCHAR_DECOMPOSITION_TYPE=0x1003, 00420 UCHAR_EAST_ASIAN_WIDTH=0x1004, 00423 UCHAR_GENERAL_CATEGORY=0x1005, 00426 UCHAR_JOINING_GROUP=0x1006, 00429 UCHAR_JOINING_TYPE=0x1007, 00432 UCHAR_LINE_BREAK=0x1008, 00435 UCHAR_NUMERIC_TYPE=0x1009, 00438 UCHAR_SCRIPT=0x100A, 00441 UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, 00444 UCHAR_NFD_QUICK_CHECK=0x100C, 00447 UCHAR_NFKD_QUICK_CHECK=0x100D, 00450 UCHAR_NFC_QUICK_CHECK=0x100E, 00453 UCHAR_NFKC_QUICK_CHECK=0x100F, 00460 UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, 00467 UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, 00472 UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, 00477 UCHAR_SENTENCE_BREAK=0x1013, 00482 UCHAR_WORD_BREAK=0x1014, 00484 UCHAR_INT_LIMIT=0x1015, 00485 00494 UCHAR_GENERAL_CATEGORY_MASK=0x2000, 00496 UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, 00498 UCHAR_MASK_LIMIT=0x2001, 00499 00502 UCHAR_NUMERIC_VALUE=0x3000, 00504 UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, 00506 UCHAR_DOUBLE_LIMIT=0x3001, 00507 00510 UCHAR_AGE=0x4000, 00512 UCHAR_STRING_START=UCHAR_AGE, 00515 UCHAR_BIDI_MIRRORING_GLYPH=0x4001, 00518 UCHAR_CASE_FOLDING=0x4002, 00521 UCHAR_ISO_COMMENT=0x4003, 00524 UCHAR_LOWERCASE_MAPPING=0x4004, 00527 UCHAR_NAME=0x4005, 00530 UCHAR_SIMPLE_CASE_FOLDING=0x4006, 00533 UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, 00536 UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, 00539 UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, 00542 UCHAR_TITLECASE_MAPPING=0x400A, 00547 UCHAR_UNICODE_1_NAME=0x400B, 00550 UCHAR_UPPERCASE_MAPPING=0x400C, 00552 UCHAR_STRING_LIMIT=0x400D, 00553 00554 #ifndef U_HIDE_DRAFT_API 00555 00562 UCHAR_SCRIPT_EXTENSIONS=0x7000, 00564 UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, 00567 UCHAR_OTHER_PROPERTY_LIMIT=0x7001, 00568 #endif /* U_HIDE_DRAFT_API */ 00569 00571 UCHAR_INVALID_CODE = -1 00572 } UProperty; 00573 00579 typedef enum UCharCategory 00580 { 00581 /* 00582 * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. 
00583 * It matches pairs of lines like 00584 * / ** <Unicode 2-letter General_Category value> comment... * / 00585 * U_<[A-Z_]+> = <integer>, 00586 */ 00587 00589 U_UNASSIGNED = 0, 00591 U_GENERAL_OTHER_TYPES = 0, 00593 U_UPPERCASE_LETTER = 1, 00595 U_LOWERCASE_LETTER = 2, 00597 U_TITLECASE_LETTER = 3, 00599 U_MODIFIER_LETTER = 4, 00601 U_OTHER_LETTER = 5, 00603 U_NON_SPACING_MARK = 6, 00605 U_ENCLOSING_MARK = 7, 00607 U_COMBINING_SPACING_MARK = 8, 00609 U_DECIMAL_DIGIT_NUMBER = 9, 00611 U_LETTER_NUMBER = 10, 00613 U_OTHER_NUMBER = 11, 00615 U_SPACE_SEPARATOR = 12, 00617 U_LINE_SEPARATOR = 13, 00619 U_PARAGRAPH_SEPARATOR = 14, 00621 U_CONTROL_CHAR = 15, 00623 U_FORMAT_CHAR = 16, 00625 U_PRIVATE_USE_CHAR = 17, 00627 U_SURROGATE = 18, 00629 U_DASH_PUNCTUATION = 19, 00631 U_START_PUNCTUATION = 20, 00633 U_END_PUNCTUATION = 21, 00635 U_CONNECTOR_PUNCTUATION = 22, 00637 U_OTHER_PUNCTUATION = 23, 00639 U_MATH_SYMBOL = 24, 00641 U_CURRENCY_SYMBOL = 25, 00643 U_MODIFIER_SYMBOL = 26, 00645 U_OTHER_SYMBOL = 27, 00647 U_INITIAL_PUNCTUATION = 28, 00649 U_FINAL_PUNCTUATION = 29, 00651 U_CHAR_CATEGORY_COUNT 00652 } UCharCategory; 00653 00668 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) 00669 00671 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) 00672 00673 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) 00674 00675 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) 00676 00677 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) 00678 00679 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) 00680 00682 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) 00683 00684 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) 00685 00686 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) 00687 00689 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) 00690 00691 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) 00692 00693 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) 00694 00696 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) 00697 00698 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) 00699 00700 #define 
U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) 00701 00703 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) 00704 00705 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) 00706 00707 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) 00708 00709 #define U_GC_CS_MASK U_MASK(U_SURROGATE) 00710 00712 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) 00713 00714 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) 00715 00716 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) 00717 00718 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) 00719 00720 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) 00721 00723 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) 00724 00725 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) 00726 00727 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) 00728 00729 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) 00730 00732 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) 00733 00734 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) 00735 00736 00738 #define U_GC_L_MASK \ 00739 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) 00740 00742 #define U_GC_LC_MASK \ 00743 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) 00744 00746 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) 00747 00749 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) 00750 00752 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) 00753 00755 #define U_GC_C_MASK \ 00756 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) 00757 00759 #define U_GC_P_MASK \ 00760 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ 00761 U_GC_PI_MASK|U_GC_PF_MASK) 00762 00764 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) 00765 00770 typedef enum UCharDirection { 00771 /* 00772 * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. 00773 * It matches pairs of lines like 00774 * / ** <Unicode 1..3-letter Bidi_Class value> comment... 
* / 00775 * U_<[A-Z_]+> = <integer>, 00776 */ 00777 00779 U_LEFT_TO_RIGHT = 0, 00781 U_RIGHT_TO_LEFT = 1, 00783 U_EUROPEAN_NUMBER = 2, 00785 U_EUROPEAN_NUMBER_SEPARATOR = 3, 00787 U_EUROPEAN_NUMBER_TERMINATOR = 4, 00789 U_ARABIC_NUMBER = 5, 00791 U_COMMON_NUMBER_SEPARATOR = 6, 00793 U_BLOCK_SEPARATOR = 7, 00795 U_SEGMENT_SEPARATOR = 8, 00797 U_WHITE_SPACE_NEUTRAL = 9, 00799 U_OTHER_NEUTRAL = 10, 00801 U_LEFT_TO_RIGHT_EMBEDDING = 11, 00803 U_LEFT_TO_RIGHT_OVERRIDE = 12, 00805 U_RIGHT_TO_LEFT_ARABIC = 13, 00807 U_RIGHT_TO_LEFT_EMBEDDING = 14, 00809 U_RIGHT_TO_LEFT_OVERRIDE = 15, 00811 U_POP_DIRECTIONAL_FORMAT = 16, 00813 U_DIR_NON_SPACING_MARK = 17, 00815 U_BOUNDARY_NEUTRAL = 18, 00817 U_CHAR_DIRECTION_COUNT 00818 } UCharDirection; 00819 00824 enum UBlockCode { 00825 /* 00826 * Note: UBlockCode constants are parsed by preparseucd.py. 00827 * It matches lines like 00828 * UBLOCK_<Unicode Block value name> = <integer>, 00829 */ 00830 00832 UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ 00833 00835 UBLOCK_BASIC_LATIN = 1, /*[0000]*/ 00836 00838 UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ 00839 00841 UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ 00842 00844 UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ 00845 00847 UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ 00848 00850 UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ 00851 00853 UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ 00854 00859 UBLOCK_GREEK =8, /*[0370]*/ 00860 00862 UBLOCK_CYRILLIC =9, /*[0400]*/ 00863 00865 UBLOCK_ARMENIAN =10, /*[0530]*/ 00866 00868 UBLOCK_HEBREW =11, /*[0590]*/ 00869 00871 UBLOCK_ARABIC =12, /*[0600]*/ 00872 00874 UBLOCK_SYRIAC =13, /*[0700]*/ 00875 00877 UBLOCK_THAANA =14, /*[0780]*/ 00878 00880 UBLOCK_DEVANAGARI =15, /*[0900]*/ 00881 00883 UBLOCK_BENGALI =16, /*[0980]*/ 00884 00886 UBLOCK_GURMUKHI =17, /*[0A00]*/ 00887 00889 UBLOCK_GUJARATI =18, /*[0A80]*/ 00890 00892 UBLOCK_ORIYA =19, /*[0B00]*/ 00893 00895 UBLOCK_TAMIL =20, /*[0B80]*/ 00896 00898 UBLOCK_TELUGU =21, /*[0C00]*/ 00899 
00901 UBLOCK_KANNADA =22, /*[0C80]*/ 00902 00904 UBLOCK_MALAYALAM =23, /*[0D00]*/ 00905 00907 UBLOCK_SINHALA =24, /*[0D80]*/ 00908 00910 UBLOCK_THAI =25, /*[0E00]*/ 00911 00913 UBLOCK_LAO =26, /*[0E80]*/ 00914 00916 UBLOCK_TIBETAN =27, /*[0F00]*/ 00917 00919 UBLOCK_MYANMAR =28, /*[1000]*/ 00920 00922 UBLOCK_GEORGIAN =29, /*[10A0]*/ 00923 00925 UBLOCK_HANGUL_JAMO =30, /*[1100]*/ 00926 00928 UBLOCK_ETHIOPIC =31, /*[1200]*/ 00929 00931 UBLOCK_CHEROKEE =32, /*[13A0]*/ 00932 00934 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ 00935 00937 UBLOCK_OGHAM =34, /*[1680]*/ 00938 00940 UBLOCK_RUNIC =35, /*[16A0]*/ 00941 00943 UBLOCK_KHMER =36, /*[1780]*/ 00944 00946 UBLOCK_MONGOLIAN =37, /*[1800]*/ 00947 00949 UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ 00950 00952 UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ 00953 00955 UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ 00956 00958 UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ 00959 00961 UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ 00962 00967 UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ 00968 00970 UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ 00971 00973 UBLOCK_NUMBER_FORMS =45, /*[2150]*/ 00974 00976 UBLOCK_ARROWS =46, /*[2190]*/ 00977 00979 UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ 00980 00982 UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ 00983 00985 UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ 00986 00988 UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ 00989 00991 UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ 00992 00994 UBLOCK_BOX_DRAWING =52, /*[2500]*/ 00995 00997 UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ 00998 01000 UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ 01001 01003 UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ 01004 01006 UBLOCK_DINGBATS =56, /*[2700]*/ 01007 01009 UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ 01010 01012 UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ 01013 01015 UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ 01016 01018 UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ 01019 01021 
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ 01022 01024 UBLOCK_HIRAGANA =62, /*[3040]*/ 01025 01027 UBLOCK_KATAKANA =63, /*[30A0]*/ 01028 01030 UBLOCK_BOPOMOFO =64, /*[3100]*/ 01031 01033 UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ 01034 01036 UBLOCK_KANBUN =66, /*[3190]*/ 01037 01039 UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ 01040 01042 UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ 01043 01045 UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ 01046 01048 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ 01049 01051 UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ 01052 01054 UBLOCK_YI_SYLLABLES =72, /*[A000]*/ 01055 01057 UBLOCK_YI_RADICALS =73, /*[A490]*/ 01058 01060 UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ 01061 01063 UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ 01064 01066 UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ 01067 01069 UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ 01070 01080 UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ 01090 UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, 01091 01093 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ 01094 01096 UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ 01097 01099 UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ 01100 01102 UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ 01103 01105 UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ 01106 01108 UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ 01109 01111 UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ 01112 01114 UBLOCK_SPECIALS =86, /*[FFF0]*/ 01115 01117 UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ 01118 01119 /* New blocks in Unicode 3.1 */ 01120 01122 UBLOCK_OLD_ITALIC = 88, /*[10300]*/ 01124 UBLOCK_GOTHIC = 89, /*[10330]*/ 01126 UBLOCK_DESERET = 90, /*[10400]*/ 01128 UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ 01130 UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ 01132 UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ 01134 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ 01136 UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, 
/*[2F800]*/ 01138 UBLOCK_TAGS = 96, /*[E0000]*/ 01139 01140 /* New blocks in Unicode 3.2 */ 01141 01143 UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ 01148 UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, 01150 UBLOCK_TAGALOG = 98, /*[1700]*/ 01152 UBLOCK_HANUNOO = 99, /*[1720]*/ 01154 UBLOCK_BUHID = 100, /*[1740]*/ 01156 UBLOCK_TAGBANWA = 101, /*[1760]*/ 01158 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ 01160 UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ 01162 UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ 01164 UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ 01166 UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ 01168 UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ 01170 UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ 01172 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ 01174 UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ 01175 01176 /* New blocks in Unicode 4 */ 01177 01179 UBLOCK_LIMBU = 111, /*[1900]*/ 01181 UBLOCK_TAI_LE = 112, /*[1950]*/ 01183 UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ 01185 UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ 01187 UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ 01189 UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ 01191 UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ 01193 UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ 01195 UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ 01197 UBLOCK_UGARITIC = 120, /*[10380]*/ 01199 UBLOCK_SHAVIAN = 121, /*[10450]*/ 01201 UBLOCK_OSMANYA = 122, /*[10480]*/ 01203 UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ 01205 UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ 01207 UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ 01208 01209 /* New blocks in Unicode 4.1 */ 01210 01212 UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ 01214 UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ 01216 UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ 01218 UBLOCK_BUGINESE = 129, /*[1A00]*/ 01220 UBLOCK_CJK_STROKES = 130, /*[31C0]*/ 01222 
UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ 01224 UBLOCK_COPTIC = 132, /*[2C80]*/ 01226 UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ 01228 UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ 01230 UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ 01232 UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ 01234 UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ 01236 UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ 01238 UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ 01240 UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ 01242 UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ 01244 UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ 01246 UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ 01248 UBLOCK_TIFINAGH = 144, /*[2D30]*/ 01250 UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ 01251 01252 /* New blocks in Unicode 5.0 */ 01253 01255 UBLOCK_NKO = 146, /*[07C0]*/ 01257 UBLOCK_BALINESE = 147, /*[1B00]*/ 01259 UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ 01261 UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ 01263 UBLOCK_PHAGS_PA = 150, /*[A840]*/ 01265 UBLOCK_PHOENICIAN = 151, /*[10900]*/ 01267 UBLOCK_CUNEIFORM = 152, /*[12000]*/ 01269 UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ 01271 UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ 01272 01273 /* New blocks in Unicode 5.1 */ 01274 01276 UBLOCK_SUNDANESE = 155, /*[1B80]*/ 01278 UBLOCK_LEPCHA = 156, /*[1C00]*/ 01280 UBLOCK_OL_CHIKI = 157, /*[1C50]*/ 01282 UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ 01284 UBLOCK_VAI = 159, /*[A500]*/ 01286 UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ 01288 UBLOCK_SAURASHTRA = 161, /*[A880]*/ 01290 UBLOCK_KAYAH_LI = 162, /*[A900]*/ 01292 UBLOCK_REJANG = 163, /*[A930]*/ 01294 UBLOCK_CHAM = 164, /*[AA00]*/ 01296 UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ 01298 UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ 01300 UBLOCK_LYCIAN = 167, /*[10280]*/ 01302 UBLOCK_CARIAN = 168, /*[102A0]*/ 01304 UBLOCK_LYDIAN = 169, /*[10920]*/ 01306 UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ 01308 UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ 01309 01310 /* New blocks in Unicode 5.2 */ 
01311 01313 UBLOCK_SAMARITAN = 172, /*[0800]*/ 01315 UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ 01317 UBLOCK_TAI_THAM = 174, /*[1A20]*/ 01319 UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ 01321 UBLOCK_LISU = 176, /*[A4D0]*/ 01323 UBLOCK_BAMUM = 177, /*[A6A0]*/ 01325 UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ 01327 UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ 01329 UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/ 01331 UBLOCK_JAVANESE = 181, /*[A980]*/ 01333 UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ 01335 UBLOCK_TAI_VIET = 183, /*[AA80]*/ 01337 UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ 01339 UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ 01341 UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ 01343 UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ 01345 UBLOCK_AVESTAN = 188, /*[10B00]*/ 01347 UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ 01349 UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ 01351 UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ 01353 UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ 01355 UBLOCK_KAITHI = 193, /*[11080]*/ 01357 UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ 01359 UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ 01361 UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ 01363 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ 01364 01365 /* New blocks in Unicode 6.0 */ 01366 01368 UBLOCK_MANDAIC = 198, /*[0840]*/ 01370 UBLOCK_BATAK = 199, /*[1BC0]*/ 01372 UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ 01374 UBLOCK_BRAHMI = 201, /*[11000]*/ 01376 UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ 01378 UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ 01380 UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ 01382 UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ 01384 UBLOCK_EMOTICONS = 206, /*[1F600]*/ 01386 UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ 01388 UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ 01390 UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ 01391 01392 /* New blocks in Unicode 6.1 */ 
01393 01395 UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ 01397 UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ 01399 UBLOCK_CHAKMA = 212, /*[11100]*/ 01401 UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ 01403 UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ 01405 UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ 01407 UBLOCK_MIAO = 216, /*[16F00]*/ 01409 UBLOCK_SHARADA = 217, /*[11180]*/ 01411 UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ 01413 UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ 01415 UBLOCK_TAKRI = 220, /*[11680]*/ 01416 01418 UBLOCK_COUNT = 221, 01419 01421 UBLOCK_INVALID_CODE=-1 01422 }; 01423 01425 typedef enum UBlockCode UBlockCode; 01426 01434 typedef enum UEastAsianWidth { 01435 /* 01436 * Note: UEastAsianWidth constants are parsed by preparseucd.py. 01437 * It matches lines like 01438 * U_EA_<Unicode East_Asian_Width value name> 01439 */ 01440 01441 U_EA_NEUTRAL, /*[N]*/ 01442 U_EA_AMBIGUOUS, /*[A]*/ 01443 U_EA_HALFWIDTH, /*[H]*/ 01444 U_EA_FULLWIDTH, /*[F]*/ 01445 U_EA_NARROW, /*[Na]*/ 01446 U_EA_WIDE, /*[W]*/ 01447 U_EA_COUNT 01448 } UEastAsianWidth; 01449 01461 typedef enum UCharNameChoice { 01463 U_UNICODE_CHAR_NAME, 01469 U_UNICODE_10_CHAR_NAME, 01471 U_EXTENDED_CHAR_NAME, 01473 U_CHAR_NAME_ALIAS, 01475 U_CHAR_NAME_CHOICE_COUNT 01476 } UCharNameChoice; 01477 01491 typedef enum UPropertyNameChoice { 01492 U_SHORT_PROPERTY_NAME, 01493 U_LONG_PROPERTY_NAME, 01494 U_PROPERTY_NAME_CHOICE_COUNT 01495 } UPropertyNameChoice; 01496 01503 typedef enum UDecompositionType { 01504 /* 01505 * Note: UDecompositionType constants are parsed by preparseucd.py. 
01506 * It matches lines like 01507 * U_DT_<Unicode Decomposition_Type value name> 01508 */ 01509 01510 U_DT_NONE, /*[none]*/ 01511 U_DT_CANONICAL, /*[can]*/ 01512 U_DT_COMPAT, /*[com]*/ 01513 U_DT_CIRCLE, /*[enc]*/ 01514 U_DT_FINAL, /*[fin]*/ 01515 U_DT_FONT, /*[font]*/ 01516 U_DT_FRACTION, /*[fra]*/ 01517 U_DT_INITIAL, /*[init]*/ 01518 U_DT_ISOLATED, /*[iso]*/ 01519 U_DT_MEDIAL, /*[med]*/ 01520 U_DT_NARROW, /*[nar]*/ 01521 U_DT_NOBREAK, /*[nb]*/ 01522 U_DT_SMALL, /*[sml]*/ 01523 U_DT_SQUARE, /*[sqr]*/ 01524 U_DT_SUB, /*[sub]*/ 01525 U_DT_SUPER, /*[sup]*/ 01526 U_DT_VERTICAL, /*[vert]*/ 01527 U_DT_WIDE, /*[wide]*/ 01528 U_DT_COUNT /* 18 */ 01529 } UDecompositionType; 01530 01537 typedef enum UJoiningType { 01538 /* 01539 * Note: UJoiningType constants are parsed by preparseucd.py. 01540 * It matches lines like 01541 * U_JT_<Unicode Joining_Type value name> 01542 */ 01543 01544 U_JT_NON_JOINING, /*[U]*/ 01545 U_JT_JOIN_CAUSING, /*[C]*/ 01546 U_JT_DUAL_JOINING, /*[D]*/ 01547 U_JT_LEFT_JOINING, /*[L]*/ 01548 U_JT_RIGHT_JOINING, /*[R]*/ 01549 U_JT_TRANSPARENT, /*[T]*/ 01550 U_JT_COUNT /* 6 */ 01551 } UJoiningType; 01552 01559 typedef enum UJoiningGroup { 01560 /* 01561 * Note: UJoiningGroup constants are parsed by preparseucd.py. 
01562 * It matches lines like 01563 * U_JG_<Unicode Joining_Group value name> 01564 */ 01565 01566 U_JG_NO_JOINING_GROUP, 01567 U_JG_AIN, 01568 U_JG_ALAPH, 01569 U_JG_ALEF, 01570 U_JG_BEH, 01571 U_JG_BETH, 01572 U_JG_DAL, 01573 U_JG_DALATH_RISH, 01574 U_JG_E, 01575 U_JG_FEH, 01576 U_JG_FINAL_SEMKATH, 01577 U_JG_GAF, 01578 U_JG_GAMAL, 01579 U_JG_HAH, 01580 U_JG_TEH_MARBUTA_GOAL, 01581 U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, 01582 U_JG_HE, 01583 U_JG_HEH, 01584 U_JG_HEH_GOAL, 01585 U_JG_HETH, 01586 U_JG_KAF, 01587 U_JG_KAPH, 01588 U_JG_KNOTTED_HEH, 01589 U_JG_LAM, 01590 U_JG_LAMADH, 01591 U_JG_MEEM, 01592 U_JG_MIM, 01593 U_JG_NOON, 01594 U_JG_NUN, 01595 U_JG_PE, 01596 U_JG_QAF, 01597 U_JG_QAPH, 01598 U_JG_REH, 01599 U_JG_REVERSED_PE, 01600 U_JG_SAD, 01601 U_JG_SADHE, 01602 U_JG_SEEN, 01603 U_JG_SEMKATH, 01604 U_JG_SHIN, 01605 U_JG_SWASH_KAF, 01606 U_JG_SYRIAC_WAW, 01607 U_JG_TAH, 01608 U_JG_TAW, 01609 U_JG_TEH_MARBUTA, 01610 U_JG_TETH, 01611 U_JG_WAW, 01612 U_JG_YEH, 01613 U_JG_YEH_BARREE, 01614 U_JG_YEH_WITH_TAIL, 01615 U_JG_YUDH, 01616 U_JG_YUDH_HE, 01617 U_JG_ZAIN, 01618 U_JG_FE, 01619 U_JG_KHAPH, 01620 U_JG_ZHAIN, 01621 U_JG_BURUSHASKI_YEH_BARREE, 01622 U_JG_FARSI_YEH, 01623 U_JG_NYA, 01624 U_JG_ROHINGYA_YEH, 01625 U_JG_COUNT 01626 } UJoiningGroup; 01627 01634 typedef enum UGraphemeClusterBreak { 01635 /* 01636 * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. 
01637 * It matches lines like 01638 * U_GCB_<Unicode Grapheme_Cluster_Break value name> 01639 */ 01640 01641 U_GCB_OTHER = 0, /*[XX]*/ 01642 U_GCB_CONTROL = 1, /*[CN]*/ 01643 U_GCB_CR = 2, /*[CR]*/ 01644 U_GCB_EXTEND = 3, /*[EX]*/ 01645 U_GCB_L = 4, /*[L]*/ 01646 U_GCB_LF = 5, /*[LF]*/ 01647 U_GCB_LV = 6, /*[LV]*/ 01648 U_GCB_LVT = 7, /*[LVT]*/ 01649 U_GCB_T = 8, /*[T]*/ 01650 U_GCB_V = 9, /*[V]*/ 01651 U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 01652 U_GCB_PREPEND = 11, /*[PP]*/ 01653 U_GCB_COUNT = 12 01654 } UGraphemeClusterBreak; 01655 01663 typedef enum UWordBreakValues { 01664 /* 01665 * Note: UWordBreakValues constants are parsed by preparseucd.py. 01666 * It matches lines like 01667 * U_WB_<Unicode Word_Break value name> 01668 */ 01669 01670 U_WB_OTHER = 0, /*[XX]*/ 01671 U_WB_ALETTER = 1, /*[LE]*/ 01672 U_WB_FORMAT = 2, /*[FO]*/ 01673 U_WB_KATAKANA = 3, /*[KA]*/ 01674 U_WB_MIDLETTER = 4, /*[ML]*/ 01675 U_WB_MIDNUM = 5, /*[MN]*/ 01676 U_WB_NUMERIC = 6, /*[NU]*/ 01677 U_WB_EXTENDNUMLET = 7, /*[EX]*/ 01678 U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 01679 U_WB_EXTEND = 9, /*[Extend]*/ 01680 U_WB_LF = 10, /*[LF]*/ 01681 U_WB_MIDNUMLET =11, /*[MB]*/ 01682 U_WB_NEWLINE =12, /*[NL]*/ 01683 U_WB_COUNT = 13 01684 } UWordBreakValues; 01685 01692 typedef enum USentenceBreak { 01693 /* 01694 * Note: USentenceBreak constants are parsed by preparseucd.py. 
01695 * It matches lines like 01696 * U_SB_<Unicode Sentence_Break value name> 01697 */ 01698 01699 U_SB_OTHER = 0, /*[XX]*/ 01700 U_SB_ATERM = 1, /*[AT]*/ 01701 U_SB_CLOSE = 2, /*[CL]*/ 01702 U_SB_FORMAT = 3, /*[FO]*/ 01703 U_SB_LOWER = 4, /*[LO]*/ 01704 U_SB_NUMERIC = 5, /*[NU]*/ 01705 U_SB_OLETTER = 6, /*[LE]*/ 01706 U_SB_SEP = 7, /*[SE]*/ 01707 U_SB_SP = 8, /*[SP]*/ 01708 U_SB_STERM = 9, /*[ST]*/ 01709 U_SB_UPPER = 10, /*[UP]*/ 01710 U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 01711 U_SB_EXTEND = 12, /*[EX]*/ 01712 U_SB_LF = 13, /*[LF]*/ 01713 U_SB_SCONTINUE = 14, /*[SC]*/ 01714 U_SB_COUNT = 15 01715 } USentenceBreak; 01716 01723 typedef enum ULineBreak { 01724 /* 01725 * Note: ULineBreak constants are parsed by preparseucd.py. 01726 * It matches lines like 01727 * U_LB_<Unicode Line_Break value name> 01728 */ 01729 01730 U_LB_UNKNOWN = 0, /*[XX]*/ 01731 U_LB_AMBIGUOUS = 1, /*[AI]*/ 01732 U_LB_ALPHABETIC = 2, /*[AL]*/ 01733 U_LB_BREAK_BOTH = 3, /*[B2]*/ 01734 U_LB_BREAK_AFTER = 4, /*[BA]*/ 01735 U_LB_BREAK_BEFORE = 5, /*[BB]*/ 01736 U_LB_MANDATORY_BREAK = 6, /*[BK]*/ 01737 U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ 01738 U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ 01739 U_LB_COMBINING_MARK = 9, /*[CM]*/ 01740 U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ 01741 U_LB_EXCLAMATION = 11, /*[EX]*/ 01742 U_LB_GLUE = 12, /*[GL]*/ 01743 U_LB_HYPHEN = 13, /*[HY]*/ 01744 U_LB_IDEOGRAPHIC = 14, /*[ID]*/ 01746 U_LB_INSEPARABLE = 15, /*[IN]*/ 01747 U_LB_INSEPERABLE = U_LB_INSEPARABLE, 01748 U_LB_INFIX_NUMERIC = 16, /*[IS]*/ 01749 U_LB_LINE_FEED = 17, /*[LF]*/ 01750 U_LB_NONSTARTER = 18, /*[NS]*/ 01751 U_LB_NUMERIC = 19, /*[NU]*/ 01752 U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ 01753 U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ 01754 U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ 01755 U_LB_QUOTATION = 23, /*[QU]*/ 01756 U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ 01757 U_LB_SURROGATE = 25, /*[SG]*/ 01758 U_LB_SPACE = 26, /*[SP]*/ 01759 U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ 01760 U_LB_ZWSPACE = 28, /*[ZW]*/ 
01761 U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 01762 U_LB_WORD_JOINER = 30, /*[WJ]*/ 01763 U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ 01764 U_LB_H3 = 32, /*[H3]*/ 01765 U_LB_JL = 33, /*[JL]*/ 01766 U_LB_JT = 34, /*[JT]*/ 01767 U_LB_JV = 35, /*[JV]*/ 01768 U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 01769 U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 01770 U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 01771 U_LB_COUNT = 39 01772 } ULineBreak; 01773 01780 typedef enum UNumericType { 01781 /* 01782 * Note: UNumericType constants are parsed by preparseucd.py. 01783 * It matches lines like 01784 * U_NT_<Unicode Numeric_Type value name> 01785 */ 01786 01787 U_NT_NONE, /*[None]*/ 01788 U_NT_DECIMAL, /*[de]*/ 01789 U_NT_DIGIT, /*[di]*/ 01790 U_NT_NUMERIC, /*[nu]*/ 01791 U_NT_COUNT 01792 } UNumericType; 01793 01800 typedef enum UHangulSyllableType { 01801 /* 01802 * Note: UHangulSyllableType constants are parsed by preparseucd.py. 
01803 * It matches lines like 01804 * U_HST_<Unicode Hangul_Syllable_Type value name> 01805 */ 01806 01807 U_HST_NOT_APPLICABLE, /*[NA]*/ 01808 U_HST_LEADING_JAMO, /*[L]*/ 01809 U_HST_VOWEL_JAMO, /*[V]*/ 01810 U_HST_TRAILING_JAMO, /*[T]*/ 01811 U_HST_LV_SYLLABLE, /*[LV]*/ 01812 U_HST_LVT_SYLLABLE, /*[LVT]*/ 01813 U_HST_COUNT 01814 } UHangulSyllableType; 01815 01842 U_STABLE UBool U_EXPORT2 01843 u_hasBinaryProperty(UChar32 c, UProperty which); 01844 01857 U_STABLE UBool U_EXPORT2 01858 u_isUAlphabetic(UChar32 c); 01859 01872 U_STABLE UBool U_EXPORT2 01873 u_isULowercase(UChar32 c); 01874 01887 U_STABLE UBool U_EXPORT2 01888 u_isUUppercase(UChar32 c); 01889 01908 U_STABLE UBool U_EXPORT2 01909 u_isUWhiteSpace(UChar32 c); 01910 01948 U_STABLE int32_t U_EXPORT2 01949 u_getIntPropertyValue(UChar32 c, UProperty which); 01950 01969 U_STABLE int32_t U_EXPORT2 01970 u_getIntPropertyMinValue(UProperty which); 01971 01998 U_STABLE int32_t U_EXPORT2 01999 u_getIntPropertyMaxValue(UProperty which); 02000 02023 U_STABLE double U_EXPORT2 02024 u_getNumericValue(UChar32 c); 02025 02033 #define U_NO_NUMERIC_VALUE ((double)-123456789.) 
02034 02058 U_STABLE UBool U_EXPORT2 02059 u_islower(UChar32 c); 02060 02085 U_STABLE UBool U_EXPORT2 02086 u_isupper(UChar32 c); 02087 02102 U_STABLE UBool U_EXPORT2 02103 u_istitle(UChar32 c); 02104 02123 U_STABLE UBool U_EXPORT2 02124 u_isdigit(UChar32 c); 02125 02144 U_STABLE UBool U_EXPORT2 02145 u_isalpha(UChar32 c); 02146 02165 U_STABLE UBool U_EXPORT2 02166 u_isalnum(UChar32 c); 02167 02188 U_STABLE UBool U_EXPORT2 02189 u_isxdigit(UChar32 c); 02190 02204 U_STABLE UBool U_EXPORT2 02205 u_ispunct(UChar32 c); 02206 02223 U_STABLE UBool U_EXPORT2 02224 u_isgraph(UChar32 c); 02225 02252 U_STABLE UBool U_EXPORT2 02253 u_isblank(UChar32 c); 02254 02277 U_STABLE UBool U_EXPORT2 02278 u_isdefined(UChar32 c); 02279 02298 U_STABLE UBool U_EXPORT2 02299 u_isspace(UChar32 c); 02300 02319 U_STABLE UBool U_EXPORT2 02320 u_isJavaSpaceChar(UChar32 c); 02321 02359 U_STABLE UBool U_EXPORT2 02360 u_isWhitespace(UChar32 c); 02361 02383 U_STABLE UBool U_EXPORT2 02384 u_iscntrl(UChar32 c); 02385 02398 U_STABLE UBool U_EXPORT2 02399 u_isISOControl(UChar32 c); 02400 02416 U_STABLE UBool U_EXPORT2 02417 u_isprint(UChar32 c); 02418 02437 U_STABLE UBool U_EXPORT2 02438 u_isbase(UChar32 c); 02439 02456 U_STABLE UCharDirection U_EXPORT2 02457 u_charDirection(UChar32 c); 02458 02474 U_STABLE UBool U_EXPORT2 02475 u_isMirrored(UChar32 c); 02476 02496 U_STABLE UChar32 U_EXPORT2 02497 u_charMirror(UChar32 c); 02498 02510 U_STABLE int8_t U_EXPORT2 02511 u_charType(UChar32 c); 02512 02526 #define U_GET_GC_MASK(c) U_MASK(u_charType(c)) 02527 02545 typedef UBool U_CALLCONV 02546 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); 02547 02567 U_STABLE void U_EXPORT2 02568 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); 02569 02570 #if !UCONFIG_NO_NORMALIZATION 02571 02579 U_STABLE uint8_t U_EXPORT2 02580 u_getCombiningClass(UChar32 c); 02581 02582 #endif 02583 02607 U_STABLE int32_t U_EXPORT2 02608 u_charDigitValue(UChar32 c); 02609 
/*
 * Block lookup, character names, property-name aliases, identifier
 * predicates, case mapping, digits and version queries, followed by the end
 * of the header (declarations only; the five-digit tokens appear to be
 * listing line numbers). The "#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1"
 * directive is split across the two physical lines below, so they must stay
 * adjacent.
 *
 * Block and names:
 *   ublock_getCode(c) returns UBlockCode.
 *   u_charName(code, nameChoice, buffer, bufferLength, pErrorCode) and
 *   u_getISOComment(c, dest, destCapacity, pErrorCode) write into a
 *   caller-supplied char buffer with an int32_t capacity and return int32_t.
 *   NOTE(review): the return value is presumably the output length, and
 *   pErrorCode presumably follows the usual in/out UErrorCode convention --
 *   confirm against the API docs.
 *   u_charFromName(nameChoice, name, pErrorCode) returns UChar32.
 *   UEnumCharNamesFn is a U_CALLCONV callback type returning UBool and
 *   receiving (void *context, UChar32 code, UCharNameChoice nameChoice,
 *   const char *name, int32_t length).
 *   u_enumCharNames(start, limit, fn, context, nameChoice, pErrorCode) takes
 *   such a callback; note that context here is a non-const void *.
 *
 * Property name/alias mapping:
 *   u_getPropertyName(property, nameChoice)             returns const char *
 *   u_getPropertyEnum(alias)                            returns UProperty
 *   u_getPropertyValueName(property, value, nameChoice) returns const char *
 *   u_getPropertyValueEnum(property, alias)             returns int32_t
 *
 * Identifier predicates (UChar32 in, UBool out):
 *   u_isIDStart, u_isIDPart, u_isIDIgnorable, u_isJavaIDStart,
 *   u_isJavaIDPart.
 *
 * Case mapping (UChar32 in, UChar32 out):
 *   u_tolower, u_toupper, u_totitle, and u_foldCase(c, options).
 *   U_FOLD_CASE_DEFAULT is 0 and U_FOLD_CASE_EXCLUDE_SPECIAL_I is 1.
 *   NOTE(review): presumably these are the values accepted by the uint32_t
 *   options parameter of u_foldCase() -- confirm against the API docs.
 *
 * Digits:
 *   u_digit(ch, radix)       returns int32_t; radix is an int8_t.
 *   u_forDigit(digit, radix) returns UChar32; radix is an int8_t.
 *
 * Versions:
 *   u_charAge(c, versionArray) and u_getUnicodeVersion(versionArray) return
 *   void and take a UVersionInfo argument.
 *   NOTE(review): presumably versionArray is filled in as an output --
 *   confirm against the API docs.
 *
 * u_getFC_NFKC_Closure(c, dest, destCapacity, pErrorCode) is declared only
 * when UCONFIG_NO_NORMALIZATION evaluates to 0; it takes a UChar destination
 * buffer and returns int32_t.
 *
 * U_CDECL_END pairs with the U_CDECL_BEGIN near the top of the file, and the
 * final #endif closes the UCHAR_H include guard.
 */
02619 U_STABLE UBlockCode U_EXPORT2 02620 ublock_getCode(UChar32 c); 02621 02654 U_STABLE int32_t U_EXPORT2 02655 u_charName(UChar32 code, UCharNameChoice nameChoice, 02656 char *buffer, int32_t bufferLength, 02657 UErrorCode *pErrorCode); 02658 02677 U_STABLE int32_t U_EXPORT2 02678 u_getISOComment(UChar32 c, 02679 char *dest, int32_t destCapacity, 02680 UErrorCode *pErrorCode); 02681 02702 U_STABLE UChar32 U_EXPORT2 02703 u_charFromName(UCharNameChoice nameChoice, 02704 const char *name, 02705 UErrorCode *pErrorCode); 02706 02724 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, 02725 UChar32 code, 02726 UCharNameChoice nameChoice, 02727 const char *name, 02728 int32_t length); 02729 02751 U_STABLE void U_EXPORT2 02752 u_enumCharNames(UChar32 start, UChar32 limit, 02753 UEnumCharNamesFn *fn, 02754 void *context, 02755 UCharNameChoice nameChoice, 02756 UErrorCode *pErrorCode); 02757 02789 U_STABLE const char* U_EXPORT2 02790 u_getPropertyName(UProperty property, 02791 UPropertyNameChoice nameChoice); 02792 02812 U_STABLE UProperty U_EXPORT2 02813 u_getPropertyEnum(const char* alias); 02814 02862 U_STABLE const char* U_EXPORT2 02863 u_getPropertyValueName(UProperty property, 02864 int32_t value, 02865 UPropertyNameChoice nameChoice); 02866 02898 U_STABLE int32_t U_EXPORT2 02899 u_getPropertyValueEnum(UProperty property, 02900 const char* alias); 02901 02919 U_STABLE UBool U_EXPORT2 02920 u_isIDStart(UChar32 c); 02921 02943 U_STABLE UBool U_EXPORT2 02944 u_isIDPart(UChar32 c); 02945 02966 U_STABLE UBool U_EXPORT2 02967 u_isIDIgnorable(UChar32 c); 02968 02985 U_STABLE UBool U_EXPORT2 02986 u_isJavaIDStart(UChar32 c); 02987 03006 U_STABLE UBool U_EXPORT2 03007 u_isJavaIDPart(UChar32 c); 03008 03031 U_STABLE UChar32 U_EXPORT2 03032 u_tolower(UChar32 c); 03033 03056 U_STABLE UChar32 U_EXPORT2 03057 u_toupper(UChar32 c); 03058 03081 U_STABLE UChar32 U_EXPORT2 03082 u_totitle(UChar32 c); 03083 03085 #define U_FOLD_CASE_DEFAULT 0 03086 03103 #define
U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 03104 03127 U_STABLE UChar32 U_EXPORT2 03128 u_foldCase(UChar32 c, uint32_t options); 03129 03168 U_STABLE int32_t U_EXPORT2 03169 u_digit(UChar32 ch, int8_t radix); 03170 03199 U_STABLE UChar32 U_EXPORT2 03200 u_forDigit(int32_t digit, int8_t radix); 03201 03216 U_STABLE void U_EXPORT2 03217 u_charAge(UChar32 c, UVersionInfo versionArray); 03218 03230 U_STABLE void U_EXPORT2 03231 u_getUnicodeVersion(UVersionInfo versionArray); 03232 03233 #if !UCONFIG_NO_NORMALIZATION 03234 03255 U_STABLE int32_t U_EXPORT2 03256 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); 03257 03258 #endif 03259 03260 03261 U_CDECL_END 03262 03263 #endif /* UCHAR_H */ 03264 /*eof*/