ICU 49.1.1  49.1.1
utf_old.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002-2011, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00143 #ifndef __UTF_OLD_H__
00144 #define __UTF_OLD_H__
00145 
00146 #ifndef U_HIDE_DEPRECATED_API
00147 
00148 #include "unicode/utf.h"
00149 #include "unicode/utf8.h"
00150 #include "unicode/utf16.h"
00151 
00152 /* Formerly utf.h, part 1 --------------------------------------------------- */
00153 
/* Legacy index type; only declared when the client defines U_USE_UTF_DEPRECATES. */
#if defined(U_USE_UTF_DEPRECATES)
typedef int32_t UTextOffset;
#endif

/* Numeric selector for the default UTF_... macro family (16 here). */
#define UTF_SIZE 16

/*
 * Mode switches: UTF_SAFE is defined (with an empty value), while
 * UTF_STRICT and UTF_UNSAFE are explicitly left undefined.
 */
#define UTF_SAFE

#undef UTF_STRICT

#undef UTF_UNSAFE
00179 
/*
 * Error sentinel values. UTF8_ERROR_VALUE_1 and UTF_ERROR_VALUE are what the
 * ..._SAFE macros in this header store into the result for ill-formed input.
 */
#define UTF8_ERROR_VALUE_1 0x15
#define UTF8_ERROR_VALUE_2 0x9f
#define UTF_ERROR_VALUE 0xffff

/* True for U+D800..U+DFFF: every bit above the low 11 must match 0xd800. */
#define UTF_IS_SURROGATE(uchar) \
    (((uchar)&0xfffff800)==0xd800)

/*
 * True for U+FDD0..U+FDEF and for every value up to 0x10ffff whose low
 * 16 bits are 0xfffe or 0xffff.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/*
 * True for values in 0..0x10ffff that are neither surrogates nor matched by
 * UTF_IS_UNICODE_NONCHAR(). The unsigned casts make negative inputs fail.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
     ((uint32_t)(c)>0xdfff && (uint32_t)(c)<=0x10ffff && !UTF_IS_UNICODE_NONCHAR(c)))

/*
 * True if c is one of the two UTF-8 error sentinels or has 0xfffe/0xffff in
 * its low 16 bits (which includes UTF_ERROR_VALUE).
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || \
     (c)==UTF8_ERROR_VALUE_1 || \
     (c)==UTF8_ERROR_VALUE_2)

/* True if c passes UTF_IS_UNICODE_CHAR() and is not a UTF-8 error sentinel. */
#define UTF_IS_VALID(c) \
    (!(!UTF_IS_UNICODE_CHAR(c) || \
       (c)==UTF8_ERROR_VALUE_1 || \
       (c)==UTF8_ERROR_VALUE_2))
00265 
00266 /* Formerly utf8.h ---------------------------------------------------------- */
00267 
/*
 * Looks up the number of trail bytes for a UTF-8 lead byte in the
 * utf8_countTrailBytes table (declared outside this header; presumably by
 * unicode/utf8.h - confirm). The argument is parenthesized before the
 * uint8_t cast so that an expression argument is truncated as a whole and
 * the index always stays within 0..255.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])

/*
 * Clears the length-marker bits of a lead byte in place, keeping only the
 * low (6-countTrailBytes) payload bits. leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/* True if bit 7 is clear, i.e. the byte is 0x00..0x7f. */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/* True for bytes 0xc0..0xfd (unsigned-wraparound range check). */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/* True for bytes of the form 10xxxxxx (0x80..0xbf). */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/* True if code point c is above 0x7f and so needs more than one UTF-8 byte. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
00289 
/*
 * Number of UTF-8 bytes for code point c.
 * Active variant: 1..4 for 0..0x10ffff; every value outside that range
 * (including surrogates, which fall into the 3-byte bucket) yields 3.
 * The disabled variant additionally reports 5 and 6 for larger values.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
         (uint32_t)(c)<=0x7ff ? 2 : \
         (uint32_t)((c)-0x10000)>0xfffff ? 3 : 4)
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
         (uint32_t)(c)<=0x7ff ? 2 : \
         (uint32_t)(c)<=0xffff ? 3 : \
         (uint32_t)(c)<=0x10ffff ? 4 : \
         (uint32_t)(c)<=0x3ffffff ? 5 : \
         (uint32_t)(c)<=0x7fffffff ? 6 : 3)
#endif

/* Largest value UTF8_CHAR_LENGTH() returns in the active variant. */
#define UTF8_MAX_CHAR_LENGTH 4

/* Scales a size by 5/2 using integer arithmetic (rounds down). */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
00330 
/*
 * Reads into c the code point whose byte sequence contains index i, without
 * modifying i: a private int32_t copy of the index is moved back to the
 * sequence start and then used to decode forward.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t utf8GetUnsafePos_=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, utf8GetUnsafePos_); \
    UTF8_NEXT_CHAR_UNSAFE(s, utf8GetUnsafePos_, c); \
}

/*
 * Bounds-aware counterpart of UTF8_GET_CHAR_UNSAFE: start, length and strict
 * are forwarded to the ..._SAFE helper macros; i itself is not modified.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t utf8GetSafePos_=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, utf8GetSafePos_); \
    UTF8_NEXT_CHAR_SAFE(s, utf8GetSafePos_, length, c, strict); \
}
00344 
/*
 * Decodes the UTF-8 sequence starting at s[i] into c and advances i past it.
 * No bounds or well-formedness checks are made. Only lead bytes
 * 0xc0..0xf4 enter the multi-byte path; any other byte is returned as is.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t utf8NextTrails_=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, utf8NextTrails_); \
        switch(utf8NextTrails_) { \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            break; \
        default: \
            /* other counts: leave the masked lead byte untouched */ \
            break; \
        } \
    } \
}
00364 
/*
 * Writes code point c as 1 to 4 UTF-8 bytes at s[i] and advances i past them.
 * No capacity or range checks: anything above 0xffff is written with the
 * 4-byte pattern.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else if((uint32_t)(c)<=0x7ff) { \
        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else { \
        (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
00384 
/* Advances i past one sequence: the byte at s[i] plus its trail-byte count. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=UTF8_COUNT_TRAIL_BYTES((s)[i])+1; \
}

/* Applies UTF8_FWD_1_UNSAFE n times; does nothing when n<=0. */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t utf8FwdLeft_; \
    for(utf8FwdLeft_=(n); utf8FwdLeft_>0; --utf8FwdLeft_) { \
        UTF8_FWD_1_UNSAFE(s, i); \
    } \
}

/* Moves i backwards while s[i] is a trail byte; there is no lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    for(; UTF8_IS_TRAIL((s)[i]); --(i)) {} \
}
00403 
/*
 * Reads the byte at s[i] into c and advances i. Bytes below 0x80 are returned
 * directly; a lead byte is handed to utf8_nextCharSafeBody() together with
 * &i, length and strict; any other byte yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        (c)=UTF8_IS_LEAD(c) ? \
            utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict) : \
            UTF8_ERROR_VALUE_1; \
    } \
}

/*
 * Appends code point c at s[i]. Values up to 0x7f are stored inline; larger
 * ones are delegated to utf8_appendCharSafeBody() (called with a NULL last
 * argument), whose return value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
    if((uint32_t)(c)>0x7f) { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } else { \
        (s)[(i)++]=(uint8_t)(c); \
    } \
}

/* The remaining "safe" forward macros are plain aliases of the U8_... macros. */
#define UTF8_FWD_1_SAFE(s, i, length)         U8_FWD_1(s, i, length)

#define UTF8_FWD_N_SAFE(s, i, length, n)      U8_FWD_N(s, i, length, n)

#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
00433 
/*
 * Decodes the UTF-8 sequence that ends just before index i into c and leaves
 * i at the start of that sequence. No bounds or well-formedness checks.
 * Trail bytes are accumulated from least to most significant until a byte
 * >=0xc0 is met, which is masked as the lead byte and merged on top.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t utf8PrevByte_, utf8PrevTrails_=1, utf8PrevShift_=6; \
\
        /* c currently holds the last trail byte */ \
        (c)&=0x3f; \
        while((utf8PrevByte_=(s)[--(i)])<0xc0) { \
            /* another trail byte: merge its 6 payload bits */ \
            (c)|=(UChar32)(utf8PrevByte_&0x3f)<<utf8PrevShift_; \
            ++utf8PrevTrails_; \
            utf8PrevShift_+=6; \
        } \
        /* lead byte reached */ \
        UTF8_MASK_LEAD_BYTE(utf8PrevByte_, utf8PrevTrails_); \
        (c)|=(UChar32)utf8PrevByte_<<utf8PrevShift_; \
    } \
}
00456 
/* Steps i back at least one byte and keeps going while it lands on a trail byte. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    do { \
        --(i); \
    } while(UTF8_IS_TRAIL((s)[i])); \
}

/* Applies UTF8_BACK_1_UNSAFE n times; does nothing when n<=0. */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t utf8BackLeft_; \
    for(utf8BackLeft_=(n); utf8BackLeft_>0; --utf8BackLeft_) { \
        UTF8_BACK_1_UNSAFE(s, i); \
    } \
}

/*
 * Moves i to the end of the sequence that contains the byte before it:
 * one step back to that sequence's start, then one full step forward.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
00476 
/*
 * Steps i back one byte and reads it into c. Bytes below 0x80 are returned
 * directly; a trail byte (0x80..0xbf) is handed to utf8_prevCharSafeBody()
 * together with start, &i and strict; a byte above 0xbf found in this
 * position yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>0xbf) { \
        (c)=UTF8_ERROR_VALUE_1; \
    } else if((c)>=0x80) { \
        (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
    } \
}

/* The remaining "safe" backward macros are plain aliases of the U8_... macros. */
#define UTF8_BACK_1_SAFE(s, start, i)                 U8_BACK_1(s, start, i)

#define UTF8_BACK_N_SAFE(s, start, i, n)              U8_BACK_N(s, start, i, n)

#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
00497 
00498 /* Formerly utf16.h --------------------------------------------------------- */
00499 
/* True for 0xd800..0xdbff (first/lead surrogates). */
#define UTF_IS_FIRST_SURROGATE(uchar)  (((uchar)&0xfffffc00)==0xd800)

/* True for 0xdc00..0xdfff (second/trail surrogates). */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/* Tests bit 10 only; meaningful when c is already known to be a surrogate. */
#define UTF_IS_SURROGATE_FIRST(c)      (((c)&0x400)==0)

/* Constant subtracted when combining a surrogate pair into a code point. */
#define UTF_SURROGATE_OFFSET \
    ((0xd800<<10UL)+0xdc00-0x10000)

/* Combines a first and a second surrogate into a supplementary code point. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/* First and second surrogate for a supplementary code point, cast to UChar. */
#define UTF_FIRST_SURROGATE(supplementary)  (UChar)(((supplementary)>>10)+0xd7c0)
#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)

/* Alternative names for the two macros above. */
#define UTF16_LEAD(supplementary)  UTF_FIRST_SURROGATE(supplementary)
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/* Code unit classification expressed through the surrogate predicates. */
#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
#define UTF16_IS_LEAD(uchar)   UTF_IS_FIRST_SURROGATE(uchar)
#define UTF16_IS_TRAIL(uchar)  UTF_IS_SECOND_SURROGATE(uchar)

/* True if code point c is above 0xffff and so needs two code units. */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/* Number of 16-bit code units for c: 1 up to 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/* Largest value UTF16_CHAR_LENGTH() returns. */
#define UTF16_MAX_CHAR_LENGTH 2

/* Identity: the size is passed through unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
00548 
/*
 * Reads into c the code point that contains code unit s[i]; i is not changed.
 * A surrogate is combined with its neighbour (the next unit for a first
 * surrogate, the previous unit for a second one) without any checking.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        (c)=UTF_IS_SURROGATE_FIRST(c) ? \
            UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]) : \
            UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
    } \
}

/*
 * Checked variant of UTF16_GET_CHAR_UNSAFE: the neighbouring unit is only
 * read when its index lies inside [start, length) and only used when it is
 * the matching kind of surrogate. With strict set, an unmatched surrogate or
 * a unit rejected by UTF_IS_UNICODE_CHAR() is replaced by UTF_ERROR_VALUE;
 * without strict, an unmatched surrogate is returned unchanged.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF_IS_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if(UTF_IS_SURROGATE_FIRST(c)) { \
        uint16_t utf16GetOther_; \
        if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(utf16GetOther_=(s)[(i)+1])) { \
            (c)=UTF16_GET_PAIR_VALUE((c), utf16GetOther_); \
            /* strict: pair values ending in 0xfffe/0xffff are left to UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* first surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t utf16GetOther_; \
        if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(utf16GetOther_=(s)[(i)-1])) { \
            (c)=UTF16_GET_PAIR_VALUE(utf16GetOther_, (c)); \
            /* strict: pair values ending in 0xfffe/0xffff are left to UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* second surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } \
}
00597 
/*
 * Reads the code point at s[i] into c and advances i past it. A first
 * surrogate is combined with the following unit without checking it.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    ++(i); \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[i]); \
        ++(i); \
    } \
}

/*
 * Writes c at s[i] as one unit (c<=0xffff) or as a surrogate pair and
 * advances i accordingly. No capacity or range checks.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)>0xffff) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
}

/* Advances i by two units if s[i] is a first surrogate, otherwise by one. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    (i)+=UTF_IS_FIRST_SURROGATE((s)[i]) ? 2 : 1; \
}

/* Applies UTF16_FWD_1_UNSAFE n times; does nothing when n<=0. */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t utf16FwdLeft_; \
    for(utf16FwdLeft_=(n); utf16FwdLeft_>0; --utf16FwdLeft_) { \
        UTF16_FWD_1_UNSAFE(s, i); \
    } \
}

/* Steps i back by one if s[i] is a second surrogate. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { --(i); } \
}
00638 
/*
 * Reads the code point at s[i] into c and advances i. A first surrogate is
 * only combined with the next unit when that unit is below length and is a
 * second surrogate. With strict set, an unmatched first surrogate or a unit
 * rejected by UTF_IS_UNICODE_CHAR() becomes UTF_ERROR_VALUE; without strict
 * the unit is returned unchanged.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF_IS_FIRST_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            /* lone second surrogate or other rejected value */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t utf16NextTrail_; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(utf16NextTrail_=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), utf16NextTrail_); \
            /* strict: pair values ending in 0xfffe/0xffff are left to UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* first surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } \
}

/*
 * Appends c at s[i]. A value up to 0xffff is stored as a single unit
 * (without consulting length). A value up to 0x10ffff is stored as a
 * surrogate pair when i+1<length. In every other case a single
 * UTF_ERROR_VALUE unit is stored instead.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(length)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* out of range, or no room for a pair */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

/* The remaining "safe" forward macros are plain aliases of the U16_... macros. */
#define UTF16_FWD_1_SAFE(s, i, length)         U16_FWD_1(s, i, length)

#define UTF16_FWD_N_SAFE(s, i, length, n)      U16_FWD_N(s, i, length, n)

#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
00682 
/*
 * Steps i back over one code point and stores it in c. A second surrogate is
 * combined with the unit before it without checking that unit.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    --(i); \
    (c)=(s)[i]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        --(i); \
        (c)=UTF16_GET_PAIR_VALUE((s)[i], (c)); \
    } \
}

/* Steps i back by two units if the unit before it is a second surrogate, otherwise by one. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    --(i); \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

/* Applies UTF16_BACK_1_UNSAFE n times; does nothing when n<=0. */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t utf16BackLeft_; \
    for(utf16BackLeft_=(n); utf16BackLeft_>0; --utf16BackLeft_) { \
        UTF16_BACK_1_UNSAFE(s, i); \
    } \
}

/* Advances i by one if the unit just before it is a first surrogate. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { ++(i); } \
}
00713 
/*
 * Steps i back one unit and reads it into c. A second surrogate is only
 * combined with the unit before it when i is still above start and that unit
 * is a first surrogate (i is then stepped back once more). With strict set,
 * an unmatched second surrogate or a unit rejected by UTF_IS_UNICODE_CHAR()
 * becomes UTF_ERROR_VALUE; without strict the unit is returned unchanged.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF_IS_SECOND_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            /* lone first surrogate or other rejected value */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t utf16PrevLead_; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(utf16PrevLead_=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(utf16PrevLead_, (c)); \
            /* strict: pair values ending in 0xfffe/0xffff are left to UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) { \
            /* second surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } \
}

/* The remaining "safe" backward macros are plain aliases of the U16_... macros. */
#define UTF16_BACK_1_SAFE(s, start, i)                 U16_BACK_1(s, start, i)

#define UTF16_BACK_N_SAFE(s, start, i, n)              U16_BACK_N(s, start, i, n)

#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
00741 
00742 /* Formerly utf32.h --------------------------------------------------------- */
00743 
00744 /*
00745 * Old documentation:
00746 *
00747 *   <p>This file defines macros to deal with UTF-32 code units and code points.
00748 *   Signatures and semantics are the same as for the similarly named macros
00749 *   in utf16.h.
00750 *   utf32.h is included by utf.h after unicode/umachine.h
00751 *   and some common definitions.</p>
00752 *   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
00753 *                  Compound statements (curly braces {}) must be used  for if-else-while...
00754 *                  bodies and all macro statements should be terminated with semicolon.</p>
00755 */
00756 
00757 /* internal definitions ----------------------------------------------------- */
00758 
/*
 * Acceptance test used by the UTF32_..._SAFE macros: with strict set, c must
 * pass UTF_IS_UNICODE_CHAR(); otherwise any value in 0..0x10ffff is accepted.
 */
#define UTF32_IS_SAFE(c, strict) \
    ((strict) ? \
        UTF_IS_UNICODE_CHAR(c) : \
        (uint32_t)(c)<=0x10ffff)
00764 
00765 /*
00766  * For the semantics of all of these macros, see utf16.h.
00767  * The UTF-32 versions are trivial because any code point is
00768  * encoded using exactly one code unit.
00769  */
00770 
00771 /* single-code point definitions -------------------------------------------- */
00772 
00773 /* classes of code unit values */
00774 
/* Code unit classes: every unit is reported as single, never as lead or trail. */
#define UTF32_IS_SINGLE(uchar) 1
#define UTF32_IS_LEAD(uchar)   0
#define UTF32_IS_TRAIL(uchar)  0

/* Code units per code point: always exactly one. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
#define UTF32_CHAR_LENGTH(c)         1
#define UTF32_MAX_CHAR_LENGTH        1

/* Identity: the size is passed through unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)
00795 
/* Copies s[i] into c; i is not changed and nothing is validated. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/*
 * Copies s[i] into c and replaces it with UTF_ERROR_VALUE when
 * UTF32_IS_SAFE(c, strict) rejects it. start and length are not used.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { (c)=UTF_ERROR_VALUE; } \
}
00808 
00809 /* definitions with forward iteration --------------------------------------- */
00810 
/* Copies s[i] into c and advances i by one; nothing is validated. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    ++(i); \
}

/* Stores c at s[i] and advances i by one; nothing is validated. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[i]=(c); \
    ++(i); \
}

/* Advances i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/* Advances i by n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/* No-op. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/*
 * Copies s[i] into c, advances i by one, and replaces c with UTF_ERROR_VALUE
 * when UTF32_IS_SAFE(c, strict) rejects it. length is not used.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { (c)=UTF_ERROR_VALUE; } \
}

/*
 * Stores c at s[i] and advances i by one; a value above 0x10ffff is replaced
 * by 0xfffd. length is not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)>0x10ffff) { \
        (s)[(i)++]=0xfffd; \
    } else { \
        (s)[(i)++]=(c); \
    } \
}

/* Advances i by one; length is not used. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/* Advances i by n and clamps the result to length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    (i)+=(n); \
    if((i)>(length)) { \
        (i)=(length); \
    } \
}

/* No-op. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}
00867 
00868 /* definitions with backward iteration -------------------------------------- */
00869 
/* Steps i back by one and copies s[i] into c; nothing is validated. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    --(i); \
    (c)=(s)[i]; \
}

/* Steps i back by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/* Steps i back by n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/* No-op. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/*
 * Steps i back by one, copies s[i] into c, and replaces c with
 * UTF_ERROR_VALUE when UTF32_IS_SAFE(c, strict) rejects it. start is not used.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    --(i); \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { (c)=UTF_ERROR_VALUE; } \
}

/* Steps i back by one; start is not used. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/* Steps i back by n and clamps the result to start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    if(((i)-=(n))<(start)) { \
        (i)=(start); \
    } \
}

/*
 * No-op. Note the three-parameter list (s, i, length): unlike the UTF8_ and
 * UTF16_ counterparts in this header there is no start parameter.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}
00913 
00914 /* Formerly utf.h, part 2 --------------------------------------------------- */
00915 
/*
 * Encoding-neutral names with an explicit _UNSAFE/_SAFE suffix. Each one
 * forwards its arguments unchanged to the UTF16_... macro of the same name.
 */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/* random access */
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* forward read and write */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/* forward index movement */
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)

/* backward read */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/* backward index movement */
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00991 
00992 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00993 
/*
 * Default (suffix-less) names. All but two forward to the U16_... macros;
 * UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR forward to the UTF16_...
 * macros of this header.
 */

/* code unit and code point classification */
#define UTF_IS_SINGLE(uchar)       U16_IS_SINGLE(uchar)
#define UTF_IS_LEAD(uchar)         U16_IS_LEAD(uchar)
#define UTF_IS_TRAIL(uchar)        U16_IS_TRAIL(uchar)
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
#define UTF_CHAR_LENGTH(c)         U16_LENGTH(c)
#define UTF_MAX_CHAR_LENGTH        U16_MAX_LENGTH

/* random access and forward iteration */
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
#define UTF_NEXT_CHAR(s, i, length, c)       U16_NEXT(s, i, length, c)
#define UTF_APPEND_CHAR(s, i, length, c)     UTF16_APPEND_CHAR_SAFE(s, i, length, c)
#define UTF_FWD_1(s, i, length)              U16_FWD_1(s, i, length)
#define UTF_FWD_N(s, i, length, n)           U16_FWD_N(s, i, length, n)
#define UTF_SET_CHAR_START(s, start, i)      U16_SET_CP_START(s, start, i)

/* backward iteration */
#define UTF_PREV_CHAR(s, start, i, c)           U16_PREV(s, start, i, c)
#define UTF_BACK_1(s, start, i)                 U16_BACK_1(s, start, i)
#define UTF_BACK_N(s, start, i, n)              U16_BACK_N(s, start, i, n)
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
01165 
01166 #endif /* U_HIDE_DEPRECATED_API */
01167 
01168 #endif
01169