ICU 49.1.1
49.1.1
|
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2002-2011, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: utf.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 2002sep21 00014 * created by: Markus W. Scherer 00015 */ 00016 00143 #ifndef __UTF_OLD_H__ 00144 #define __UTF_OLD_H__ 00145 00146 #ifndef U_HIDE_DEPRECATED_API 00147 00148 #include "unicode/utf.h" 00149 #include "unicode/utf8.h" 00150 #include "unicode/utf16.h" 00151 00152 /* Formerly utf.h, part 1 --------------------------------------------------- */ 00153 00154 #ifdef U_USE_UTF_DEPRECATES 00155 00162 typedef int32_t UTextOffset; 00163 #endif 00164 00166 #define UTF_SIZE 16 00167 00174 #define UTF_SAFE 00175 00176 #undef UTF_UNSAFE 00177 00178 #undef UTF_STRICT 00179 00194 #define UTF8_ERROR_VALUE_1 0x15 00195 00201 #define UTF8_ERROR_VALUE_2 0x9f 00202 00209 #define UTF_ERROR_VALUE 0xffff 00210 00217 #define UTF_IS_ERROR(c) \ 00218 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 00219 00225 #define UTF_IS_VALID(c) \ 00226 (UTF_IS_UNICODE_CHAR(c) && \ 00227 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 00228 00233 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 00234 00240 #define UTF_IS_UNICODE_NONCHAR(c) \ 00241 ((c)>=0xfdd0 && \ 00242 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 00243 (uint32_t)(c)<=0x10ffff) 00244 00260 #define UTF_IS_UNICODE_CHAR(c) \ 00261 ((uint32_t)(c)<0xd800 || \ 00262 ((uint32_t)(c)>0xdfff && \ 00263 (uint32_t)(c)<=0x10ffff && \ 00264 !UTF_IS_UNICODE_NONCHAR(c))) 00265 00266 /* Formerly utf8.h ---------------------------------------------------------- */ 00267 00272 #define UTF8_COUNT_TRAIL_BYTES(leadByte) 
(utf8_countTrailBytes[(uint8_t)leadByte]) 00273 00278 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 00279 00281 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 00282 00283 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 00284 00285 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 00286 00288 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 00289 00303 #if 1 00304 # define UTF8_CHAR_LENGTH(c) \ 00305 ((uint32_t)(c)<=0x7f ? 1 : \ 00306 ((uint32_t)(c)<=0x7ff ? 2 : \ 00307 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 00308 ) \ 00309 ) 00310 #else 00311 # define UTF8_CHAR_LENGTH(c) \ 00312 ((uint32_t)(c)<=0x7f ? 1 : \ 00313 ((uint32_t)(c)<=0x7ff ? 2 : \ 00314 ((uint32_t)(c)<=0xffff ? 3 : \ 00315 ((uint32_t)(c)<=0x10ffff ? 4 : \ 00316 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 00317 ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ 00318 ) \ 00319 ) \ 00320 ) \ 00321 ) \ 00322 ) 00323 #endif 00324 00326 #define UTF8_MAX_CHAR_LENGTH 4 00327 00329 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 00330 00332 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ 00333 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 00334 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 00335 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 00336 } 00337 00339 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 00340 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 00341 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 00342 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 00343 } 00344 00346 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ 00347 (c)=(s)[(i)++]; \ 00348 if((uint8_t)((c)-0xc0)<0x35) { \ 00349 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 00350 UTF8_MASK_LEAD_BYTE(c, __count); \ 00351 switch(__count) { \ 00352 /* each following branch falls through to the next one */ \ 00353 case 3: \ 00354 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 00355 case 2: \ 00356 
(c)=((c)<<6)|((s)[(i)++]&0x3f); \ 00357 case 1: \ 00358 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 00359 /* no other branches to optimize switch() */ \ 00360 break; \ 00361 } \ 00362 } \ 00363 } 00364 00366 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ 00367 if((uint32_t)(c)<=0x7f) { \ 00368 (s)[(i)++]=(uint8_t)(c); \ 00369 } else { \ 00370 if((uint32_t)(c)<=0x7ff) { \ 00371 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 00372 } else { \ 00373 if((uint32_t)(c)<=0xffff) { \ 00374 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 00375 } else { \ 00376 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 00377 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 00378 } \ 00379 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 00380 } \ 00381 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 00382 } \ 00383 } 00384 00386 #define UTF8_FWD_1_UNSAFE(s, i) { \ 00387 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 00388 } 00389 00391 #define UTF8_FWD_N_UNSAFE(s, i, n) { \ 00392 int32_t __N=(n); \ 00393 while(__N>0) { \ 00394 UTF8_FWD_1_UNSAFE(s, i); \ 00395 --__N; \ 00396 } \ 00397 } 00398 00400 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ 00401 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 00402 } 00403 00405 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 00406 (c)=(s)[(i)++]; \ 00407 if((c)>=0x80) { \ 00408 if(UTF8_IS_LEAD(c)) { \ 00409 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 00410 } else { \ 00411 (c)=UTF8_ERROR_VALUE_1; \ 00412 } \ 00413 } \ 00414 } 00415 00417 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ 00418 if((uint32_t)(c)<=0x7f) { \ 00419 (s)[(i)++]=(uint8_t)(c); \ 00420 } else { \ 00421 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 00422 } \ 00423 } 00424 00426 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 00427 00429 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 00430 00432 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 00433 00435 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ 00436 
(c)=(s)[--(i)]; \ 00437 if(UTF8_IS_TRAIL(c)) { \ 00438 uint8_t __b, __count=1, __shift=6; \ 00439 \ 00440 /* c is a trail byte */ \ 00441 (c)&=0x3f; \ 00442 for(;;) { \ 00443 __b=(s)[--(i)]; \ 00444 if(__b>=0xc0) { \ 00445 UTF8_MASK_LEAD_BYTE(__b, __count); \ 00446 (c)|=(UChar32)__b<<__shift; \ 00447 break; \ 00448 } else { \ 00449 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 00450 ++__count; \ 00451 __shift+=6; \ 00452 } \ 00453 } \ 00454 } \ 00455 } 00456 00458 #define UTF8_BACK_1_UNSAFE(s, i) { \ 00459 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 00460 } 00461 00463 #define UTF8_BACK_N_UNSAFE(s, i, n) { \ 00464 int32_t __N=(n); \ 00465 while(__N>0) { \ 00466 UTF8_BACK_1_UNSAFE(s, i); \ 00467 --__N; \ 00468 } \ 00469 } 00470 00472 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 00473 UTF8_BACK_1_UNSAFE(s, i); \ 00474 UTF8_FWD_1_UNSAFE(s, i); \ 00475 } 00476 00478 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 00479 (c)=(s)[--(i)]; \ 00480 if((c)>=0x80) { \ 00481 if((c)<=0xbf) { \ 00482 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 00483 } else { \ 00484 (c)=UTF8_ERROR_VALUE_1; \ 00485 } \ 00486 } \ 00487 } 00488 00490 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 00491 00493 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 00494 00496 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 00497 00498 /* Formerly utf16.h --------------------------------------------------------- */ 00499 00501 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 00502 00504 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 00505 00507 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 00508 00510 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 00511 00513 #define UTF16_GET_PAIR_VALUE(first, second) \ 00514 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 00515 00517 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 00518 00520 #define 
UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 00521 00523 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 00524 00526 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 00527 00529 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 00530 00532 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 00533 00535 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 00536 00538 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 00539 00541 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 00542 00544 #define UTF16_MAX_CHAR_LENGTH 2 00545 00547 #define UTF16_ARRAY_SIZE(size) (size) 00548 00560 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ 00561 (c)=(s)[i]; \ 00562 if(UTF_IS_SURROGATE(c)) { \ 00563 if(UTF_IS_SURROGATE_FIRST(c)) { \ 00564 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 00565 } else { \ 00566 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 00567 } \ 00568 } \ 00569 } 00570 00572 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 00573 (c)=(s)[i]; \ 00574 if(UTF_IS_SURROGATE(c)) { \ 00575 uint16_t __c2; \ 00576 if(UTF_IS_SURROGATE_FIRST(c)) { \ 00577 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 00578 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 00579 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 00580 } else if(strict) {\ 00581 /* unmatched first surrogate */ \ 00582 (c)=UTF_ERROR_VALUE; \ 00583 } \ 00584 } else { \ 00585 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 00586 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 00587 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 00588 } else if(strict) {\ 00589 /* unmatched second surrogate */ \ 00590 (c)=UTF_ERROR_VALUE; \ 00591 } \ 00592 } \ 00593 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 00594 (c)=UTF_ERROR_VALUE; \ 00595 } \ 00596 } 00597 00599 #define UTF16_NEXT_CHAR_UNSAFE(s, 
i, c) { \ 00600 (c)=(s)[(i)++]; \ 00601 if(UTF_IS_FIRST_SURROGATE(c)) { \ 00602 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 00603 } \ 00604 } 00605 00607 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ 00608 if((uint32_t)(c)<=0xffff) { \ 00609 (s)[(i)++]=(uint16_t)(c); \ 00610 } else { \ 00611 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 00612 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 00613 } \ 00614 } 00615 00617 #define UTF16_FWD_1_UNSAFE(s, i) { \ 00618 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 00619 ++(i); \ 00620 } \ 00621 } 00622 00624 #define UTF16_FWD_N_UNSAFE(s, i, n) { \ 00625 int32_t __N=(n); \ 00626 while(__N>0) { \ 00627 UTF16_FWD_1_UNSAFE(s, i); \ 00628 --__N; \ 00629 } \ 00630 } 00631 00633 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ 00634 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 00635 --(i); \ 00636 } \ 00637 } 00638 00640 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 00641 (c)=(s)[(i)++]; \ 00642 if(UTF_IS_FIRST_SURROGATE(c)) { \ 00643 uint16_t __c2; \ 00644 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 00645 ++(i); \ 00646 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 00647 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 00648 } else if(strict) {\ 00649 /* unmatched first surrogate */ \ 00650 (c)=UTF_ERROR_VALUE; \ 00651 } \ 00652 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 00653 /* unmatched second surrogate or other non-character */ \ 00654 (c)=UTF_ERROR_VALUE; \ 00655 } \ 00656 } 00657 00659 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 00660 if((uint32_t)(c)<=0xffff) { \ 00661 (s)[(i)++]=(uint16_t)(c); \ 00662 } else if((uint32_t)(c)<=0x10ffff) { \ 00663 if((i)+1<(length)) { \ 00664 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 00665 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 00666 } else /* not enough space */ { \ 00667 (s)[(i)++]=UTF_ERROR_VALUE; \ 00668 } \ 00669 } else /* c>0x10ffff, write error value */ { \ 00670 (s)[(i)++]=UTF_ERROR_VALUE; \ 00671 } \ 00672 } 

/** Alias for U16_FWD_1(). */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N(). */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START(). */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)

/**
 * Decrement i and read s[i] into c; after a second surrogate, also step back
 * over the preceding unit and combine the two. No bounds or pairing checks.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/** Decrement i by one unit, or by two if the unit stepped onto is a second surrogate. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/** Apply UTF16_BACK_1_UNSAFE() n times (nothing happens for n<=0). */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Increment i once if s[i-1] is a first surrogate. No bounds check. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

/**
 * Decrement i and read s[i] into c. A second surrogate is combined with the
 * preceding unit only if i>start and that unit is a first surrogate. With
 * strict set, an unmatched surrogate or a value rejected by
 * UTF_IS_UNICODE_CHAR() is replaced by UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Alias for U16_BACK_1(). */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N(). */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT(). */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf32.h --------------------------------------------------------- */

/*
* Old documentation:
*
*   This file defines macros to deal with UTF-32 code units and code points.
*   Signatures and semantics are the same as for the similarly named macros
*   in utf16.h.
*   utf32.h is included by utf.h after unicode/umachine.h</p>
*   and some common definitions.
*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
*                  Compound statements (curly braces {}) must be used  for if-else-while...
*                  bodies and all macro statements should be terminated with semicolon.</p>
*/

/* internal definitions ----------------------------------------------------- */

/**
 * Without strict: true if c does not exceed 0x10ffff.
 * With strict: the result of UTF_IS_UNICODE_CHAR(c).
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

/* single-code point definitions -------------------------------------------- */

/* classes of code unit values */

/** Always 1; the argument is ignored. */
#define UTF32_IS_SINGLE(uchar) 1
/** Always 0; the argument is ignored. */
#define UTF32_IS_LEAD(uchar) 0
/** Always 0; the argument is ignored. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0; the argument is ignored. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
/** Always 1; the argument is ignored. */
#define UTF32_CHAR_LENGTH(c) 1
/** Always 1. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Expands to size unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)

/** Read s[i] into c. i is not changed. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Read s[i] into c and replace it by UTF_ERROR_VALUE if UTF32_IS_SAFE()
 * rejects it. start and length are not used.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/** Read s[i] into c and increment i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** Store c at s[i] and increment i. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** Increment i. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** Add n to i. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** Does nothing. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/**
 * Read s[i] into c, increment i, and replace c by UTF_ERROR_VALUE if
 * UTF32_IS_SAFE() rejects it. length is not used.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Store c at s[i] if it does not exceed 0x10ffff, otherwise store 0xfffd;
 * increment i either way. length is not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/** Increment i. length is not used. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** Add n to i, then clamp i to length if it went past it. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/** Does nothing. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}

/* definitions with backward iteration -------------------------------------- */

/** Decrement i and read s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** Decrement i. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** Subtract n from i. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** Does nothing. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/**
 * Decrement i, read s[i] into c, and replace c by UTF_ERROR_VALUE if
 * UTF32_IS_SAFE() rejects it. start is not used.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Decrement i. start is not used. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** Subtract n from i, then clamp i to start if it went below it. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/**
 * Does nothing. Note that this macro takes (s, i, length), without the start
 * parameter that the UTF-8 and UTF-16 counterparts have.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}

/* Formerly utf.h, part 2 --------------------------------------------------- */

/** Alias for UTF16_ARRAY_SIZE(). */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

00924 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 00925 00927 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 00928 00929 00931 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 00932 00934 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 00935 00936 00938 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 00939 00941 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 00942 00943 00945 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 00946 00948 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 00949 00950 00952 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 00953 00955 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 00956 00957 00959 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 00960 00962 #define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 00963 00964 00966 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 00967 00969 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 00970 00971 00973 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 00974 00976 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 00977 00978 00980 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 00981 00983 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 00984 00985 00987 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 00988 00990 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 00991 00992 /* Define default macros (UTF-16 "safe") ------------------------------------ */ 00993 00999 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 01000 01006 #define 
UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 01007 01013 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 01014 01020 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 01021 01027 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 01028 01034 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 01035 01045 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 01046 01058 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 01059 01071 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 01072 01082 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 01083 01093 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 01094 01109 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 01110 01122 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 01123 01135 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 01136 01148 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 01149 01164 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 01165 01166 #endif /* U_HIDE_DEPRECATED_API */ 01167 01168 #endif 01169