ICU 49.1.1
49.1.1
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2011, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 #include "unicode/ucasemap.h" 00035 00036 struct UConverter; // unicode/ucnv.h 00037 class StringThreadTest; 00038 00039 #ifndef U_COMPARE_CODE_POINT_ORDER 00040 /* see also ustring.h and unorm.h */ 00046 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00047 #endif 00048 00049 #ifndef USTRING_H 00050 00053 U_STABLE int32_t U_EXPORT2 00054 u_strlen(const UChar *s); 00055 #endif 00056 00057 #ifndef U_STRING_CASE_MAPPER_DEFINED 00058 #define U_STRING_CASE_MAPPER_DEFINED 00059 00064 typedef int32_t U_CALLCONV 00065 UStringCaseMapper(const UCaseMap *csm, 00066 UChar *dest, int32_t destCapacity, 00067 const UChar *src, int32_t srcLength, 00068 UErrorCode *pErrorCode); 00069 00070 #endif 00071 00072 U_NAMESPACE_BEGIN 00073 00074 class BreakIterator; // unicode/brkiter.h 00075 class Locale; // unicode/locid.h 00076 class StringCharacterIterator; 00077 class UnicodeStringAppendable; // unicode/appendable.h 00078 00079 /* The <iostream> include 
has been moved to unicode/ustream.h */ 00080 00091 #define US_INV icu::UnicodeString::kInvariant 00092 00110 #if defined(U_DECLARE_UTF16) 00111 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00112 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 00113 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00114 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 00116 #else 00117 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 00118 #endif 00119 00133 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00134 00142 #ifndef UNISTR_FROM_CHAR_EXPLICIT 00143 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 00144 // Auto-"explicit" in ICU library code. 00145 # define UNISTR_FROM_CHAR_EXPLICIT explicit 00146 # else 00147 // Empty by default for source code compatibility. 00148 # define UNISTR_FROM_CHAR_EXPLICIT 00149 # endif 00150 #endif 00151 00162 #ifndef UNISTR_FROM_STRING_EXPLICIT 00163 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 00164 // Auto-"explicit" in ICU library code. 00165 # define UNISTR_FROM_STRING_EXPLICIT explicit 00166 # else 00167 // Empty by default for source code compatibility. 
00168 # define UNISTR_FROM_STRING_EXPLICIT 00169 # endif 00170 #endif 00171 00241 class U_COMMON_API UnicodeString : public Replaceable 00242 { 00243 public: 00244 00253 enum EInvariant { 00258 kInvariant 00259 }; 00260 00261 //======================================== 00262 // Read-only operations 00263 //======================================== 00264 00265 /* Comparison - bitwise only - for international comparison use collation */ 00266 00274 inline UBool operator== (const UnicodeString& text) const; 00275 00283 inline UBool operator!= (const UnicodeString& text) const; 00284 00292 inline UBool operator> (const UnicodeString& text) const; 00293 00301 inline UBool operator< (const UnicodeString& text) const; 00302 00310 inline UBool operator>= (const UnicodeString& text) const; 00311 00319 inline UBool operator<= (const UnicodeString& text) const; 00320 00332 inline int8_t compare(const UnicodeString& text) const; 00333 00348 inline int8_t compare(int32_t start, 00349 int32_t length, 00350 const UnicodeString& text) const; 00351 00369 inline int8_t compare(int32_t start, 00370 int32_t length, 00371 const UnicodeString& srcText, 00372 int32_t srcStart, 00373 int32_t srcLength) const; 00374 00387 inline int8_t compare(const UChar *srcChars, 00388 int32_t srcLength) const; 00389 00404 inline int8_t compare(int32_t start, 00405 int32_t length, 00406 const UChar *srcChars) const; 00407 00425 inline int8_t compare(int32_t start, 00426 int32_t length, 00427 const UChar *srcChars, 00428 int32_t srcStart, 00429 int32_t srcLength) const; 00430 00448 inline int8_t compareBetween(int32_t start, 00449 int32_t limit, 00450 const UnicodeString& srcText, 00451 int32_t srcStart, 00452 int32_t srcLimit) const; 00453 00471 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00472 00492 inline int8_t compareCodePointOrder(int32_t start, 00493 int32_t length, 00494 const UnicodeString& srcText) const; 00495 00517 inline int8_t compareCodePointOrder(int32_t start, 
00518 int32_t length, 00519 const UnicodeString& srcText, 00520 int32_t srcStart, 00521 int32_t srcLength) const; 00522 00541 inline int8_t compareCodePointOrder(const UChar *srcChars, 00542 int32_t srcLength) const; 00543 00563 inline int8_t compareCodePointOrder(int32_t start, 00564 int32_t length, 00565 const UChar *srcChars) const; 00566 00588 inline int8_t compareCodePointOrder(int32_t start, 00589 int32_t length, 00590 const UChar *srcChars, 00591 int32_t srcStart, 00592 int32_t srcLength) const; 00593 00615 inline int8_t compareCodePointOrderBetween(int32_t start, 00616 int32_t limit, 00617 const UnicodeString& srcText, 00618 int32_t srcStart, 00619 int32_t srcLimit) const; 00620 00639 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00640 00661 inline int8_t caseCompare(int32_t start, 00662 int32_t length, 00663 const UnicodeString& srcText, 00664 uint32_t options) const; 00665 00688 inline int8_t caseCompare(int32_t start, 00689 int32_t length, 00690 const UnicodeString& srcText, 00691 int32_t srcStart, 00692 int32_t srcLength, 00693 uint32_t options) const; 00694 00714 inline int8_t caseCompare(const UChar *srcChars, 00715 int32_t srcLength, 00716 uint32_t options) const; 00717 00738 inline int8_t caseCompare(int32_t start, 00739 int32_t length, 00740 const UChar *srcChars, 00741 uint32_t options) const; 00742 00765 inline int8_t caseCompare(int32_t start, 00766 int32_t length, 00767 const UChar *srcChars, 00768 int32_t srcStart, 00769 int32_t srcLength, 00770 uint32_t options) const; 00771 00794 inline int8_t caseCompareBetween(int32_t start, 00795 int32_t limit, 00796 const UnicodeString& srcText, 00797 int32_t srcStart, 00798 int32_t srcLimit, 00799 uint32_t options) const; 00800 00808 inline UBool startsWith(const UnicodeString& text) const; 00809 00820 inline UBool startsWith(const UnicodeString& srcText, 00821 int32_t srcStart, 00822 int32_t srcLength) const; 00823 00832 inline UBool startsWith(const UChar *srcChars, 
00833 int32_t srcLength) const; 00834 00844 inline UBool startsWith(const UChar *srcChars, 00845 int32_t srcStart, 00846 int32_t srcLength) const; 00847 00855 inline UBool endsWith(const UnicodeString& text) const; 00856 00867 inline UBool endsWith(const UnicodeString& srcText, 00868 int32_t srcStart, 00869 int32_t srcLength) const; 00870 00879 inline UBool endsWith(const UChar *srcChars, 00880 int32_t srcLength) const; 00881 00892 inline UBool endsWith(const UChar *srcChars, 00893 int32_t srcStart, 00894 int32_t srcLength) const; 00895 00896 00897 /* Searching - bitwise only */ 00898 00907 inline int32_t indexOf(const UnicodeString& text) const; 00908 00918 inline int32_t indexOf(const UnicodeString& text, 00919 int32_t start) const; 00920 00932 inline int32_t indexOf(const UnicodeString& text, 00933 int32_t start, 00934 int32_t length) const; 00935 00952 inline int32_t indexOf(const UnicodeString& srcText, 00953 int32_t srcStart, 00954 int32_t srcLength, 00955 int32_t start, 00956 int32_t length) const; 00957 00969 inline int32_t indexOf(const UChar *srcChars, 00970 int32_t srcLength, 00971 int32_t start) const; 00972 00985 inline int32_t indexOf(const UChar *srcChars, 00986 int32_t srcLength, 00987 int32_t start, 00988 int32_t length) const; 00989 01006 int32_t indexOf(const UChar *srcChars, 01007 int32_t srcStart, 01008 int32_t srcLength, 01009 int32_t start, 01010 int32_t length) const; 01011 01019 inline int32_t indexOf(UChar c) const; 01020 01029 inline int32_t indexOf(UChar32 c) const; 01030 01039 inline int32_t indexOf(UChar c, 01040 int32_t start) const; 01041 01051 inline int32_t indexOf(UChar32 c, 01052 int32_t start) const; 01053 01064 inline int32_t indexOf(UChar c, 01065 int32_t start, 01066 int32_t length) const; 01067 01079 inline int32_t indexOf(UChar32 c, 01080 int32_t start, 01081 int32_t length) const; 01082 01091 inline int32_t lastIndexOf(const UnicodeString& text) const; 01092 01102 inline int32_t lastIndexOf(const UnicodeString& text, 01103 
int32_t start) const; 01104 01116 inline int32_t lastIndexOf(const UnicodeString& text, 01117 int32_t start, 01118 int32_t length) const; 01119 01136 inline int32_t lastIndexOf(const UnicodeString& srcText, 01137 int32_t srcStart, 01138 int32_t srcLength, 01139 int32_t start, 01140 int32_t length) const; 01141 01152 inline int32_t lastIndexOf(const UChar *srcChars, 01153 int32_t srcLength, 01154 int32_t start) const; 01155 01168 inline int32_t lastIndexOf(const UChar *srcChars, 01169 int32_t srcLength, 01170 int32_t start, 01171 int32_t length) const; 01172 01189 int32_t lastIndexOf(const UChar *srcChars, 01190 int32_t srcStart, 01191 int32_t srcLength, 01192 int32_t start, 01193 int32_t length) const; 01194 01202 inline int32_t lastIndexOf(UChar c) const; 01203 01212 inline int32_t lastIndexOf(UChar32 c) const; 01213 01222 inline int32_t lastIndexOf(UChar c, 01223 int32_t start) const; 01224 01234 inline int32_t lastIndexOf(UChar32 c, 01235 int32_t start) const; 01236 01247 inline int32_t lastIndexOf(UChar c, 01248 int32_t start, 01249 int32_t length) const; 01250 01262 inline int32_t lastIndexOf(UChar32 c, 01263 int32_t start, 01264 int32_t length) const; 01265 01266 01267 /* Character access */ 01268 01277 inline UChar charAt(int32_t offset) const; 01278 01286 inline UChar operator[] (int32_t offset) const; 01287 01299 UChar32 char32At(int32_t offset) const; 01300 01316 int32_t getChar32Start(int32_t offset) const; 01317 01334 int32_t getChar32Limit(int32_t offset) const; 01335 01386 int32_t moveIndex32(int32_t index, int32_t delta) const; 01387 01388 /* Substring extraction */ 01389 01405 inline void extract(int32_t start, 01406 int32_t length, 01407 UChar *dst, 01408 int32_t dstStart = 0) const; 01409 01431 int32_t 01432 extract(UChar *dest, int32_t destCapacity, 01433 UErrorCode &errorCode) const; 01434 01445 inline void extract(int32_t start, 01446 int32_t length, 01447 UnicodeString& target) const; 01448 01460 inline void extractBetween(int32_t start, 01461 
int32_t limit, 01462 UChar *dst, 01463 int32_t dstStart = 0) const; 01464 01474 virtual void extractBetween(int32_t start, 01475 int32_t limit, 01476 UnicodeString& target) const; 01477 01499 int32_t extract(int32_t start, 01500 int32_t startLength, 01501 char *target, 01502 int32_t targetCapacity, 01503 enum EInvariant inv) const; 01504 01505 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01506 01526 int32_t extract(int32_t start, 01527 int32_t startLength, 01528 char *target, 01529 uint32_t targetLength) const; 01530 01531 #endif 01532 01533 #if !UCONFIG_NO_CONVERSION 01534 01560 inline int32_t extract(int32_t start, 01561 int32_t startLength, 01562 char *target, 01563 const char *codepage = 0) const; 01564 01594 int32_t extract(int32_t start, 01595 int32_t startLength, 01596 char *target, 01597 uint32_t targetLength, 01598 const char *codepage) const; 01599 01617 int32_t extract(char *dest, int32_t destCapacity, 01618 UConverter *cnv, 01619 UErrorCode &errorCode) const; 01620 01621 #endif 01622 01636 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01637 01648 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01649 01661 void toUTF8(ByteSink &sink) const; 01662 01663 #if U_HAVE_STD_STRING 01664 01677 template<typename StringClass> 01678 StringClass &toUTF8String(StringClass &result) const { 01679 StringByteSink<StringClass> sbs(&result); 01680 toUTF8(sbs); 01681 return result; 01682 } 01683 01684 #endif 01685 01701 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01702 01703 /* Length operations */ 01704 01713 inline int32_t length(void) const; 01714 01728 int32_t 01729 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01730 01754 UBool 01755 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01756 01762 inline UBool isEmpty(void) const; 01763 01773 inline int32_t getCapacity(void) const; 01774 01775 /* Other operations */ 01776 
01782 inline int32_t hashCode(void) const; 01783 01796 inline UBool isBogus(void) const; 01797 01798 01799 //======================================== 01800 // Write operations 01801 //======================================== 01802 01803 /* Assignment operations */ 01804 01812 UnicodeString &operator=(const UnicodeString &srcText); 01813 01834 UnicodeString &fastCopyFrom(const UnicodeString &src); 01835 01843 inline UnicodeString& operator= (UChar ch); 01844 01852 inline UnicodeString& operator= (UChar32 ch); 01853 01865 inline UnicodeString& setTo(const UnicodeString& srcText, 01866 int32_t srcStart); 01867 01881 inline UnicodeString& setTo(const UnicodeString& srcText, 01882 int32_t srcStart, 01883 int32_t srcLength); 01884 01893 inline UnicodeString& setTo(const UnicodeString& srcText); 01894 01903 inline UnicodeString& setTo(const UChar *srcChars, 01904 int32_t srcLength); 01905 01914 UnicodeString& setTo(UChar srcChar); 01915 01924 UnicodeString& setTo(UChar32 srcChar); 01925 01946 UnicodeString &setTo(UBool isTerminated, 01947 const UChar *text, 01948 int32_t textLength); 01949 01969 UnicodeString &setTo(UChar *buffer, 01970 int32_t buffLength, 01971 int32_t buffCapacity); 01972 02013 void setToBogus(); 02014 02022 UnicodeString& setCharAt(int32_t offset, 02023 UChar ch); 02024 02025 02026 /* Append operations */ 02027 02035 inline UnicodeString& operator+= (UChar ch); 02036 02044 inline UnicodeString& operator+= (UChar32 ch); 02045 02053 inline UnicodeString& operator+= (const UnicodeString& srcText); 02054 02069 inline UnicodeString& append(const UnicodeString& srcText, 02070 int32_t srcStart, 02071 int32_t srcLength); 02072 02080 inline UnicodeString& append(const UnicodeString& srcText); 02081 02095 inline UnicodeString& append(const UChar *srcChars, 02096 int32_t srcStart, 02097 int32_t srcLength); 02098 02108 inline UnicodeString& append(const UChar *srcChars, 02109 int32_t srcLength); 02110 02117 inline UnicodeString& append(UChar srcChar); 02118 02125 
UnicodeString& append(UChar32 srcChar); 02126 02127 02128 /* Insert operations */ 02129 02143 inline UnicodeString& insert(int32_t start, 02144 const UnicodeString& srcText, 02145 int32_t srcStart, 02146 int32_t srcLength); 02147 02156 inline UnicodeString& insert(int32_t start, 02157 const UnicodeString& srcText); 02158 02172 inline UnicodeString& insert(int32_t start, 02173 const UChar *srcChars, 02174 int32_t srcStart, 02175 int32_t srcLength); 02176 02186 inline UnicodeString& insert(int32_t start, 02187 const UChar *srcChars, 02188 int32_t srcLength); 02189 02198 inline UnicodeString& insert(int32_t start, 02199 UChar srcChar); 02200 02209 inline UnicodeString& insert(int32_t start, 02210 UChar32 srcChar); 02211 02212 02213 /* Replace operations */ 02214 02232 UnicodeString& replace(int32_t start, 02233 int32_t length, 02234 const UnicodeString& srcText, 02235 int32_t srcStart, 02236 int32_t srcLength); 02237 02250 UnicodeString& replace(int32_t start, 02251 int32_t length, 02252 const UnicodeString& srcText); 02253 02271 UnicodeString& replace(int32_t start, 02272 int32_t length, 02273 const UChar *srcChars, 02274 int32_t srcStart, 02275 int32_t srcLength); 02276 02289 inline UnicodeString& replace(int32_t start, 02290 int32_t length, 02291 const UChar *srcChars, 02292 int32_t srcLength); 02293 02305 inline UnicodeString& replace(int32_t start, 02306 int32_t length, 02307 UChar srcChar); 02308 02320 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 02321 02331 inline UnicodeString& replaceBetween(int32_t start, 02332 int32_t limit, 02333 const UnicodeString& srcText); 02334 02349 inline UnicodeString& replaceBetween(int32_t start, 02350 int32_t limit, 02351 const UnicodeString& srcText, 02352 int32_t srcStart, 02353 int32_t srcLimit); 02354 02365 virtual void handleReplaceBetween(int32_t start, 02366 int32_t limit, 02367 const UnicodeString& text); 02368 02374 virtual UBool hasMetaData() const; 02375 02391 virtual void copy(int32_t 
start, int32_t limit, int32_t dest); 02392 02393 /* Search and replace operations */ 02394 02403 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02404 const UnicodeString& newText); 02405 02417 inline UnicodeString& findAndReplace(int32_t start, 02418 int32_t length, 02419 const UnicodeString& oldText, 02420 const UnicodeString& newText); 02421 02439 UnicodeString& findAndReplace(int32_t start, 02440 int32_t length, 02441 const UnicodeString& oldText, 02442 int32_t oldStart, 02443 int32_t oldLength, 02444 const UnicodeString& newText, 02445 int32_t newStart, 02446 int32_t newLength); 02447 02448 02449 /* Remove operations */ 02450 02456 inline UnicodeString& remove(void); 02457 02466 inline UnicodeString& remove(int32_t start, 02467 int32_t length = (int32_t)INT32_MAX); 02468 02477 inline UnicodeString& removeBetween(int32_t start, 02478 int32_t limit = (int32_t)INT32_MAX); 02479 02489 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02490 02491 /* Length operations */ 02492 02504 UBool padLeading(int32_t targetLength, 02505 UChar padChar = 0x0020); 02506 02518 UBool padTrailing(int32_t targetLength, 02519 UChar padChar = 0x0020); 02520 02527 inline UBool truncate(int32_t targetLength); 02528 02534 UnicodeString& trim(void); 02535 02536 02537 /* Miscellaneous operations */ 02538 02544 inline UnicodeString& reverse(void); 02545 02554 inline UnicodeString& reverse(int32_t start, 02555 int32_t length); 02556 02563 UnicodeString& toUpper(void); 02564 02572 UnicodeString& toUpper(const Locale& locale); 02573 02580 UnicodeString& toLower(void); 02581 02589 UnicodeString& toLower(const Locale& locale); 02590 02591 #if !UCONFIG_NO_BREAK_ITERATION 02592 02619 UnicodeString &toTitle(BreakIterator *titleIter); 02620 02648 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02649 02681 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02682 02683 #endif 02684 02696 
UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02697 02698 //======================================== 02699 // Access to the internal buffer 02700 //======================================== 02701 02745 UChar *getBuffer(int32_t minCapacity); 02746 02767 void releaseBuffer(int32_t newLength=-1); 02768 02799 inline const UChar *getBuffer() const; 02800 02834 inline const UChar *getTerminatedBuffer(); 02835 02836 //======================================== 02837 // Constructors 02838 //======================================== 02839 02843 UnicodeString(); 02844 02856 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02857 02867 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 02868 02878 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 02879 02890 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 02891 02899 UnicodeString(const UChar *text, 02900 int32_t textLength); 02901 02921 UnicodeString(UBool isTerminated, 02922 const UChar *text, 02923 int32_t textLength); 02924 02943 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02944 02945 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02946 02966 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 02967 02976 UnicodeString(const char *codepageData, int32_t dataLength); 02977 02978 #endif 02979 02980 #if !UCONFIG_NO_CONVERSION 02981 02999 UnicodeString(const char *codepageData, const char *codepage); 03000 03018 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 03019 03041 UnicodeString( 03042 const char *src, int32_t srcLength, 03043 UConverter *cnv, 03044 UErrorCode &errorCode); 03045 03046 #endif 03047 03072 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 03073 03074 03080 UnicodeString(const UnicodeString& that); 03081 03088 UnicodeString(const UnicodeString& src, int32_t srcStart); 03089 03097 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 03098 
03115 virtual Replaceable *clone() const; 03116 03120 virtual ~UnicodeString(); 03121 03135 static UnicodeString fromUTF8(const StringPiece &utf8); 03136 03148 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03149 03150 /* Miscellaneous operations */ 03151 03186 UnicodeString unescape() const; 03187 03207 UChar32 unescapeAt(int32_t &offset) const; 03208 03214 static UClassID U_EXPORT2 getStaticClassID(); 03215 03221 virtual UClassID getDynamicClassID() const; 03222 03223 //======================================== 03224 // Implementation methods 03225 //======================================== 03226 03227 protected: 03232 virtual int32_t getLength() const; 03233 03239 virtual UChar getCharAt(int32_t offset) const; 03240 03246 virtual UChar32 getChar32At(int32_t offset) const; 03247 03248 private: 03249 // For char* constructors. Could be made public. 03250 UnicodeString &setToUTF8(const StringPiece &utf8); 03251 // For extract(char*). 03252 // We could make a toUTF8(target, capacity, errorCode) public but not 03253 // this version: New API will be cleaner if we make callers create substrings 03254 // rather than having start+length on every method, 03255 // and it should take a UErrorCode&. 
03256 int32_t 03257 toUTF8(int32_t start, int32_t len, 03258 char *target, int32_t capacity) const; 03259 03260 03261 inline int8_t 03262 doCompare(int32_t start, 03263 int32_t length, 03264 const UnicodeString& srcText, 03265 int32_t srcStart, 03266 int32_t srcLength) const; 03267 03268 int8_t doCompare(int32_t start, 03269 int32_t length, 03270 const UChar *srcChars, 03271 int32_t srcStart, 03272 int32_t srcLength) const; 03273 03274 inline int8_t 03275 doCompareCodePointOrder(int32_t start, 03276 int32_t length, 03277 const UnicodeString& srcText, 03278 int32_t srcStart, 03279 int32_t srcLength) const; 03280 03281 int8_t doCompareCodePointOrder(int32_t start, 03282 int32_t length, 03283 const UChar *srcChars, 03284 int32_t srcStart, 03285 int32_t srcLength) const; 03286 03287 inline int8_t 03288 doCaseCompare(int32_t start, 03289 int32_t length, 03290 const UnicodeString &srcText, 03291 int32_t srcStart, 03292 int32_t srcLength, 03293 uint32_t options) const; 03294 03295 int8_t 03296 doCaseCompare(int32_t start, 03297 int32_t length, 03298 const UChar *srcChars, 03299 int32_t srcStart, 03300 int32_t srcLength, 03301 uint32_t options) const; 03302 03303 int32_t doIndexOf(UChar c, 03304 int32_t start, 03305 int32_t length) const; 03306 03307 int32_t doIndexOf(UChar32 c, 03308 int32_t start, 03309 int32_t length) const; 03310 03311 int32_t doLastIndexOf(UChar c, 03312 int32_t start, 03313 int32_t length) const; 03314 03315 int32_t doLastIndexOf(UChar32 c, 03316 int32_t start, 03317 int32_t length) const; 03318 03319 void doExtract(int32_t start, 03320 int32_t length, 03321 UChar *dst, 03322 int32_t dstStart) const; 03323 03324 inline void doExtract(int32_t start, 03325 int32_t length, 03326 UnicodeString& target) const; 03327 03328 inline UChar doCharAt(int32_t offset) const; 03329 03330 UnicodeString& doReplace(int32_t start, 03331 int32_t length, 03332 const UnicodeString& srcText, 03333 int32_t srcStart, 03334 int32_t srcLength); 03335 03336 UnicodeString& 
doReplace(int32_t start, 03337 int32_t length, 03338 const UChar *srcChars, 03339 int32_t srcStart, 03340 int32_t srcLength); 03341 03342 UnicodeString& doReverse(int32_t start, 03343 int32_t length); 03344 03345 // calculate hash code 03346 int32_t doHashCode(void) const; 03347 03348 // get pointer to start of array 03349 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03350 inline UChar* getArrayStart(void); 03351 inline const UChar* getArrayStart(void) const; 03352 03353 // A UnicodeString object (not necessarily its current buffer) 03354 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03355 inline UBool isWritable() const; 03356 03357 // Is the current buffer writable? 03358 inline UBool isBufferWritable() const; 03359 03360 // None of the following does releaseArray(). 03361 inline void setLength(int32_t len); // sets only fShortLength and fLength 03362 inline void setToEmpty(); // sets fFlags=kShortString 03363 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03364 03365 // allocate the array; result may be fStackBuffer 03366 // sets refCount to 1 if appropriate 03367 // sets fArray, fCapacity, and fFlags 03368 // returns boolean for success or failure 03369 UBool allocate(int32_t capacity); 03370 03371 // release the array if owned 03372 void releaseArray(void); 03373 03374 // turn a bogus string into an empty one 03375 void unBogus(); 03376 03377 // implements assignment operator, copy constructor, and fastCopyFrom() 03378 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 03379 03380 // Pin start and limit to acceptable values. 03381 inline void pinIndex(int32_t& start) const; 03382 inline void pinIndices(int32_t& start, 03383 int32_t& length) const; 03384 03385 #if !UCONFIG_NO_CONVERSION 03386 03387 /* Internal extract() using UConverter. 
*/ 03388 int32_t doExtract(int32_t start, int32_t length, 03389 char *dest, int32_t destCapacity, 03390 UConverter *cnv, 03391 UErrorCode &errorCode) const; 03392 03393 /* 03394 * Real constructor for converting from codepage data. 03395 * It assumes that it is called with !fRefCounted. 03396 * 03397 * If <code>codepage==0</code>, then the default converter 03398 * is used for the platform encoding. 03399 * If <code>codepage</code> is an empty string (<code>""</code>), 03400 * then a simple conversion is performed on the codepage-invariant 03401 * subset ("invariant characters") of the platform encoding. See utypes.h. 03402 */ 03403 void doCodepageCreate(const char *codepageData, 03404 int32_t dataLength, 03405 const char *codepage); 03406 03407 /* 03408 * Worker function for creating a UnicodeString from 03409 * a codepage string using a UConverter. 03410 */ 03411 void 03412 doCodepageCreate(const char *codepageData, 03413 int32_t dataLength, 03414 UConverter *converter, 03415 UErrorCode &status); 03416 03417 #endif 03418 03419 /* 03420 * This function is called when write access to the array 03421 * is necessary. 03422 * 03423 * We need to make a copy of the array if 03424 * the buffer is read-only, or 03425 * the buffer is refCounted (shared), and refCount>1, or 03426 * the buffer is too small. 03427 * 03428 * Return FALSE if memory could not be allocated. 03429 */ 03430 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03431 int32_t growCapacity = -1, 03432 UBool doCopyArray = TRUE, 03433 int32_t **pBufferToDelete = 0, 03434 UBool forceClone = FALSE); 03435 03441 UnicodeString & 03442 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 03443 03444 // ref counting 03445 void addRef(void); 03446 int32_t removeRef(void); 03447 int32_t refCount(void) const; 03448 03449 // constants 03450 enum { 03451 // Set the stack buffer size so that sizeof(UnicodeString) is, 03452 // naturally (without padding), a multiple of sizeof(pointer). 
03453 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings 03454 kInvalidUChar=0xffff, // invalid UChar index 03455 kGrowSize=128, // grow size for this buffer 03456 kInvalidHashCode=0, // invalid hash code 03457 kEmptyHashCode=1, // hash code for empty string 03458 03459 // bit flag values for fFlags 03460 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03461 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 03462 kRefCounted=4, // there is a refCount field before the characters in fArray 03463 kBufferIsReadonly=8,// do not write to this buffer 03464 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03465 // and releaseBuffer(newLength) must be called 03466 03467 // combined values for convenience 03468 kShortString=kUsingStackBuffer, 03469 kLongString=kRefCounted, 03470 kReadonlyAlias=kBufferIsReadonly, 03471 kWritableAlias=0 03472 }; 03473 03474 friend class StringThreadTest; 03475 friend class UnicodeStringAppendable; 03476 03477 union StackBufferOrFields; // forward declaration necessary before friend declaration 03478 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03479 03480 /* 03481 * The following are all the class fields that are stored 03482 * in each UnicodeString object. 03483 * Note that UnicodeString has virtual functions, 03484 * therefore there is an implicit vtable pointer 03485 * as the first real field. 03486 * The fields should be aligned such that no padding is necessary. 03487 * On 32-bit machines, the size should be 32 bytes, 03488 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03489 * 03490 * We use a hack to achieve this. 
03491 * 03492 * With at least some compilers, each of the following is forced to 03493 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 03494 * rounded up with additional padding if the fields do not already fit that requirement: 03495 * - sizeof(class UnicodeString) 03496 * - offsetof(UnicodeString, fUnion) 03497 * - sizeof(fUnion) 03498 * - sizeof(fFields) 03499 * 03500 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 03501 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 03502 * (Padding at the end of fFields is ok: 03503 * As long as there is no padding after fStackBuffer, it is not wasted space.) 03504 * 03505 * We further assume that the compiler does not reorder the fields, 03506 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 03507 * with at most some padding (but no other field) in between. 03508 * (Padding there would be wasted space, but functionally harmless.) 03509 * 03510 * We use a few more sizeof(pointer)'s chunks of space with 03511 * fRestOfStackBuffer, fShortLength and fFlags, 03512 * to get up exactly to the intended sizeof(UnicodeString). 
03513 */ 03514 // (implicit) *vtable; 03515 union StackBufferOrFields { 03516 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03517 // else fFields is used 03518 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 03519 struct { 03520 UChar *fArray; // the Unicode data 03521 int32_t fCapacity; // capacity of fArray (in UChars) 03522 int32_t fLength; // number of characters in fArray if >127; else undefined 03523 } fFields; 03524 } fUnion; 03525 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 03526 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03527 uint8_t fFlags; // bit flags: see constants above 03528 }; 03529 03538 U_COMMON_API UnicodeString U_EXPORT2 03539 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03540 03541 //======================================== 03542 // Inline members 03543 //======================================== 03544 03545 //======================================== 03546 // Privates 03547 //======================================== 03548 03549 inline void 03550 UnicodeString::pinIndex(int32_t& start) const 03551 { 03552 // pin index 03553 if(start < 0) { 03554 start = 0; 03555 } else if(start > length()) { 03556 start = length(); 03557 } 03558 } 03559 03560 inline void 03561 UnicodeString::pinIndices(int32_t& start, 03562 int32_t& _length) const 03563 { 03564 // pin indices 03565 int32_t len = length(); 03566 if(start < 0) { 03567 start = 0; 03568 } else if(start > len) { 03569 start = len; 03570 } 03571 if(_length < 0) { 03572 _length = 0; 03573 } else if(_length > (len - start)) { 03574 _length = (len - start); 03575 } 03576 } 03577 03578 inline UChar* 03579 UnicodeString::getArrayStart() 03580 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03581 03582 inline const UChar* 03583 UnicodeString::getArrayStart() const 03584 { return (fFlags&kUsingStackBuffer) ? 
fUnion.fStackBuffer : fUnion.fFields.fArray; } 03585 03586 //======================================== 03587 // Read-only implementation methods 03588 //======================================== 03589 inline int32_t 03590 UnicodeString::length() const 03591 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03592 03593 inline int32_t 03594 UnicodeString::getCapacity() const 03595 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03596 03597 inline int32_t 03598 UnicodeString::hashCode() const 03599 { return doHashCode(); } 03600 03601 inline UBool 03602 UnicodeString::isBogus() const 03603 { return (UBool)(fFlags & kIsBogus); } 03604 03605 inline UBool 03606 UnicodeString::isWritable() const 03607 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03608 03609 inline UBool 03610 UnicodeString::isBufferWritable() const 03611 { 03612 return (UBool)( 03613 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03614 (!(fFlags&kRefCounted) || refCount()==1)); 03615 } 03616 03617 inline const UChar * 03618 UnicodeString::getBuffer() const { 03619 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03620 return 0; 03621 } else if(fFlags&kUsingStackBuffer) { 03622 return fUnion.fStackBuffer; 03623 } else { 03624 return fUnion.fFields.fArray; 03625 } 03626 } 03627 03628 //======================================== 03629 // Read-only alias methods 03630 //======================================== 03631 inline int8_t 03632 UnicodeString::doCompare(int32_t start, 03633 int32_t thisLength, 03634 const UnicodeString& srcText, 03635 int32_t srcStart, 03636 int32_t srcLength) const 03637 { 03638 if(srcText.isBogus()) { 03639 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03640 } else { 03641 srcText.pinIndices(srcStart, srcLength); 03642 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03643 } 03644 } 03645 03646 inline UBool 03647 UnicodeString::operator== (const UnicodeString& text) const 
03648 { 03649 if(isBogus()) { 03650 return text.isBogus(); 03651 } else { 03652 int32_t len = length(), textLength = text.length(); 03653 return 03654 !text.isBogus() && 03655 len == textLength && 03656 doCompare(0, len, text, 0, textLength) == 0; 03657 } 03658 } 03659 03660 inline UBool 03661 UnicodeString::operator!= (const UnicodeString& text) const 03662 { return (! operator==(text)); } 03663 03664 inline UBool 03665 UnicodeString::operator> (const UnicodeString& text) const 03666 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03667 03668 inline UBool 03669 UnicodeString::operator< (const UnicodeString& text) const 03670 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03671 03672 inline UBool 03673 UnicodeString::operator>= (const UnicodeString& text) const 03674 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03675 03676 inline UBool 03677 UnicodeString::operator<= (const UnicodeString& text) const 03678 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03679 03680 inline int8_t 03681 UnicodeString::compare(const UnicodeString& text) const 03682 { return doCompare(0, length(), text, 0, text.length()); } 03683 03684 inline int8_t 03685 UnicodeString::compare(int32_t start, 03686 int32_t _length, 03687 const UnicodeString& srcText) const 03688 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03689 03690 inline int8_t 03691 UnicodeString::compare(const UChar *srcChars, 03692 int32_t srcLength) const 03693 { return doCompare(0, length(), srcChars, 0, srcLength); } 03694 03695 inline int8_t 03696 UnicodeString::compare(int32_t start, 03697 int32_t _length, 03698 const UnicodeString& srcText, 03699 int32_t srcStart, 03700 int32_t srcLength) const 03701 { return doCompare(start, _length, srcText, srcStart, srcLength); } 03702 03703 inline int8_t 03704 UnicodeString::compare(int32_t start, 03705 int32_t _length, 03706 const UChar *srcChars) const 03707 { return doCompare(start, 
_length, srcChars, 0, _length); } 03708 03709 inline int8_t 03710 UnicodeString::compare(int32_t start, 03711 int32_t _length, 03712 const UChar *srcChars, 03713 int32_t srcStart, 03714 int32_t srcLength) const 03715 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03716 03717 inline int8_t 03718 UnicodeString::compareBetween(int32_t start, 03719 int32_t limit, 03720 const UnicodeString& srcText, 03721 int32_t srcStart, 03722 int32_t srcLimit) const 03723 { return doCompare(start, limit - start, 03724 srcText, srcStart, srcLimit - srcStart); } 03725 03726 inline int8_t 03727 UnicodeString::doCompareCodePointOrder(int32_t start, 03728 int32_t thisLength, 03729 const UnicodeString& srcText, 03730 int32_t srcStart, 03731 int32_t srcLength) const 03732 { 03733 if(srcText.isBogus()) { 03734 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03735 } else { 03736 srcText.pinIndices(srcStart, srcLength); 03737 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03738 } 03739 } 03740 03741 inline int8_t 03742 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03743 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03744 03745 inline int8_t 03746 UnicodeString::compareCodePointOrder(int32_t start, 03747 int32_t _length, 03748 const UnicodeString& srcText) const 03749 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03750 03751 inline int8_t 03752 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03753 int32_t srcLength) const 03754 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03755 03756 inline int8_t 03757 UnicodeString::compareCodePointOrder(int32_t start, 03758 int32_t _length, 03759 const UnicodeString& srcText, 03760 int32_t srcStart, 03761 int32_t srcLength) const 03762 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03763 03764 inline int8_t 03765 
UnicodeString::compareCodePointOrder(int32_t start, 03766 int32_t _length, 03767 const UChar *srcChars) const 03768 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03769 03770 inline int8_t 03771 UnicodeString::compareCodePointOrder(int32_t start, 03772 int32_t _length, 03773 const UChar *srcChars, 03774 int32_t srcStart, 03775 int32_t srcLength) const 03776 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03777 03778 inline int8_t 03779 UnicodeString::compareCodePointOrderBetween(int32_t start, 03780 int32_t limit, 03781 const UnicodeString& srcText, 03782 int32_t srcStart, 03783 int32_t srcLimit) const 03784 { return doCompareCodePointOrder(start, limit - start, 03785 srcText, srcStart, srcLimit - srcStart); } 03786 03787 inline int8_t 03788 UnicodeString::doCaseCompare(int32_t start, 03789 int32_t thisLength, 03790 const UnicodeString &srcText, 03791 int32_t srcStart, 03792 int32_t srcLength, 03793 uint32_t options) const 03794 { 03795 if(srcText.isBogus()) { 03796 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03797 } else { 03798 srcText.pinIndices(srcStart, srcLength); 03799 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03800 } 03801 } 03802 03803 inline int8_t 03804 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03805 return doCaseCompare(0, length(), text, 0, text.length(), options); 03806 } 03807 03808 inline int8_t 03809 UnicodeString::caseCompare(int32_t start, 03810 int32_t _length, 03811 const UnicodeString &srcText, 03812 uint32_t options) const { 03813 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03814 } 03815 03816 inline int8_t 03817 UnicodeString::caseCompare(const UChar *srcChars, 03818 int32_t srcLength, 03819 uint32_t options) const { 03820 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03821 } 03822 03823 inline int8_t 03824 
UnicodeString::caseCompare(int32_t start, 03825 int32_t _length, 03826 const UnicodeString &srcText, 03827 int32_t srcStart, 03828 int32_t srcLength, 03829 uint32_t options) const { 03830 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03831 } 03832 03833 inline int8_t 03834 UnicodeString::caseCompare(int32_t start, 03835 int32_t _length, 03836 const UChar *srcChars, 03837 uint32_t options) const { 03838 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03839 } 03840 03841 inline int8_t 03842 UnicodeString::caseCompare(int32_t start, 03843 int32_t _length, 03844 const UChar *srcChars, 03845 int32_t srcStart, 03846 int32_t srcLength, 03847 uint32_t options) const { 03848 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03849 } 03850 03851 inline int8_t 03852 UnicodeString::caseCompareBetween(int32_t start, 03853 int32_t limit, 03854 const UnicodeString &srcText, 03855 int32_t srcStart, 03856 int32_t srcLimit, 03857 uint32_t options) const { 03858 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03859 } 03860 03861 inline int32_t 03862 UnicodeString::indexOf(const UnicodeString& srcText, 03863 int32_t srcStart, 03864 int32_t srcLength, 03865 int32_t start, 03866 int32_t _length) const 03867 { 03868 if(!srcText.isBogus()) { 03869 srcText.pinIndices(srcStart, srcLength); 03870 if(srcLength > 0) { 03871 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03872 } 03873 } 03874 return -1; 03875 } 03876 03877 inline int32_t 03878 UnicodeString::indexOf(const UnicodeString& text) const 03879 { return indexOf(text, 0, text.length(), 0, length()); } 03880 03881 inline int32_t 03882 UnicodeString::indexOf(const UnicodeString& text, 03883 int32_t start) const { 03884 pinIndex(start); 03885 return indexOf(text, 0, text.length(), start, length() - start); 03886 } 03887 03888 inline int32_t 03889 UnicodeString::indexOf(const 
UnicodeString& text, 03890 int32_t start, 03891 int32_t _length) const 03892 { return indexOf(text, 0, text.length(), start, _length); } 03893 03894 inline int32_t 03895 UnicodeString::indexOf(const UChar *srcChars, 03896 int32_t srcLength, 03897 int32_t start) const { 03898 pinIndex(start); 03899 return indexOf(srcChars, 0, srcLength, start, length() - start); 03900 } 03901 03902 inline int32_t 03903 UnicodeString::indexOf(const UChar *srcChars, 03904 int32_t srcLength, 03905 int32_t start, 03906 int32_t _length) const 03907 { return indexOf(srcChars, 0, srcLength, start, _length); } 03908 03909 inline int32_t 03910 UnicodeString::indexOf(UChar c, 03911 int32_t start, 03912 int32_t _length) const 03913 { return doIndexOf(c, start, _length); } 03914 03915 inline int32_t 03916 UnicodeString::indexOf(UChar32 c, 03917 int32_t start, 03918 int32_t _length) const 03919 { return doIndexOf(c, start, _length); } 03920 03921 inline int32_t 03922 UnicodeString::indexOf(UChar c) const 03923 { return doIndexOf(c, 0, length()); } 03924 03925 inline int32_t 03926 UnicodeString::indexOf(UChar32 c) const 03927 { return indexOf(c, 0, length()); } 03928 03929 inline int32_t 03930 UnicodeString::indexOf(UChar c, 03931 int32_t start) const { 03932 pinIndex(start); 03933 return doIndexOf(c, start, length() - start); 03934 } 03935 03936 inline int32_t 03937 UnicodeString::indexOf(UChar32 c, 03938 int32_t start) const { 03939 pinIndex(start); 03940 return indexOf(c, start, length() - start); 03941 } 03942 03943 inline int32_t 03944 UnicodeString::lastIndexOf(const UChar *srcChars, 03945 int32_t srcLength, 03946 int32_t start, 03947 int32_t _length) const 03948 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03949 03950 inline int32_t 03951 UnicodeString::lastIndexOf(const UChar *srcChars, 03952 int32_t srcLength, 03953 int32_t start) const { 03954 pinIndex(start); 03955 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03956 } 03957 03958 inline int32_t 
03959 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03960 int32_t srcStart, 03961 int32_t srcLength, 03962 int32_t start, 03963 int32_t _length) const 03964 { 03965 if(!srcText.isBogus()) { 03966 srcText.pinIndices(srcStart, srcLength); 03967 if(srcLength > 0) { 03968 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03969 } 03970 } 03971 return -1; 03972 } 03973 03974 inline int32_t 03975 UnicodeString::lastIndexOf(const UnicodeString& text, 03976 int32_t start, 03977 int32_t _length) const 03978 { return lastIndexOf(text, 0, text.length(), start, _length); } 03979 03980 inline int32_t 03981 UnicodeString::lastIndexOf(const UnicodeString& text, 03982 int32_t start) const { 03983 pinIndex(start); 03984 return lastIndexOf(text, 0, text.length(), start, length() - start); 03985 } 03986 03987 inline int32_t 03988 UnicodeString::lastIndexOf(const UnicodeString& text) const 03989 { return lastIndexOf(text, 0, text.length(), 0, length()); } 03990 03991 inline int32_t 03992 UnicodeString::lastIndexOf(UChar c, 03993 int32_t start, 03994 int32_t _length) const 03995 { return doLastIndexOf(c, start, _length); } 03996 03997 inline int32_t 03998 UnicodeString::lastIndexOf(UChar32 c, 03999 int32_t start, 04000 int32_t _length) const { 04001 return doLastIndexOf(c, start, _length); 04002 } 04003 04004 inline int32_t 04005 UnicodeString::lastIndexOf(UChar c) const 04006 { return doLastIndexOf(c, 0, length()); } 04007 04008 inline int32_t 04009 UnicodeString::lastIndexOf(UChar32 c) const { 04010 return lastIndexOf(c, 0, length()); 04011 } 04012 04013 inline int32_t 04014 UnicodeString::lastIndexOf(UChar c, 04015 int32_t start) const { 04016 pinIndex(start); 04017 return doLastIndexOf(c, start, length() - start); 04018 } 04019 04020 inline int32_t 04021 UnicodeString::lastIndexOf(UChar32 c, 04022 int32_t start) const { 04023 pinIndex(start); 04024 return lastIndexOf(c, start, length() - start); 04025 } 04026 04027 inline UBool 04028 
UnicodeString::startsWith(const UnicodeString& text) const 04029 { return compare(0, text.length(), text, 0, text.length()) == 0; } 04030 04031 inline UBool 04032 UnicodeString::startsWith(const UnicodeString& srcText, 04033 int32_t srcStart, 04034 int32_t srcLength) const 04035 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 04036 04037 inline UBool 04038 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 04039 if(srcLength < 0) { 04040 srcLength = u_strlen(srcChars); 04041 } 04042 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 04043 } 04044 04045 inline UBool 04046 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 04047 if(srcLength < 0) { 04048 srcLength = u_strlen(srcChars); 04049 } 04050 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 04051 } 04052 04053 inline UBool 04054 UnicodeString::endsWith(const UnicodeString& text) const 04055 { return doCompare(length() - text.length(), text.length(), 04056 text, 0, text.length()) == 0; } 04057 04058 inline UBool 04059 UnicodeString::endsWith(const UnicodeString& srcText, 04060 int32_t srcStart, 04061 int32_t srcLength) const { 04062 srcText.pinIndices(srcStart, srcLength); 04063 return doCompare(length() - srcLength, srcLength, 04064 srcText, srcStart, srcLength) == 0; 04065 } 04066 04067 inline UBool 04068 UnicodeString::endsWith(const UChar *srcChars, 04069 int32_t srcLength) const { 04070 if(srcLength < 0) { 04071 srcLength = u_strlen(srcChars); 04072 } 04073 return doCompare(length() - srcLength, srcLength, 04074 srcChars, 0, srcLength) == 0; 04075 } 04076 04077 inline UBool 04078 UnicodeString::endsWith(const UChar *srcChars, 04079 int32_t srcStart, 04080 int32_t srcLength) const { 04081 if(srcLength < 0) { 04082 srcLength = u_strlen(srcChars + srcStart); 04083 } 04084 return doCompare(length() - srcLength, srcLength, 04085 srcChars, srcStart, srcLength) == 0; 04086 } 04087 04088 
//======================================== 04089 // replace 04090 //======================================== 04091 inline UnicodeString& 04092 UnicodeString::replace(int32_t start, 04093 int32_t _length, 04094 const UnicodeString& srcText) 04095 { return doReplace(start, _length, srcText, 0, srcText.length()); } 04096 04097 inline UnicodeString& 04098 UnicodeString::replace(int32_t start, 04099 int32_t _length, 04100 const UnicodeString& srcText, 04101 int32_t srcStart, 04102 int32_t srcLength) 04103 { return doReplace(start, _length, srcText, srcStart, srcLength); } 04104 04105 inline UnicodeString& 04106 UnicodeString::replace(int32_t start, 04107 int32_t _length, 04108 const UChar *srcChars, 04109 int32_t srcLength) 04110 { return doReplace(start, _length, srcChars, 0, srcLength); } 04111 04112 inline UnicodeString& 04113 UnicodeString::replace(int32_t start, 04114 int32_t _length, 04115 const UChar *srcChars, 04116 int32_t srcStart, 04117 int32_t srcLength) 04118 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04119 04120 inline UnicodeString& 04121 UnicodeString::replace(int32_t start, 04122 int32_t _length, 04123 UChar srcChar) 04124 { return doReplace(start, _length, &srcChar, 0, 1); } 04125 04126 inline UnicodeString& 04127 UnicodeString::replaceBetween(int32_t start, 04128 int32_t limit, 04129 const UnicodeString& srcText) 04130 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 04131 04132 inline UnicodeString& 04133 UnicodeString::replaceBetween(int32_t start, 04134 int32_t limit, 04135 const UnicodeString& srcText, 04136 int32_t srcStart, 04137 int32_t srcLimit) 04138 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04139 04140 inline UnicodeString& 04141 UnicodeString::findAndReplace(const UnicodeString& oldText, 04142 const UnicodeString& newText) 04143 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04144 newText, 0, newText.length()); } 04145 04146 
inline UnicodeString& 04147 UnicodeString::findAndReplace(int32_t start, 04148 int32_t _length, 04149 const UnicodeString& oldText, 04150 const UnicodeString& newText) 04151 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04152 newText, 0, newText.length()); } 04153 04154 // ============================ 04155 // extract 04156 // ============================ 04157 inline void 04158 UnicodeString::doExtract(int32_t start, 04159 int32_t _length, 04160 UnicodeString& target) const 04161 { target.replace(0, target.length(), *this, start, _length); } 04162 04163 inline void 04164 UnicodeString::extract(int32_t start, 04165 int32_t _length, 04166 UChar *target, 04167 int32_t targetStart) const 04168 { doExtract(start, _length, target, targetStart); } 04169 04170 inline void 04171 UnicodeString::extract(int32_t start, 04172 int32_t _length, 04173 UnicodeString& target) const 04174 { doExtract(start, _length, target); } 04175 04176 #if !UCONFIG_NO_CONVERSION 04177 04178 inline int32_t 04179 UnicodeString::extract(int32_t start, 04180 int32_t _length, 04181 char *dst, 04182 const char *codepage) const 04183 04184 { 04185 // This dstSize value will be checked explicitly 04186 return extract(start, _length, dst, dst!=0 ? 
0xffffffff : 0, codepage); 04187 } 04188 04189 #endif 04190 04191 inline void 04192 UnicodeString::extractBetween(int32_t start, 04193 int32_t limit, 04194 UChar *dst, 04195 int32_t dstStart) const { 04196 pinIndex(start); 04197 pinIndex(limit); 04198 doExtract(start, limit - start, dst, dstStart); 04199 } 04200 04201 inline UnicodeString 04202 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04203 return tempSubString(start, limit - start); 04204 } 04205 04206 inline UChar 04207 UnicodeString::doCharAt(int32_t offset) const 04208 { 04209 if((uint32_t)offset < (uint32_t)length()) { 04210 return getArrayStart()[offset]; 04211 } else { 04212 return kInvalidUChar; 04213 } 04214 } 04215 04216 inline UChar 04217 UnicodeString::charAt(int32_t offset) const 04218 { return doCharAt(offset); } 04219 04220 inline UChar 04221 UnicodeString::operator[] (int32_t offset) const 04222 { return doCharAt(offset); } 04223 04224 inline UBool 04225 UnicodeString::isEmpty() const { 04226 return fShortLength == 0; 04227 } 04228 04229 //======================================== 04230 // Write implementation methods 04231 //======================================== 04232 inline void 04233 UnicodeString::setLength(int32_t len) { 04234 if(len <= 127) { 04235 fShortLength = (int8_t)len; 04236 } else { 04237 fShortLength = (int8_t)-1; 04238 fUnion.fFields.fLength = len; 04239 } 04240 } 04241 04242 inline void 04243 UnicodeString::setToEmpty() { 04244 fShortLength = 0; 04245 fFlags = kShortString; 04246 } 04247 04248 inline void 04249 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04250 setLength(len); 04251 fUnion.fFields.fArray = array; 04252 fUnion.fFields.fCapacity = capacity; 04253 } 04254 04255 inline const UChar * 04256 UnicodeString::getTerminatedBuffer() { 04257 if(!isWritable()) { 04258 return 0; 04259 } else { 04260 UChar *array = getArrayStart(); 04261 int32_t len = length(); 04262 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || 
refCount() == 1)) { 04263 /* 04264 * kRefCounted: Do not write the NUL if the buffer is shared. 04265 * That is mostly safe, except when the length of one copy was modified 04266 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04267 * Then the NUL would be written into the middle of another copy's string. 04268 */ 04269 if(!(fFlags&kBufferIsReadonly)) { 04270 /* 04271 * We must not write to a readonly buffer, but it is known to be 04272 * NUL-terminated if len<capacity. 04273 * A shared, allocated buffer (refCount()>1) must not have its contents 04274 * modified, but the NUL at [len] is beyond the string contents, 04275 * and multiple string objects and threads writing the same NUL into the 04276 * same location is harmless. 04277 * In all other cases, the buffer is fully writable and it is anyway safe 04278 * to write the NUL. 04279 * 04280 * Note: An earlier version of this code tested whether there is a NUL 04281 * at [len] already, but, while safe, it generated lots of warnings from 04282 * tools like valgrind and Purify. 
04283 */ 04284 array[len] = 0; 04285 } 04286 return array; 04287 } else if(cloneArrayIfNeeded(len+1)) { 04288 array = getArrayStart(); 04289 array[len] = 0; 04290 return array; 04291 } else { 04292 return 0; 04293 } 04294 } 04295 } 04296 04297 inline UnicodeString& 04298 UnicodeString::operator= (UChar ch) 04299 { return doReplace(0, length(), &ch, 0, 1); } 04300 04301 inline UnicodeString& 04302 UnicodeString::operator= (UChar32 ch) 04303 { return replace(0, length(), ch); } 04304 04305 inline UnicodeString& 04306 UnicodeString::setTo(const UnicodeString& srcText, 04307 int32_t srcStart, 04308 int32_t srcLength) 04309 { 04310 unBogus(); 04311 return doReplace(0, length(), srcText, srcStart, srcLength); 04312 } 04313 04314 inline UnicodeString& 04315 UnicodeString::setTo(const UnicodeString& srcText, 04316 int32_t srcStart) 04317 { 04318 unBogus(); 04319 srcText.pinIndex(srcStart); 04320 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04321 } 04322 04323 inline UnicodeString& 04324 UnicodeString::setTo(const UnicodeString& srcText) 04325 { 04326 return copyFrom(srcText); 04327 } 04328 04329 inline UnicodeString& 04330 UnicodeString::setTo(const UChar *srcChars, 04331 int32_t srcLength) 04332 { 04333 unBogus(); 04334 return doReplace(0, length(), srcChars, 0, srcLength); 04335 } 04336 04337 inline UnicodeString& 04338 UnicodeString::setTo(UChar srcChar) 04339 { 04340 unBogus(); 04341 return doReplace(0, length(), &srcChar, 0, 1); 04342 } 04343 04344 inline UnicodeString& 04345 UnicodeString::setTo(UChar32 srcChar) 04346 { 04347 unBogus(); 04348 return replace(0, length(), srcChar); 04349 } 04350 04351 inline UnicodeString& 04352 UnicodeString::append(const UnicodeString& srcText, 04353 int32_t srcStart, 04354 int32_t srcLength) 04355 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 04356 04357 inline UnicodeString& 04358 UnicodeString::append(const UnicodeString& srcText) 04359 { return doReplace(length(), 0, srcText, 
0, srcText.length()); } 04360 04361 inline UnicodeString& 04362 UnicodeString::append(const UChar *srcChars, 04363 int32_t srcStart, 04364 int32_t srcLength) 04365 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04366 04367 inline UnicodeString& 04368 UnicodeString::append(const UChar *srcChars, 04369 int32_t srcLength) 04370 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04371 04372 inline UnicodeString& 04373 UnicodeString::append(UChar srcChar) 04374 { return doReplace(length(), 0, &srcChar, 0, 1); } 04375 04376 inline UnicodeString& 04377 UnicodeString::operator+= (UChar ch) 04378 { return doReplace(length(), 0, &ch, 0, 1); } 04379 04380 inline UnicodeString& 04381 UnicodeString::operator+= (UChar32 ch) { 04382 return append(ch); 04383 } 04384 04385 inline UnicodeString& 04386 UnicodeString::operator+= (const UnicodeString& srcText) 04387 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04388 04389 inline UnicodeString& 04390 UnicodeString::insert(int32_t start, 04391 const UnicodeString& srcText, 04392 int32_t srcStart, 04393 int32_t srcLength) 04394 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04395 04396 inline UnicodeString& 04397 UnicodeString::insert(int32_t start, 04398 const UnicodeString& srcText) 04399 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04400 04401 inline UnicodeString& 04402 UnicodeString::insert(int32_t start, 04403 const UChar *srcChars, 04404 int32_t srcStart, 04405 int32_t srcLength) 04406 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04407 04408 inline UnicodeString& 04409 UnicodeString::insert(int32_t start, 04410 const UChar *srcChars, 04411 int32_t srcLength) 04412 { return doReplace(start, 0, srcChars, 0, srcLength); } 04413 04414 inline UnicodeString& 04415 UnicodeString::insert(int32_t start, 04416 UChar srcChar) 04417 { return doReplace(start, 0, &srcChar, 0, 1); } 04418 04419 inline UnicodeString& 04420 UnicodeString::insert(int32_t 
start, 04421 UChar32 srcChar) 04422 { return replace(start, 0, srcChar); } 04423 04424 04425 inline UnicodeString& 04426 UnicodeString::remove() 04427 { 04428 // remove() of a bogus string makes the string empty and non-bogus 04429 // we also un-alias a read-only alias to deal with NUL-termination 04430 // issues with getTerminatedBuffer() 04431 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04432 setToEmpty(); 04433 } else { 04434 fShortLength = 0; 04435 } 04436 return *this; 04437 } 04438 04439 inline UnicodeString& 04440 UnicodeString::remove(int32_t start, 04441 int32_t _length) 04442 { 04443 if(start <= 0 && _length == INT32_MAX) { 04444 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04445 return remove(); 04446 } 04447 return doReplace(start, _length, NULL, 0, 0); 04448 } 04449 04450 inline UnicodeString& 04451 UnicodeString::removeBetween(int32_t start, 04452 int32_t limit) 04453 { return doReplace(start, limit - start, NULL, 0, 0); } 04454 04455 inline UnicodeString & 04456 UnicodeString::retainBetween(int32_t start, int32_t limit) { 04457 truncate(limit); 04458 return doReplace(0, start, NULL, 0, 0); 04459 } 04460 04461 inline UBool 04462 UnicodeString::truncate(int32_t targetLength) 04463 { 04464 if(isBogus() && targetLength == 0) { 04465 // truncate(0) of a bogus string makes the string empty and non-bogus 04466 unBogus(); 04467 return FALSE; 04468 } else if((uint32_t)targetLength < (uint32_t)length()) { 04469 setLength(targetLength); 04470 if(fFlags&kBufferIsReadonly) { 04471 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04472 } 04473 return TRUE; 04474 } else { 04475 return FALSE; 04476 } 04477 } 04478 04479 inline UnicodeString& 04480 UnicodeString::reverse() 04481 { return doReverse(0, length()); } 04482 04483 inline UnicodeString& 04484 UnicodeString::reverse(int32_t start, 04485 int32_t _length) 04486 { return doReverse(start, _length); } 04487 04488 U_NAMESPACE_END 04489 04490 #endif