ICU 49.1.1  49.1.1
unistr.h
Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2011, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 #include "unicode/ucasemap.h"
00035 
00036 struct UConverter;          // unicode/ucnv.h
00037 class  StringThreadTest;
00038 
00039 #ifndef U_COMPARE_CODE_POINT_ORDER
00040 /* see also ustring.h and unorm.h */
00046 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00047 #endif
00048 
00049 #ifndef USTRING_H
00050 
/*
 * Local declaration of the C function u_strlen().
 * The enclosing #ifndef USTRING_H skips this declaration when USTRING_H is
 * already defined (presumably the include guard of unicode/ustring.h, which
 * would then supply the declaration itself - TODO confirm).
 * Takes a pointer to constant UChars and returns an int32_t; presumably the
 * number of UChars before the terminating NUL - confirm against ustring.h.
 */
00053 U_STABLE int32_t U_EXPORT2
00054 u_strlen(const UChar *s);
00055 #endif
00056 
00057 #ifndef U_STRING_CASE_MAPPER_DEFINED
00058 #define U_STRING_CASE_MAPPER_DEFINED
00059 
/*
 * Function type (not a pointer type) for string case-mapping functions.
 * The private member UnicodeString::caseMap() declared later in this header
 * takes a "UStringCaseMapper *" as its second argument.
 *
 * A function of this type receives:
 *   csm                  - the UCaseMap to use
 *   dest, destCapacity   - output UChar buffer and its capacity
 *   src, srcLength       - input UChar buffer and its length
 *   pErrorCode           - pointer to a UErrorCode
 * and returns an int32_t (presumably the length of the mapped output -
 * TODO confirm against the implementations).
 *
 * The surrounding U_STRING_CASE_MAPPER_DEFINED guard prevents the typedef
 * from being repeated when that macro has already been defined.
 */
00064 typedef int32_t U_CALLCONV
00065 UStringCaseMapper(const UCaseMap *csm,
00066                   UChar *dest, int32_t destCapacity,
00067                   const UChar *src, int32_t srcLength,
00068                   UErrorCode *pErrorCode);
00069 
00070 #endif
00071 
00072 U_NAMESPACE_BEGIN
00073 
00074 class BreakIterator;        // unicode/brkiter.h
00075 class Locale;               // unicode/locid.h
00076 class StringCharacterIterator;
00077 class UnicodeStringAppendable;  // unicode/appendable.h
00078 
00079 /* The <iostream> include has been moved to unicode/ustream.h */
00080 
/*
 * Shorthand for the enumerator icu::UnicodeString::kInvariant, the only value
 * of the enum UnicodeString::EInvariant declared in the class below.
 * It is passed where a member takes an "enum EInvariant" argument, e.g. as the
 * last argument of UnicodeString(const char *src, int32_t length, EInvariant),
 * which is what the fallback definition of UNICODE_STRING does.
 */
00091 #define US_INV icu::UnicodeString::kInvariant
00092 
/*
 * UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the string
 * constant cs with the given length. The expansion depends on the platform
 * configuration; the first matching case wins:
 *
 *  1. U_DECLARE_UTF16 is defined: cs is wrapped in U_DECLARE_UTF16(), cast to
 *     const UChar *, and passed to the
 *     UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength)
 *     constructor with isTerminated == TRUE.
 *  2. wchar_t has the same size as UChar, and either the charset family is
 *     ASCII or UChar is 2 bytes and U_WCHAR_IS_UTF16 is defined: cs is turned
 *     into a wide literal by pasting an L prefix onto it, then handled as in
 *     case 1. Because of the token pasting, cs has to be a string literal.
 *  3. UChar is 1 byte and the charset family is ASCII: cs itself is cast to
 *     const UChar * and handled as in case 1.
 *  4. Otherwise: cs is passed as a const char * to the
 *     UnicodeString(const char *src, int32_t length, EInvariant) constructor
 *     with US_INV.
 *
 * NOTE(review): cases 1-3 hand the constructor a pointer into the literal
 * itself; whether the resulting string aliases or copies that storage is
 * decided by the constructor implementation, which is not visible here.
 */
00110 #if defined(U_DECLARE_UTF16)
00111 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00112 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00113 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00114 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00115 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
00116 #else
00117 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
00118 #endif
00119 
/*
 * Convenience form of UNICODE_STRING that supplies -1 as the length argument,
 * so the caller only writes the string constant.
 * NOTE(review): -1 presumably tells the constructor to determine the length
 * from the NUL terminator - confirm against the constructor documentation.
 */
00133 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00134 
/*
 * UNISTR_FROM_CHAR_EXPLICIT is placed in front of the single-argument
 * constructors UnicodeString(UChar ch) and UnicodeString(UChar32 ch).
 *
 * - When any of U_COMBINED_IMPLEMENTATION, U_COMMON_IMPLEMENTATION,
 *   U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is defined, it expands to
 *   "explicit", so those constructors cannot be used for implicit conversion.
 * - Otherwise it expands to nothing and implicit conversion stays allowed.
 *
 * Because of the outer #ifndef, a definition made before this header is
 * included (e.g. -DUNISTR_FROM_CHAR_EXPLICIT=explicit) takes precedence.
 */
00142 #ifndef UNISTR_FROM_CHAR_EXPLICIT
00143 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00144     // Auto-"explicit" in ICU library code.
00145 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
00146 # else
00147     // Empty by default for source code compatibility.
00148 #   define UNISTR_FROM_CHAR_EXPLICIT
00149 # endif
00150 #endif
00151 
/*
 * UNISTR_FROM_STRING_EXPLICIT is placed in front of the single-argument
 * constructors UnicodeString(const UChar *text) and
 * UnicodeString(const char *codepageData).
 *
 * - When any of U_COMBINED_IMPLEMENTATION, U_COMMON_IMPLEMENTATION,
 *   U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is defined, it expands to
 *   "explicit", so those constructors cannot be used for implicit conversion.
 * - Otherwise it expands to nothing and implicit conversion stays allowed.
 *
 * Because of the outer #ifndef, a definition made before this header is
 * included (e.g. -DUNISTR_FROM_STRING_EXPLICIT=explicit) takes precedence.
 */
00162 #ifndef UNISTR_FROM_STRING_EXPLICIT
00163 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
00164     // Auto-"explicit" in ICU library code.
00165 #   define UNISTR_FROM_STRING_EXPLICIT explicit
00166 # else
00167     // Empty by default for source code compatibility.
00168 #   define UNISTR_FROM_STRING_EXPLICIT
00169 # endif
00170 #endif
00171 
00241 class U_COMMON_API UnicodeString : public Replaceable
00242 {
00243 public:
00244 
00253   enum EInvariant {
00258     kInvariant
00259   };
00260 
00261   //========================================
00262   // Read-only operations
00263   //========================================
00264 
00265   /* Comparison - bitwise only - for international comparison use collation */
00266 
00274   inline UBool operator== (const UnicodeString& text) const;
00275 
00283   inline UBool operator!= (const UnicodeString& text) const;
00284 
00292   inline UBool operator> (const UnicodeString& text) const;
00293 
00301   inline UBool operator< (const UnicodeString& text) const;
00302 
00310   inline UBool operator>= (const UnicodeString& text) const;
00311 
00319   inline UBool operator<= (const UnicodeString& text) const;
00320 
00332   inline int8_t compare(const UnicodeString& text) const;
00333 
00348   inline int8_t compare(int32_t start,
00349          int32_t length,
00350          const UnicodeString& text) const;
00351 
00369    inline int8_t compare(int32_t start,
00370          int32_t length,
00371          const UnicodeString& srcText,
00372          int32_t srcStart,
00373          int32_t srcLength) const;
00374 
00387   inline int8_t compare(const UChar *srcChars,
00388          int32_t srcLength) const;
00389 
00404   inline int8_t compare(int32_t start,
00405          int32_t length,
00406          const UChar *srcChars) const;
00407 
00425   inline int8_t compare(int32_t start,
00426          int32_t length,
00427          const UChar *srcChars,
00428          int32_t srcStart,
00429          int32_t srcLength) const;
00430 
00448   inline int8_t compareBetween(int32_t start,
00449             int32_t limit,
00450             const UnicodeString& srcText,
00451             int32_t srcStart,
00452             int32_t srcLimit) const;
00453 
00471   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00472 
00492   inline int8_t compareCodePointOrder(int32_t start,
00493                                       int32_t length,
00494                                       const UnicodeString& srcText) const;
00495 
00517    inline int8_t compareCodePointOrder(int32_t start,
00518                                        int32_t length,
00519                                        const UnicodeString& srcText,
00520                                        int32_t srcStart,
00521                                        int32_t srcLength) const;
00522 
00541   inline int8_t compareCodePointOrder(const UChar *srcChars,
00542                                       int32_t srcLength) const;
00543 
00563   inline int8_t compareCodePointOrder(int32_t start,
00564                                       int32_t length,
00565                                       const UChar *srcChars) const;
00566 
00588   inline int8_t compareCodePointOrder(int32_t start,
00589                                       int32_t length,
00590                                       const UChar *srcChars,
00591                                       int32_t srcStart,
00592                                       int32_t srcLength) const;
00593 
00615   inline int8_t compareCodePointOrderBetween(int32_t start,
00616                                              int32_t limit,
00617                                              const UnicodeString& srcText,
00618                                              int32_t srcStart,
00619                                              int32_t srcLimit) const;
00620 
00639   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00640 
00661   inline int8_t caseCompare(int32_t start,
00662          int32_t length,
00663          const UnicodeString& srcText,
00664          uint32_t options) const;
00665 
00688   inline int8_t caseCompare(int32_t start,
00689          int32_t length,
00690          const UnicodeString& srcText,
00691          int32_t srcStart,
00692          int32_t srcLength,
00693          uint32_t options) const;
00694 
00714   inline int8_t caseCompare(const UChar *srcChars,
00715          int32_t srcLength,
00716          uint32_t options) const;
00717 
00738   inline int8_t caseCompare(int32_t start,
00739          int32_t length,
00740          const UChar *srcChars,
00741          uint32_t options) const;
00742 
00765   inline int8_t caseCompare(int32_t start,
00766          int32_t length,
00767          const UChar *srcChars,
00768          int32_t srcStart,
00769          int32_t srcLength,
00770          uint32_t options) const;
00771 
00794   inline int8_t caseCompareBetween(int32_t start,
00795             int32_t limit,
00796             const UnicodeString& srcText,
00797             int32_t srcStart,
00798             int32_t srcLimit,
00799             uint32_t options) const;
00800 
00808   inline UBool startsWith(const UnicodeString& text) const;
00809 
00820   inline UBool startsWith(const UnicodeString& srcText,
00821             int32_t srcStart,
00822             int32_t srcLength) const;
00823 
00832   inline UBool startsWith(const UChar *srcChars,
00833             int32_t srcLength) const;
00834 
00844   inline UBool startsWith(const UChar *srcChars,
00845             int32_t srcStart,
00846             int32_t srcLength) const;
00847 
00855   inline UBool endsWith(const UnicodeString& text) const;
00856 
00867   inline UBool endsWith(const UnicodeString& srcText,
00868           int32_t srcStart,
00869           int32_t srcLength) const;
00870 
00879   inline UBool endsWith(const UChar *srcChars,
00880           int32_t srcLength) const;
00881 
00892   inline UBool endsWith(const UChar *srcChars,
00893           int32_t srcStart,
00894           int32_t srcLength) const;
00895 
00896 
00897   /* Searching - bitwise only */
00898 
00907   inline int32_t indexOf(const UnicodeString& text) const;
00908 
00918   inline int32_t indexOf(const UnicodeString& text,
00919               int32_t start) const;
00920 
00932   inline int32_t indexOf(const UnicodeString& text,
00933               int32_t start,
00934               int32_t length) const;
00935 
00952   inline int32_t indexOf(const UnicodeString& srcText,
00953               int32_t srcStart,
00954               int32_t srcLength,
00955               int32_t start,
00956               int32_t length) const;
00957 
00969   inline int32_t indexOf(const UChar *srcChars,
00970               int32_t srcLength,
00971               int32_t start) const;
00972 
00985   inline int32_t indexOf(const UChar *srcChars,
00986               int32_t srcLength,
00987               int32_t start,
00988               int32_t length) const;
00989 
01006   int32_t indexOf(const UChar *srcChars,
01007               int32_t srcStart,
01008               int32_t srcLength,
01009               int32_t start,
01010               int32_t length) const;
01011 
01019   inline int32_t indexOf(UChar c) const;
01020 
01029   inline int32_t indexOf(UChar32 c) const;
01030 
01039   inline int32_t indexOf(UChar c,
01040               int32_t start) const;
01041 
01051   inline int32_t indexOf(UChar32 c,
01052               int32_t start) const;
01053 
01064   inline int32_t indexOf(UChar c,
01065               int32_t start,
01066               int32_t length) const;
01067 
01079   inline int32_t indexOf(UChar32 c,
01080               int32_t start,
01081               int32_t length) const;
01082 
01091   inline int32_t lastIndexOf(const UnicodeString& text) const;
01092 
01102   inline int32_t lastIndexOf(const UnicodeString& text,
01103               int32_t start) const;
01104 
01116   inline int32_t lastIndexOf(const UnicodeString& text,
01117               int32_t start,
01118               int32_t length) const;
01119 
01136   inline int32_t lastIndexOf(const UnicodeString& srcText,
01137               int32_t srcStart,
01138               int32_t srcLength,
01139               int32_t start,
01140               int32_t length) const;
01141 
01152   inline int32_t lastIndexOf(const UChar *srcChars,
01153               int32_t srcLength,
01154               int32_t start) const;
01155 
01168   inline int32_t lastIndexOf(const UChar *srcChars,
01169               int32_t srcLength,
01170               int32_t start,
01171               int32_t length) const;
01172 
01189   int32_t lastIndexOf(const UChar *srcChars,
01190               int32_t srcStart,
01191               int32_t srcLength,
01192               int32_t start,
01193               int32_t length) const;
01194 
01202   inline int32_t lastIndexOf(UChar c) const;
01203 
01212   inline int32_t lastIndexOf(UChar32 c) const;
01213 
01222   inline int32_t lastIndexOf(UChar c,
01223               int32_t start) const;
01224 
01234   inline int32_t lastIndexOf(UChar32 c,
01235               int32_t start) const;
01236 
01247   inline int32_t lastIndexOf(UChar c,
01248               int32_t start,
01249               int32_t length) const;
01250 
01262   inline int32_t lastIndexOf(UChar32 c,
01263               int32_t start,
01264               int32_t length) const;
01265 
01266 
01267   /* Character access */
01268 
01277   inline UChar charAt(int32_t offset) const;
01278 
01286   inline UChar operator[] (int32_t offset) const;
01287 
01299   UChar32 char32At(int32_t offset) const;
01300 
01316   int32_t getChar32Start(int32_t offset) const;
01317 
01334   int32_t getChar32Limit(int32_t offset) const;
01335 
01386   int32_t moveIndex32(int32_t index, int32_t delta) const;
01387 
01388   /* Substring extraction */
01389 
01405   inline void extract(int32_t start,
01406            int32_t length,
01407            UChar *dst,
01408            int32_t dstStart = 0) const;
01409 
01431   int32_t
01432   extract(UChar *dest, int32_t destCapacity,
01433           UErrorCode &errorCode) const;
01434 
01445   inline void extract(int32_t start,
01446            int32_t length,
01447            UnicodeString& target) const;
01448 
01460   inline void extractBetween(int32_t start,
01461               int32_t limit,
01462               UChar *dst,
01463               int32_t dstStart = 0) const;
01464 
01474   virtual void extractBetween(int32_t start,
01475               int32_t limit,
01476               UnicodeString& target) const;
01477 
01499   int32_t extract(int32_t start,
01500            int32_t startLength,
01501            char *target,
01502            int32_t targetCapacity,
01503            enum EInvariant inv) const;
01504 
01505 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01506 
01526   int32_t extract(int32_t start,
01527            int32_t startLength,
01528            char *target,
01529            uint32_t targetLength) const;
01530 
01531 #endif
01532 
01533 #if !UCONFIG_NO_CONVERSION
01534 
01560   inline int32_t extract(int32_t start,
01561                  int32_t startLength,
01562                  char *target,
01563                  const char *codepage = 0) const;
01564 
01594   int32_t extract(int32_t start,
01595            int32_t startLength,
01596            char *target,
01597            uint32_t targetLength,
01598            const char *codepage) const;
01599 
01617   int32_t extract(char *dest, int32_t destCapacity,
01618                   UConverter *cnv,
01619                   UErrorCode &errorCode) const;
01620 
01621 #endif
01622 
01636   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01637 
01648   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01649 
01661   void toUTF8(ByteSink &sink) const;
01662 
01663 #if U_HAVE_STD_STRING
01664 
01677   template<typename StringClass>
01678   StringClass &toUTF8String(StringClass &result) const {
          // Writes this string's UTF-8 form into `result` and returns that
          // same reference. Only compiled when U_HAVE_STD_STRING is nonzero.
          //
          // `result` is wrapped in a StringByteSink so it can be handed to the
          // toUTF8(ByteSink &) overload, which produces the bytes.
          // NOTE(review): StringByteSink presumably appends to `result` rather
          // than replacing its contents - confirm in unicode/bytestream.h.
01679     StringByteSink<StringClass> utf8Sink(&result);
01680     toUTF8(utf8Sink);
01681     return result;
01682   }
01683 
01684 #endif
01685 
01701   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01702 
01703   /* Length operations */
01704 
01713   inline int32_t length(void) const;
01714 
01728   int32_t
01729   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01730 
01754   UBool
01755   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01756 
01762   inline UBool isEmpty(void) const;
01763 
01773   inline int32_t getCapacity(void) const;
01774 
01775   /* Other operations */
01776 
01782   inline int32_t hashCode(void) const;
01783 
01796   inline UBool isBogus(void) const;
01797 
01798 
01799   //========================================
01800   // Write operations
01801   //========================================
01802 
01803   /* Assignment operations */
01804 
01812   UnicodeString &operator=(const UnicodeString &srcText);
01813 
01834   UnicodeString &fastCopyFrom(const UnicodeString &src);
01835 
01843   inline UnicodeString& operator= (UChar ch);
01844 
01852   inline UnicodeString& operator= (UChar32 ch);
01853 
01865   inline UnicodeString& setTo(const UnicodeString& srcText,
01866                int32_t srcStart);
01867 
01881   inline UnicodeString& setTo(const UnicodeString& srcText,
01882                int32_t srcStart,
01883                int32_t srcLength);
01884 
01893   inline UnicodeString& setTo(const UnicodeString& srcText);
01894 
01903   inline UnicodeString& setTo(const UChar *srcChars,
01904                int32_t srcLength);
01905 
01914   UnicodeString& setTo(UChar srcChar);
01915 
01924   UnicodeString& setTo(UChar32 srcChar);
01925 
01946   UnicodeString &setTo(UBool isTerminated,
01947                        const UChar *text,
01948                        int32_t textLength);
01949 
01969   UnicodeString &setTo(UChar *buffer,
01970                        int32_t buffLength,
01971                        int32_t buffCapacity);
01972 
02013   void setToBogus();
02014 
02022   UnicodeString& setCharAt(int32_t offset,
02023                UChar ch);
02024 
02025 
02026   /* Append operations */
02027 
02035  inline  UnicodeString& operator+= (UChar ch);
02036 
02044  inline  UnicodeString& operator+= (UChar32 ch);
02045 
02053   inline UnicodeString& operator+= (const UnicodeString& srcText);
02054 
02069   inline UnicodeString& append(const UnicodeString& srcText,
02070             int32_t srcStart,
02071             int32_t srcLength);
02072 
02080   inline UnicodeString& append(const UnicodeString& srcText);
02081 
02095   inline UnicodeString& append(const UChar *srcChars,
02096             int32_t srcStart,
02097             int32_t srcLength);
02098 
02108   inline UnicodeString& append(const UChar *srcChars,
02109             int32_t srcLength);
02110 
02117   inline UnicodeString& append(UChar srcChar);
02118 
02125   UnicodeString& append(UChar32 srcChar);
02126 
02127 
02128   /* Insert operations */
02129 
02143   inline UnicodeString& insert(int32_t start,
02144             const UnicodeString& srcText,
02145             int32_t srcStart,
02146             int32_t srcLength);
02147 
02156   inline UnicodeString& insert(int32_t start,
02157             const UnicodeString& srcText);
02158 
02172   inline UnicodeString& insert(int32_t start,
02173             const UChar *srcChars,
02174             int32_t srcStart,
02175             int32_t srcLength);
02176 
02186   inline UnicodeString& insert(int32_t start,
02187             const UChar *srcChars,
02188             int32_t srcLength);
02189 
02198   inline UnicodeString& insert(int32_t start,
02199             UChar srcChar);
02200 
02209   inline UnicodeString& insert(int32_t start,
02210             UChar32 srcChar);
02211 
02212 
02213   /* Replace operations */
02214 
02232   UnicodeString& replace(int32_t start,
02233              int32_t length,
02234              const UnicodeString& srcText,
02235              int32_t srcStart,
02236              int32_t srcLength);
02237 
02250   UnicodeString& replace(int32_t start,
02251              int32_t length,
02252              const UnicodeString& srcText);
02253 
02271   UnicodeString& replace(int32_t start,
02272              int32_t length,
02273              const UChar *srcChars,
02274              int32_t srcStart,
02275              int32_t srcLength);
02276 
02289   inline UnicodeString& replace(int32_t start,
02290              int32_t length,
02291              const UChar *srcChars,
02292              int32_t srcLength);
02293 
02305   inline UnicodeString& replace(int32_t start,
02306              int32_t length,
02307              UChar srcChar);
02308 
02320   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
02321 
02331   inline UnicodeString& replaceBetween(int32_t start,
02332                 int32_t limit,
02333                 const UnicodeString& srcText);
02334 
02349   inline UnicodeString& replaceBetween(int32_t start,
02350                 int32_t limit,
02351                 const UnicodeString& srcText,
02352                 int32_t srcStart,
02353                 int32_t srcLimit);
02354 
02365   virtual void handleReplaceBetween(int32_t start,
02366                                     int32_t limit,
02367                                     const UnicodeString& text);
02368 
02374   virtual UBool hasMetaData() const;
02375 
02391   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02392 
02393   /* Search and replace operations */
02394 
02403   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02404                 const UnicodeString& newText);
02405 
02417   inline UnicodeString& findAndReplace(int32_t start,
02418                 int32_t length,
02419                 const UnicodeString& oldText,
02420                 const UnicodeString& newText);
02421 
02439   UnicodeString& findAndReplace(int32_t start,
02440                 int32_t length,
02441                 const UnicodeString& oldText,
02442                 int32_t oldStart,
02443                 int32_t oldLength,
02444                 const UnicodeString& newText,
02445                 int32_t newStart,
02446                 int32_t newLength);
02447 
02448 
02449   /* Remove operations */
02450 
02456   inline UnicodeString& remove(void);
02457 
02466   inline UnicodeString& remove(int32_t start,
02467                                int32_t length = (int32_t)INT32_MAX);
02468 
02477   inline UnicodeString& removeBetween(int32_t start,
02478                                       int32_t limit = (int32_t)INT32_MAX);
02479 
02489   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02490 
02491   /* Length operations */
02492 
02504   UBool padLeading(int32_t targetLength,
02505                     UChar padChar = 0x0020);
02506 
02518   UBool padTrailing(int32_t targetLength,
02519                      UChar padChar = 0x0020);
02520 
02527   inline UBool truncate(int32_t targetLength);
02528 
02534   UnicodeString& trim(void);
02535 
02536 
02537   /* Miscellaneous operations */
02538 
02544   inline UnicodeString& reverse(void);
02545 
02554   inline UnicodeString& reverse(int32_t start,
02555              int32_t length);
02556 
02563   UnicodeString& toUpper(void);
02564 
02572   UnicodeString& toUpper(const Locale& locale);
02573 
02580   UnicodeString& toLower(void);
02581 
02589   UnicodeString& toLower(const Locale& locale);
02590 
02591 #if !UCONFIG_NO_BREAK_ITERATION
02592 
02619   UnicodeString &toTitle(BreakIterator *titleIter);
02620 
02648   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02649 
02681   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02682 
02683 #endif
02684 
02696   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02697 
02698   //========================================
02699   // Access to the internal buffer
02700   //========================================
02701 
02745   UChar *getBuffer(int32_t minCapacity);
02746 
02767   void releaseBuffer(int32_t newLength=-1);
02768 
02799   inline const UChar *getBuffer() const;
02800 
02834   inline const UChar *getTerminatedBuffer();
02835 
02836   //========================================
02837   // Constructors
02838   //========================================
02839 
02843   UnicodeString();
02844 
02856   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02857 
02867   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
02868 
02878   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
02879 
02890   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
02891 
02899   UnicodeString(const UChar *text,
02900         int32_t textLength);
02901 
02921   UnicodeString(UBool isTerminated,
02922                 const UChar *text,
02923                 int32_t textLength);
02924 
02943   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02944 
02945 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02946 
02966   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
02967 
02976   UnicodeString(const char *codepageData, int32_t dataLength);
02977 
02978 #endif
02979 
02980 #if !UCONFIG_NO_CONVERSION
02981 
02999   UnicodeString(const char *codepageData, const char *codepage);
03000 
03018   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
03019 
03041   UnicodeString(
03042         const char *src, int32_t srcLength,
03043         UConverter *cnv,
03044         UErrorCode &errorCode);
03045 
03046 #endif
03047 
03072   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
03073 
03074 
03080   UnicodeString(const UnicodeString& that);
03081 
03088   UnicodeString(const UnicodeString& src, int32_t srcStart);
03089 
03097   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03098 
03115   virtual Replaceable *clone() const;
03116 
03120   virtual ~UnicodeString();
03121 
03135   static UnicodeString fromUTF8(const StringPiece &utf8);
03136 
03148   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03149 
03150   /* Miscellaneous operations */
03151 
03186   UnicodeString unescape() const;
03187 
03207   UChar32 unescapeAt(int32_t &offset) const;
03208 
03214   static UClassID U_EXPORT2 getStaticClassID();
03215 
03221   virtual UClassID getDynamicClassID() const;
03222 
03223   //========================================
03224   // Implementation methods
03225   //========================================
03226 
03227 protected:
03232   virtual int32_t getLength() const;
03233 
03239   virtual UChar getCharAt(int32_t offset) const;
03240 
03246   virtual UChar32 getChar32At(int32_t offset) const;
03247 
03248 private:
03249   // For char* constructors. Could be made public.
03250   UnicodeString &setToUTF8(const StringPiece &utf8);
03251   // For extract(char*).
03252   // We could make a toUTF8(target, capacity, errorCode) public but not
03253   // this version: New API will be cleaner if we make callers create substrings
03254   // rather than having start+length on every method,
03255   // and it should take a UErrorCode&.
03256   int32_t
03257   toUTF8(int32_t start, int32_t len,
03258          char *target, int32_t capacity) const;
03259 
03260 
03261   inline int8_t
03262   doCompare(int32_t start,
03263            int32_t length,
03264            const UnicodeString& srcText,
03265            int32_t srcStart,
03266            int32_t srcLength) const;
03267 
03268   int8_t doCompare(int32_t start,
03269            int32_t length,
03270            const UChar *srcChars,
03271            int32_t srcStart,
03272            int32_t srcLength) const;
03273 
03274   inline int8_t
03275   doCompareCodePointOrder(int32_t start,
03276                           int32_t length,
03277                           const UnicodeString& srcText,
03278                           int32_t srcStart,
03279                           int32_t srcLength) const;
03280 
03281   int8_t doCompareCodePointOrder(int32_t start,
03282                                  int32_t length,
03283                                  const UChar *srcChars,
03284                                  int32_t srcStart,
03285                                  int32_t srcLength) const;
03286 
03287   inline int8_t
03288   doCaseCompare(int32_t start,
03289                 int32_t length,
03290                 const UnicodeString &srcText,
03291                 int32_t srcStart,
03292                 int32_t srcLength,
03293                 uint32_t options) const;
03294 
03295   int8_t
03296   doCaseCompare(int32_t start,
03297                 int32_t length,
03298                 const UChar *srcChars,
03299                 int32_t srcStart,
03300                 int32_t srcLength,
03301                 uint32_t options) const;
03302 
03303   int32_t doIndexOf(UChar c,
03304             int32_t start,
03305             int32_t length) const;
03306 
03307   int32_t doIndexOf(UChar32 c,
03308                         int32_t start,
03309                         int32_t length) const;
03310 
03311   int32_t doLastIndexOf(UChar c,
03312                 int32_t start,
03313                 int32_t length) const;
03314 
03315   int32_t doLastIndexOf(UChar32 c,
03316                             int32_t start,
03317                             int32_t length) const;
03318 
03319   void doExtract(int32_t start,
03320          int32_t length,
03321          UChar *dst,
03322          int32_t dstStart) const;
03323 
03324   inline void doExtract(int32_t start,
03325          int32_t length,
03326          UnicodeString& target) const;
03327 
03328   inline UChar doCharAt(int32_t offset)  const;
03329 
03330   UnicodeString& doReplace(int32_t start,
03331                int32_t length,
03332                const UnicodeString& srcText,
03333                int32_t srcStart,
03334                int32_t srcLength);
03335 
03336   UnicodeString& doReplace(int32_t start,
03337                int32_t length,
03338                const UChar *srcChars,
03339                int32_t srcStart,
03340                int32_t srcLength);
03341 
03342   UnicodeString& doReverse(int32_t start,
03343                int32_t length);
03344 
03345   // calculate hash code
03346   int32_t doHashCode(void) const;
03347 
03348   // get pointer to start of array
03349   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03350   inline UChar* getArrayStart(void);
03351   inline const UChar* getArrayStart(void) const;
03352 
03353   // A UnicodeString object (not necessarily its current buffer)
03354   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03355   inline UBool isWritable() const;
03356 
03357   // Is the current buffer writable?
03358   inline UBool isBufferWritable() const;
03359 
03360   // None of the following does releaseArray().
03361   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03362   inline void setToEmpty();                  // sets fFlags=kShortString
03363   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03364 
03365   // allocate the array; result may be fStackBuffer
03366   // sets refCount to 1 if appropriate
03367   // sets fArray, fCapacity, and fFlags
03368   // returns boolean for success or failure
03369   UBool allocate(int32_t capacity);
03370 
03371   // release the array if owned
03372   void releaseArray(void);
03373 
03374   // turn a bogus string into an empty one
03375   void unBogus();
03376 
03377   // implements assignment operator, copy constructor, and fastCopyFrom()
03378   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03379 
03380   // Pin start and limit to acceptable values.
03381   inline void pinIndex(int32_t& start) const;
03382   inline void pinIndices(int32_t& start,
03383                          int32_t& length) const;
03384 
03385 #if !UCONFIG_NO_CONVERSION
03386 
03387   /* Internal extract() using UConverter. */
03388   int32_t doExtract(int32_t start, int32_t length,
03389                     char *dest, int32_t destCapacity,
03390                     UConverter *cnv,
03391                     UErrorCode &errorCode) const;
03392 
03393   /*
03394    * Real constructor for converting from codepage data.
03395    * It assumes that it is called with !fRefCounted.
03396    *
03397    * If <code>codepage==0</code>, then the default converter
03398    * is used for the platform encoding.
03399    * If <code>codepage</code> is an empty string (<code>""</code>),
03400    * then a simple conversion is performed on the codepage-invariant
03401    * subset ("invariant characters") of the platform encoding. See utypes.h.
03402    */
03403   void doCodepageCreate(const char *codepageData,
03404                         int32_t dataLength,
03405                         const char *codepage);
03406 
03407   /*
03408    * Worker function for creating a UnicodeString from
03409    * a codepage string using a UConverter.
03410    */
03411   void
03412   doCodepageCreate(const char *codepageData,
03413                    int32_t dataLength,
03414                    UConverter *converter,
03415                    UErrorCode &status);
03416 
03417 #endif
03418 
03419   /*
03420    * This function is called when write access to the array
03421    * is necessary.
03422    *
03423    * We need to make a copy of the array if
03424    * the buffer is read-only, or
03425    * the buffer is refCounted (shared), and refCount>1, or
03426    * the buffer is too small.
03427    *
03428    * Return FALSE if memory could not be allocated.
03429    */
03430   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03431                             int32_t growCapacity = -1,
03432                             UBool doCopyArray = TRUE,
03433                             int32_t **pBufferToDelete = 0,
03434                             UBool forceClone = FALSE);
03435 
03441   UnicodeString &
03442   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
03443 
03444   // ref counting
03445   void addRef(void);
03446   int32_t removeRef(void);
03447   int32_t refCount(void) const;
03448 
03449   // constants
03450   enum {
03451     // Set the stack buffer size so that sizeof(UnicodeString) is,
03452     // naturally (without padding), a multiple of sizeof(pointer).
03453     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
03454     kInvalidUChar=0xffff, // invalid UChar index
03455     kGrowSize=128, // grow size for this buffer
03456     kInvalidHashCode=0, // invalid hash code
03457     kEmptyHashCode=1, // hash code for empty string
03458 
03459     // bit flag values for fFlags
03460     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03461     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
03462     kRefCounted=4,      // there is a refCount field before the characters in fArray
03463     kBufferIsReadonly=8,// do not write to this buffer
03464     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03465                         // and releaseBuffer(newLength) must be called
03466 
03467     // combined values for convenience
03468     kShortString=kUsingStackBuffer,
03469     kLongString=kRefCounted,
03470     kReadonlyAlias=kBufferIsReadonly,
03471     kWritableAlias=0
03472   };
03473 
03474   friend class StringThreadTest;
03475   friend class UnicodeStringAppendable;
03476 
03477   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03478   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03479 
03480   /*
03481    * The following are all the class fields that are stored
03482    * in each UnicodeString object.
03483    * Note that UnicodeString has virtual functions,
03484    * therefore there is an implicit vtable pointer
03485    * as the first real field.
03486    * The fields should be aligned such that no padding is necessary.
03487    * On 32-bit machines, the size should be 32 bytes,
03488    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03489    *
03490    * We use a hack to achieve this.
03491    *
03492    * With at least some compilers, each of the following is forced to
03493    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
03494    * rounded up with additional padding if the fields do not already fit that requirement:
03495    * - sizeof(class UnicodeString)
03496    * - offsetof(UnicodeString, fUnion)
03497    * - sizeof(fUnion)
03498    * - sizeof(fFields)
03499    *
03500    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
03501    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
03502    * (Padding at the end of fFields is ok:
03503    * As long as there is no padding after fStackBuffer, it is not wasted space.)
03504    *
03505    * We further assume that the compiler does not reorder the fields,
03506    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
03507    * with at most some padding (but no other field) in between.
03508    * (Padding there would be wasted space, but functionally harmless.)
03509    *
03510    * We use a few more sizeof(pointer)'s chunks of space with
03511    * fRestOfStackBuffer, fShortLength and fFlags,
03512    * to get up exactly to the intended sizeof(UnicodeString).
03513    */
03514   // (implicit) *vtable;
03515   union StackBufferOrFields {
03516     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03517     // else fFields is used
03518     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
03519     struct {
03520       UChar   *fArray;    // the Unicode data
03521       int32_t fCapacity;  // capacity of fArray (in UChars)
03522       int32_t fLength;    // number of characters in fArray if >127; else undefined
03523     } fFields;
03524   } fUnion;
03525   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
03526   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
03527   uint8_t fFlags;       // bit flags: see constants above
03528 };
03529 
03538 U_COMMON_API UnicodeString U_EXPORT2
03539 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03540 
03541 //========================================
03542 // Inline members
03543 //========================================
03544 
03545 //========================================
03546 // Privates
03547 //========================================
03548 
03549 inline void
03550 UnicodeString::pinIndex(int32_t& start) const
03551 {
03552   // pin index
03553   if(start < 0) {
03554     start = 0;
03555   } else if(start > length()) {
03556     start = length();
03557   }
03558 }
03559 
03560 inline void
03561 UnicodeString::pinIndices(int32_t& start,
03562                           int32_t& _length) const
03563 {
03564   // pin indices
03565   int32_t len = length();
03566   if(start < 0) {
03567     start = 0;
03568   } else if(start > len) {
03569     start = len;
03570   }
03571   if(_length < 0) {
03572     _length = 0;
03573   } else if(_length > (len - start)) {
03574     _length = (len - start);
03575   }
03576 }
03577 
03578 inline UChar*
03579 UnicodeString::getArrayStart()
03580 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03581 
03582 inline const UChar*
03583 UnicodeString::getArrayStart() const
03584 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03585 
03586 //========================================
03587 // Read-only implementation methods
03588 //========================================
03589 inline int32_t
03590 UnicodeString::length() const
03591 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
03592 
03593 inline int32_t
03594 UnicodeString::getCapacity() const
03595 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
03596 
03597 inline int32_t
03598 UnicodeString::hashCode() const
03599 { return doHashCode(); }
03600 
03601 inline UBool
03602 UnicodeString::isBogus() const
03603 { return (UBool)(fFlags & kIsBogus); }
03604 
03605 inline UBool
03606 UnicodeString::isWritable() const
03607 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
03608 
03609 inline UBool
03610 UnicodeString::isBufferWritable() const
03611 {
03612   return (UBool)(
03613       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
03614       (!(fFlags&kRefCounted) || refCount()==1));
03615 }
03616 
03617 inline const UChar *
03618 UnicodeString::getBuffer() const {
03619   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03620     return 0;
03621   } else if(fFlags&kUsingStackBuffer) {
03622     return fUnion.fStackBuffer;
03623   } else {
03624     return fUnion.fFields.fArray;
03625   }
03626 }
03627 
03628 //========================================
03629 // Read-only alias methods
03630 //========================================
03631 inline int8_t
03632 UnicodeString::doCompare(int32_t start,
03633               int32_t thisLength,
03634               const UnicodeString& srcText,
03635               int32_t srcStart,
03636               int32_t srcLength) const
03637 {
03638   if(srcText.isBogus()) {
03639     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03640   } else {
03641     srcText.pinIndices(srcStart, srcLength);
03642     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03643   }
03644 }
03645 
03646 inline UBool
03647 UnicodeString::operator== (const UnicodeString& text) const
03648 {
03649   if(isBogus()) {
03650     return text.isBogus();
03651   } else {
03652     int32_t len = length(), textLength = text.length();
03653     return
03654       !text.isBogus() &&
03655       len == textLength &&
03656       doCompare(0, len, text, 0, textLength) == 0;
03657   }
03658 }
03659 
03660 inline UBool
03661 UnicodeString::operator!= (const UnicodeString& text) const
03662 { return (! operator==(text)); }
03663 
03664 inline UBool
03665 UnicodeString::operator> (const UnicodeString& text) const
03666 { return doCompare(0, length(), text, 0, text.length()) == 1; }
03667 
03668 inline UBool
03669 UnicodeString::operator< (const UnicodeString& text) const
03670 { return doCompare(0, length(), text, 0, text.length()) == -1; }
03671 
03672 inline UBool
03673 UnicodeString::operator>= (const UnicodeString& text) const
03674 { return doCompare(0, length(), text, 0, text.length()) != -1; }
03675 
03676 inline UBool
03677 UnicodeString::operator<= (const UnicodeString& text) const
03678 { return doCompare(0, length(), text, 0, text.length()) != 1; }
03679 
03680 inline int8_t
03681 UnicodeString::compare(const UnicodeString& text) const
03682 { return doCompare(0, length(), text, 0, text.length()); }
03683 
03684 inline int8_t
03685 UnicodeString::compare(int32_t start,
03686                int32_t _length,
03687                const UnicodeString& srcText) const
03688 { return doCompare(start, _length, srcText, 0, srcText.length()); }
03689 
03690 inline int8_t
03691 UnicodeString::compare(const UChar *srcChars,
03692                int32_t srcLength) const
03693 { return doCompare(0, length(), srcChars, 0, srcLength); }
03694 
03695 inline int8_t
03696 UnicodeString::compare(int32_t start,
03697                int32_t _length,
03698                const UnicodeString& srcText,
03699                int32_t srcStart,
03700                int32_t srcLength) const
03701 { return doCompare(start, _length, srcText, srcStart, srcLength); }
03702 
03703 inline int8_t
03704 UnicodeString::compare(int32_t start,
03705                int32_t _length,
03706                const UChar *srcChars) const
03707 { return doCompare(start, _length, srcChars, 0, _length); }
03708 
03709 inline int8_t
03710 UnicodeString::compare(int32_t start,
03711                int32_t _length,
03712                const UChar *srcChars,
03713                int32_t srcStart,
03714                int32_t srcLength) const
03715 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
03716 
03717 inline int8_t
03718 UnicodeString::compareBetween(int32_t start,
03719                   int32_t limit,
03720                   const UnicodeString& srcText,
03721                   int32_t srcStart,
03722                   int32_t srcLimit) const
03723 { return doCompare(start, limit - start,
03724            srcText, srcStart, srcLimit - srcStart); }
03725 
03726 inline int8_t
03727 UnicodeString::doCompareCodePointOrder(int32_t start,
03728                                        int32_t thisLength,
03729                                        const UnicodeString& srcText,
03730                                        int32_t srcStart,
03731                                        int32_t srcLength) const
03732 {
03733   if(srcText.isBogus()) {
03734     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03735   } else {
03736     srcText.pinIndices(srcStart, srcLength);
03737     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03738   }
03739 }
03740 
03741 inline int8_t
03742 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03743 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
03744 
03745 inline int8_t
03746 UnicodeString::compareCodePointOrder(int32_t start,
03747                                      int32_t _length,
03748                                      const UnicodeString& srcText) const
03749 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
03750 
03751 inline int8_t
03752 UnicodeString::compareCodePointOrder(const UChar *srcChars,
03753                                      int32_t srcLength) const
03754 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
03755 
03756 inline int8_t
03757 UnicodeString::compareCodePointOrder(int32_t start,
03758                                      int32_t _length,
03759                                      const UnicodeString& srcText,
03760                                      int32_t srcStart,
03761                                      int32_t srcLength) const
03762 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
03763 
03764 inline int8_t
03765 UnicodeString::compareCodePointOrder(int32_t start,
03766                                      int32_t _length,
03767                                      const UChar *srcChars) const
03768 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
03769 
03770 inline int8_t
03771 UnicodeString::compareCodePointOrder(int32_t start,
03772                                      int32_t _length,
03773                                      const UChar *srcChars,
03774                                      int32_t srcStart,
03775                                      int32_t srcLength) const
03776 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
03777 
03778 inline int8_t
03779 UnicodeString::compareCodePointOrderBetween(int32_t start,
03780                                             int32_t limit,
03781                                             const UnicodeString& srcText,
03782                                             int32_t srcStart,
03783                                             int32_t srcLimit) const
03784 { return doCompareCodePointOrder(start, limit - start,
03785            srcText, srcStart, srcLimit - srcStart); }
03786 
03787 inline int8_t
03788 UnicodeString::doCaseCompare(int32_t start,
03789                              int32_t thisLength,
03790                              const UnicodeString &srcText,
03791                              int32_t srcStart,
03792                              int32_t srcLength,
03793                              uint32_t options) const
03794 {
03795   if(srcText.isBogus()) {
03796     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03797   } else {
03798     srcText.pinIndices(srcStart, srcLength);
03799     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
03800   }
03801 }
03802 
03803 inline int8_t
03804 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
03805   return doCaseCompare(0, length(), text, 0, text.length(), options);
03806 }
03807 
03808 inline int8_t
03809 UnicodeString::caseCompare(int32_t start,
03810                            int32_t _length,
03811                            const UnicodeString &srcText,
03812                            uint32_t options) const {
03813   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
03814 }
03815 
03816 inline int8_t
03817 UnicodeString::caseCompare(const UChar *srcChars,
03818                            int32_t srcLength,
03819                            uint32_t options) const {
03820   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
03821 }
03822 
03823 inline int8_t
03824 UnicodeString::caseCompare(int32_t start,
03825                            int32_t _length,
03826                            const UnicodeString &srcText,
03827                            int32_t srcStart,
03828                            int32_t srcLength,
03829                            uint32_t options) const {
03830   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
03831 }
03832 
03833 inline int8_t
03834 UnicodeString::caseCompare(int32_t start,
03835                            int32_t _length,
03836                            const UChar *srcChars,
03837                            uint32_t options) const {
03838   return doCaseCompare(start, _length, srcChars, 0, _length, options);
03839 }
03840 
03841 inline int8_t
03842 UnicodeString::caseCompare(int32_t start,
03843                            int32_t _length,
03844                            const UChar *srcChars,
03845                            int32_t srcStart,
03846                            int32_t srcLength,
03847                            uint32_t options) const {
03848   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
03849 }
03850 
03851 inline int8_t
03852 UnicodeString::caseCompareBetween(int32_t start,
03853                                   int32_t limit,
03854                                   const UnicodeString &srcText,
03855                                   int32_t srcStart,
03856                                   int32_t srcLimit,
03857                                   uint32_t options) const {
03858   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
03859 }
03860 
03861 inline int32_t
03862 UnicodeString::indexOf(const UnicodeString& srcText,
03863                int32_t srcStart,
03864                int32_t srcLength,
03865                int32_t start,
03866                int32_t _length) const
03867 {
03868   if(!srcText.isBogus()) {
03869     srcText.pinIndices(srcStart, srcLength);
03870     if(srcLength > 0) {
03871       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03872     }
03873   }
03874   return -1;
03875 }
03876 
03877 inline int32_t
03878 UnicodeString::indexOf(const UnicodeString& text) const
03879 { return indexOf(text, 0, text.length(), 0, length()); }
03880 
03881 inline int32_t
03882 UnicodeString::indexOf(const UnicodeString& text,
03883                int32_t start) const {
03884   pinIndex(start);
03885   return indexOf(text, 0, text.length(), start, length() - start);
03886 }
03887 
03888 inline int32_t
03889 UnicodeString::indexOf(const UnicodeString& text,
03890                int32_t start,
03891                int32_t _length) const
03892 { return indexOf(text, 0, text.length(), start, _length); }
03893 
03894 inline int32_t
03895 UnicodeString::indexOf(const UChar *srcChars,
03896                int32_t srcLength,
03897                int32_t start) const {
03898   pinIndex(start);
03899   return indexOf(srcChars, 0, srcLength, start, length() - start);
03900 }
03901 
03902 inline int32_t
03903 UnicodeString::indexOf(const UChar *srcChars,
03904                int32_t srcLength,
03905                int32_t start,
03906                int32_t _length) const
03907 { return indexOf(srcChars, 0, srcLength, start, _length); }
03908 
03909 inline int32_t
03910 UnicodeString::indexOf(UChar c,
03911                int32_t start,
03912                int32_t _length) const
03913 { return doIndexOf(c, start, _length); }
03914 
03915 inline int32_t
03916 UnicodeString::indexOf(UChar32 c,
03917                int32_t start,
03918                int32_t _length) const
03919 { return doIndexOf(c, start, _length); }
03920 
03921 inline int32_t
03922 UnicodeString::indexOf(UChar c) const
03923 { return doIndexOf(c, 0, length()); }
03924 
03925 inline int32_t
03926 UnicodeString::indexOf(UChar32 c) const
03927 { return indexOf(c, 0, length()); }
03928 
03929 inline int32_t
03930 UnicodeString::indexOf(UChar c,
03931                int32_t start) const {
03932   pinIndex(start);
03933   return doIndexOf(c, start, length() - start);
03934 }
03935 
03936 inline int32_t
03937 UnicodeString::indexOf(UChar32 c,
03938                int32_t start) const {
03939   pinIndex(start);
03940   return indexOf(c, start, length() - start);
03941 }
03942 
03943 inline int32_t
03944 UnicodeString::lastIndexOf(const UChar *srcChars,
03945                int32_t srcLength,
03946                int32_t start,
03947                int32_t _length) const
03948 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
03949 
03950 inline int32_t
03951 UnicodeString::lastIndexOf(const UChar *srcChars,
03952                int32_t srcLength,
03953                int32_t start) const {
03954   pinIndex(start);
03955   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03956 }
03957 
03958 inline int32_t
03959 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03960                int32_t srcStart,
03961                int32_t srcLength,
03962                int32_t start,
03963                int32_t _length) const
03964 {
03965   if(!srcText.isBogus()) {
03966     srcText.pinIndices(srcStart, srcLength);
03967     if(srcLength > 0) {
03968       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03969     }
03970   }
03971   return -1;
03972 }
03973 
03974 inline int32_t
03975 UnicodeString::lastIndexOf(const UnicodeString& text,
03976                int32_t start,
03977                int32_t _length) const
03978 { return lastIndexOf(text, 0, text.length(), start, _length); }
03979 
03980 inline int32_t
03981 UnicodeString::lastIndexOf(const UnicodeString& text,
03982                int32_t start) const {
03983   pinIndex(start);
03984   return lastIndexOf(text, 0, text.length(), start, length() - start);
03985 }
03986 
03987 inline int32_t
03988 UnicodeString::lastIndexOf(const UnicodeString& text) const
03989 { return lastIndexOf(text, 0, text.length(), 0, length()); }
03990 
03991 inline int32_t
03992 UnicodeString::lastIndexOf(UChar c,
03993                int32_t start,
03994                int32_t _length) const
03995 { return doLastIndexOf(c, start, _length); }
03996 
03997 inline int32_t
03998 UnicodeString::lastIndexOf(UChar32 c,
03999                int32_t start,
04000                int32_t _length) const {
04001   return doLastIndexOf(c, start, _length);
04002 }
04003 
04004 inline int32_t
04005 UnicodeString::lastIndexOf(UChar c) const
04006 { return doLastIndexOf(c, 0, length()); }
04007 
04008 inline int32_t
04009 UnicodeString::lastIndexOf(UChar32 c) const {
04010   return lastIndexOf(c, 0, length());
04011 }
04012 
04013 inline int32_t
04014 UnicodeString::lastIndexOf(UChar c,
04015                int32_t start) const {
04016   pinIndex(start);
04017   return doLastIndexOf(c, start, length() - start);
04018 }
04019 
04020 inline int32_t
04021 UnicodeString::lastIndexOf(UChar32 c,
04022                int32_t start) const {
04023   pinIndex(start);
04024   return lastIndexOf(c, start, length() - start);
04025 }
04026 
04027 inline UBool
04028 UnicodeString::startsWith(const UnicodeString& text) const
04029 { return compare(0, text.length(), text, 0, text.length()) == 0; }
04030 
04031 inline UBool
04032 UnicodeString::startsWith(const UnicodeString& srcText,
04033               int32_t srcStart,
04034               int32_t srcLength) const
04035 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
04036 
04037 inline UBool
04038 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
04039   if(srcLength < 0) {
04040     srcLength = u_strlen(srcChars);
04041   }
04042   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
04043 }
04044 
04045 inline UBool
04046 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
04047   if(srcLength < 0) {
04048     srcLength = u_strlen(srcChars);
04049   }
04050   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
04051 }
04052 
04053 inline UBool
04054 UnicodeString::endsWith(const UnicodeString& text) const
04055 { return doCompare(length() - text.length(), text.length(),
04056            text, 0, text.length()) == 0; }
04057 
04058 inline UBool
04059 UnicodeString::endsWith(const UnicodeString& srcText,
04060             int32_t srcStart,
04061             int32_t srcLength) const {
04062   srcText.pinIndices(srcStart, srcLength);
04063   return doCompare(length() - srcLength, srcLength,
04064                    srcText, srcStart, srcLength) == 0;
04065 }
04066 
04067 inline UBool
04068 UnicodeString::endsWith(const UChar *srcChars,
04069             int32_t srcLength) const {
04070   if(srcLength < 0) {
04071     srcLength = u_strlen(srcChars);
04072   }
04073   return doCompare(length() - srcLength, srcLength,
04074                    srcChars, 0, srcLength) == 0;
04075 }
04076 
04077 inline UBool
04078 UnicodeString::endsWith(const UChar *srcChars,
04079             int32_t srcStart,
04080             int32_t srcLength) const {
04081   if(srcLength < 0) {
04082     srcLength = u_strlen(srcChars + srcStart);
04083   }
04084   return doCompare(length() - srcLength, srcLength,
04085                    srcChars, srcStart, srcLength) == 0;
04086 }
04087 
04088 //========================================
04089 // replace
04090 //========================================
04091 inline UnicodeString&
04092 UnicodeString::replace(int32_t start,
04093                int32_t _length,
04094                const UnicodeString& srcText)
04095 { return doReplace(start, _length, srcText, 0, srcText.length()); }
04096 
04097 inline UnicodeString&
04098 UnicodeString::replace(int32_t start,
04099                int32_t _length,
04100                const UnicodeString& srcText,
04101                int32_t srcStart,
04102                int32_t srcLength)
04103 { return doReplace(start, _length, srcText, srcStart, srcLength); }
04104 
04105 inline UnicodeString&
04106 UnicodeString::replace(int32_t start,
04107                int32_t _length,
04108                const UChar *srcChars,
04109                int32_t srcLength)
04110 { return doReplace(start, _length, srcChars, 0, srcLength); }
04111 
04112 inline UnicodeString&
04113 UnicodeString::replace(int32_t start,
04114                int32_t _length,
04115                const UChar *srcChars,
04116                int32_t srcStart,
04117                int32_t srcLength)
04118 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
04119 
04120 inline UnicodeString&
04121 UnicodeString::replace(int32_t start,
04122                int32_t _length,
04123                UChar srcChar)
04124 { return doReplace(start, _length, &srcChar, 0, 1); }
04125 
04126 inline UnicodeString&
04127 UnicodeString::replaceBetween(int32_t start,
04128                   int32_t limit,
04129                   const UnicodeString& srcText)
04130 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
04131 
04132 inline UnicodeString&
04133 UnicodeString::replaceBetween(int32_t start,
04134                   int32_t limit,
04135                   const UnicodeString& srcText,
04136                   int32_t srcStart,
04137                   int32_t srcLimit)
04138 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
04139 
04140 inline UnicodeString&
04141 UnicodeString::findAndReplace(const UnicodeString& oldText,
04142                   const UnicodeString& newText)
04143 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
04144             newText, 0, newText.length()); }
04145 
04146 inline UnicodeString&
04147 UnicodeString::findAndReplace(int32_t start,
04148                   int32_t _length,
04149                   const UnicodeString& oldText,
04150                   const UnicodeString& newText)
04151 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
04152             newText, 0, newText.length()); }
04153 
04154 // ============================
04155 // extract
04156 // ============================
04157 inline void
04158 UnicodeString::doExtract(int32_t start,
04159              int32_t _length,
04160              UnicodeString& target) const
04161 { target.replace(0, target.length(), *this, start, _length); }
04162 
04163 inline void
04164 UnicodeString::extract(int32_t start,
04165                int32_t _length,
04166                UChar *target,
04167                int32_t targetStart) const
04168 { doExtract(start, _length, target, targetStart); }
04169 
04170 inline void
04171 UnicodeString::extract(int32_t start,
04172                int32_t _length,
04173                UnicodeString& target) const
04174 { doExtract(start, _length, target); }
04175 
04176 #if !UCONFIG_NO_CONVERSION
04177 
04178 inline int32_t
04179 UnicodeString::extract(int32_t start,
04180                int32_t _length,
04181                char *dst,
04182                const char *codepage) const
04183 
04184 {
04185   // This dstSize value will be checked explicitly
04186   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
04187 }
04188 
04189 #endif
04190 
04191 inline void
04192 UnicodeString::extractBetween(int32_t start,
04193                   int32_t limit,
04194                   UChar *dst,
04195                   int32_t dstStart) const {
04196   pinIndex(start);
04197   pinIndex(limit);
04198   doExtract(start, limit - start, dst, dstStart);
04199 }
04200 
04201 inline UnicodeString
04202 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
04203     return tempSubString(start, limit - start);
04204 }
04205 
04206 inline UChar
04207 UnicodeString::doCharAt(int32_t offset) const
04208 {
04209   if((uint32_t)offset < (uint32_t)length()) {
04210     return getArrayStart()[offset];
04211   } else {
04212     return kInvalidUChar;
04213   }
04214 }
04215 
04216 inline UChar
04217 UnicodeString::charAt(int32_t offset) const
04218 { return doCharAt(offset); }
04219 
04220 inline UChar
04221 UnicodeString::operator[] (int32_t offset) const
04222 { return doCharAt(offset); }
04223 
04224 inline UBool
04225 UnicodeString::isEmpty() const {
04226   return fShortLength == 0;
04227 }
04228 
04229 //========================================
04230 // Write implementation methods
04231 //========================================
04232 inline void
04233 UnicodeString::setLength(int32_t len) {
04234   if(len <= 127) {
04235     fShortLength = (int8_t)len;
04236   } else {
04237     fShortLength = (int8_t)-1;
04238     fUnion.fFields.fLength = len;
04239   }
04240 }
04241 
04242 inline void
04243 UnicodeString::setToEmpty() {
04244   fShortLength = 0;
04245   fFlags = kShortString;
04246 }
04247 
04248 inline void
04249 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04250   setLength(len);
04251   fUnion.fFields.fArray = array;
04252   fUnion.fFields.fCapacity = capacity;
04253 }
04254 
04255 inline const UChar *
04256 UnicodeString::getTerminatedBuffer() {
04257   if(!isWritable()) {
04258     return 0;
04259   } else {
04260     UChar *array = getArrayStart();
04261     int32_t len = length();
04262     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
04263       /*
04264        * kRefCounted: Do not write the NUL if the buffer is shared.
04265        * That is mostly safe, except when the length of one copy was modified
04266        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04267        * Then the NUL would be written into the middle of another copy's string.
04268        */
04269       if(!(fFlags&kBufferIsReadonly)) {
04270         /*
04271          * We must not write to a readonly buffer, but it is known to be
04272          * NUL-terminated if len<capacity.
04273          * A shared, allocated buffer (refCount()>1) must not have its contents
04274          * modified, but the NUL at [len] is beyond the string contents,
04275          * and multiple string objects and threads writing the same NUL into the
04276          * same location is harmless.
04277          * In all other cases, the buffer is fully writable and it is anyway safe
04278          * to write the NUL.
04279          *
04280          * Note: An earlier version of this code tested whether there is a NUL
04281          * at [len] already, but, while safe, it generated lots of warnings from
04282          * tools like valgrind and Purify.
04283          */
04284         array[len] = 0;
04285       }
04286       return array;
04287     } else if(cloneArrayIfNeeded(len+1)) {
04288       array = getArrayStart();
04289       array[len] = 0;
04290       return array;
04291     } else {
04292       return 0;
04293     }
04294   }
04295 }
04296 
04297 inline UnicodeString&
04298 UnicodeString::operator= (UChar ch)
04299 { return doReplace(0, length(), &ch, 0, 1); }
04300 
04301 inline UnicodeString&
04302 UnicodeString::operator= (UChar32 ch)
04303 { return replace(0, length(), ch); }
04304 
04305 inline UnicodeString&
04306 UnicodeString::setTo(const UnicodeString& srcText,
04307              int32_t srcStart,
04308              int32_t srcLength)
04309 {
04310   unBogus();
04311   return doReplace(0, length(), srcText, srcStart, srcLength);
04312 }
04313 
04314 inline UnicodeString&
04315 UnicodeString::setTo(const UnicodeString& srcText,
04316              int32_t srcStart)
04317 {
04318   unBogus();
04319   srcText.pinIndex(srcStart);
04320   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
04321 }
04322 
04323 inline UnicodeString&
04324 UnicodeString::setTo(const UnicodeString& srcText)
04325 {
04326   return copyFrom(srcText);
04327 }
04328 
04329 inline UnicodeString&
04330 UnicodeString::setTo(const UChar *srcChars,
04331              int32_t srcLength)
04332 {
04333   unBogus();
04334   return doReplace(0, length(), srcChars, 0, srcLength);
04335 }
04336 
04337 inline UnicodeString&
04338 UnicodeString::setTo(UChar srcChar)
04339 {
04340   unBogus();
04341   return doReplace(0, length(), &srcChar, 0, 1);
04342 }
04343 
04344 inline UnicodeString&
04345 UnicodeString::setTo(UChar32 srcChar)
04346 {
04347   unBogus();
04348   return replace(0, length(), srcChar);
04349 }
04350 
04351 inline UnicodeString&
04352 UnicodeString::append(const UnicodeString& srcText,
04353               int32_t srcStart,
04354               int32_t srcLength)
04355 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
04356 
04357 inline UnicodeString&
04358 UnicodeString::append(const UnicodeString& srcText)
04359 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04360 
04361 inline UnicodeString&
04362 UnicodeString::append(const UChar *srcChars,
04363               int32_t srcStart,
04364               int32_t srcLength)
04365 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
04366 
04367 inline UnicodeString&
04368 UnicodeString::append(const UChar *srcChars,
04369               int32_t srcLength)
04370 { return doReplace(length(), 0, srcChars, 0, srcLength); }
04371 
04372 inline UnicodeString&
04373 UnicodeString::append(UChar srcChar)
04374 { return doReplace(length(), 0, &srcChar, 0, 1); }
04375 
04376 inline UnicodeString&
04377 UnicodeString::operator+= (UChar ch)
04378 { return doReplace(length(), 0, &ch, 0, 1); }
04379 
04380 inline UnicodeString&
04381 UnicodeString::operator+= (UChar32 ch) {
04382   return append(ch);
04383 }
04384 
04385 inline UnicodeString&
04386 UnicodeString::operator+= (const UnicodeString& srcText)
04387 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
04388 
04389 inline UnicodeString&
04390 UnicodeString::insert(int32_t start,
04391               const UnicodeString& srcText,
04392               int32_t srcStart,
04393               int32_t srcLength)
04394 { return doReplace(start, 0, srcText, srcStart, srcLength); }
04395 
04396 inline UnicodeString&
04397 UnicodeString::insert(int32_t start,
04398               const UnicodeString& srcText)
04399 { return doReplace(start, 0, srcText, 0, srcText.length()); }
04400 
04401 inline UnicodeString&
04402 UnicodeString::insert(int32_t start,
04403               const UChar *srcChars,
04404               int32_t srcStart,
04405               int32_t srcLength)
04406 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
04407 
04408 inline UnicodeString&
04409 UnicodeString::insert(int32_t start,
04410               const UChar *srcChars,
04411               int32_t srcLength)
04412 { return doReplace(start, 0, srcChars, 0, srcLength); }
04413 
04414 inline UnicodeString&
04415 UnicodeString::insert(int32_t start,
04416               UChar srcChar)
04417 { return doReplace(start, 0, &srcChar, 0, 1); }
04418 
04419 inline UnicodeString&
04420 UnicodeString::insert(int32_t start,
04421               UChar32 srcChar)
04422 { return replace(start, 0, srcChar); }
04423 
04424 
04425 inline UnicodeString&
04426 UnicodeString::remove()
04427 {
04428   // remove() of a bogus string makes the string empty and non-bogus
04429   // we also un-alias a read-only alias to deal with NUL-termination
04430   // issues with getTerminatedBuffer()
04431   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
04432     setToEmpty();
04433   } else {
04434     fShortLength = 0;
04435   }
04436   return *this;
04437 }
04438 
04439 inline UnicodeString&
04440 UnicodeString::remove(int32_t start,
04441              int32_t _length)
04442 {
04443     if(start <= 0 && _length == INT32_MAX) {
04444         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
04445         return remove();
04446     }
04447     return doReplace(start, _length, NULL, 0, 0);
04448 }
04449 
04450 inline UnicodeString&
04451 UnicodeString::removeBetween(int32_t start,
04452                 int32_t limit)
04453 { return doReplace(start, limit - start, NULL, 0, 0); }
04454 
04455 inline UnicodeString &
04456 UnicodeString::retainBetween(int32_t start, int32_t limit) {
04457   truncate(limit);
04458   return doReplace(0, start, NULL, 0, 0);
04459 }
04460 
04461 inline UBool
04462 UnicodeString::truncate(int32_t targetLength)
04463 {
04464   if(isBogus() && targetLength == 0) {
04465     // truncate(0) of a bogus string makes the string empty and non-bogus
04466     unBogus();
04467     return FALSE;
04468   } else if((uint32_t)targetLength < (uint32_t)length()) {
04469     setLength(targetLength);
04470     if(fFlags&kBufferIsReadonly) {
04471       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04472     }
04473     return TRUE;
04474   } else {
04475     return FALSE;
04476   }
04477 }
04478 
04479 inline UnicodeString&
04480 UnicodeString::reverse()
04481 { return doReverse(0, length()); }
04482 
04483 inline UnicodeString&
04484 UnicodeString::reverse(int32_t start,
04485                int32_t _length)
04486 { return doReverse(start, _length); }
04487 
04488 U_NAMESPACE_END
04489 
04490 #endif