ICU 4.6 4.6
unistr.h
Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1998-2010, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File unistr.h
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   09/25/98    stephen     Creation.
00013 *   11/11/98    stephen     Changed per 11/9 code review.
00014 *   04/20/99    stephen     Overhauled per 4/16 code review.
00015 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
00016 *                           handleReplaceBetween(); other methods unchanged.
00017 *   06/25/01    grhoten     Remove dependency on iostream.
00018 ******************************************************************************
00019 */
00020 
00021 #ifndef UNISTR_H
00022 #define UNISTR_H
00023 
00029 #include "unicode/utypes.h"
00030 #include "unicode/rep.h"
00031 #include "unicode/std_string.h"
00032 #include "unicode/stringpiece.h"
00033 #include "unicode/bytestream.h"
00034 
00035 struct UConverter;          // unicode/ucnv.h
00036 class  StringThreadTest;
00037 
00038 #ifndef U_COMPARE_CODE_POINT_ORDER
00039 /* see also ustring.h and unorm.h */
/*
 * Defines U_COMPARE_CODE_POINT_ORDER as 0x8000 (a single bit). The #ifndef
 * guard means the definition below only takes effect when no previously
 * included header has defined the macro already.
 * NOTE(review): presumably the value has to stay identical to the
 * definitions in ustring.h and unorm.h mentioned above -- confirm there.
 */
00045 #define U_COMPARE_CODE_POINT_ORDER  0x8000
00046 #endif
00047 
/*
 * Local declaration of u_strlen(), compiled only while USTRING_H is not
 * defined, so that this header can name the function without including
 * another header for it.
 * Signature: takes a const UChar pointer, returns int32_t.
 * NOTE(review): USTRING_H is presumably the include guard of
 * unicode/ustring.h, and the function presumably returns the number of
 * UChar units before the terminating NUL -- confirm against that header.
 */
00048 #ifndef USTRING_H
00049 
00052 U_STABLE int32_t U_EXPORT2
00053 u_strlen(const UChar *s);
00054 #endif
00055 
00056 U_NAMESPACE_BEGIN
00057 
00058 class Locale;               // unicode/locid.h
00059 class StringCharacterIterator;
00060 class BreakIterator;        // unicode/brkiter.h
00061 
00062 /* The <iostream> include has been moved to unicode/ustream.h */
00063 
/*
 * US_INV: shorthand for the namespace-qualified enumerator
 * UnicodeString::kInvariant. It is what the fallback branch of
 * UNICODE_STRING passes as the last constructor argument.
 */
00074 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
00075 
/*
 * UNICODE_STRING(cs, _length): expands to a UnicodeString constructor call
 * for the literal `cs` with length argument `_length`. Exactly one of four
 * expansions is chosen at preprocessing time:
 *
 *   1. U_DECLARE_UTF16 is defined: `cs` is wrapped in U_DECLARE_UTF16() and
 *      the result is cast to const UChar *.
 *   2. wchar_t and UChar have the same size, and either the charset family
 *      is ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: the
 *      token L is pasted onto `cs` (L ## cs) and the result is cast to
 *      const UChar *. Because of the token paste, `cs` has to be a plain
 *      string-literal token here, not an expression or variable.
 *   3. UChar is 1 byte and the charset family is ASCII: `cs` itself is cast
 *      to const UChar *.
 *   4. Otherwise: the (cs, _length, US_INV) constructor is used.
 *
 * Branches 1-3 all call the three-argument constructor
 * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength)
 * with TRUE as the first argument.
 * NOTE(review): presumably that constructor aliases the literal instead of
 * copying it -- confirm against the constructor's implementation.
 */
00093 #if defined(U_DECLARE_UTF16)
00094 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
00095 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
00096 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
00097 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
00098 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
00099 #else
00100 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
00101 #endif
00102 
/*
 * UNICODE_STRING_SIMPLE(cs): UNICODE_STRING with -1 supplied as the length
 * argument, so callers only pass the literal.
 * NOTE(review): -1 presumably tells the constructor to determine the length
 * from the NUL terminator -- confirm against the constructor documentation.
 */
00116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
00117 
00187 class U_COMMON_API UnicodeString : public Replaceable
00188 {
00189 public:
00190 
  // Tag enum with a single enumerator. It appears as the type of the last
  // parameter (`enum EInvariant inv`) of one extract() overload and of one
  // `const char *` constructor in this class, so passing kInvariant selects
  // those overloads. The US_INV macro expands to this enumerator.
  // NOTE(review): presumably "invariant" refers to the codepage-invariant
  // character subset mentioned in the doCodepageCreate() comment -- confirm.
00199   enum EInvariant {
00204     kInvariant
00205   };
00206 
00207   //========================================
00208   // Read-only operations
00209   //========================================
00210 
00211   /* Comparison - bitwise only - for international comparison use collation */
00212 
00220   inline UBool operator== (const UnicodeString& text) const;
00221 
00229   inline UBool operator!= (const UnicodeString& text) const;
00230 
00238   inline UBool operator> (const UnicodeString& text) const;
00239 
00247   inline UBool operator< (const UnicodeString& text) const;
00248 
00256   inline UBool operator>= (const UnicodeString& text) const;
00257 
00265   inline UBool operator<= (const UnicodeString& text) const;
00266 
00278   inline int8_t compare(const UnicodeString& text) const;
00279 
00294   inline int8_t compare(int32_t start,
00295          int32_t length,
00296          const UnicodeString& text) const;
00297 
00315    inline int8_t compare(int32_t start,
00316          int32_t length,
00317          const UnicodeString& srcText,
00318          int32_t srcStart,
00319          int32_t srcLength) const;
00320 
00333   inline int8_t compare(const UChar *srcChars,
00334          int32_t srcLength) const;
00335 
00350   inline int8_t compare(int32_t start,
00351          int32_t length,
00352          const UChar *srcChars) const;
00353 
00371   inline int8_t compare(int32_t start,
00372          int32_t length,
00373          const UChar *srcChars,
00374          int32_t srcStart,
00375          int32_t srcLength) const;
00376 
00394   inline int8_t compareBetween(int32_t start,
00395             int32_t limit,
00396             const UnicodeString& srcText,
00397             int32_t srcStart,
00398             int32_t srcLimit) const;
00399 
00417   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
00418 
00438   inline int8_t compareCodePointOrder(int32_t start,
00439                                       int32_t length,
00440                                       const UnicodeString& srcText) const;
00441 
00463    inline int8_t compareCodePointOrder(int32_t start,
00464                                        int32_t length,
00465                                        const UnicodeString& srcText,
00466                                        int32_t srcStart,
00467                                        int32_t srcLength) const;
00468 
00487   inline int8_t compareCodePointOrder(const UChar *srcChars,
00488                                       int32_t srcLength) const;
00489 
00509   inline int8_t compareCodePointOrder(int32_t start,
00510                                       int32_t length,
00511                                       const UChar *srcChars) const;
00512 
00534   inline int8_t compareCodePointOrder(int32_t start,
00535                                       int32_t length,
00536                                       const UChar *srcChars,
00537                                       int32_t srcStart,
00538                                       int32_t srcLength) const;
00539 
00561   inline int8_t compareCodePointOrderBetween(int32_t start,
00562                                              int32_t limit,
00563                                              const UnicodeString& srcText,
00564                                              int32_t srcStart,
00565                                              int32_t srcLimit) const;
00566 
00585   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
00586 
00607   inline int8_t caseCompare(int32_t start,
00608          int32_t length,
00609          const UnicodeString& srcText,
00610          uint32_t options) const;
00611 
00634   inline int8_t caseCompare(int32_t start,
00635          int32_t length,
00636          const UnicodeString& srcText,
00637          int32_t srcStart,
00638          int32_t srcLength,
00639          uint32_t options) const;
00640 
00660   inline int8_t caseCompare(const UChar *srcChars,
00661          int32_t srcLength,
00662          uint32_t options) const;
00663 
00684   inline int8_t caseCompare(int32_t start,
00685          int32_t length,
00686          const UChar *srcChars,
00687          uint32_t options) const;
00688 
00711   inline int8_t caseCompare(int32_t start,
00712          int32_t length,
00713          const UChar *srcChars,
00714          int32_t srcStart,
00715          int32_t srcLength,
00716          uint32_t options) const;
00717 
00740   inline int8_t caseCompareBetween(int32_t start,
00741             int32_t limit,
00742             const UnicodeString& srcText,
00743             int32_t srcStart,
00744             int32_t srcLimit,
00745             uint32_t options) const;
00746 
00754   inline UBool startsWith(const UnicodeString& text) const;
00755 
00766   inline UBool startsWith(const UnicodeString& srcText,
00767             int32_t srcStart,
00768             int32_t srcLength) const;
00769 
00778   inline UBool startsWith(const UChar *srcChars,
00779             int32_t srcLength) const;
00780 
00790   inline UBool startsWith(const UChar *srcChars,
00791             int32_t srcStart,
00792             int32_t srcLength) const;
00793 
00801   inline UBool endsWith(const UnicodeString& text) const;
00802 
00813   inline UBool endsWith(const UnicodeString& srcText,
00814           int32_t srcStart,
00815           int32_t srcLength) const;
00816 
00825   inline UBool endsWith(const UChar *srcChars,
00826           int32_t srcLength) const;
00827 
00838   inline UBool endsWith(const UChar *srcChars,
00839           int32_t srcStart,
00840           int32_t srcLength) const;
00841 
00842 
00843   /* Searching - bitwise only */
00844 
00853   inline int32_t indexOf(const UnicodeString& text) const;
00854 
00864   inline int32_t indexOf(const UnicodeString& text,
00865               int32_t start) const;
00866 
00878   inline int32_t indexOf(const UnicodeString& text,
00879               int32_t start,
00880               int32_t length) const;
00881 
00898   inline int32_t indexOf(const UnicodeString& srcText,
00899               int32_t srcStart,
00900               int32_t srcLength,
00901               int32_t start,
00902               int32_t length) const;
00903 
00915   inline int32_t indexOf(const UChar *srcChars,
00916               int32_t srcLength,
00917               int32_t start) const;
00918 
00931   inline int32_t indexOf(const UChar *srcChars,
00932               int32_t srcLength,
00933               int32_t start,
00934               int32_t length) const;
00935 
00952   int32_t indexOf(const UChar *srcChars,
00953               int32_t srcStart,
00954               int32_t srcLength,
00955               int32_t start,
00956               int32_t length) const;
00957 
00965   inline int32_t indexOf(UChar c) const;
00966 
00975   inline int32_t indexOf(UChar32 c) const;
00976 
00985   inline int32_t indexOf(UChar c,
00986               int32_t start) const;
00987 
00997   inline int32_t indexOf(UChar32 c,
00998               int32_t start) const;
00999 
01010   inline int32_t indexOf(UChar c,
01011               int32_t start,
01012               int32_t length) const;
01013 
01025   inline int32_t indexOf(UChar32 c,
01026               int32_t start,
01027               int32_t length) const;
01028 
01037   inline int32_t lastIndexOf(const UnicodeString& text) const;
01038 
01048   inline int32_t lastIndexOf(const UnicodeString& text,
01049               int32_t start) const;
01050 
01062   inline int32_t lastIndexOf(const UnicodeString& text,
01063               int32_t start,
01064               int32_t length) const;
01065 
01082   inline int32_t lastIndexOf(const UnicodeString& srcText,
01083               int32_t srcStart,
01084               int32_t srcLength,
01085               int32_t start,
01086               int32_t length) const;
01087 
01098   inline int32_t lastIndexOf(const UChar *srcChars,
01099               int32_t srcLength,
01100               int32_t start) const;
01101 
01114   inline int32_t lastIndexOf(const UChar *srcChars,
01115               int32_t srcLength,
01116               int32_t start,
01117               int32_t length) const;
01118 
01135   int32_t lastIndexOf(const UChar *srcChars,
01136               int32_t srcStart,
01137               int32_t srcLength,
01138               int32_t start,
01139               int32_t length) const;
01140 
01148   inline int32_t lastIndexOf(UChar c) const;
01149 
01158   inline int32_t lastIndexOf(UChar32 c) const;
01159 
01168   inline int32_t lastIndexOf(UChar c,
01169               int32_t start) const;
01170 
01180   inline int32_t lastIndexOf(UChar32 c,
01181               int32_t start) const;
01182 
01193   inline int32_t lastIndexOf(UChar c,
01194               int32_t start,
01195               int32_t length) const;
01196 
01208   inline int32_t lastIndexOf(UChar32 c,
01209               int32_t start,
01210               int32_t length) const;
01211 
01212 
01213   /* Character access */
01214 
01223   inline UChar charAt(int32_t offset) const;
01224 
01232   inline UChar operator[] (int32_t offset) const;
01233 
01245   inline UChar32 char32At(int32_t offset) const;
01246 
01262   inline int32_t getChar32Start(int32_t offset) const;
01263 
01280   inline int32_t getChar32Limit(int32_t offset) const;
01281 
01332   int32_t moveIndex32(int32_t index, int32_t delta) const;
01333 
01334   /* Substring extraction */
01335 
01351   inline void extract(int32_t start,
01352            int32_t length,
01353            UChar *dst,
01354            int32_t dstStart = 0) const;
01355 
01377   int32_t
01378   extract(UChar *dest, int32_t destCapacity,
01379           UErrorCode &errorCode) const;
01380 
01391   inline void extract(int32_t start,
01392            int32_t length,
01393            UnicodeString& target) const;
01394 
01406   inline void extractBetween(int32_t start,
01407               int32_t limit,
01408               UChar *dst,
01409               int32_t dstStart = 0) const;
01410 
01420   virtual void extractBetween(int32_t start,
01421               int32_t limit,
01422               UnicodeString& target) const;
01423 
01445   int32_t extract(int32_t start,
01446            int32_t startLength,
01447            char *target,
01448            int32_t targetCapacity,
01449            enum EInvariant inv) const;
01450 
01451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
01452 
01472   int32_t extract(int32_t start,
01473            int32_t startLength,
01474            char *target,
01475            uint32_t targetLength) const;
01476 
01477 #endif
01478 
01479 #if !UCONFIG_NO_CONVERSION
01480 
01506   inline int32_t extract(int32_t start,
01507                  int32_t startLength,
01508                  char *target,
01509                  const char *codepage = 0) const;
01510 
01540   int32_t extract(int32_t start,
01541            int32_t startLength,
01542            char *target,
01543            uint32_t targetLength,
01544            const char *codepage) const;
01545 
01563   int32_t extract(char *dest, int32_t destCapacity,
01564                   UConverter *cnv,
01565                   UErrorCode &errorCode) const;
01566 
01567 #endif
01568 
01582   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
01583 
01594   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
01595 
01607   void toUTF8(ByteSink &sink) const;
01608 
01609 #if U_HAVE_STD_STRING
01610 
01623   template<typename StringClass>
01624   StringClass &toUTF8String(StringClass &result) const {
01625     StringByteSink<StringClass> sbs(&result);
01626     toUTF8(sbs);
01627     return result;
01628   }
01629 
01630 #endif
01631 
01647   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
01648 
01649   /* Length operations */
01650 
01659   inline int32_t length(void) const;
01660 
01674   int32_t
01675   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
01676 
01700   UBool
01701   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
01702 
01708   inline UBool isEmpty(void) const;
01709 
01719   inline int32_t getCapacity(void) const;
01720 
01721   /* Other operations */
01722 
01728   inline int32_t hashCode(void) const;
01729 
01742   inline UBool isBogus(void) const;
01743 
01744 
01745   //========================================
01746   // Write operations
01747   //========================================
01748 
01749   /* Assignment operations */
01750 
01758   UnicodeString &operator=(const UnicodeString &srcText);
01759 
01780   UnicodeString &fastCopyFrom(const UnicodeString &src);
01781 
01789   inline UnicodeString& operator= (UChar ch);
01790 
01798   inline UnicodeString& operator= (UChar32 ch);
01799 
01811   inline UnicodeString& setTo(const UnicodeString& srcText,
01812                int32_t srcStart);
01813 
01827   inline UnicodeString& setTo(const UnicodeString& srcText,
01828                int32_t srcStart,
01829                int32_t srcLength);
01830 
01839   inline UnicodeString& setTo(const UnicodeString& srcText);
01840 
01849   inline UnicodeString& setTo(const UChar *srcChars,
01850                int32_t srcLength);
01851 
01860   UnicodeString& setTo(UChar srcChar);
01861 
01870   UnicodeString& setTo(UChar32 srcChar);
01871 
01892   UnicodeString &setTo(UBool isTerminated,
01893                        const UChar *text,
01894                        int32_t textLength);
01895 
01915   UnicodeString &setTo(UChar *buffer,
01916                        int32_t buffLength,
01917                        int32_t buffCapacity);
01918 
01959   void setToBogus();
01960 
01968   UnicodeString& setCharAt(int32_t offset,
01969                UChar ch);
01970 
01971 
01972   /* Append operations */
01973 
01981  inline  UnicodeString& operator+= (UChar ch);
01982 
01990  inline  UnicodeString& operator+= (UChar32 ch);
01991 
02000   inline UnicodeString& operator+= (const UnicodeString& srcText);
02001 
02016   inline UnicodeString& append(const UnicodeString& srcText,
02017             int32_t srcStart,
02018             int32_t srcLength);
02019 
02027   inline UnicodeString& append(const UnicodeString& srcText);
02028 
02042   inline UnicodeString& append(const UChar *srcChars,
02043             int32_t srcStart,
02044             int32_t srcLength);
02045 
02054   inline UnicodeString& append(const UChar *srcChars,
02055             int32_t srcLength);
02056 
02063   inline UnicodeString& append(UChar srcChar);
02064 
02071   inline UnicodeString& append(UChar32 srcChar);
02072 
02073 
02074   /* Insert operations */
02075 
02089   inline UnicodeString& insert(int32_t start,
02090             const UnicodeString& srcText,
02091             int32_t srcStart,
02092             int32_t srcLength);
02093 
02102   inline UnicodeString& insert(int32_t start,
02103             const UnicodeString& srcText);
02104 
02118   inline UnicodeString& insert(int32_t start,
02119             const UChar *srcChars,
02120             int32_t srcStart,
02121             int32_t srcLength);
02122 
02132   inline UnicodeString& insert(int32_t start,
02133             const UChar *srcChars,
02134             int32_t srcLength);
02135 
02144   inline UnicodeString& insert(int32_t start,
02145             UChar srcChar);
02146 
02155   inline UnicodeString& insert(int32_t start,
02156             UChar32 srcChar);
02157 
02158 
02159   /* Replace operations */
02160 
02178   UnicodeString& replace(int32_t start,
02179              int32_t length,
02180              const UnicodeString& srcText,
02181              int32_t srcStart,
02182              int32_t srcLength);
02183 
02196   UnicodeString& replace(int32_t start,
02197              int32_t length,
02198              const UnicodeString& srcText);
02199 
02217   UnicodeString& replace(int32_t start,
02218              int32_t length,
02219              const UChar *srcChars,
02220              int32_t srcStart,
02221              int32_t srcLength);
02222 
02235   inline UnicodeString& replace(int32_t start,
02236              int32_t length,
02237              const UChar *srcChars,
02238              int32_t srcLength);
02239 
02251   inline UnicodeString& replace(int32_t start,
02252              int32_t length,
02253              UChar srcChar);
02254 
02266   inline UnicodeString& replace(int32_t start,
02267              int32_t length,
02268              UChar32 srcChar);
02269 
02279   inline UnicodeString& replaceBetween(int32_t start,
02280                 int32_t limit,
02281                 const UnicodeString& srcText);
02282 
02297   inline UnicodeString& replaceBetween(int32_t start,
02298                 int32_t limit,
02299                 const UnicodeString& srcText,
02300                 int32_t srcStart,
02301                 int32_t srcLimit);
02302 
02313   virtual void handleReplaceBetween(int32_t start,
02314                                     int32_t limit,
02315                                     const UnicodeString& text);
02316 
02322   virtual UBool hasMetaData() const;
02323 
02339   virtual void copy(int32_t start, int32_t limit, int32_t dest);
02340 
02341   /* Search and replace operations */
02342 
02351   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
02352                 const UnicodeString& newText);
02353 
02365   inline UnicodeString& findAndReplace(int32_t start,
02366                 int32_t length,
02367                 const UnicodeString& oldText,
02368                 const UnicodeString& newText);
02369 
02387   UnicodeString& findAndReplace(int32_t start,
02388                 int32_t length,
02389                 const UnicodeString& oldText,
02390                 int32_t oldStart,
02391                 int32_t oldLength,
02392                 const UnicodeString& newText,
02393                 int32_t newStart,
02394                 int32_t newLength);
02395 
02396 
02397   /* Remove operations */
02398 
02404   inline UnicodeString& remove(void);
02405 
02414   inline UnicodeString& remove(int32_t start,
02415                                int32_t length = (int32_t)INT32_MAX);
02416 
02425   inline UnicodeString& removeBetween(int32_t start,
02426                                       int32_t limit = (int32_t)INT32_MAX);
02427 
02437   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
02438 
02439   /* Length operations */
02440 
02452   UBool padLeading(int32_t targetLength,
02453                     UChar padChar = 0x0020);
02454 
02466   UBool padTrailing(int32_t targetLength,
02467                      UChar padChar = 0x0020);
02468 
02475   inline UBool truncate(int32_t targetLength);
02476 
02482   UnicodeString& trim(void);
02483 
02484 
02485   /* Miscellaneous operations */
02486 
02492   inline UnicodeString& reverse(void);
02493 
02502   inline UnicodeString& reverse(int32_t start,
02503              int32_t length);
02504 
02511   UnicodeString& toUpper(void);
02512 
02520   UnicodeString& toUpper(const Locale& locale);
02521 
02528   UnicodeString& toLower(void);
02529 
02537   UnicodeString& toLower(const Locale& locale);
02538 
02539 #if !UCONFIG_NO_BREAK_ITERATION
02540 
02567   UnicodeString &toTitle(BreakIterator *titleIter);
02568 
02596   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
02597 
02629   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
02630 
02631 #endif
02632 
02644   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
02645 
02646   //========================================
02647   // Access to the internal buffer
02648   //========================================
02649 
02693   UChar *getBuffer(int32_t minCapacity);
02694 
02715   void releaseBuffer(int32_t newLength=-1);
02716 
02747   inline const UChar *getBuffer() const;
02748 
02782   inline const UChar *getTerminatedBuffer();
02783 
02784   //========================================
02785   // Constructors
02786   //========================================
02787 
02791   UnicodeString();
02792 
02804   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
02805 
02811   UnicodeString(UChar ch);
02812 
02818   UnicodeString(UChar32 ch);
02819 
02826   UnicodeString(const UChar *text);
02827 
02835   UnicodeString(const UChar *text,
02836         int32_t textLength);
02837 
02857   UnicodeString(UBool isTerminated,
02858                 const UChar *text,
02859                 int32_t textLength);
02860 
02879   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
02880 
02881 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
02882 
02889   UnicodeString(const char *codepageData);
02890 
02897   UnicodeString(const char *codepageData, int32_t dataLength);
02898 
02899 #endif
02900 
02901 #if !UCONFIG_NO_CONVERSION
02902 
02920   UnicodeString(const char *codepageData, const char *codepage);
02921 
02939   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
02940 
02962   UnicodeString(
02963         const char *src, int32_t srcLength,
02964         UConverter *cnv,
02965         UErrorCode &errorCode);
02966 
02967 #endif
02968 
02993   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
02994 
02995 
03001   UnicodeString(const UnicodeString& that);
03002 
03009   UnicodeString(const UnicodeString& src, int32_t srcStart);
03010 
03018   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
03019 
03036   virtual Replaceable *clone() const;
03037 
03041   virtual ~UnicodeString();
03042 
03056   static UnicodeString fromUTF8(const StringPiece &utf8);
03057 
03069   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
03070 
03071   /* Miscellaneous operations */
03072 
03107   UnicodeString unescape() const;
03108 
03128   UChar32 unescapeAt(int32_t &offset) const;
03129 
03135   static UClassID U_EXPORT2 getStaticClassID();
03136 
03142   virtual UClassID getDynamicClassID() const;
03143 
03144   //========================================
03145   // Implementation methods
03146   //========================================
03147 
03148 protected:
03153   virtual int32_t getLength() const;
03154 
03160   virtual UChar getCharAt(int32_t offset) const;
03161 
03167   virtual UChar32 getChar32At(int32_t offset) const;
03168 
03169 private:
03170   // For char* constructors. Could be made public.
03171   UnicodeString &setToUTF8(const StringPiece &utf8);
03172   // For extract(char*).
03173   // We could make a toUTF8(target, capacity, errorCode) public but not
03174   // this version: New API will be cleaner if we make callers create substrings
03175   // rather than having start+length on every method,
03176   // and it should take a UErrorCode&.
03177   int32_t
03178   toUTF8(int32_t start, int32_t len,
03179          char *target, int32_t capacity) const;
03180 
03181 
03182   inline int8_t
03183   doCompare(int32_t start,
03184            int32_t length,
03185            const UnicodeString& srcText,
03186            int32_t srcStart,
03187            int32_t srcLength) const;
03188 
03189   int8_t doCompare(int32_t start,
03190            int32_t length,
03191            const UChar *srcChars,
03192            int32_t srcStart,
03193            int32_t srcLength) const;
03194 
03195   inline int8_t
03196   doCompareCodePointOrder(int32_t start,
03197                           int32_t length,
03198                           const UnicodeString& srcText,
03199                           int32_t srcStart,
03200                           int32_t srcLength) const;
03201 
03202   int8_t doCompareCodePointOrder(int32_t start,
03203                                  int32_t length,
03204                                  const UChar *srcChars,
03205                                  int32_t srcStart,
03206                                  int32_t srcLength) const;
03207 
03208   inline int8_t
03209   doCaseCompare(int32_t start,
03210                 int32_t length,
03211                 const UnicodeString &srcText,
03212                 int32_t srcStart,
03213                 int32_t srcLength,
03214                 uint32_t options) const;
03215 
03216   int8_t
03217   doCaseCompare(int32_t start,
03218                 int32_t length,
03219                 const UChar *srcChars,
03220                 int32_t srcStart,
03221                 int32_t srcLength,
03222                 uint32_t options) const;
03223 
03224   int32_t doIndexOf(UChar c,
03225             int32_t start,
03226             int32_t length) const;
03227 
03228   int32_t doIndexOf(UChar32 c,
03229                         int32_t start,
03230                         int32_t length) const;
03231 
03232   int32_t doLastIndexOf(UChar c,
03233                 int32_t start,
03234                 int32_t length) const;
03235 
03236   int32_t doLastIndexOf(UChar32 c,
03237                             int32_t start,
03238                             int32_t length) const;
03239 
03240   void doExtract(int32_t start,
03241          int32_t length,
03242          UChar *dst,
03243          int32_t dstStart) const;
03244 
03245   inline void doExtract(int32_t start,
03246          int32_t length,
03247          UnicodeString& target) const;
03248 
03249   inline UChar doCharAt(int32_t offset)  const;
03250 
03251   UnicodeString& doReplace(int32_t start,
03252                int32_t length,
03253                const UnicodeString& srcText,
03254                int32_t srcStart,
03255                int32_t srcLength);
03256 
03257   UnicodeString& doReplace(int32_t start,
03258                int32_t length,
03259                const UChar *srcChars,
03260                int32_t srcStart,
03261                int32_t srcLength);
03262 
03263   UnicodeString& doReverse(int32_t start,
03264                int32_t length);
03265 
03266   // calculate hash code
03267   int32_t doHashCode(void) const;
03268 
03269   // get pointer to start of array
03270   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
03271   inline UChar* getArrayStart(void);
03272   inline const UChar* getArrayStart(void) const;
03273 
03274   // A UnicodeString object (not necessarily its current buffer)
03275   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
03276   inline UBool isWritable() const;
03277 
03278   // Is the current buffer writable?
03279   inline UBool isBufferWritable() const;
03280 
03281   // None of the following does releaseArray().
03282   inline void setLength(int32_t len);        // sets only fShortLength and fLength
03283   inline void setToEmpty();                  // sets fFlags=kShortString
03284   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
03285 
03286   // allocate the array; result may be fStackBuffer
03287   // sets refCount to 1 if appropriate
03288   // sets fArray, fCapacity, and fFlags
03289   // returns boolean for success or failure
03290   UBool allocate(int32_t capacity);
03291 
03292   // release the array if owned
03293   void releaseArray(void);
03294 
03295   // turn a bogus string into an empty one
03296   void unBogus();
03297 
03298   // implements assignment operator, copy constructor, and fastCopyFrom()
03299   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
03300 
03301   // Pin start (pinIndex) or start and length (pinIndices) to acceptable values.
03302   inline void pinIndex(int32_t& start) const;
03303   inline void pinIndices(int32_t& start,
03304                          int32_t& length) const;
03305 
03306 #if !UCONFIG_NO_CONVERSION
03307 
03308   /* Internal extract() using UConverter. */
03309   int32_t doExtract(int32_t start, int32_t length,
03310                     char *dest, int32_t destCapacity,
03311                     UConverter *cnv,
03312                     UErrorCode &errorCode) const;
03313 
03314   /*
03315    * Real constructor for converting from codepage data.
03316    * It assumes that it is called with !fRefCounted.
03317    *
03318    * If <code>codepage==0</code>, then the default converter
03319    * is used for the platform encoding.
03320    * If <code>codepage</code> is an empty string (<code>""</code>),
03321    * then a simple conversion is performed on the codepage-invariant
03322    * subset ("invariant characters") of the platform encoding. See utypes.h.
03323    */
03324   void doCodepageCreate(const char *codepageData,
03325                         int32_t dataLength,
03326                         const char *codepage);
03327 
03328   /*
03329    * Worker function for creating a UnicodeString from
03330    * a codepage string using a UConverter.
03331    */
03332   void
03333   doCodepageCreate(const char *codepageData,
03334                    int32_t dataLength,
03335                    UConverter *converter,
03336                    UErrorCode &status);
03337 
03338 #endif
03339 
03340   /*
03341    * This function is called when write access to the array
03342    * is necessary.
03343    *
03344    * We need to make a copy of the array if
03345    * the buffer is read-only, or
03346    * the buffer is refCounted (shared), and refCount>1, or
03347    * the buffer is too small.
03348    *
03349    * Return FALSE if memory could not be allocated.
03350    */
03351   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
03352                             int32_t growCapacity = -1,
03353                             UBool doCopyArray = TRUE,
03354                             int32_t **pBufferToDelete = 0,
03355                             UBool forceClone = FALSE);
03356 
03357   // common function for case mappings
03358   UnicodeString &
03359   caseMap(BreakIterator *titleIter,
03360           const char *locale,
03361           uint32_t options,
03362           int32_t toWhichCase);
03363 
03364   // ref counting
03365   void addRef(void);
03366   int32_t removeRef(void);
03367   int32_t refCount(void) const;
03368 
03369   // constants
03370   enum {
    // Private constants: stack-buffer sizing, sentinel values, and the bit
    // values that are combined in fFlags.
03371     // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
03372     // 32-bit pointers: 4+1+1+13*2 = 32 bytes
03373     // 64-bit pointers: 8+1+1+15*2 = 40 bytes
    // (presumably: vtable pointer + fShortLength + fFlags + stack buffer of 2-byte UChars)
03374     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
    // kInvalidUChar is what doCharAt() and char32At() return for an out-of-range offset.
03375     kInvalidUChar=0xffff, // invalid UChar index
03376     kGrowSize=128, // grow size for this buffer
03377     kInvalidHashCode=0, // invalid hash code
03378     kEmptyHashCode=1, // hash code for empty string
03379 
03380     // bit flag values for fFlags
03381     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
03382     kUsingStackBuffer=2,// fArray==fStackBuffer
03383     kRefCounted=4,      // there is a refCount field before the characters in fArray
03384     kBufferIsReadonly=8,// do not write to this buffer
03385     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
03386                         // and releaseBuffer(newLength) must be called
03387 
03388     // combined values for convenience
    // setToEmpty() stores kShortString in fFlags.
03389     kShortString=kUsingStackBuffer,
03390     kLongString=kRefCounted,
03391     kReadonlyAlias=kBufferIsReadonly,
03392     kWritableAlias=0
03393   };
03394 
03395   friend class StringThreadTest;
03396 
03397   union StackBufferOrFields;        // forward declaration necessary before friend declaration
03398   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
03399 
03400   /*
03401    * The following are all the class fields that are stored
03402    * in each UnicodeString object.
03403    * Note that UnicodeString has virtual functions,
03404    * therefore there is an implicit vtable pointer
03405    * as the first real field.
03406    * The fields should be aligned such that no padding is
03407    * necessary, mostly by having larger types first.
03408    * On 32-bit machines, the size should be 32 bytes,
03409    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
03410    */
03411   // (implicit) *vtable;
03412   int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
  // setLength() stores -1 in fShortLength when the length exceeds 127;
  // length() then reads fUnion.fFields.fLength instead.
03413   uint8_t   fFlags;         // bit flags: see constants above
03414   union StackBufferOrFields {
03415     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
03416     // else fFields is used
03417     UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
03418     struct {
03419       uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
03420       int32_t   fLength;    // number of characters in fArray if >127; else undefined
03421       UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
      // fCapacity is what getCapacity() returns when the stack buffer is not in use.
03422       int32_t   fCapacity;  // sizeof fArray
03423     } fFields;
03424   } fUnion;
03425 };
03426 
03435 U_COMMON_API UnicodeString U_EXPORT2
03436 operator+ (const UnicodeString &s1, const UnicodeString &s2);
03437 
03438 //========================================
03439 // Inline members
03440 //========================================
03441 
03442 //========================================
03443 // Privates
03444 //========================================
03445 
03446 inline void
03447 UnicodeString::pinIndex(int32_t& start) const
03448 {
03449   // pin index
03450   if(start < 0) {
03451     start = 0;
03452   } else if(start > length()) {
03453     start = length();
03454   }
03455 }
03456 
03457 inline void
03458 UnicodeString::pinIndices(int32_t& start,
03459                           int32_t& _length) const
03460 {
03461   // pin indices
03462   int32_t len = length();
03463   if(start < 0) {
03464     start = 0;
03465   } else if(start > len) {
03466     start = len;
03467   }
03468   if(_length < 0) {
03469     _length = 0;
03470   } else if(_length > (len - start)) {
03471     _length = (len - start);
03472   }
03473 }
03474 
03475 inline UChar*
03476 UnicodeString::getArrayStart()
03477 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03478 
03479 inline const UChar*
03480 UnicodeString::getArrayStart() const
03481 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
03482 
03483 //========================================
03484 // Read-only implementation methods
03485 //========================================
03486 inline int32_t
03487 UnicodeString::length() const
03488 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
03489 
03490 inline int32_t
03491 UnicodeString::getCapacity() const
03492 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
03493 
03494 inline int32_t
03495 UnicodeString::hashCode() const
03496 { return doHashCode(); }
03497 
03498 inline UBool
03499 UnicodeString::isBogus() const
03500 { return (UBool)(fFlags & kIsBogus); }
03501 
03502 inline UBool
03503 UnicodeString::isWritable() const
03504 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
03505 
03506 inline UBool
03507 UnicodeString::isBufferWritable() const
03508 {
03509   return (UBool)(
03510       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
03511       (!(fFlags&kRefCounted) || refCount()==1));
03512 }
03513 
03514 inline const UChar *
03515 UnicodeString::getBuffer() const {
03516   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
03517     return 0;
03518   } else if(fFlags&kUsingStackBuffer) {
03519     return fUnion.fStackBuffer;
03520   } else {
03521     return fUnion.fFields.fArray;
03522   }
03523 }
03524 
03525 //========================================
03526 // Read-only alias methods
03527 //========================================
03528 inline int8_t
03529 UnicodeString::doCompare(int32_t start,
03530               int32_t thisLength,
03531               const UnicodeString& srcText,
03532               int32_t srcStart,
03533               int32_t srcLength) const
03534 {
03535   if(srcText.isBogus()) {
03536     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03537   } else {
03538     srcText.pinIndices(srcStart, srcLength);
03539     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03540   }
03541 }
03542 
03543 inline UBool
03544 UnicodeString::operator== (const UnicodeString& text) const
03545 {
03546   if(isBogus()) {
03547     return text.isBogus();
03548   } else {
03549     int32_t len = length(), textLength = text.length();
03550     return
03551       !text.isBogus() &&
03552       len == textLength &&
03553       doCompare(0, len, text, 0, textLength) == 0;
03554   }
03555 }
03556 
03557 inline UBool
03558 UnicodeString::operator!= (const UnicodeString& text) const
03559 { return (! operator==(text)); }
03560 
03561 inline UBool
03562 UnicodeString::operator> (const UnicodeString& text) const
03563 { return doCompare(0, length(), text, 0, text.length()) == 1; }
03564 
03565 inline UBool
03566 UnicodeString::operator< (const UnicodeString& text) const
03567 { return doCompare(0, length(), text, 0, text.length()) == -1; }
03568 
03569 inline UBool
03570 UnicodeString::operator>= (const UnicodeString& text) const
03571 { return doCompare(0, length(), text, 0, text.length()) != -1; }
03572 
03573 inline UBool
03574 UnicodeString::operator<= (const UnicodeString& text) const
03575 { return doCompare(0, length(), text, 0, text.length()) != 1; }
03576 
03577 inline int8_t
03578 UnicodeString::compare(const UnicodeString& text) const
03579 { return doCompare(0, length(), text, 0, text.length()); }
03580 
03581 inline int8_t
03582 UnicodeString::compare(int32_t start,
03583                int32_t _length,
03584                const UnicodeString& srcText) const
03585 { return doCompare(start, _length, srcText, 0, srcText.length()); }
03586 
03587 inline int8_t
03588 UnicodeString::compare(const UChar *srcChars,
03589                int32_t srcLength) const
03590 { return doCompare(0, length(), srcChars, 0, srcLength); }
03591 
03592 inline int8_t
03593 UnicodeString::compare(int32_t start,
03594                int32_t _length,
03595                const UnicodeString& srcText,
03596                int32_t srcStart,
03597                int32_t srcLength) const
03598 { return doCompare(start, _length, srcText, srcStart, srcLength); }
03599 
03600 inline int8_t
03601 UnicodeString::compare(int32_t start,
03602                int32_t _length,
03603                const UChar *srcChars) const
03604 { return doCompare(start, _length, srcChars, 0, _length); }
03605 
03606 inline int8_t
03607 UnicodeString::compare(int32_t start,
03608                int32_t _length,
03609                const UChar *srcChars,
03610                int32_t srcStart,
03611                int32_t srcLength) const
03612 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
03613 
03614 inline int8_t
03615 UnicodeString::compareBetween(int32_t start,
03616                   int32_t limit,
03617                   const UnicodeString& srcText,
03618                   int32_t srcStart,
03619                   int32_t srcLimit) const
03620 { return doCompare(start, limit - start,
03621            srcText, srcStart, srcLimit - srcStart); }
03622 
03623 inline int8_t
03624 UnicodeString::doCompareCodePointOrder(int32_t start,
03625                                        int32_t thisLength,
03626                                        const UnicodeString& srcText,
03627                                        int32_t srcStart,
03628                                        int32_t srcLength) const
03629 {
03630   if(srcText.isBogus()) {
03631     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03632   } else {
03633     srcText.pinIndices(srcStart, srcLength);
03634     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
03635   }
03636 }
03637 
03638 inline int8_t
03639 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
03640 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
03641 
03642 inline int8_t
03643 UnicodeString::compareCodePointOrder(int32_t start,
03644                                      int32_t _length,
03645                                      const UnicodeString& srcText) const
03646 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
03647 
03648 inline int8_t
03649 UnicodeString::compareCodePointOrder(const UChar *srcChars,
03650                                      int32_t srcLength) const
03651 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
03652 
03653 inline int8_t
03654 UnicodeString::compareCodePointOrder(int32_t start,
03655                                      int32_t _length,
03656                                      const UnicodeString& srcText,
03657                                      int32_t srcStart,
03658                                      int32_t srcLength) const
03659 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
03660 
03661 inline int8_t
03662 UnicodeString::compareCodePointOrder(int32_t start,
03663                                      int32_t _length,
03664                                      const UChar *srcChars) const
03665 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
03666 
03667 inline int8_t
03668 UnicodeString::compareCodePointOrder(int32_t start,
03669                                      int32_t _length,
03670                                      const UChar *srcChars,
03671                                      int32_t srcStart,
03672                                      int32_t srcLength) const
03673 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
03674 
03675 inline int8_t
03676 UnicodeString::compareCodePointOrderBetween(int32_t start,
03677                                             int32_t limit,
03678                                             const UnicodeString& srcText,
03679                                             int32_t srcStart,
03680                                             int32_t srcLimit) const
03681 { return doCompareCodePointOrder(start, limit - start,
03682            srcText, srcStart, srcLimit - srcStart); }
03683 
03684 inline int8_t
03685 UnicodeString::doCaseCompare(int32_t start,
03686                              int32_t thisLength,
03687                              const UnicodeString &srcText,
03688                              int32_t srcStart,
03689                              int32_t srcLength,
03690                              uint32_t options) const
03691 {
03692   if(srcText.isBogus()) {
03693     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
03694   } else {
03695     srcText.pinIndices(srcStart, srcLength);
03696     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
03697   }
03698 }
03699 
03700 inline int8_t
03701 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
03702   return doCaseCompare(0, length(), text, 0, text.length(), options);
03703 }
03704 
03705 inline int8_t
03706 UnicodeString::caseCompare(int32_t start,
03707                            int32_t _length,
03708                            const UnicodeString &srcText,
03709                            uint32_t options) const {
03710   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
03711 }
03712 
03713 inline int8_t
03714 UnicodeString::caseCompare(const UChar *srcChars,
03715                            int32_t srcLength,
03716                            uint32_t options) const {
03717   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
03718 }
03719 
03720 inline int8_t
03721 UnicodeString::caseCompare(int32_t start,
03722                            int32_t _length,
03723                            const UnicodeString &srcText,
03724                            int32_t srcStart,
03725                            int32_t srcLength,
03726                            uint32_t options) const {
03727   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
03728 }
03729 
03730 inline int8_t
03731 UnicodeString::caseCompare(int32_t start,
03732                            int32_t _length,
03733                            const UChar *srcChars,
03734                            uint32_t options) const {
03735   return doCaseCompare(start, _length, srcChars, 0, _length, options);
03736 }
03737 
03738 inline int8_t
03739 UnicodeString::caseCompare(int32_t start,
03740                            int32_t _length,
03741                            const UChar *srcChars,
03742                            int32_t srcStart,
03743                            int32_t srcLength,
03744                            uint32_t options) const {
03745   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
03746 }
03747 
03748 inline int8_t
03749 UnicodeString::caseCompareBetween(int32_t start,
03750                                   int32_t limit,
03751                                   const UnicodeString &srcText,
03752                                   int32_t srcStart,
03753                                   int32_t srcLimit,
03754                                   uint32_t options) const {
03755   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
03756 }
03757 
03758 inline int32_t
03759 UnicodeString::indexOf(const UnicodeString& srcText,
03760                int32_t srcStart,
03761                int32_t srcLength,
03762                int32_t start,
03763                int32_t _length) const
03764 {
03765   if(!srcText.isBogus()) {
03766     srcText.pinIndices(srcStart, srcLength);
03767     if(srcLength > 0) {
03768       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03769     }
03770   }
03771   return -1;
03772 }
03773 
03774 inline int32_t
03775 UnicodeString::indexOf(const UnicodeString& text) const
03776 { return indexOf(text, 0, text.length(), 0, length()); }
03777 
03778 inline int32_t
03779 UnicodeString::indexOf(const UnicodeString& text,
03780                int32_t start) const {
03781   pinIndex(start);
03782   return indexOf(text, 0, text.length(), start, length() - start);
03783 }
03784 
03785 inline int32_t
03786 UnicodeString::indexOf(const UnicodeString& text,
03787                int32_t start,
03788                int32_t _length) const
03789 { return indexOf(text, 0, text.length(), start, _length); }
03790 
03791 inline int32_t
03792 UnicodeString::indexOf(const UChar *srcChars,
03793                int32_t srcLength,
03794                int32_t start) const {
03795   pinIndex(start);
03796   return indexOf(srcChars, 0, srcLength, start, length() - start);
03797 }
03798 
03799 inline int32_t
03800 UnicodeString::indexOf(const UChar *srcChars,
03801                int32_t srcLength,
03802                int32_t start,
03803                int32_t _length) const
03804 { return indexOf(srcChars, 0, srcLength, start, _length); }
03805 
03806 inline int32_t
03807 UnicodeString::indexOf(UChar c,
03808                int32_t start,
03809                int32_t _length) const
03810 { return doIndexOf(c, start, _length); }
03811 
03812 inline int32_t
03813 UnicodeString::indexOf(UChar32 c,
03814                int32_t start,
03815                int32_t _length) const
03816 { return doIndexOf(c, start, _length); }
03817 
03818 inline int32_t
03819 UnicodeString::indexOf(UChar c) const
03820 { return doIndexOf(c, 0, length()); }
03821 
03822 inline int32_t
03823 UnicodeString::indexOf(UChar32 c) const
03824 { return indexOf(c, 0, length()); }
03825 
03826 inline int32_t
03827 UnicodeString::indexOf(UChar c,
03828                int32_t start) const {
03829   pinIndex(start);
03830   return doIndexOf(c, start, length() - start);
03831 }
03832 
03833 inline int32_t
03834 UnicodeString::indexOf(UChar32 c,
03835                int32_t start) const {
03836   pinIndex(start);
03837   return indexOf(c, start, length() - start);
03838 }
03839 
03840 inline int32_t
03841 UnicodeString::lastIndexOf(const UChar *srcChars,
03842                int32_t srcLength,
03843                int32_t start,
03844                int32_t _length) const
03845 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
03846 
03847 inline int32_t
03848 UnicodeString::lastIndexOf(const UChar *srcChars,
03849                int32_t srcLength,
03850                int32_t start) const {
03851   pinIndex(start);
03852   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
03853 }
03854 
03855 inline int32_t
03856 UnicodeString::lastIndexOf(const UnicodeString& srcText,
03857                int32_t srcStart,
03858                int32_t srcLength,
03859                int32_t start,
03860                int32_t _length) const
03861 {
03862   if(!srcText.isBogus()) {
03863     srcText.pinIndices(srcStart, srcLength);
03864     if(srcLength > 0) {
03865       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
03866     }
03867   }
03868   return -1;
03869 }
03870 
03871 inline int32_t
03872 UnicodeString::lastIndexOf(const UnicodeString& text,
03873                int32_t start,
03874                int32_t _length) const
03875 { return lastIndexOf(text, 0, text.length(), start, _length); }
03876 
03877 inline int32_t
03878 UnicodeString::lastIndexOf(const UnicodeString& text,
03879                int32_t start) const {
03880   pinIndex(start);
03881   return lastIndexOf(text, 0, text.length(), start, length() - start);
03882 }
03883 
03884 inline int32_t
03885 UnicodeString::lastIndexOf(const UnicodeString& text) const
03886 { return lastIndexOf(text, 0, text.length(), 0, length()); }
03887 
03888 inline int32_t
03889 UnicodeString::lastIndexOf(UChar c,
03890                int32_t start,
03891                int32_t _length) const
03892 { return doLastIndexOf(c, start, _length); }
03893 
03894 inline int32_t
03895 UnicodeString::lastIndexOf(UChar32 c,
03896                int32_t start,
03897                int32_t _length) const {
03898   return doLastIndexOf(c, start, _length);
03899 }
03900 
03901 inline int32_t
03902 UnicodeString::lastIndexOf(UChar c) const
03903 { return doLastIndexOf(c, 0, length()); }
03904 
03905 inline int32_t
03906 UnicodeString::lastIndexOf(UChar32 c) const {
03907   return lastIndexOf(c, 0, length());
03908 }
03909 
03910 inline int32_t
03911 UnicodeString::lastIndexOf(UChar c,
03912                int32_t start) const {
03913   pinIndex(start);
03914   return doLastIndexOf(c, start, length() - start);
03915 }
03916 
03917 inline int32_t
03918 UnicodeString::lastIndexOf(UChar32 c,
03919                int32_t start) const {
03920   pinIndex(start);
03921   return lastIndexOf(c, start, length() - start);
03922 }
03923 
03924 inline UBool
03925 UnicodeString::startsWith(const UnicodeString& text) const
03926 { return compare(0, text.length(), text, 0, text.length()) == 0; }
03927 
03928 inline UBool
03929 UnicodeString::startsWith(const UnicodeString& srcText,
03930               int32_t srcStart,
03931               int32_t srcLength) const
03932 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
03933 
03934 inline UBool
03935 UnicodeString::startsWith(const UChar *srcChars,
03936               int32_t srcLength) const
03937 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
03938 
03939 inline UBool
03940 UnicodeString::startsWith(const UChar *srcChars,
03941               int32_t srcStart,
03942               int32_t srcLength) const
03943 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
03944 
03945 inline UBool
03946 UnicodeString::endsWith(const UnicodeString& text) const
03947 { return doCompare(length() - text.length(), text.length(),
03948            text, 0, text.length()) == 0; }
03949 
03950 inline UBool
03951 UnicodeString::endsWith(const UnicodeString& srcText,
03952             int32_t srcStart,
03953             int32_t srcLength) const {
03954   srcText.pinIndices(srcStart, srcLength);
03955   return doCompare(length() - srcLength, srcLength,
03956                    srcText, srcStart, srcLength) == 0;
03957 }
03958 
03959 inline UBool
03960 UnicodeString::endsWith(const UChar *srcChars,
03961             int32_t srcLength) const {
03962   if(srcLength < 0) {
03963     srcLength = u_strlen(srcChars);
03964   }
03965   return doCompare(length() - srcLength, srcLength,
03966                    srcChars, 0, srcLength) == 0;
03967 }
03968 
03969 inline UBool
03970 UnicodeString::endsWith(const UChar *srcChars,
03971             int32_t srcStart,
03972             int32_t srcLength) const {
03973   if(srcLength < 0) {
03974     srcLength = u_strlen(srcChars + srcStart);
03975   }
03976   return doCompare(length() - srcLength, srcLength,
03977                    srcChars, srcStart, srcLength) == 0;
03978 }
03979 
03980 //========================================
03981 // replace
03982 //========================================
03983 inline UnicodeString&
03984 UnicodeString::replace(int32_t start,
03985                int32_t _length,
03986                const UnicodeString& srcText)
03987 { return doReplace(start, _length, srcText, 0, srcText.length()); }
03988 
03989 inline UnicodeString&
03990 UnicodeString::replace(int32_t start,
03991                int32_t _length,
03992                const UnicodeString& srcText,
03993                int32_t srcStart,
03994                int32_t srcLength)
03995 { return doReplace(start, _length, srcText, srcStart, srcLength); }
03996 
03997 inline UnicodeString&
03998 UnicodeString::replace(int32_t start,
03999                int32_t _length,
04000                const UChar *srcChars,
04001                int32_t srcLength)
04002 { return doReplace(start, _length, srcChars, 0, srcLength); }
04003 
04004 inline UnicodeString&
04005 UnicodeString::replace(int32_t start,
04006                int32_t _length,
04007                const UChar *srcChars,
04008                int32_t srcStart,
04009                int32_t srcLength)
04010 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
04011 
04012 inline UnicodeString&
04013 UnicodeString::replace(int32_t start,
04014                int32_t _length,
04015                UChar srcChar)
04016 { return doReplace(start, _length, &srcChar, 0, 1); }
04017 
04018 inline UnicodeString&
04019 UnicodeString::replace(int32_t start,
04020                int32_t _length,
04021                UChar32 srcChar) {
04022   UChar buffer[U16_MAX_LENGTH];
04023   int32_t count = 0;
04024   UBool isError = FALSE;
04025   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
04026   return doReplace(start, _length, buffer, 0, count);
04027 }
04028 
04029 inline UnicodeString&
04030 UnicodeString::replaceBetween(int32_t start,
04031                   int32_t limit,
04032                   const UnicodeString& srcText)
04033 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
04034 
04035 inline UnicodeString&
04036 UnicodeString::replaceBetween(int32_t start,
04037                   int32_t limit,
04038                   const UnicodeString& srcText,
04039                   int32_t srcStart,
04040                   int32_t srcLimit)
04041 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
04042 
04043 inline UnicodeString&
04044 UnicodeString::findAndReplace(const UnicodeString& oldText,
04045                   const UnicodeString& newText)
04046 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
04047             newText, 0, newText.length()); }
04048 
04049 inline UnicodeString&
04050 UnicodeString::findAndReplace(int32_t start,
04051                   int32_t _length,
04052                   const UnicodeString& oldText,
04053                   const UnicodeString& newText)
04054 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
04055             newText, 0, newText.length()); }
04056 
04057 // ============================
04058 // extract
04059 // ============================
04060 inline void
04061 UnicodeString::doExtract(int32_t start,
04062              int32_t _length,
04063              UnicodeString& target) const
04064 { target.replace(0, target.length(), *this, start, _length); }
04065 
04066 inline void
04067 UnicodeString::extract(int32_t start,
04068                int32_t _length,
04069                UChar *target,
04070                int32_t targetStart) const
04071 { doExtract(start, _length, target, targetStart); }
04072 
04073 inline void
04074 UnicodeString::extract(int32_t start,
04075                int32_t _length,
04076                UnicodeString& target) const
04077 { doExtract(start, _length, target); }
04078 
04079 #if !UCONFIG_NO_CONVERSION
04080 
04081 inline int32_t
04082 UnicodeString::extract(int32_t start,
04083                int32_t _length,
04084                char *dst,
04085                const char *codepage) const
04086 
04087 {
  // Overload without an explicit destination capacity. It forwards to the
  // extract() overload that takes one, passing 0 when dst is null and
  // otherwise a very large capacity (0xffffffff, or a clipped value on GCC).
04088   // This dstSize value will be checked explicitly
04089 #if defined(__GNUC__)
04090   // Ticket #7039: Clip length to the maximum valid length to the end of addressable memory given the starting address
04091   // This is only an issue when using GCC and certain optimizations are turned on.
  // NOTE(review): the clipped branch computes ((char*)UINT32_MAX) - dst, which
  // only looks meaningful relative to a 32-bit address space -- confirm
  // against the ticket before touching this expression.
04092   return extract(start, _length, dst, dst!=0 ? ((dst >= (char*)((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);
04093 #else
04094   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
04095 #endif
04096 }
04097 
04098 #endif
04099 
04100 inline void
04101 UnicodeString::extractBetween(int32_t start,
04102                   int32_t limit,
04103                   UChar *dst,
04104                   int32_t dstStart) const {
04105   pinIndex(start);
04106   pinIndex(limit);
04107   doExtract(start, limit - start, dst, dstStart);
04108 }
04109 
04110 inline UnicodeString
04111 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
04112     return tempSubString(start, limit - start);
04113 }
04114 
04115 inline UChar
04116 UnicodeString::doCharAt(int32_t offset) const
04117 {
04118   if((uint32_t)offset < (uint32_t)length()) {
04119     return getArrayStart()[offset];
04120   } else {
04121     return kInvalidUChar;
04122   }
04123 }
04124 
04125 inline UChar
04126 UnicodeString::charAt(int32_t offset) const
04127 { return doCharAt(offset); }
04128 
04129 inline UChar
04130 UnicodeString::operator[] (int32_t offset) const
04131 { return doCharAt(offset); }
04132 
04133 inline UChar32
04134 UnicodeString::char32At(int32_t offset) const
04135 {
04136   int32_t len = length();
04137   if((uint32_t)offset < (uint32_t)len) {
04138     const UChar *array = getArrayStart();
04139     UChar32 c;
04140     U16_GET(array, 0, offset, len, c);
04141     return c;
04142   } else {
04143     return kInvalidUChar;
04144   }
04145 }
04146 
04147 inline int32_t
04148 UnicodeString::getChar32Start(int32_t offset) const {
04149   if((uint32_t)offset < (uint32_t)length()) {
04150     const UChar *array = getArrayStart();
04151     U16_SET_CP_START(array, 0, offset);
04152     return offset;
04153   } else {
04154     return 0;
04155   }
04156 }
04157 
04158 inline int32_t
04159 UnicodeString::getChar32Limit(int32_t offset) const {
04160   int32_t len = length();
04161   if((uint32_t)offset < (uint32_t)len) {
04162     const UChar *array = getArrayStart();
04163     U16_SET_CP_LIMIT(array, 0, offset, len);
04164     return offset;
04165   } else {
04166     return len;
04167   }
04168 }
04169 
04170 inline UBool
04171 UnicodeString::isEmpty() const {
04172   return fShortLength == 0;
04173 }
04174 
04175 //========================================
04176 // Write implementation methods
04177 //========================================
04178 inline void
04179 UnicodeString::setLength(int32_t len) {
04180   if(len <= 127) {
04181     fShortLength = (int8_t)len;
04182   } else {
04183     fShortLength = (int8_t)-1;
04184     fUnion.fFields.fLength = len;
04185   }
04186 }
04187 
04188 inline void
04189 UnicodeString::setToEmpty() {
04190   fShortLength = 0;
04191   fFlags = kShortString;
04192 }
04193 
04194 inline void
04195 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
04196   setLength(len);
04197   fUnion.fFields.fArray = array;
04198   fUnion.fFields.fCapacity = capacity;
04199 }
04200 
04201 inline const UChar *
04202 UnicodeString::getTerminatedBuffer() {
  // Returns a pointer to the characters with a NUL stored at [length()],
  // or 0 when the string is not writable or the buffer could not be cloned.
  // Three paths:
  //  - capacity allows it and the buffer is unshared: write the NUL in place
  //    (skipped for a read-only buffer, which is taken to be terminated);
  //  - otherwise cloneArrayIfNeeded(len+1) supplies a writable buffer and the
  //    NUL is written there;
  //  - if that fails, 0 is returned.
04203   if(!isWritable()) {
04204     return 0;
04205   } else {
04206     UChar *array = getArrayStart();
04207     int32_t len = length();
04208     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
04209       /*
04210        * kRefCounted: Do not write the NUL if the buffer is shared.
04211        * That is mostly safe, except when the length of one copy was modified
04212        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
04213        * Then the NUL would be written into the middle of another copy's string.
04214        */
04215       if(!(fFlags&kBufferIsReadonly)) {
      // NOTE(review): the paragraph below about a shared buffer (refCount()>1)
      // does not match the enclosing condition, which only admits buffers that
      // are not ref-counted or have refCount()==1 -- it looks left over from
      // an earlier version.
04216         /*
04217          * We must not write to a readonly buffer, but it is known to be
04218          * NUL-terminated if len<capacity.
04219          * A shared, allocated buffer (refCount()>1) must not have its contents
04220          * modified, but the NUL at [len] is beyond the string contents,
04221          * and multiple string objects and threads writing the same NUL into the
04222          * same location is harmless.
04223          * In all other cases, the buffer is fully writable and it is anyway safe
04224          * to write the NUL.
04225          *
04226          * Note: An earlier version of this code tested whether there is a NUL
04227          * at [len] already, but, while safe, it generated lots of warnings from
04228          * tools like valgrind and Purify.
04229          */
04230         array[len] = 0;
04231       }
04232       return array;
04233     } else if(cloneArrayIfNeeded(len+1)) {
      // The array may have been replaced by the clone, so fetch it again.
04234       array = getArrayStart();
04235       array[len] = 0;
04236       return array;
04237     } else {
04238       return 0;
04239     }
04240   }
04241 }
04242 
04243 inline UnicodeString& UnicodeString::operator= (UChar ch) {
04244   return doReplace(0, length(), &ch, 0, 1);  // target range (0, length()), source: the 1 unit at &ch; no unBogus() call here
04245 }
04246 
04247 inline UnicodeString& UnicodeString::operator= (UChar32 ch) {
04248   return replace(0, length(), ch);  // forwards to replace() over the range (0, length()); no unBogus() call here
04249 }
04250 
04251 // Calls unBogus(), then hands the current range (0, length()) and the given part of srcText to doReplace().
04252 inline UnicodeString&
04253 UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) {
04254   // unBogus() runs first, so the length below is read afterwards.
04255   unBogus();
04256   const int32_t oldLength = length();
04257   return doReplace(0, oldLength, srcText, srcStart, srcLength);
04258 }
04259 
04260 inline UnicodeString&
04261 UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart) {
04262   unBogus();
04263   // srcStart is passed to pinIndex() before use; presumably it is clamped in place -- TODO confirm.
04264   srcText.pinIndex(srcStart);
04265   const int32_t tailLength = srcText.length() - srcStart;
04266   return doReplace(0, length(), srcText, srcStart, tailLength);
04267 }
04268 
04269 // Calls unBogus(), then passes the range (0, length()) and all of srcText (0, srcText.length()) to doReplace().
04270 inline UnicodeString&
04271 UnicodeString::setTo(const UnicodeString& srcText) {
04272   unBogus();
04273   return doReplace(0, length(), srcText, 0, srcText.length());
04274 }
04275 
04276 // Calls unBogus(), then passes the range (0, length()) and srcChars with (0, srcLength) to doReplace().
04277 inline UnicodeString&
04278 UnicodeString::setTo(const UChar *srcChars, int32_t srcLength) {
04279   unBogus();
04280   const int32_t oldLength = length();
04281   return doReplace(0, oldLength, srcChars, 0, srcLength);
04282 }
04283 
04284 // Calls unBogus(), then passes the range (0, length()) and the single code unit srcChar to doReplace().
04285 inline UnicodeString&
04286 UnicodeString::setTo(UChar srcChar) {
04287   unBogus();
04288   return doReplace(0, length(), &srcChar, 0, 1);
04289 }
04290 
04291 // Calls unBogus(), then forwards the range (0, length()) and code point srcChar to replace().
04292 inline UnicodeString&
04293 UnicodeString::setTo(UChar32 srcChar) {
04294   unBogus();
04295   return replace(0, length(), srcChar);
04296 }
04297 
04298 // Passes a zero-length target range at length() plus (srcText, srcStart, srcLength) to doReplace().
04299 inline UnicodeString&
04300 UnicodeString::append(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) {
04301   return doReplace(length(), 0, srcText, srcStart, srcLength);
04302 }
04303 
04304 inline UnicodeString& UnicodeString::append(const UnicodeString& srcText) {
04305   return doReplace(length(), 0, srcText, 0, srcText.length());  // zero-length target at length(); source is all of srcText
04306 }
04307 
04308 // Passes a zero-length target range at length() plus (srcChars, srcStart, srcLength) to doReplace().
04309 inline UnicodeString&
04310 UnicodeString::append(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
04311   return doReplace(length(), 0, srcChars, srcStart, srcLength);
04312 }
04313 
04314 // Passes a zero-length target range at length() plus srcChars with (0, srcLength) to doReplace().
04315 inline UnicodeString& UnicodeString::append(const UChar *srcChars, int32_t srcLength) {
04316   return doReplace(length(), 0, srcChars, 0, srcLength);
04317 }
04318 
04319 inline UnicodeString& UnicodeString::append(UChar srcChar) {
04320   return doReplace(length(), 0, &srcChar, 0, 1);  // zero-length target at length(); source is the 1 unit at &srcChar
04321 }
04322 
04323 inline UnicodeString&
04324 UnicodeString::append(UChar32 srcChar) {
04325   UBool failed = FALSE;
04326   int32_t unitCount = 0;
04327   UChar units[U16_MAX_LENGTH];
04328   U16_APPEND(units, unitCount, U16_MAX_LENGTH, srcChar, failed);  // macro; presumably fills units and advances unitCount. failed is not checked
04329   return doReplace(length(), 0, units, 0, unitCount);
04330 }
04331 
04332 inline UnicodeString& UnicodeString::operator+= (UChar ch) {
04333   return doReplace(length(), 0, &ch, 0, 1);  // same doReplace() arguments as append(UChar)
04334 }
04335 
04336 // Forwards ch unchanged to append(UChar32).
04337 inline UnicodeString& UnicodeString::operator+= (UChar32 ch) {
04338   return append(ch);
04339 }
04340 
04341 inline UnicodeString& UnicodeString::operator+= (const UnicodeString& srcText) {
04342   return doReplace(length(), 0, srcText, 0, srcText.length());  // same doReplace() arguments as append(srcText)
04343 }
04344 
04345 // Passes a zero-length target range at start plus (srcText, srcStart, srcLength) to doReplace().
04346 inline UnicodeString&
04347 UnicodeString::insert(int32_t start, const UnicodeString& srcText,
04348                       int32_t srcStart, int32_t srcLength) {
04349   return doReplace(start, 0, srcText, srcStart, srcLength);
04350 }
04351 
04352 // Passes a zero-length target range at start plus all of srcText (0, srcText.length()) to doReplace().
04353 inline UnicodeString& UnicodeString::insert(int32_t start, const UnicodeString& srcText) {
04354   return doReplace(start, 0, srcText, 0, srcText.length());
04355 }
04356 
04357 // Passes a zero-length target range at start plus (srcChars, srcStart, srcLength) to doReplace().
04358 inline UnicodeString&
04359 UnicodeString::insert(int32_t start, const UChar *srcChars,
04360                       int32_t srcStart, int32_t srcLength) {
04361   return doReplace(start, 0, srcChars, srcStart, srcLength);
04362 }
04363 
04364 // Passes a zero-length target range at start plus srcChars with (0, srcLength) to doReplace().
04365 inline UnicodeString&
04366 UnicodeString::insert(int32_t start, const UChar *srcChars, int32_t srcLength) {
04367   return doReplace(start, 0, srcChars, 0, srcLength);
04368 }
04369 
04370 // Passes a zero-length target range at start plus the single code unit srcChar to doReplace().
04371 inline UnicodeString& UnicodeString::insert(int32_t start, UChar srcChar) {
04372   return doReplace(start, 0, &srcChar, 0, 1);
04373 }
04374 
04375 // Forwards start, a zero length, and code point srcChar to replace().
04376 inline UnicodeString& UnicodeString::insert(int32_t start, UChar32 srcChar) {
04377   return replace(start, 0, srcChar);
04378 }
04379 
04380 
04381 inline UnicodeString&
04382 UnicodeString::remove() {
04383   const UBool needsReset = (fFlags & (kIsBogus|kBufferIsReadonly)) != 0;
04384   if(!needsReset) {
04385     // Neither bogus nor a read-only alias: only the short length field is zeroed.
04386     fShortLength = 0;
04387     return *this;
04388   }
04389   // Bogus string or read-only alias: setToEmpty() also resets fFlags, making the string
04390   // non-bogus and un-aliased (avoids NUL-termination issues with getTerminatedBuffer()).
04391   setToEmpty();
04392   return *this;
04393 }
04394 
04395 inline UnicodeString&
04396 UnicodeString::remove(int32_t start, int32_t _length) {
04397     // start <= 0 together with _length == INT32_MAX is guaranteed to cover everything;
04398     // route it through remove() so that a bogus string ends up empty and non-bogus.
04399     const UBool coversEverything = (start <= 0) && (_length == INT32_MAX);
04400     if(coversEverything) {
04401         return remove();
04402     }
04403     return doReplace(start, _length, NULL, 0, 0);
04404 }
04405 
04406 // Passes the target range (start, limit - start) and an empty NULL source to doReplace().
04407 inline UnicodeString& UnicodeString::removeBetween(int32_t start, int32_t limit) {
04408   return doReplace(start, limit - start, NULL, 0, 0);
04409 }
04410 
04411 // Two steps, in this order: truncate(limit), then doReplace() on the range (0, start) with an empty NULL source.
04412 inline UnicodeString & UnicodeString::retainBetween(int32_t start, int32_t limit) {
04413   truncate(limit);  // result ignored
04414   return doReplace(0, start, NULL, 0, 0);
04415 }
04416 
04417 inline UBool
04418 UnicodeString::truncate(int32_t targetLength) {
04419   if(isBogus() && targetLength == 0) {
04420     // truncate(0) on a bogus string: make it empty and non-bogus, but still report FALSE.
04421     unBogus();
04422     return FALSE;
04423   }
04424   // Unsigned compare: a negative targetLength, or one >= length(), changes nothing.
04425   if((uint32_t)targetLength >= (uint32_t)length()) {
04426     return FALSE;
04427   }
04428   setLength(targetLength);
04429   if(fFlags&kBufferIsReadonly) {
04430     fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
04431   }
04432   return TRUE;
04433 }
04434 
04435 inline UnicodeString& UnicodeString::reverse() {
04436   return doReverse(0, length());  // whole string: start 0, length()
04437 }
04438 
04439 // Forwards start and _length unchanged to doReverse().
04440 inline UnicodeString& UnicodeString::reverse(int32_t start, int32_t _length) {
04441   return doReverse(start, _length);
04442 }
04443 
04444 U_NAMESPACE_END
04445 
04446 #endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines