ICU 4.6 4.6
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2010, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 00035 struct UConverter; // unicode/ucnv.h 00036 class StringThreadTest; 00037 00038 #ifndef U_COMPARE_CODE_POINT_ORDER 00039 /* see also ustring.h and unorm.h */ 00045 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00046 #endif 00047 00048 #ifndef USTRING_H 00049 00052 U_STABLE int32_t U_EXPORT2 00053 u_strlen(const UChar *s); 00054 #endif 00055 00056 U_NAMESPACE_BEGIN 00057 00058 class Locale; // unicode/locid.h 00059 class StringCharacterIterator; 00060 class BreakIterator; // unicode/brkiter.h 00061 00062 /* The <iostream> include has been moved to unicode/ustream.h */ 00063 00074 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant 00075 00093 #if defined(U_DECLARE_UTF16) 00094 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00095 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && 
defined(U_WCHAR_IS_UTF16))) 00096 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00097 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00098 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length) 00099 #else 00100 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV) 00101 #endif 00102 00116 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00117 00187 class U_COMMON_API UnicodeString : public Replaceable 00188 { 00189 public: 00190 00199 enum EInvariant { 00204 kInvariant 00205 }; 00206 00207 //======================================== 00208 // Read-only operations 00209 //======================================== 00210 00211 /* Comparison - bitwise only - for international comparison use collation */ 00212 00220 inline UBool operator== (const UnicodeString& text) const; 00221 00229 inline UBool operator!= (const UnicodeString& text) const; 00230 00238 inline UBool operator> (const UnicodeString& text) const; 00239 00247 inline UBool operator< (const UnicodeString& text) const; 00248 00256 inline UBool operator>= (const UnicodeString& text) const; 00257 00265 inline UBool operator<= (const UnicodeString& text) const; 00266 00278 inline int8_t compare(const UnicodeString& text) const; 00279 00294 inline int8_t compare(int32_t start, 00295 int32_t length, 00296 const UnicodeString& text) const; 00297 00315 inline int8_t compare(int32_t start, 00316 int32_t length, 00317 const UnicodeString& srcText, 00318 int32_t srcStart, 00319 int32_t srcLength) const; 00320 00333 inline int8_t compare(const UChar *srcChars, 00334 int32_t srcLength) const; 00335 00350 inline int8_t compare(int32_t start, 00351 int32_t length, 00352 const UChar *srcChars) const; 00353 00371 inline int8_t compare(int32_t start, 00372 int32_t length, 00373 const UChar *srcChars, 00374 int32_t srcStart, 00375 int32_t srcLength) 
const; 00376 00394 inline int8_t compareBetween(int32_t start, 00395 int32_t limit, 00396 const UnicodeString& srcText, 00397 int32_t srcStart, 00398 int32_t srcLimit) const; 00399 00417 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00418 00438 inline int8_t compareCodePointOrder(int32_t start, 00439 int32_t length, 00440 const UnicodeString& srcText) const; 00441 00463 inline int8_t compareCodePointOrder(int32_t start, 00464 int32_t length, 00465 const UnicodeString& srcText, 00466 int32_t srcStart, 00467 int32_t srcLength) const; 00468 00487 inline int8_t compareCodePointOrder(const UChar *srcChars, 00488 int32_t srcLength) const; 00489 00509 inline int8_t compareCodePointOrder(int32_t start, 00510 int32_t length, 00511 const UChar *srcChars) const; 00512 00534 inline int8_t compareCodePointOrder(int32_t start, 00535 int32_t length, 00536 const UChar *srcChars, 00537 int32_t srcStart, 00538 int32_t srcLength) const; 00539 00561 inline int8_t compareCodePointOrderBetween(int32_t start, 00562 int32_t limit, 00563 const UnicodeString& srcText, 00564 int32_t srcStart, 00565 int32_t srcLimit) const; 00566 00585 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00586 00607 inline int8_t caseCompare(int32_t start, 00608 int32_t length, 00609 const UnicodeString& srcText, 00610 uint32_t options) const; 00611 00634 inline int8_t caseCompare(int32_t start, 00635 int32_t length, 00636 const UnicodeString& srcText, 00637 int32_t srcStart, 00638 int32_t srcLength, 00639 uint32_t options) const; 00640 00660 inline int8_t caseCompare(const UChar *srcChars, 00661 int32_t srcLength, 00662 uint32_t options) const; 00663 00684 inline int8_t caseCompare(int32_t start, 00685 int32_t length, 00686 const UChar *srcChars, 00687 uint32_t options) const; 00688 00711 inline int8_t caseCompare(int32_t start, 00712 int32_t length, 00713 const UChar *srcChars, 00714 int32_t srcStart, 00715 int32_t srcLength, 00716 uint32_t options) const; 
00717 00740 inline int8_t caseCompareBetween(int32_t start, 00741 int32_t limit, 00742 const UnicodeString& srcText, 00743 int32_t srcStart, 00744 int32_t srcLimit, 00745 uint32_t options) const; 00746 00754 inline UBool startsWith(const UnicodeString& text) const; 00755 00766 inline UBool startsWith(const UnicodeString& srcText, 00767 int32_t srcStart, 00768 int32_t srcLength) const; 00769 00778 inline UBool startsWith(const UChar *srcChars, 00779 int32_t srcLength) const; 00780 00790 inline UBool startsWith(const UChar *srcChars, 00791 int32_t srcStart, 00792 int32_t srcLength) const; 00793 00801 inline UBool endsWith(const UnicodeString& text) const; 00802 00813 inline UBool endsWith(const UnicodeString& srcText, 00814 int32_t srcStart, 00815 int32_t srcLength) const; 00816 00825 inline UBool endsWith(const UChar *srcChars, 00826 int32_t srcLength) const; 00827 00838 inline UBool endsWith(const UChar *srcChars, 00839 int32_t srcStart, 00840 int32_t srcLength) const; 00841 00842 00843 /* Searching - bitwise only */ 00844 00853 inline int32_t indexOf(const UnicodeString& text) const; 00854 00864 inline int32_t indexOf(const UnicodeString& text, 00865 int32_t start) const; 00866 00878 inline int32_t indexOf(const UnicodeString& text, 00879 int32_t start, 00880 int32_t length) const; 00881 00898 inline int32_t indexOf(const UnicodeString& srcText, 00899 int32_t srcStart, 00900 int32_t srcLength, 00901 int32_t start, 00902 int32_t length) const; 00903 00915 inline int32_t indexOf(const UChar *srcChars, 00916 int32_t srcLength, 00917 int32_t start) const; 00918 00931 inline int32_t indexOf(const UChar *srcChars, 00932 int32_t srcLength, 00933 int32_t start, 00934 int32_t length) const; 00935 00952 int32_t indexOf(const UChar *srcChars, 00953 int32_t srcStart, 00954 int32_t srcLength, 00955 int32_t start, 00956 int32_t length) const; 00957 00965 inline int32_t indexOf(UChar c) const; 00966 00975 inline int32_t indexOf(UChar32 c) const; 00976 00985 inline int32_t 
indexOf(UChar c, 00986 int32_t start) const; 00987 00997 inline int32_t indexOf(UChar32 c, 00998 int32_t start) const; 00999 01010 inline int32_t indexOf(UChar c, 01011 int32_t start, 01012 int32_t length) const; 01013 01025 inline int32_t indexOf(UChar32 c, 01026 int32_t start, 01027 int32_t length) const; 01028 01037 inline int32_t lastIndexOf(const UnicodeString& text) const; 01038 01048 inline int32_t lastIndexOf(const UnicodeString& text, 01049 int32_t start) const; 01050 01062 inline int32_t lastIndexOf(const UnicodeString& text, 01063 int32_t start, 01064 int32_t length) const; 01065 01082 inline int32_t lastIndexOf(const UnicodeString& srcText, 01083 int32_t srcStart, 01084 int32_t srcLength, 01085 int32_t start, 01086 int32_t length) const; 01087 01098 inline int32_t lastIndexOf(const UChar *srcChars, 01099 int32_t srcLength, 01100 int32_t start) const; 01101 01114 inline int32_t lastIndexOf(const UChar *srcChars, 01115 int32_t srcLength, 01116 int32_t start, 01117 int32_t length) const; 01118 01135 int32_t lastIndexOf(const UChar *srcChars, 01136 int32_t srcStart, 01137 int32_t srcLength, 01138 int32_t start, 01139 int32_t length) const; 01140 01148 inline int32_t lastIndexOf(UChar c) const; 01149 01158 inline int32_t lastIndexOf(UChar32 c) const; 01159 01168 inline int32_t lastIndexOf(UChar c, 01169 int32_t start) const; 01170 01180 inline int32_t lastIndexOf(UChar32 c, 01181 int32_t start) const; 01182 01193 inline int32_t lastIndexOf(UChar c, 01194 int32_t start, 01195 int32_t length) const; 01196 01208 inline int32_t lastIndexOf(UChar32 c, 01209 int32_t start, 01210 int32_t length) const; 01211 01212 01213 /* Character access */ 01214 01223 inline UChar charAt(int32_t offset) const; 01224 01232 inline UChar operator[] (int32_t offset) const; 01233 01245 inline UChar32 char32At(int32_t offset) const; 01246 01262 inline int32_t getChar32Start(int32_t offset) const; 01263 01280 inline int32_t getChar32Limit(int32_t offset) const; 01281 01332 int32_t 
moveIndex32(int32_t index, int32_t delta) const; 01333 01334 /* Substring extraction */ 01335 01351 inline void extract(int32_t start, 01352 int32_t length, 01353 UChar *dst, 01354 int32_t dstStart = 0) const; 01355 01377 int32_t 01378 extract(UChar *dest, int32_t destCapacity, 01379 UErrorCode &errorCode) const; 01380 01391 inline void extract(int32_t start, 01392 int32_t length, 01393 UnicodeString& target) const; 01394 01406 inline void extractBetween(int32_t start, 01407 int32_t limit, 01408 UChar *dst, 01409 int32_t dstStart = 0) const; 01410 01420 virtual void extractBetween(int32_t start, 01421 int32_t limit, 01422 UnicodeString& target) const; 01423 01445 int32_t extract(int32_t start, 01446 int32_t startLength, 01447 char *target, 01448 int32_t targetCapacity, 01449 enum EInvariant inv) const; 01450 01451 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01452 01472 int32_t extract(int32_t start, 01473 int32_t startLength, 01474 char *target, 01475 uint32_t targetLength) const; 01476 01477 #endif 01478 01479 #if !UCONFIG_NO_CONVERSION 01480 01506 inline int32_t extract(int32_t start, 01507 int32_t startLength, 01508 char *target, 01509 const char *codepage = 0) const; 01510 01540 int32_t extract(int32_t start, 01541 int32_t startLength, 01542 char *target, 01543 uint32_t targetLength, 01544 const char *codepage) const; 01545 01563 int32_t extract(char *dest, int32_t destCapacity, 01564 UConverter *cnv, 01565 UErrorCode &errorCode) const; 01566 01567 #endif 01568 01582 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01583 01594 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01595 01607 void toUTF8(ByteSink &sink) const; 01608 01609 #if U_HAVE_STD_STRING 01610 01623 template<typename StringClass> 01624 StringClass &toUTF8String(StringClass &result) const { 01625 StringByteSink<StringClass> sbs(&result); 01626 toUTF8(sbs); 01627 return result; 01628 } 01629 01630 #endif 01631 01647 int32_t 
toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01648 01649 /* Length operations */ 01650 01659 inline int32_t length(void) const; 01660 01674 int32_t 01675 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01676 01700 UBool 01701 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01702 01708 inline UBool isEmpty(void) const; 01709 01719 inline int32_t getCapacity(void) const; 01720 01721 /* Other operations */ 01722 01728 inline int32_t hashCode(void) const; 01729 01742 inline UBool isBogus(void) const; 01743 01744 01745 //======================================== 01746 // Write operations 01747 //======================================== 01748 01749 /* Assignment operations */ 01750 01758 UnicodeString &operator=(const UnicodeString &srcText); 01759 01780 UnicodeString &fastCopyFrom(const UnicodeString &src); 01781 01789 inline UnicodeString& operator= (UChar ch); 01790 01798 inline UnicodeString& operator= (UChar32 ch); 01799 01811 inline UnicodeString& setTo(const UnicodeString& srcText, 01812 int32_t srcStart); 01813 01827 inline UnicodeString& setTo(const UnicodeString& srcText, 01828 int32_t srcStart, 01829 int32_t srcLength); 01830 01839 inline UnicodeString& setTo(const UnicodeString& srcText); 01840 01849 inline UnicodeString& setTo(const UChar *srcChars, 01850 int32_t srcLength); 01851 01860 UnicodeString& setTo(UChar srcChar); 01861 01870 UnicodeString& setTo(UChar32 srcChar); 01871 01892 UnicodeString &setTo(UBool isTerminated, 01893 const UChar *text, 01894 int32_t textLength); 01895 01915 UnicodeString &setTo(UChar *buffer, 01916 int32_t buffLength, 01917 int32_t buffCapacity); 01918 01959 void setToBogus(); 01960 01968 UnicodeString& setCharAt(int32_t offset, 01969 UChar ch); 01970 01971 01972 /* Append operations */ 01973 01981 inline UnicodeString& operator+= (UChar ch); 01982 01990 inline UnicodeString& operator+= (UChar32 ch); 01991 02000 inline UnicodeString& operator+= (const 
UnicodeString& srcText); 02001 02016 inline UnicodeString& append(const UnicodeString& srcText, 02017 int32_t srcStart, 02018 int32_t srcLength); 02019 02027 inline UnicodeString& append(const UnicodeString& srcText); 02028 02042 inline UnicodeString& append(const UChar *srcChars, 02043 int32_t srcStart, 02044 int32_t srcLength); 02045 02054 inline UnicodeString& append(const UChar *srcChars, 02055 int32_t srcLength); 02056 02063 inline UnicodeString& append(UChar srcChar); 02064 02071 inline UnicodeString& append(UChar32 srcChar); 02072 02073 02074 /* Insert operations */ 02075 02089 inline UnicodeString& insert(int32_t start, 02090 const UnicodeString& srcText, 02091 int32_t srcStart, 02092 int32_t srcLength); 02093 02102 inline UnicodeString& insert(int32_t start, 02103 const UnicodeString& srcText); 02104 02118 inline UnicodeString& insert(int32_t start, 02119 const UChar *srcChars, 02120 int32_t srcStart, 02121 int32_t srcLength); 02122 02132 inline UnicodeString& insert(int32_t start, 02133 const UChar *srcChars, 02134 int32_t srcLength); 02135 02144 inline UnicodeString& insert(int32_t start, 02145 UChar srcChar); 02146 02155 inline UnicodeString& insert(int32_t start, 02156 UChar32 srcChar); 02157 02158 02159 /* Replace operations */ 02160 02178 UnicodeString& replace(int32_t start, 02179 int32_t length, 02180 const UnicodeString& srcText, 02181 int32_t srcStart, 02182 int32_t srcLength); 02183 02196 UnicodeString& replace(int32_t start, 02197 int32_t length, 02198 const UnicodeString& srcText); 02199 02217 UnicodeString& replace(int32_t start, 02218 int32_t length, 02219 const UChar *srcChars, 02220 int32_t srcStart, 02221 int32_t srcLength); 02222 02235 inline UnicodeString& replace(int32_t start, 02236 int32_t length, 02237 const UChar *srcChars, 02238 int32_t srcLength); 02239 02251 inline UnicodeString& replace(int32_t start, 02252 int32_t length, 02253 UChar srcChar); 02254 02266 inline UnicodeString& replace(int32_t start, 02267 int32_t length, 02268 
UChar32 srcChar); 02269 02279 inline UnicodeString& replaceBetween(int32_t start, 02280 int32_t limit, 02281 const UnicodeString& srcText); 02282 02297 inline UnicodeString& replaceBetween(int32_t start, 02298 int32_t limit, 02299 const UnicodeString& srcText, 02300 int32_t srcStart, 02301 int32_t srcLimit); 02302 02313 virtual void handleReplaceBetween(int32_t start, 02314 int32_t limit, 02315 const UnicodeString& text); 02316 02322 virtual UBool hasMetaData() const; 02323 02339 virtual void copy(int32_t start, int32_t limit, int32_t dest); 02340 02341 /* Search and replace operations */ 02342 02351 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02352 const UnicodeString& newText); 02353 02365 inline UnicodeString& findAndReplace(int32_t start, 02366 int32_t length, 02367 const UnicodeString& oldText, 02368 const UnicodeString& newText); 02369 02387 UnicodeString& findAndReplace(int32_t start, 02388 int32_t length, 02389 const UnicodeString& oldText, 02390 int32_t oldStart, 02391 int32_t oldLength, 02392 const UnicodeString& newText, 02393 int32_t newStart, 02394 int32_t newLength); 02395 02396 02397 /* Remove operations */ 02398 02404 inline UnicodeString& remove(void); 02405 02414 inline UnicodeString& remove(int32_t start, 02415 int32_t length = (int32_t)INT32_MAX); 02416 02425 inline UnicodeString& removeBetween(int32_t start, 02426 int32_t limit = (int32_t)INT32_MAX); 02427 02437 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02438 02439 /* Length operations */ 02440 02452 UBool padLeading(int32_t targetLength, 02453 UChar padChar = 0x0020); 02454 02466 UBool padTrailing(int32_t targetLength, 02467 UChar padChar = 0x0020); 02468 02475 inline UBool truncate(int32_t targetLength); 02476 02482 UnicodeString& trim(void); 02483 02484 02485 /* Miscellaneous operations */ 02486 02492 inline UnicodeString& reverse(void); 02493 02502 inline UnicodeString& reverse(int32_t start, 02503 int32_t length); 02504 02511 
UnicodeString& toUpper(void); 02512 02520 UnicodeString& toUpper(const Locale& locale); 02521 02528 UnicodeString& toLower(void); 02529 02537 UnicodeString& toLower(const Locale& locale); 02538 02539 #if !UCONFIG_NO_BREAK_ITERATION 02540 02567 UnicodeString &toTitle(BreakIterator *titleIter); 02568 02596 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02597 02629 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02630 02631 #endif 02632 02644 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02645 02646 //======================================== 02647 // Access to the internal buffer 02648 //======================================== 02649 02693 UChar *getBuffer(int32_t minCapacity); 02694 02715 void releaseBuffer(int32_t newLength=-1); 02716 02747 inline const UChar *getBuffer() const; 02748 02782 inline const UChar *getTerminatedBuffer(); 02783 02784 //======================================== 02785 // Constructors 02786 //======================================== 02787 02791 UnicodeString(); 02792 02804 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02805 02811 UnicodeString(UChar ch); 02812 02818 UnicodeString(UChar32 ch); 02819 02826 UnicodeString(const UChar *text); 02827 02835 UnicodeString(const UChar *text, 02836 int32_t textLength); 02837 02857 UnicodeString(UBool isTerminated, 02858 const UChar *text, 02859 int32_t textLength); 02860 02879 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02880 02881 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02882 02889 UnicodeString(const char *codepageData); 02890 02897 UnicodeString(const char *codepageData, int32_t dataLength); 02898 02899 #endif 02900 02901 #if !UCONFIG_NO_CONVERSION 02902 02920 UnicodeString(const char *codepageData, const char *codepage); 02921 02939 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 02940 02962 UnicodeString( 02963 const char *src, 
int32_t srcLength, 02964 UConverter *cnv, 02965 UErrorCode &errorCode); 02966 02967 #endif 02968 02993 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 02994 02995 03001 UnicodeString(const UnicodeString& that); 03002 03009 UnicodeString(const UnicodeString& src, int32_t srcStart); 03010 03018 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 03019 03036 virtual Replaceable *clone() const; 03037 03041 virtual ~UnicodeString(); 03042 03056 static UnicodeString fromUTF8(const StringPiece &utf8); 03057 03069 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03070 03071 /* Miscellaneous operations */ 03072 03107 UnicodeString unescape() const; 03108 03128 UChar32 unescapeAt(int32_t &offset) const; 03129 03135 static UClassID U_EXPORT2 getStaticClassID(); 03136 03142 virtual UClassID getDynamicClassID() const; 03143 03144 //======================================== 03145 // Implementation methods 03146 //======================================== 03147 03148 protected: 03153 virtual int32_t getLength() const; 03154 03160 virtual UChar getCharAt(int32_t offset) const; 03161 03167 virtual UChar32 getChar32At(int32_t offset) const; 03168 03169 private: 03170 // For char* constructors. Could be made public. 03171 UnicodeString &setToUTF8(const StringPiece &utf8); 03172 // For extract(char*). 03173 // We could make a toUTF8(target, capacity, errorCode) public but not 03174 // this version: New API will be cleaner if we make callers create substrings 03175 // rather than having start+length on every method, 03176 // and it should take a UErrorCode&. 
03177 int32_t 03178 toUTF8(int32_t start, int32_t len, 03179 char *target, int32_t capacity) const; 03180 03181 03182 inline int8_t 03183 doCompare(int32_t start, 03184 int32_t length, 03185 const UnicodeString& srcText, 03186 int32_t srcStart, 03187 int32_t srcLength) const; 03188 03189 int8_t doCompare(int32_t start, 03190 int32_t length, 03191 const UChar *srcChars, 03192 int32_t srcStart, 03193 int32_t srcLength) const; 03194 03195 inline int8_t 03196 doCompareCodePointOrder(int32_t start, 03197 int32_t length, 03198 const UnicodeString& srcText, 03199 int32_t srcStart, 03200 int32_t srcLength) const; 03201 03202 int8_t doCompareCodePointOrder(int32_t start, 03203 int32_t length, 03204 const UChar *srcChars, 03205 int32_t srcStart, 03206 int32_t srcLength) const; 03207 03208 inline int8_t 03209 doCaseCompare(int32_t start, 03210 int32_t length, 03211 const UnicodeString &srcText, 03212 int32_t srcStart, 03213 int32_t srcLength, 03214 uint32_t options) const; 03215 03216 int8_t 03217 doCaseCompare(int32_t start, 03218 int32_t length, 03219 const UChar *srcChars, 03220 int32_t srcStart, 03221 int32_t srcLength, 03222 uint32_t options) const; 03223 03224 int32_t doIndexOf(UChar c, 03225 int32_t start, 03226 int32_t length) const; 03227 03228 int32_t doIndexOf(UChar32 c, 03229 int32_t start, 03230 int32_t length) const; 03231 03232 int32_t doLastIndexOf(UChar c, 03233 int32_t start, 03234 int32_t length) const; 03235 03236 int32_t doLastIndexOf(UChar32 c, 03237 int32_t start, 03238 int32_t length) const; 03239 03240 void doExtract(int32_t start, 03241 int32_t length, 03242 UChar *dst, 03243 int32_t dstStart) const; 03244 03245 inline void doExtract(int32_t start, 03246 int32_t length, 03247 UnicodeString& target) const; 03248 03249 inline UChar doCharAt(int32_t offset) const; 03250 03251 UnicodeString& doReplace(int32_t start, 03252 int32_t length, 03253 const UnicodeString& srcText, 03254 int32_t srcStart, 03255 int32_t srcLength); 03256 03257 UnicodeString& 
doReplace(int32_t start, 03258 int32_t length, 03259 const UChar *srcChars, 03260 int32_t srcStart, 03261 int32_t srcLength); 03262 03263 UnicodeString& doReverse(int32_t start, 03264 int32_t length); 03265 03266 // calculate hash code 03267 int32_t doHashCode(void) const; 03268 03269 // get pointer to start of array 03270 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03271 inline UChar* getArrayStart(void); 03272 inline const UChar* getArrayStart(void) const; 03273 03274 // A UnicodeString object (not necessarily its current buffer) 03275 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03276 inline UBool isWritable() const; 03277 03278 // Is the current buffer writable? 03279 inline UBool isBufferWritable() const; 03280 03281 // None of the following does releaseArray(). 03282 inline void setLength(int32_t len); // sets only fShortLength and fLength 03283 inline void setToEmpty(); // sets fFlags=kShortString 03284 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03285 03286 // allocate the array; result may be fStackBuffer 03287 // sets refCount to 1 if appropriate 03288 // sets fArray, fCapacity, and fFlags 03289 // returns boolean for success or failure 03290 UBool allocate(int32_t capacity); 03291 03292 // release the array if owned 03293 void releaseArray(void); 03294 03295 // turn a bogus string into an empty one 03296 void unBogus(); 03297 03298 // implements assignment operator, copy constructor, and fastCopyFrom() 03299 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 03300 03301 // Pin start and limit to acceptable values. 03302 inline void pinIndex(int32_t& start) const; 03303 inline void pinIndices(int32_t& start, 03304 int32_t& length) const; 03305 03306 #if !UCONFIG_NO_CONVERSION 03307 03308 /* Internal extract() using UConverter.
*/ 03309 int32_t doExtract(int32_t start, int32_t length, 03310 char *dest, int32_t destCapacity, 03311 UConverter *cnv, 03312 UErrorCode &errorCode) const; 03313 03314 /* 03315 * Real constructor for converting from codepage data. 03316 * It assumes that it is called with !fRefCounted. 03317 * 03318 * If <code>codepage==0</code>, then the default converter 03319 * is used for the platform encoding. 03320 * If <code>codepage</code> is an empty string (<code>""</code>), 03321 * then a simple conversion is performed on the codepage-invariant 03322 * subset ("invariant characters") of the platform encoding. See utypes.h. 03323 */ 03324 void doCodepageCreate(const char *codepageData, 03325 int32_t dataLength, 03326 const char *codepage); 03327 03328 /* 03329 * Worker function for creating a UnicodeString from 03330 * a codepage string using a UConverter. 03331 */ 03332 void 03333 doCodepageCreate(const char *codepageData, 03334 int32_t dataLength, 03335 UConverter *converter, 03336 UErrorCode &status); 03337 03338 #endif 03339 03340 /* 03341 * This function is called when write access to the array 03342 * is necessary. 03343 * 03344 * We need to make a copy of the array if 03345 * the buffer is read-only, or 03346 * the buffer is refCounted (shared), and refCount>1, or 03347 * the buffer is too small. 03348 * 03349 * Return FALSE if memory could not be allocated. 
03350 */ 03351 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03352 int32_t growCapacity = -1, 03353 UBool doCopyArray = TRUE, 03354 int32_t **pBufferToDelete = 0, 03355 UBool forceClone = FALSE); 03356 03357 // common function for case mappings 03358 UnicodeString & 03359 caseMap(BreakIterator *titleIter, 03360 const char *locale, 03361 uint32_t options, 03362 int32_t toWhichCase); 03363 03364 // ref counting 03365 void addRef(void); 03366 int32_t removeRef(void); 03367 int32_t refCount(void) const; 03368 03369 // constants 03370 enum { 03371 // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer): 03372 // 32-bit pointers: 4+1+1+13*2 = 32 bytes 03373 // 64-bit pointers: 8+1+1+15*2 = 40 bytes 03374 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings 03375 kInvalidUChar=0xffff, // invalid UChar index 03376 kGrowSize=128, // grow size for this buffer 03377 kInvalidHashCode=0, // invalid hash code 03378 kEmptyHashCode=1, // hash code for empty string 03379 03380 // bit flag values for fFlags 03381 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03382 kUsingStackBuffer=2,// fArray==fStackBuffer 03383 kRefCounted=4, // there is a refCount field before the characters in fArray 03384 kBufferIsReadonly=8,// do not write to this buffer 03385 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03386 // and releaseBuffer(newLength) must be called 03387 03388 // combined values for convenience 03389 kShortString=kUsingStackBuffer, 03390 kLongString=kRefCounted, 03391 kReadonlyAlias=kBufferIsReadonly, 03392 kWritableAlias=0 03393 }; 03394 03395 friend class StringThreadTest; 03396 03397 union StackBufferOrFields; // forward declaration necessary before friend declaration 03398 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03399 03400 /* 03401 * The following are all the class fields that are stored 03402 * in each UnicodeString object. 
03403 * Note that UnicodeString has virtual functions, 03404 * therefore there is an implicit vtable pointer 03405 * as the first real field. 03406 * The fields should be aligned such that no padding is 03407 * necessary, mostly by having larger types first. 03408 * On 32-bit machines, the size should be 32 bytes, 03409 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03410 */ 03411 // (implicit) *vtable; 03412 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03413 uint8_t fFlags; // bit flags: see constants above 03414 union StackBufferOrFields { 03415 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03416 // else fFields is used 03417 UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings 03418 struct { 03419 uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b) 03420 int32_t fLength; // number of characters in fArray if >127; else undefined 03421 UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b)) 03422 int32_t fCapacity; // sizeof fArray 03423 } fFields; 03424 } fUnion; 03425 }; 03426 03435 U_COMMON_API UnicodeString U_EXPORT2 03436 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03437 03438 //======================================== 03439 // Inline members 03440 //======================================== 03441 03442 //======================================== 03443 // Privates 03444 //======================================== 03445 03446 inline void 03447 UnicodeString::pinIndex(int32_t& start) const 03448 { 03449 // pin index 03450 if(start < 0) { 03451 start = 0; 03452 } else if(start > length()) { 03453 start = length(); 03454 } 03455 } 03456 03457 inline void 03458 UnicodeString::pinIndices(int32_t& start, 03459 int32_t& _length) const 03460 { 03461 // pin indices 03462 int32_t len = length(); 03463 if(start < 0) { 03464 start = 0; 03465 } else if(start > len) { 03466 start = len; 03467 } 03468 if(_length < 0) { 03469 
_length = 0; 03470 } else if(_length > (len - start)) { 03471 _length = (len - start); 03472 } 03473 } 03474 03475 inline UChar* 03476 UnicodeString::getArrayStart() 03477 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03478 03479 inline const UChar* 03480 UnicodeString::getArrayStart() const 03481 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03482 03483 //======================================== 03484 // Read-only implementation methods 03485 //======================================== 03486 inline int32_t 03487 UnicodeString::length() const 03488 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03489 03490 inline int32_t 03491 UnicodeString::getCapacity() const 03492 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03493 03494 inline int32_t 03495 UnicodeString::hashCode() const 03496 { return doHashCode(); } 03497 03498 inline UBool 03499 UnicodeString::isBogus() const 03500 { return (UBool)(fFlags & kIsBogus); } 03501 03502 inline UBool 03503 UnicodeString::isWritable() const 03504 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03505 03506 inline UBool 03507 UnicodeString::isBufferWritable() const 03508 { 03509 return (UBool)( 03510 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03511 (!(fFlags&kRefCounted) || refCount()==1)); 03512 } 03513 03514 inline const UChar * 03515 UnicodeString::getBuffer() const { 03516 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03517 return 0; 03518 } else if(fFlags&kUsingStackBuffer) { 03519 return fUnion.fStackBuffer; 03520 } else { 03521 return fUnion.fFields.fArray; 03522 } 03523 } 03524 03525 //======================================== 03526 // Read-only alias methods 03527 //======================================== 03528 inline int8_t 03529 UnicodeString::doCompare(int32_t start, 03530 int32_t thisLength, 03531 const UnicodeString& srcText, 03532 int32_t srcStart, 03533 int32_t 
srcLength) const 03534 { 03535 if(srcText.isBogus()) { 03536 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03537 } else { 03538 srcText.pinIndices(srcStart, srcLength); 03539 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03540 } 03541 } 03542 03543 inline UBool 03544 UnicodeString::operator== (const UnicodeString& text) const 03545 { 03546 if(isBogus()) { 03547 return text.isBogus(); 03548 } else { 03549 int32_t len = length(), textLength = text.length(); 03550 return 03551 !text.isBogus() && 03552 len == textLength && 03553 doCompare(0, len, text, 0, textLength) == 0; 03554 } 03555 } 03556 03557 inline UBool 03558 UnicodeString::operator!= (const UnicodeString& text) const 03559 { return (! operator==(text)); } 03560 03561 inline UBool 03562 UnicodeString::operator> (const UnicodeString& text) const 03563 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03564 03565 inline UBool 03566 UnicodeString::operator< (const UnicodeString& text) const 03567 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03568 03569 inline UBool 03570 UnicodeString::operator>= (const UnicodeString& text) const 03571 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03572 03573 inline UBool 03574 UnicodeString::operator<= (const UnicodeString& text) const 03575 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03576 03577 inline int8_t 03578 UnicodeString::compare(const UnicodeString& text) const 03579 { return doCompare(0, length(), text, 0, text.length()); } 03580 03581 inline int8_t 03582 UnicodeString::compare(int32_t start, 03583 int32_t _length, 03584 const UnicodeString& srcText) const 03585 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03586 03587 inline int8_t 03588 UnicodeString::compare(const UChar *srcChars, 03589 int32_t srcLength) const 03590 { return doCompare(0, length(), srcChars, 0, srcLength); } 03591 03592 inline int8_t 03593 
UnicodeString::compare(int32_t start, 03594 int32_t _length, 03595 const UnicodeString& srcText, 03596 int32_t srcStart, 03597 int32_t srcLength) const 03598 { return doCompare(start, _length, srcText, srcStart, srcLength); } 03599 03600 inline int8_t 03601 UnicodeString::compare(int32_t start, 03602 int32_t _length, 03603 const UChar *srcChars) const 03604 { return doCompare(start, _length, srcChars, 0, _length); } 03605 03606 inline int8_t 03607 UnicodeString::compare(int32_t start, 03608 int32_t _length, 03609 const UChar *srcChars, 03610 int32_t srcStart, 03611 int32_t srcLength) const 03612 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03613 03614 inline int8_t 03615 UnicodeString::compareBetween(int32_t start, 03616 int32_t limit, 03617 const UnicodeString& srcText, 03618 int32_t srcStart, 03619 int32_t srcLimit) const 03620 { return doCompare(start, limit - start, 03621 srcText, srcStart, srcLimit - srcStart); } 03622 03623 inline int8_t 03624 UnicodeString::doCompareCodePointOrder(int32_t start, 03625 int32_t thisLength, 03626 const UnicodeString& srcText, 03627 int32_t srcStart, 03628 int32_t srcLength) const 03629 { 03630 if(srcText.isBogus()) { 03631 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03632 } else { 03633 srcText.pinIndices(srcStart, srcLength); 03634 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03635 } 03636 } 03637 03638 inline int8_t 03639 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03640 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03641 03642 inline int8_t 03643 UnicodeString::compareCodePointOrder(int32_t start, 03644 int32_t _length, 03645 const UnicodeString& srcText) const 03646 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03647 03648 inline int8_t 03649 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03650 int32_t srcLength) const 03651 { 
return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03652 03653 inline int8_t 03654 UnicodeString::compareCodePointOrder(int32_t start, 03655 int32_t _length, 03656 const UnicodeString& srcText, 03657 int32_t srcStart, 03658 int32_t srcLength) const 03659 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03660 03661 inline int8_t 03662 UnicodeString::compareCodePointOrder(int32_t start, 03663 int32_t _length, 03664 const UChar *srcChars) const 03665 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03666 03667 inline int8_t 03668 UnicodeString::compareCodePointOrder(int32_t start, 03669 int32_t _length, 03670 const UChar *srcChars, 03671 int32_t srcStart, 03672 int32_t srcLength) const 03673 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03674 03675 inline int8_t 03676 UnicodeString::compareCodePointOrderBetween(int32_t start, 03677 int32_t limit, 03678 const UnicodeString& srcText, 03679 int32_t srcStart, 03680 int32_t srcLimit) const 03681 { return doCompareCodePointOrder(start, limit - start, 03682 srcText, srcStart, srcLimit - srcStart); } 03683 03684 inline int8_t 03685 UnicodeString::doCaseCompare(int32_t start, 03686 int32_t thisLength, 03687 const UnicodeString &srcText, 03688 int32_t srcStart, 03689 int32_t srcLength, 03690 uint32_t options) const 03691 { 03692 if(srcText.isBogus()) { 03693 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03694 } else { 03695 srcText.pinIndices(srcStart, srcLength); 03696 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03697 } 03698 } 03699 03700 inline int8_t 03701 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03702 return doCaseCompare(0, length(), text, 0, text.length(), options); 03703 } 03704 03705 inline int8_t 03706 UnicodeString::caseCompare(int32_t start, 03707 int32_t _length, 03708 const UnicodeString 
&srcText, 03709 uint32_t options) const { 03710 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03711 } 03712 03713 inline int8_t 03714 UnicodeString::caseCompare(const UChar *srcChars, 03715 int32_t srcLength, 03716 uint32_t options) const { 03717 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03718 } 03719 03720 inline int8_t 03721 UnicodeString::caseCompare(int32_t start, 03722 int32_t _length, 03723 const UnicodeString &srcText, 03724 int32_t srcStart, 03725 int32_t srcLength, 03726 uint32_t options) const { 03727 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03728 } 03729 03730 inline int8_t 03731 UnicodeString::caseCompare(int32_t start, 03732 int32_t _length, 03733 const UChar *srcChars, 03734 uint32_t options) const { 03735 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03736 } 03737 03738 inline int8_t 03739 UnicodeString::caseCompare(int32_t start, 03740 int32_t _length, 03741 const UChar *srcChars, 03742 int32_t srcStart, 03743 int32_t srcLength, 03744 uint32_t options) const { 03745 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03746 } 03747 03748 inline int8_t 03749 UnicodeString::caseCompareBetween(int32_t start, 03750 int32_t limit, 03751 const UnicodeString &srcText, 03752 int32_t srcStart, 03753 int32_t srcLimit, 03754 uint32_t options) const { 03755 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03756 } 03757 03758 inline int32_t 03759 UnicodeString::indexOf(const UnicodeString& srcText, 03760 int32_t srcStart, 03761 int32_t srcLength, 03762 int32_t start, 03763 int32_t _length) const 03764 { 03765 if(!srcText.isBogus()) { 03766 srcText.pinIndices(srcStart, srcLength); 03767 if(srcLength > 0) { 03768 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03769 } 03770 } 03771 return -1; 03772 } 03773 03774 inline int32_t 03775 
UnicodeString::indexOf(const UnicodeString& text) const 03776 { return indexOf(text, 0, text.length(), 0, length()); } 03777 03778 inline int32_t 03779 UnicodeString::indexOf(const UnicodeString& text, 03780 int32_t start) const { 03781 pinIndex(start); 03782 return indexOf(text, 0, text.length(), start, length() - start); 03783 } 03784 03785 inline int32_t 03786 UnicodeString::indexOf(const UnicodeString& text, 03787 int32_t start, 03788 int32_t _length) const 03789 { return indexOf(text, 0, text.length(), start, _length); } 03790 03791 inline int32_t 03792 UnicodeString::indexOf(const UChar *srcChars, 03793 int32_t srcLength, 03794 int32_t start) const { 03795 pinIndex(start); 03796 return indexOf(srcChars, 0, srcLength, start, length() - start); 03797 } 03798 03799 inline int32_t 03800 UnicodeString::indexOf(const UChar *srcChars, 03801 int32_t srcLength, 03802 int32_t start, 03803 int32_t _length) const 03804 { return indexOf(srcChars, 0, srcLength, start, _length); } 03805 03806 inline int32_t 03807 UnicodeString::indexOf(UChar c, 03808 int32_t start, 03809 int32_t _length) const 03810 { return doIndexOf(c, start, _length); } 03811 03812 inline int32_t 03813 UnicodeString::indexOf(UChar32 c, 03814 int32_t start, 03815 int32_t _length) const 03816 { return doIndexOf(c, start, _length); } 03817 03818 inline int32_t 03819 UnicodeString::indexOf(UChar c) const 03820 { return doIndexOf(c, 0, length()); } 03821 03822 inline int32_t 03823 UnicodeString::indexOf(UChar32 c) const 03824 { return indexOf(c, 0, length()); } 03825 03826 inline int32_t 03827 UnicodeString::indexOf(UChar c, 03828 int32_t start) const { 03829 pinIndex(start); 03830 return doIndexOf(c, start, length() - start); 03831 } 03832 03833 inline int32_t 03834 UnicodeString::indexOf(UChar32 c, 03835 int32_t start) const { 03836 pinIndex(start); 03837 return indexOf(c, start, length() - start); 03838 } 03839 03840 inline int32_t 03841 UnicodeString::lastIndexOf(const UChar *srcChars, 03842 int32_t 
srcLength, 03843 int32_t start, 03844 int32_t _length) const 03845 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03846 03847 inline int32_t 03848 UnicodeString::lastIndexOf(const UChar *srcChars, 03849 int32_t srcLength, 03850 int32_t start) const { 03851 pinIndex(start); 03852 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03853 } 03854 03855 inline int32_t 03856 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03857 int32_t srcStart, 03858 int32_t srcLength, 03859 int32_t start, 03860 int32_t _length) const 03861 { 03862 if(!srcText.isBogus()) { 03863 srcText.pinIndices(srcStart, srcLength); 03864 if(srcLength > 0) { 03865 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03866 } 03867 } 03868 return -1; 03869 } 03870 03871 inline int32_t 03872 UnicodeString::lastIndexOf(const UnicodeString& text, 03873 int32_t start, 03874 int32_t _length) const 03875 { return lastIndexOf(text, 0, text.length(), start, _length); } 03876 03877 inline int32_t 03878 UnicodeString::lastIndexOf(const UnicodeString& text, 03879 int32_t start) const { 03880 pinIndex(start); 03881 return lastIndexOf(text, 0, text.length(), start, length() - start); 03882 } 03883 03884 inline int32_t 03885 UnicodeString::lastIndexOf(const UnicodeString& text) const 03886 { return lastIndexOf(text, 0, text.length(), 0, length()); } 03887 03888 inline int32_t 03889 UnicodeString::lastIndexOf(UChar c, 03890 int32_t start, 03891 int32_t _length) const 03892 { return doLastIndexOf(c, start, _length); } 03893 03894 inline int32_t 03895 UnicodeString::lastIndexOf(UChar32 c, 03896 int32_t start, 03897 int32_t _length) const { 03898 return doLastIndexOf(c, start, _length); 03899 } 03900 03901 inline int32_t 03902 UnicodeString::lastIndexOf(UChar c) const 03903 { return doLastIndexOf(c, 0, length()); } 03904 03905 inline int32_t 03906 UnicodeString::lastIndexOf(UChar32 c) const { 03907 return lastIndexOf(c, 0, length()); 03908 } 03909 
03910 inline int32_t 03911 UnicodeString::lastIndexOf(UChar c, 03912 int32_t start) const { 03913 pinIndex(start); 03914 return doLastIndexOf(c, start, length() - start); 03915 } 03916 03917 inline int32_t 03918 UnicodeString::lastIndexOf(UChar32 c, 03919 int32_t start) const { 03920 pinIndex(start); 03921 return lastIndexOf(c, start, length() - start); 03922 } 03923 03924 inline UBool 03925 UnicodeString::startsWith(const UnicodeString& text) const 03926 { return compare(0, text.length(), text, 0, text.length()) == 0; } 03927 03928 inline UBool 03929 UnicodeString::startsWith(const UnicodeString& srcText, 03930 int32_t srcStart, 03931 int32_t srcLength) const 03932 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 03933 03934 inline UBool 03935 UnicodeString::startsWith(const UChar *srcChars, 03936 int32_t srcLength) const 03937 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } 03938 03939 inline UBool 03940 UnicodeString::startsWith(const UChar *srcChars, 03941 int32_t srcStart, 03942 int32_t srcLength) const 03943 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;} 03944 03945 inline UBool 03946 UnicodeString::endsWith(const UnicodeString& text) const 03947 { return doCompare(length() - text.length(), text.length(), 03948 text, 0, text.length()) == 0; } 03949 03950 inline UBool 03951 UnicodeString::endsWith(const UnicodeString& srcText, 03952 int32_t srcStart, 03953 int32_t srcLength) const { 03954 srcText.pinIndices(srcStart, srcLength); 03955 return doCompare(length() - srcLength, srcLength, 03956 srcText, srcStart, srcLength) == 0; 03957 } 03958 03959 inline UBool 03960 UnicodeString::endsWith(const UChar *srcChars, 03961 int32_t srcLength) const { 03962 if(srcLength < 0) { 03963 srcLength = u_strlen(srcChars); 03964 } 03965 return doCompare(length() - srcLength, srcLength, 03966 srcChars, 0, srcLength) == 0; 03967 } 03968 03969 inline UBool 03970 UnicodeString::endsWith(const UChar *srcChars, 03971 
int32_t srcStart, 03972 int32_t srcLength) const { 03973 if(srcLength < 0) { 03974 srcLength = u_strlen(srcChars + srcStart); 03975 } 03976 return doCompare(length() - srcLength, srcLength, 03977 srcChars, srcStart, srcLength) == 0; 03978 } 03979 03980 //======================================== 03981 // replace 03982 //======================================== 03983 inline UnicodeString& 03984 UnicodeString::replace(int32_t start, 03985 int32_t _length, 03986 const UnicodeString& srcText) 03987 { return doReplace(start, _length, srcText, 0, srcText.length()); } 03988 03989 inline UnicodeString& 03990 UnicodeString::replace(int32_t start, 03991 int32_t _length, 03992 const UnicodeString& srcText, 03993 int32_t srcStart, 03994 int32_t srcLength) 03995 { return doReplace(start, _length, srcText, srcStart, srcLength); } 03996 03997 inline UnicodeString& 03998 UnicodeString::replace(int32_t start, 03999 int32_t _length, 04000 const UChar *srcChars, 04001 int32_t srcLength) 04002 { return doReplace(start, _length, srcChars, 0, srcLength); } 04003 04004 inline UnicodeString& 04005 UnicodeString::replace(int32_t start, 04006 int32_t _length, 04007 const UChar *srcChars, 04008 int32_t srcStart, 04009 int32_t srcLength) 04010 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04011 04012 inline UnicodeString& 04013 UnicodeString::replace(int32_t start, 04014 int32_t _length, 04015 UChar srcChar) 04016 { return doReplace(start, _length, &srcChar, 0, 1); } 04017 04018 inline UnicodeString& 04019 UnicodeString::replace(int32_t start, 04020 int32_t _length, 04021 UChar32 srcChar) { 04022 UChar buffer[U16_MAX_LENGTH]; 04023 int32_t count = 0; 04024 UBool isError = FALSE; 04025 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 04026 return doReplace(start, _length, buffer, 0, count); 04027 } 04028 04029 inline UnicodeString& 04030 UnicodeString::replaceBetween(int32_t start, 04031 int32_t limit, 04032 const UnicodeString& srcText) 04033 { return 
doReplace(start, limit - start, srcText, 0, srcText.length()); } 04034 04035 inline UnicodeString& 04036 UnicodeString::replaceBetween(int32_t start, 04037 int32_t limit, 04038 const UnicodeString& srcText, 04039 int32_t srcStart, 04040 int32_t srcLimit) 04041 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04042 04043 inline UnicodeString& 04044 UnicodeString::findAndReplace(const UnicodeString& oldText, 04045 const UnicodeString& newText) 04046 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04047 newText, 0, newText.length()); } 04048 04049 inline UnicodeString& 04050 UnicodeString::findAndReplace(int32_t start, 04051 int32_t _length, 04052 const UnicodeString& oldText, 04053 const UnicodeString& newText) 04054 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04055 newText, 0, newText.length()); } 04056 04057 // ============================ 04058 // extract 04059 // ============================ 04060 inline void 04061 UnicodeString::doExtract(int32_t start, 04062 int32_t _length, 04063 UnicodeString& target) const 04064 { target.replace(0, target.length(), *this, start, _length); } 04065 04066 inline void 04067 UnicodeString::extract(int32_t start, 04068 int32_t _length, 04069 UChar *target, 04070 int32_t targetStart) const 04071 { doExtract(start, _length, target, targetStart); } 04072 04073 inline void 04074 UnicodeString::extract(int32_t start, 04075 int32_t _length, 04076 UnicodeString& target) const 04077 { doExtract(start, _length, target); } 04078 04079 #if !UCONFIG_NO_CONVERSION 04080 04081 inline int32_t 04082 UnicodeString::extract(int32_t start, 04083 int32_t _length, 04084 char *dst, 04085 const char *codepage) const 04086 04087 { 04088 // This dstSize value will be checked explicitly 04089 #if defined(__GNUC__) 04090 // Ticket #7039: Clip length to the maximum valid length to the end of addressable memory given the starting address 04091 // This is only an issue when using 
GCC and certain optimizations are turned on. 04092 return extract(start, _length, dst, dst!=0 ? ((dst >= (char*)((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage); 04093 #else 04094 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 04095 #endif 04096 } 04097 04098 #endif 04099 04100 inline void 04101 UnicodeString::extractBetween(int32_t start, 04102 int32_t limit, 04103 UChar *dst, 04104 int32_t dstStart) const { 04105 pinIndex(start); 04106 pinIndex(limit); 04107 doExtract(start, limit - start, dst, dstStart); 04108 } 04109 04110 inline UnicodeString 04111 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04112 return tempSubString(start, limit - start); 04113 } 04114 04115 inline UChar 04116 UnicodeString::doCharAt(int32_t offset) const 04117 { 04118 if((uint32_t)offset < (uint32_t)length()) { 04119 return getArrayStart()[offset]; 04120 } else { 04121 return kInvalidUChar; 04122 } 04123 } 04124 04125 inline UChar 04126 UnicodeString::charAt(int32_t offset) const 04127 { return doCharAt(offset); } 04128 04129 inline UChar 04130 UnicodeString::operator[] (int32_t offset) const 04131 { return doCharAt(offset); } 04132 04133 inline UChar32 04134 UnicodeString::char32At(int32_t offset) const 04135 { 04136 int32_t len = length(); 04137 if((uint32_t)offset < (uint32_t)len) { 04138 const UChar *array = getArrayStart(); 04139 UChar32 c; 04140 U16_GET(array, 0, offset, len, c); 04141 return c; 04142 } else { 04143 return kInvalidUChar; 04144 } 04145 } 04146 04147 inline int32_t 04148 UnicodeString::getChar32Start(int32_t offset) const { 04149 if((uint32_t)offset < (uint32_t)length()) { 04150 const UChar *array = getArrayStart(); 04151 U16_SET_CP_START(array, 0, offset); 04152 return offset; 04153 } else { 04154 return 0; 04155 } 04156 } 04157 04158 inline int32_t 04159 UnicodeString::getChar32Limit(int32_t offset) const { 04160 int32_t len = length(); 04161 if((uint32_t)offset < 
(uint32_t)len) { 04162 const UChar *array = getArrayStart(); 04163 U16_SET_CP_LIMIT(array, 0, offset, len); 04164 return offset; 04165 } else { 04166 return len; 04167 } 04168 } 04169 04170 inline UBool 04171 UnicodeString::isEmpty() const { 04172 return fShortLength == 0; 04173 } 04174 04175 //======================================== 04176 // Write implementation methods 04177 //======================================== 04178 inline void 04179 UnicodeString::setLength(int32_t len) { 04180 if(len <= 127) { 04181 fShortLength = (int8_t)len; 04182 } else { 04183 fShortLength = (int8_t)-1; 04184 fUnion.fFields.fLength = len; 04185 } 04186 } 04187 04188 inline void 04189 UnicodeString::setToEmpty() { 04190 fShortLength = 0; 04191 fFlags = kShortString; 04192 } 04193 04194 inline void 04195 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04196 setLength(len); 04197 fUnion.fFields.fArray = array; 04198 fUnion.fFields.fCapacity = capacity; 04199 } 04200 04201 inline const UChar * 04202 UnicodeString::getTerminatedBuffer() { 04203 if(!isWritable()) { 04204 return 0; 04205 } else { 04206 UChar *array = getArrayStart(); 04207 int32_t len = length(); 04208 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 04209 /* 04210 * kRefCounted: Do not write the NUL if the buffer is shared. 04211 * That is mostly safe, except when the length of one copy was modified 04212 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04213 * Then the NUL would be written into the middle of another copy's string. 04214 */ 04215 if(!(fFlags&kBufferIsReadonly)) { 04216 /* 04217 * We must not write to a readonly buffer, but it is known to be 04218 * NUL-terminated if len<capacity. 
04219 * A shared, allocated buffer (refCount()>1) must not have its contents 04220 * modified, but the NUL at [len] is beyond the string contents, 04221 * and multiple string objects and threads writing the same NUL into the 04222 * same location is harmless. 04223 * In all other cases, the buffer is fully writable and it is anyway safe 04224 * to write the NUL. 04225 * 04226 * Note: An earlier version of this code tested whether there is a NUL 04227 * at [len] already, but, while safe, it generated lots of warnings from 04228 * tools like valgrind and Purify. 04229 */ 04230 array[len] = 0; 04231 } 04232 return array; 04233 } else if(cloneArrayIfNeeded(len+1)) { 04234 array = getArrayStart(); 04235 array[len] = 0; 04236 return array; 04237 } else { 04238 return 0; 04239 } 04240 } 04241 } 04242 04243 inline UnicodeString& 04244 UnicodeString::operator= (UChar ch) 04245 { return doReplace(0, length(), &ch, 0, 1); } 04246 04247 inline UnicodeString& 04248 UnicodeString::operator= (UChar32 ch) 04249 { return replace(0, length(), ch); } 04250 04251 inline UnicodeString& 04252 UnicodeString::setTo(const UnicodeString& srcText, 04253 int32_t srcStart, 04254 int32_t srcLength) 04255 { 04256 unBogus(); 04257 return doReplace(0, length(), srcText, srcStart, srcLength); 04258 } 04259 04260 inline UnicodeString& 04261 UnicodeString::setTo(const UnicodeString& srcText, 04262 int32_t srcStart) 04263 { 04264 unBogus(); 04265 srcText.pinIndex(srcStart); 04266 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04267 } 04268 04269 inline UnicodeString& 04270 UnicodeString::setTo(const UnicodeString& srcText) 04271 { 04272 unBogus(); 04273 return doReplace(0, length(), srcText, 0, srcText.length()); 04274 } 04275 04276 inline UnicodeString& 04277 UnicodeString::setTo(const UChar *srcChars, 04278 int32_t srcLength) 04279 { 04280 unBogus(); 04281 return doReplace(0, length(), srcChars, 0, srcLength); 04282 } 04283 04284 inline UnicodeString& 04285 
UnicodeString::setTo(UChar srcChar) 04286 { 04287 unBogus(); 04288 return doReplace(0, length(), &srcChar, 0, 1); 04289 } 04290 04291 inline UnicodeString& 04292 UnicodeString::setTo(UChar32 srcChar) 04293 { 04294 unBogus(); 04295 return replace(0, length(), srcChar); 04296 } 04297 04298 inline UnicodeString& 04299 UnicodeString::append(const UnicodeString& srcText, 04300 int32_t srcStart, 04301 int32_t srcLength) 04302 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 04303 04304 inline UnicodeString& 04305 UnicodeString::append(const UnicodeString& srcText) 04306 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04307 04308 inline UnicodeString& 04309 UnicodeString::append(const UChar *srcChars, 04310 int32_t srcStart, 04311 int32_t srcLength) 04312 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04313 04314 inline UnicodeString& 04315 UnicodeString::append(const UChar *srcChars, 04316 int32_t srcLength) 04317 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04318 04319 inline UnicodeString& 04320 UnicodeString::append(UChar srcChar) 04321 { return doReplace(length(), 0, &srcChar, 0, 1); } 04322 04323 inline UnicodeString& 04324 UnicodeString::append(UChar32 srcChar) { 04325 UChar buffer[U16_MAX_LENGTH]; 04326 int32_t _length = 0; 04327 UBool isError = FALSE; 04328 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 04329 return doReplace(length(), 0, buffer, 0, _length); 04330 } 04331 04332 inline UnicodeString& 04333 UnicodeString::operator+= (UChar ch) 04334 { return doReplace(length(), 0, &ch, 0, 1); } 04335 04336 inline UnicodeString& 04337 UnicodeString::operator+= (UChar32 ch) { 04338 return append(ch); 04339 } 04340 04341 inline UnicodeString& 04342 UnicodeString::operator+= (const UnicodeString& srcText) 04343 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04344 04345 inline UnicodeString& 04346 UnicodeString::insert(int32_t start, 04347 const UnicodeString& srcText, 
04348 int32_t srcStart, 04349 int32_t srcLength) 04350 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04351 04352 inline UnicodeString& 04353 UnicodeString::insert(int32_t start, 04354 const UnicodeString& srcText) 04355 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04356 04357 inline UnicodeString& 04358 UnicodeString::insert(int32_t start, 04359 const UChar *srcChars, 04360 int32_t srcStart, 04361 int32_t srcLength) 04362 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04363 04364 inline UnicodeString& 04365 UnicodeString::insert(int32_t start, 04366 const UChar *srcChars, 04367 int32_t srcLength) 04368 { return doReplace(start, 0, srcChars, 0, srcLength); } 04369 04370 inline UnicodeString& 04371 UnicodeString::insert(int32_t start, 04372 UChar srcChar) 04373 { return doReplace(start, 0, &srcChar, 0, 1); } 04374 04375 inline UnicodeString& 04376 UnicodeString::insert(int32_t start, 04377 UChar32 srcChar) 04378 { return replace(start, 0, srcChar); } 04379 04380 04381 inline UnicodeString& 04382 UnicodeString::remove() 04383 { 04384 // remove() of a bogus string makes the string empty and non-bogus 04385 // we also un-alias a read-only alias to deal with NUL-termination 04386 // issues with getTerminatedBuffer() 04387 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04388 setToEmpty(); 04389 } else { 04390 fShortLength = 0; 04391 } 04392 return *this; 04393 } 04394 04395 inline UnicodeString& 04396 UnicodeString::remove(int32_t start, 04397 int32_t _length) 04398 { 04399 if(start <= 0 && _length == INT32_MAX) { 04400 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04401 return remove(); 04402 } 04403 return doReplace(start, _length, NULL, 0, 0); 04404 } 04405 04406 inline UnicodeString& 04407 UnicodeString::removeBetween(int32_t start, 04408 int32_t limit) 04409 { return doReplace(start, limit - start, NULL, 0, 0); } 04410 04411 inline UnicodeString & 04412 
UnicodeString::retainBetween(int32_t start, int32_t limit) { 04413 truncate(limit); 04414 return doReplace(0, start, NULL, 0, 0); 04415 } 04416 04417 inline UBool 04418 UnicodeString::truncate(int32_t targetLength) 04419 { 04420 if(isBogus() && targetLength == 0) { 04421 // truncate(0) of a bogus string makes the string empty and non-bogus 04422 unBogus(); 04423 return FALSE; 04424 } else if((uint32_t)targetLength < (uint32_t)length()) { 04425 setLength(targetLength); 04426 if(fFlags&kBufferIsReadonly) { 04427 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04428 } 04429 return TRUE; 04430 } else { 04431 return FALSE; 04432 } 04433 } 04434 04435 inline UnicodeString& 04436 UnicodeString::reverse() 04437 { return doReverse(0, length()); } 04438 04439 inline UnicodeString& 04440 UnicodeString::reverse(int32_t start, 04441 int32_t _length) 04442 { return doReverse(start, _length); } 04443 04444 U_NAMESPACE_END 04445 04446 #endif