// ICU 49.1.1
00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2012 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00024 #if !UCONFIG_NO_BREAK_ITERATION 00025 00026 #include "unicode/brkiter.h" 00027 #include "unicode/udata.h" 00028 #include "unicode/parseerr.h" 00029 #include "unicode/schriter.h" 00030 #include "unicode/uchriter.h" 00031 00032 00033 struct UTrie; 00034 00035 U_NAMESPACE_BEGIN 00036 00038 struct RBBIDataHeader; 00039 class RuleBasedBreakIteratorTables; 00040 class BreakIterator; 00041 class RBBIDataWrapper; 00042 class UStack; 00043 class LanguageBreakEngine; 00044 class UnhandledEngine; 00045 struct RBBIStateTable; 00046 00047 00048 00049 00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00066 00067 protected: 00072 UText *fText; 00073 00079 CharacterIterator *fCharIter; 00080 00086 StringCharacterIterator *fSCharIter; 00087 00093 UCharCharacterIterator *fDCharIter; 00094 00099 RBBIDataWrapper *fData; 00100 00104 int32_t fLastRuleStatusIndex; 00105 00112 UBool fLastStatusIndexValid; 00113 00119 uint32_t fDictionaryCharCount; 00120 00128 int32_t* fCachedBreakPositions; 00129 00134 int32_t fNumCachedBreakPositions; 00135 00141 int32_t fPositionInCache; 00142 00150 UStack *fLanguageBreakEngines; 00151 00159 UnhandledEngine *fUnhandledBreakEngine; 00160 00166 int32_t fBreakType; 00167 00168 protected: 00169 //======================================================================= 00170 
// constructors 00171 //======================================================================= 00172 00173 #ifndef U_HIDE_INTERNAL_API 00174 00182 enum EDontAdopt { 00183 kDontAdopt 00184 }; 00185 00196 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00197 00206 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status); 00207 #endif /* U_HIDE_INTERNAL_API */ 00208 00209 00210 friend class RBBIRuleBuilder; 00212 friend class BreakIterator; 00213 00214 00215 00216 public: 00217 00222 RuleBasedBreakIterator(); 00223 00230 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00231 00240 RuleBasedBreakIterator( const UnicodeString &rules, 00241 UParseError &parseError, 00242 UErrorCode &status); 00243 00267 RuleBasedBreakIterator(const uint8_t *compiledRules, 00268 uint32_t ruleLength, 00269 UErrorCode &status); 00270 00283 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00284 00289 virtual ~RuleBasedBreakIterator(); 00290 00298 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00299 00308 virtual UBool operator==(const BreakIterator& that) const; 00309 00317 UBool operator!=(const BreakIterator& that) const; 00318 00329 virtual BreakIterator* clone() const; 00330 00336 virtual int32_t hashCode(void) const; 00337 00343 virtual const UnicodeString& getRules(void) const; 00344 00345 //======================================================================= 00346 // BreakIterator overrides 00347 //======================================================================= 00348 00374 virtual CharacterIterator& getText(void) const; 00375 00376 00391 virtual UText *getUText(UText *fillIn, UErrorCode &status) const; 00392 00400 virtual void adoptText(CharacterIterator* newText); 00401 00408 virtual void setText(const UnicodeString& newText); 00409 00423 virtual void setText(UText *text, UErrorCode &status); 00424 00430 virtual int32_t first(void); 00431 00437 virtual int32_t 
last(void); 00438 00449 virtual int32_t next(int32_t n); 00450 00456 virtual int32_t next(void); 00457 00463 virtual int32_t previous(void); 00464 00472 virtual int32_t following(int32_t offset); 00473 00481 virtual int32_t preceding(int32_t offset); 00482 00491 virtual UBool isBoundary(int32_t offset); 00492 00498 virtual int32_t current(void) const; 00499 00500 00533 virtual int32_t getRuleStatus() const; 00534 00558 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 00559 00571 virtual UClassID getDynamicClassID(void) const; 00572 00584 static UClassID U_EXPORT2 getStaticClassID(void); 00585 00586 /* 00587 * Create a clone (copy) of this break iterator in memory provided 00588 * by the caller. The idea is to increase performance by avoiding 00589 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00590 * Performance gains are minimal, and correct buffer management is 00591 * tricky. Use clone() instead. 00592 * 00593 * @param stackBuffer The pointer to the memory into which the cloned object 00594 * should be placed. If NULL, allocate heap memory 00595 * for the cloned object. 00596 * @param BufferSize The size of the buffer. If zero, return the required 00597 * buffer size, but do not clone the object. If the 00598 * size was too small (but not zero), allocate heap 00599 * storage for the cloned object. 00600 * 00601 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00602 * returned if the the provided buffer was too small, and 00603 * the clone was therefore put on the heap. 00604 * 00605 * @return Pointer to the clone object. This may differ from the stackBuffer 00606 * address if the byte alignment of the stack buffer was not suitable 00607 * or if the stackBuffer was too small to hold the clone. 
00608 * @stable ICU 2.0 00609 */ 00610 virtual BreakIterator * createBufferClone(void *stackBuffer, 00611 int32_t &BufferSize, 00612 UErrorCode &status); 00613 00614 00632 virtual const uint8_t *getBinaryRules(uint32_t &length); 00633 00659 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status); 00660 00661 00662 protected: 00663 //======================================================================= 00664 // implementation 00665 //======================================================================= 00671 virtual void reset(void); 00672 00673 #if 0 00674 00682 virtual UBool isDictionaryChar(UChar32); 00683 00688 virtual int32_t getBreakType() const; 00689 #endif 00690 00695 virtual void setBreakType(int32_t type); 00696 00697 #ifndef U_HIDE_INTERNAL_API 00698 00703 void init(); 00704 #endif /* U_HIDE_INTERNAL_API */ 00705 00706 private: 00707 00717 int32_t handlePrevious(const RBBIStateTable *statetable); 00718 00728 int32_t handleNext(const RBBIStateTable *statetable); 00729 00730 protected: 00731 00732 #ifndef U_HIDE_INTERNAL_API 00733 00747 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); 00748 #endif /* U_HIDE_INTERNAL_API */ 00749 00750 private: 00751 00758 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 00759 00763 void makeRuleStatusValid(); 00764 00765 }; 00766 00767 //------------------------------------------------------------------------------ 00768 // 00769 // Inline Functions Definitions ... 00770 // 00771 //------------------------------------------------------------------------------ 00772 00773 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00774 return !operator==(that); 00775 } 00776 00777 U_NAMESPACE_END 00778 00779 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00780 00781 #endif