ICU 49.1.1  49.1.1
rbbi.h
Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 *   Copyright (C) 1999-2012 International Business Machines Corporation   *
00004 *   and others. All rights reserved.                                      *
00005 ***************************************************************************
00006 
00007 **********************************************************************
00008 *   Date        Name        Description
00009 *   10/22/99    alan        Creation.
00010 *   11/11/99    rgillam     Complete port from Java.
00011 **********************************************************************
00012 */
00013 
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016 
00017 #include "unicode/utypes.h"
00018 
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025 
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031 
00032 
00033 struct UTrie;                          // global-namespace forward declaration; not referenced elsewhere in this header as shown
00034 
00035 U_NAMESPACE_BEGIN
00036 
00038 struct RBBIDataHeader;                 // passed by pointer to the protected constructors below
00039 class  RuleBasedBreakIteratorTables;   // not referenced elsewhere in this header as shown
00040 class  BreakIterator;                  // also the base class below, which needs the full definition (presumably from unicode/brkiter.h)
00041 class  RBBIDataWrapper;                // pointee type of the fData member
00042 class  UStack;                         // pointee type of the fLanguageBreakEngines member
00043 class  LanguageBreakEngine;            // pointee type returned by getLanguageBreakEngine()
00044 class  UnhandledEngine;                // pointee type of the fUnhandledBreakEngine member
00045 struct RBBIStateTable;                 // pointee type taken by handleNext() / handlePrevious()
00046 
00047 
00048 
00049 
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
          /*
           * BreakIterator subclass whose public constructors accept rule source
           * text, pre-compiled binary rules, or a UDataMemory image.  It overrides
           * the BreakIterator text-access and navigation functions declared in the
           * "BreakIterator overrides" section below.
           *
           * NOTE(review): the data members and several helpers are protected rather
           * than private, so subclasses can reach them.  Most member semantics noted
           * below are inferred from names and types only and should be confirmed
           * against the implementation file.
           */
00066 
00067 protected:
          /** UText handle; presumably the text currently being analyzed (see setText(UText*, ...)) - confirm. */
00072     UText  *fText;
00073 
          /**
           * CharacterIterator pointer; presumably the iterator handed out by getText()
           * and replaced by adoptText() - confirm against the implementation.
           */
00079     CharacterIterator  *fCharIter;
00080 
          /** StringCharacterIterator pointer. NOTE(review): relationship to fCharIter is not visible here. */
00086     StringCharacterIterator *fSCharIter;
00087 
          /** UCharCharacterIterator pointer. NOTE(review): relationship to fCharIter is not visible here. */
00093     UCharCharacterIterator *fDCharIter;
00094 
          /** Pointer to an RBBIDataWrapper; presumably wraps the compiled rule data - confirm. */
00099     RBBIDataWrapper    *fData;
00100 
          /** Index of the most recent rule status; presumably what getRuleStatus() looks up - confirm. */
00104     int32_t             fLastRuleStatusIndex;
00105 
          /**
           * Flag paired with fLastRuleStatusIndex; presumably tells makeRuleStatusValid()
           * whether that index is up to date - confirm.
           */
00112     UBool               fLastStatusIndexValid;
00113 
          /** Unsigned counter; by its name, a count of dictionary characters seen - TODO confirm when it is updated. */
00119     uint32_t            fDictionaryCharCount;
00120 
          /**
           * Pointer to an int32_t array of cached break positions (by name).
           * NOTE(review): allocation and ownership are not visible in this header.
           */
00128     int32_t*            fCachedBreakPositions;
00129 
          /** Presumably the number of entries in fCachedBreakPositions - confirm. */
00134     int32_t             fNumCachedBreakPositions;
00135 
          /** Presumably the current index into fCachedBreakPositions - confirm. */
00141     int32_t             fPositionInCache;
00142     
          /** Pointer to a UStack; by name, a collection of language break engines - confirm contents and ownership. */
00150     UStack              *fLanguageBreakEngines;
00151     
          /** Pointer to an UnhandledEngine. NOTE(review): its role is not visible in this header. */
00159     UnhandledEngine     *fUnhandledBreakEngine;
00160     
          /** Integer break type; presumably the value stored by setBreakType() - confirm the set of valid values. */
00166     int32_t             fBreakType;
00167     
00168 protected:
00169     //=======================================================================
00170     // constructors
00171     //=======================================================================
00172 
          // Everything inside this guard is removed when U_HIDE_INTERNAL_API is defined.
00173 #ifndef U_HIDE_INTERNAL_API
00174 
          /**
           * Single-value tag type.  Its only use in this header is as the second
           * parameter of the const-data constructor below, where it selects that overload.
           */
00182     enum EDontAdopt {
00183         kDontAdopt
00184     };
00185 
          /**
           * Constructs from a non-const RBBIDataHeader pointer.
           * NOTE(review): presumably takes ownership of data (contrast with the
           * EDontAdopt overload) - confirm.
           * @param data    rule data header
           * @param status  ICU error code, in/out
           */
00196     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00197 
          /**
           * Constructs from a const RBBIDataHeader pointer.
           * @param data       rule data header; by the tag's name, presumably not adopted - confirm
           * @param dontAdopt  overload-selection tag (kDontAdopt)
           * @param status     ICU error code, in/out
           */
00206     RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
00207 #endif  /* U_HIDE_INTERNAL_API */
00208 
00209 
          // These friends may access the protected and private members declared in this class.
00210     friend class RBBIRuleBuilder;
00212     friend class BreakIterator;
00213 
00214 
00215 
00216 public:
00217 
          /** Default constructor. NOTE(review): the resulting rule set is not visible in this header. */
00222     RuleBasedBreakIterator();
00223 
          /** Copy constructor. @param that  the iterator to copy from */
00230     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00231 
          /**
           * Constructs from rule source text.
           * @param rules       the rules, as a UnicodeString
           * @param parseError  UParseError out-parameter; presumably filled in when the rules fail to parse - confirm
           * @param status      ICU error code, in/out
           */
00240     RuleBasedBreakIterator( const UnicodeString    &rules,
00241                              UParseError           &parseError,
00242                              UErrorCode            &status);
00243 
          /**
           * Constructs from pre-compiled rules held in a byte buffer.
           * NOTE(review): whether the buffer is copied or must outlive this object
           * is not visible here - confirm before relying on either.
           * @param compiledRules  pointer to the compiled rule bytes
           * @param ruleLength     length associated with compiledRules (presumably in bytes)
           * @param status         ICU error code, in/out
           */
00267     RuleBasedBreakIterator(const uint8_t *compiledRules,
00268                            uint32_t       ruleLength,
00269                            UErrorCode    &status);
00270 
          /**
           * Constructs from a UDataMemory image.
           * NOTE(review): ownership of image is not visible in this header - confirm.
           * @param image   data memory handle
           * @param status  ICU error code, in/out
           */
00283     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00284 
          /** Virtual destructor. */
00289     virtual ~RuleBasedBreakIterator();
00290 
          /** Copy assignment. @param that  the iterator to assign from  @return a RuleBasedBreakIterator reference */
00298     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00299 
          /** Virtual equality comparison against any BreakIterator. @param that  the iterator to compare with */
00308     virtual UBool operator==(const BreakIterator& that) const;
00309 
          /** Non-virtual; defined inline after the class as the negation of operator==. */
00317     UBool operator!=(const BreakIterator& that) const;
00318 
          /** Returns a BreakIterator pointer. NOTE(review): presumably a heap-allocated copy owned by the caller - confirm. */
00329     virtual BreakIterator* clone() const;
00330 
          /** Returns a 32-bit hash code for this object. */
00336     virtual int32_t hashCode(void) const;
00337 
          /** Returns a const reference to a UnicodeString; by name, the rule source for this iterator. */
00343     virtual const UnicodeString& getRules(void) const;
00344 
00345     //=======================================================================
00346     // BreakIterator overrides
00347     //=======================================================================
00348 
          /**
           * Returns a reference to a CharacterIterator.
           * NOTE(review): lifetime of the referenced object is not visible here;
           * presumably it is owned by this iterator - confirm.
           */
00374     virtual  CharacterIterator& getText(void) const;
00375 
00376 
          /**
           * Returns a UText pointer.
           * @param fillIn  caller-supplied UText; by name, presumably reused for the result - confirm
           * @param status  ICU error code, in/out
           */
00391      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00392 
          /** Sets the text from a CharacterIterator pointer; the name indicates the iterator is adopted (owned) - confirm. */
00400     virtual void adoptText(CharacterIterator* newText);
00401 
          /** Sets the text from a UnicodeString. NOTE(review): whether the string is copied or referenced is not visible here. */
00408     virtual void setText(const UnicodeString& newText);
00409 
          /**
           * Sets the text from a UText.
           * @param text    the input text
           * @param status  ICU error code, in/out
           */
00423     virtual void  setText(UText *text, UErrorCode &status);
00424 
          // Navigation functions.  Each returns an int32_t, presumably a boundary
          // offset within the text; unicode/brkiter.h holds the authoritative
          // contract that these overrides must honor.

          /** By name, moves to the first boundary. */
00430     virtual int32_t first(void);
00431 
          /** By name, moves to the last boundary. */
00437     virtual int32_t last(void);
00438 
          /** Overload of next() taking a count. @param n  presumably the number of boundaries to advance - confirm sign handling */
00449     virtual int32_t next(int32_t n);
00450 
          /** By name, advances to the next boundary. */
00456     virtual int32_t next(void);
00457 
          /** By name, moves to the previous boundary. */
00463     virtual int32_t previous(void);
00464 
          /** By name, finds the boundary following offset. @param offset  a position in the text */
00472     virtual int32_t following(int32_t offset);
00473 
          /** By name, finds the boundary preceding offset. @param offset  a position in the text */
00481     virtual int32_t preceding(int32_t offset);
00482 
          /** By name, reports whether offset is a boundary. Not const, so it may change iterator state. */
00491     virtual UBool isBoundary(int32_t offset);
00492 
          /** Const accessor; by name, returns the current position without moving. */
00498     virtual int32_t current(void) const;
00499 
00500 
          /** Returns a single int32_t rule status value. NOTE(review): the value's meaning is defined outside this header. */
00533     virtual int32_t getRuleStatus() const;
00534 
          /**
           * Vector form of getRuleStatus().
           * @param fillInVec  caller-supplied output array
           * @param capacity   size associated with fillInVec (presumably its element count)
           * @param status     ICU error code, in/out
           * @return an int32_t; presumably the number of status values - confirm
           */
00558     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00559 
          /** Returns the UClassID for the dynamic type of this object (virtual). */
00571     virtual UClassID getDynamicClassID(void) const;
00572 
          /** Static counterpart of getDynamicClassID(); returns the UClassID for this class. */
00584     static UClassID U_EXPORT2 getStaticClassID(void);
00585 
00586     /*
00587      * Create a clone (copy) of this break iterator in memory provided
00588      *  by the caller.  The idea is to increase performance by avoiding
00589      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
00590      *  Performance gains are minimal, and correct buffer management is
00591      *  tricky.  Use clone() instead.
00592      *
00593      * @param stackBuffer  The pointer to the memory into which the cloned object
00594      *                     should be placed.  If NULL,  allocate heap memory
00595      *                     for the cloned object.
00596      * @param BufferSize   The size of the buffer.  If zero, return the required
00597      *                     buffer size, but do not clone the object.  If the
00598      *                     size was too small (but not zero), allocate heap
00599      *                     storage for the cloned object.
00600      *
00601      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
00602      *                     returned if the provided buffer was too small, and
00603      *                     the clone was therefore put on the heap.
00604      *
00605      * @return  Pointer to the clone object.  This may differ from the stackBuffer
00606      *          address if the byte alignment of the stack buffer was not suitable
00607      *          or if the stackBuffer was too small to hold the clone.
00608      * @stable ICU 2.0
00609      */
00610     virtual BreakIterator *  createBufferClone(void *stackBuffer,
00611                                                int32_t &BufferSize,
00612                                                UErrorCode &status);
00613 
00614 
          /**
           * Returns a pointer to bytes; by name, the compiled (binary) form of the rules.
           * NOTE(review): lifetime of the returned buffer is not visible here - confirm.
           * @param length  out-parameter; presumably receives the size of the returned data
           */
00632     virtual const uint8_t *getBinaryRules(uint32_t &length);
00633 
          /**
           * Takes a UText and returns a RuleBasedBreakIterator reference (presumably *this).
           * NOTE(review): how this differs from setText(UText*, ...) is not visible in this header.
           * @param input   the text
           * @param status  ICU error code, in/out
           */
00659     virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
00660 
00661 
00662 protected:
00663     //=======================================================================
00664     // implementation
00665     //=======================================================================
          /** Virtual reset hook. NOTE(review): which state it clears is not visible in this header. */
00671     virtual void reset(void);
00672 
          // The two declarations inside this "#if 0" are compiled out, so they are
          // not part of the class.
00673 #if 0
00674 
00682     virtual UBool isDictionaryChar(UChar32);
00683 
00688     virtual int32_t getBreakType() const;
00689 #endif
00690 
          /** Takes an integer break type; presumably stores it in fBreakType - confirm. */
00695     virtual void setBreakType(int32_t type);
00696 
          // Removed when U_HIDE_INTERNAL_API is defined.
00697 #ifndef U_HIDE_INTERNAL_API
00698 
          /** Non-virtual initialization helper; presumably shared by the constructors - confirm. */
00703     void init();
00704 #endif  /* U_HIDE_INTERNAL_API */
00705 
00706 private:
00707 
          /** Private helper taking a state table; by name, the backward counterpart of handleNext(). Returns an int32_t. */
00717     int32_t handlePrevious(const RBBIStateTable *statetable);
00718 
          /** Private helper taking a state table; by name, the forward counterpart of handlePrevious(). Returns an int32_t. */
00728     int32_t handleNext(const RBBIStateTable *statetable);
00729 
00730 protected:
00731 
          // Removed when U_HIDE_INTERNAL_API is defined.
00732 #ifndef U_HIDE_INTERNAL_API
00733 
          /**
           * Takes a start/end position pair and a direction flag; returns an int32_t.
           * NOTE(review): presumably consults the language break engines for the
           * given range - confirm against the implementation.
           */
00747     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00748 #endif  /* U_HIDE_INTERNAL_API */
00749 
00750 private:
00751 
          /** Returns a const LanguageBreakEngine pointer for code point c. NOTE(review): may presumably be NULL - confirm. */
00758     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00759 
          /** Private helper; by name, brings the cached rule status (fLastRuleStatusIndex / fLastStatusIndexValid) up to date. */
00763     void makeRuleStatusValid();
00764 
00765 };
00766 
00767 //------------------------------------------------------------------------------
00768 //
00769 //   Inline Functions Definitions ...
00770 //
00771 //------------------------------------------------------------------------------
00772 
00773 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
          // Inequality is the logical negation of the virtual operator==, so the
          // two comparisons cannot disagree.
00774     return !(this->operator==(that));
00775 }
00776 
00777 U_NAMESPACE_END
00778 
00779 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00780 
00781 #endif