ICU 49.1.1
messagepattern.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *   Copyright (C) 2011-2012, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 *******************************************************************************
00006 *   file name:  messagepattern.h
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2011mar14
00012 *   created by: Markus W. Scherer
00013 */
00014 
00015 #ifndef __MESSAGEPATTERN_H__
00016 #define __MESSAGEPATTERN_H__
00017 
00023 #include "unicode/utypes.h"
00024 
00025 #if !UCONFIG_NO_FORMATTING
00026 
00027 #include "unicode/parseerr.h"
00028 #include "unicode/unistr.h"
00029 
/**
 * Apostrophe-handling mode of a MessagePattern.
 *
 * A value of this type is accepted by a MessagePattern constructor and by
 * MessagePattern::clearPatternAndSetApostropheMode(), and is read back with
 * MessagePattern::getApostropheMode().
 *
 * The enumerators have no explicit initializers, so their values are 0 and 1
 * in declaration order.
 *
 * NOTE(review): judging by the names, the two modes differ in whether a
 * literal apostrophe has to be doubled in the pattern text. The parser that
 * gives them meaning is not part of this header -- confirm there.
 */
00066 enum UMessagePatternApostropheMode {
00078     UMSGPAT_APOS_DOUBLE_OPTIONAL,
00087     UMSGPAT_APOS_DOUBLE_REQUIRED
00088 };
/** Makes the enum type usable without the "enum" keyword. */
00092 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
00093 
/**
 * Type tag of a MessagePattern::Part.
 *
 * The enumerators have no explicit initializers, so their values run from 0
 * (UMSGPAT_PART_TYPE_MSG_START) to 13 (UMSGPAT_PART_TYPE_ARG_DOUBLE) in
 * declaration order.
 *
 * What this header shows about individual values:
 * - For UMSGPAT_PART_TYPE_ARG_START and UMSGPAT_PART_TYPE_ARG_LIMIT,
 *   MessagePattern::Part::getArgType() interprets the part's value as a
 *   UMessagePatternArgType.
 * - For UMSGPAT_PART_TYPE_ARG_INT and UMSGPAT_PART_TYPE_ARG_DOUBLE,
 *   MessagePattern::Part::hasNumericValue() returns true.
 *
 * NOTE(review): the meaning of the remaining part types is defined by the
 * parser implementation, which is not visible here.
 */
00098 enum UMessagePatternPartType {
00108     UMSGPAT_PART_TYPE_MSG_START,
00117     UMSGPAT_PART_TYPE_MSG_LIMIT,
00125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
00132     UMSGPAT_PART_TYPE_INSERT_CHAR,
00140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
00151     UMSGPAT_PART_TYPE_ARG_START,
00158     UMSGPAT_PART_TYPE_ARG_LIMIT,
00163     UMSGPAT_PART_TYPE_ARG_NUMBER,
00169     UMSGPAT_PART_TYPE_ARG_NAME,
00175     UMSGPAT_PART_TYPE_ARG_TYPE,
00181     UMSGPAT_PART_TYPE_ARG_STYLE,
00187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
00194     UMSGPAT_PART_TYPE_ARG_INT,
00202     UMSGPAT_PART_TYPE_ARG_DOUBLE
00203 };
/** Makes the enum type usable without the "enum" keyword. */
00207 typedef enum UMessagePatternPartType UMessagePatternPartType;
00208 
/**
 * Argument type reported by MessagePattern::Part::getArgType().
 *
 * The enumerators have no explicit initializers, so their values run from 0
 * (UMSGPAT_ARG_TYPE_NONE) to 4 (UMSGPAT_ARG_TYPE_SELECT) in declaration order.
 *
 * Part::getArgType() casts the part's stored value to this type for
 * ARG_START and ARG_LIMIT parts, and returns UMSGPAT_ARG_TYPE_NONE for every
 * other part type.
 *
 * MessagePattern also declares parseChoiceStyle(), parsePluralStyle() and
 * parseSelectStyle().
 * NOTE(review): these presumably correspond to the CHOICE, PLURAL and SELECT
 * enumerators -- confirm in the implementation.
 */
00217 enum UMessagePatternArgType {
00222     UMSGPAT_ARG_TYPE_NONE,
00228     UMSGPAT_ARG_TYPE_SIMPLE,
00234     UMSGPAT_ARG_TYPE_CHOICE,
00244     UMSGPAT_ARG_TYPE_PLURAL,
00249     UMSGPAT_ARG_TYPE_SELECT
00250 };
/** Makes the enum type usable without the "enum" keyword. */
00254 typedef enum UMessagePatternArgType UMessagePatternArgType;
00255 
/**
 * Negative sentinel constants declared as an anonymous enum.
 *
 * NOTE(review): presumably these are the special results of
 * MessagePattern::validateArgumentName(), which returns int32_t: "the name
 * is not a number" and "the name is not valid at all". That function is only
 * declared in this header -- confirm in its definition.
 */
00256 enum {
00262     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
00263 
00271     UMSGPAT_ARG_NAME_NOT_VALID=-2
00272 };
00273 
/**
 * Sentinel constant of type double with the value -123456789.
 *
 * NOTE(review): presumably returned by MessagePattern::getNumericValue()
 * (and possibly getPluralOffset()) when a part carries no numeric value.
 * Those functions are only declared in this header -- confirm in their
 * definitions.
 */
00280 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
00281 
00282 U_NAMESPACE_BEGIN
00283 
// Forward declarations of helper classes defined elsewhere.
// MessagePattern only holds pointers to them (numericValuesList and
// partsList), so their full definitions are not needed in this header.
00284 class MessagePatternDoubleList;
00285 class MessagePatternPartsList;
00286 
00343 class U_COMMON_API MessagePattern : public UObject {
00344 public:
00353     MessagePattern(UErrorCode &errorCode);
00354 
00364     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
00365 
00384     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00385 
00391     MessagePattern(const MessagePattern &other);
00392 
00399     MessagePattern &operator=(const MessagePattern &other);
00400 
00405     virtual ~MessagePattern();
00406 
00424     MessagePattern &parse(const UnicodeString &pattern,
00425                           UParseError *parseError, UErrorCode &errorCode);
00426 
00444     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
00445                                      UParseError *parseError, UErrorCode &errorCode);
00446 
00464     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
00465                                      UParseError *parseError, UErrorCode &errorCode);
00466 
00484     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
00485                                      UParseError *parseError, UErrorCode &errorCode);
00486 
00492     void clear();
00493 
00500     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
00501         clear();
00502         aposMode=mode;
00503     }
00504 
00510     UBool operator==(const MessagePattern &other) const;
00511 
00517     inline UBool operator!=(const MessagePattern &other) const {
00518         return !operator==(other);
00519     }
00520 
00525     int32_t hashCode() const;
00526 
00531     UMessagePatternApostropheMode getApostropheMode() const {
00532         return aposMode;
00533     }
00534 
00535     // Java has package-private jdkAposMode() here.
00536     // In C++, this is declared in the MessageImpl class.
00537 
00542     const UnicodeString &getPatternString() const {
00543         return msg;
00544     }
00545 
00551     UBool hasNamedArguments() const {
00552         return hasArgNames;
00553     }
00554 
00560     UBool hasNumberedArguments() const {
00561         return hasArgNumbers;
00562     }
00563 
00575     static int32_t validateArgumentName(const UnicodeString &name);
00576 
00587     UnicodeString autoQuoteApostropheDeep() const;
00588 
00589     class Part;
00590 
00597     int32_t countParts() const {
00598         return partsLength;
00599     }
00600 
00607     const Part &getPart(int32_t i) const {
00608         return parts[i];
00609     }
00610 
00618     UMessagePatternPartType getPartType(int32_t i) const {
00619         return getPart(i).type;
00620     }
00621 
00629     int32_t getPatternIndex(int32_t partIndex) const {
00630         return getPart(partIndex).index;
00631     }
00632 
00640     UnicodeString getSubstring(const Part &part) const {
00641         return msg.tempSubString(part.index, part.length);
00642     }
00643 
00651     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
00652         return 0==msg.compare(part.index, part.length, s);
00653     }
00654 
00661     double getNumericValue(const Part &part) const;
00662 
00669     double getPluralOffset(int32_t pluralStart) const;
00670 
00679     int32_t getLimitPartIndex(int32_t start) const {
00680         int32_t limit=getPart(start).limitPartIndex;
00681         if(limit<start) {
00682             return start;
00683         }
00684         return limit;
00685     }
00686 
00694     class Part : public UMemory {
00695     public:
00700         Part() {}
00701 
00707         UMessagePatternPartType getType() const {
00708             return type;
00709         }
00710 
00716         int32_t getIndex() const {
00717             return index;
00718         }
00719 
00726         int32_t getLength() const {
00727             return length;
00728         }
00729 
00736         int32_t getLimit() const {
00737             return index+length;
00738         }
00739 
00746         int32_t getValue() const {
00747             return value;
00748         }
00749 
00756         UMessagePatternArgType getArgType() const {
00757             UMessagePatternPartType type=getType();
00758             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
00759                 return (UMessagePatternArgType)value;
00760             } else {
00761                 return UMSGPAT_ARG_TYPE_NONE;
00762             }
00763         }
00764 
00772         static UBool hasNumericValue(UMessagePatternPartType type) {
00773             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
00774         }
00775 
00781         UBool operator==(const Part &other) const;
00782 
00788         inline UBool operator!=(const Part &other) const {
00789             return !operator==(other);
00790         }
00791 
00796         int32_t hashCode() const {
00797             return ((type*37+index)*37+length)*37+value;
00798         }
00799 
00800     private:
00801         friend class MessagePattern;
00802 
00803         static const int32_t MAX_LENGTH=0xffff;
00804         static const int32_t MAX_VALUE=0x7fff;
00805 
00806         // Some fields are not final because they are modified during pattern parsing.
00807         // After pattern parsing, the parts are effectively immutable.
00808         UMessagePatternPartType type;
00809         int32_t index;
00810         uint16_t length;
00811         int16_t value;
00812         int32_t limitPartIndex;
00813     };
00814 
00815 private:
00816     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00817 
00818     void postParse();
00819 
00820     int32_t parseMessage(int32_t index, int32_t msgStartLength,
00821                          int32_t nestingLevel, UMessagePatternArgType parentType,
00822                          UParseError *parseError, UErrorCode &errorCode);
00823 
00824     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
00825                      UParseError *parseError, UErrorCode &errorCode);
00826 
00827     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
00828 
00829     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
00830                              UParseError *parseError, UErrorCode &errorCode);
00831 
00832     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
00833                                      UParseError *parseError, UErrorCode &errorCode);
00834 
00843     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
00844 
00845     int32_t parseArgNumber(int32_t start, int32_t limit) {
00846         return parseArgNumber(msg, start, limit);
00847     }
00848 
00857     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
00858                      UParseError *parseError, UErrorCode &errorCode);
00859 
00860     // Java has package-private appendReducedApostrophes() here.
00861     // In C++, this is declared in the MessageImpl class.
00862 
00863     int32_t skipWhiteSpace(int32_t index);
00864 
00865     int32_t skipIdentifier(int32_t index);
00866 
00871     int32_t skipDouble(int32_t index);
00872 
00873     static UBool isArgTypeChar(UChar32 c);
00874 
00875     UBool isChoice(int32_t index);
00876 
00877     UBool isPlural(int32_t index);
00878 
00879     UBool isSelect(int32_t index);
00880 
00885     UBool inMessageFormatPattern(int32_t nestingLevel);
00886 
00891     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
00892 
00893     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
00894                  int32_t value, UErrorCode &errorCode);
00895 
00896     void addLimitPart(int32_t start,
00897                       UMessagePatternPartType type, int32_t index, int32_t length,
00898                       int32_t value, UErrorCode &errorCode);
00899 
00900     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
00901 
00902     void setParseError(UParseError *parseError, int32_t index);
00903 
00904     // No ICU "poor man's RTTI" for this class nor its subclasses.
00905     virtual UClassID getDynamicClassID() const;
00906 
00907     UBool init(UErrorCode &errorCode);
00908     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
00909 
00910     UMessagePatternApostropheMode aposMode;
00911     UnicodeString msg;
00912     // ArrayList<Part> parts=new ArrayList<Part>();
00913     MessagePatternPartsList *partsList;
00914     Part *parts;
00915     int32_t partsLength;
00916     // ArrayList<Double> numericValues;
00917     MessagePatternDoubleList *numericValuesList;
00918     double *numericValues;
00919     int32_t numericValuesLength;
00920     UBool hasArgNames;
00921     UBool hasArgNumbers;
00922     UBool needsAutoQuoting;
00923 };
00924 
00925 U_NAMESPACE_END
00926 
00927 #endif  // !UCONFIG_NO_FORMATTING
00928 
00929 #endif  // __MESSAGEPATTERN_H__