Source for java.util.regex.Pattern

   1: /* Pattern.java -- Compiled regular expression ready to be applied.
   2:    Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.util.regex;
  39: 
  40: import gnu.regexp.RE;
  41: import gnu.regexp.REException;
  42: import gnu.regexp.RESyntax;
  43: 
  44: import java.io.Serializable;
  45: import java.util.ArrayList;
  46: 
  47: 
  48: /**
  49:  * Compiled regular expression ready to be applied. 
  50:  *
  51:  * @since 1.4
  52:  */
  53: public final class Pattern implements Serializable
  54: {
  55:   private static final long serialVersionUID = 5073258162644648461L;
  56:   
  57:   public static final int CANON_EQ = 128;
  58:   public static final int CASE_INSENSITIVE = 2;
  59:   public static final int COMMENTS = 4;
  60:   public static final int DOTALL = 32;
  61:   public static final int MULTILINE = 8;
  62:   public static final int UNICODE_CASE = 64;
  63:   public static final int UNIX_LINES = 1;
  64:   
  65:   private final String regex;
  66:   private final int flags;
  67: 
  68:   private final RE re;
  69: 
  70:   private Pattern (String regex, int flags)
  71:     throws PatternSyntaxException
  72:   {
  73:     this.regex = regex;
  74:     this.flags = flags;
  75: 
  76:     int gnuFlags = 0;
  77:     gnuFlags |= RE.REG_ICASE_USASCII;
  78:     if ((flags & CASE_INSENSITIVE) != 0)
  79:       gnuFlags |= RE.REG_ICASE;
  80:     if ((flags & MULTILINE) != 0)
  81:       gnuFlags |= RE.REG_MULTILINE;
  82:     if ((flags & DOTALL) != 0)
  83:       gnuFlags |= RE.REG_DOT_NEWLINE;
  84:     if ((flags & UNICODE_CASE) != 0)
  85:       gnuFlags &= ~RE.REG_ICASE_USASCII;
  86:     // not yet supported:
  87:     // if ((flags & CANON_EQ) != 0) gnuFlags =
  88: 
  89:     RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
  90:     if ((flags & UNIX_LINES) != 0)
  91:       {
  92:     // Use a syntax set with \n for linefeeds?
  93:     syntax = new RESyntax(syntax);
  94:     syntax.setLineSeparator("\n");
  95:       }
  96: 
  97:     if ((flags & COMMENTS) != 0)
  98:       {
  99:     gnuFlags |= RE.REG_X_COMMENTS;
 100:       }
 101: 
 102:     try
 103:       {
 104:     this.re = new RE(regex, gnuFlags, syntax);
 105:       }
 106:     catch (REException e)
 107:       {
 108:     PatternSyntaxException pse;
 109:     pse = new PatternSyntaxException(e.getMessage(),
 110:                      regex, e.getPosition());
 111:     pse.initCause(e);
 112:     throw pse;
 113:       }
 114:   }
 115:  
 116:   // package private accessor method
 117:   RE getRE()
 118:   {
 119:     return re;
 120:   }
 121: 
 122:   /**
 123:    * @param regex The regular expression
 124:    *
 125:    * @exception PatternSyntaxException If the expression's syntax is invalid
 126:    */
 127:   public static Pattern compile (String regex)
 128:     throws PatternSyntaxException
 129:   {
 130:     return compile(regex, 0);
 131:   }
 132:   
 133:   /**
 134:    * @param regex The regular expression
 135:    * @param flags The match flags, a bit mask
 136:    *
 137:    * @exception PatternSyntaxException If the expression's syntax is invalid
 138:    * @exception IllegalArgumentException If bit values other than those
 139:    * corresponding to the defined match flags are set in flags
 140:    */
 141:   public static Pattern compile (String regex, int flags)
 142:     throws PatternSyntaxException
 143:   {
 144:     // FIXME: check which flags are really accepted
 145:     if ((flags & ~0xEF) != 0)
 146:       throw new IllegalArgumentException ();
 147:     
 148:     return new Pattern (regex, flags); 
 149:   }
 150:   
 151:   public int flags ()
 152:   {
 153:     return this.flags;
 154:   }
 155:   
 156:   /**
 157:    * @param regex The regular expression
 158:    * @param input The character sequence to be matched
 159:    *
 160:    * @exception PatternSyntaxException If the expression's syntax is invalid
 161:    */
 162:   public static boolean matches (String regex, CharSequence input) 
 163:   {
 164:     return compile(regex).matcher(input).matches();
 165:   }
 166:   
 167:   /**
 168:    * @param input The character sequence to be matched
 169:    */
 170:   public Matcher matcher (CharSequence input)
 171:   {
 172:     return new Matcher(this, input);
 173:   }
 174:   
 175:   /**
 176:    * @param input The character sequence to be matched
 177:    */
 178:   public String[] split (CharSequence input)
 179:   {
 180:     return split(input, 0);
 181:   }
 182:   
 183:   /**
 184:    * @param input The character sequence to be matched
 185:    * @param limit The result threshold
 186:    */
 187:   public String[] split (CharSequence input, int limit)
 188:   {
 189:     Matcher matcher = new Matcher(this, input);
 190:     ArrayList list = new ArrayList();
 191:     int empties = 0;
 192:     int count = 0;
 193:     int start = 0;
 194:     int end;
 195:     boolean matched = matcher.find();
 196: 
 197:     while (matched && (limit <= 0 || count < limit - 1))
 198:       {
 199:     ++count;
 200:     end = matcher.start();
 201:     if (start == end)
 202:       empties++;
 203:     else
 204:       {
 205:         while (empties > 0)
 206:           {
 207:         list.add("");
 208:         empties--;
 209:           }
 210: 
 211:         String text = input.subSequence(start, end).toString();
 212:         list.add(text);
 213:       }
 214:     start = matcher.end();
 215:     matched = matcher.find();
 216:       }
 217: 
 218:     // We matched nothing.
 219:     if (!matched && count == 0)
 220:       return new String[] { input.toString() };
 221:     
 222:     // Is the last token empty?
 223:     boolean emptyLast = (start == input.length());
 224: 
 225:     // Can/Must we add empties or an extra last token at the end?
 226:     if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
 227:       {
 228:     if (limit > list.size())
 229:       {
 230:         int max = limit - list.size();
 231:         empties = (empties > max) ? max : empties;
 232:       }
 233:     while (empties > 0)
 234:       {
 235:         list.add("");
 236:         empties--;
 237:       }
 238:       }
 239: 
 240:     // last token at end
 241:     if (limit != 0 || (limit == 0 && !emptyLast))
 242:       {
 243:     String t = input.subSequence(start, input.length()).toString();
 244:     if ("".equals(t) && limit == 0)
 245:       ; // Don't add.
 246:     else
 247:       list.add(t);
 248:       }
 249: 
 250:     String[] output = new String [list.size()];
 251:     list.toArray(output);
 252:     return output;
 253:   }
 254:   
 255:   public String pattern ()
 256:   {
 257:     return regex;
 258:   }
 259: }