Class RegExp
- java.lang.Object
-
- dk.brics.automaton.RegExp
-
public class RegExp extends java.lang.Object
Regular Expression extension to Automaton.
Regular expressions are built from the following abstract syntax:
Regular expression abstract syntax
regexp       ::= unionexp
               |
unionexp     ::= interexp | unionexp                            (union)
               | interexp
interexp     ::= concatexp & interexp                           (intersection) [OPTIONAL]
               | concatexp
concatexp    ::= repeatexp concatexp                            (concatenation)
               | repeatexp
repeatexp    ::= repeatexp ?                                    (zero or one occurrence)
               | repeatexp *                                    (zero or more occurrences)
               | repeatexp +                                    (one or more occurrences)
               | repeatexp {n}                                  (n occurrences)
               | repeatexp {n,}                                 (n or more occurrences)
               | repeatexp {n,m}                                (n to m occurrences, including both)
               | complexp
complexp     ::= ~ complexp                                     (complement) [OPTIONAL]
               | charclassexp
charclassexp ::= [ charclasses ]                                (character class)
               | [^ charclasses ]                               (negated character class)
               | simpleexp
charclasses  ::= charclass charclasses
               | charclass
charclass    ::= charexp - charexp                              (character range, including end-points)
               | charexp
simpleexp    ::= charexp
               | .                                              (any single character)
               | #                                              (the empty language) [OPTIONAL]
               | @                                              (any string) [OPTIONAL]
               | " <Unicode string without double-quotes> "     (a string)
               | ( )                                            (the empty string)
               | ( unionexp )                                   (precedence override)
               | < <identifier> >                               (named automaton) [OPTIONAL]
               | <n-m>                                          (numerical interval) [OPTIONAL]
charexp      ::= <Unicode character>                            (a single non-reserved character)
               | \ <Unicode character>                          (a single character)
The productions marked [OPTIONAL] are only allowed if specified by the syntax flags passed to the
RegExp constructor. The reserved characters used in the (enabled) syntax must be escaped with backslash (\) or double-quotes ("..."). (In contrast to other regexp syntaxes, this is required also in character classes.) Be aware that dash (-) has a special meaning in charclass expressions. An identifier is a string not containing right angle bracket (>) or dash (-). Numerical intervals are specified by non-negative decimal integers and include both end points, and if n and m have the same number of digits, then the conforming strings must have that length (i.e. prefixed by 0's).
-
-
Nested Class Summary
Nested Classes Modifier and Type Class Description (package private) static class
RegExp.Kind
-
Field Summary
Fields Modifier and Type Field Description static int
ALL
Syntax flag, enables all optional regexp syntax.
private static boolean
allow_mutation
static int
ANYSTRING
Syntax flag, enables anystring (@).
static int
AUTOMATON
Syntax flag, enables named automata (<identifier>).
(package private) java.lang.String
b
(package private) char
c
static int
COMPLEMENT
Syntax flag, enables complement (~).
(package private) int
digits
static int
EMPTY
Syntax flag, enables empty language (#).
(package private) RegExp
exp1
(package private) RegExp
exp2
(package private) int
flags
(package private) char
from
static int
INTERSECTION
Syntax flag, enables intersection (&).
static int
INTERVAL
Syntax flag, enables numerical intervals (<n-m>).
(package private) RegExp.Kind
kind
(package private) int
max
(package private) int
min
static int
NONE
Syntax flag, enables no optional regexp syntax.
(package private) int
pos
(package private) java.lang.String
s
(package private) char
to
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description private void
appendChar(char c, java.lang.StringBuilder b)
private boolean
check(int flag)
private void
findLeaves(RegExp exp, RegExp.Kind kind, java.util.List<Automaton> list, java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize)
java.util.Set<java.lang.String>
getIdentifiers()
Returns set of automaton identifiers that occur in this regular expression.
(package private) void
getIdentifiers(java.util.Set<java.lang.String> set)
(package private) static RegExp
makeAnyChar()
(package private) static RegExp
makeAnyString()
(package private) static RegExp
makeAutomaton(java.lang.String s)
(package private) static RegExp
makeChar(char c)
(package private) static RegExp
makeCharRange(char from, char to)
(package private) static RegExp
makeComplement(RegExp exp)
(package private) static RegExp
makeConcatenation(RegExp exp1, RegExp exp2)
(package private) static RegExp
makeEmpty()
(package private) static RegExp
makeIntersection(RegExp exp1, RegExp exp2)
(package private) static RegExp
makeInterval(int min, int max, int digits)
(package private) static RegExp
makeOptional(RegExp exp)
(package private) static RegExp
makeRepeat(RegExp exp)
(package private) static RegExp
makeRepeat(RegExp exp, int min)
(package private) static RegExp
makeRepeat(RegExp exp, int min, int max)
private static RegExp
makeString(RegExp exp1, RegExp exp2)
(package private) static RegExp
makeString(java.lang.String s)
(package private) static RegExp
makeUnion(RegExp exp1, RegExp exp2)
private boolean
match(char c)
private boolean
more()
private char
next()
(package private) RegExp
parseCharClass()
(package private) RegExp
parseCharClasses()
(package private) RegExp
parseCharClassExp()
(package private) char
parseCharExp()
(package private) RegExp
parseComplExp()
(package private) RegExp
parseConcatExp()
(package private) RegExp
parseInterExp()
(package private) RegExp
parseRepeatExp()
(package private) RegExp
parseSimpleExp()
(package private) RegExp
parseUnionExp()
private boolean
peek(java.lang.String s)
boolean
setAllowMutate(boolean flag)
Sets or resets allow mutate flag.
Automaton
toAutomaton()
Constructs new Automaton from this RegExp.
Automaton
toAutomaton(boolean minimize)
Constructs new Automaton from this RegExp.
Automaton
toAutomaton(AutomatonProvider automaton_provider)
Constructs new Automaton from this RegExp.
Automaton
toAutomaton(AutomatonProvider automaton_provider, boolean minimize)
Constructs new Automaton from this RegExp.
Automaton
toAutomaton(java.util.Map<java.lang.String,Automaton> automata)
Constructs new Automaton from this RegExp.
Automaton
toAutomaton(java.util.Map<java.lang.String,Automaton> automata, boolean minimize)
Constructs new Automaton from this RegExp.
private Automaton
toAutomaton(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize)
private Automaton
toAutomatonAllowMutate(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize)
java.lang.String
toString()
Constructs string from parsed regular expression.
(package private) java.lang.StringBuilder
toStringBuilder(java.lang.StringBuilder b)
-
-
-
Field Detail
-
INTERSECTION
public static final int INTERSECTION
Syntax flag, enables intersection (&).
- See Also:
- Constant Field Values
-
COMPLEMENT
public static final int COMPLEMENT
Syntax flag, enables complement (~).
- See Also:
- Constant Field Values
-
EMPTY
public static final int EMPTY
Syntax flag, enables empty language (#).
- See Also:
- Constant Field Values
-
ANYSTRING
public static final int ANYSTRING
Syntax flag, enables anystring (@).
- See Also:
- Constant Field Values
-
AUTOMATON
public static final int AUTOMATON
Syntax flag, enables named automata (<identifier>).
- See Also:
- Constant Field Values
-
INTERVAL
public static final int INTERVAL
Syntax flag, enables numerical intervals (<n-m>).
- See Also:
- Constant Field Values
-
ALL
public static final int ALL
Syntax flag, enables all optional regexp syntax.
- See Also:
- Constant Field Values
-
NONE
public static final int NONE
Syntax flag, enables no optional regexp syntax.
- See Also:
- Constant Field Values
-
allow_mutation
private static boolean allow_mutation
-
kind
RegExp.Kind kind
-
exp1
RegExp exp1
-
exp2
RegExp exp2
-
s
java.lang.String s
-
c
char c
-
min
int min
-
max
int max
-
digits
int digits
-
from
char from
-
to
char to
-
b
java.lang.String b
-
flags
int flags
-
pos
int pos
-
-
Constructor Detail
-
RegExp
RegExp()
-
RegExp
public RegExp(java.lang.String s) throws java.lang.IllegalArgumentException
Constructs new RegExp from a string. Same as RegExp(s, ALL).
- Parameters:
s - regexp string
- Throws:
java.lang.IllegalArgumentException - if an error occurred while parsing the regular expression
-
RegExp
public RegExp(java.lang.String s, int syntax_flags) throws java.lang.IllegalArgumentException
Constructs new RegExp from a string.
- Parameters:
s - regexp string
syntax_flags - boolean 'or' of optional syntax constructs to be enabled
- Throws:
java.lang.IllegalArgumentException - if an error occurred while parsing the regular expression
-
-
Method Detail
-
toAutomaton
public Automaton toAutomaton()
Constructs new Automaton from this RegExp. Same as toAutomaton(null) (empty automaton map).
-
toAutomaton
public Automaton toAutomaton(boolean minimize)
Constructs new Automaton from this RegExp. Same as toAutomaton(null, minimize) (empty automaton map).
-
toAutomaton
public Automaton toAutomaton(AutomatonProvider automaton_provider) throws java.lang.IllegalArgumentException
Constructs new Automaton from this RegExp. The constructed automaton is minimal and deterministic and has no transitions to dead states.
- Parameters:
automaton_provider - provider of automata for named identifiers
- Throws:
java.lang.IllegalArgumentException - if this regular expression uses a named identifier that is not available from the automaton provider
-
toAutomaton
public Automaton toAutomaton(AutomatonProvider automaton_provider, boolean minimize) throws java.lang.IllegalArgumentException
Constructs new Automaton from this RegExp. The constructed automaton has no transitions to dead states.
- Parameters:
automaton_provider - provider of automata for named identifiers
minimize - if set, the automaton is minimized and determinized
- Throws:
java.lang.IllegalArgumentException - if this regular expression uses a named identifier that is not available from the automaton provider
-
toAutomaton
public Automaton toAutomaton(java.util.Map<java.lang.String,Automaton> automata) throws java.lang.IllegalArgumentException
Constructs new Automaton from this RegExp. The constructed automaton is minimal and deterministic and has no transitions to dead states.
- Parameters:
automata - a map from automaton identifiers to automata (of type Automaton).
- Throws:
java.lang.IllegalArgumentException - if this regular expression uses a named identifier that does not occur in the automaton map
-
toAutomaton
public Automaton toAutomaton(java.util.Map<java.lang.String,Automaton> automata, boolean minimize) throws java.lang.IllegalArgumentException
Constructs new Automaton from this RegExp. The constructed automaton has no transitions to dead states.
- Parameters:
automata - a map from automaton identifiers to automata (of type Automaton).
minimize - if set, the automaton is minimized and determinized
- Throws:
java.lang.IllegalArgumentException - if this regular expression uses a named identifier that does not occur in the automaton map
-
setAllowMutate
public boolean setAllowMutate(boolean flag)
Sets or resets allow mutate flag. If this flag is set, then automata construction uses mutable automata, which is slightly faster but not thread safe. By default, the flag is not set.
- Parameters:
flag - if true, the flag is set
- Returns:
- previous value of the flag
-
toAutomatonAllowMutate
private Automaton toAutomatonAllowMutate(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize) throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
toAutomaton
private Automaton toAutomaton(java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize) throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
findLeaves
private void findLeaves(RegExp exp, RegExp.Kind kind, java.util.List<Automaton> list, java.util.Map<java.lang.String,Automaton> automata, AutomatonProvider automaton_provider, boolean minimize)
-
toString
public java.lang.String toString()
Constructs string from parsed regular expression.
- Overrides:
toString in class java.lang.Object
-
toStringBuilder
java.lang.StringBuilder toStringBuilder(java.lang.StringBuilder b)
-
appendChar
private void appendChar(char c, java.lang.StringBuilder b)
-
getIdentifiers
public java.util.Set<java.lang.String> getIdentifiers()
Returns set of automaton identifiers that occur in this regular expression.
-
getIdentifiers
void getIdentifiers(java.util.Set<java.lang.String> set)
-
makeChar
static RegExp makeChar(char c)
-
makeCharRange
static RegExp makeCharRange(char from, char to)
-
makeAnyChar
static RegExp makeAnyChar()
-
makeEmpty
static RegExp makeEmpty()
-
makeString
static RegExp makeString(java.lang.String s)
-
makeAnyString
static RegExp makeAnyString()
-
makeAutomaton
static RegExp makeAutomaton(java.lang.String s)
-
makeInterval
static RegExp makeInterval(int min, int max, int digits)
-
peek
private boolean peek(java.lang.String s)
-
match
private boolean match(char c)
-
more
private boolean more()
-
next
private char next() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
check
private boolean check(int flag)
-
parseUnionExp
final RegExp parseUnionExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseInterExp
final RegExp parseInterExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseConcatExp
final RegExp parseConcatExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseRepeatExp
final RegExp parseRepeatExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseComplExp
final RegExp parseComplExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseCharClassExp
final RegExp parseCharClassExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseCharClasses
final RegExp parseCharClasses() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseCharClass
final RegExp parseCharClass() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseSimpleExp
final RegExp parseSimpleExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
parseCharExp
final char parseCharExp() throws java.lang.IllegalArgumentException
- Throws:
java.lang.IllegalArgumentException
-
-