|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--org.apache.oro.text.regex.Perl5Matcher
Safe: The Perl5Matcher class is used to match regular expressions (conforming to the Perl5 regular expression syntax) generated by Perl5Compiler.
PatternMatcher
,
Perl5Compiler
Field Summary | |
private int[] |
__beginMatchOffsets
|
private int |
__bol
|
private int |
__currentOffset
|
private Perl5Repetition |
__currentRep
|
private static int |
__DEFAULT_LAST_MATCH_END_OFFSET
|
private int[] |
__endMatchOffsets
|
private int |
__endOffset
|
private int |
__eol
|
private static char |
__EOS
|
private int |
__expSize
|
private static int |
__INITIAL_NUM_OFFSETS
|
private char[] |
__input
|
private int |
__inputOffset
|
private int |
__lastMatchInputEndOffset
|
private Perl5MatchResult |
__lastMatchResult
|
private int |
__lastParen
|
private boolean |
__lastSuccess
|
private boolean |
__multiline
|
private int |
__numParentheses
|
private char[] |
__originalInput
|
private char |
__previousChar
|
private char[] |
__program
|
private Stack |
__stack
|
Constructor Summary | |
Perl5Matcher()
Enabled: |
Method Summary | |
private static boolean |
__compare(char[] s1,
int s1Offs,
char[] s2,
int s2Offs,
int n)
|
private static int |
__findFirst(char[] input,
int current,
int endOffset,
char[] mustString)
|
private void |
__initInterpreterGlobals(Perl5Pattern expression,
char[] input,
int beginOffset,
int endOffset)
|
private boolean |
__interpret(Perl5Pattern expression,
char[] input,
int beginOffset,
int endOffset)
|
private boolean |
__match(int offset)
|
private void |
__popState()
|
private void |
__pushState(int parenFloor)
|
private int |
__repeat(int offset,
int max)
|
private void |
__setLastMatchResult()
|
private boolean |
__tryExpression(Perl5Pattern expression,
int offset)
|
(package private) char[] |
_toLower(char[] input)
|
boolean |
contains(char[] input,
Pattern pattern)
Enabled: Determines if a string (represented as a char[]) contains a pattern. |
boolean |
contains(PatternMatcherInput input,
Pattern pattern)
Enabled: Determines if the contents of a PatternMatcherInput, starting from the current offset of the input contains a pattern. |
boolean |
contains(String input,
Pattern pattern)
Enabled: Determines if a string contains a pattern. |
MatchResult |
getMatch()
Enabled: Fetches the last match found by a call to a matches() or contains() method. |
boolean |
isMultiline()
Enabled: Returns true if the matcher is treating input as consisting of multiple lines with respect to the ^ and $ metacharacters, false otherwise. |
boolean |
matches(char[] input,
Pattern pattern)
Enabled: Determines if a string (represented as a char[]) exactly matches a given pattern. |
boolean |
matches(PatternMatcherInput input,
Pattern pattern)
Enabled: Determines if the contents of a PatternMatcherInput instance exactly matches a given pattern. |
boolean |
matches(String input,
Pattern pattern)
Enabled: Determines if a string exactly matches a given pattern. |
boolean |
matchesPrefix(char[] input,
Pattern pattern)
Enabled: Determines if a prefix of a string (represented as a char[]) matches a given pattern. |
boolean |
matchesPrefix(char[] input,
Pattern pattern,
int offset)
Enabled: Determines if a prefix of a string (represented as a char[]) matches a given pattern, starting from a given offset into the string. |
boolean |
matchesPrefix(PatternMatcherInput input,
Pattern pattern)
Enabled: Determines if a prefix of a PatternMatcherInput instance matches a given pattern. |
boolean |
matchesPrefix(String input,
Pattern pattern)
Enabled: Determines if a prefix of a string matches a given pattern. |
void |
setMultiline(boolean multiline)
Enabled: Set whether or not subsequent calls to matches()
or contains() should treat the input as
consisting of multiple lines. |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
private static final char __EOS
private static final int __INITIAL_NUM_OFFSETS
private boolean __multiline
private boolean __lastSuccess
private char __previousChar
private char[] __input
private char[] __originalInput
private Perl5Repetition __currentRep
private int __numParentheses
private int __bol
private int __eol
private int __currentOffset
private int __endOffset
private char[] __program
private int __expSize
private int __inputOffset
private int __lastParen
private int[] __beginMatchOffsets
private int[] __endMatchOffsets
private Stack __stack
private Perl5MatchResult __lastMatchResult
private static final int __DEFAULT_LAST_MATCH_END_OFFSET
private int __lastMatchInputEndOffset
Constructor Detail |
public Perl5Matcher()
Method Detail |
private static boolean __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n)
private static int __findFirst(char[] input, int current, int endOffset, char[] mustString)
private void __pushState(int parenFloor)
private void __popState()
private void __initInterpreterGlobals(Perl5Pattern expression, char[] input, int beginOffset, int endOffset)
private void __setLastMatchResult()
private boolean __interpret(Perl5Pattern expression, char[] input, int beginOffset, int endOffset)
private boolean __tryExpression(Perl5Pattern expression, int offset)
private int __repeat(int offset, int max)
private boolean __match(int offset)
public void setMultiline(boolean multiline)
matches()
or contains()
should treat the input as
consisting of multiple lines. The default behavior is for
input to be treated as consisting of multiple lines. This method
should only be called if the Perl5Pattern used for a match was
compiled without either of the Perl5Compiler.MULTILINE_MASK or
Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the
behavior of how the ^, $, and . metacharacters are
interpreted on the fly. The compilation options used when compiling
a pattern ALWAYS override the behavior specified by setMultiline(). See
Perl5Compiler
for more details.
multiline
- If set to true treats the input as consisting of
multiple lines with respect to the ^ and $
metacharacters. If set to false treats the input as consisting
of a single line with respect to the ^ and $
metacharacters.
public boolean isMultiline()
char[] _toLower(char[] input)
public boolean matchesPrefix(char[] input, Pattern pattern, int offset)
getMatch()
.
This method is useful for certain common token identification tasks that are made more difficult without this functionality.
matchesPrefix
in interface PatternMatcher
input
- The char[] to test for a prefix match.
pattern
- The Pattern to be matched.
offset
- The offset at which to start searching for the prefix.
public boolean matchesPrefix(char[] input, Pattern pattern)
getMatch()
.
This method is useful for certain common token identification tasks that are made more difficult without this functionality.
matchesPrefix
in interface PatternMatcher
input
- The char[] to test for a prefix match.
pattern
- The Pattern to be matched.
public boolean matchesPrefix(String input, Pattern pattern)
getMatch()
.
This method is useful for certain common token identification tasks that are made more difficult without this functionality.
matchesPrefix
in interface PatternMatcher
input
- The String to test for a prefix match.
pattern
- The Pattern to be matched.
public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern)
getMatch()
. Unlike the
contains(PatternMatcherInput, Pattern)
method, the current offset of the PatternMatcherInput argument
is not updated. However, unlike the
matches(PatternMatcherInput, Pattern)
method,
matchesPrefix() will start its search from the current offset
rather than the begin offset of the PatternMatcherInput.
This method is useful for certain common token identification tasks that are made more difficult without this functionality.
matchesPrefix
in interface PatternMatcher
input
- The PatternMatcherInput to test for a prefix match.
pattern
- The Pattern to be matched.
public boolean matches(char[] input, Pattern pattern)
getMatch()
. The pattern must be
a Perl5Pattern instance, otherwise a ClassCastException will
be thrown. You are not required to, and indeed should NOT try to
(for performance reasons), catch a ClassCastException because it
will never be thrown as long as you use a Perl5Pattern as the pattern
parameter.
Note: matches() is not the same as sticking a ^ in front of your expression and a $ at the end of your expression in Perl5 and using the =~ operator, even though in many cases it will be equivalent. matches() literally looks for an exact match according to the rules of Perl5 expression matching. Therefore, if you have a pattern foo|foot and are matching the input foot it will not produce an exact match. But foot|foo will produce an exact match for either foot or foo. Remember, Perl5 regular expressions do not match the longest possible match. From the perlre manpage:
Alternatives are tried from left to right, so the first alternative found for which the entire expression matches, is the one that is chosen. This means that alternatives are not necessarily greedy. For example: when matching foo|foot against "barefoot", only the "foo" part will match, as that is the first alternative tried, and it successfully matches the target string.
matches
in interface PatternMatcher
input
- The char[] to test for an exact match.
pattern
- The Perl5Pattern to be matched.
public boolean matches(String input, Pattern pattern)
getMatch()
. The pattern must be
a Perl5Pattern instance, otherwise a ClassCastException will
be thrown. You are not required to, and indeed should NOT try to
(for performance reasons), catch a ClassCastException because it
will never be thrown as long as you use a Perl5Pattern as the pattern
parameter.
Note: matches() is not the same as sticking a ^ in front of your expression and a $ at the end of your expression in Perl5 and using the =~ operator, even though in many cases it will be equivalent. matches() literally looks for an exact match according to the rules of Perl5 expression matching. Therefore, if you have a pattern foo|foot and are matching the input foot it will not produce an exact match. But foot|foo will produce an exact match for either foot or foo. Remember, Perl5 regular expressions do not match the longest possible match. From the perlre manpage:
Alternatives are tried from left to right, so the first alternative found for which the entire expression matches, is the one that is chosen. This means that alternatives are not necessarily greedy. For example: when matching foo|foot against "barefoot", only the "foo" part will match, as that is the first alternative tried, and it successfully matches the target string.
matches
in interface PatternMatcher
input
- The String to test for an exact match.
pattern
- The Perl5Pattern to be matched.
public boolean matches(PatternMatcherInput input, Pattern pattern)
getMatch()
. Unlike the
contains(PatternMatcherInput, Pattern)
method, the current offset of the PatternMatcherInput argument
is not updated. You should remember that the region between
the begin (NOT the current) and end offsets of the PatternMatcherInput
will be tested for an exact match.
The pattern must be a Perl5Pattern instance, otherwise a ClassCastException will be thrown. You are not required to, and indeed should NOT try to (for performance reasons), catch a ClassCastException because it will never be thrown as long as you use a Perl5Pattern as the pattern parameter.
Note: matches() is not the same as sticking a ^ in front of your expression and a $ at the end of your expression in Perl5 and using the =~ operator, even though in many cases it will be equivalent. matches() literally looks for an exact match according to the rules of Perl5 expression matching. Therefore, if you have a pattern foo|foot and are matching the input foot it will not produce an exact match. But foot|foo will produce an exact match for either foot or foo. Remember, Perl5 regular expressions do not match the longest possible match. From the perlre manpage:
Alternatives are tried from left to right, so the first alternative found for which the entire expression matches, is the one that is chosen. This means that alternatives are not necessarily greedy. For example: when matching foo|foot against "barefoot", only the "foo" part will match, as that is the first alternative tried, and it successfully matches the target string.
matches
in interface PatternMatcher
input
- The PatternMatcherInput to test for a match.
pattern
- The Perl5Pattern to be matched.
public boolean contains(String input, Pattern pattern)
getMatch()
. If you want to access
subsequent matches you should either use a PatternMatcherInput object
or use the offset information in the MatchResult to create a substring
representing the remaining input. Using the MatchResult offset
information is the recommended method of obtaining the parts of the
string preceding the match and following the match.
The pattern must be a Perl5Pattern instance, otherwise a ClassCastException will be thrown. You are not required to, and indeed should NOT try to (for performance reasons), catch a ClassCastException because it will never be thrown as long as you use a Perl5Pattern as the pattern parameter.
contains
in interface PatternMatcher
input
- The String to test for a match.
pattern
- The Perl5Pattern to be matched.
public boolean contains(char[] input, Pattern pattern)
getMatch()
. If you want to access
subsequent matches you should either use a PatternMatcherInput object
or use the offset information in the MatchResult to create a substring
representing the remaining input. Using the MatchResult offset
information is the recommended method of obtaining the parts of the
string preceding the match and following the match.
The pattern must be a Perl5Pattern instance, otherwise a ClassCastException will be thrown. You are not required to, and indeed should NOT try to (for performance reasons), catch a ClassCastException because it will never be thrown as long as you use a Perl5Pattern as the pattern parameter.
contains
in interface PatternMatcher
input
- The char[] to test for a match.
pattern
- The Perl5Pattern to be matched.
public boolean contains(PatternMatcherInput input, Pattern pattern)
getMatch()
. The current offset of the
PatternMatcherInput is set to the offset corresponding to the end
of the match, so that a subsequent call to this method will continue
searching where the last call left off. You should remember that the
region between the begin and end offsets of the PatternMatcherInput is
considered the input to be searched, and that the current offset
of the PatternMatcherInput reflects where a search will start from.
Matches extending beyond the end offset of the PatternMatcherInput
will not be matched. In other words, a match must occur entirely
between the begin and end offsets of the input. See
PatternMatcherInput
for more details.
As a side effect, if a match is found, the PatternMatcherInput match
offset information is updated. See the
PatternMatcherInput.setMatchOffsets(int, int)
method for more details.
The pattern must be a Perl5Pattern instance, otherwise a ClassCastException will be thrown. You are not required to, and indeed should NOT try to (for performance reasons), catch a ClassCastException because it will never be thrown as long as you use a Perl5Pattern as the pattern parameter.
This method is usually used in a loop as follows:
PatternMatcher matcher;
PatternCompiler compiler;
Pattern pattern;
PatternMatcherInput input;
MatchResult result;

compiler = new Perl5Compiler();
matcher = new Perl5Matcher();

try {
  pattern = compiler.compile(somePatternString);
} catch(MalformedPatternException e) {
  System.err.println("Bad pattern.");
  System.err.println(e.getMessage());
  return;
}

input = new PatternMatcherInput(someStringInput);

while(matcher.contains(input, pattern)) {
  result = matcher.getMatch();
  // Perform whatever processing on the result you want.
}
contains
in interface PatternMatcher
input
- The PatternMatcherInput to test for a match.
pattern
- The Pattern to be matched.
public MatchResult getMatch()
getMatch
in interface PatternMatcher
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |