diff options
Diffstat (limited to 'libjava/classpath/java/util/regex')
-rw-r--r-- | libjava/classpath/java/util/regex/MatchResult.java | 81 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/Matcher.java | 611 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/Pattern.java | 271 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/PatternSyntaxException.java | 135 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/package.html | 46 |
5 files changed, 1144 insertions, 0 deletions
diff --git a/libjava/classpath/java/util/regex/MatchResult.java b/libjava/classpath/java/util/regex/MatchResult.java new file mode 100644 index 000000000..605873dd0 --- /dev/null +++ b/libjava/classpath/java/util/regex/MatchResult.java @@ -0,0 +1,81 @@ +/* MatchResult.java -- Result of a regular expression match. + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. 
If you do not wish to do so, delete this +exception statement from your version. */ + + +package java.util.regex; + +/** + * This interface represents the result of a regular expression match. + * It can be used to query the contents of the match, but not to modify + * them. Within this package it is implemented by {@link Matcher}. + * @since 1.5 + */ +public interface MatchResult +{ + /** Returns the index just after the last matched character. */ + int end(); + + /** + * Returns the index just after the last matched character of the + * given sub-match group. + * @param group the index of the sub-match group + */ + int end(int group); + + /** Returns the substring of the input which was matched. */ + String group(); + + /** + * Returns the substring of the input which was matched by the + * given sub-match group. + * @param group the index of the sub-match group + */ + String group(int group); + + /** Returns the number of sub-match groups in the matching pattern. */ + int groupCount(); + + /** Returns the index of the first character of the match. */ + int start(); + + /** + * Returns the index of the first character of the given sub-match + * group. + * @param group the index of the sub-match group + */ + int start(int group); +} diff --git a/libjava/classpath/java/util/regex/Matcher.java b/libjava/classpath/java/util/regex/Matcher.java new file mode 100644 index 000000000..be57471de --- /dev/null +++ b/libjava/classpath/java/util/regex/Matcher.java @@ -0,0 +1,611 @@ +/* Matcher.java -- Instance of a regular expression applied to a char sequence. + Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package java.util.regex; + +import gnu.java.lang.CPStringBuilder; + +import gnu.java.util.regex.CharIndexed; +import gnu.java.util.regex.RE; +import gnu.java.util.regex.REMatch; + +/** + * Instance of a regular expression applied to a char sequence. + * + * @since 1.4 + */ +public final class Matcher implements MatchResult +{ + private Pattern pattern; + private CharSequence input; + // We use CharIndexed as an input object to the getMatch method in order + // that /\G/ (the end of the previous match) may work. 
The information + // of the previous match is stored in the CharIndexed object. + private CharIndexed inputCharIndexed; + private int position; + private int appendPosition; + private REMatch match; + + /** + * The start of the region of the input on which to match. + */ + private int regionStart; + + /** + * The end of the region of the input on which to match. + */ + private int regionEnd; + + /** + * True if the match process should look beyond the + * region marked by regionStart to regionEnd when + * performing lookAhead, lookBehind and boundary + * matching. + */ + private boolean transparentBounds; + + /** + * The flags that affect the anchoring bounds. + * If {@link #hasAnchoringBounds()} is {@code true}, + * the match process will honour the + * anchoring bounds: ^, \A, \Z, \z and $. If + * {@link #hasAnchoringBounds()} is {@code false}, + * the anchors are ignored and appropriate flags, + * stored in this variable, are used to provide this + * behaviour. + */ + private int anchoringBounds; + + Matcher(Pattern pattern, CharSequence input) + { + this.pattern = pattern; + this.input = input; + this.inputCharIndexed = RE.makeCharIndexed(input, 0); + regionStart = 0; + regionEnd = input.length(); + transparentBounds = false; + anchoringBounds = 0; + } + + /** + * @param sb The target string buffer + * @param replacement The replacement string + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public Matcher appendReplacement (StringBuffer sb, String replacement) + throws IllegalStateException + { + assertMatchOp(); + sb.append(input.subSequence(appendPosition, + match.getStartIndex()).toString()); + sb.append(RE.getReplacement(replacement, match, + RE.REG_REPLACE_USE_BACKSLASHESCAPE)); + appendPosition = match.getEndIndex(); + return this; + } + + /** 
+ * @param sb The target string buffer + */ + public StringBuffer appendTail (StringBuffer sb) + { + sb.append(input.subSequence(appendPosition, input.length()).toString()); + return sb; + } + + /** + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public int end () + throws IllegalStateException + { + assertMatchOp(); + return match.getEndIndex(); + } + + /** Returns the end index recorded for the given group by the previous match. + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If there is no capturing group in + * the pattern with the given index + */ + public int end (int group) + throws IllegalStateException + { + assertMatchOp(); + return match.getEndIndex(group); + } + + public boolean find () + { + boolean first = (match == null); + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), + position, anchoringBounds); + if (match != null) + { + int endIndex = match.getEndIndex(); + // Are we stuck at the same position? + if (!first && endIndex == position) + { + match = null; + // Not at the end of the input yet? 
+ if (position < input.length() - 1) + { + position++; + return find(position); + } + else + return false; + } + position = endIndex; + return true; + } + return false; + } + + /** Searches for the next match, starting the search at the given index. + * @param start The index to start the new pattern matching + * + * @exception IndexOutOfBoundsException NOTE(review): the JDK specifies this for + * an out-of-range start, but no explicit check is made in this method + */ + public boolean find (int start) + { + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), + start, anchoringBounds); + if (match != null) + { + position = match.getEndIndex(); + return true; + } + return false; + } + + /** + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public String group () + { + assertMatchOp(); + return match.toString(); + } + + /** Returns the text captured by the given group during the previous match. + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If there is no capturing group in + * the pattern with the given index + */ + public String group (int group) + throws IllegalStateException + { + assertMatchOp(); + return match.toString(group); + } + + /** + * @param replacement The replacement string + */ + public String replaceFirst (String replacement) + { + reset(); + // Semantics might not quite match + return pattern.getRE().substitute(input, replacement, position, + RE.REG_REPLACE_USE_BACKSLASHESCAPE); + } + + /** + * @param replacement The replacement string + */ + public String replaceAll (String replacement) + { + reset(); 
+ return pattern.getRE().substituteAll(input, replacement, position, + RE.REG_REPLACE_USE_BACKSLASHESCAPE); + } + + public int groupCount 
() + { + return pattern.getRE().getNumSubs(); + } + + public boolean lookingAt () + { + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, regionStart, + anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, + anchoringBounds|RE.REG_FIX_STARTING_POSITION); + if (match != null) + { + if (match.getStartIndex() == 0) + { + position = match.getEndIndex(); + return true; + } + match = null; + } + return false; + } + + /** + * Attempts to match the entire input sequence against the pattern. + * + * If the match succeeds then more information can be obtained via the + * start, end, and group methods. + * + * @see #start() + * @see #end() + * @see #group() + */ + public boolean matches () + { + if (transparentBounds || (regionStart == 0 && regionEnd == input.length())) + match = pattern.getRE().getMatch(inputCharIndexed, regionStart, + anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX); + else + match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0, + anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION); + if (match != null) + { + if (match.getStartIndex() == 0) + { + position = match.getEndIndex(); + if (position == input.length()) + return true; + } + match = null; + } + return false; + } + + /** + * Returns the Pattern that is interpreted by this Matcher + */ + public Pattern pattern () + { + return pattern; + } + + /** + * Resets the internal state of the matcher, including + * resetting the region to its default state of encompassing + * the whole input. The state of {@link #hasTransparentBounds()} + * and {@link #hasAnchoringBounds()} are unaffected. + * + * @return a reference to this matcher. 
+ * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #hasAnchoringBounds() + */ + public Matcher reset () + { + position = 0; + match = null; + regionStart = 0; + regionEnd = input.length(); + appendPosition = 0; + return this; + } + + /** + * Resets the internal state of the matcher, including + * resetting the region to its default state of encompassing + * the whole input. The state of {@link #hasTransparentBounds()} + * and {@link #hasAnchoringBounds()} are unaffected. + * + * @param input The new input character sequence. + * @return a reference to this matcher. + * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #hasAnchoringBounds() + */ + public Matcher reset (CharSequence input) + { + this.input = input; + this.inputCharIndexed = RE.makeCharIndexed(input, 0); + return reset(); + } + + /** + * @return the start index of the previous match + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public int start () + throws IllegalStateException + { + assertMatchOp(); + return match.getStartIndex(); + } + + /** Returns the start index recorded for the given group by the previous match. + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If there is no capturing group in + * the pattern with the given index + */ + public int start (int group) + throws IllegalStateException + { + assertMatchOp(); + return match.getStartIndex(group); + } + + /** + * @return True if and only if the matcher hit the end of input. + * @since 1.5 + */ + public boolean hitEnd() + { + return inputCharIndexed.hitEnd(); + } + + /** + * @return A string expression of this matcher. 
+ */ + public String toString() + { + CPStringBuilder sb = new CPStringBuilder(); + sb.append(this.getClass().getName()) + .append("[pattern=").append(pattern.pattern()) + .append(" region=").append(regionStart).append(",").append(regionEnd) + .append(" anchoringBounds=").append(anchoringBounds == 0) + .append(" transparentBounds=").append(transparentBounds) + .append(" lastmatch=").append(match == null ? "" : match.toString()) + .append("]"); + return sb.toString(); + } + + private void assertMatchOp() + { + if (match == null) throw new IllegalStateException(); + } + + /** + * <p> + * Defines the region of the input on which to match. + * By default, the {@link Matcher} attempts to match + * the whole string (from 0 to the length of the input), + * but a region between {@code start} (inclusive) and + * {@code end} (exclusive) on which to match may instead + * be defined using this method. + * </p> + * <p> + * The behaviour of region matching is further affected + * by the use of transparent or opaque bounds (see + * {@link #useTransparentBounds(boolean)}) and whether or not + * anchors ({@code ^} and {@code $}) are in use + * (see {@link #useAnchoringBounds(boolean)}). With transparent + * bounds, the matcher is aware of input outside the bounds + * set by this method, whereas, with opaque bounds (the default) + * only the input within the bounds is used. The use of + * anchors are affected by this setting; with transparent + * bounds, anchors will match the beginning of the real input, + * while with opaque bounds they match the beginning of the + * region. {@link #useAnchoringBounds(boolean)} can be used + * to turn on or off the matching of anchors. + * </p> + * + * @param start the start of the region (inclusive). + * @param end the end of the region (exclusive). + * @return a reference to this matcher. 
+ * @throws IndexOutOfBoundsException if either {@code start} or + * {@code end} are less than zero, + * if either {@code start} or + * {@code end} are greater than the + * length of the input, or if + * {@code start} is greater than + * {@code end}. + * @see #regionStart() + * @see #regionEnd() + * @see #hasTransparentBounds() + * @see #useTransparentBounds(boolean) + * @see #hasAnchoringBounds() + * @see #useAnchoringBounds(boolean) + * @since 1.5 + */ + public Matcher region(int start, int end) + { + int length = input.length(); + if (start < 0) + throw new IndexOutOfBoundsException("The start position was less than zero."); + if (start > length) // start == length is legal: it selects an empty region at the end of the input + throw new IndexOutOfBoundsException("The start position is after the end of the input."); + if (end < 0) + throw new IndexOutOfBoundsException("The end position was less than zero."); + if (end > length) + throw new IndexOutOfBoundsException("The end position is after the end of the input."); + if (start > end) + throw new IndexOutOfBoundsException("The start position is after the end position."); + reset(); + regionStart = start; + regionEnd = end; + return this; + } + + /** + * The start of the region on which to perform matches (inclusive). + * + * @return the start index of the region. + * @see #region(int,int) + * @see #regionEnd() + * @since 1.5 + */ + public int regionStart() + { + return regionStart; + } + + /** + * The end of the region on which to perform matches (exclusive). + * + * @return the end index of the region. + * @see #region(int,int) + * @see #regionStart() + * @since 1.5 + */ + public int regionEnd() + { + return regionEnd; + } + + /** + * Returns true if the bounds of the region marked by + * {@link #regionStart()} and {@link #regionEnd()} are + * transparent. When these bounds are transparent, the + * matching process can look beyond them to perform + * lookahead, lookbehind and boundary matching operations. 
+ * By default, the bounds are opaque. 
+ * + * @return true if the bounds of the matching region are + * transparent. + * @see #useTransparentBounds(boolean) + * @see #region(int,int) + * @see #regionStart() + * @see #regionEnd() + * @since 1.5 + */ + public boolean hasTransparentBounds() + { + return transparentBounds; + } + + /** + * Sets the transparency of the bounds of the region + * marked by {@link #regionStart()} and {@link #regionEnd()}. + * A value of {@code true} makes the bounds transparent, + * so the matcher can see beyond them to perform lookahead, + * lookbehind and boundary matching operations. A value + * of {@code false} (the default) makes the bounds opaque, + * restricting the match to the input region denoted + * by {@link #regionStart()} and {@link #regionEnd()}. + * + * @param transparent true if the bounds should be transparent. + * @return a reference to this matcher. + * @see #hasTransparentBounds() + * @see #region(int,int) + * @see #regionStart() + * @see #regionEnd() + * @since 1.5 + */ + public Matcher useTransparentBounds(boolean transparent) + { + transparentBounds = transparent; + return this; + } + + /** + * Returns true if the matcher will honour the use of + * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z}, + * {@code \z} and {@code $}. By default, the anchors + * are used. Note that the effect of the anchors is + * also affected by {@link #hasTransparentBounds()}. + * + * @return true if the matcher will attempt to match + * the anchoring bounds. + * @see #useAnchoringBounds(boolean) + * @see #hasTransparentBounds() + * @since 1.5 + */ + public boolean hasAnchoringBounds() + { + return anchoringBounds == 0; + } + + /** + * Enables or disables the use of the anchoring bounds: + * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and + * {@code $}. By default, their use is enabled. When + * disabled, the matcher will not attempt to match + * the anchors. + * + * @param useAnchors true if anchoring bounds should be used. + * @return a reference to this matcher. 
+ * @since 1.5 + * @see #hasAnchoringBounds() + */ + public Matcher useAnchoringBounds(boolean useAnchors) + { + if (useAnchors) + anchoringBounds = 0; + else + anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL; + return this; + } + + /** + * Returns a read-only snapshot of the current state of + * the {@link Matcher} as a {@link MatchResult}. Any + * subsequent changes to this instance are not reflected + * in the returned {@link MatchResult}. + * + * @return a {@link MatchResult} instance representing the + * current state of the {@link Matcher}. + */ + public MatchResult toMatchResult() + { + Matcher snapshot = new Matcher(pattern, input); + if (match != null) + snapshot.match = (REMatch) match.clone(); + return snapshot; + } + +} diff --git a/libjava/classpath/java/util/regex/Pattern.java b/libjava/classpath/java/util/regex/Pattern.java new file mode 100644 index 000000000..7d1fc84b4 --- /dev/null +++ b/libjava/classpath/java/util/regex/Pattern.java @@ -0,0 +1,271 @@ +/* Pattern.java -- Compiled regular expression ready to be applied. + Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. 
Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package java.util.regex; + +import gnu.java.util.regex.RE; +import gnu.java.util.regex.REException; +import gnu.java.util.regex.RESyntax; + +import java.io.Serializable; +import java.util.ArrayList; + + +/** + * Compiled regular expression ready to be applied. 
+ * + * @since 1.4 + */ +public final class Pattern implements Serializable +{ + private static final long serialVersionUID = 5073258162644648461L; + + public static final int CANON_EQ = 128; // accepted by compile(), but not yet acted upon by the constructor + public static final int CASE_INSENSITIVE = 2; + public static final int COMMENTS = 4; + public static final int DOTALL = 32; + public static final int MULTILINE = 8; + public static final int UNICODE_CASE = 64; + public static final int UNIX_LINES = 1; + + private final String regex; + private final int flags; + + private final RE re; + + private Pattern (String regex, int flags) + throws PatternSyntaxException + { + this.regex = regex; + this.flags = flags; + + RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4; + int gnuFlags = 0; + gnuFlags |= RE.REG_ICASE_USASCII; // set by default; cleared again below when UNICODE_CASE is requested + if ((flags & CASE_INSENSITIVE) != 0) + gnuFlags |= RE.REG_ICASE; + if ((flags & MULTILINE) != 0) + { + gnuFlags |= RE.REG_MULTILINE; + syntax = new RESyntax(syntax); + syntax.setLineSeparator(null); + } + if ((flags & DOTALL) != 0) + gnuFlags |= RE.REG_DOT_NEWLINE; + if ((flags & UNICODE_CASE) != 0) + gnuFlags &= ~RE.REG_ICASE_USASCII; + // not yet supported: + // if ((flags & CANON_EQ) != 0) gnuFlags = + + if ((flags & UNIX_LINES) != 0) + { + // Use a syntax set with \n for linefeeds? 
+ syntax = new RESyntax(syntax); + syntax.setLineSeparator("\n"); + } + + if ((flags & COMMENTS) != 0) + { + gnuFlags |= RE.REG_X_COMMENTS; + } + + try + { + this.re = new RE(regex, gnuFlags, syntax); + } + catch (REException e) + { + PatternSyntaxException pse; + pse = new PatternSyntaxException(e.getMessage(), + regex, e.getPosition()); + pse.initCause(e); + throw pse; + } + } + + // package private accessor method + RE getRE() + { + return re; + } + + /** + * @param regex The regular expression + * + * @exception PatternSyntaxException If the expression's syntax is invalid + */ + public static Pattern compile (String regex) + throws PatternSyntaxException + { + return compile(regex, 0); + } + + /** + * @param regex The regular expression + * @param flags The match flags, a bit mask + * + * @exception PatternSyntaxException If the expression's syntax is invalid + * @exception IllegalArgumentException If bit values other than those + * corresponding to the defined match flags are set in flags + */ + public static Pattern compile (String regex, int flags) + throws PatternSyntaxException + { + // FIXME: check which flags are really accepted; 0xEF is the bitwise OR of the seven flag constants of this class + if ((flags & ~0xEF) != 0) + throw new IllegalArgumentException (); + + return new Pattern (regex, flags); + } + + public int flags () + { + return this.flags; + } + + /** + * @param regex The regular expression + * @param input The character sequence to be matched + * + * @exception PatternSyntaxException If the expression's syntax is invalid + */ + public static boolean matches (String regex, CharSequence input) + { + return compile(regex).matcher(input).matches(); + } + + /** + * @param input The character sequence to be matched + */ + public Matcher matcher (CharSequence input) + { + return new Matcher(this, input); + } + + /** + * @param input The character sequence to be matched + */ + public String[] split (CharSequence input) + { + return split(input, 0); 
+ } + + /** + * @param input The character sequence to be matched 
+ * @param limit The result threshold; when positive, at most limit - 1 matches are used as separators + */ + public String[] split (CharSequence input, int limit) + { + Matcher matcher = new Matcher(this, input); + ArrayList<String> list = new ArrayList<String>(); + int empties = 0; + int count = 0; + int start = 0; + int end; + boolean matched = matcher.find(); + + while (matched && (limit <= 0 || count < limit - 1)) + { + ++count; + end = matcher.start(); + if (start == end) + empties++; + else + { + while (empties > 0) + { + list.add(""); + empties--; + } + + String text = input.subSequence(start, end).toString(); + list.add(text); + } + start = matcher.end(); + matched = matcher.find(); + } + + // We matched nothing, so the whole input is the only token. + if (!matched && count == 0) + return new String[] { input.toString() }; + + // Is the last token empty? + boolean emptyLast = (start == input.length()); + + // Can/Must we add empties or an extra last token at the end? + if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast)) + { + if (limit > list.size()) + { + int max = limit - list.size(); + empties = (empties > max) ? max : empties; + } + while (empties > 0) + { + list.add(""); + empties--; + } + } + + // last token at end + if (limit != 0 || (limit == 0 && !emptyLast)) + { + String t = input.subSequence(start, input.length()).toString(); + if ("".equals(t) && limit == 0) + { /* Don't add. */ } + else + list.add(t); + } + + return list.toArray(new String[list.size()]); + } + + public String pattern () + { + return regex; + } + + /** + * Return the regular expression used to construct this object. 
+ * @specnote Prior to JDK 1.5 this method had a different behavior + * @since 1.5 + */ + public String toString() + { + return regex; + } +} diff --git a/libjava/classpath/java/util/regex/PatternSyntaxException.java b/libjava/classpath/java/util/regex/PatternSyntaxException.java new file mode 100644 index 000000000..db73d06e2 --- /dev/null +++ b/libjava/classpath/java/util/regex/PatternSyntaxException.java @@ -0,0 +1,135 @@ +/* PatternSyntaxException - Indicates illegal pattern for regular expression. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. 
An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package java.util.regex; + +import gnu.java.lang.CPStringBuilder; + +/** + * Indicates illegal pattern for regular expression. + * Includes state to inspect the pattern and what and where the expression + * was not valid regular expression. + * @since 1.4 + */ +public class PatternSyntaxException extends IllegalArgumentException +{ + private static final long serialVersionUID = -3864639126226059218L; + + /** + * Human readable escription of the syntax error. + */ + private final String desc; + + /** + * The original pattern that contained the syntax error. + */ + private final String pattern; + + /** + * Index of the first character in the String that was probably invalid, + * or -1 when unknown. + */ + private final int index; + + /** + * Creates a new PatternSyntaxException. + * + * @param description Human readable escription of the syntax error. + * @param pattern The original pattern that contained the syntax error. + * @param index Index of the first character in the String that was + * probably invalid, or -1 when unknown. + */ + public PatternSyntaxException(String description, + String pattern, + int index) + { + super(description); + this.desc = description; + this.pattern = pattern; + this.index = index; + } + + /** + * Returns a human readable escription of the syntax error. + */ + public String getDescription() + { + return desc; + } + + /** + * Returns the original pattern that contained the syntax error. + */ + public String getPattern() + { + return pattern; + } + + /** + * Returns the index of the first character in the String that was probably + * invalid, or -1 when unknown. 
+ */ + public int getIndex() + { + return index; + } + + /** + * Returns a string containing a line with the description, a line with + * the original pattern and a line indicating with a ^ which character is + * probably the first invalid character in the pattern if the index is not + * negative. + */ + public String getMessage() + { + String lineSep = System.getProperty("line.separator"); + CPStringBuilder sb = new CPStringBuilder(desc); + sb.append(lineSep); + sb.append('\t'); + sb.append(pattern); + if (index != -1) + { + sb.append(lineSep); + sb.append('\t'); + for (int i=0; i<index; i++) + sb.append(' '); + sb.append('^'); + } + return sb.toString(); + } + +} diff --git a/libjava/classpath/java/util/regex/package.html b/libjava/classpath/java/util/regex/package.html new file mode 100644 index 000000000..0573a360c --- /dev/null +++ b/libjava/classpath/java/util/regex/package.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- package.html - describes classes in java.util.regex package. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. 
Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. --> + +<html> +<head><title>GNU Classpath - java.util.regex</title></head> + +<body> +<p>Regular expression patterns and matchers.</p> + +</body> +</html> |