From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- .../gnu/java/text/AttributedFormatBuffer.java | 251 ++++++++++ .../classpath/gnu/java/text/BaseBreakIterator.java | 124 +++++ .../gnu/java/text/CharacterBreakIterator.java | 213 ++++++++ libjava/classpath/gnu/java/text/FormatBuffer.java | 136 ++++++ .../gnu/java/text/FormatCharacterIterator.java | 533 +++++++++++++++++++++ .../classpath/gnu/java/text/LineBreakIterator.java | 194 ++++++++ .../gnu/java/text/SentenceBreakIterator.java | 247 ++++++++++ .../gnu/java/text/StringFormatBuffer.java | 127 +++++ .../classpath/gnu/java/text/WordBreakIterator.java | 250 ++++++++++ libjava/classpath/gnu/java/text/package.html | 46 ++ 10 files changed, 2121 insertions(+) create mode 100644 libjava/classpath/gnu/java/text/AttributedFormatBuffer.java create mode 100644 libjava/classpath/gnu/java/text/BaseBreakIterator.java create mode 100644 libjava/classpath/gnu/java/text/CharacterBreakIterator.java create mode 100644 libjava/classpath/gnu/java/text/FormatBuffer.java create mode 100644 libjava/classpath/gnu/java/text/FormatCharacterIterator.java create mode 100644 libjava/classpath/gnu/java/text/LineBreakIterator.java create mode 100644 libjava/classpath/gnu/java/text/SentenceBreakIterator.java create mode 100644 libjava/classpath/gnu/java/text/StringFormatBuffer.java create mode 100644 
libjava/classpath/gnu/java/text/WordBreakIterator.java create mode 100644 libjava/classpath/gnu/java/text/package.html (limited to 'libjava/classpath/gnu/java/text') diff --git a/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java b/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java new file mode 100644 index 000000000..2a89ae097 --- /dev/null +++ b/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java @@ -0,0 +1,251 @@ +/* AttributedFormatBuffer.java -- Implements an attributed FormatBuffer. + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. 
An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ +package gnu.java.text; + +import gnu.java.lang.CPStringBuilder; + +import java.text.AttributedCharacterIterator; +import java.util.ArrayList; +import java.util.HashMap; + +/** + * This class is an implementation of a FormatBuffer with attributes. + * Note that this class is not thread-safe; external synchronisation + * should be used if an instance is to be accessed from multiple threads. + * + * @author Guilhem Lavaux + * @date April 10, 2004 + */ +public class AttributedFormatBuffer implements FormatBuffer +{ + private final CPStringBuilder buffer; + private final ArrayList ranges; + private final ArrayList attributes; + private int[] a_ranges; + private HashMap[] a_attributes; + private int startingRange; + AttributedCharacterIterator.Attribute defaultAttr; + + /** + * This constructor accepts a StringBuffer. If the buffer contains + * already some characters they will not be attributed. + */ + public AttributedFormatBuffer(CPStringBuilder buffer) + { + this.buffer = new CPStringBuilder(buffer); + this.ranges = new ArrayList(); + this.attributes = new ArrayList(); + this.defaultAttr = null; + if (buffer.length() != 0) + { + this.startingRange = buffer.length(); + addAttribute(buffer.length(), null); + } + else + this.startingRange = -1; + } + + public AttributedFormatBuffer(int prebuffer) + { + this(new CPStringBuilder(prebuffer)); + } + + public AttributedFormatBuffer() + { + this(10); + } + + /** + * This method is a helper function for formatters. Given a set of ranges + * and attributes it adds exactly one attribute for the range of characters + * comprised between the last entry in 'ranges' and the specified new range. 
+ * + * @param new_range A new range to insert in the list. + * @param attr A new attribute to insert in the list. + */ + private final void addAttribute(int new_range, AttributedCharacterIterator.Attribute attr) + { + HashMap map; + + if (attr != null) + { + map = new HashMap(); + map.put(attr, attr); + attributes.add(map); + } + else + attributes.add(null); + + ranges.add(new Integer(new_range)); + } + + public void append(String s) + { + if (startingRange < 0) + startingRange = 0; + buffer.append(s); + } + + public void append(String s, AttributedCharacterIterator.Attribute attr) + { + setDefaultAttribute(attr); + startingRange = buffer.length(); + append(s); + setDefaultAttribute(null); + } + + public void append(String s, int[] ranges, HashMap[] attrs) + { + int curPos = buffer.length(); + + setDefaultAttribute(null); + if (ranges != null) + { + for (int i = 0; i < ranges.length; i++) + { + this.ranges.add(new Integer(ranges[i] + curPos)); + this.attributes.add(attrs[i]); + } + } + startingRange = buffer.length(); + buffer.append(s); + } + + public void append(char c) + { + if (startingRange < 0) + startingRange = buffer.length(); + buffer.append(c); + } + + public void append(char c, AttributedCharacterIterator.Attribute attr) + { + setDefaultAttribute(attr); + buffer.append(c); + setDefaultAttribute(null); + } + + public void setDefaultAttribute(AttributedCharacterIterator.Attribute attr) + { + if (attr == defaultAttr) + return; + + int currentPos = buffer.length(); + + if (startingRange != currentPos && startingRange >= 0) + { + addAttribute(currentPos, defaultAttr); + } + defaultAttr = attr; + startingRange = currentPos; + } + + public AttributedCharacterIterator.Attribute getDefaultAttribute() + { + return defaultAttr; + } + + public void cutTail(int length) + { + buffer.setLength(buffer.length()-length); + } + + public int length() + { + return buffer.length(); + } + + public void clear() + { + buffer.setLength(0); + ranges.clear(); + attributes.clear(); 
+ defaultAttr = null; + startingRange = -1; + } + + /** + * This method synchronizes the state of the attribute array. + * After calling it you may call {@link #getRanges()} and {@link #getAttributes()}. + */ + public void sync() + { + if (startingRange < 0 || startingRange == buffer.length()) + return; + + addAttribute(buffer.length(), defaultAttr); + + a_ranges = new int[ranges.size()]; + for (int i = 0; i < a_ranges.length; i++) + a_ranges[i] = ((Integer)(ranges.get (i))).intValue(); + + a_attributes = new HashMap[attributes.size()]; + System.arraycopy(attributes.toArray(), 0, a_attributes, 0, a_attributes.length); + } + + /** + * This method returns the internal CPStringBuilder describing + * the attributed string. + * + * @return An instance of CPStringBuilder which contains the string. + */ + public CPStringBuilder getBuffer() + { + return buffer; + } + + /** + * This method returns the ranges for the attributes. + * + * @return An array of int describing the ranges. + */ + public int[] getRanges() + { + return a_ranges; + } + + /** + * This method returns the array containing the map on the + * attributes. + * + * @return An array of {@link java.util.Map} containing the attributes. + */ + public HashMap[] getAttributes() + { + return a_attributes; + } +} diff --git a/libjava/classpath/gnu/java/text/BaseBreakIterator.java b/libjava/classpath/gnu/java/text/BaseBreakIterator.java new file mode 100644 index 000000000..b69f698a1 --- /dev/null +++ b/libjava/classpath/gnu/java/text/BaseBreakIterator.java @@ -0,0 +1,124 @@ +/* BaseBreakIterator.java -- Base class for default BreakIterators + Copyright (C) 1999, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version.
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.text; + +import java.text.BreakIterator; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; + +/** + * @author Tom Tromey + * @date March 22, 1999 + */ + +public abstract class BaseBreakIterator extends BreakIterator +{ + public BaseBreakIterator () + { + // It isn't documented, but break iterators are created in a + // working state; their methods won't throw exceptions before + // setText(). 
+ iter = new StringCharacterIterator(""); + } + + public int current () + { + return iter.getIndex(); + } + + public int first () + { + iter.first(); + return iter.getBeginIndex(); + } + + /** + * Return the first boundary after pos. + * This has the side effect of setting the index of the + * CharacterIterator. + */ + public int following (int pos) + { + iter.setIndex(pos); + int r = next (); + return r; + } + + public CharacterIterator getText () + { + return iter; + } + + public int last () + { + iter.last(); + // Go past the last character. + iter.next(); + return iter.getEndIndex(); + } + + public int next (int n) + { + int r = iter.getIndex (); + if (n > 0) + { + while (n > 0 && r != DONE) + { + r = next (); + --n; + } + } + else if (n < 0) + { + while (n < 0 && r != DONE) + { + r = previous (); + ++n; + } + } + return r; + } + + public void setText (CharacterIterator newText) + { + iter = newText; + } + + protected CharacterIterator iter; +} diff --git a/libjava/classpath/gnu/java/text/CharacterBreakIterator.java b/libjava/classpath/gnu/java/text/CharacterBreakIterator.java new file mode 100644 index 000000000..565eb9b9d --- /dev/null +++ b/libjava/classpath/gnu/java/text/CharacterBreakIterator.java @@ -0,0 +1,213 @@ +/* CharacterBreakIterator.java - Default character BreakIterator. + Copyright (C) 1999, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.text; + +import java.text.CharacterIterator; + +/** + * @author Tom Tromey + * @date March 19, 1999 + * Written using The Unicode Standard, Version 2.0. + */ + +public class CharacterBreakIterator extends BaseBreakIterator +{ + // Hangul Jamo constants from Unicode book. + private static final int LBase = 0x1100; + private static final int VBase = 0x1161; + private static final int TBase = 0x11a7; + private static final int LCount = 19; + private static final int VCount = 21; + private static final int TCount = 28; + + // Information about surrogates. 
+ private static final int highSurrogateStart = 0xD800; + private static final int highSurrogateEnd = 0xDBFF; + private static final int lowSurrogateStart = 0xDC00; + private static final int lowSurrogateEnd = 0xDFFF; + + public Object clone () + { + return new CharacterBreakIterator (this); + } + + public CharacterBreakIterator () + { + } + + private CharacterBreakIterator (CharacterBreakIterator other) + { + iter = (CharacterIterator) other.iter.clone(); + } + + // Some methods to tell us different properties of characters. + private final boolean isL (char c) + { + return c >= LBase && c <= LBase + LCount; + } + private final boolean isV (char c) + { + return c >= VBase && c <= VBase + VCount; + } + private final boolean isT (char c) + { + return c >= TBase && c <= TBase + TCount; + } + private final boolean isLVT (char c) + { + return isL (c) || isV (c) || isT (c); + } + private final boolean isHighSurrogate (char c) + { + return c >= highSurrogateStart && c <= highSurrogateEnd; + } + private final boolean isLowSurrogate (char c) + { + return c >= lowSurrogateStart && c <= lowSurrogateEnd; + } + + public int next () + { + int end = iter.getEndIndex(); + if (iter.getIndex() == end) + return DONE; + + char c; + for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c) + { + c = iter.next(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + // Break after paragraph separators. + if (type == Character.PARAGRAPH_SEPARATOR) + break; + + // Now we need some lookahead. + char ahead = iter.next(); + iter.previous(); + if (ahead == CharacterIterator.DONE) + break; + int aheadType = Character.getType(ahead); + + if (aheadType != Character.NON_SPACING_MARK + && ! isLowSurrogate (ahead) + && ! isLVT (ahead)) + break; + if (! isLVT (c) && isLVT (ahead)) + break; + if (isL (c) && ! isLVT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + if (isV (c) && ! 
isV (ahead) && !isT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + if (isT (c) && ! isT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + + if (! isHighSurrogate (c) && isLowSurrogate (ahead)) + break; + if (isHighSurrogate (c) && ! isLowSurrogate (ahead)) + break; + if (! isHighSurrogate (prev) && isLowSurrogate (c)) + break; + } + + return iter.getIndex(); + } + + public int previous () + { + if (iter.getIndex() == iter.getBeginIndex()) + return DONE; + + while (iter.getIndex() >= iter.getBeginIndex()) + { + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + if (type != Character.NON_SPACING_MARK + && ! isLowSurrogate (c) + && ! isLVT (c)) + break; + + // Now we need some lookahead. + char ahead = iter.previous(); + if (ahead == CharacterIterator.DONE) + { + iter.next(); + break; + } + char ahead2 = iter.previous(); + iter.next(); + iter.next(); + if (ahead2 == CharacterIterator.DONE) + break; + int aheadType = Character.getType(ahead); + + if (aheadType == Character.PARAGRAPH_SEPARATOR) + break; + + if (isLVT (c) && ! isLVT (ahead)) + break; + if (! isLVT (c) && type != Character.NON_SPACING_MARK + && isL (ahead)) + break; + if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK + && isV (ahead)) + break; + if (! isT (c) && type != Character.NON_SPACING_MARK + && isT (ahead)) + break; + + if (isLowSurrogate (c) && ! isHighSurrogate (ahead)) + break; + if (! isLowSurrogate (c) && isHighSurrogate (ahead)) + break; + if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2)) + break; + } + + return iter.getIndex(); + } +} diff --git a/libjava/classpath/gnu/java/text/FormatBuffer.java b/libjava/classpath/gnu/java/text/FormatBuffer.java new file mode 100644 index 000000000..590b16cce --- /dev/null +++ b/libjava/classpath/gnu/java/text/FormatBuffer.java @@ -0,0 +1,136 @@ +/* FormatBuffer.java -- General interface to build attributed strings. 
+ Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ +package gnu.java.text; + +import java.text.AttributedCharacterIterator; +import java.util.HashMap; + +/** + * This interface describes a modifiable buffer which contains attributed + * characters. The implementation may or may not implements attributes. 
It + * aims to greatly simplify and clarify the implementation of java.text + * formatters. The buffer may be appended or have its tail cut. It may also + * be completely cleaned up. + * + * @author Guilhem Lavaux + * @date April 10, 2004 + */ +public interface FormatBuffer +{ + /** + * This method appends a simple string to the buffer. This part of + * the buffer will be attributed using the default attribute. + * + * @param s The string to append to the buffer. + */ + public void append(String s); + + /** + * This method appends a simple string to the buffer. This part of + * the buffer will have the specified attribute (and only this one). + * The default attribute may be changed after calling this method. + * + * @param s The string to append to the buffer. + * @param attr Attribute to use for the string in the buffer. + */ + public void append(String s, AttributedCharacterIterator.Attribute attr); + + /** + * This method appends a simple string to the buffer. This part of + * the buffer will be attributed using the specified ranges and attributes. + * For an example of how to specify ranges, see {@link gnu.java.text.FormatCharacterIterator}. + * + * @param s The string to append to the buffer. + * @param ranges The ranges describing how the attributes should be applied + * to the string. + * @param attrs The attributes of the string in the buffer. + */ + public void append(String s, int[] ranges, HashMap[] attrs); + + /** + * This method appends a simple char to the buffer. This part of + * the buffer will be attributed using the default attribute. + * + * @param c The character to append to the buffer. + */ + public void append(char c); + + /** + * This method appends a simple character to the buffer. This part of + * the buffer will have the specified attribute (and only this one). + * The default attribute may be changed after calling this method. + * + * @param c The character to append to the buffer.
+ * @param attr Attribute to use for the character in the buffer. + */ + public void append(char c, AttributedCharacterIterator.Attribute attr); + + /** + * This method changes the current default attribute for the next string + * or character which will be appended to the buffer. + * + * @param attr The attribute which will be used by default. + */ + public void setDefaultAttribute(AttributedCharacterIterator.Attribute attr); + + /** + * This method returns the current default attribute for the buffer. + * + * @return The default attribute for the buffer. + */ + public AttributedCharacterIterator.Attribute getDefaultAttribute(); + + /** + * This method cuts the last characters of the buffer. The number of + * characters to cut is given by "length". + * + * @param length Number of characters to cut at the end of the buffer. + */ + public void cutTail(int length); + + /** + * This method completely resets the buffer. + */ + public void clear(); + + /** + * This method returns the number of characters in the buffer. + * + * @return The number of characters in the buffer. + */ + public int length(); +} diff --git a/libjava/classpath/gnu/java/text/FormatCharacterIterator.java b/libjava/classpath/gnu/java/text/FormatCharacterIterator.java new file mode 100644 index 000000000..889394ca4 --- /dev/null +++ b/libjava/classpath/gnu/java/text/FormatCharacterIterator.java @@ -0,0 +1,533 @@ +/* FormatCharacterIterator.java -- Implementation of AttributedCharacterIterator for + formatters. + Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version.
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ +package gnu.java.text; + +import java.text.AttributedCharacterIterator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import java.util.Vector; + +/** + * This class should not be put public and it is only intended to the + * classes of the java.text package. Its aim is to build a segmented + * character iterator by appending strings and adding attributes to + * portions of strings. 
The code intends to do some optimization + * concerning memory consumption and attribute access but at the + * end it is only an AttributedCharacterIterator. + * + * @author Guilhem Lavaux + * @date November 22, 2003 + */ +public class FormatCharacterIterator implements AttributedCharacterIterator +{ + private String formattedString; + private int charIndex; + private int attributeIndex; + private int[] ranges; + private HashMap[] attributes; + private static final boolean DEBUG = false; + + /** + * This constructor builds an empty iterated strings. The attributes + * are empty and so is the string. However you may append strings + * and attributes to this iterator. + */ + public FormatCharacterIterator() + { + formattedString = ""; + ranges = new int[0]; + attributes = new HashMap[0]; + } + + /** + * This constructor take a string s, a set of ranges + * and the corresponding attributes. This is used to build an iterator. + * The array ranges should be formatted as follow: + * each element of ranges specifies the index in the string + * until which the corresponding map of attributes at the same position + * is applied. For example, if you have: + *
+   *   s = "hello";
+   *   ranges = new int[] { 2, 5 };
+   *   attributes = new HashMap[2];
+   * 
+ * "he" will have the attributes attributes[0], + * "llo" the attributes[1]. + */ + public FormatCharacterIterator (String s, int[] ranges, HashMap[] attributes) + { + formattedString = s; + this.ranges = ranges; + this.attributes = attributes; + } + + /* + * The following methods are inherited from AttributedCharacterIterator, + * and thus are already documented. + */ + + public Set getAllAttributeKeys() + { + if (attributes != null && attributes[attributeIndex] != null) + return attributes[attributeIndex].keySet(); + else + return new HashSet(); + } + + public Map getAttributes() + { + if (attributes != null && attributes[attributeIndex] != null) + return attributes[attributeIndex]; + else + return new HashMap(); + } + + public Object getAttribute (AttributedCharacterIterator.Attribute attrib) + { + if (attributes != null && attributes[attributeIndex] != null) + return attributes[attributeIndex].get (attrib); + else + return null; + } + + public int getRunLimit(Set reqAttrs) + { + if (attributes == null) + return formattedString.length(); + + int currentAttrIndex = attributeIndex; + Set newKeys; + + do + { + currentAttrIndex++; + if (currentAttrIndex == attributes.length) + return formattedString.length(); + if (attributes[currentAttrIndex] == null) + break; + newKeys = attributes[currentAttrIndex].keySet(); + } + while (newKeys.containsAll (reqAttrs)); + + return ranges[currentAttrIndex-1]; + } + + public int getRunLimit (AttributedCharacterIterator.Attribute attribute) + { + Set s = new HashSet(); + + s.add (attribute); + return getRunLimit (s); + } + + public int getRunLimit() + { + if (attributes == null) + return formattedString.length(); + if (attributes[attributeIndex] == null) + { + for (int i=attributeIndex+1;i 0) ? 
ranges[currentAttrIndex-1] : 0; + } + + public int getRunStart() + { + if (attributes == null) + return 0; + + if (attributes[attributeIndex] == null) + { + for (int i=attributeIndex;i>0;i--) + if (attributes[i] != null) + return ranges[attributeIndex-1]; + return 0; + } + + return getRunStart (attributes[attributeIndex].keySet()); + } + + public int getRunStart (AttributedCharacterIterator.Attribute attribute) + { + Set s = new HashSet(); + + s.add (attribute); + return getRunStart (s); + } + + public Object clone() + { + return new FormatCharacterIterator (formattedString, ranges, attributes); + } + + /* + * The following methods are inherited from CharacterIterator and thus + * are already documented. + */ + + public char current() + { + return formattedString.charAt (charIndex); + } + + public char first() + { + charIndex = 0; + attributeIndex = 0; + return formattedString.charAt (0); + } + + public int getBeginIndex() + { + return 0; + } + + public int getEndIndex() + { + return formattedString.length(); + } + + public int getIndex() + { + return charIndex; + } + + public char last() + { + charIndex = formattedString.length()-1; + if (attributes != null) + attributeIndex = attributes.length-1; + return formattedString.charAt (charIndex); + } + + public char next() + { + charIndex++; + if (charIndex >= formattedString.length()) + { + charIndex = getEndIndex(); + return DONE; + } + if (attributes != null) + { + if (charIndex >= ranges[attributeIndex]) + attributeIndex++; + } + return formattedString.charAt (charIndex); + } + + public char previous() + { + charIndex--; + if (charIndex < 0) + { + charIndex = 0; + return DONE; + } + + if (attributes != null) + { + if (charIndex < ranges[attributeIndex]) + attributeIndex--; + } + return formattedString.charAt (charIndex); + } + + public char setIndex (int position) + { + if (position < 0 || position > formattedString.length()) + throw new IllegalArgumentException ("position is out of range"); + + charIndex = 
position; + if (attributes != null) + { + for (attributeIndex=0;attributeIndex charIndex) + break; + attributeIndex--; + } + if (charIndex == formattedString.length()) + return DONE; + else + return formattedString.charAt (charIndex); + } + + /** + * This method merge the specified attributes and ranges with the + * internal tables. This method is in charge of the optimization + * of tables. Two following sets of attributes are never the same. + * + * @see #FormatCharacterIterator() + * + * @param attributes the new array attributes to apply to the string. + */ + public void mergeAttributes (HashMap[] attributes, int[] ranges) + { + Vector new_ranges = new Vector(); + Vector new_attributes = new Vector(); + int i = 0, j = 0; + + debug("merging " + attributes.length + " attrs"); + + while (i < this.ranges.length && j < ranges.length) + { + if (this.attributes[i] != null) + { + new_attributes.add (this.attributes[i]); + if (attributes[j] != null) + this.attributes[i].putAll (attributes[j]); + } + else + { + new_attributes.add (attributes[j]); + } + if (this.ranges[i] == ranges[j]) + { + new_ranges.add (new Integer (ranges[j])); + i++; + j++; + } + else if (this.ranges[i] < ranges[j]) + { + new_ranges.add (new Integer (this.ranges[i])); + i++; + } + else + { + new_ranges.add (new Integer (ranges[j])); + j++; + } + } + + if (i != this.ranges.length) + { + for (;inull the string will simply have no + * attributes. 
+ */ + public void append (String text, HashMap local_attributes) + { + int[] new_ranges = new int[ranges.length+1]; + HashMap[] new_attributes = new HashMap[attributes.length+1]; + + formattedString += text; + System.arraycopy (attributes, 0, new_attributes, 0, attributes.length); + System.arraycopy (ranges, 0, new_ranges, 0, ranges.length); + new_ranges[ranges.length] = formattedString.length(); + new_attributes[attributes.length] = local_attributes; + + ranges = new_ranges; + attributes = new_attributes; + } + + /** + * This method appends a string without attributes. It is completely + * equivalent to call {@link #append(String,HashMap)} with local_attributes + * equal to null. + * + * @param text The string to append to the iterator. + */ + public void append (String text) + { + append (text, null); + } + + /** + * This method adds a set of attributes to a range of character. The + * bounds are always inclusive. In the case many attributes have to + * be added it is advised to directly use {@link #mergeAttributes([Ljava.util.HashMap;[I} + * + * @param attributes Attributes to merge into the iterator. + * @param range_start Lower bound of the range of characters which will receive the + * attribute. + * @param range_end Upper bound of the range of characters which will receive the + * attribute. + * + * @throws IllegalArgumentException if ranges are out of bounds. 
+ */ + public void addAttributes(HashMap attributes, int range_start, int range_end) + { + if (range_start == 0) + mergeAttributes(new HashMap[] { attributes }, new int[] { range_end }); + else + mergeAttributes(new HashMap[] { null, attributes }, new int[] { range_start, range_end }); + } + + private void debug(String s) + { + if (DEBUG) + System.out.println(s); + } + + private void dumpTable() + { + int start_range = 0; + + if (!DEBUG) + return; + + System.out.println("Dumping internal table:"); + for (int i = 0; i < ranges.length; i++) + { + System.out.print("\t" + start_range + " => " + ranges[i] + ":"); + if (attributes[i] == null) + System.out.println("null"); + else + { + Set keyset = attributes[i].keySet(); + if (keyset != null) + { + Iterator keys = keyset.iterator(); + + while (keys.hasNext()) + System.out.print(" " + keys.next()); + } + else + System.out.println("keySet null"); + System.out.println(); + } + } + System.out.println(); + System.out.flush(); + } +} diff --git a/libjava/classpath/gnu/java/text/LineBreakIterator.java b/libjava/classpath/gnu/java/text/LineBreakIterator.java new file mode 100644 index 000000000..7e44121b0 --- /dev/null +++ b/libjava/classpath/gnu/java/text/LineBreakIterator.java @@ -0,0 +1,194 @@ +/* LineBreakIterator.java - Default word BreakIterator. + Copyright (C) 1999, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.text; + +import java.text.CharacterIterator; + +/** + * @author Tom Tromey + * @date March 22, 1999 + * Written using The Unicode Standard, Version 2.0. + */ + +public class LineBreakIterator extends BaseBreakIterator +{ + public Object clone () + { + return new LineBreakIterator (this); + } + + public LineBreakIterator () + { + } + + private LineBreakIterator (LineBreakIterator other) + { + iter = (CharacterIterator) other.iter.clone(); + } + + // Some methods to tell us different properties of characters. 
+ private final boolean isNb (char c) + { + return (c == 0x00a0 // NO-BREAK SPACE + || c == 0x2011 // NON-BREAKING HYPHEN + || c == 0xfeff); // ZERO WITH NO-BREAK SPACE + } + private final boolean isClose (int type) + { + return (type == Character.END_PUNCTUATION + // Unicode book says "comma, period, ...", which I take to + // mean "Po" class. + || type == Character.OTHER_PUNCTUATION); + } + private final boolean isIdeo (char c) + { + return (c >= 0x3040 && c <= 0x309f // Hiragana + || c >= 0x30a0 && c <= 0x30ff // Katakana + || c >= 0x4e00 && c <= 0x9fff // Han + || c >= 0x3100 && c <= 0x312f); // Bopomofo + } + + public int next () + { + int end = iter.getEndIndex(); + if (iter.getIndex() == end) + return DONE; + + while (iter.getIndex() < end) + { + char c = iter.current(); + int type = Character.getType(c); + + char n = iter.next(); + + if (n == CharacterIterator.DONE + || type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Handle two cases where we must scan for non-spacing marks. + int start = iter.getIndex(); + if (type == Character.SPACE_SEPARATOR + || type == Character.START_PUNCTUATION + || isIdeo (c)) + { + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + if (type == Character.SPACE_SEPARATOR) + { + int nt = Character.getType(n); + if (nt != Character.NON_SPACING_MARK + && nt != Character.SPACE_SEPARATOR + && ! isNb (n)) + break; + } + else if (type == Character.START_PUNCTUATION) + { + if (isIdeo (n)) + { + // Open punctuation followed by non spacing marks + // and then ideograph does not have a break in + // it. So skip all this. + start = iter.getIndex(); + } + } + else + { + // Ideograph preceded this character. 
+ if (isClose (Character.getType(n))) + break; + } + } + iter.setIndex(start); + } + + return iter.getIndex(); + } + + public int previous () + { + int start = iter.getBeginIndex(); + if (iter.getIndex() == start) + return DONE; + + while (iter.getIndex() >= start) + { + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + + int nt = Character.getType(n); + // Break after paragraph separators. + if (nt == Character.PARAGRAPH_SEPARATOR + || nt == Character.LINE_SEPARATOR) + break; + + // Skip non-spacing marks. + int init = iter.getIndex(); + while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK) + { + n = iter.previous(); + nt = Character.getType(n); + } + + if (nt == Character.SPACE_SEPARATOR + && type != Character.SPACE_SEPARATOR + && type != Character.NON_SPACING_MARK + && ! isNb (c)) + break; + if (! isClose (type) && isIdeo (n)) + break; + if (isIdeo (c) && nt != Character.START_PUNCTUATION) + break; + iter.setIndex(init); + } + + return iter.getIndex(); + } +} diff --git a/libjava/classpath/gnu/java/text/SentenceBreakIterator.java b/libjava/classpath/gnu/java/text/SentenceBreakIterator.java new file mode 100644 index 000000000..4da9df2ea --- /dev/null +++ b/libjava/classpath/gnu/java/text/SentenceBreakIterator.java @@ -0,0 +1,247 @@ +/* SentenceBreakIterator.java - Default sentence BreakIterator. + Copyright (C) 1999, 2001, 2002, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.text; + +import java.text.CharacterIterator; + +/** + * @author Tom Tromey + * @date March 23, 1999 + * Written using The Unicode Standard, Version 2.0. 
+ */ + +public class SentenceBreakIterator extends BaseBreakIterator +{ + public Object clone () + { + return new SentenceBreakIterator (this); + } + + public SentenceBreakIterator () + { + } + + private SentenceBreakIterator (SentenceBreakIterator other) + { + iter = (CharacterIterator) other.iter.clone(); + } + + public int next () + { + int end = iter.getEndIndex(); + if (iter.getIndex() == end) + return DONE; + + while (iter.getIndex() < end) + { + char c = iter.current(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + // Always break after paragraph separator. + if (type == Character.PARAGRAPH_SEPARATOR) + break; + + if (c == '!' || c == '?') + { + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.next(); + // Skip (java) space, line and paragraph separators. + while (n != CharacterIterator.DONE && Character.isWhitespace(n)) + n = iter.next(); + + // There's always a break somewhere after `!' or `?'. + break; + } + + if (c == '.') + { + int save = iter.getIndex(); + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.next(); + // Skip (java) space, line and paragraph separators. + // We keep count because we need at least one for this period to + // represent a terminator. + int spcount = 0; + while (n != CharacterIterator.DONE && Character.isWhitespace(n)) + { + n = iter.next(); + ++spcount; + } + if (spcount > 0) + { + int save2 = iter.getIndex(); + // Skip over open puncutation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.START_PUNCTUATION) + n = iter.next(); + // Next character must not be lower case. + if (n == CharacterIterator.DONE + || ! 
Character.isLowerCase(n)) + { + iter.setIndex(save2); + break; + } + } + iter.setIndex(save); + } + } + + return iter.getIndex(); + } + + private final int previous_internal () + { + int start = iter.getBeginIndex(); + if (iter.getIndex() == start) + return DONE; + + while (iter.getIndex() >= start) + { + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + int nt = Character.getType(n); + + if (! Character.isLowerCase(c) + && (nt == Character.START_PUNCTUATION + || Character.isWhitespace(n))) + { + int save = iter.getIndex(); + int save_nt = nt; + char save_n = n; + // Skip open punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.START_PUNCTUATION) + n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + if (Character.isWhitespace(n)) + { + // Must have at least one (java) space after the `.'. + int save2 = iter.getIndex(); + while (n != CharacterIterator.DONE + && Character.isWhitespace(n)) + n = iter.previous(); + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.previous(); + if (n == CharacterIterator.DONE || n == '.') + { + // Communicate location of actual end. + period = iter.getIndex(); + iter.setIndex(save2); + break; + } + } + iter.setIndex(save); + nt = save_nt; + n = save_n; + } + + if (nt == Character.PARAGRAPH_SEPARATOR) + { + // Communicate location of actual end. + period = iter.getIndex(); + break; + } + else if (Character.isWhitespace(n) + || nt == Character.END_PUNCTUATION) + { + int save = iter.getIndex(); + // Skip (java) space, line and paragraph separators. + while (n != CharacterIterator.DONE + && Character.isWhitespace(n)) + n = iter.previous(); + // Skip close punctuation. 
+ while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.previous(); + int here = iter.getIndex(); + iter.setIndex(save); + if (n == CharacterIterator.DONE || n == '!' || n == '?') + { + // Communicate location of actual end. + period = here; + break; + } + } + else if (n == '!' || n == '?') + { + // Communicate location of actual end. + period = iter.getIndex(); + break; + } + } + + return iter.getIndex(); + } + + public int previous () + { + // We want to skip over the first sentence end to the second one. + // However, at the end of the string we want the first end. + int here = iter.getIndex(); + period = here; + int first = previous_internal (); + if (here == iter.getEndIndex() || first == DONE) + return first; + iter.setIndex(period); + return previous_internal (); + } + + // This is used for communication between previous and + // previous_internal. + private int period; +} diff --git a/libjava/classpath/gnu/java/text/StringFormatBuffer.java b/libjava/classpath/gnu/java/text/StringFormatBuffer.java new file mode 100644 index 000000000..2367fccb3 --- /dev/null +++ b/libjava/classpath/gnu/java/text/StringFormatBuffer.java @@ -0,0 +1,127 @@ +/* StringFormatBuffer.java -- Implements FormatBuffer using StringBuffer. + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. 
If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ +package gnu.java.text; + +import java.text.AttributedCharacterIterator; +import java.util.HashMap; + +/** + * This class is an implementation of a FormatBuffer without attributes. 
+ * + * @author Guilhem Lavaux + * @date April 10, 2004 + */ +public class StringFormatBuffer implements FormatBuffer +{ + private final StringBuffer buffer; + private AttributedCharacterIterator.Attribute defaultAttr; + + public StringFormatBuffer(int prebuffer) + { + buffer = new StringBuffer(prebuffer); + } + + public StringFormatBuffer(StringBuffer buffer) + { + this.buffer = buffer; + } + + public void append(String s) + { + buffer.append(s); + } + + public void append(String s, AttributedCharacterIterator.Attribute attr) + { + buffer.append(s); + } + + public void append(String s, int[] ranges, HashMap[] attrs) + { + buffer.append(s); + } + + public void append(char c) + { + buffer.append(c); + } + + public void append(char c, AttributedCharacterIterator.Attribute attr) + { + buffer.append(c); + } + + public void setDefaultAttribute(AttributedCharacterIterator.Attribute attr) + { + defaultAttr = attr; + } + + public AttributedCharacterIterator.Attribute getDefaultAttribute() + { + return defaultAttr; + } + + public void cutTail(int length) + { + buffer.setLength(buffer.length()-length); + } + + public int length() + { + return buffer.length(); + } + + public void clear() + { + buffer.setLength(0); + } + + /** + * This method returns the internal {@link java.lang.StringBuffer} which + * contains the string of character. + */ + public StringBuffer getBuffer() + { + return buffer; + } + + public String toString() + { + return buffer.toString(); + } + +} diff --git a/libjava/classpath/gnu/java/text/WordBreakIterator.java b/libjava/classpath/gnu/java/text/WordBreakIterator.java new file mode 100644 index 000000000..fded4bf26 --- /dev/null +++ b/libjava/classpath/gnu/java/text/WordBreakIterator.java @@ -0,0 +1,250 @@ +/* WordBreakIterator.java - Default word BreakIterator. + Copyright (C) 1999, 2001, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. 
+ +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.text; + +import java.text.CharacterIterator; + +/** + * @author Tom Tromey + * @date March 22, 1999 + * Written using The Unicode Standard, Version 2.0. 
+ */ + +public class WordBreakIterator extends BaseBreakIterator +{ + public Object clone () + { + return new WordBreakIterator (this); + } + + public WordBreakIterator () + { + } + + private WordBreakIterator (WordBreakIterator other) + { + iter = (CharacterIterator) other.iter.clone(); + } + + // Some methods to tell us different properties of characters. + private final boolean isHira (char c) + { + return c >= 0x3040 && c <= 0x309f; + } + private final boolean isKata (char c) + { + return c >= 0x30a0 && c <= 0x30ff; + } + private final boolean isHan (char c) + { + return c >= 0x4e00 && c <= 0x9fff; + } + + public int next () + { + int end = iter.getEndIndex(); + if (iter.getIndex() == end) + return DONE; + + while (iter.getIndex() < end) + { + char c = iter.current(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + // Break after paragraph separators. + if (type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Break between letters and non-letters. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + boolean is_letter = Character.isLetter(c); + if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK + && Character.isLetter(n)) + break; + + // Always break after certain symbols, such as punctuation. + // This heuristic is derived from hints in the JCL book and is + // not part of Unicode. It seems to be right, however. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. 
+ if (c != '\'' + && (type == Character.DASH_PUNCTUATION + || type == Character.START_PUNCTUATION + || type == Character.END_PUNCTUATION + || type == Character.CONNECTOR_PUNCTUATION + || type == Character.OTHER_PUNCTUATION + || type == Character.MATH_SYMBOL + || type == Character.CURRENCY_SYMBOL + || type == Character.MODIFIER_SYMBOL + || type == Character.OTHER_SYMBOL + || type == Character.FORMAT + || type == Character.CONTROL)) + break; + + boolean is_hira = isHira (c); + boolean is_kata = isKata (c); + boolean is_han = isHan (c); + + // Special case Japanese. + if (! is_hira && ! is_kata && ! is_han + && type != Character.NON_SPACING_MARK + && (isHira (n) || isKata (n) || isHan (n))) + break; + + if (is_hira || is_kata || is_han || is_letter) + { + // Now we need to do some lookahead. We might need to do + // quite a bit of lookahead, so we save our position and + // restore it later. + int save = iter.getIndex(); + // Skip string of non spacing marks. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.next(); + if (n == CharacterIterator.DONE) + break; + if ((is_hira && ! isHira (n)) + || (is_kata && ! isHira (n) && ! isKata (n)) + || (is_han && ! isHira (n) && ! isHan (n)) + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + || (is_letter && ! Character.isLetter(n) && n != '\'')) + break; + iter.setIndex(save); + } + } + + return iter.getIndex(); + } + + public int previous () + { + int start = iter.getBeginIndex(); + if (iter.getIndex() == start) + return DONE; + + while (iter.getIndex() >= start) + { + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + + boolean is_hira = isHira (c); + boolean is_kata = isKata (c); + boolean is_han = isHan (c); + boolean is_letter = Character.isLetter(c); + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + int type = Character.getType(n); + // Break after paragraph separators. 
+ if (type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Break between letters and non-letters. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + if (n != '\'' && ! Character.isLetter(n) + && type != Character.NON_SPACING_MARK + && is_letter) + break; + + // Always break after certain symbols, such as punctuation. + // This heuristic is derived from hints in the JCL book and is + // not part of Unicode. It seems to be right, however. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + if (n != '\'' + && (type == Character.DASH_PUNCTUATION + || type == Character.START_PUNCTUATION + || type == Character.END_PUNCTUATION + || type == Character.CONNECTOR_PUNCTUATION + || type == Character.OTHER_PUNCTUATION + || type == Character.MATH_SYMBOL + || type == Character.CURRENCY_SYMBOL + || type == Character.MODIFIER_SYMBOL + || type == Character.OTHER_SYMBOL + || type == Character.FORMAT + || type == Character.CONTROL)) + break; + + // Special case Japanese. + if ((is_hira || is_kata || is_han) + && ! isHira (n) && ! isKata (n) && ! isHan (n) + && type != Character.NON_SPACING_MARK) + break; + + // We might have to skip over non spacing marks to see what's + // on the other side. + if (! is_hira || (! is_letter && c != '\'')) + { + int save = iter.getIndex(); + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.previous(); + iter.setIndex(save); + // This is a strange case: a bunch of non-spacing marks at + // the beginning. We treat the current location as a word + // break. + if (n == CharacterIterator.DONE) + break; + if ((isHira (n) && ! is_hira) + || (isKata (n) && ! is_hira && ! is_kata) + || (isHan (n) && ! is_hira && ! is_han) + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + || (! 
is_letter && c != '\'' && Character.isLetter(n))) + break; + } + } + + return iter.getIndex(); + } +} diff --git a/libjava/classpath/gnu/java/text/package.html b/libjava/classpath/gnu/java/text/package.html new file mode 100644 index 000000000..a1025a8e9 --- /dev/null +++ b/libjava/classpath/gnu/java/text/package.html @@ -0,0 +1,46 @@ + + + + +GNU Classpath - gnu.java.text + + +

+ + + -- cgit v1.2.3