summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java')
-rw-r--r--libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java373
1 files changed, 373 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java
new file mode 100644
index 000000000..45ac181b3
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java
@@ -0,0 +1,373 @@
+/* ReaderTokenizer.java -- splits the input char sequence into tokens.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Tokenizer that reads characters from a Reader and splits them into tokens.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class ReaderTokenizer
+ extends Constants
+{
+ /**
+ * This is set to true each time getNextToken() is called and is never
+ * cleared by this class. Used in preventing loops when all patterns
+ * refuse to accept the invalid input.
+ */
+ protected boolean advanced;
+
+ /**
+ * If true, the tokens returned by getNextToken() are also placed in
+ * the backup queue, so that reset() can return to the marked point.
+ */
+ protected boolean backupMode;
+
+ /**
+ * The buffer that receives the characters read from the document.
+ */
+ Buffer buffer = new Buffer();
+
+ /**
+ * The queue for supporting mark() and reset().
+ */
+ Queue backup = new Queue();
+
+ /**
+ * The queue of found tokens that have not been returned yet.
+ */
+ Queue queue = new Queue();
+
+ /**
+ * The reader to read the document from; set by reset(Reader).
+ */
+ Reader reader;
+
+ /**
+ * Array of char tokens (not referenced by the methods of this class).
+ */
+ char[] charTokens;
+
+ /**
+ * Array of string tokens (not referenced by the methods of this class).
+ */
+ String[] stringTokens;
+
+ /**
+ * The current reader position: -1 until the first read attempt.
+ */
+ int readerPosition = -1;
+
+ /**
+ * Creates a new ReaderTokenizer. The reset(...) method must be
+ * subsequently called to set the reader.
+ */
+ public ReaderTokenizer()
+ {
+ }
+
+ /**
+  * Return the line separator reported by the buffer: one of \n, \r or \r\n.
+  */
+ public String getEndOfLineSequence()
+ {
+   String lineSeparator = buffer.getEndOfLineSequence();
+   return lineSeparator;
+ }
+
+ /**
+  * Remove and return the next queued token, reading from the reader
+  * first if the queue is empty; in backup mode it is also saved.
+  * @return the next token, or an EOF token if none could be obtained.
+  */
+ public Token getNextToken()
+ {
+   advanced = true;
+   Token next;
+   try
+     {
+       if (queue.isEmpty())
+         read(1);
+       if (queue.isEmpty())
+         next = new Token(EOF, new Location(readerPosition));
+       else
+         next = queue.next();
+     }
+   catch (IOException ioFailure)
+     {
+       throw new ParseException("IO Exception", ioFailure);
+     }
+   if (backupMode)
+     backup.add(next);
+   return next;
+ }
+
+ /**
+  * Peeks at the token lying the given number of tokens ahead;
+  * getTokenAhead(0) is the token getNextToken() would return.
+  * The token stays in the queue, although more input may be read.
+  * @param ahead zero-based offset of the wanted token in the queue.
+  * @return that token, or the EOF token if it could not be read.
+  */
+ public Token getTokenAhead(int ahead)
+ {
+   try
+     {
+       read(ahead - queue.size() + 1);
+       // Index ahead is valid only if more than ahead tokens are queued.
+       return queue.size() > ahead ? queue.get(ahead) : eofToken();
+     }
+   catch (IOException ex)
+     {
+       throw new ParseException("IO Exception", ex);
+     }
+ }
+
+ /**
+  * Peeks at the token immediately ahead without removing it from
+  * the queue, reading one more token first if the queue is empty.
+  * If the queue is still empty afterwards, the EOF token is returned.
+  * Intended as the equivalent of getTokenAhead(0).
+  */
+ public Token getTokenAhead()
+ {
+   try
+     {
+       if (queue.isEmpty())
+         read(1);
+
+       // Still empty after reading: no further token was produced.
+       boolean nothingQueued = queue.isEmpty();
+       return nothingQueued ? eofToken() : queue.get(0);
+     }
+   catch (IOException ioFailure)
+     {
+       throw new ParseException("IO Exception", ioFailure);
+     }
+ }
+
+ /**
+ * Reports an error; this implementation prints msg to System.out and ignores 'at'.
+ */
+ public void error(String msg, Token at)
+ {
+ System.out.println(msg);
+ }
+
+ /**
+  * Switches the backup mode on or off, discarding any tokens saved so far.
+  * While the mode is on, tokens returned by getNextToken() are saved so
+  * that reset() can return to the point where mark(true) was called.
+  * @param mode True if it is required to save tokens, making
+  * returning to the current point possible.
+  */
+ public void mark(boolean mode)
+ {
+   backupMode = mode;
+   backup.clear();
+ }
+
+ /**
+  * Prepare for a new parse: resets the buffer and clears the token queue.
+  * @param a_reader A reader to parse from; the position restarts at -1.
+  */
+ public void reset(Reader a_reader)
+ {
+   buffer.reset();
+   queue.clear();
+   readerPosition = -1;
+   reader = a_reader;
+ }
+
+ /**
+  * Reset the internal cursor to the position where mark(true) was
+  * last called and switch the backup mode off.
+  */
+ public void reset()
+ {
+   if (backupMode)
+     backupMode = false;
+   else
+     throw new AssertionError("Call mark(true) before using reset()!");
+   // Tokens still waiting in the queue go after the saved ones.
+   while (!queue.isEmpty())
+     backup.add(queue.next());
+   // Swap: the saved tokens become the pending queue.
+   Queue saved = backup;
+   backup = queue;
+   queue = saved;
+   backup.clear();
+ }
+
+ /**
+  * Calls readToken() the given number of times; zero and negative
+  * counts are ignored. One readToken() call may queue more than
+  * one token, or none at the end of the stream.
+  * @param numberOfTokens The number of readToken() calls to make.
+  * @throws IOException if the underlying reader fails.
+  */
+ void read(int numberOfTokens)
+   throws IOException
+ {
+   // A countdown loop also covers zero and negative requests.
+   for (int left = numberOfTokens; left > 0; left--)
+     readToken();
+ }
+
+ /**
+ * Reads characters into the buffer until tokenMatches() queues a token or the stream ends.
+ */
+ void readToken()
+ throws IOException
+ {
+ Token t;
+ int ch;
+
+ enlarging:
+ while (true)
+ {
+ t = tokenMatches(); // on a match, tokenMatches() has already queued the token
+ if (t != null)
+ break enlarging;
+ else
+ {
+ ch = reader.read();
+ readerPosition++; // advanced on every read attempt, including one that hits end of stream
+ if (ch == ETX) // an ETX character in the input is replaced by a space; ETX is used below as this method's own end marker
+ ch = ' ';
+ if (ch < 0) // negative: read() reported the end of the stream
+ {
+ if (buffer.length() == 0) // nothing is pending: queue the EOF token
+ {
+ queue.add(eofToken());
+ return;
+ }
+ else
+ {
+ if (buffer.charAt(buffer.length() - 1) != ETX) // pending text, no marker yet: append ETX and retry the match
+ buffer.append(ETX, readerPosition++);
+ else
+ {
+ // Still no match with the ETX marker appended: discard it and queue the rest as OTHER.
+ buffer.setLength(buffer.length() - 1);
+ if (buffer.length() > 0)
+ {
+ t = new Token(OTHER, buffer.toString(),
+ buffer.getLocation(0, buffer.length())
+ );
+ queue.add(t);
+ buffer.setLength(0);
+ }
+ return; // NOTE: nothing is queued on this path if only the ETX marker was left
+ }
+ }
+ }
+ else
+ buffer.append((char) ch, readerPosition);
+ }
+ }
+ }
+
+ /**
+ * Check if the end of buffer matches one of the tokens. If it does,
+ * remove the token sequence from the buffer, queue any unmatched text
+ * that preceded it as an OTHER token, then queue the matched token.
+ * @return The matching token, or null if nothing matched.
+ */
+ Token tokenMatches()
+ {
+ Token rt = endMatches(buffer); // endMatches is not defined in this class (presumably inherited from Constants)
+ if (rt != null) // Remove the matched image
+ {
+ // For an entity, a semicolon that ends the buffer is removed together
+ // with the entity image; a missing semicolon is reported via error().
+ if (rt.kind == ENTITY)
+ {
+ if (buffer.charAt(buffer.length() - 1) == ';')
+ buffer.setLength(buffer.length() - rt.getImage().length() - 1); // truncate by the image length plus one (the semicolon)
+ else
+ {
+ error("Missing closing semicolon for entity '" + rt.getImage() +
+ "'", rt
+ );
+ consumeBuffer(rt);
+ }
+ }
+ else
+ {
+ consumeBuffer(rt);
+ }
+ }
+
+ // Text left in front of the final buffer character matched no token:
+ // queue it as OTHER ahead of the matched token.
+ if (rt != null)
+ {
+ if (buffer.length() > 1)
+ {
+ String rest = buffer.toString();
+ rest = rest.substring(0, rest.length() - 1); // everything except the final buffer character
+
+ Token other =
+ new Token(OTHER, rest, buffer.getLocation(0, buffer.length)); // NOTE(review): field buffer.length here, method length() elsewhere - confirm they agree
+ queue.add(other);
+ consumeBuffer(other);
+ }
+ queue.add(rt); // the matched token is queued after any preceding OTHER text
+ }
+ return rt;
+ }
+
+ // Removes the image-sized range ending before the last character (if delete's end is exclusive).
+ private void consumeBuffer(Token rt)
+ {
+   int end = buffer.length() - 1;
+   buffer.delete(end - rt.getImage().length(), end);
+ }
+
+ /** Create the EOF token: kind EOF, image "#", located at readerPosition. */
+ private Token eofToken()
+ {
+   // The location is taken at the moment the token is created.
+   Location where = new Location(readerPosition);
+   return new Token(EOF, "#", where);
+ }
+}