diff options
Diffstat (limited to 'libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java')
-rw-r--r-- | libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java new file mode 100644 index 000000000..45ac181b3 --- /dev/null +++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java @@ -0,0 +1,373 @@ +/* ReaderTokenizer.java -- splits the input char sequence int tokens. + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.javax.swing.text.html.parser.support.low; + +import java.io.IOException; +import java.io.Reader; + +/** + * Reader splits the input char sequence into tokens. + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class ReaderTokenizer + extends Constants +{ + /** + * This is set to true each time the getNextToken is called. + * Used in preventing loops when all patterns refuse to accept + * the invalid input. + */ + protected boolean advanced; + + /** + * If true, the returned tokens are also placed in the backup + * queue. + */ + protected boolean backupMode; + + /** + * The buffer to read document into. + */ + Buffer buffer = new Buffer(); + + /** + * The queue for supporting mark(). + */ + Queue backup = new Queue(); + + /** + * The queue of found tokens. + */ + Queue queue = new Queue(); + + /** + * The reader to read the document from. + */ + Reader reader; + + /** + * Array of char tokens + */ + char[] charTokens; + + /** + * Array of string tokens. + */ + String[] stringTokens; + + /** + * The current reader position. + */ + int readerPosition = -1; + + /** + * Creates a new ReaderTokenizer. The reset(...) method must be + * subsequently called to set the reader. + */ + public ReaderTokenizer() + { + } + + /** + * Return the sequence, used to separate lines in the document. + * @return one of \n, \r or \r\n. + */ + public String getEndOfLineSequence() + { + return buffer.getEndOfLineSequence(); + } + + /** + * Get the next token. + * @return + */ + public Token getNextToken() + { + Token rt; + advanced = true; + try + { + if (queue.isEmpty()) + read(1); + + if (!queue.isEmpty()) + rt = queue.next(); + else + rt = new Token(EOF, new Location(readerPosition)); + } + catch (IOException ex) + { + throw new ParseException("IO Exception", ex); + } + if (backupMode) + backup.add(rt); + return rt; + } + + /** + * Get a token, lying the given number of tokens + * ahead. getToken(0) will return the same token, + * what would be returned by getNextToken(). + * getToken(..) does change the current position + * in the input stream. If the end of stream is + * reached, the EOF token is always returned. + */ + public Token getTokenAhead(int ahead) + { + try + { + read(ahead - queue.size() + 1); + return queue.size() >= ahead ? queue.get(ahead) : eofToken(); + } + catch (IOException ex) + { + throw new ParseException("IO Exception", ex); + } + } + + /** + * Get a token, bein immediatley ahead. + * If the end of stream is + * reached, the EOF token is always returned. + * The method is equivalent calling getTokenAhead(0). + */ + public Token getTokenAhead() + { + try + { + if (queue.isEmpty()) + read(1); + if (!queue.isEmpty()) + return queue.get(0); + else + return eofToken(); + } + catch (IOException ex) + { + throw new ParseException("IO Exception", ex); + } + } + + /** + * Invokes the error handler. + */ + public void error(String msg, Token at) + { + System.out.println(msg); + } + + /** + * Turns the backup mode on or off. + * It is possible to return where the mark(true) was last called + * by calling reset(). + * @param mode True if it is required to save tokens, making + * returning to the current point possible. + */ + public void mark(boolean mode) + { + backup.clear(); + backupMode = mode; + } + + /** + * Prepare for new parsing from the given stream. + * @param a_reader A reader to parse from. + */ + public void reset(Reader a_reader) + { + reader = a_reader; + readerPosition = -1; + buffer.reset(); + queue.clear(); + } + + /** + * Reset the internal cursor to the position where the mark() + * was last time called. Switches the backup mode off. + */ + public void reset() + { + if (!backupMode) + throw new AssertionError("Call mark(true) before using reset()!"); + backupMode = false; + + // That is now in the queue, will be appended to the end of backup. + while (!queue.isEmpty()) + backup.add(queue.next()); + + Queue t = queue; + queue = backup; + backup = t; + backup.clear(); + } + + /** + * Read the given number of the tokens. Add the needed number of EOF + * tokens if there are no more data in the stream. + * @param numberOfTokens The number of additional tokens to read. + */ + void read(int numberOfTokens) + throws IOException + { + if (numberOfTokens <= 0) + return; + + for (int i = 0; i < numberOfTokens; i++) + readToken(); + } + + /** + * Read next token from the reader, add it to the queue + */ + void readToken() + throws IOException + { + Token t; + int ch; + + enlarging: + while (true) + { + t = tokenMatches(); + if (t != null) + break enlarging; + else + { + ch = reader.read(); + readerPosition++; + if (ch == ETX) + ch = ' '; + if (ch < 0) + { + if (buffer.length() == 0) + { + queue.add(eofToken()); + return; + } + else + { + if (buffer.charAt(buffer.length() - 1) != ETX) + buffer.append(ETX, readerPosition++); + else + { + // Discard terminating ETX + buffer.setLength(buffer.length() - 1); + if (buffer.length() > 0) + { + t = new Token(OTHER, buffer.toString(), + buffer.getLocation(0, buffer.length()) + ); + queue.add(t); + buffer.setLength(0); + } + return; + } + } + } + else + buffer.append((char) ch, readerPosition); + } + } + } + + /** + * Check if the end of buffer matches one of the tokens. If it does, + * return this token and remove the token sequence from the end of + * buffer. + * @return The matching token. + */ + Token tokenMatches() + { + Token rt = endMatches(buffer); + if (rt != null) // Remove the matched image + { + // Consume future character if it was an entity and the future + // character is semicolon. + if (rt.kind == ENTITY) + { + if (buffer.charAt(buffer.length() - 1) == ';') + buffer.setLength(buffer.length() - rt.getImage().length() - 1); + else + { + error("Missing closing semicolon for entity '" + rt.getImage() + + "'", rt + ); + consumeBuffer(rt); + } + } + else + { + consumeBuffer(rt); + } + } + + // If the buffer is not empty, some sequence does not match any tokens. + // Add it to the queue as "OTHER". + if (rt != null) + { + if (buffer.length() > 1) + { + String rest = buffer.toString(); + rest = rest.substring(0, rest.length() - 1); + + Token other = + new Token(OTHER, rest, buffer.getLocation(0, buffer.length)); + queue.add(other); + consumeBuffer(other); + } + queue.add(rt); + } + return rt; + } + + private void consumeBuffer(Token rt) + { + buffer.delete(buffer.length() - rt.getImage().length() - 1, + buffer.length() - 1 + ); + } + + /** + * Create EOF token. + */ + private Token eofToken() + { + return new Token(EOF, "#", new Location(readerPosition)); + } +} |