15 files changed, 3705 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java
new file mode 100644
index 000000000..cdefb75c8
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java
@@ -0,0 +1,1532 @@
+/* Parser.java -- HTML parser.
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support;
+
+import gnu.java.lang.CPStringBuilder;
+
+import gnu.javax.swing.text.html.parser.htmlAttributeSet;
+import gnu.javax.swing.text.html.parser.htmlValidator;
+import gnu.javax.swing.text.html.parser.support.low.Constants;
+import gnu.javax.swing.text.html.parser.support.low.ParseException;
+import gnu.javax.swing.text.html.parser.support.low.ReaderTokenizer;
+import gnu.javax.swing.text.html.parser.support.low.Token;
+import gnu.javax.swing.text.html.parser.support.low.node;
+import gnu.javax.swing.text.html.parser.support.low.pattern;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import java.util.Comparator;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.Vector;
+
+import javax.swing.text.ChangedCharSetException;
+import javax.swing.text.SimpleAttributeSet;
+import javax.swing.text.html.HTML;
+import javax.swing.text.html.parser.AttributeList;
+import javax.swing.text.html.parser.DTD;
+import javax.swing.text.html.parser.DTDConstants;
+import javax.swing.text.html.parser.Element;
+import javax.swing.text.html.parser.Entity;
+import javax.swing.text.html.parser.TagElement;
+
+/**
+ * <p>A simple error-tolerant HTML parser that uses a DTD document
+ * to access data on the possible tokens, arguments and syntax.</p>
+ * <p> The parser reads an HTML content from a Reader and calls various
+ * notifying methods (which should be overridden in a subclass)
+ * when tags or data are encountered.</p>
+ * <p>Some HTML elements need no opening or closing tags. The
+ * task of this parser is to invoke the tag handling methods also when
+ * the tags are not explicitly specified and must be supposed using
+ * information, stored in the DTD.
+ * For  example, parsing the document
+ * <p>&lt;table&gt;&lt;tr&gt;&lt;td&gt;a&lt;td&gt;b&lt;td&gt;c&lt;/tr&gt; <br>
+ * will invoke the handling methods in exactly the same order
+ * (and with the same parameters) as if parsing the document: <br>
+ * <em>&lt;html&gt;&lt;head&gt;&lt;/head&gt;&lt;body&gt;&lt;table&gt;&lt;
+ * tbody&gt;</em>&lt;tr&gt;&lt;td&gt;a<em>&lt;/td&gt;</em>&lt;td&gt;b<em>
+ * &lt;/td&gt;</em>&lt;td&gt;c<em>&lt;/td&gt;&lt;/tr&gt;</em>&lt;
+ * <em>/tbody&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;</em></p>
+ * (supposed tags are given in italics). The parser also supports
+ * obsolete elements of HTML syntax.<p>
+ * </p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Parser
+  extends ReaderTokenizer
+  implements DTDConstants
+{
+  /**
+   * The current html tag.
+   */
+  public Token hTag = new Token();
+
+  /**
+   * The document template description that will be used to parse the documents.
+   */
+  protected DTD dtd;
+
+  /**
+   * The value of this field determines whether or not the Parser will be
+   * strict in enforcing SGML compatibility. The default value is false,
+   * stating that the parser should do everything to parse and get at least
+   * some information even from the incorrectly written HTML input.
+   */
+  protected boolean strict;
+
+  /**
+   * This field has positive values in preformatted tags.
+   */
+  protected int preformatted = 0;
+
+  /**
+   * The set of the document tags. This field is used for supporting
+   * markFirstTime().
+   */
+  private Set documentTags =
+    new TreeSet(new Comparator()
+      {
+        public int compare(Object a, Object b)
+        {
+          return ((String) a).compareToIgnoreCase((String) b);
+        }
+      }
+               );
+
+  /**
+  * The buffer to collect the incremental output like text or comment.
+  */
+  private final StringBuffer buffer = new StringBuffer();
+
+  /**
+   * The buffer to store the document title.
+   */
+  private final StringBuffer title = new StringBuffer();
+
+  /**
+   * The current token.
+   */
+  private Token t;
+
+  /**
+   * True means that the 'title' tag of this document has
+   * already been handled.
+   */
+  private boolean titleHandled;
+
+  /**
+   * True means that the 'title' tag is currently open and all
+   * text is also added to the title buffer.
+   */
+  private boolean titleOpen;
+
+  /**
+   * The attributes of the current HTML element.
+   * Package-private to avoid an accessor method.
+   */
+  htmlAttributeSet attributes =
+    htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
+
+  /**
+   * The validator, controlling the forcible closing of the tags that
+   * (in accordance to dtd) are not allowed in the current context.
+   */
+  private htmlValidator validator;
+
+  /**
+   * Provides the default values for parameters in the case when these
+   * values are defined in the DTD.
+   */
+  private parameterDefaulter defaulter;
+
+  /**
+   * The text pre-processor for handling line ends and tabs.
+   */
+  private textPreProcessor textProcessor = new textPreProcessor();
+
+  /**
+   * Creates a new Parser that uses the given
+   * {@link javax.swing.text.html.parser.DTD }. The only standard way
+   * to get an instance of DTD is to construct it manually, filling in
+   * all required fields.
+   * @param a_dtd The DTD to use. If null is passed, this implementation
+   * uses the DTD returned by HTML_401F.getInstance() instead.
+   */
+  public Parser(DTD a_dtd)
+  {
+    if (a_dtd == null)
+      dtd = gnu.javax.swing.text.html.parser.HTML_401F.getInstance();
+    else
+      dtd = a_dtd;
+
+    defaulter = new parameterDefaulter(dtd);
+
+    validator =
+      new htmlValidator(dtd)
+        {
+          /**
+           * Handles the error message. This override passes the message
+           * to the error(String) method of the enclosing parser.
+           * @param msg The message text.
+           */
+          protected void s_error(String msg)
+          {
+            error(msg);
+          }
+
+          /**
+           * The method is called when the tag validator decides to close the
+           * tag on its own initiative. After reaching the end of stream,
+           * the tag validator closes all unclosed elements that are required
+           * to have the end (closing) tag.
+           *
+           * @param tElement The tag being fictionally (forcibly) closed.
+           */
+          protected void handleSupposedEndTag(Element tElement)
+          {
+            // The tag is cloned as the original tElement is the
+            // element from the starting tag - may be accidentally used
+            // somewhere else.
+            TagElement tag = makeTag(tElement, true);
+            _handleEndTag_remaining(tag);
+          }
+
+          /**
+           * The method is called when the tag validator decides to open
+           * the new tag on its own initiative. The tags, opened in this
+           * way, are HTML, HEAD and BODY. The attribute set is temporarily
+           * assigned to the empty one, the previous value is
+           * restored before return.
+           *
+           * @param tElement The tag being fictionally (forcibly) opened.
+           */
+          protected void handleSupposedStartTag(Element tElement)
+          {
+            TagElement tag = makeTag(tElement, true);
+            htmlAttributeSet were = attributes;
+            attributes = htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
+            _handleStartTag(tag);
+            attributes = were;
+          }
+        };
+  }
+
+  /**
+   * Get the attributes of the current tag.
+   * @return A new SimpleAttributeSet built from the current attribute set.
+   */
+  public SimpleAttributeSet getAttributes()
+  {
+    return new SimpleAttributeSet(attributes);
+  }
+
+  /**
+   * Invokes the error handler. The default method in this implementation
+   * delegates to error(msg, token), passing the token that is ahead.
+   */
+  public void error(String msg)
+  {
+    error(msg, getTokenAhead());
+  }
+
+  /** Reports msg through handleError, adding the token position if known. */
+  public void error(String msg, Token atToken)
+  {
+    if (atToken == null)
+      handleError(0, msg);
+    else
+      handleError(atToken.where.beginLine,
+                  msg + ": line " + atToken.where.beginLine
+                  + ", absolute pos " + atToken.where.startPosition);
+  }
+
+  /**
+   * Invokes the error handler. The default method in this implementation
+   * delegates the call to error(msg + ": '" + invalid + "'").
+   */
+  public void error(String msg, String invalid)
+  {
+    error(msg + ": '" + invalid + "'");
+  }
+
+  /**
+   * Invokes the error handler. The default method in this implementation
+   * delegates the call to error(parm1 + " " + parm2 + " " + parm3).
+   */
+  public void error(String parm1, String parm2, String parm3)
+  {
+    error(parm1 + " " + parm2 + " " + parm3);
+  }
+
+  /**
+   * Invokes the error handler. The default method in this implementation
+   * delegates to error(parm1 + " " + parm2 + " " + parm3 + " " + parm4).
+   */
+  public void error(String parm1, String parm2, String parm3, String parm4)
+  {
+    error(parm1 + " " + parm2 + " " + parm3 + " " + parm4);
+  }
+
+  /** Does nothing in this class; the method body is empty. */
+  public void flushAttributes()
+  { }
+
+  /**
+   * Parse the HTML text, calling various methods in response to the
+   * occurrence of the corresponding HTML constructions.
+   * @param reader The reader to read the source HTML from.
+   * @throws IOException If the reader throws one.
+   */
+  public synchronized void parse(Reader reader)
+                          throws IOException
+  {
+    reset(reader);
+    restart();
+    try
+      {
+        parseDocument();
+        validator.closeAll();
+      }
+    catch (ParseException ex)
+      {
+        // A caught exception is never null, so no null check is needed.
+        // Report the failure first, then hand an underlying I/O problem
+        // to the caller; any other cause just ends the parsing here.
+        error("Unable to continue parsing the document", ex.getMessage());
+
+        Throwable cause = ex.getCause();
+        if (cause instanceof IOException)
+          throw (IOException) cause;
+      }
+  }
+
+  /**
+   * Parses DTD markup declaration. Currently returns null without action.
+   * @return null.
+   * @throws IOException Not thrown by this default implementation.
+   */
+  public String parseDTDMarkup()
+                        throws IOException
+  {
+    return null;
+  }
+
+  /**
+   * Parse SGML insertion ( &lt;! ... &gt; ). When the SGML insertion is
+   * found, this method is called, passing the SGML in the string buffer
+   * as a parameter. The default method returns false without action and
+   * can be overridden to implement user-defined SGML support.
+   * <p>
+   * If you need more information about SGML insertions in HTML documents,
+   * the author suggests reading the SGML tutorial at
+   * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html">
+   * http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
+   * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
+   * Oxford University Press, 688 p, ISBN: 0198537379.
+   * </p>
+   * @param strBuff The buffer holding the text of the SGML insertion.
+   * @return true if this is a valid DTD markup declaration.
+   * @throws IOException Not thrown by this default implementation.
+   */
+  public boolean parseMarkupDeclarations(StringBuffer strBuff)
+                                  throws IOException
+  {
+    return false;
+  }
+
+  /**
+   * Get the line on which the token stored in hTag begins.
+   */
+  protected int getCurrentLine()
+  {
+    return hTag.where.beginLine;
+  }
+
+  /**
+   * Read parseable character data, add to buffer.
+   * @param clearBuffer If true, the buffer is emptied before the section
+   * is read, otherwise the section is appended to the existing content
+   * of the buffer.
+   *
+   * @throws ParseException
+   */
+  protected void CDATA(boolean clearBuffer)
+                throws ParseException
+  {
+    Token start = hTag = getTokenAhead();
+
+    if (clearBuffer)
+      buffer.setLength(0);
+
+    // An EOF right at the start is expected and is not reported.
+    if (start.kind == EOF)
+      return;
+
+    // Collect tokens until the next tag opening (BEGIN) is ahead.
+    for (t = getTokenAhead(); t.kind != BEGIN; t = getTokenAhead())
+      {
+        if (t.kind == EOF)
+          {
+            error("unexpected eof", t);
+            break;
+          }
+
+        // Entities are resolved before being appended, every other
+        // token is appended as is.
+        if (t.kind == Constants.ENTITY)
+          resolveAndAppendEntity(t);
+        else
+          append(t);
+
+        // Consume the token that has just been processed.
+        getNextToken();
+      }
+
+    // The text token spans from the first token to the one now ahead.
+    hTag = new Token(start, getTokenAhead(0));
+
+    if (buffer.length() != 0)
+      _handleText();
+  }
+
+  /**
+  * Process Comment. This method skips till --> without
+  * taking SGML constructs into consideration.  The supported SGML
+  * constructs are handled separately.
+  */
+  protected void Comment()
+                  throws ParseException
+  {
+    buffer.setLength(0);
+
+    Token start = hTag = mustBe(BEGIN);
+    optional(WS);
+    mustBe(EXCLAMATION);
+    optional(WS);
+    mustBe(DOUBLE_DASH);
+
+    Token t;
+    Token last;
+
+    comment:
+    while (true)
+      {
+        t = getTokenAhead();
+        if (t.kind == EOF)
+          {
+            handleEOFInComment();
+            last = t;
+            break comment;
+          }
+        else if (COMMENT_END.matches(this))
+          {
+            mustBe(DOUBLE_DASH);
+            optional(WS);
+            last = mustBe(END);
+            break comment;
+          }
+        else if (COMMENT_TRIPLEDASH_END.matches(this))
+          {
+            mustBe(DOUBLE_DASH);
+            t = mustBe(NUMTOKEN);
+            if (t.getImage().equals("-"))
+              {
+                append(t);
+                last = mustBe(END);
+                break comment;
+              }
+            else
+              {
+                buffer.append("--");
+                append(t);
+                t = getTokenAhead();
+              }
+          }
+        else
+        /* The lllll-- can match as NUMTOKEN */
+        if (t.getImage().endsWith("--")
+            && (getTokenAhead(1).kind == END
+                || (getTokenAhead(1).kind == WS
+                    && getTokenAhead(2).kind == END)))
+          {
+            buffer.append(t.getImage().substring(0, t.getImage().length() - 2));
+            // Consume this token and then the closing > (optionally after
+            // whitespace) checked above, so the > cannot leak into text.
+            mustBe(t.kind);
+            optional(WS);
+            last = mustBe(END);
+            break comment;
+          }
+        else
+          append(t);
+        mustBe(t.kind);
+      }
+    hTag = new Token(start, last);
+
+    // Consume any whitespace immediately following a comment.
+    optional(WS);
+    handleComment();
+  }
+
+  /**
+  * Read a script. The text, returned without any changes,
+  * is terminated only by the closing tag SCRIPT. The collected text
+  * is passed to _handleText(), after which the end tag is reported
+  * through endTag(false) and _handleEndTag().
+  */
+  protected void Script()
+                 throws ParseException
+  {
+    // Opening tag: the tag start, the SCRIPT name and then the
+    // rest of the tag.
+    Token start = hTag = mustBe(BEGIN);
+    optional(WS);
+    Token name = mustBe(SCRIPT);
+    optional(WS);
+    restOfTag(false, name, start);
+
+    // Collect every token as is until the closing SCRIPT tag is ahead.
+    buffer.setLength(0);
+    while (!SCRIPT_CLOSE.matches(this))
+      append(getNextToken());
+
+    // Consume the closing tag; it is not appended to the buffer.
+    consume(SCRIPT_CLOSE);
+
+    // Report the collected content as text.
+    _handleText();
+
+    // Report the end tag, named after the opening tag.
+    endTag(false);
+    _handleEndTag(makeTagElement(name.getImage(), false));
+  }
+
+  /**
+  * Process SGML insertion. Comments are delegated to Comment().
+  */
+  protected void Sgml()
+               throws ParseException
+  {
+    if (COMMENT_OPEN.matches(this))
+      Comment();
+    else
+      {
+        // Not a comment: collect everything up to the closing ">".
+        Token start = hTag = mustBe(BEGIN);
+        optional(WS);
+        mustBe(EXCLAMATION);
+
+        buffer.setLength(0);
+        for (;;)
+          {
+            t = getNextToken();
+            if (t.kind == END)
+              break;
+
+            if (t.kind == EOF)
+              {
+                error("unexpected eof", t);
+                break;
+              }
+
+            if (t.kind == Constants.ENTITY)
+              resolveAndAppendEntity(t);
+            else
+              append(t);
+          }
+
+        try
+          {
+            parseMarkupDeclarations(buffer);
+          }
+        catch (IOException ioFailure)
+          {
+            // t is the last token read by the loop above.
+            String report = "Unable to parse SGML insertion: '" + buffer + "'";
+            error(report, new Token(start, t));
+          }
+      }
+    // Consume any whitespace that follows the Sgml insertion.
+    optional(WS);
+  }
+
+  /**
+  * Read a style definition. The text, returned without any changes,
+  * is terminated only by the closing tag STYLE. The collected text
+  * is passed to _handleText(), after which the end tag is reported
+  * through endTag(false) and _handleEndTag().
+  */
+  protected void Style()
+                throws ParseException
+  {
+    // Opening tag: the tag start, the STYLE name and then the
+    // rest of the tag.
+    Token start = hTag = mustBe(BEGIN);
+    optional(WS);
+    Token name = mustBe(STYLE);
+    optional(WS);
+    restOfTag(false, name, start);
+
+    // Collect every token as is until the closing STYLE tag is ahead.
+    buffer.setLength(0);
+    while (!STYLE_CLOSE.matches(this))
+      append(getNextToken());
+
+    // Consume the closing tag; it is not appended to the buffer.
+    consume(STYLE_CLOSE);
+
+    // Report the collected content as text.
+    _handleText();
+
+    // Report the end tag, named after the opening tag.
+    endTag(false);
+    _handleEndTag(makeTagElement(name.getImage(), false));
+  }
+
+  /**
+   * Read a html tag.
+   */
+  protected void Tag()
+              throws ParseException
+  {
+    mark(true);
+
+    Token start = hTag = mustBe(BEGIN);
+    optional(WS);
+
+    // The first token is either the tag name or the slash of a
+    // closing tag.
+    Token name = getNextToken();
+    optional(WS);
+
+    boolean closing = (name.kind == SLASH);
+    if (closing)
+      {
+        // For a closing tag the name is the token after the slash.
+        name = getNextToken();
+      }
+    restOfTag(closing, name, start);
+  }
+
+  /**
+   * A hook, for operations, preceding call to handleText.
+   * Handle text in a string buffer.
+   * In non - preformatted mode, all line breaks immediately following the
+   * start tag and immediately before an end tag are discarded,
+   * \r, \n and \t are replaced by spaces, multiple spaces are replaced
+   * by the single one and the result is moved into array,
+   * passing it to handleText().
+   */
+  protected void _handleText()
+  {
+    char[] text = (preformatted > 0)
+      ? textProcessor.preprocessPreformatted(buffer)
+      : textProcessor.preprocess(buffer);
+
+    // Nothing to report.
+    if (text == null || text.length == 0)
+      return;
+
+    // According to the specs we need to discard whitespace immediately
+    // before a closing tag.
+    if (text.length == 1 && text[0] == ' ' && TAG_CLOSE.matches(this))
+      return;
+
+    TagElement pcdata = new TagElement(dtd.getElement("#pcdata"));
+    attributes = htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
+    _handleEmptyTag(pcdata);
+
+    handleText(text);
+    if (titleOpen)
+      title.append(text);
+  }
+
+  /**
+   * Add the image of this token to the buffer. EOF tokens are ignored.
+   * @param t A token to append.
+   */
+  protected final void append(Token t)
+  {
+    boolean atEnd = (t.kind == EOF);
+    if (!atEnd)
+      t.appendTo(buffer);
+  }
+
+  /**
+   * Consume pattern that must match.
+   * @param p A pattern to consume.
+   */
+  protected final void consume(pattern p)
+  {
+    int position = 0;
+    while (position < p.nodes.length)
+      {
+        node expected = p.nodes [ position++ ];
+        if (!expected.optional)
+          mustBe(expected.kind);
+        else
+          optional(expected.kind);
+      }
+  }
+
+  /**
+   * The method is called when the HTML end (closing) tag is found or if
+   * the parser concludes that the one should be present in the
+   * current position. The method is called immediately
+   * before calling the handleEndTag().
+   * @param omitted True if the tag is not actually present in the document,
+   * but is supposed by the parser (like &lt;/html&gt; at the end of the
+   * document).
+   */
+  protected void endTag(boolean omitted)
+  {
+  }
+
+  /**
+   * Handle HTML comment. The default method returns without action.
+   * @param comment The characters of the comment.
+   */
+  protected void handleComment(char[] comment)
+  {
+  }
+
+  /**
+   * This is additionally called when the HTML content terminates
+   * without closing the HTML comment. This can only happen if the
+   * HTML document contains errors (for example, the closing --&gt; is
+   * missing). The default method reports the error "Unclosed comment".
+   */
+  protected void handleEOFInComment()
+  {
+    error("Unclosed comment");
+  }
+
+  /**
+   * Handle the tag with no content, like &lt;br&gt;. The method is
+   * called for the elements that, in accordance with the current DTD,
+   * have an empty content. The default method returns without action.
+   * @param tag The tag being handled.
+   * @throws javax.swing.text.ChangedCharSetException
+   */
+  protected void handleEmptyTag(TagElement tag)
+                         throws javax.swing.text.ChangedCharSetException
+  {
+  }
+
+  /**
+   * The method is called when the HTML closing tag (like &lt;/table&gt;)
+   * is found or if the parser concludes that the one should be present
+   * in the current position. The default method returns without action.
+   * @param tag The tag
+   */
+  protected void handleEndTag(TagElement tag)
+  {
+  }
+
+  /** Handle an error that has occurred in the given line. */
+  protected void handleError(int line, String message)
+  {
+  }
+
+  /**
+   * The method is called when the HTML opening tag (like &lt;table&gt;)
+   * is found or if the parser concludes that the one should be present
+   * in the current position. The default method returns without action.
+   * @param tag The tag
+   */
+  protected void handleStartTag(TagElement tag)
+  {
+  }
+
+  /**
+   * Handle the text section.
+   * <p> For non-preformatted section, the parser replaces
+   * \t, \r and \n by spaces and then multiple spaces
+   * by a single space. Additionally, all whitespace around
+   * tags is discarded.
+   * </p>
+   * <p> For pre-formatted text (inside TEXTAREA and PRE), the parser preserves
+   * all tabs and spaces, but removes <b>one</b>  bounding \r, \n or \r\n,
+   * if it is present. Additionally, it replaces each occurrence of \r or \r\n
+   * by a single \n.</p>
+   *
+   * @param text A section text.
+   */
+  protected void handleText(char[] text)
+  {
+  }
+
+  /**
+   * Handle HTML &lt;title&gt; tag. This method is invoked when
+   * both title starting and closing tags are already behind.
+   * The passed argument contains the concatenation of all
+   * title text sections. The default method returns without action.
+   * @param title The title text.
+   */
+  protected void handleTitle(char[] title)
+  {
+  }
+
+  /**
+   * Constructs the tag from the given element; the tag is not marked as
+   * supposed. In this implementation, this is defined, but never called.
+   * @return the tag
+   */
+  protected TagElement makeTag(Element element)
+  {
+    return makeTag(element, false);
+  }
+
+  /**
+   * Constructs the tag from the given element.
+   * @param element the tag base {@link javax.swing.text.html.parser.Element}
+   * @param isSupposed true if the tag is not actually present in the
+   * html input, but the parser supposes that it should occur in
+   * the current location.
+   * @return the tag
+   */
+  protected TagElement makeTag(Element element, boolean isSupposed)
+  {
+    return new TagElement(element, isSupposed);
+  }
+
+  /**
+   * This is called when the tag, representing the given element,
+   * occurs for the first time in the document.
+   * @param element The element whose tag has occurred.
+   */
+  protected void markFirstTime(Element element)
+  {
+  }
+
+  /**
+   * Consume the token that was checked before and hence MUST be present.
+   * @param kind The kind of token to consume.
+   * @return The consumed token.
+   * @throws AssertionError If the token ahead is of a different kind.
+   */
+  protected Token mustBe(int kind)
+  {
+    if (getTokenAhead().kind == kind)
+      return getNextToken();
+
+    // Kinds below 1000 are additionally shown as a character.
+    String asChar = (kind < 1000)
+      ? " ('" + (char) kind + "') "
+      : "";
+    throw new AssertionError("The token of kind " + kind + asChar
+                             + " MUST be here,");
+  }
+
+  /**
+   * Handle attribute without value. The default method uses
+   * the only allowed attribute value from DTD.
+   * If the attribute is unknown or allows several values,
+   * the HTML.NULL_ATTRIBUTE_VALUE is used. The attribute with
+   * this value is added to the attribute set.
+   * @param element The name of element.
+   * @param attribute The name of attribute without value.
+   */
+  protected void noValueAttribute(String element, String attribute)
+  {
+    Object value = HTML.NULL_ATTRIBUTE_VALUE;
+
+    // Lower-case with a fixed locale: the default-locale variant maps
+    // 'I' to a dotless i under e.g. Turkish and misses the DTD entry.
+    Element e =
+      dtd.elementHash.get(element.toLowerCase(java.util.Locale.ENGLISH));
+    AttributeList attr = (e == null) ? null : e.getAttribute(attribute);
+    if (attr != null)
+      {
+        Vector values = attr.values;
+        if (values != null && values.size() == 1)
+          value = values.get(0);
+      }
+    attributes.addAttribute(attribute, value);
+  }
+
+  /**
+   * Consume the optional token, if present.
+   * @param kind The kind of token to consume.
+   * @return The consumed token, or null if the token ahead is of
+   * another kind.
+   */
+  protected Token optional(int kind)
+  {
+    boolean present = (getTokenAhead().kind == kind);
+    return present ? getNextToken() : null;
+  }
+
+  /** Parse the html document, construct by construct, until EOF is ahead. */
+  protected void parseDocument()
+                        throws ParseException
+  {
+    // Skip any initial whitespace.
+    optional(WS);
+    while (getTokenAhead().kind != EOF)
+      {
+        advanced = false;
+        if (TAG.matches(this))
+          Tag();
+        else if (COMMENT_OPEN.matches(this))
+          Comment();
+        else if (STYLE_OPEN.matches(this))
+          Style();
+        else if (SCRIPT_OPEN.matches(this))
+          Script();
+        else if (SGML.matches(this))
+          Sgml();
+        else
+          CDATA(true);
+
+        // The advanced flag is still false: surely HTML error, treat as a text.
+        if (!advanced)
+          {
+            Token wrong = getNextToken();
+            error("unexpected '" + wrong.getImage() + "'", wrong);
+            buffer.setLength(0);
+            buffer.append(wrong.getImage());
+            _handleText();
+          }
+      }
+  }
+
+  /**
+   * Read the element attributes into a newly created attribute set.
+   * @param element The element name (needed to access attribute
+   * information in dtd).
+   */
+  protected void readAttributes(String element)
+  {
+    Token name;
+    Token value;
+    Token next;
+    String attrValue;
+
+    attributes = new htmlAttributeSet();
+
+    optional(WS);
+
+    attributeReading:
+      while (getTokenAhead().kind == NUMTOKEN)
+      {
+        name = getNextToken();
+        optional(WS);
+
+        next = getTokenAhead();
+        if (next.kind == EQ)
+          {
+            mustBe(EQ);
+            optional(WS);
+
+            next = getNextToken();
+
+            switch (next.kind)
+              {
+              case QUOT:
+
+                // read "quoted" attribute.
+                buffer.setLength(0);
+                readTillTokenE(QUOT);
+                attrValue = buffer.toString();
+                break;
+
+              case AP:
+
+                // read 'quoted' attribute.
+                buffer.setLength(0);
+                readTillTokenE(AP);
+                attrValue = buffer.toString();
+                break;
+
+              // read unquoted attribute.
+              case NUMTOKEN:
+                value = next;
+                optional(WS);
+
+                // Check whether the opening quote may be missing.
+                next = getTokenAhead();
+                if (bQUOTING.get(next.kind))
+                  {
+                    hTag = next;
+                    error("The value without opening quote is closed with '"
+                          + next.getImage() + "'");
+                    attrValue = value.getImage();
+                  }
+                else if (next.kind == SLASH || next.kind == OTHER)
+                // The slash and other characters (like %) in this context are
+                // treated as ordinary
+                // characters, not as tokens. The character may be part of
+                // the unquoted URL.
+                  {
+                    CPStringBuilder image = new CPStringBuilder(value.getImage());
+                    while (next.kind == NUMTOKEN || next.kind == SLASH
+                           || next.kind == OTHER)
+                      {
+                        image.append(getNextToken().getImage());
+                        next = getTokenAhead();
+                      }
+                    attrValue = image.toString();
+                  }
+                else
+                  attrValue = value.getImage();
+                break;
+
+              case SLASH:
+                value = next;
+                optional(WS);
+
+                // Check whether the opening quote may be missing.
+                next = getTokenAhead();
+                if (bQUOTING.get(next.kind))
+                  {
+                    hTag = next;
+                    error("The value without opening quote is closed with '"
+                          + next.getImage() + "'");
+                    attrValue = value.getImage();
+                  }
+                else if (next.kind == NUMTOKEN || next.kind == SLASH)
+                // The slash in this context is treated as the ordinary
+                // character, not as a token. The slash may be part of
+                // the unquoted URL.
+                  {
+                    CPStringBuilder image = new CPStringBuilder(value.getImage());
+                    while (next.kind == NUMTOKEN || next.kind == SLASH)
+                      {
+                        image.append(getNextToken().getImage());
+                        next = getTokenAhead();
+                      }
+                    attrValue = image.toString();
+                  }
+                else
+                  attrValue = value.getImage();
+                break;
+              default:
+                break attributeReading;
+              }
+            attributes.addAttribute(name.getImage(), attrValue);
+            optional(WS);
+          }
+        else
+          // The '=' is missing: attribute without value.
+          {
+            noValueAttribute(element, name.getImage());
+          }
+      }
+  }
+
+  /**
+   * Return string, corresponding the given named entity. The name is passed
+   * with the preceding &, but without the ending semicolon.
+   */
+  protected String resolveNamedEntity(final String a_tag)
+  {
+    // Discard &
+    if (!a_tag.startsWith("&"))
+      throw new AssertionError("Named entity " + a_tag +
+                               " must start witn '&'.");
+
+    String tag = a_tag.substring(1);
+
+    try
+      {
+        Entity entity = dtd.getEntity(tag);
+        if (entity != null)
+          return entity.getString();
+
+        // Retry in lower case, using a fixed locale so that the lookup
+        // does not depend on the default locale (e.g. Turkish 'I').
+        entity = dtd.getEntity(tag.toLowerCase(java.util.Locale.ENGLISH));
+        if (entity != null)
+          {
+            error("The name of this entity should be in lowercase", a_tag);
+            return entity.getString();
+          }
+      }
+    catch (IndexOutOfBoundsException ibx)
+      {
+        /* The error will be reported. */
+      }
+
+    error("Unknown named entity", a_tag);
+    return a_tag;
+  }
+
+  /**
+   * Return the char corresponding to the given numeric entity.
+   * The name is passed with the preceding &amp;#, but without the ending
+   * semicolon. Malformed or out-of-range values are reported; '?' results.
+   */
+  protected char resolveNumericEntity(final String a_tag)
+  {
+    // The token image must carry the leading "&#"; it is discarded below.
+    if (!a_tag.startsWith("&#"))
+      throw new AssertionError("Numeric entity " + a_tag +
+                               " must start with '&#'."
+                              );
+
+    String tag = a_tag.substring(2);
+
+    try
+      {
+        // Determine the encoding type; charAt throws if the body is empty.
+        char cx = tag.charAt(0);
+        boolean hex = cx == 'x' || cx == 'X'; // Hexadecimal &#Xnnn;
+        int code = Integer.parseInt(hex ? tag.substring(1) : tag, hex ? 16 : 10);
+        // A plain cast would silently truncate values outside the char range.
+        if (code >= Character.MIN_VALUE && code <= Character.MAX_VALUE)
+          return (char) code;
+      }
+    catch (NumberFormatException nex)
+      {
+        /* The error will be reported. */
+      }
+    catch (IndexOutOfBoundsException ix)
+      {
+        /* The error will be reported. */
+      }
+
+    error("Invalid numeric entity", a_tag);
+    return '?';
+  }
+
+  /**
+   * Reset the per-document state (seen tags, title flags, text buffers and
+   * the tag validator), preparing the parser for the next document.
+   */
+  protected void restart()
+  {
+    documentTags.clear();
+    titleOpen = titleHandled = false;
+    // Empty both text accumulators before the validator starts afresh.
+    buffer.setLength(0);
+    title.setLength(0);
+    validator.restart();
+  }
+
+  /**
+   * The method is called when the HTML opening tag (like &lt;table&gt;)
+   * is found or if the parser concludes that one should be present
+   * in the current position. The method is called immediately before
+   * calling the handleStartTag. This implementation is empty.
+   * @param tag The tag
+   */
+  protected void startTag(TagElement tag)
+                   throws ChangedCharSetException
+  {
+  }
+
+  /**
+   * Process an element whose text content is already collected in the
+   * buffer: the start tag, the text and the end tag are handled in turn.
+   * Used for elements whose text must not be parsed for nested elements
+   * (STYLE and SCRIPT).
+   */
+  private void _handleCompleteElement(TagElement tag)
+  {
+    _handleStartTag(tag);
+
+    // SCRIPT and STYLE texts must never leak into the document title, so
+    // the titleOpen flag is cleared while such text is handled.
+    HTML.Tag kind = tag.getHTMLTag();
+    boolean hideFromTitle = kind == HTML.Tag.SCRIPT || kind == HTML.Tag.STYLE;
+    boolean wasTitleOpen = titleOpen;
+
+    if (hideFromTitle)
+      titleOpen = false;
+    _handleText();
+    if (hideFromTitle)
+      titleOpen = wasTitleOpen;
+
+    _handleEndTag(tag);
+  }
+
+  /**
+   * Wrapper around handleEmptyTag(), used for tags with no content
+   * (like &lt;br&gt;). The tag and its attributes are passed through
+   * validator.validateTag() first. A ChangedCharSetException is reported
+   * as an error rather than propagated.
+   * @param tag The tag being handled.
+   */
+  private void _handleEmptyTag(TagElement tag)
+  {
+    try
+      {
+        validator.validateTag(tag, attributes);
+        handleEmptyTag(tag);
+
+        // Whitespace following a block-like tag is consumed. isBlock()
+        // deliberately counts FRAME as block-like here, although it is
+        // not officially a block element.
+        if (isBlock(tag.getHTMLTag()))
+          optional(WS);
+      }
+    catch (ChangedCharSetException charsetChange)
+      {
+        error("Changed charset exception:", charsetChange.getMessage());
+      }
+  }
+
+  /**
+   * Wrapper around handleEndTag(), invoked when an HTML closing tag is
+   * found; proceeds only if validator.closeTag() returns true.
+   * @param tag The tag
+   */
+  private void _handleEndTag(TagElement tag)
+  {
+    if (!validator.closeTag(tag))
+      return;
+    _handleEndTag_remaining(tag);
+  }
+
+  /**
+   * The closing actions proper; also needed when the closing was
+   * initiated by the tag validator.
+   * Package-private to avoid an accessor method.
+   */
+  void _handleEndTag_remaining(TagElement tag)
+  {
+    HTML.Tag closed = tag.getHTMLTag();
+
+    handleEndTag(tag);
+    endTag(tag.fictional());
+
+    // Leave one level of preformatted content, never dropping below zero.
+    if (closed.isPreformatted())
+      preformatted--;
+    if (preformatted < 0)
+      preformatted = 0;
+
+    // When a block tag is closed, the whitespace following it is consumed.
+    if (isBlock(closed))
+      optional(WS);
+
+    if (closed != HTML.Tag.TITLE)
+      return;
+
+    // The title is complete: hand the collected text over to handleTitle.
+    titleOpen = false;
+    titleHandled = true;
+    char[] text = new char[ title.length() ];
+    title.getChars(0, text.length, text, 0);
+    handleTitle(text);
+  }
+
+  /**
+   * Wrapper around handleStartTag(), invoked when an HTML opening tag
+   * (like &lt;table&gt;) is found. Registers the tag with the validator
+   * and keeps the preformatted depth and the title flags up to date.
+   * Package-private to avoid an accessor method.
+   * @param tag The tag
+   */
+  void _handleStartTag(TagElement tag)
+  {
+    validator.openTag(tag, attributes);
+    startingTag(tag);
+    handleStartTag(tag);
+
+    HTML.Tag opened = tag.getHTMLTag();
+
+    // Whitespace right after a block-like opening tag is consumed.
+    if (isBlock(opened))
+      optional(WS);
+    if (opened.isPreformatted())
+      preformatted++;
+    if (opened != HTML.Tag.TITLE)
+      return;
+
+    // Only one TITLE is expected per document; a repeated one is reported.
+    if (titleHandled)
+      error("Repetetive <TITLE> tag");
+    titleHandled = false;
+    titleOpen = true;
+  }
+
+  /**
+   * Resume parsing after heavy errors in the HTML tag structure: if a
+   * closing '>' follows within the look-ahead window (before the end of
+   * input or a new '<'), everything up to it is consumed and reported.
+   * @throws ParseException
+   */
+  private void forciblyCloseTheTag()
+                            throws ParseException
+  {
+    buffer.setLength(0);
+
+    // Look ahead (without consuming) for the position of the closing '>'.
+    int closeAt = 0;
+    int ahead = 0;
+    while (closeAt == 0 && ahead < 99)
+      {
+        t = getTokenAhead(ahead++);
+        if (t.kind == EOF || t.kind == BEGIN)
+          break;
+        if (t.kind == END)
+          closeAt = ahead;
+      }
+    if (closeAt == 0)
+      return;
+
+    // Consume the tokens up to and including '>' and report them.
+    buffer.append("Ignoring '");
+    for (int left = closeAt; left > 0; left--)
+      {
+        t = getNextToken();
+        append(t);
+      }
+    buffer.append('\'');
+    error(buffer.toString());
+  }
+
+  /**
+   * Copy the comment text from the string buffer and pass it to handleComment.
+   */
+  private void handleComment()
+  {
+    final int size = buffer.length();
+    char[] text = new char[ size ];
+    buffer.getChars(0, size, text, 0);
+    handleComment(text);
+  }
+
+  /** Find the element for the tag name (reporting unknown tags) and wrap it. */
+  private TagElement makeTagElement(String name, boolean isSupposed)
+  {
+    // Tag names are ASCII: fold case locale-independently (Turkish 'I').
+    Element e = dtd.elementHash.get(name.toLowerCase(java.util.Locale.ENGLISH));
+    if (e == null)
+      {
+        error("Unknown tag <" + name + ">");
+        e = dtd.getElement(name);
+        e.name = name.toUpperCase(java.util.Locale.ENGLISH);
+        e.index = -1;
+      }
+    if (!documentTags.contains(e.name))
+      {
+        markFirstTime(e);
+        documentTags.add(e.name);
+      }
+    return makeTag(e, isSupposed);
+  }
+
+  /**
+   * Collect the text up to the given token into the buffer, resolving
+   * entities on the way. The terminating token itself is consumed, but
+   * not added to the buffer.
+   * @param till The token to read till
+   * @throws ParseException
+   */
+  private void readTillTokenE(int till)
+                       throws ParseException
+  {
+    buffer.setLength(0);
+    for (;;)
+      {
+        t = getNextToken();
+
+        if (t.kind == Constants.ENTITY)
+          {
+            resolveAndAppendEntity(t);
+            continue;
+          }
+        if (t.kind == EOF)
+          {
+            error("unexpected eof", t);
+            return;
+          }
+        // The terminator is consumed, but not stored.
+        if (t.kind == till)
+          return;
+        if (t.kind != WS)
+          {
+            append(t);
+            continue;
+          }
+
+        // Whitespace is processed in accordance with the CDATA rules:
+        // CR and Tab turn into a space, LF is dropped, the rest is kept.
+        String image = t.getImage();
+        for (int i = 0; i < image.length(); i++)
+          {
+            char c = image.charAt(i);
+            if (c == '\r' || c == '\t')
+              buffer.append(' ');
+            else if (c != '\n')
+              buffer.append(c);
+          }
+      }
+  }
+
+  /**
+   * Append the resolved value of the entity to the end of buffer. Any
+   * category other than named or numeric raises an AssertionError.
+   * @param entity the entity token to resolve
+   */
+  private void resolveAndAppendEntity(Token entity)
+  {
+    if (entity.category == ENTITY_NAMED)
+      {
+        // A named entity resolves to a string.
+        buffer.append(resolveNamedEntity(entity.getImage()));
+      }
+    else if (entity.category == ENTITY_NUMERIC)
+      {
+        // A numeric entity resolves to a single char.
+        buffer.append(resolveNumericEntity(entity.getImage()));
+      }
+    else
+      throw new AssertionError("Invalid entity category " +
+                               entity.category
+                              );
+  }
+
+  /**
+   * Handle the remainder of an HTML tag (attributes and the closing '>'),
+   * then dispatch to the tag handlers. Common end for TAG, SCRIPT and STYLE.
+   * @param closing True for closing tags ( &lt;/TAG&gt; ).
+   * @param name Name of element
+   * @param start Token where element has started
+   * @throws ParseException
+   */
+  private void restOfTag(boolean closing, Token name, Token start)
+                  throws ParseException
+  {
+    boolean end = false;
+    Token next;
+
+    optional(WS);
+
+    readAttributes(name.getImage());
+
+    optional(WS);
+
+    next = getTokenAhead();
+    if (next.kind == END)
+      {
+        mustBe(END);
+        end = true;
+      }
+
+    hTag = new Token(start, next);
+
+    if (!end)
+      {
+        // The tag body contains errors. If additionally the tag
+        // name is not valid, this construction is treated as text.
+        // Tag names are ASCII, so the case is folded locale-independently.
+        String key = name.getImage().toLowerCase(java.util.Locale.ENGLISH);
+        if (dtd.elementHash.get(key) == null && backupMode)
+          {
+            error("Errors in tag body and unknown tag name. " +
+                  "Treating the tag as a text."
+                 );
+            reset();
+
+            hTag = mustBe(BEGIN);
+            buffer.setLength(0);
+            buffer.append(hTag.getImage());
+            CDATA(false);
+            return;
+          }
+        else
+          {
+            error("Forcibly closing invalid parameter list");
+            forciblyCloseTheTag();
+          }
+      }
+
+    if (closing)
+      {
+        endTag(false);
+        _handleEndTag(makeTagElement(name.getImage(), false));
+      }
+    else
+      {
+        TagElement te = makeTagElement(name.getImage(), false);
+        if (te.getElement().type == DTDConstants.EMPTY)
+          _handleEmptyTag(te);
+        else
+          {
+            // According to the specs we need to consume whitespace following
+            // immediately after an opening tag.
+            optional(WS);
+            _handleStartTag(te);
+          }
+      }
+  }
+
+  /**
+   * Call the startTag(tag) hook. A ChangedCharSetException thrown by
+   * the hook is not propagated; it is reported as an error instead and
+   * no additional actions are fired.
+   * @param tag The tag passed to the hook
+   */
+  private void startingTag(TagElement tag)
+  {
+    try
+      {
+        startTag(tag);
+      }
+    catch (ChangedCharSetException cax)
+      {
+        error("Invalid change of charset");
+      }
+  }
+
+  private void ws_error()
+  {
+    error("Whitespace here is not permitted"); // fixed message, no token
+  }
+
+  /**
+   * Tell whether the tag counts as a block tag for the purposes of
+   * whitespace handling. Besides the official block tags, STYLE and FRAME
+   * are treated as such, since they need the same whitespace handling.
+   *
+   * @param tag the tag to check
+   * @return true when the tag is handled as a block tag wrt whitespace
+   */
+  private boolean isBlock(HTML.Tag tag)
+  {
+    if (tag.isBlock())
+      return true;
+    return tag == HTML.Tag.FRAME || tag == HTML.Tag.STYLE;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/gnuStringIntMapper.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/gnuStringIntMapper.java
new file mode 100644
index 000000000..9cdf810dd
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/gnuStringIntMapper.java
@@ -0,0 +1,112 @@
+/* gnuStringIntMapper.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * A helper class that maps strings to their unique integer identifiers
+ * and back.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public abstract class gnuStringIntMapper
+{
+  /**
+   * Maps integer identifiers to their string names; created on demand.
+   */
+  private Map is_Map;
+
+  /**
+   * Maps string names to their integer identifiers; created on demand.
+   */
+  private Map si_Map;
+
+  /**
+   *  Get string from id or null if no such id is present in the mapper.
+   */
+  public final String get(int id)
+  {
+    if (is_Map == null)
+      createTheMap();
+
+    Object name = is_Map.get(new Integer(id));
+    return (String) name;
+  }
+
+  /** Get id from string or 0 if no such string is present in the mapper. */
+  public final int get(String id)
+  {
+    if (si_Map == null)
+      createTheMap();
+
+    Object code = si_Map.get(id);
+    if (code == null)
+      return 0;
+    return ((Integer) code).intValue();
+  }
+
+  /**
+   * Fill the mapping table of this mapper with the required String/int
+   * pairs by calling <code>add</code>. The method is invoked only once
+   * for each instance, on the first invocation of any form of the
+   * <code>get</code> method.
+   */
+  protected abstract void create();
+
+  /**
+   * Add an id/string pair to this mapper. This is called from
+   * the method <code>create</code> only.
+   */
+  protected void add(String name, int id)
+  {
+    Integer key = new Integer(id);
+    si_Map.put(name, key);
+    is_Map.put(key, name);
+  }
+
+  /** Allocate both maps, then let the subclass populate them. */
+  private void createTheMap()
+  {
+    si_Map = new TreeMap();
+    is_Map = new HashMap();
+    create();
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Buffer.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Buffer.java
new file mode 100644
index 000000000..a39330af8
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Buffer.java
@@ -0,0 +1,238 @@
+/* Buffer.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+/**
+ * A string buffer that additionally holds line and absolute position
+ * information.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Buffer
+{
+  public static int INITIAL_SIZE = 2048;
+
+  /**
+   * True if the \n symbol has been seen.
+   */
+  public boolean n_seen;
+
+  /**
+   * True if the \r symbol has been seen.
+   */
+  public boolean r_seen;
+  char[] chr = new char[ INITIAL_SIZE ]; // the stored characters
+  int[] line = new int[ INITIAL_SIZE ]; // line number of each character
+  int[] position = new int[ INITIAL_SIZE ]; // stream position of each one
+
+  /**
+   * Current line.
+   */
+  int current_line = 0;
+
+  /**
+   * Point to the next free position.
+   */
+  int length;
+
+  public Buffer()
+  {
+  }
+
+  public Buffer(String content)
+  {
+    for (int i = 0; i < content.length(); i++)
+      {
+        append(content.charAt(i), i);
+      }
+  }
+
+  /**
+   * Get the characters into array.
+   * @param srcBegin From, inclusive
+   * @param srcEnd To, exclusive.
+   * @param dst Into
+   * @param dstBegin Offset.
+   */
+  public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
+  {
+    System.arraycopy(chr, srcBegin, dst, dstBegin, (srcEnd - srcBegin));
+  }
+
+  /**
+   * Return the sequence, used to separate lines in the document.
+   * @return one of \n, \r or \r\n.
+   */
+  public String getEndOfLineSequence()
+  {
+    if (r_seen && n_seen)
+      return "\r\n";
+    else if (r_seen)
+      return "\r";
+    else
+      // This also is returned for single-line document.
+      return "\n";
+  }
+
+  /**
+   * Truncate.
+   * @param n The length to truncate till.
+   */
+  public void setLength(int n)
+  {
+    length = n;
+  }
+
+  /**
+   * Get location information for the given region.
+   * @param from Region start, inclusive.
+   * @param to Region end, exclusive.
+   * @return The location, covering the region.
+   */
+  public Location getLocation(int from, int to)
+  {
+    Location l = new Location();
+    l.beginLine = line [ from ];
+    l.endLine = line [ to - 1 ];
+
+    l.startPosition = position [ from ];
+    l.endPosition = position [ to - 1 ] + 1;
+
+    return l;
+  }
+
+  /**
+   * Add the character.
+   * @param c The character.
+   * @param pos The character position in the stream (the line number
+   * is handled internally in the buffer).
+   */
+  public void append(char c, int pos)
+  {
+    if (length >= chr.length)
+      expand();
+    chr [ length ] = c;
+    position [ length ] = pos;
+
+    if (c == '\n')
+      {
+        if (!r_seen)
+          current_line++;
+        n_seen = true;
+      }
+    else if (c == '\r')
+      {
+        current_line++;
+        r_seen = true;
+      }
+
+    line [ length ] = current_line;
+
+    length++;
+  }
+
+  /**
+   * Return char at the given position.
+   */
+  public char charAt(int i)
+  {
+    return chr [ i ];
+  }
+
+  /**
+   * Delete the range
+   * @param from Start position, inclusive.
+   * @param to End position, exclusive.
+   */
+  public void delete(int from, int to)
+  {
+    int len = to - from;
+    if (len < 1)
+      throw new AssertionError("Deleting " + from + " till " + to);
+
+    int tail = length - to;
+
+    System.arraycopy(chr, to, chr, from, tail);
+    System.arraycopy(position, to, position, from, tail);
+    System.arraycopy(line, to, line, from, tail);
+    length = length - len;
+  }
+
+  /**
+   * Double the buffer size.
+   */
+  public void expand()
+  {
+    int nSize = 2 * chr.length;
+
+    char[] nchr = new char[ nSize ];
+    int[] nposition = new int[ nSize ];
+    int[] nline = new int[ nSize ];
+
+    System.arraycopy(chr, 0, nchr, 0, chr.length);
+    System.arraycopy(position, 0, nposition, 0, position.length);
+    System.arraycopy(line, 0, nline, 0, line.length);
+
+    chr = nchr;
+    position = nposition;
+    line = nline;
+  }
+
+  /**
+   * Return length of the occupied part of the buffer.
+   */
+  public int length()
+  {
+    return length;
+  }
+
+  /**
+   * Prepare for parsing the new document; line numbering restarts as well.
+   */
+  public void reset()
+  {
+    setLength(0);
+    r_seen = n_seen = false;
+    current_line = 0;
+  }
+
+  public String toString()
+  {
+    return new String(chr, 0, length);
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java
new file mode 100644
index 000000000..5416582ad
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java
@@ -0,0 +1,433 @@
+/* Constants.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+import java.util.BitSet;
+
+/**
+ * The parser constants and operations, directly related to the parser
+ * constants.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Constants
+{
+  /* Single character tokens map directly to their ASCII codes. */
+
+  /**
+   * Start of HTML token.
+   */
+  public static final int BEGIN = '<';
+
+  /**
+   * End of HTML token.
+   */
+  public static final int END = '>';
+
+  /**
+   * Exclamation (indicates SGML or comment).
+   */
+  public static final int EXCLAMATION = '!';
+
+  /**
+   * Slash (indicates closing tag).
+   */
+  public static final int SLASH = '/';
+
+  /**
+   * Equals sign.
+   */
+  public static final int EQ = '=';
+
+  /**
+   * Quoting sign.
+   */
+  public static final int AP = '\'';
+
+  /**
+   * Quoting sign.
+   */
+  public static final int QUOT = '"';
+
+  /* The numbers of other tokens start outside the ascii space. */
+  /* String tokens */
+
+  /**
+   * Double dash (--)
+   */
+  public static final int DOUBLE_DASH = 1000;
+
+  /**
+   * The STYLE tag (needs special handling).
+   */
+  public static final int STYLE = 1001;
+
+  /**
+   * The SCRIPT tag (needs special handling).
+   */
+  public static final int SCRIPT = 1002;
+
+  /* Pattern tokens */
+
+  /**
+   * HTML whitespace.
+   */
+  public static final int WS = 1003;
+
+  /**
+   * Named or numeric entity,
+   */
+  public static final int ENTITY = 1004;
+
+  /**
+   * Sequence of valid name characters (can start from digit).
+   */
+  public static final int NUMTOKEN = 1005;
+
+  /* Complex tokens */
+
+  /**
+   * Comment opening sequence.
+   */
+  public static final pattern COMMENT_OPEN =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION),
+                  new node(WS, true), new node(DOUBLE_DASH),
+                }
+               );
+
+  /**
+   * Comment closing sequence
+   */
+  public static final pattern COMMENT_END =
+    new pattern(new node[]
+                {
+                  new node(DOUBLE_DASH), new node(WS, true), new node(END)
+                }
+               );
+
+  /**
+   * Special case ---> (also is treated as end of comment).
+   */
+  public static final pattern COMMENT_TRIPLEDASH_END =
+    new pattern(new node[]
+                {
+                  new node(DOUBLE_DASH), new node(NUMTOKEN), new node(END)
+                }
+               );
+
+  /**
+   * STYLE element heading pattern.
+   */
+  public static final pattern STYLE_OPEN =
+    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(STYLE) });
+
+  /**
+   * SCRIPT element heading pattern.
+   */
+  public static final pattern SCRIPT_OPEN =
+    new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(SCRIPT) });
+
+  /**
+   * SGML element heading pattern.
+   */
+  public static final pattern SGML =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(EXCLAMATION)
+                }
+               );
+
+  /**
+   * SCRIPT element closing pattern.
+   */
+  public static final pattern SCRIPT_CLOSE =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(SLASH),
+                  new node(WS, true), new node(SCRIPT), new node(WS, true),
+                  new node(END)
+                }
+               );
+
+  /**
+   * STYLE element closing pattern.
+   */
+  public static final pattern STYLE_CLOSE =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(SLASH),
+                  new node(WS, true), new node(STYLE), new node(WS, true),
+                  new node(END)
+                }
+               );
+
+  /**
+   * Ordinary HTML tag heading pattern.
+   */
+  public static final pattern TAG =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(SLASH, true),
+                  new node(WS, true), new node(NUMTOKEN)
+                }
+               );
+
+  /**
+   * Ordinary HTML tag closing pattern.
+   */
+  public static final pattern TAG_CLOSE =
+    new pattern(new node[]
+                {
+                  new node(BEGIN), new node(WS, true), new node(SLASH),
+                  new node(WS, true), new node(NUMTOKEN)
+                }
+               );
+
+  /* Special tokens */
+
+  /**
+   * All other tokens.
+   */
+  public static final int OTHER = 1999;
+
+  /**
+   * The UNICODE "end of text" control code
+   */
+  static final char ETX = 3;
+
+  /**
+   * End of file.
+   */
+  public static final int EOF = ETX;
+
+  /* Character categories */
+
+  /**
+   * All single char tokens.
+   */
+  public static final BitSet bSINGLE_CHAR_TOKEN = new BitSet();
+
+  /**
+   * Non letters and non numbers, allowed in HTML names.
+   */
+  public static final BitSet bSPECIAL = new BitSet();
+
+  /**
+   * All letters, used in HTML names.
+   */
+  public static final BitSet bLETTER = new BitSet();
+
+  /**
+   * Digits.
+   */
+  public static final BitSet bDIGIT = new BitSet();
+
+  /**
+   * Both line breaks.
+   */
+  public static final BitSet bLINEBREAK = new BitSet();
+
+  /**
+   * All whitespace.
+   */
+  public static final BitSet bWHITESPACE = new BitSet();
+
+  /**
+   * Both quoting characters.
+   */
+  public static final BitSet bQUOTING = new BitSet();
+
+  /**
+   * Valid name characters.
+   */
+  public static final BitSet bNAME = new BitSet();
+
+  /* Entity subcategories */
+
+  /**
+   * Named entity.
+   */
+  public static final int ENTITY_NAMED = 1;
+
+  /**
+   * Numeric entity.
+   */
+  public static final int ENTITY_NUMERIC = 2;
+
+  static
+  {
+    bQUOTING.set(AP);
+    bQUOTING.set(QUOT);
+
+    bSINGLE_CHAR_TOKEN.set(BEGIN);
+    bSINGLE_CHAR_TOKEN.set(END);
+    bSINGLE_CHAR_TOKEN.set(EXCLAMATION);
+    bSINGLE_CHAR_TOKEN.set(SLASH);
+    bSINGLE_CHAR_TOKEN.set(EQ);
+    bSINGLE_CHAR_TOKEN.set(EOF);
+
+    bSINGLE_CHAR_TOKEN.or(bQUOTING);
+
+    bLINEBREAK.set('\r');
+    bLINEBREAK.set('\n');
+
+    bWHITESPACE.set(' ');
+    bWHITESPACE.set('\t');
+    bWHITESPACE.set(0xC);
+    bWHITESPACE.or(bLINEBREAK);
+
+    for (char i = '0'; i <= '9'; i++)
+      {
+        bDIGIT.set(i);
+      }
+
+    for (char i = 'a'; i <= 'z'; i++)
+      {
+        bLETTER.set(i);
+      }
+
+    for (char i = 'A'; i <= 'Z'; i++)
+      {
+        bLETTER.set(i);
+      }
+
+    bSPECIAL.set('-');
+    bSPECIAL.set('_');
+    bSPECIAL.set(':');
+    bSPECIAL.set('.');
+
+    bNAME.or(bLETTER);
+    bNAME.or(bDIGIT);
+    bNAME.or(bSPECIAL);
+  }
+
+  /**
+   * Verifies if one of the tokens matches the end of the buffer. The
+   * last character in the buffer is the "future character": some
+   * tokens need it to verify that the token does not continue "towards
+   * the future". A matching token ends at the "pre-last" character.
+   * Checked in turn: double dash, single char tokens, names, whitespace.
+   * @param b the buffer to inspect; fewer than two chars never match
+   * @return the token ending at the pre-last character, or null if none
+   */
+  public Token endMatches(Buffer b)
+  {
+    if (b.length() < 2)
+      return null;
+
+    int p = b.length() - 2;
+
+    if (b.length() > 2 && b.charAt(p) == '-' && b.charAt(p - 1) == '-')
+      return new Token(DOUBLE_DASH, "--", b.getLocation(p - 1, p + 1));
+
+    char last = b.charAt(p);
+
+    if (bSINGLE_CHAR_TOKEN.get(last))
+      return new Token(last, last, b.getLocation(p, p + 1));
+
+    char future = b.charAt(p + 1);
+
+    // Check for numtokens, script and style:
+    if (bNAME.get(last) && !bNAME.get(future))
+      {
+        // Scan the history up:
+        int u = p - 1;
+        while (u >= 0 && bNAME.get(b.charAt(u)))
+          u--;
+        u++;
+
+        char[] token = new char[ p - u + 1 ];
+
+        // Found a numtoken
+        b.getChars(u, p + 1, token, 0);
+
+        // Verify for the built-in tokens:
+        String e = new String(token);
+
+        // found the entity reference
+        if (u > 0 && b.charAt(u - 1) == '&')
+          {
+            // The subsequent semicolon may be the part of the token
+            // as well. The semicolon must be ignored. This must be
+            // handled elsewhere.
+            return new Token(ENTITY, ENTITY_NAMED, "&" + e,
+                             b.getLocation(u - 1, p + 1)
+                            );
+          }
+
+        // found the numeric entity reference
+        if (u > 1 && b.charAt(u - 1) == '#' && b.charAt(u - 2) == '&')
+          {
+            // The subsequent semicolon may be part of the token as well; it
+            // must be ignored and is handled elsewhere. NOTE(review): the end
+            // index p + 2 differs from the named case (p + 1) - confirm intent.
+            return new Token(ENTITY, ENTITY_NUMERIC, "&#" + e,
+                             b.getLocation(u - 2, p + 2)
+                            );
+          }
+
+        Location le = b.getLocation(u, p + 1);
+
+        if (e.equalsIgnoreCase("SCRIPT"))
+          return new Token(SCRIPT, e, le);
+        else if (e.equalsIgnoreCase("STYLE"))
+          return new Token(STYLE, e, le);
+        else
+          return new Token(NUMTOKEN, e, le);
+      }
+
+    // Check for whitespace
+    if (bWHITESPACE.get(last) && !bWHITESPACE.get(future))
+      {
+        // Scan the history up:
+        int u = p - 1;
+        while (u >= 0 && bWHITESPACE.get(b.charAt(u)))
+          u--;
+        u++;
+
+        char[] token = new char[ p - u + 1 ];
+        b.getChars(u, p + 1, token, 0);
+
+        return new Token(WS, new String(token), b.getLocation(u, p + 1));
+      }
+
+    return null;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Location.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Location.java
new file mode 100644
index 000000000..8a1cde1c8
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Location.java
@@ -0,0 +1,83 @@
+/* Location.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+/**
+ * Defines a region in the text: its bounding positions and the line numbers.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Location
+{
+  /**
+   * The number of the line where the token starts.
+   */
+  public int beginLine;
+
+  /**
+   * The number of the line where the token ends.
+   */
+  public int endLine;
+
+  /**
+   * The absolute token end position in the input stream, exclusive.
+   */
+  public int endPosition;
+
+  /**
+   * The absolute token start position in the input stream, inclusive.
+   */
+  public int startPosition;
+
+  /** Creates a location whose fields keep their default (zero) values. */
+  public Location()
+  {
+  }
+
+  /**
+   * Special case, used to mark EOF: a one-character region starting at
+   * <code>p</code>, with both line numbers set to -1.
+   * @param p The total stream length.
+   */
+  public Location(int p)
+  {
+    startPosition = p;
+    endPosition = p + 1;
+    beginLine = endLine = -1;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ParseException.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ParseException.java
new file mode 100644
index 000000000..e71c0c1f6
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ParseException.java
@@ -0,0 +1,51 @@
+/* ParseException.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+/**
+ * Unchecked exception that can be thrown from various parsing methods; it wraps the cause.
+ */
+public class ParseException
+  extends RuntimeException
+{
+  public ParseException(String s, Throwable cause)
+  {
+    super(s, cause);
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Queue.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Queue.java
new file mode 100644
index 000000000..31cf4bb4d
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Queue.java
@@ -0,0 +1,142 @@
+/* Queue.java -- a token queue.
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+import java.util.Arrays;
+
+/**
+ * A token queue.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Queue
+{
+  Token[] m = new Token[ 64 ]; // The backing array.
+  int a = 0; // Index one past the newest queued token.
+  int b = 0; // Index of the oldest queued token.
+
+  /**
+   * True for the empty queue.
+   */
+  public boolean isEmpty()
+  {
+    return size() == 0;
+  }
+
+  /**
+   * Add this token to the end of the queue, compacting or growing the array if full.
+   */
+  public void add(Token u)
+  {
+    if (a < m.length)
+      {
+        m [ a ] = u;
+        a++;
+      }
+    else // The end of array has been reached.
+      {
+        if (b > 0) // If some elements were deleted from the start of the queue, shift.
+          {
+            int live = a - b;
+            System.arraycopy(m, b, m, 0, live);
+            Arrays.fill(m, live, a, null); // Drop the stale copies left behind the shift.
+            b = 0;
+            a = live;
+            m [ a++ ] = u;
+          }
+        else // Enlarge the queue, doubling the size.
+          {
+            int n = m.length * 2;
+            Token[] nm = new Token[ n ]; // n is already twice the old length.
+            System.arraycopy(m, 0, nm, 0, m.length);
+            Arrays.fill(m, null);
+
+            nm [ a ] = u;
+            m = nm;
+            a++;
+          }
+      }
+  }
+
+  /**
+   * Clear the queue.
+   */
+  public void clear()
+  {
+    a = b = 0;
+    Arrays.fill(m, null);
+  }
+
+  /**
+   * Read the value ahead. 0 is the value that will be returned with
+   * the following next. This method does not remove values from the
+   * queue. size() must be checked before calling this method, because
+   * an ArrayIndexOutOfBoundsException is thrown if ahead &gt;= size().
+   */
+  public Token get(int ahead)
+  {
+    int p = b + ahead;
+    if (p < a)
+      return m [ p ];
+    else
+      throw new ArrayIndexOutOfBoundsException("Not enough tokens");
+  }
+
+  /**
+   * Read the oldest value from the queue and remove this value from
+   * the queue. Throws ArrayIndexOutOfBoundsException if the queue is empty.
+   */
+  public Token next()
+  {
+    if (a == b)
+      throw new ArrayIndexOutOfBoundsException("queue empty");
+
+    Token r = m [ b ];
+    m [ b ] = null;
+    b++;
+    return r;
+  }
+
+  /**
+   * Size of the queue.
+   */
+  public int size()
+  {
+    return a - b;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java
new file mode 100644
index 000000000..45ac181b3
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java
@@ -0,0 +1,373 @@
+/* ReaderTokenizer.java -- splits the input char sequence into tokens.
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Reader splits the input char sequence into tokens.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class ReaderTokenizer
+  extends Constants
+{
+  /**
+   * This is set to true each time the getNextToken is called.
+   * Used in preventing loops when all patterns refuse to accept
+   * the invalid input.
+   */
+  protected boolean advanced;
+
+  /**
+   * If true, the returned tokens are also placed in the backup
+   * queue.
+   */
+  protected boolean backupMode;
+
+  /**
+   * The buffer to read document into.
+   */
+  Buffer buffer = new Buffer();
+
+  /**
+   * The queue for supporting mark().
+   */
+  Queue backup = new Queue();
+
+  /**
+   * The queue of found tokens.
+   */
+  Queue queue = new Queue();
+
+  /**
+   * The reader to read the document from.
+   */
+  Reader reader;
+
+  /**
+   * Array of char tokens
+   */
+  char[] charTokens;
+
+  /**
+   * Array of string tokens.
+   */
+  String[] stringTokens;
+
+  /**
+   * The current reader position.
+   */
+  int readerPosition = -1;
+
+  /**
+   * Creates a new ReaderTokenizer. The reset(...) method must be
+   * subsequently called to set the reader.
+   */
+  public ReaderTokenizer()
+  {
+  }
+
+  /**
+   * Return the sequence, used to separate lines in the document.
+   * @return one of \n, \r or \r\n.
+   */
+  public String getEndOfLineSequence()
+  {
+    return buffer.getEndOfLineSequence();
+  }
+
+  /**
+   * Get the next token, reading one more from the reader if the queue is empty.
+   * @return the next token, or an EOF token if the queue is still empty after reading.
+   */
+  public Token getNextToken()
+  {
+    Token rt;
+    advanced = true;
+    try
+      {
+        if (queue.isEmpty())
+          read(1);
+
+        if (!queue.isEmpty())
+          rt = queue.next();
+        else
+          rt = new Token(EOF, new Location(readerPosition));
+      }
+    catch (IOException ex)
+      {
+        throw new ParseException("IO Exception", ex);
+      }
+    if (backupMode)
+      backup.add(rt); // remembered so that reset() can replay it
+    return rt;
+  }
+
+  /**
+   * Get a token, lying the given number of tokens ahead.
+   * getTokenAhead(0) will return the same token,
+   * what would be returned by getNextToken().
+   * The token is not removed from the queue. If no token is available
+   * at that distance (end of stream), the EOF token is returned.
+   */
+  public Token getTokenAhead(int ahead)
+  {
+    try
+      {
+        read(ahead - queue.size() + 1);
+        // queue.get(ahead) is only valid while ahead < size(); otherwise it throws.
+        return queue.size() > ahead ? queue.get(ahead) : eofToken();
+      }
+    catch (IOException ex)
+      {
+        throw new ParseException("IO Exception", ex);
+      }
+  }
+
+  /**
+   * Get the token that is immediately ahead, without removing it.
+   * If the end of stream is
+   * reached, the EOF token is always returned.
+   * The method is intended to be equivalent to calling getTokenAhead(0).
+   */
+  public Token getTokenAhead()
+  {
+    try
+      {
+        if (queue.isEmpty())
+          read(1);
+        if (!queue.isEmpty())
+          return queue.get(0);
+        else
+          return eofToken();
+      }
+    catch (IOException ex)
+      {
+        throw new ParseException("IO Exception", ex);
+      }
+  }
+
+  /**
+   * Invokes the error handler. This implementation only prints msg to System.out.
+   */
+  public void error(String msg, Token at)
+  {
+    System.out.println(msg);
+  }
+
+  /**
+   * Turns the backup mode on or off.
+   * It is possible to return where the mark(true) was last called
+   * by calling reset().
+   * @param mode True if it is required to save tokens, making
+   * returning to the current point possible.
+   */
+  public void mark(boolean mode)
+  {
+    backup.clear();
+    backupMode = mode;
+  }
+
+  /**
+   * Prepare for new parsing from the given stream.
+   * @param a_reader A reader to parse from.
+   */
+  public void reset(Reader a_reader)
+  {
+    reader = a_reader;
+    readerPosition = -1;
+    buffer.reset();
+    queue.clear();
+  }
+
+  /**
+   * Reset the internal cursor to the position where the mark()
+   * was last time called. Switches the backup mode off. Throws AssertionError if not marked.
+   */
+  public void reset()
+  {
+    if (!backupMode)
+      throw new AssertionError("Call mark(true) before using reset()!");
+    backupMode = false;
+
+    // Tokens still waiting in the queue are appended to the end of backup.
+    while (!queue.isEmpty())
+      backup.add(queue.next());
+
+    Queue t = queue;
+    queue = backup; // the backed-up tokens will now be returned again
+    backup = t;
+    backup.clear();
+  }
+
+  /**
+   * Read the given number of the tokens. Add the needed number of EOF
+   * tokens if there are no more data in the stream.
+   * @param numberOfTokens The number of additional tokens to read.
+   */
+  void read(int numberOfTokens)
+     throws IOException
+  {
+    if (numberOfTokens <= 0)
+      return;
+
+    for (int i = 0; i < numberOfTokens; i++)
+      readToken();
+  }
+
+  /**
+   * Read from the reader until tokenMatches() yields a token or the stream ends.
+   */
+  void readToken()
+          throws IOException
+  {
+    Token t;
+    int ch;
+
+    enlarging:
+    while (true)
+      {
+        t = tokenMatches();
+        if (t != null)
+          break enlarging;
+        else
+          {
+            ch = reader.read();
+            readerPosition++;
+            if (ch == ETX) // ETX is appended below as the end marker, so a literal one becomes a space
+              ch = ' ';
+            if (ch < 0) // end of stream
+              {
+                if (buffer.length() == 0)
+                  {
+                    queue.add(eofToken());
+                    return;
+                  }
+                else
+                  {
+                    if (buffer.charAt(buffer.length() - 1) != ETX)
+                      buffer.append(ETX, readerPosition++); // mark the end, then retry matching
+                    else
+                      {
+                        // Discard terminating ETX
+                        buffer.setLength(buffer.length() - 1);
+                        if (buffer.length() > 0)
+                          {
+                            t = new Token(OTHER, buffer.toString(),
+                                          buffer.getLocation(0, buffer.length())
+                                         );
+                            queue.add(t);
+                            buffer.setLength(0);
+                          }
+                        return; // note: nothing is queued here if the buffer held only the ETX
+                      }
+                  }
+              }
+            else
+              buffer.append((char) ch, readerPosition);
+          }
+      }
+  }
+
+  /**
+   * Check if the end of buffer matches one of the tokens. If it does,
+   * remove the token sequence from the buffer, add the token to the
+   * queue (after an OTHER token for any unmatched text) and return it.
+   * @return The matching token, or null if nothing matched.
+   */
+  Token tokenMatches()
+  {
+    Token rt = endMatches(buffer);
+    if (rt != null) // Remove the matched image
+      {
+        // Consume future character if it was an entity and the future
+        // character is semicolon.
+        if (rt.kind == ENTITY)
+          {
+            if (buffer.charAt(buffer.length() - 1) == ';')
+              buffer.setLength(buffer.length() - rt.getImage().length() - 1);
+            else
+              {
+                error("Missing closing semicolon for entity '" + rt.getImage() +
+                      "'", rt
+                     );
+                consumeBuffer(rt);
+              }
+          }
+        else
+          {
+            consumeBuffer(rt);
+          }
+      }
+
+    // If the buffer is not empty, some sequence does not match any tokens.
+    // Add it to the queue as "OTHER".
+    if (rt != null)
+      {
+        if (buffer.length() > 1)
+          {
+            String rest = buffer.toString();
+            rest = rest.substring(0, rest.length() - 1); // all but the last buffered char
+
+            Token other =
+              new Token(OTHER, rest, buffer.getLocation(0, buffer.length));
+            queue.add(other);
+            consumeBuffer(other);
+          }
+        queue.add(rt);
+      }
+    return rt;
+  }
+
+  private void consumeBuffer(Token rt)
+  {
+    buffer.delete(buffer.length() - rt.getImage().length() - 1,
+                  buffer.length() - 1
+                 );
+  }
+
+  /**
+   * Create EOF token.
+   */
+  private Token eofToken()
+  {
+    return new Token(EOF, "#", new Location(readerPosition));
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Token.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Token.java
new file mode 100644
index 000000000..d91adf47a
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Token.java
@@ -0,0 +1,169 @@
+/* Token.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+/**
+ * A token.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Token
+{
+  /**
+   * The place of this token in the document.
+   */
+  public Location where;
+
+  /**
+   * The additional category of token.
+   */
+  public int category;
+
+  /**
+   * An integer that describes the kind of this token.
+   */
+  public int kind;
+
+  /**
+   * The string image of the token, null if the char image must be used.
+   */
+  private String stringImage;
+
+  /**
+   * The char image of the token.
+   */
+  private char charImage;
+
+  /**
+   * Creates a new token with fields, initialized to the default values.
+   */
+  public Token()
+  {
+  }
+
+  /**
+   * Creates a new token of the given kind.
+   */
+  public Token(int _kind, Location _where)
+  {
+    kind = _kind;
+    where = _where;
+  }
+
+  /**
+   * Creates a new token of the given kind and given single char image.
+   */
+  public Token(int _kind, char _image, Location _where)
+  {
+    kind = _kind;
+    charImage = _image;
+    where = _where;
+  }
+
+  /**
+   * Creates a new token of the given kind and given string image.
+   */
+  public Token(int _kind, String _image, Location _where)
+  {
+    kind = _kind;
+    stringImage = _image;
+    where = _where;
+  }
+
+  /**
+   * Creates a new token of the given kind, category and given string image.
+   */
+  public Token(int _kind, int _category, String _image, Location _where)
+  {
+    kind = _kind;
+    category = _category;
+    stringImage = _image;
+    where = _where;
+  }
+
+  /**
+   * Creates a new token, where location fields are set as for token,
+   * spanning over two provided tokens and any tokens between them.
+   * The image is left null; kind is not assigned here (it stays 0, not -1).
+   */
+  public Token(Token fromInclusive, Token toInclusive)
+  {
+    where = new Location();
+    where.beginLine = fromInclusive.where.beginLine;
+    where.startPosition = fromInclusive.where.startPosition;
+
+    where.endLine = toInclusive.where.endLine;
+    where.endPosition = toInclusive.where.endPosition;
+  }
+
+  public String getImage()
+  {
+    if (kind == 3) // NOTE(review): 3 is presumably the EOF kind constant; confirm.
+      return "#";
+    if (stringImage == null)
+      {
+        if (charImage == 0)
+          return null;
+        stringImage = new String(new char[] { charImage }); // cached for later calls
+      }
+    return stringImage;
+  }
+
+  /**
+   * Append the token image to the given string buffer.
+   * This may be more efficient than buffer.append(this.getImage()).
+   * @param buffer A buffer to append to.
+   */
+  public void appendTo(StringBuffer buffer)
+  {
+    if (charImage == 0)
+      buffer.append(getImage());
+    else
+      buffer.append(charImage);
+  }
+
+  /**
+   * Returns the kind and the image as kind'image or, if the image is null, the bounding positions.
+   */
+  public String toString()
+  {
+    return getImage() != null ? kind + "'" + getImage()
+           : "<line " + where.beginLine + ", abs pos " + where.startPosition +
+           ".." + where.endPosition + ">";
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/node.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/node.java
new file mode 100644
index 000000000..b54ed86a3
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/node.java
@@ -0,0 +1,78 @@
+/* node.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+/**
+ * A text level content model node. The only required unary operations
+ * here are "appears" and "optionally appears" ('?').
+ * <p>@author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)</p>
+ */
+public class node
+{
+  /**
+   * True for node that is optional for the given position.
+   */
+  public boolean optional;
+
+  /**
+   * The kind of the token to match.
+   */
+  public int kind;
+
+  /**
+   * Creates the new node for matching a given kind of the token.
+   * @param kind The kind of the token to match.
+   * @param _optional True if the node is optional ('?'), false if it must appear.
+   */
+  public node(int kind, boolean _optional)
+  {
+    this.kind = kind;
+    optional = _optional;
+  }
+
+  /**
+   * Creates the node, indicating that token must match exactly one time.
+   * @param kind The kind of token to match.
+   */
+  public node(int kind)
+  {
+    this.kind = kind;
+    optional = false;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/package.html b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/package.html
new file mode 100644
index 000000000..173583015
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/package.html
@@ -0,0 +1,47 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!-- package.html - describes classes in gnu.javax.swing.text.html.parser.support.low package.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. -->
+
+<html>
+<head><title>GNU Classpath - gnu.javax.swing.text.html.parser.support.low</title></head>
+
+<body>
+<p>This package contains classes that are directly used to process
+the text input: adapted stream tokenizer, specialized buffer and text-level content models.</p>
+@author Audrius Meskauskas, Lithuania
+</body>
+</html>
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/pattern.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/pattern.java
new file mode 100644
index 000000000..0fe03fdbe
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/pattern.java
@@ -0,0 +1,105 @@
+/* pattern.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support.low;
+
+
+/**
+ * A simple pattern: a sequence of token nodes, each of which may carry
+ * the unary modifier '?' (optional). Choices and grouping are not
+ * required here.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class pattern
+{
+  /**
+   * The nodes of this pattern, in matching order.
+   */
+  public final node[] nodes;
+
+  /**
+   * Create a pattern, containing the given list of nodes.
+   * @param a_nodes the nodes; the array is stored as is, not copied.
+   */
+  public pattern(node[] a_nodes)
+  {
+    this.nodes = a_nodes;
+  }
+
+  /**
+   * Checks whether the tokens at the head of the tokenizer match this
+   * pattern, inspecting them only through getTokenAhead. A node matches
+   * a token of the same kind; an optional node that does not match is
+   * skipped, while a mandatory one fails the match.
+   * @param stream The tokenizer to read data from
+   * @return True if the pattern sequence matches the
+   * beginning of the tokenizer content.
+   * @throws ParseException wrapping any exception raised while matching.
+   */
+  public boolean matches(ReaderTokenizer stream)
+  {
+    try
+      {
+        // Look-ahead offset of the next token to compare.
+        int ahead = 0;
+
+        for (int i = 0; i < nodes.length; i++)
+          {
+            node expected = nodes [ i ];
+            Token actual = stream.getTokenAhead(ahead);
+
+            if (actual.kind == expected.kind)
+              {
+                // Matched: the next node is compared with the next token.
+                ahead++;
+              }
+            else if (!expected.optional)
+              {
+                // A mandatory node is missing.
+                return false;
+              }
+            // Otherwise an absent optional node is simply skipped.
+          }
+        return true;
+      }
+    catch (Exception ex)
+      {
+        throw new ParseException("Exception", ex);
+      }
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/package.html b/libjava/classpath/gnu/javax/swing/text/html/parser/support/package.html
new file mode 100644
index 000000000..97c6439b3
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/package.html
@@ -0,0 +1,47 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!-- package.html - describes classes in gnu.javax.swing.text.html.parser.support package.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. -->
+
+<html>
+<head><title>GNU Classpath - gnu.javax.swing.text.html.parser.support</title></head>
+
+<body>
+<p>This package provides various specialised classes, needed by HTML parser.
+</p>
+@author Audrius Meskauskas, Lithuania
+</body>
+</html>
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/parameterDefaulter.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/parameterDefaulter.java
new file mode 100644
index 000000000..43c07572a
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/parameterDefaulter.java
@@ -0,0 +1,106 @@
+/* parameterDefaulter.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support;
+
+import gnu.javax.swing.text.html.parser.htmlAttributeSet;
+
+import java.util.Hashtable;
+
+import javax.swing.text.html.parser.AttributeList;
+import javax.swing.text.html.parser.DTD;
+import javax.swing.text.html.parser.Element;
+
+/**
+ * Supplies the attribute set holding the default parameters (attribute
+ * values) of a given element, as declared in a DTD. The sets are
+ * cached per lower-cased element name.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class parameterDefaulter
+{
+  public final DTD dtd;
+  // Cache of default sets, keyed by lower-cased element name.
+  Hashtable sets = new Hashtable();
+
+  /**
+   * Create a parameterDefaulter that looks for the default attribute
+   * values in the given DTD.
+   * @param a_dtd the DTD to take the element definitions from.
+   */
+  public parameterDefaulter(DTD a_dtd)
+  {
+    this.dtd = a_dtd;
+  }
+
+  /**
+   * Get the default parameter set for the given element. On a cache miss
+   * the newly built set is returned even if it is empty; later calls for
+   * such an element return the shared EMPTY_HTML_ATTRIBUTE_SET instead.
+   * @param element The element name (case insensitive).
+   * @return the default attribute set.
+   */
+  public htmlAttributeSet getDefaultParameters(String element)
+  {
+    String key = element.toLowerCase();
+    htmlAttributeSet cached = (htmlAttributeSet) sets.get(key);
+    if (cached != null)
+      return cached;
+
+    // Cache miss: collect every attribute that declares a default value.
+    htmlAttributeSet defaults = new htmlAttributeSet();
+    Element e = dtd.elementHash.get(key);
+
+    if (e != null)
+      {
+        for (AttributeList a = e.getAttributes(); a != null; a = a.next)
+          {
+            if (a.value != null)
+              defaults.addAttribute(a.name, a.value);
+          }
+      }
+
+    // Elements without defaults share one constant in the cache.
+    if (defaults.getAttributeCount() > 0)
+      sets.put(key, defaults);
+    else
+      sets.put(key, htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET);
+
+    return defaults;
+  }
+}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java
new file mode 100644
index 000000000..22c44be4f
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java
@@ -0,0 +1,189 @@
+/* textPreProcessor.java --
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser.support;
+
+import gnu.javax.swing.text.html.parser.support.low.Constants;
+
+/**
+ * Pre-processes the text found in the text parts of an HTML document.
+ * {@link #preprocess(StringBuffer)} handles ordinary text and
+ * {@link #preprocessPreformatted(StringBuffer)} handles pre-formatted
+ * text. Both return null when the given buffer is empty.
+ *
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class textPreProcessor
+{
+  /**
+   * Pre-processes non-preformatted text. Every whitespace character (as
+   * defined by Constants.bWHITESPACE) becomes a blank and each run of
+   * whitespace collapses into a single blank. A run at the very start or
+   * end of the text is likewise reduced to one blank, not removed. The
+   * content of the passed buffer is destroyed.
+   *
+   * @param a_text the text to pre-process.
+   * @return the processed characters, or null if the buffer is empty.
+   */
+  public char[] preprocess(StringBuffer a_text)
+  {
+    if (a_text.length() == 0)
+      return null;
+
+    char[] text = toCharArray(a_text);
+    int first = 0;
+    int last = text.length - 1;
+
+    // Advance past leading whitespace, stopping on the last character of
+    // the run so that exactly one whitespace character survives.
+    while (first + 1 < text.length
+           && Constants.bWHITESPACE.get(text[first])
+           && Constants.bWHITESPACE.get(text[first + 1]))
+      first++;
+
+    // Do the same from the other end, never moving past 'first'.
+    while (last > first
+           && Constants.bWHITESPACE.get(text[last])
+           && Constants.bWHITESPACE.get(text[last - 1]))
+      last--;
+
+    // The buffer is reused to collect the output.
+    a_text.setLength(0);
+
+    // True when the previously examined character was whitespace.
+    boolean previousWasSpace = false;
+
+    for (int i = first; i <= last; i++)
+      {
+        char ch = text[i];
+        boolean space = Constants.bWHITESPACE.get(ch);
+
+        // Only the first character of a whitespace run is emitted, and it
+        // is always written as a plain blank.
+        if (!(space && previousWasSpace))
+          a_text.append(space ? ' ' : ch);
+
+        previousWasSpace = space;
+      }
+
+    // When nothing was dropped the original array can be refilled and
+    // returned instead of allocating a new one.
+    if (a_text.length() != text.length)
+      return toCharArray(a_text);
+
+    a_text.getChars(0, a_text.length(), text, 0);
+    return text;
+  }
+
+  /**
+   * Pre-processes pre-formatted text. Leading and trailing spaces and tabs
+   * are preserved. ONE line break (\r, \n or \r\n) is removed from each end
+   * of the text, and every remaining \r or \r\n becomes \n. The content of
+   * the passed buffer is destroyed.
+   *
+   * @param a_text the text to pre-process.
+   * @return the processed characters, or null if the buffer is empty or
+   * nothing remains once the bounding line breaks are removed.
+   */
+  public char[] preprocessPreformatted(StringBuffer a_text)
+  {
+    if (a_text.length() == 0)
+      return null;
+
+    char[] text = toCharArray(a_text);
+
+    // 'end' is the last index; 'from' and 'to' bound the part that is kept.
+    int end = text.length - 1;
+    int from = 0;
+    int to = end;
+
+    // Skip one leading line break. It may be \n, \r or \r\n, so 'from'
+    // advances by one or two characters.
+    if (text[0] == '\n')
+      from = 1;
+    else if (text[0] == '\r')
+      from = (text.length > 1 && text[1] == '\n') ? 2 : 1;
+
+    // Skip one trailing line break in the same way; 'to' retreats by one
+    // character for \r or \n and by two for \r\n.
+    if (text[end] == '\r')
+      to = end - 1;
+    else if (text[end] == '\n')
+      to = (end > 0 && text[end - 1] == '\r') ? end - 2 : end - 1;
+
+    // The buffer is reused to collect the output.
+    a_text.setLength(0);
+
+    // The text consisted of nothing but its bounding line breaks.
+    if (from > to)
+      return null;
+
+    for (int i = from; i <= to; i++)
+      {
+        char ch = text[i];
+
+        if (ch != '\r')
+          a_text.append(ch);
+        else if (i == to || text[i + 1] != '\n')
+          a_text.append('\n');
+        // Otherwise the \r starts a \r\n pair; only the \n is kept.
+      }
+
+    // When nothing was dropped the original array can be refilled and
+    // returned instead of allocating a new one.
+    if (a_text.length() != text.length)
+      return toCharArray(a_text);
+
+    a_text.getChars(0, a_text.length(), text, 0);
+    return text;
+  }
+
+  /**
+   * Copies the current content of the buffer into a new array.
+   *
+   * @param a_text the buffer to copy.
+   * @return a new array holding the characters of the buffer.
+   */
+  private static char[] toCharArray(StringBuffer a_text)
+  {
+    int length = a_text.length();
+    char[] chars = new char[ length ];
+    a_text.getChars(0, length, chars, 0);
+    return chars;
+  }
+}