1 files changed, 622 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java b/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java
new file mode 100644
index 000000000..2b624cc3c
--- /dev/null
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java
@@ -0,0 +1,622 @@
+/* htmlValidator.java -- The HTML content validator.
+   Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.parser;
+
+import gnu.java.lang.CPStringBuilder;
+
+import gnu.javax.swing.text.html.parser.models.node;
+import gnu.javax.swing.text.html.parser.models.transformer;
+
+import java.util.BitSet;
+import java.util.Enumeration;
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+import javax.swing.text.SimpleAttributeSet;
+import javax.swing.text.html.HTML;
+import javax.swing.text.html.parser.*;
+
+/**
+ * <p>The HTML content validator is responsible for opening and
+ * closing elements with optional start/end tags, detecting
+ * wrongly placed HTML tags and reporting errors. The working instance
+ * is an inner class inside the {@link javax.swing.text.html.parser.Parser}.
+ * </p>
+ * <p>This class could potentially
+ * provide a basis for automated closing and insertion of HTML tags,
+ * correcting the HTML errors that were found.</p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public abstract class htmlValidator
+{
+  /**
+   * The tag reference, holding additional information that the tag
+   * has been forcibly closed, and the content model trace of the tag.
+   */
+  protected class hTag
+  {
+    protected final Element element;       // an_element.getElement()
+    protected final HTML.Tag tag;          // an_element.getHTMLTag()
+    protected final TagElement tgElement;  // the tag element, as passed in
+    protected boolean forcibly_closed;     // set by the forciblyClose* methods
+    protected node validationTrace;        // null if element.content is null
+
+    protected hTag(TagElement an_element)
+    {
+      element = an_element.getElement();
+      tag = an_element.getHTMLTag();
+      tgElement = an_element;
+      // Only the elements with a content model get the validation trace.
+      if (element.content != null)
+        validationTrace = transformer.transform(element.content, dtd);
+    }
+
+    /**
+     * This is called when the tag must be forcibly closed because
+     * it would make the newly appearing tag invalid.
+     * The parser is not notified about such event (just the error
+     * is reported). For such tags, the closing message does not
+     * appear when later reaching the end of stream. The exception is
+     * the &lt;head&gt; tag: the parser is notified about its silent closing
+     * when &lt;body&gt; or other html content appears.
+     */
+    protected void forciblyCloseDueContext()
+    {
+      forcibly_closed = true; // only the flag is set, nothing is notified
+    }
+
+    /**
+     * This is called when the tag must be forcibly closed after
+     * reaching the end of stream. The parser is notified as if
+     * closing the tag explicitly.
+     */
+    protected void forciblyCloseDueEndOfStream()
+    {
+      forcibly_closed = true;
+      handleSupposedEndTag(element); // the notification of the parser
+    }
+  }
+
+  /**
+   * The DTD, providing information about the valid document structure.
+   */
+  protected final DTD dtd;
+
+  /**
+   * The stack of hTag entries; the last entry is the innermost open tag.
+   */
+  protected final LinkedList stack = new LinkedList();
+
+  /**
+   * Creates a new validator with the empty tag stack, using the given DTD.
+   * @param a_dtd A DTD, providing the information about the valid
+   * tag content.
+   */
+  public htmlValidator(DTD a_dtd)
+  {
+    dtd = a_dtd;
+  }
+
+  /**
+   * Close all opened tags (called at the end of parsing).
+   */
+  public void closeAll()
+  {
+    hTag h;
+    while (!stack.isEmpty())
+      {
+        h = (hTag) stack.getLast();
+        if (!h.forcibly_closed && !h.element.omitEnd())
+          s_error("Unclosed <" + h.tag + ">, closing at the end of stream");
+
+        handleSupposedEndTag(h.element);
+
+        closeTag(h.tgElement);
+      }
+  }
+
+  /**
+   * Remove the innermost matching tag from the stack (returns true), or
+   * report the error if no such tag is open (returns false).
+   */
+  public boolean closeTag(TagElement tElement)
+  {
+    HTML.Tag tag = tElement.getHTMLTag();
+    hTag x;
+    hTag close;
+    // The stack is searched from the innermost (last) tag outwards.
+    if (!stack.isEmpty())
+      {
+        ListIterator iter = stack.listIterator(stack.size());
+
+        while (iter.hasPrevious())
+          {
+            x = (hTag) iter.previous();
+            if (tag.equals(x.tag))
+              {
+                if (x.forcibly_closed && !x.element.omitEnd())
+                  s_error("The tag <" + x.tag +
+                          "> has already been forcibly closed"
+                         );
+
+
+                // If the tag has a content model defined, forcibly close all
+                // tags that were opened after the tag being currently closed.
+                closing:
+                if (x.element.content != null)
+                  {
+                    iter = stack.listIterator(stack.size());
+                    while (iter.hasPrevious())
+                      {
+                        close = (hTag) iter.previous();
+                        if (close == x)
+                          break closing;
+                        handleSupposedEndTag(close.element);
+                        iter.remove();
+                      }
+                  }
+                // Finally, remove the tag being closed itself.
+                stack.remove(x);
+                return true;
+              }
+          }
+      }
+    s_error("Closing unopened <" + tag + ">");
+    return false;
+  }
+
+  /**
+   * Add the given HTML tag to the stack of the opened tags, first opening
+   * or closing the tags with the optional start/end as the context requires.
+   * @param tElement the tag being opened; the fictional tags are ignored.
+   * @param parameters the attributes of the tag, validated before opening.
+   */
+  public void openTag(TagElement tElement, htmlAttributeSet parameters)
+  {
+    // If this is a fictional call, the message from the parser
+    // has recursively returned - ignore.
+    if (tElement.fictional())
+      return;
+
+    validateParameters(tElement, parameters);
+
+    // If the stack is empty, start from HTML
+    if (stack.isEmpty() && tElement.getHTMLTag() != HTML.Tag.HTML)
+      {
+        Element html = dtd.getElement(HTML.Tag.HTML.toString());
+        openFictionalTag(html);
+      }
+
+    Object v = tagIsValidForContext(tElement);
+    if (v != Boolean.TRUE)
+      {
+        // The tag is not valid for the context. If the content
+        // model suggests another tag, open that tag first.
+        if (v instanceof Element)
+          {
+            int n = 0; // the guard: the loop below runs at most 100 times
+            while (v instanceof Element && (n++ < 100))
+              {
+                Element fe = (Element) v;
+
+                // notify the content model that we add the proposed tag
+                node ccm = getCurrentContentModel();
+                if (ccm != null)
+                  ccm.show(fe);
+                openFictionalTag(fe);
+
+                Object vv = tagIsValidForContext(tElement);
+                if (vv instanceof Element) // One level of nesting is supported.
+                  {
+                    openFictionalTag((Element) vv);
+
+                    Object vx = tagIsValidForContext(tElement);
+                    if (vx instanceof Element)
+                      openFictionalTag((Element) vx);
+                  }
+                else if (vv == Boolean.FALSE)
+                  {
+                    // The tag is still not valid for the current
+                    // content after opening a fictional element.
+                    if (fe.omitEnd())
+                      {
+                        // close the previously opened fictional tag.
+                        closeLast();
+                        vv = tagIsValidForContext(tElement);
+                        if (vv instanceof Element)
+
+                          // another tag was suggested by the content model
+                          openFictionalTag((Element) vv);
+                      }
+                  }
+                v = tagIsValidForContext(tElement);
+              }
+          }
+        else // If the current element has the optional end tag, close it.
+          {
+            if (!stack.isEmpty())
+              {
+                closing:
+                do
+                  {
+                    hTag last = (hTag) stack.getLast();
+                    if (last.element.omitEnd())
+                      {
+                        closeLast();
+                        v = tagIsValidForContext(tElement);
+                        if (v instanceof Element) // another tag was suggested by the content model
+                          {
+                            openFictionalTag((Element) v);
+                            break closing;
+                          }
+                      }
+                    else
+                      break closing;
+                  }
+                while (v == Boolean.FALSE && !stack.isEmpty());
+              }
+          }
+      }
+    // The tag is pushed unconditionally, whatever the validation outcome.
+    stack.add(new hTag(tElement));
+  }
+
+  /**
+   * Clear the tag stack; no end tag notifications are sent for its tags.
+   */
+  public void restart()
+  {
+    stack.clear();
+  }
+
+  /**
+   * Check if this tag is valid for the current context. Return Boolean.True if
+   * it is OK, Boolean.False if it is surely not OK or the Element that the
+   * content model recommends to insert making the situation ok. If Boolean.True
+   * is returned, the content model current position is moved forward. Otherwise
+   * this position remains the same.
+   *
+   * @param tElement
+   * @return
+   */
+  public Object tagIsValidForContext(TagElement tElement)
+  {
+    // Check the current content model, if one is available.
+    node cv = getCurrentContentModel();
+
+    if (cv != null)
+      return cv.show(tElement.getElement());
+
+    // Check exclusions and inclusions.
+    ListIterator iter = stack.listIterator(stack.size());
+    hTag t = null;
+    final int idx = tElement.getElement().index;
+
+    // Check only known tags.
+    if (idx >= 0)
+      {
+        BitSet inclusions = new BitSet();
+        while (iter.hasPrevious())
+          {
+            t = (hTag) iter.previous();
+            if (! t.forcibly_closed)
+              {
+                if (t.element.exclusions != null
+                    && t.element.exclusions.get(idx))
+                  return Boolean.FALSE;
+
+                if (t.element.inclusions != null)
+                  inclusions.or(t.element.inclusions);
+              }
+          }
+        if (! inclusions.get(idx))
+          {
+            // If we need to insert something, and cannot do this, but
+            // it is allowed to insert the paragraph here, insert the
+            // paragraph.
+            Element P = dtd.getElement(HTML_401F.P);
+            if (inclusions.get(P.index))
+              return P;
+            else
+              return Boolean.FALSE;
+          }
+      }
+    return Boolean.TRUE;
+  }
+
+  /**
+   * Validate the tag without leaving it in the tag stack. This is called
+   * for the empty tags and results in the subsequent calls to openTag
+   * and closeTag.
+   */
+  public void validateTag(TagElement tElement, htmlAttributeSet parameters)
+  {
+    openTag(tElement, parameters);
+    closeTag(tElement);
+  }
+
+  /**
+   * Check the next element against the content model of the last opened tag.
+   * @param tElement The element that will be inserted next.
+   * @param first If false, the last tag is closed before a recommended insert.
+   */
+  protected void checkContentModel(TagElement tElement, boolean first)
+  {
+    if (stack.isEmpty())
+      return;
+    hTag top = (hTag) stack.getLast();
+    if (top.validationTrace == null)
+      return;
+
+    Object verdict = top.validationTrace.show(tElement.getElement());
+    if (verdict instanceof Element)
+      {
+        Element proposed = (Element) verdict;
+        if (!first)
+          closeTag(top.tgElement);
+        handleSupposedStartTag(proposed);
+        openTag(new TagElement(proposed), null);
+      }
+    else if (verdict == Boolean.FALSE)
+      s_error("The <" + top.element + "> does not match the content model " +
+              top.validationTrace);
+  }
+
+  /**
+   * The method is called when the tag must be closed because
+   * it does not allow the subsequent elements inside its context
+   * or the end of stream has been reached. The parser is only
+   * informed if the element being closed does not require the
+   * end tag (the "omitEnd" flag is set).
+   * The closing message must be passed to the parser mechanism
+   * before passing the message about opening the next tag.
+   *
+   * @param element The tag being fictionally (forcibly) closed.
+   */
+  protected abstract void handleSupposedEndTag(Element element);
+
+  /**
+   * The method is called when the validator decides to open the
+   * tag on its own initiative. This may happen if the content model
+   * includes an element with the optional (supposed) start tag.
+   *
+   * @param element The tag being opened.
+   */
+  protected abstract void handleSupposedStartTag(Element element);
+
+  /**
+   * Handles the error message. This method must be implemented to pass
+   * the message where required.
+   * @param msg The message text.
+   */
+  protected abstract void s_error(String msg);
+
+  /**
+   * Validate the parameters of the tag. The error is reported for each
+   * parameter that the tag cannot contain or that has an invalid value,
+   * and for each required attribute that is missing. The information
+   * about the valid parameter set is taken from the Element, enclosed
+   * inside the tag. The method does not validate the default parameters.
+   * @param tag The tag
+   * @param parameters The parameters of this tag. Nothing is validated if
+   * this is null or one of the shared empty attribute sets.
+   */
+  protected void validateParameters(TagElement tag, htmlAttributeSet parameters)
+  {
+    boolean nothingToCheck =
+      parameters == null
+      || parameters == htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET
+      || parameters == SimpleAttributeSet.EMPTY;
+    if (nothingToCheck)
+      return;
+
+    // Each call consumes exactly one attribute name from the enumeration.
+    for (Enumeration names = parameters.getAttributeNames();
+         names.hasMoreElements();)
+      {
+        validateAttribute(tag, parameters, names);
+      }
+
+    // Check for missing required values.
+    for (AttributeList declared = tag.getElement().getAttributes();
+         declared != null; declared = declared.next)
+      {
+        boolean required = declared.getModifier() == DTDConstants.REQUIRED;
+        if (required && parameters.getAttribute(declared.getName()) == null)
+          {
+            s_error("Missing required attribute '" + declared.getName() +
+                    "' for <" + tag.getHTMLTag() + ">");
+          }
+      }
+  }
+
+  /** Returns the validation trace of the innermost open tag, or null. */
+  private node getCurrentContentModel()
+  {
+    if (stack.isEmpty())
+      return null;
+
+    // The innermost open tag is the last entry of the stack.
+    hTag innermost = (hTag) stack.getLast();
+    return innermost.validationTrace;
+  }
+
+  private void closeLast() // closes the last (innermost) tag of the stack
+  {
+    handleSupposedEndTag(((hTag) stack.getLast()).element); // notify first,
+    stack.removeLast();                                     // then remove
+  }
+
+  private void openFictionalTag(Element e) // opens e on the validator's own initiative
+  {
+    handleSupposedStartTag(e);
+    stack.add(new hTag(new TagElement(e, true))); // true: the fictional tag
+    if (!e.omitStart()) // the start tag of e cannot be omitted: report it
+      s_error("<" + e + "> is expected (supposing it)");
+  }
+
+  /**
+   * Validate the next attribute, taken from the enumeration, against the
+   * attribute definitions of the tag element. The attribute that is not
+   * defined for the tag is reported together with the list of the valid
+   * attributes. The value of the defined attribute is checked as the
+   * attribute type and the list of the allowed values require.
+   * @param enumeration The attribute names; one name is taken per call.
+   */
+  private void validateAttribute(TagElement tag, htmlAttributeSet parameters,
+                                 Enumeration enumeration)
+  {
+    Object name = enumeration.nextElement();
+    AttributeList declared = tag.getElement().getAttribute(name.toString());
+
+    if (declared != null)
+      {
+        String value = parameters.getAttribute(name).toString();
+
+        // NUMBER, NAME and ID are distinct types, so one check applies.
+        int type = declared.type;
+        if (type == DTDConstants.NUMBER)
+          validateNumberAttribute(tag, name, value);
+        else if (type == DTDConstants.NAME || type == DTDConstants.ID)
+          validateNameOrIdAttribute(tag, name, value);
+
+        if (declared.values != null)
+          validateAttributeWithValueList(tag, name, declared, value);
+        return;
+      }
+
+    // The attribute is not defined for this tag: list the defined ones.
+    CPStringBuilder message =
+      new CPStringBuilder("The tag <" + tag.getHTMLTag() +
+                          "> cannot contain the attribute '" + name +
+                          "'. The valid attributes for this tag are: ");
+
+    for (AttributeList a = tag.getElement().getAttributes(); a != null;
+         a = a.next)
+      {
+        message.append(a.name.toUpperCase());
+        message.append(' ');
+      }
+
+    s_error(message.toString());
+  }
+
+  /**
+   * Report the error if the value (compared in lower and in upper case) is
+   * not in the list of the values, allowed for the attribute. The message
+   * lists all allowed values.
+   */
+  private void validateAttributeWithValueList(TagElement tag,
+                                              Object foundAttribute,
+                                              AttributeList dtdAttribute,
+                                              String value)
+  {
+    boolean known = dtdAttribute.values.contains(value.toLowerCase())
+                    || dtdAttribute.values.contains(value.toUpperCase());
+    if (known)
+      return;
+
+    // Both message variants share the beginning, up to "The ".
+    CPStringBuilder message =
+      new CPStringBuilder("The attribute '" + foundAttribute +
+                          "' of the tag <" + tag.getHTMLTag() +
+                          "> cannot have the value '" + value + "'. The ");
+
+    int count = dtdAttribute.values.size();
+    if (count == 1)
+      message.append("only valid value is ");
+    else
+      message.append(count + " valid values are: ");
+
+    for (Enumeration allowed = dtdAttribute.values.elements();
+         allowed.hasMoreElements();)
+      {
+        message.append('"');
+        message.append(allowed.nextElement());
+        message.append("\"  ");
+      }
+
+    s_error(message.toString());
+  }
+
+  /**
+   * Check that the value of a NAME or ID attribute starts with a letter and
+   * consists only of letters, digits, hyphens, colons, underscores and
+   * periods, reporting the error otherwise. The empty value is reported as
+   * invalid as well.
+   */
+  private void validateNameOrIdAttribute(TagElement tag, Object foundAttribute,
+                                         String value
+                                        )
+  {
+    // The length check keeps charAt(0) from throwing on the empty value.
+    boolean ok = value.length() > 0 && Character.isLetter(value.charAt(0));
+
+    for (int i = 0; ok && i < value.length(); i++)
+      {
+        char c = value.charAt(i);
+        // Besides letters and digits, four punctuation characters are valid.
+        ok = Character.isLetter(c) || Character.isDigit(c)
+             || "-_:.".indexOf(c) >= 0;
+      }
+    if (!ok)
+      s_error("The '" + foundAttribute + "' attribute of the tag <" +
+              tag.getHTMLTag() + "> must start from letter and consist of " +
+              "letters, digits, hypens, colons, underscores and periods. " +
+              "It cannot be '" + value + "'"
+             );
+  }
+
+  private void validateNumberAttribute(TagElement tag, Object foundAttribute,
+                                       String value
+                                      )
+  {
+    try
+      {
+        Integer.parseInt(value);
+      }
+    catch (NumberFormatException ex)
+      {
+        s_error("The '" + foundAttribute + "' attribute of the tag <" +
+                tag.getHTMLTag() + "> must be a valid number and not '" +
+                value + "'"
+               );
+      }
+  }
+}