summaryrefslogtreecommitdiff
path: root/libjava/classpath/javax/swing/text/html/parser
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/javax/swing/text/html/parser')
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/AttributeList.java294
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/ContentModel.java223
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/DTD.java609
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/DTDConstants.java292
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/DocumentParser.java268
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/Element.java317
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/Entity.java183
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/Parser.java446
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java207
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/TagElement.java142
-rw-r--r--libjava/classpath/javax/swing/text/html/parser/package.html50
11 files changed, 3031 insertions, 0 deletions
diff --git a/libjava/classpath/javax/swing/text/html/parser/AttributeList.java b/libjava/classpath/javax/swing/text/html/parser/AttributeList.java
new file mode 100644
index 000000000..a943f056d
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/AttributeList.java
@@ -0,0 +1,294 @@
+/* AttributeList.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper;
+
+import java.io.Serializable;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * <p>
+ * Stores the attribute information, obtained by parsing SGML (DTD) tag
+ * <code>&lt;!ATTLIST .. &gt;</code></p>
+ * <p>
+ * Elements can have associated named properties (attributes) with
+ * assigned values. The element start tag can have any number of attribute
+ * value pairs, separated by spaces. They can appear in any order.
+ * SGML requires you to delimit the attribute values using either double (")
+ * or single (') quotation marks. In HTML, it is possible
+ * (but not recommended) to specify the value of an attribute without
+ * quotation marks. Such an attribute value may only contain
+ * letters, digits, hyphens (-) and periods (.).
+ * </p>
+ * <p>
+ * The <code>AttributeList</code> defines a single attribute that additionally
+ * has a pointer referencing the possible subsequent attribute.
+ * The whole structure is just a simple linked list, storing all attributes of
+ * some <code>Element</code>.
+ * Use the <code>getNext()</code> method repeatedly to see all attributes in
+ * the list.
+ * </p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public final class AttributeList
+  implements DTDConstants, Serializable
+{
+  /** Maps between the attribute type names and their DTDConstants values. */
+  private static final gnuStringIntMapper mapper =
+    new gnuStringIntMapper()
+    {
+      protected void create()
+      {
+        add("CDATA", DTDConstants.CDATA);
+        add("ENTITY", DTDConstants.ENTITY);
+        add("ENTITIES", DTDConstants.ENTITIES);
+        add("ID", DTDConstants.ID);
+        add("IDREF", DTDConstants.IDREF);
+        add("IDREFS", DTDConstants.IDREFS);
+        add("NAME", DTDConstants.NAME);
+        add("NAMES", DTDConstants.NAMES);
+        add("NMTOKEN", DTDConstants.NMTOKEN);
+        add("NMTOKENS", DTDConstants.NMTOKENS);
+        add("NOTATION", DTDConstants.NOTATION);
+        add("NUMBER", DTDConstants.NUMBER);
+        add("NUMBERS", DTDConstants.NUMBERS);
+        add("NUTOKEN", DTDConstants.NUTOKEN);
+        add("NUTOKENS", DTDConstants.NUTOKENS);
+      }
+    };
+
+  /** Use serialVersionUID for interoperability. */
+  private static final long serialVersionUID = -1361214058742015233L;
+
+  /**
+   * The value of ( = pointer to ) the next attribute in the linked list,
+   * storing all attributes of some Element. Contains null for the
+   * last attribute.
+   */
+  public AttributeList next;
+
+  /**
+   * The name of the attribute. The attribute names are case insensitive.
+   */
+  public String name;
+
+  /**
+   * The default value of this attribute. Equals to null if no default value
+   * is specified.
+   */
+  public String value;
+
+  /**
+   * The explicit set of the allowed values of this attribute. Equals to
+   * null, if this parameter was not specified.
+   * Values, defined in DTD, are case insensitive.
+   */
+  public Vector<?> values;
+
+  /**
+   * The modifier of this attribute. This field contains one of the
+   * following DTD constants:
+   * <ul>
+   * <li> REQUIRED if the attribute value is always required,</li>
+   * <li> IMPLIED if the user agent must supply the default value itself,</li>
+   * <li> FIXED if the attribute value is fixed to some value and cannot
+   * be changed.</li>
+   * <li> DEFAULT if the attribute default value has been supplied.</li>
+   * <li> CURRENT the value that at any point in the document is
+   * the last value supplied for that element. A value is required to be
+   * supplied for the first occurrence of an element</li>
+   * <li> CONREF specifies the IDREF value of
+   * the reference to content in another location of the document.
+   * The element with this attribute is empty, the content from
+   * that another location must be used instead.</li>
+   * </ul>
+   */
+  public int modifier;
+
+  /**
+   * The type of the attribute. The possible values of this field
+   * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants.
+   */
+  public int type;
+
+  /**
+   * Creates the attribute with the given name, initializing other fields
+   * to the default values ( 0 and null ).
+   *
+   * @param a_name The name of the attribute.
+   */
+  public AttributeList(String a_name)
+  {
+    name = a_name;
+  }
+
+  /**
+   * Creates the attribute with the given properties.
+   * @param a_name The name of the attribute
+   * @param a_type The type of the attribute. The possible values are defined
+   * in <code> DTDConstants</code>.
+   * @param a_modifier The modifier of this attribute. The possible values
+   * are defined in <code> DTDConstants</code>.
+   * @param a_default The default value of this attribute
+   * @param allowed_values The explicit set of the allowed values of
+   * this attribute
+   * @param a_next The value of the subsequent instance of the AttributeList,
+   * representing the next attribute definition for the same element.
+   * Equals to null for the last attribute definition.
+   */
+  public AttributeList(String a_name, int a_type, int a_modifier,
+                       String a_default, Vector<?> allowed_values,
+                       AttributeList a_next
+                      )
+  {
+    this(a_name);
+    type = a_type;
+    modifier = a_modifier;
+    value = a_default;
+    values = allowed_values;
+    next = a_next;
+  }
+
+  /**
+   * Get the modifier of this attribute. This field contains one of the
+   * following DTD constants:
+   * <ul>
+   * <li> REQUIRED if the attribute value is always required,</li>
+   * <li> IMPLIED if the user agent must supply the default value itself,</li>
+   * <li> FIXED if the attribute value is fixed to some value and cannot
+   * be changed.</li>
+   * <li> DEFAULT if the attribute default value has been supplied.</li>
+   * <li> CURRENT the value that at any point in the document is
+   * the last value supplied for that element. A value is required to be
+   * supplied for the first occurrence of an element</li>
+   * <li> CONREF specifies the IDREF value of
+   * the reference to content in another location of the document.
+   * The element with this attribute is empty, the content from
+   * that another location must be used instead.</li>
+   * </ul>
+   *
+   * @return the value of the <code>modifier</code> field.
+   */
+  public int getModifier()
+  {
+    return modifier;
+  }
+
+  /**
+   * Get the name of the attribute.
+   * The value is returned as it was supplied to a
+   * constructor, preserving the character case.
+   *
+   * @return the value of the <code>name</code> field.
+   */
+  public String getName()
+  {
+    return name;
+  }
+
+  /**
+   * Get the value of ( = pointer to ) the next attribute in the linked list,
+   * storing all attributes of some Element. Contains null for the
+   * last attribute.
+   *
+   * @return the value of the <code>next</code> field.
+   */
+  public AttributeList getNext()
+  {
+    return next;
+  }
+
+  /**
+   * Get the type of the attribute. The possible values of this field
+   * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants.
+   *
+   * @return the value of the <code>type</code> field.
+   */
+  public int getType()
+  {
+    return type;
+  }
+
+  /**
+   * Get the default value of this attribute.
+   *
+   * @return the value of the <code>value</code> field (may be null).
+   */
+  public String getValue()
+  {
+    return value;
+  }
+
+  /**
+   * Get the allowed values of this attribute.
+   *
+   * @return an enumeration over the <code>values</code> vector, or null
+   * if that vector is null.
+   */
+  public Enumeration<?> getValues()
+  {
+    return (values != null) ? values.elements() : null;
+  }
+
+  /**
+   * Converts a string value, representing a valid SGML attribute type,
+   * into the corresponding value, defined in DTDConstants.
+   * @param typeName the name of the type (character case is ignored).
+   * @return a value from DTDConstants or DTDConstants.ANY if the
+   * string is not representing a known type. The known attribute types
+   * in this implementation are CDATA, ENTITY, ENTITIES, ID, IDREF, IDREFS,
+   * NAME, NAMES, NMTOKEN, NMTOKENS, NOTATION, NUMBER, NUMBERS, NUTOKEN and
+   * NUTOKENS.
+   * @throws NullPointerException if the passed parameter is null.
+   */
+  public static int name2type(String typeName)
+  {
+    // The registered keywords are plain ASCII, so the case folding must not
+    // depend on the default locale (e.g. Turkish maps 'i' to a dotted
+    // capital I, which would make "id" miss the "ID" entry).
+    return mapper.get(typeName.toUpperCase(java.util.Locale.ENGLISH));
+  }
+
+  /**
+   * Returns the attribute name.
+   *
+   * @return the value of the <code>name</code> field.
+   */
+  public String toString()
+  {
+    return name;
+  }
+
+  /**
+   * Converts a value from DTDConstants into the string representation.
+   * @param type - an integer value of the public static integer field,
+   * defined in the DTDConstants class.
+   * @return a corresponding SGML DTD keyword (UPPERCASE) or null if there
+   * is no attribute type constant having the given value.
+   */
+  public static String type2name(int type)
+  {
+    return mapper.get(type);
+  }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/ContentModel.java b/libjava/classpath/javax/swing/text/html/parser/ContentModel.java
new file mode 100644
index 000000000..d5c4418de
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/ContentModel.java
@@ -0,0 +1,223 @@
+/* ContentModel.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import gnu.javax.swing.text.html.parser.models.transformer;
+
+import java.io.Serializable;
+
+import java.util.Vector;
+
+/**
+ * A representation of the element content. The instances of this class
+ * can be arranged into the linked list, representing a BNF expression.
+ * The content model is constructed as a branched tree structure in the
+ * following way:
+ * <pre>
+ * a = new ContentModel('+', A, null); // a represents A+
+ * b = new ContentModel('&amp;', B, a); // b represents B &amp; A+
+ * c = new ContentModel('*', b, null); // c represents ( B &amp; A+) *
+ * d = new ContentModel('|', new ContentModel('*', A, null),
+ *     new ContentModel('?', B, null)); // d represents ( A* | B? )
+ * </pre>
+ * where the valid operations are:
+ * <ul>
+ * <li><code>E* </code> E occurs zero or more times</li>
+ * <li><code>E+ </code> E occurs one or more times</li>
+ * <li><code>E? </code> E occurs once or not at all</li>
+ * <li><code>A,B</code> A occurs before B</li>
+ * <li><code>A|B</code> both A and B are permitted in any order.
+ * The '|' alone does not permit the repetitive occurrence of A or B
+ * (use <code>(A|B)*</code>).</li>
+ * <li><code>A&amp;B</code> both A and B must occur once (in any order)</li>
+ * </ul>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public final class ContentModel
+  implements Serializable
+{
+  /** Use serialVersionUID for interoperability. */
+  private static final long serialVersionUID = -1130825523866321257L;
+
+  /**
+   * The next content model ( = pointer to the next element of
+   * the linked list) for the binary expression (',','&amp;' or '|'). Null
+   * for the last element in the list.
+   */
+  public ContentModel next;
+
+  /**
+   * The document content, containing either Element or the enclosed
+   * content model (that would be in the parentheses in BNF expression).
+   */
+  public Object content;
+
+  /**
+   * Specifies the BNF operation between this node and the node,
+   * stored in the field <code>next</code> (or for this node, if it is
+   * a unary operation).
+   */
+  public int type;
+
+  /**
+   * Create a content model initializing all fields to default values.
+   */
+  public ContentModel()
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Create a content model, consisting of the single element. The
+   * <code>type</code> and <code>next</code> fields keep their default
+   * values (0 and null).
+   *
+   * @param a_content the element to store in the <code>content</code> field.
+   */
+  public ContentModel(Element a_content)
+  {
+    content = a_content;
+  }
+
+  /**
+   * Create a content model, involving expression of the given type.
+   * @param a_type The expression operation type ('*','?' or '+').
+   * @param a_content The content to which the expression is applied.
+   */
+  public ContentModel(int a_type, ContentModel a_content)
+  {
+    content = a_content;
+    type = a_type;
+  }
+
+  /**
+   * Create a content model, involving binary expression of the given type.
+   * @param a_type The expression operation type ( ',', '|' or '&amp;').
+   * @param a_content The content of the left part of the expression.
+   * @param a_next The content model, representing the right part of the
+   * expression.
+   */
+  public ContentModel(int a_type, Object a_content, ContentModel a_next)
+  {
+    content = a_content;
+    type = a_type;
+    next = a_next;
+  }
+
+  /**
+   * Adds all list elements to the given vector, ignoring the
+   * operations between the elements. The old vector values are not
+   * discarded. Only nodes whose content is an Element contribute; nested
+   * content models are skipped, not descended into.
+   * @param elements - a vector to add the values to.
+   */
+  public void getElements(Vector<Element> elements)
+  {
+    ContentModel c = this;
+
+    while (c != null)
+      {
+        // FIXME: correct?
+        if (c.content instanceof Element)
+          elements.add((Element) c.content);
+        c = c.next;
+      }
+  }
+
+  /**
+   * Checks if the content model matches an empty input stream.
+   * The empty content is created using SGML DTD keyword EMPTY.
+   * The empty model is a model with the content field equal to null.
+   *
+   * @return true if the content field is equal to null.
+   */
+  public boolean empty()
+  {
+    return content == null;
+  }
+
+  /**
+   * Get the element, stored in the <code>next.content</code>.
+   * The method is programmed as the part of the standard API, but not
+   * used in this implementation.
+   * @return the element held by <code>next.content</code>, or null if
+   * this is the last node (<code>next</code> is null) or the content of
+   * the next node is not an Element.
+   */
+  public Element first()
+  {
+    // Guard against the last node and against a nested content model,
+    // both of which used to end in a runtime exception.
+    if (next == null || !(next.content instanceof Element))
+      return null;
+    return (Element) next.content;
+  }
+
+  /**
+   * Checks if this object can potentially be the first token in the
+   * ContentModel list. The method is programmed as the part of the
+   * standard API, but not used in this implementation.
+   *
+   * @param token the object whose string form is compared with the
+   * string form of each node content.
+   * @return true if some examined node has a non-null content with the same
+   * string form as the token and a type other than ','.
+   */
+  public boolean first(Object token)
+  {
+    // NOTE(review): the loop stops as soon as c.next is null, so the last
+    // node of the list (and the only node of a one-node list) is never
+    // compared with the token - confirm this is intended.
+    ContentModel c = this;
+    while (c.next != null)
+      {
+        if (c.content != null && c.content.toString().equals(token.toString())
+            && c.type != ',')
+          // Agree if the operation with the preceding element
+          // is not the comma operation.
+          return true;
+        c = c.next;
+      }
+    return false;
+  }
+
+  /**
+   * Returns a string representation (an expression) of this content model.
+   * The expression has BNF-like syntax, except the absence of the
+   * unary operator is additionally indicated by " ' ". It is
+   * advisable to check the created models for correctness using this
+   * method.
+   */
+  public String toString()
+  {
+    return transformer.transform(this).toString();
+  }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/DTD.java b/libjava/classpath/javax/swing/text/html/parser/DTD.java
new file mode 100644
index 000000000..09b50fee7
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/DTD.java
@@ -0,0 +1,609 @@
+/* DTD.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.BitSet;
+import java.util.Hashtable;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+/**
+ * <p>Representation of the SGML DTD document.
+ * Provides basis for describing a syntax of the
+ * HTML documents. The fields of this class are NOT initialized in
+ * constructor. You need to do this separately before passing this data
+ * structure to the HTML parser. The subclasses with the fields, pre-
+ * initialized, for example, for HTML 4.01, can be available only between
+ * the implementation specific classes
+ * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
+ * in this implementation).</p>
+ * <p>
+ * If you need more information about SGML DTD documents,
+ * the author suggests to read SGML tutorial on
+ * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
+ * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
+ * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
+ * Oxford University Press, 688 p, ISBN: 0198537379.
+ * </p>
+ * <p>
+ * Warning: the html, head and other tag fields will only be automatically
+ * assigned if the VM has the correctly implemented reflection mechanism.
+ * As these fields are not used anywhere in the implementation, no
+ * exception will be thrown in the opposite case.
+ * </p>
+ *
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class DTD
+ implements DTDConstants
+{
+ /**
+ * The version of the persistent data format.
+ * @specnote This was made <code>final</code> in 1.5.
+ */
+ public static final int FILE_VERSION = 1;
+
+ /**
+ * The table of existing available DTDs, keyed by name. Entries are
+ * added by {@link #getDTD(String)} and {@link #putDTDHash(String, DTD)}.
+ */
+ static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>();
+
+ /**
+ * The applet element for this DTD.
+ */
+ public Element applet;
+
+ /**
+ * The base element for this DTD.
+ */
+ public Element base;
+
+ /**
+ * The body element for this DTD.
+ */
+ public Element body;
+
+ /**
+ * The head element for this DTD.
+ */
+ public Element head;
+
+ /**
+ * The html element for this DTD.
+ */
+ public Element html;
+
+ /**
+ * The isindex element for this DTD.
+ */
+ public Element isindex;
+
+ /**
+ * The meta element for this DTD.
+ */
+ public Element meta;
+
+ /**
+ * The p element for this DTD.
+ */
+ public Element p;
+
+ /**
+ * The param element for this DTD.
+ */
+ public Element param;
+
+ /**
+ * The pcdata for this DTD.
+ */
+ public Element pcdata;
+
+ /**
+ * The title element for this DTD.
+ */
+ public Element title;
+
+ /**
+ * The element table for accessing all DTD elements by name. The
+ * {@link #read(DataInputStream)} and
+ * {@link #defineAttributes(String, AttributeList)} methods use the
+ * lower-cased element name as the key.
+ */
+ public Hashtable<String,Element> elementHash =
+ new Hashtable<String,Element>();
+
+ /**
+ * The entity table for accessing all DTD entities by name.
+ */
+ public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>();
+
+ /**
+ * The name of this DTD.
+ */
+ public String name;
+
+ /**
+ * Contains all elements in this DTD. The
+ * javax.swing.text.html.parser.Element#index field of all elements
+ * in this vector is set to the element position in this vector.
+ */
+ public Vector<Element> elements = new Vector<Element>();
+
+ /**
+  * Constructs a DTD that carries the given name. The constructor body
+  * assigns only the <code>name</code> field.
+  *
+  * @param a_name the name of the new DTD.
+  */
+ protected DTD(String a_name)
+ {
+   this.name = a_name;
+ }
+
+ /**
+  * Looks up a DTD by name in the internal table of DTD documents. When
+  * the table has no entry for the name, a new DTD with that name is
+  * created, stored in the table and returned.
+  *
+  * @param name the name of the requested DTD.
+  * @return the DTD already registered under the name, or the newly
+  * created and registered one.
+  * @throws IOException declared by the signature; this implementation
+  * performs no I/O.
+  */
+ public static DTD getDTD(String name)
+   throws IOException
+ {
+   final DTD registered = dtdHash.get(name);
+   if (registered != null)
+     return registered;
+
+   // Nothing registered yet: create the entry and remember it.
+   final DTD created = new DTD(name);
+   dtdHash.put(created.name, created);
+   return created;
+ }
+
+ /**
+  * Get the element by the element name. The call is forwarded to
+  * <code>newElement(String)</code>; according to the contract documented
+  * for this class, a not yet defined element is newly created and placed
+  * into the element table, and a public non static element field whose
+  * name matches (ignoring case) is assigned the new element.
+  *
+  * @param element_name the name of the requested element.
+  * @return the element returned by <code>newElement(String)</code>.
+  */
+ public Element getElement(String element_name)
+ {
+   final Element byName = newElement(element_name);
+   return byName;
+ }
+
+ /**
+  * Get the element by the value of its
+  * {@link javax.swing.text.html.parser.Element#index} field, that is,
+  * by its position in the <code>elements</code> vector.
+  *
+  * @param index the position of the element in the vector.
+  * @return the element stored at that position.
+  */
+ public Element getElement(int index)
+ {
+   final Element atIndex = elements.get(index);
+   return atIndex;
+ }
+
+ /**
+ * Get the entity with the given identifier.
+ * @param id that can be returned by
+ * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
+ * @return The entity from this DTD or null if there is no entity with
+ * such id or such entity is not present in the table of this instance.
+ */
+ public Entity getEntity(int id)
+ {
+ String name = Entity.mapper.get(id);
+
+ if (name != null)
+ return entityHash.get(name);
+ else
+ return null;
+ }
+
+ /**
+  * Get the named entity by its name.
+  *
+  * @param entity_name the key to look up in the entity table.
+  * @return the entity stored under that name, or null if there is none.
+  */
+ public Entity getEntity(String entity_name)
+ {
+   final Entity byName = entityHash.get(entity_name);
+   return byName;
+ }
+
+ /**
+  * Get the name of this instance of DTD.
+  *
+  * @return the value of the <code>name</code> field.
+  */
+ public String getName()
+ {
+   return this.name;
+ }
+
+ /**
+  * Creates, adds into the entity table and returns the
+  * character entity like <code>&amp;lt;</code>
+  * (means '<code>&lt;</code>' ).
+  * @param name The entity name (without heading &amp; and closing ;)
+  * @param type The entity type
+  * @param character The entity value (single character); it is narrowed
+  * to <code>char</code> before being stored.
+  * @return The created entity
+  */
+ public Entity defEntity(String name, int type, int character)
+ {
+   final Entity created = newEntity(name, type);
+   final char[] single = new char[1];
+   single[0] = (char) character;
+   created.data = single;
+   return created;
+ }
+
+ /**
+  * Define the attributes for the element with the given name.
+  * The element is looked up in the element table by its lower-cased name;
+  * if it does not exist, it is created via <code>newElement(String)</code>.
+  * @param forElement the name of the element that receives the attributes.
+  * @param attributes the attribute list to store in the element.
+  */
+ public void defineAttributes(String forElement, AttributeList attributes)
+ {
+   final String key = forElement.toLowerCase();
+   Element target = elementHash.get(key);
+
+   if (target == null)
+     {
+       target = newElement(forElement);
+     }
+
+   target.atts = attributes;
+ }
+
+ /**
+  * Defines the element and adds it to the element table. The element is
+  * obtained from <code>newElement(String)</code>, after which all its
+  * settings are overwritten with the argument values. Per the documented
+  * contract of this class, the <code>Element.index</code> field is set to
+  * a value unique for this instance of DTD, and an already existing
+  * element with the given name is reused.
+  * @param name the name of the element
+  * @param type the type of the element
+  * @param headless true if the element needs no starting tag
+  * (should not occur in HTML).
+  * @param tailless true if the element needs no ending tag (like
+  * <code>&lt;hr&gt;</code>)
+  * @param content the element content
+  * @param exclusions the set of elements that must not occur inside
+  * this element. The <code>Element.index</code> value defines which
+  * bit in this bitset corresponds to that element.
+  * @param inclusions the set of elements that can occur inside this
+  * element. The <code>Element.index</code> value defines which
+  * bit in this bitset corresponds to that element.
+  * @param attributes the element attributes.
+  * @return the newly defined element.
+  */
+ public Element defineElement(String name, int type, boolean headless,
+                              boolean tailless, ContentModel content,
+                              BitSet exclusions, BitSet inclusions,
+                              AttributeList attributes
+                             )
+ {
+   final Element defined = newElement(name);
+
+   // Content description.
+   defined.type = type;
+   defined.content = content;
+   defined.atts = attributes;
+
+   // Optional start / end tags.
+   defined.oStart = headless;
+   defined.oEnd = tailless;
+
+   // Nesting restrictions.
+   defined.inclusions = inclusions;
+   defined.exclusions = exclusions;
+
+   return defined;
+ }
+
+ /**
+  * Creates, initializes and adds to the entity table the new
+  * entity. The entity is obtained from <code>newEntity(String, int)</code>
+  * and receives the passed array itself (no copy is made).
+  * @param name the name of the entity
+  * @param type the type of the entity
+  * @param data the data section of the entity
+  * @return the created entity
+  */
+ public Entity defineEntity(String name, int type, char[] data)
+ {
+   final Entity created = newEntity(name, type);
+   created.data = data;
+   return created;
+ }
+
+ /**
+  * Place this DTD into the DTD table, replacing any entry previously
+  * stored under the same name.
+  *
+  * @param name the key under which the DTD is registered.
+  * @param dtd the DTD to register.
+  */
+ public static void putDTDHash(String name, DTD dtd)
+ {
+   final Hashtable<String,DTD> table = dtdHash;
+   table.put(name, dtd);
+ }
+
+ /**
+  * <p>Reads DTD from an archived format. This format is not standardized
+  * and differs between implementations.</p><p> This implementation
+  * reads and defines all entities and elements using
+  * ObjectInputStream. The elements and entities can be written into the
+  * stream in any order. The objects other than elements and entities
+  * are ignored.</p>
+  * <p>Elements are stored in the element table under their lower-cased
+  * name, entities in the entity table under their name. Reading stops
+  * when the end of the stream is reached.</p>
+  * @param stream A data stream to read from.
+  * @throws java.io.IOException If one is thrown by the input stream, or
+  * if the class of a serialized object cannot be found (the original
+  * ClassNotFoundException is then attached as the cause).
+  */
+ public void read(DataInputStream stream)
+   throws java.io.IOException
+ {
+   // NOTE(review): this is native Java deserialization; it should only be
+   // fed with trusted streams.
+   // The wrapper is intentionally not closed here: closing it would also
+   // close the stream owned by the caller.
+   ObjectInputStream oi = new ObjectInputStream(stream);
+   Object def;
+   try
+     {
+       // Runs until readObject() signals the end of stream by EOFException.
+       while (true)
+         {
+           def = oi.readObject();
+           if (def instanceof Element)
+             {
+               Element e = (Element) def;
+               elementHash.put(e.name.toLowerCase(), e);
+               assignField(e);
+             }
+           else if (def instanceof Entity)
+             {
+               Entity e = (Entity) def;
+               entityHash.put(e.name, e);
+             }
+         }
+     }
+   catch (ClassNotFoundException ex)
+     {
+       // Keep the original exception as the cause instead of dropping it.
+       IOException ioe = new IOException(ex.getMessage());
+       ioe.initCause(ex);
+       throw ioe;
+     }
+   catch (EOFException ex)
+     {
+       // ok EOF: this is the regular way to leave the reading loop.
+     }
+ }
+
+ /**
+  * Returns the name of this instance of DTD.
+  *
+  * @return the value of the <code>name</code> field.
+  */
+ public String toString()
+ {
+   return this.name;
+ }
+
+ /**
+  * Creates and returns new attribute (not an attribute list).
+  * @param name the name of this attribute
+  * @param type the type of this attribute (one of the attribute type
+  * constants, defined in <code>DTDConstants</code>).
+  * @param modifier the modifier of this attribute (FIXED, IMPLIED or
+  * REQUIRED from <code>DTDConstants</code>).
+  * @param default_value the default value of this attribute
+  * @param allowed_values the allowed values of this attribute. The multiple
+  * possible values in this parameter are supposed to be separated by
+  * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag (spaces and
+  * tabs around the values are dropped). This parameter
+  * can be null if no list of allowed values is specified.
+  * @param atts the previous attribute of this element. This is
+  * placed to the field
+  * {@link javax.swing.text.html.parser.AttributeList#next },
+  * creating a linked list.
+  * @return The attributes.
+  */
+ protected AttributeList defAttributeList(String name, int type, int modifier,
+                                          String default_value,
+                                          String allowed_values,
+                                          AttributeList atts
+                                         )
+ {
+   AttributeList al = new AttributeList(name);
+   // The type used to be silently dropped, leaving al.type at 0.
+   al.type = type;
+   al.modifier = modifier;
+   al.value = default_value;
+   al.next = atts;
+
+   if (allowed_values != null)
+     {
+       // Split on '|' and on the white space that may surround it.
+       StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
+       Vector<String> v = new Vector<String>(st.countTokens());
+
+       while (st.hasMoreTokens())
+         v.add(st.nextToken());
+
+       al.values = v;
+     }
+
+   return al;
+ }
+
+ /**
+  * Creates a new content model with the given operation, content and
+  * successor.
+  * @param type specifies the BNF operation for this content model.
+  * The valid operations are documented in the
+  * {@link javax.swing.text.html.parser.ContentModel#type }.
+  * @param content the content of this content model
+  * @param next if the content model is specified by BNF-like
+  * expression, contains the rest of this expression.
+  * @return The newly created content model.
+  */
+ protected ContentModel defContentModel(int type, Object content,
+                                        ContentModel next
+                                       )
+ {
+   // The three-argument constructor assigns exactly the same three fields.
+   return new ContentModel(type, content, next);
+ }
+
+ /**
+ * Defines a new element and adds it to the element table.
+ * If the element already exists,
+ * overrides its settings with the specified values.
+ * @param name the name of the new element
+ * @param type the type of the element
+ * @param headless true if the element needs no starting tag
+ * @param tailless true if the element needs no closing tag
+ * @param content the element content.
+ * @param exclusions the elements that must be excluded from the
+ * content of this element, in all levels of the hierarchy.
+ * @param inclusions the elements that can be included as the
+ * content of this element.
+ * @param attributes the element attributes.
+ * @return the created or updated element.
+ */
+ protected Element defElement(String name, int type, boolean headless,
+ boolean tailless, ContentModel content,
+ String[] exclusions, String[] inclusions,
+ AttributeList attributes
+ )
+ {
+ // Compute the bit sets (bitSet also defines any element not yet known).
+ BitSet exclude = bitSet(exclusions);
+ BitSet include = bitSet(inclusions);
+
+ Element e =
+ defineElement(name, type, headless, tailless, content, exclude, include,
+ attributes
+ );
+
+ return e;
+ }
+
+ /**
+ * Creates, initializes and adds to the entity table the new entity.
+ * @param name the name of the entity
+ * @param type the type of the entity
+ * @param data the data section of the entity
+ * @return the created entity
+ * @throws NullPointerException if <code>data</code> is null.
+ */
+ protected Entity defEntity(String name, int type, String data)
+ {
+ Entity e = newEntity(name, type);
+ e.data = data.toCharArray();
+
+ return e;
+ }
+
+ /**
+ * Stores the element in the public non-static field of this DTD whose
+ * name is the lower-cased element name, if such a field exists.
+ */
+ private void assignField(Element e)
+ {
+ String element_name = e.name;
+ try
+ {
+ // Assign the field via reflection.
+ Field f = getClass().getField(element_name.toLowerCase());
+ if ((f.getModifiers() & Modifier.PUBLIC) != 0)
+ if ((f.getModifiers() & Modifier.STATIC) == 0)
+ if (f.getType().isAssignableFrom(e.getClass()))
+ f.set(this, e);
+ }
+ catch (IllegalAccessException ex)
+ {
+ unexpected(ex);
+ }
+ catch (NoSuchFieldException ex)
+ {
+ // This is ok.
+ }
+ // Some virtual machines may lack a proper reflection implementation.
+ // The tag fields are not used anywhere in this implementation, so any
+ // other failure is ignored; only OutOfMemoryError is rethrown.
+ catch (Throwable t)
+ {
+ // This VM has no reflection mechanism implemented!
+ if (t instanceof OutOfMemoryError)
+ throw (Error) t;
+ }
+ }
+
+ /**
+ * Create the bit set for this array of elements.
+ * The unknown elements are automatically defined and added
+ * to the element table.
+ * @param elements the element names; null is treated as an empty array.
+ * @return The bit set, with the bit at each element's index set.
+ */
+ private BitSet bitSet(String[] elements)
+ {
+ BitSet b = new BitSet();
+ // No names given: return the empty set instead of failing on null.
+ if (elements == null)
+ return b;
+
+ for (int i = 0; i < elements.length; i++)
+ {
+ Element e = getElement(elements [ i ]);
+ if (e == null)
+ e = newElement(elements [ i ]);
+ b.set(e.index);
+ }
+ return b;
+ }
+
+ /**
+ * Find the element with the given name in the element table.
+ * If not found, create a new element with this name and add it to the
+ * table.
+ * @param name the name of the element (looked up by its lower-case form)
+ * @return the found or created element.
+ */
+ private Element newElement(String name)
+ {
+ Element e = elementHash.get(name.toLowerCase());
+
+ if (e == null)
+ {
+ e = new Element();
+ e.name = name;
+ e.index = elements.size();
+ elements.add(e);
+ elementHash.put(e.name.toLowerCase(), e);
+ assignField(e);
+ }
+ return e;
+ }
+
+ /**
+ * Creates and adds to the entity table the entity with an
+ * uninitialized data section. Used internally.
+ * @param name the name of the entity
+ * @param type the type of the entity, a bitwise combination
+ * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
+ *
+ * @return the created entity
+ */
+ private Entity newEntity(String name, int type)
+ {
+ Entity e = new Entity(name, type, null);
+ entityHash.put(e.name, e);
+ return e;
+ }
+
+ private void unexpected(Exception ex)
+ { // Always throws: wraps ex (kept as the cause) in an Error.
+ throw new Error("This should never happen, report a bug", ex);
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java b/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java
new file mode 100644
index 000000000..75e7afb4d
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java
@@ -0,0 +1,292 @@
+/* DTDConstants.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+/**
+ * <p>This class defines the SGML basic types, used for describing HTML 4.01
+ * at <a href="http://www.w3.org/TR/html4/types.html"
+ * >http://www.w3.org/TR/html4/types.html</a>. Not all constants,
+ * defined here, are actually used in HTML 4.01 SGML specification. Some others
+ * are defined just as part of the required implementation.
+ * </p>
+ * <p>
+ * If you need more information about SGML DTD documents,
+ * the author suggests to read SGML tutorial on
+ * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
+ * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
+ * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
+ * Oxford University Press, 688 p, ISBN: 0198537379.
+ * </p>
+ *
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public interface DTDConstants
+{
+ /* ----- The data types, used in HTML 4.01 SGML definition: ---- */
+
+ /**
+ * The CDATA (Character data) constant, specifies the content model,
+ * consisting of characters only. In SGML for HTML 4.01, the character
+ * entities must be replaced by characters, the line feeds must be
+ * ignored and any number of the subsequent carriage returns or tabs
+ * must be replaced by a single space.
+ */
+ int CDATA = 1;
+
+ /**
+ * The EMPTY constant, means the element with no content.
+ */
+ int EMPTY = 17;
+
+ /**
+ * The ID constant, means that the token is the unique identifier.
+ * This identifier can be referenced by attribute with value of IDREF.
+ * The identifier must begin with letter, followed by any number of
+ * letters, digits, hyphens, underscores, colons and periods.
+ */
+ int ID = 4;
+
+ /**
+ * The IDREF constant, specifies reference to a valid ID within
+ * the document.
+ */
+ int IDREF = 5;
+
+ /**
+ * The IDREFS constant, a space separated list of IDREFs
+ */
+ int IDREFS = 6;
+
+ /**
+ * The NAME constant, means the token that
+ * must begin with letter, followed by any number of
+ * letters, digits, hyphens, underscores, colons and periods.
+ */
+ int NAME = 7;
+
+ /**
+ * The NAMES constant, specifies a space separated list of NAMEs.
+ */
+ int NAMES = 8;
+
+ /**
+ * The NMTOKEN constant, specifies the attribute, consisting of
+ * characters that can be either digits or alphabetic characters.
+ */
+ int NMTOKEN = 9;
+
+ /**
+ * The NMTOKENS constant, specifies a list of NMTOKENs.
+ */
+ int NMTOKENS = 10;
+
+ /**
+ * The NOTATION constant, a previously defined data type.
+ */
+ int NOTATION = 11;
+
+ /**
+ * The NUMBER constant (means that the attribute consists of at least
+ * one decimal digit).
+ */
+ int NUMBER = 12;
+
+ /**
+ * The NUMBERS constant, specifies a space separated list of NUMBERs.
+ */
+ int NUMBERS = 13;
+
+ /**
+ * The NUTOKEN constant.
+ */
+ int NUTOKEN = 14;
+
+ /**
+ * The NUTOKENS constant.
+ */
+ int NUTOKENS = 15;
+
+ /* -------
+ The entity scope constants.
+ As these four constants are combined with the bitwise OR,
+ they are defined in the hexadecimal notation.
+ The reason for setting the two bits at once (for PUBLIC and SYSTEM)
+ is probably historical. ----- */
+
+ /**
+ * The PUBLIC constant, specifies the public entity. The PUBLIC entities
+ * are assumed to be known to many systems so that a full declaration
+ * need not be transmitted. For example,
+ * &lt;!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN"&gt;
+ */
+ int PUBLIC = 0xA;
+
+ /**
+ * The SYSTEM constant, specifies the system entity. The system entities
+ * are assumed to be known but require the clear identifier
+ * (like the file path), where they can be found in the system.
+ * For example, <code>
+ * &lt;!DOCTYPE html SYSTEM "/path/to/file.dtd"&gt; </code>.
+ */
+ int SYSTEM = 0x11;
+
+ /**
+ * The PARAMETER constant, specifies that entity is only valid
+ * inside SGML DTD scope.
+ */
+ int PARAMETER = 0x40000;
+
+ /**
+ * The GENERAL constant, specifies that the entity is valid in the
+ * whole HTML document scope.
+ */
+ int GENERAL = 0x10000;
+
+ /* ---- The constants, defining if the element attribute is required,
+ fixed or implied. ---- */
+
+ /**
+ * The attribute modifier #REQUIRED constant, indicates that the
+ * value must be supplied.
+ */
+ int REQUIRED = 2;
+
+ /**
+ * The attribute modifier #FIXED constant, means that the attribute has
+ * the fixed value that cannot be changed.
+ */
+ int FIXED = 1;
+
+ /**
+ * The attribute modifier #IMPLIED constant,
+ * indicating that for this attribute the user agent must provide
+ * the value itself.
+ */
+ int IMPLIED = 5;
+
+ /**
+ * The attribute modifier #CURRENT constant, specifies the value
+ * that at any point in the document is the last value supplied for
+ * that element. A value is required to be supplied for the first
+ * occurrence of an element.
+ */
+ int CURRENT = 3;
+
+ /**
+ * The attribute modifier #CONREF constant, specifies the IDREF value of
+ * the reference to content in another location of the document.
+ * The element with this attribute is empty, the content from
+ * that other location must be used instead.
+ */
+ int CONREF = 4;
+
+ /* ----- Constants, defining if the element
+ start and end tags are required. ---- */
+
+ /**
+ * The STARTTAG, meaning that the element needs a starting tag.
+ */
+ int STARTTAG = 13;
+
+ /**
+ * The ENDTAG constant, meaning that the element needs a closing tag.
+ */
+ int ENDTAG = 14;
+
+ /* ----- Other constants (values may repeat those of groups above): ----- */
+
+ /**
+ * The ANY constant, specifies
+ * an attribute, consisting of arbitrary characters.
+ */
+ int ANY = 19;
+
+ /**
+ * The DEFAULT constant, specifies the default value.
+ */
+ int DEFAULT = 131072;
+
+ /**
+ * The ENTITIES constant (list of ENTITY values)
+ */
+ int ENTITIES = 3;
+
+ /**
+ * The ENTITY constant, meaning the numeric or symbolic name of some
+ * HTML data.
+ */
+ int ENTITY = 2;
+
+ /**
+ * The MD constant.
+ */
+ int MD = 16;
+
+ /**
+ * The MODEL constant.
+ */
+ int MODEL = 18;
+
+ /**
+ * The MS constant.
+ */
+ int MS = 15;
+
+ /**
+ * The PI (Processing Instruction) constant, specifies a processing
+ * instruction. Processing instructions are used to embed information
+ * intended for specific applications.
+ */
+ int PI = 12;
+
+ /**
+ * The RCDATA constant (Entity References and Character Data), specifies
+ * the content model, consisting of characters AND entities. The
+ * "&lt;" is treated as an ordinary character, but
+ * "<code>&amp;name;</code>" still means the general entity with
+ * the given name.
+ */
+ int RCDATA = 16;
+
+ /**
+ * The SDATA constant. Means that the value contains the entity name
+ * and the replacement value of a character entity reference.
+ */
+ int SDATA = 11;
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java b/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java
new file mode 100644
index 000000000..f717d69cb
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java
@@ -0,0 +1,268 @@
+/* DocumentParser.java -- A parser for HTML documents.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import javax.swing.text.html.parser.Parser;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import javax.swing.text.BadLocationException;
+import javax.swing.text.SimpleAttributeSet;
+import javax.swing.text.html.HTMLEditorKit;
+
+/**
+ * <p>A simple error-tolerant HTML parser that uses a DTD document
+ * to access data on the possible tokens, arguments and syntax.</p>
+ * <p> The parser reads an HTML content from a Reader and calls various
+ * notifying methods (which should be overridden in a subclass)
+ * when tags or data are encountered.</p>
+ * <p>Some HTML elements need no opening or closing tags. The
+ * task of this parser is to invoke the tag handling methods also when
+ * the tags are not explicitly specified and must be supposed using
+ * information, stored in the DTD.
+ * For example, parsing the document
+ * <p>&lt;table&gt;&lt;tr&gt;&lt;td&gt;a&lt;td&gt;b&lt;td&gt;c&lt;/tr&gt; <br>
+ * will invoke the handling methods exactly in the same order
+ * (and with the same parameters) as if parsing the document: <br>
+ * <em>&lt;html&gt;&lt;head&gt;&lt;/head&gt;&lt;body&gt;&lt;table&gt;&lt;
+ * tbody&gt;</em>&lt;tr&gt;&lt;td&gt;a<em>&lt;/td&gt;</em>&lt;td&gt;b<em>
+ * &lt;/td&gt;</em>&lt;td&gt;c<em>&lt;/td&gt;&lt;/tr&gt;</em>&lt;
+ * <em>/tbody&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;</em></p>
+ * (supposed tags are given in italics). The parser also supports
+ * obsolete elements of HTML syntax.<p>
+ * </p>
+ * In this implementation, DocumentParser is directly derived from its
+ * ancestor without changes of functionality.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class DocumentParser
+ extends Parser
+ implements DTDConstants
+{
+ /**
+ * The working parser: each handler calls the outer method, then callBack.
+ */
+ private class gnuParser
+ extends gnu.javax.swing.text.html.parser.support.Parser
+ {
+ private gnuParser(DTD d)
+ {
+ super(d);
+ }
+
+ protected final void handleComment(char[] comment)
+ {
+ parser.handleComment(comment);
+ callBack.handleComment(comment, hTag.where.startPosition);
+ }
+
+ protected final void handleEmptyTag(TagElement tag)
+ throws javax.swing.text.ChangedCharSetException
+ {
+ parser.handleEmptyTag(tag);
+ callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(),
+ hTag.where.startPosition
+ );
+ }
+
+ protected final void handleEndTag(TagElement tag)
+ {
+ parser.handleEndTag(tag);
+ callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition);
+ }
+
+ protected final void handleError(int line, String message)
+ {
+ parser.handleError(line, message);
+ callBack.handleError(message, hTag.where.startPosition);
+ }
+
+ protected final void handleStartTag(TagElement tag)
+ {
+ parser.handleStartTag(tag);
+ SimpleAttributeSet attributes = gnu.getAttributes();
+
+ if (tag.fictional())
+ attributes.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED,
+ Boolean.TRUE
+ );
+
+ callBack.handleStartTag(tag.getHTMLTag(), attributes,
+ hTag.where.startPosition
+ );
+ }
+
+ protected final void handleText(char[] text)
+ {
+ parser.handleText(text);
+ callBack.handleText(text, hTag.where.startPosition);
+ }
+
+ DTD getDTD()
+ {
+ return dtd;
+ }
+ }
+
+ /**
+ * This field is used to access the identically named
+ * methods of the outer class.
+ * This is package-private to avoid an accessor method.
+ */
+ DocumentParser parser = this;
+
+ /**
+ * The callback.
+ * This is package-private to avoid an accessor method.
+ */
+ HTMLEditorKit.ParserCallback callBack;
+
+ /**
+ * The reference to the working class of HTML parser that is
+ * actually used to parse the document.
+ * This is package-private to avoid an accessor method.
+ */
+ gnuParser gnu;
+
+ /**
+ * Creates a new parser that uses the given DTD to access data on the
+ * possible tokens, arguments and syntax. There is no single - step way
+ * to get a default DTD; you must either refer to the implementation -
+ * specific packages, write your own DTD or obtain the working instance
+ * of parser in other way, for example, by calling
+ * {@link javax.swing.text.html.HTMLEditorKit#getParser()}.
+ *
+ * @param a_dtd a DTD to use.
+ */
+ public DocumentParser(DTD a_dtd)
+ {
+ super(a_dtd);
+ gnu = new gnuParser(a_dtd);
+ }
+
+ /**
+ * Parses the HTML document, calling methods of the provided
+ * callback. This method must be multithread - safe.
+ * @param reader The reader to read the HTML document from
+ * @param aCallback The callback that is notified about the presence
+ * of HTML elements in the document.
+ * @param ignoreCharSet If true, any charset changes during parsing
+ * are ignored. NOTE: this method body never reads the parameter.
+ * @throws java.io.IOException also thrown, with the cause set, if flush fails
+ */
+ public void parse(Reader reader, HTMLEditorKit.ParserCallback aCallback,
+ boolean ignoreCharSet
+ )
+ throws IOException
+ {
+ callBack = aCallback;
+ gnu.parse(reader);
+
+ callBack.handleEndOfLineString(gnu.getEndOfLineSequence());
+ try
+ {
+ callBack.flush();
+ }
+ catch (BadLocationException ex)
+ {
+ // Convert this into the supported type of exception, keeping the cause.
+ throw (IOException) new IOException(ex.getMessage()).initCause(ex);
+ }
+ }
+
+ /**
+ * Handle HTML comment. The default method returns without action.
+ * @param comment the comment being handled
+ */
+ protected void handleComment(char[] comment)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Handle the tag with no content, like &lt;br&gt;. The method is
+ * called for the elements that, in accordance with the current DTD,
+ * have an empty content.
+ * @param tag the tag being handled.
+ * @throws javax.swing.text.ChangedCharSetException
+ */
+ protected void handleEmptyTag(TagElement tag)
+ throws javax.swing.text.ChangedCharSetException
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * The method is called when the HTML closing tag (like &lt;/table&gt;)
+ * is found or if the parser concludes that one should be present
+ * in the current position.
+ * @param tag The tag being handled
+ */
+ protected void handleEndTag(TagElement tag)
+ {
+ // This default implementation does nothing.
+ }
+
+ /** Handle error that has occurred in the given line. */
+ protected void handleError(int line, String message)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * The method is called when the HTML opening tag (like &lt;table&gt;)
+ * is found or if the parser concludes that one should be present
+ * in the current position.
+ * @param tag The tag being handled
+ */
+ protected void handleStartTag(TagElement tag)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Handle the text section.
+ * @param text a section text.
+ */
+ protected void handleText(char[] text)
+ {
+ // This default implementation does nothing.
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/Element.java b/libjava/classpath/javax/swing/text/html/parser/Element.java
new file mode 100644
index 000000000..c07c07f54
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/Element.java
@@ -0,0 +1,317 @@
+/* Element.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper;
+
+import java.io.Serializable;
+
+import java.util.BitSet;
+
+/**
+ * <p>
+ * Stores the element information, obtained by parsing SGML DTD
+ * tag <code>&lt;!ELEMENT .. &gt;</code>. This class has no public
+ * constructor and can only be instantiated using the
+ * {@link javax.swing.text.html.parser.DTD } methods</p>
+ *
+ * <p>SGML defines elements that represent structures or
+ * behavior. An element typically consists of a start tag, content, and an
+ * end tag. Hence the elements are not tags. The HTML 4.0 definition specifies
+ * that some elements are not required to have the end tags. Also, some
+ * HTML elements (like <code>&lt;hr&gt;</code>) have no content. Element names
+ * are case sensitive.</p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public final class Element
+ implements DTDConstants, Serializable
+{
+ /**
+ * Package level mapper between type names and their integer values.
+ */
+ static final gnuStringIntMapper mapper =
+ new gnuStringIntMapper()
+ {
+ protected void create()
+ {
+ add("CDATA", DTDConstants.CDATA);
+ add("RCDATA", DTDConstants.RCDATA);
+ add("EMPTY", DTDConstants.EMPTY);
+ add("ANY", DTDConstants.ANY);
+ }
+ };
+
+ /** Use serialVersionUID for interoperability. */
+ private static final long serialVersionUID = -6717939384601675586L;
+
+ /**
+ * The element attributes.
+ */
+ public AttributeList atts;
+
+ /**
+ * Contains references to elements that must NOT occur inside this element,
+ * at any level of hierarchy.
+ */
+ public BitSet exclusions;
+
+ /**
+ * Contains references to elements that CAN occur inside this element,
+ * at any level of hierarchy.
+ */
+ public BitSet inclusions;
+
+ /**
+ * The content model, defining elements, entities and DTD text
+ * that may/may not occur inside this element.
+ */
+ public ContentModel content;
+
+ /**
+ * A field to store additional user data for this Element.
+ */
+ public Object data;
+
+ /**
+ * The element name.
+ */
+ public String name;
+
+ /**
+ * True if this element need not have the closing tag, false
+ * otherwise. The HTML 4.0 definition specifies
+ * that some elements (like <code>&lt;hr&gt;</code>) are
+ * not required to have the end tags.
+ */
+ public boolean oEnd;
+
+ /**
+ * True if this element need not have the starting tag, false
+ * otherwise. The HTML 4.0 definition specifies
+ * that some elements (like <code>&lt;head&gt;</code> or
+ * <code>&lt;body&gt;</code>) are
+ * not required to have the start tags.
+
+ */
+ public boolean oStart;
+
+ /**
+ * This field contains the unique integer identifier of this Element,
+ * used to refer the element (more exactly, the element flag)
+ * in <code>inclusions</code> and <code>exclusions</code> bit set.
+ */
+ public int index;
+
+ /**
+ * The element type, containing value, defined in DTDConstants.
+ * In this implementation, the element type can be
+ * CDATA, RCDATA, EMPTY or ANY.
+ */
+ public int type;
+
+ /**
+ * The default constructor must have package level access in this
+ * class. Use DTD.defineElement(..) to create an element when required.
+ */
+ Element()
+ {
+ // Nothing to do here.
+ }
+
+ /**
+ * Converts the string representation of the element type
+ * into its unique integer identifier, defined in DTDConstants.
+ * @param a_type A name of the type
+ * @return DTDConstants.CDATA, DTDConstants.RCDATA, DTDConstants.EMPTY or
+ * DTDConstants.ANY; for any other name, the mapper's "not found" result
+ * (an int, not null -- TODO confirm). This function is case sensitive.
+ * @throws NullPointerException if <code>a_type</code> is null.
+ */
+ public static int name2type(String a_type)
+ {
+ return mapper.get(a_type);
+ }
+
+ /**
+ * Get the element attribute by name.
+ * @param attribute the attribute name, case insensitive.
+ * @return the corresponding attribute of this element (a single
+ * attribute stored in the attribute list class), or null if no
+ * attribute has this name.
+ * @throws NullPointerException if the name is null and attributes exist.
+ */
+ public AttributeList getAttribute(String attribute)
+ {
+ AttributeList a = atts;
+
+ while (a != null && !attribute.equalsIgnoreCase(a.name))
+ a = a.next;
+
+ return a;
+ }
+
+ /**
+ * Get the element attribute by its value.
+ * @param a_value the attribute value, case insensitive; null matches
+ * an attribute whose value is null.
+ * @return the corresponding attribute of this element (a single
+ * attribute stored in the attribute list class), or null if none
+ * matches. If there are several attributes with the same value,
+ * there is no guarantee which one is returned.
+ */
+ public AttributeList getAttributeByValue(String a_value)
+ {
+ AttributeList a = atts;
+
+ if (a_value == null)
+ {
+ while (a != null)
+ {
+ if (a.value == null)
+ return a;
+
+ a = a.next;
+ }
+ }
+ else
+ {
+ while (a != null)
+ {
+ if (a.value != null && a_value.equalsIgnoreCase(a.value))
+ return a;
+
+ a = a.next;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Get all attributes of this element as an attribute list.
+ * @return The attribute list.
+ */
+ public AttributeList getAttributes()
+ {
+ return atts;
+ }
+
+ /**
+ * Get the content model, defining elements, entities and DTD text
+ * that may/may not occur inside this element.
+ */
+ public ContentModel getContent()
+ {
+ return content;
+ }
+
+ /**
+ * Returns true for the element with no content.
+ * Empty elements are defined with the SGML DTD keyword "EMPTY".
+ * @return true if the content model field (content) is equal to
+ * null or its method empty() returns true.
+ */
+ public boolean isEmpty()
+ {
+ return content == null || content.empty();
+ }
+
+ /**
+ * Get the unique integer identifier of this Element,
+ * used to refer the element (more exactly, the element flag)
+ * in <code>inclusions</code> and <code>exclusions</code> bit set.
+ * WARNING: This value may not be the same between different
+ * implementations.
+ */
+ public int getIndex()
+ {
+ return index;
+ }
+
+ /**
+ * Get the element name.
+ */
+ public String getName()
+ {
+ return name;
+ }
+
+ /**
+ * Get the element type.
+ * @return one of the values, defined in DTDConstants.
+ * In this implementation, the element type can be
+ * CDATA, RCDATA, EMPTY or ANY.
+ */
+ public int getType()
+ {
+ return type;
+ }
+
+ /**
+ * True if this element need not have the closing tag, false
+ * otherwise (returns the <code>oEnd</code> field).
+ * The HTML 4.0 definition specifies
+ * that some elements (like <code>&lt;hr&gt;</code>) are
+ * not required to have the end tags.
+ */
+ public boolean omitEnd()
+ {
+ return oEnd;
+ }
+
+ /**
+ * True if this element need not have the starting tag, false
+ * otherwise. The HTML 4.0 definition specifies
+ * that some elements (like <code>&lt;head&gt;</code> or
+ * <code>&lt;body&gt;</code>) are
+ * not required to have the start tags.
+ */
+ public boolean omitStart()
+ {
+ return oStart;
+ }
+
+ /**
+ * Returns the name of this element.
+ */
+ public String toString()
+ {
+ return name;
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/Entity.java b/libjava/classpath/javax/swing/text/html/parser/Entity.java
new file mode 100644
index 000000000..d40fb94f3
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/Entity.java
@@ -0,0 +1,183 @@
+/* Entity.java -- Stores information, obtained by parsing SGML DTD
+ * &lt;!ENTITY % .. &gt; tag
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper;
+
+/**
+ * <p>Stores information, obtained by parsing SGML DTD
+ * &lt;!ENTITY % .. &gt; tag.</p>
+ * <p>
+ * The entity defines some kind of macro that can be used elsewhere in
+ * the document.
+ * When the macro is referred to by the name in the DTD, it is expanded into
+ * a string.</p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public final class Entity
+ implements DTDConstants
+{
+ /**
+ * Package level mapper between the type names and their integer constants.
+ */
+ final static gnuStringIntMapper mapper =
+ new gnuStringIntMapper()
+ {
+ protected void create()
+ {
+ add("ANY", DTDConstants.ANY);
+ add("CDATA", DTDConstants.CDATA);
+ add("PUBLIC", DTDConstants.PUBLIC);
+ add("SDATA", DTDConstants.SDATA);
+ add("PI", DTDConstants.PI);
+ add("STARTTAG", DTDConstants.STARTTAG);
+ add("ENDTAG", DTDConstants.ENDTAG);
+ add("MS", DTDConstants.MS);
+ add("MD", DTDConstants.MD);
+ add("SYSTEM", DTDConstants.SYSTEM);
+ }
+ };
+
+ /**
+ * The entity name.
+ */
+ public String name;
+
+ /**
+ * The entity data
+ */
+ public char[] data;
+
+ /**
+ * The entity type.
+ */
+ public int type;
+
+ /**
+ * String representation of the entity data.
+ */
+ private String sdata;
+
+ /**
+ * Create a new entity
+ * @param a_name the entity name
+ * @param a_type the entity type
+ * @param a_data the data replacing the entity reference
+ */
+ public Entity(String a_name, int a_type, char[] a_data)
+ {
+ name = a_name;
+ type = a_type;
+ data = a_data;
+ }
+
+ /**
+ * Converts a given string to the corresponding entity type.
+ * @return a value, defined in DTDConstants (one of
+ * PUBLIC, CDATA, SDATA, PI, STARTTAG, ENDTAG, MS, MD, SYSTEM)
+ * or CDATA if the parameter is not a valid entity type.
+ */
+ public static int name2type(String an_entity)
+ {
+ int r = mapper.get(an_entity);
+ return (r == 0) ? DTDConstants.CDATA : r;
+ }
+
+ /**
+ * Get the entity data.
+ */
+ public char[] getData()
+ {
+ return data;
+ }
+
+ /**
+ * Returns true for general entities. Each general entity can be
+ * referenced as <code>&amp;entity-name;</code>. Such entities are
+ * defined by the SGML DTD tag
+ * <code>&lt;!ENTITY <i>name</i> "<i>value</i>"&gt;</code>. The general
+ * entities can be used anywhere in the document.
+ */
+ public boolean isGeneral()
+ {
+ return (type & DTDConstants.GENERAL) != 0;
+ }
+
+ /**
+ * Get the entity name.
+ */
+ public String getName()
+ {
+ return name;
+ }
+
+ /**
+ * Returns true for parameter entities. Each parameter entity can be
+ * referenced as <code>%entity-name;</code>. Such entities are
+ * defined by the SGML DTD tag
+ * <code>&lt;!ENTITY % <i>name</i> "<i>value</i>"&gt;</code>. The parameter
+ * entities can be used only in SGML context.
+ */
+ public boolean isParameter()
+ {
+ return (type & DTDConstants.PARAMETER) != 0;
+ }
+
+ /**
+ * Returns the entity data as a String.
+ */
+ public String getString()
+ {
+ if (sdata == null)
+ sdata = new String(data);
+
+ return sdata;
+ }
+
+ /**
+ * Get the entity type.
+ * @return the value of the {@link #type}.
+ */
+ public int getType()
+ {
+ return type;
+ }
+
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/Parser.java b/libjava/classpath/javax/swing/text/html/parser/Parser.java
new file mode 100644
index 000000000..f3faa2524
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/Parser.java
@@ -0,0 +1,446 @@
+/* Parser.java -- HTML parser
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import javax.swing.text.ChangedCharSetException;
+import javax.swing.text.SimpleAttributeSet;
+
+/*
+ * FOR DEVELOPERS: To avoid regression, please run the package test
+ * textsuite/javax.swing.text.html.parser/AllParserTests after your
+ * modifications.
+ */
+
+/**
+ * <p>A simple error-tolerant HTML parser that uses a DTD document
+ * to access data on the possible tokens, arguments and syntax.</p>
+ * <p> The parser reads an HTML content from a Reader and calls various
+ * notifying methods (which should be overridden in a subclass)
+ * when tags or data are encountered.</p>
+ * <p>Some HTML elements need no opening or closing tags. The
+ * task of this parser is to invoke the tag handling methods also when
+ * the tags are not explicitly specified and must be supposed using
+ * information, stored in the DTD.
+ * For example, parsing the document
+ * <p>&lt;table&gt;&lt;tr&gt;&lt;td&gt;a&lt;td&gt;b&lt;td&gt;c&lt;/tr&gt; <br>
+ * will invoke the handling methods exactly in the same order
+ * (and with the same parameters) as if parsing the document: <br>
+ * <em>&lt;html&gt;&lt;head&gt;&lt;/head&gt;&lt;body&gt;&lt;table&gt;&lt;
+ * tbody&gt;</em>&lt;tr&gt;&lt;td&gt;a<em>&lt;/td&gt;</em>&lt;td&gt;b<em>
+ * &lt;/td&gt;</em>&lt;td&gt;c<em>&lt;/td&gt;&lt;/tr&gt;</em>&lt;
+ * <em>/tbody&gt;&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;</em></p>
+ * (supposed tags are given in italics). The parser also supports
+ * obsolete elements of HTML syntax.<p>
+ * </p>
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class Parser
+ implements DTDConstants
+{
+ /**
+ * The document template description that will be used to parse the documents.
+ */
+ protected DTD dtd;
+
+ /**
+ * The value of this field determines whether or not the Parser will be
+ * strict in enforcing SGML compatibility. The default value is false,
+ * stating that the parser should do everything to parse and get at least
+ * some information even from the incorrectly written HTML input.
+ */
+ protected boolean strict;
+
+ /**
+ * The package level reference to the working HTML parser in this
+ * implementation.
+ */
+ final gnu.javax.swing.text.html.parser.support.Parser gnu;
+
+ /**
+ * Creates a new parser that uses the given DTD to access data on the
+ * possible tokens, arguments and syntax. There is no single - step way
+ * to get a default DTD; you must either refer to the implementation -
+ * specific packages, write your own DTD or obtain the working instance
+ * of parser in other way, for example, by calling
+ * {@link javax.swing.text.html.HTMLEditorKit#getParser() }.
+ * @param a_dtd A DTD to use.
+ */
+ public Parser(DTD a_dtd)
+ {
+ dtd = a_dtd;
+
+ final Parser j = this;
+
+ gnu =
+ new gnu.javax.swing.text.html.parser.support.Parser(dtd)
+ {
+ protected final void handleComment(char[] comment)
+ {
+ j.handleComment(comment);
+ }
+
+ protected final void handleEOFInComment()
+ {
+ j.handleEOFInComment();
+ }
+
+ protected final void handleEmptyTag(TagElement tag)
+ throws javax.swing.text.ChangedCharSetException
+ {
+ j.handleEmptyTag(tag);
+ }
+
+ protected final void handleStartTag(TagElement tag)
+ {
+ j.handleStartTag(tag);
+ }
+
+ protected final void handleEndTag(TagElement tag)
+ {
+ j.handleEndTag(tag);
+ }
+
+ protected final void handleError(int line, String message)
+ {
+ j.handleError(line, message);
+ }
+
+ protected final void handleText(char[] text)
+ {
+ j.handleText(text);
+ }
+
+ protected final void handleTitle(char[] title)
+ {
+ j.handleTitle(title);
+ }
+
+ protected final void markFirstTime(Element element)
+ {
+ j.markFirstTime(element);
+ }
+
+ protected final void startTag(TagElement tag)
+ throws ChangedCharSetException
+ {
+ j.startTag(tag);
+ }
+
+ protected final void endTag(boolean omitted)
+ {
+ j.endTag(omitted);
+ }
+
+ protected TagElement makeTag(Element element)
+ {
+ return j.makeTag(element);
+ }
+
+ protected TagElement makeTag(Element element, boolean isSupposed)
+ {
+ return j.makeTag(element, isSupposed);
+ }
+ };
+ }
+
+ /**
+ * Parse the HTML text, calling various methods in response to the
+ * occurrence of the corresponding HTML constructions.
+ * @param reader The reader to read the source HTML from.
+ * @throws IOException If the reader throws one.
+ */
+ public synchronized void parse(Reader reader)
+ throws IOException
+ {
+ gnu.parse(reader);
+ }
+
+ /**
+ * Parses DTD markup declaration. Currently returns without action.
+ * @return null.
+ * @throws java.io.IOException
+ */
+ public String parseDTDMarkup()
+ throws IOException
+ {
+ return gnu.parseDTDMarkup();
+ }
+
+ /**
+ * Parse DTD document declarations. Currently only parses the document
+ * type declaration markup.
+ * @param strBuff
+ * @return true if this is a valid DTD markup declaration.
+ * @throws IOException
+ */
+ protected boolean parseMarkupDeclarations(StringBuffer strBuff)
+ throws IOException
+ {
+ return gnu.parseMarkupDeclarations(strBuff);
+ }
+
+ /**
+ * Get the attributes of the current tag.
+ * @return The attribute set, representing the attributes of the current tag.
+ */
+ protected SimpleAttributeSet getAttributes()
+ {
+ return gnu.getAttributes();
+ }
+
+ /**
+ * Get the number of the document line being parsed.
+ * @return The current line.
+ */
+ protected int getCurrentLine()
+ {
+ return gnu.hTag.where.beginLine;
+ }
+
+ /**
+ * Get the current position in the document being parsed.
+ * @return The current position.
+ */
+ protected int getCurrentPos()
+ {
+ return gnu.hTag.where.startPosition;
+ }
+
+ /**
+ * The method is called when the HTML end (closing) tag is found or if
+ * the parser concludes that the one should be present in the
+ * current position. The method is called immediately
+ * before calling the handleEndTag().
+ * @param omitted True if the tag is not actually present in the document,
+ * but is supposed by the parser (like &lt;/html&gt; at the end of the
+ * document).
+ */
+ protected void endTag(boolean omitted)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Invokes the error handler. The default method in this implementation
+ * finally delegates the call to handleError, also providing the number of the
+ * current line.
+ */
+ protected void error(String msg)
+ {
+ gnu.error(msg);
+ }
+
+ /**
+ * Invokes the error handler. The default method in this implementation
+ * finally delegates the call to error (msg+": '"+invalid+"'").
+ */
+ protected void error(String msg, String invalid)
+ {
+ gnu.error(msg, invalid);
+ }
+
+ /**
+ * Invokes the error handler. The default method in this implementation
+ * finally delegates the call to error (parm1+" "+ parm2+" "+ parm3).
+ */
+ protected void error(String parm1, String parm2, String parm3)
+ {
+ gnu.error(parm1, parm2, parm3);
+ }
+
+ /**
+ * Invokes the error handler. The default method in this implementation
+ * finally delegates the call to error
+ * (parm1+" "+ parm2+" "+ parm3+" "+ parm4).
+ */
+ protected void error(String parm1, String parm2, String parm3, String parm4)
+ {
+ gnu.error(parm1, parm2, parm3, parm4);
+ }
+
+ /**
+ * In this implementation, this is never called and returns without action.
+ */
+ protected void flushAttributes()
+ {
+ gnu.flushAttributes();
+ }
+
+ /**
+ * Handle HTML comment. The default method returns without action.
+ * @param comment The comment being handled
+ */
+ protected void handleComment(char[] comment)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * This is additionally called when the HTML content terminates
+ * without closing the HTML comment. This can only happen if the
+ * HTML document contains errors (for example, the closing --&gt; is
+ * missing). The default method calls the error handler.
+ */
+ protected void handleEOFInComment()
+ {
+ gnu.error("Unclosed comment");
+ }
+
+ /**
+ * Handle the tag with no content, like &lt;br&gt;. The method is
+ * called for the elements that, in accordance with the current DTD,
+ * have an empty content.
+ * @param tag The tag being handled.
+ * @throws javax.swing.text.ChangedCharSetException
+ */
+ protected void handleEmptyTag(TagElement tag)
+ throws ChangedCharSetException
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * The method is called when the HTML closing tag (like &lt;/table&gt;)
+ * is found or if the parser concludes that the one should be present
+ * in the current position.
+ * @param tag The tag being handled
+ */
+ protected void handleEndTag(TagElement tag)
+ {
+ // This default implementation does nothing.
+ }
+
+ /** Handle an error that has occurred in the given line. */
+ protected void handleError(int line, String message)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * The method is called when the HTML opening tag (like &lt;table&gt;)
+ * is found or if the parser concludes that the one should be present
+ * in the current position.
+ * @param tag The tag being handled
+ */
+ protected void handleStartTag(TagElement tag)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Handle the text section.
+ * <p> For non-preformatted section, the parser replaces
+ * \t, \r and \n by spaces and then multiple spaces
+ * by a single space. Additionally, all whitespace around
+ * tags is discarded.
+ * </p>
+ * <p> For pre-formatted text (inside TEXTAREA and PRE), the parser preserves
+ * all tabs and spaces, but removes <b>one</b> bounding \r, \n or \r\n,
+ * if it is present. Additionally, it replaces each occurrence of \r or \r\n
+ * by a single \n.</p>
+ *
+ * @param text A section text.
+ */
+ protected void handleText(char[] text)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Handle HTML &lt;title&gt; tag. This method is invoked when
+ * both title starting and closing tags are already behind.
+ * The passed argument contains the concatenation of all
+ * title text sections.
+ * @param title The title text.
+ */
+ protected void handleTitle(char[] title)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * Constructs the tag from the given element. In this implementation,
+ * this is defined, but never called.
+ * @param element the base element of the tag.
+ * @return the tag
+ */
+ protected TagElement makeTag(Element element)
+ {
+ return makeTag(element, false);
+ }
+
+ /**
+ * Constructs the tag from the given element.
+ * @param element the tag base {@link javax.swing.text.html.parser.Element}
+ * @param isSupposed true if the tag is not actually present in the
+ * html input, but the parser supposes that it should occur in
+ * the current location.
+ * @return the tag
+ */
+ protected TagElement makeTag(Element element, boolean isSupposed)
+ {
+ return new TagElement(element, isSupposed);
+ }
+
+ /**
+ * This is called when the tag, representing the given element,
+ * occurs first time in the document.
+ * @param element
+ */
+ protected void markFirstTime(Element element)
+ {
+ // This default implementation does nothing.
+ }
+
+ /**
+ * The method is called when the HTML opening tag (like &lt;table&gt;)
+ * is found or if the parser concludes that the one should be present
+ * in the current position. The method is called immediately before
+ * calling the handleStartTag.
+ * @param tag The tag
+ */
+ protected void startTag(TagElement tag)
+ throws ChangedCharSetException
+ {
+ // This default implementation does nothing.
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java b/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java
new file mode 100644
index 000000000..cdd339b8f
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java
@@ -0,0 +1,207 @@
+/* ParserDelegator.java -- Delegator for ParserDocument.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package javax.swing.text.html.parser;
+
+import gnu.javax.swing.text.html.parser.HTML_401F;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Serializable;
+
+import javax.swing.text.BadLocationException;
+import javax.swing.text.SimpleAttributeSet;
+import javax.swing.text.html.HTMLEditorKit;
+import javax.swing.text.html.HTMLEditorKit.ParserCallback;
+
+/**
+ * This class instantiates and starts the working instance of
+ * html parser, being responsible for providing the default DTD.
+ *
+ * @author Audrius Meskauskas (AudriusA@Bioinformatics.org)
+ */
+public class ParserDelegator
+ extends javax.swing.text.html.HTMLEditorKit.Parser
+ implements Serializable
+{
+ private class gnuParser
+ extends gnu.javax.swing.text.html.parser.support.Parser
+ {
+ private static final long serialVersionUID = 1;
+
+ private gnuParser(DTD d)
+ {
+ super(d);
+ }
+
+ protected final void handleComment(char[] comment)
+ {
+ callBack.handleComment(comment, hTag.where.startPosition);
+ }
+
+ protected final void handleEmptyTag(TagElement tag)
+ throws javax.swing.text.ChangedCharSetException
+ {
+ callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(),
+ hTag.where.startPosition
+ );
+ }
+
+ protected final void handleEndTag(TagElement tag)
+ {
+ callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition);
+ }
+
+ protected final void handleError(int line, String message)
+ {
+ callBack.handleError(message, hTag.where.startPosition);
+ }
+
+ protected final void handleStartTag(TagElement tag)
+ {
+ SimpleAttributeSet attributes = gnu.getAttributes();
+
+ if (tag.fictional())
+ attributes.addAttribute(ParserCallback.IMPLIED, Boolean.TRUE);
+
+ callBack.handleStartTag(tag.getHTMLTag(), attributes,
+ hTag.where.startPosition
+ );
+ }
+
+ protected final void handleText(char[] text)
+ {
+ callBack.handleText(text, hTag.where.startPosition);
+ }
+
+ DTD getDTD()
+ {
+ // Accessing the inherited gnu.javax.swing.text.html.parser.support.Parser
+ // field. super. is a workaround, required to support JDK1.3's javac.
+ return super.dtd;
+ }
+ }
+
+ /**
+ * Use serialVersionUID for interoperability.
+ */
+ private static final long serialVersionUID = -1276686502624777206L;
+
+ private static DTD dtd = HTML_401F.getInstance();
+
+ /**
+ * The callback.
+ * This is package-private to avoid an accessor method.
+ */
+ HTMLEditorKit.ParserCallback callBack;
+
+ /**
+ * The reference to the working class of HTML parser that is
+ * actually used to parse the document.
+ * This is package-private to avoid an accessor method.
+ */
+ gnuParser gnu;
+
+ /**
+ * Parses the HTML document, calling methods of the provided
+ * callback. This method must be multithread-safe.
+ * @param reader The reader to read the HTML document from
+ * @param a_callback The callback that is notified about the presence
+ * of HTML elements in the document.
+ * @param ignoreCharSet If true, any charset changes during parsing
+ * are ignored.
+ * @throws java.io.IOException
+ */
+ public void parse(Reader reader, HTMLEditorKit.ParserCallback a_callback,
+ boolean ignoreCharSet
+ )
+ throws IOException
+ {
+ callBack = a_callback;
+
+ if (gnu == null || !dtd.equals(gnu.getDTD()))
+ {
+ gnu = new gnuParser(dtd);
+ }
+
+ gnu.parse(reader);
+
+ callBack.handleEndOfLineString(gnu.getEndOfLineSequence());
+ try
+ {
+ callBack.flush();
+ }
+ catch (BadLocationException ex)
+ {
+ // Convert this into the supported type of exception.
+ throw new IOException(ex.getMessage());
+ }
+ }
+
+ /**
+ * Calling this method instructs that, if not specified directly,
+ * the documents will be parsed using the default
+ * DTD of the implementation.
+ */
+ protected static void setDefaultDTD()
+ {
+ dtd = HTML_401F.getInstance();
+ }
+
+ /**
+ * Registers the user-written DTD under the given name, also
+ * making it default for the subsequent parsings. This has effect on
+ * all subsequent calls to parse(...). If you need to specify
+ * your DTD locally, simply use {@link javax.swing.text.html.parser.Parser}
+ * instead.
+ * @param a_dtd The DTD that will be used to parse documents by this class.
+ * @param name The name of this DTD.
+ * @return No standard is specified on which instance of DTD must be
+ * returned by this method, and it is recommended to leave the returned
+ * value without consideration. This implementation returns the DTD
+ * that was previously set as the default DTD, or the implementation's
+ * default DTD if none was set.
+ */
+ protected static DTD createDTD(DTD a_dtd, String name)
+ {
+ DTD.putDTDHash(name, a_dtd);
+
+ DTD dtd_prev = dtd;
+ dtd = a_dtd;
+ return dtd_prev;
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/TagElement.java b/libjava/classpath/javax/swing/text/html/parser/TagElement.java
new file mode 100644
index 000000000..4558b15eb
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/TagElement.java
@@ -0,0 +1,142 @@
+/* TagElement.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package javax.swing.text.html.parser;
+
+import javax.swing.text.html.HTML;
+
+/**
+ * The SGML element, defining a single html tag.
+ * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
+ */
+public class TagElement
+{
+ /**
+ * The Element the tag was constructed from.
+ */
+ private final Element element;
+
+ /**
+ * The corresponding HTML tag, assigned once in constructor.
+ */
+ private final HTML.Tag tag;
+
+ /**
+ * The 'fictional' flag.
+ */
+ private final boolean fictional;
+
+ /**
+ * Creates the html tag element from the definition, stored in the
+ * given element. Sets the flag 'fictional' to false.
+ * @param an_element
+ */
+ public TagElement(Element an_element)
+ {
+ this(an_element, false);
+ }
+
+ /**
+ * Creates the html tag element from the definition, stored in the
+ * given element, setting the flag 'fictional' to the given value.
+ */
+ public TagElement(Element an_element, boolean is_fictional)
+ {
+ element = an_element;
+ fictional = is_fictional;
+
+ HTML.Tag t = HTML.getTag(element.getName());
+
+ if (t != null)
+ tag = t;
+ else
+ tag = new HTML.UnknownTag(element.getName());
+ }
+
+ /**
+ * Get the element from which the tag was constructed.
+ */
+ public Element getElement()
+ {
+ return element;
+ }
+
+ /**
+ * Get the corresponding HTML tag. This is either one of the
+ * pre-defined HTML tags or the instance of the UnknownTag with the
+ * element name.
+ */
+ public HTML.Tag getHTMLTag()
+ {
+ return tag;
+ }
+
+ /**
+ * Calls isPreformatted() for the corresponding html tag and returns
+ * the obtained value.
+ */
+ public boolean isPreformatted()
+ {
+ return tag.isPreformatted();
+ }
+
+ /**
+ * Calls breaksFlow() for the corresponding html tag and returns
+ * the obtained value.
+ */
+ public boolean breaksFlow()
+ {
+ return tag.breaksFlow();
+ }
+
+ /**
+ * Get the value of the flag 'fictional'.
+ */
+ public boolean fictional()
+ {
+ return fictional;
+ }
+
+ /**
+ * Returns string representation of this object.
+ */
+ public String toString()
+ {
+ return getElement() + (fictional ? "?" : "");
+ }
+}
diff --git a/libjava/classpath/javax/swing/text/html/parser/package.html b/libjava/classpath/javax/swing/text/html/parser/package.html
new file mode 100644
index 000000000..5d5157fb2
--- /dev/null
+++ b/libjava/classpath/javax/swing/text/html/parser/package.html
@@ -0,0 +1,50 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!-- package.html - describes classes in javax.swing.text.html.parser package.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. -->
+
+<html>
+<head><title>GNU Classpath - javax.swing.text.html.parser</title></head>
+
+<body>
+<p> Provides the DTD driven HTML parser for web browsers,
+ web robots, web page content analysers, web editors and
+ other applications working with Hypertext
+ Markup Language (HTML).
+</p>
+
+</body>
+</html>