diff options
Diffstat (limited to 'libjava/classpath/javax/swing/text/html/parser')
11 files changed, 3031 insertions, 0 deletions
diff --git a/libjava/classpath/javax/swing/text/html/parser/AttributeList.java b/libjava/classpath/javax/swing/text/html/parser/AttributeList.java new file mode 100644 index 000000000..a943f056d --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/AttributeList.java @@ -0,0 +1,294 @@ +/* AttributeList.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +import java.io.Serializable; + +import java.util.Enumeration; +import java.util.Vector; + +/** + * <p> + * Stores the attribute information, obtained by parsing SGML (DTD) tag + * <code><!ATTLIST .. ></code></p> + * <p> + * Elements can have a associated named properties (attributes) having the + * assigned values. The element start tag can have any number of attribute + * value pairs, separated by spaces. They can appear in any order. + * SGML requires you to delimit the attribute values using either double (") + * or single (') quotation marks. In HTML, it is possible + * (but not recommended) to specify the value of an attribute without + * quotation marks. Such attribute value may only contain + * letters, digits, hyphens (-) and periods (.) . + * </p> + * <p> + * The <code>AttributeList</code> defines a single attribute that additionally + * has a pointer referencing the possible subsequent attribute. + * The whole structure is just a simple linked list, storing all attributes of + * some <code>Element</code>. + * Use the <code>getNext()</code> method repeatedly to see all attributes in + * the list. + * </p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class AttributeList + implements DTDConstants, Serializable +{ + /** Maps between type names and they string values. */ + private static final gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("CDATA", DTDConstants.CDATA); + add("ENTITY", DTDConstants.ENTITY); + add("ENTITIES", DTDConstants.ENTITIES); + add("ID", DTDConstants.ID); + add("IDREF", DTDConstants.IDREF); + add("IDREFS", DTDConstants.IDREFS); + add("NAME", DTDConstants.NAME); + add("NAMES", DTDConstants.NAMES); + add("NMTOKEN", DTDConstants.NMTOKEN); + add("NMTOKENS", DTDConstants.NMTOKENS); + add("NOTATION", DTDConstants.NOTATION); + add("NUMBER", DTDConstants.NUMBER); + add("NUMBERS", DTDConstants.NUMBERS); + add("NUTOKEN", DTDConstants.NUTOKEN); + add("NUTOKENS", DTDConstants.NUTOKENS); + } + }; + + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -1361214058742015233L; + + /** + * The value of ( = pointer to ) the next attribute in the linked list, + * storing all attributes of some Element. Contains null for the + * last attribute. + */ + public AttributeList next; + + /** + * The name of the attribute. The attribute names are case insensitive. + */ + public String name; + + /** + * The default value of this attribute. Equals to null if no default value + * is specified. + */ + public String value; + + /** + * The explicit set of the allowed values of this attribute. Equals to + * null, if this parameter was not specified. + * Values, defined in DTD, are case insensitive. + */ + public Vector<?> values; + + /** + * The modifier of this attribute. This field contains one of the + * following DTD constants: + * <ul> + * <li> REQUIRED if the attribute value is always required,</li> + * <li> IMPLIED if the user agent must supply the default value itself,</li> + * <li> FIXED if the attribute value is fixed to some value and cannot + * be changed.</li> + * <li> DEFAULT if the attribute default value has been supplied.</li> + * <li> CURRENT the value that at any point in the document is + * the last value supplied for that element. A value is required to be + * supplied for the first* occurrence of an element</li> + * <li> CONREF specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead.</li> + * </ul> + */ + public int modifier; + + /** + * The type of the attribute. The possible values of this field + * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants. + */ + public int type; + + /** + * Creates the attribute with the given name, initializing other fields + * to the default values ( 0 and null ). + * + * @param a_name The name of the attribute. + */ + public AttributeList(String a_name) + { + name = a_name; + } + + /** + * Creates the attribute with the given properties. + * @param a_name The name of the attribute + * @param a_type The type of the attribute. The possible values are defined + * in <code> DTDConstants</code>. + * @param a_modifier The modifier of this attribute. The possible values + * are defined in <code> DTDConstants</code>. + * @param a_default The default value of this attribute + * @param allowed_values The explicit set of the allowed values of + * this attribute + * @param a_next The value of the subsequent instance of the AttributeList, + * representing the next attribute definition for the same element. + * Equals to null for the last attribute definition. + */ + public AttributeList(String a_name, int a_type, int a_modifier, + String a_default, Vector<?> allowed_values, + AttributeList a_next + ) + { + this(a_name); + type = a_type; + modifier = a_modifier; + value = a_default; + values = allowed_values; + next = a_next; + } + + /** + * Get the modifier of this attribute. This field contains one of the + * following DTD constants: + * <ul> + * <li> REQUIRED if the attribute value is always required,</li> + * <li> IMPLIED if the user agent must supply the default value itself,</li> + * <li> FIXED if the attribute value is fixed to some value and cannot + * be changed.</li> + * <li> DEFAULT if the attribute default value has been supplied.</li> + * <li> CURRENT the value that at any point in the document is + * the last value supplied for that element. A value is required to be + * supplied for the first* occurrence of an element</li> + * <li> CONREF specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead.</li> + * </ul> + */ + public int getModifier() + { + return modifier; + } + + /** + * Get the name of the attribute. + * The value is returned as it was supplied to a + * constructor, preserving the character case. + */ + public String getName() + { + return name; + } + + /** + * Get the value of ( = pointer to ) the next attribute in the linked list, + * storing all attributes of some Element. Contains null for the + * last attribute. + */ + public AttributeList getNext() + { + return next; + } + + /** + * Get the type of the attribute. The possible values of this field + * (NUMBER, NAME, ID, CDATA and so on) are defined in DTDConstants. + */ + public int getType() + { + return type; + } + + /** + * Get the default value of this attribute. + */ + public String getValue() + { + return value; + } + + /** + * Get the allowed values of this attribute. + */ + public Enumeration<?> getValues() + { + return (values != null) ? values.elements() : null; + } + + /** + * Converts a string value, representing a valid SGLM attribute type, + * into the corresponding value, defined in DTDConstants. + * @param typeName the name of the type (character case is ignored). + * @return a value from DTDConstants or DTDConstants.ANY if the + * string is not representing a known type. The known attribute types + * in this implementation are CDATA, ENTITY, ENTITIES, ID, IDREF, IDREFS, + * NAME, NAMES, NMTOKEN, NMTOKENS, NOTATION, NUMBER, NUMBERS, NUTOKEN and + * NUTOKENS. + * @throws NullPointerException if the passed parameter is null. + */ + public static int name2type(String typeName) + { + return mapper.get(typeName.toUpperCase()); + } + + /** + * Returns the attribute name. + */ + public String toString() + { + return name; + } + + /** + * Converts a value from DTDConstants into the string representation. + * @param type - an integer value of the public static integer field, + * defined in the DTDConstants class. + * @return a corresponding SGML DTD keyword (UPPERCASE) or null if there + * are no attribute type constant having the given value. + */ + public static String type2name(int type) + { + return mapper.get(type); + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/ContentModel.java b/libjava/classpath/javax/swing/text/html/parser/ContentModel.java new file mode 100644 index 000000000..d5c4418de --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/ContentModel.java @@ -0,0 +1,223 @@ +/* ContentModel.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.models.transformer; + +import java.io.Serializable; + +import java.util.Vector; + +/** + * A representation of the element content. The instances of this class + * can be arranged into the linked list, representing a BNF expression. + * The content model is constructed as a branched tree structure in the + * following way: + * <pre> + * a = new ContentModel('+', A, null); // a reprensents A+ + * b = new ContentModel('&', B, a); // b represents B & A+ + * c = new ContentModel('*', b, null); // c represents ( B & A+) * + * d = new ContentModel('|', new ContentModel('*', A, null), + * new ContentModel('?', B, null)); // d represents ( A* | B? ) + * </pre> + * where the valid operations are: + * <ul> + * <li><code>E* </code> E occurs zero or more times</li> + * <li><code>E+ </code> E occurs one or more times</li> + * <li><code>E? </code> E occurs once or not atl all</li> + * <li><code>A,B</code> A occurs before B</li> + * <li><code>A|B</code> both A and B are permitted in any order. + * The '|' alone does not permit the repetetive occurence of A or B + * (use <code>(A|B)*</code>.</li> + * <li><code>A&B</code> both A and B must occur once (in any order)</li> + * </ul> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class ContentModel + implements Serializable +{ + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -1130825523866321257L; + + /** + * The next content model model ( = pointer to the next element of + * the linked list) for the binary expression (',','&' or '|'). Null + * for the last element in the list. + */ + public ContentModel next; + + /** + * The document content, containing either Element or the enclosed + * content model (that would be in the parentheses in BNF expression). + */ + public Object content; + + /** + * Specifies the BNF operation between this node and the node, + * stored in the field <code>next</code> (or for this node, if it is + * an unary operation. + */ + public int type; + + /** + * Create a content model initializing all fields to default values. + */ + public ContentModel() + { + // Nothing to do here. + } + + /** + * Create a content model, consisting of the single element. + * Examples: + *<code> + * a = new ContentModel('+', A, null); // a reprensents A+ + * b = new ContentModel('&', B, a); // b represents B & A+ + * c = new ContentModel('*', b, null); // c represents ( B & A+) * + * d = new ContentModel('|', A, + * new ContentModel('?',b, null); + * // d represents + * </code> + */ + public ContentModel(Element a_content) + { + content = a_content; + } + + /** + * Create a content model, involving expression of the given type. + * @param a_type The expression operation type ('*','?' or '+' + * @param a_content The content for that the expression is applied. + */ + public ContentModel(int a_type, ContentModel a_content) + { + content = a_content; + type = a_type; + } + + /** + * Create a content model, involving binary expression of the given type. + * @param a_type The expression operation type ( ',', '|' or '&'). + * @param a_content The content of the left part of the expression. + * @param a_next The content model, representing the right part of the + * expression. + */ + public ContentModel(int a_type, Object a_content, ContentModel a_next) + { + content = a_content; + type = a_type; + next = a_next; + } + + /** + * Adds all list elements to the given vector, ignoring the + * operations between the elements. The old vector values are not + * discarded. + * @param elements - a vector to add the values to. + */ + public void getElements(Vector<Element> elements) + { + ContentModel c = this; + + while (c != null) + { + // FIXME: correct? + if (c.content instanceof Element) + elements.add((Element) c.content); + c = c.next; + } + } + + /** + * Checks if the content model matches an empty input stream. + * The empty content is created using SGML DTD keyword EMPTY. + * The empty model is a model with the content field equal to null. + * + * @return true if the content field is equal to null. + */ + public boolean empty() + { + return content == null; + } + + /** + * Get the element, stored in the <code>next.content</code>. + * The method is programmed as the part of the standard API, but not + * used in this implementation. + * @return the value of the field <code>next</code>. + */ + public Element first() + { + return (Element) next.content; + } + + /** + * Checks if this object can potentially be the first token in the + * ContenModel list. The method is programmed as the part of the + * standard API, but not used in this implementation. + */ + public boolean first(Object token) + { + ContentModel c = this; + while (c.next != null) + { + if (c.content != null && c.content.toString().equals(token.toString()) && + c.type != ',' + ) + + // Agree if the operation with the preceeding element + // is not the comma operation. + return true; + c = c.next; + } + return false; + } + + /** + * Returns a string representation (an expression) of this content model. + * The expression has BNF-like syntax, except the absence of the + * unary operator is additionally indicated by " ' ". It is + * advisable to check the created models for correctness using this + * method. + */ + public String toString() + { + return transformer.transform(this).toString(); + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/DTD.java b/libjava/classpath/javax/swing/text/html/parser/DTD.java new file mode 100644 index 000000000..09b50fee7 --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/DTD.java @@ -0,0 +1,609 @@ +/* DTD.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.BitSet; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; + +/** + * <p>Representation or the SGML DTD document. + * Provides basis for describing a syntax of the + * HTML documents. The fields of this class are NOT initialized in + * constructor. You need to do this separately before passing this data + * structure to the HTML parser. The subclasses with the fields, pre- + * initialized, for example, for HTML 4.01, can be available only between + * the implementation specific classes + * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F } + * in this implementation).</p> + * <p> + * If you need more information about SGML DTD documents, + * the author suggests to read SGML tutorial on + * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html" + * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>. + * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>, + * Oxford University Press, 688 p, ISBN: 0198537379. + * </p> + * <p> + * Warning: the html, head and other tag fields will only be automatically + * assigned if the VM has the correctly implemented reflection mechanism. + * As these fields are not used anywhere in the implementation, not + * exception will be thrown in the opposite case. + * </p> + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class DTD + implements DTDConstants +{ + /** + * The version of the persistent data format. + * @specnote This was made <code>final</code> in 1.5. + */ + public static final int FILE_VERSION = 1; + + /** + * The table of existing available DTDs. + */ + static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>(); + + /** + * The applet element for this DTD. + */ + public Element applet; + + /** + * The base element for this DTD. + */ + public Element base; + + /** + * The body element for this DTD. + */ + public Element body; + + /** + * The head element for this DTD. + */ + public Element head; + + /** + * The html element for this DTD. + */ + public Element html; + + /** + * The isindex element of for this DTD. + */ + public Element isindex; + + /** + * The meta element for this DTD. + */ + public Element meta; + + /** + * The p element for this DTD. + */ + public Element p; + + /** + * The param element for this DTD. + */ + public Element param; + + /** + * The pcdata for this DTD. + */ + public Element pcdata; + + /** + * The title element for this DTD. + */ + public Element title; + + /** + * The element for accessing all DTD elements by name. + */ + public Hashtable<String,Element> elementHash = + new Hashtable<String,Element>(); + + /** + * The entity table for accessing all DTD entities by name. + */ + public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>(); + + /** + * The name of this DTD. + */ + public String name; + + /** + * Contains all elements in this DTD. The + * javax.swing.text.html.parser.Element#index field of all elements + * in this vector is set to the element position in this vector. + */ + public Vector<Element> elements = new Vector<Element>(); + + /** Create a new DTD with the specified name. */ + protected DTD(String a_name) + { + name = a_name; + } + + /** Get this DTD by name. The current implementation + * only looks in the internal table of DTD documents. If no corresponding + * entry is found, the new entry is created, placed into + * the table and returned. */ + public static DTD getDTD(String name) + throws IOException + { + DTD d = dtdHash.get(name); + + if (d == null) + { + d = new DTD(name); + dtdHash.put(d.name, d); + } + + return d; + } + + /** + * Get the element by the element name. If the element is not yet + * defined, it is newly created and placed into the element table. + * If the element name matches (ingoring case) a public non static + * element field in this class, this field is assigned to the value + * of the newly created element. + */ + public Element getElement(String element_name) + { + return newElement(element_name); + } + + /** + * Get the element by the value of its + * {@link javax.swing.text.html.parser.Element#index} field. + */ + public Element getElement(int index) + { + return elements.get(index); + } + + /** + * Get the entity with the given identifier. + * @param id that can be returned by + * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)} + * @return The entity from this DTD or null if there is no entity with + * such id or such entity is not present in the table of this instance. + */ + public Entity getEntity(int id) + { + String name = Entity.mapper.get(id); + + if (name != null) + return entityHash.get(name); + else + return null; + } + + /** + * Get the named entity by its name. + */ + public Entity getEntity(String entity_name) + { + return entityHash.get(entity_name); + } + + /** + * Get the name of this instance of DTD + */ + public String getName() + { + return name; + } + + /** + * Creates, adds into the entity table and returns the + * character entity like <code>&lt;</code> + * (means '<code><</code>' ); + * @param name The entity name (without heading & and closing ;) + * @param type The entity type + * @param character The entity value (single character) + * @return The created entity + */ + public Entity defEntity(String name, int type, int character) + { + Entity e = newEntity(name, type); + e.data = new char[] { (char) character }; + return e; + } + + /** + * Define the attributes for the element with the given name. + * If the element is not exist, it is created. + * @param forElement + * @param attributes + */ + public void defineAttributes(String forElement, AttributeList attributes) + { + Element e = elementHash.get(forElement.toLowerCase()); + + if (e == null) + e = newElement(forElement); + + e.atts = attributes; + } + + /** + * Defines the element and adds it to the element table. Sets the + * <code>Element.index</code> field to the value, unique for this + * instance of DTD. If the element with the given name already exists, + * replaces all other its settings by the method argument values. + * @param name the name of the element + * @param type the type of the element + * @param headless true if the element needs no starting tag + * (should not occur in HTML). + * @param tailless true if the element needs no ending tag (like + * <code><hr></code> + * @param content the element content + * @param exclusions the set of elements that must not occur inside + * this element. The <code>Element.index</code> value defines which + * bit in this bitset corresponds to that element. + * @param inclusions the set of elements that can occur inside this + * element. the <code>Element.index</code> value defines which + * bit in this bitset corresponds to that element. + * @param attributes the element attributes. + * @return the newly defined element. + */ + public Element defineElement(String name, int type, boolean headless, + boolean tailless, ContentModel content, + BitSet exclusions, BitSet inclusions, + AttributeList attributes + ) + { + Element e = newElement(name); + e.type = type; + e.oStart = headless; + e.oEnd = tailless; + e.content = content; + e.exclusions = exclusions; + e.inclusions = inclusions; + e.atts = attributes; + + return e; + } + + /** + * Creates, intializes and adds to the entity table the new + * entity. + * @param name the name of the entity + * @param type the type of the entity + * @param data the data section of the entity + * @return the created entity + */ + public Entity defineEntity(String name, int type, char[] data) + { + Entity e = newEntity(name, type); + e.data = data; + + return e; + } + + /** Place this DTD into the DTD table. */ + public static void putDTDHash(String name, DTD dtd) + { + dtdHash.put(name, dtd); + } + + /** + * <p>Reads DTD from an archived format. This format is not standardized + * and differs between implementations.</p><p> This implementation + * reads and defines all entities and elements using + * ObjectInputStream. The elements and entities can be written into the + * stream in any order. The objects other than elements and entities + * are ignored.</p> + * @param stream A data stream to read from. + * @throws java.io.IOException If one is thrown by the input stream + */ + public void read(DataInputStream stream) + throws java.io.IOException + { + ObjectInputStream oi = new ObjectInputStream(stream); + Object def; + try + { + while (true) + { + def = oi.readObject(); + if (def instanceof Element) + { + Element e = (Element) def; + elementHash.put(e.name.toLowerCase(), e); + assignField(e); + } + else if (def instanceof Entity) + { + Entity e = (Entity) def; + entityHash.put(e.name, e); + } + } + } + catch (ClassNotFoundException ex) + { + throw new IOException(ex.getMessage()); + } + catch (EOFException ex) + { + // ok EOF + } + } + + /** + * Returns the name of this instance of DTD. + */ + public String toString() + { + return name; + } + + /** + * Creates and returns new attribute (not an attribute list). + * @param name the name of this attribute + * @param type the type of this attribute (FIXED, IMPLIED or + * REQUIRED from <code>DTDConstants</code>). + * @param modifier the modifier of this attribute + * @param default_value the default value of this attribute + * @param allowed_values the allowed values of this attribute. The multiple + * possible values in this parameter are supposed to be separated by + * '|', same as in SGML DTD <code><!ATTLIST </code>tag. This parameter + * can be null if no list of allowed values is specified. + * @param atts the previous attribute of this element. This is + * placed to the field + * {@link javax.swing.text.html.parser.AttributeList#next }, + * creating a linked list. + * @return The attributes. + */ + protected AttributeList defAttributeList(String name, int type, int modifier, + String default_value, + String allowed_values, + AttributeList atts + ) + { + AttributeList al = new AttributeList(name); + al.modifier = modifier; + al.value = default_value; + al.next = atts; + + if (allowed_values != null) + { + StringTokenizer st = new StringTokenizer(allowed_values, " \t|"); + Vector<String> v = new Vector<String>(st.countTokens()); + + while (st.hasMoreTokens()) + v.add(st.nextToken()); + + al.values = v; + } + + return al; + } + + /** + * Creates a new content model. + * @param type specifies the BNF operation for this content model. + * The valid operations are documented in the + * {@link javax.swing.text.html.parser.ContentModel#type }. + * @param content the content of this content model + * @param next if the content model is specified by BNF-like + * expression, contains the rest of this expression. + * @return The newly created content model. + */ + protected ContentModel defContentModel(int type, Object content, + ContentModel next + ) + { + ContentModel model = new ContentModel(); + model.type = type; + model.next = next; + model.content = content; + + return model; + } + + /** + * Defines a new element and adds it to the element table. + * If the element alredy exists, + * overrides it settings with the specified values. + * @param name the name of the new element + * @param type the type of the element + * @param headless true if the element needs no starting tag + * @param tailless true if the element needs no closing tag + * @param content the element content. + * @param exclusions the elements that must be excluded from the + * content of this element, in all levels of the hierarchy. + * @param inclusions the elements that can be included as the + * content of this element. + * @param attributes the element attributes. + * @return the created or updated element. + */ + protected Element defElement(String name, int type, boolean headless, + boolean tailless, ContentModel content, + String[] exclusions, String[] inclusions, + AttributeList attributes + ) + { + // compute the bit sets + BitSet exclude = bitSet(exclusions); + BitSet include = bitSet(inclusions); + + Element e = + defineElement(name, type, headless, tailless, content, exclude, include, + attributes + ); + + return e; + } + + /** + * Creates, intializes and adds to the entity table the new + * entity. + * @param name the name of the entity + * @param type the type of the entity + * @param data the data section of the entity + * @return the created entity + */ + protected Entity defEntity(String name, int type, String data) + { + Entity e = newEntity(name, type); + e.data = data.toCharArray(); + + return e; + } + + private void assignField(Element e) + { + String element_name = e.name; + try + { + // Assign the field via reflection. + Field f = getClass().getField(element_name.toLowerCase()); + if ((f.getModifiers() & Modifier.PUBLIC) != 0) + if ((f.getModifiers() & Modifier.STATIC) == 0) + if (f.getType().isAssignableFrom(e.getClass())) + f.set(this, e); + } + catch (IllegalAccessException ex) + { + unexpected(ex); + } + catch (NoSuchFieldException ex) + { + // This is ok. + } + + // Some virtual machines may still lack the proper + // implementation of reflection. As the tag fields + // are not used anywhere in this implementation, + // (and this class is also rarely used by the end user), + // it may be better not to crash everything by throwing an error + // for each case when the HTML parsing is required. + catch (Throwable t) + { + // This VM has no reflection mechanism implemented! + if (t instanceof OutOfMemoryError) + throw (Error) t; + } + } + + /** + * Create the bit set for this array of elements. + * The unknown elements are automatically defined and added + * to the element table. + * @param elements + * @return The bit set. + */ + private BitSet bitSet(String[] elements) + { + BitSet b = new BitSet(); + + for (int i = 0; i < elements.length; i++) + { + Element e = getElement(elements [ i ]); + + if (e == null) + e = newElement(elements [ i ]); + + b.set(e.index); + } + + return b; + } + + /** + * Find the element with the given name in the element table. + * If not find, create a new element with this name and add to the + * table. + * @param name the name of the element + * @return the found or created element. + */ + private Element newElement(String name) + { + Element e = elementHash.get(name.toLowerCase()); + + if (e == null) + { + e = new Element(); + e.name = name; + e.index = elements.size(); + elements.add(e); + elementHash.put(e.name.toLowerCase(), e); + assignField(e); + } + return e; + } + + /** + * Creates and adds to the element table the entity with an + * unitialized data section. Used internally. + * @param name the name of the entity + * @param type the type of the entity, a bitwise combination + * of GENERAL, PARAMETER, SYSTEM and PUBLIC. + * + * @return the created entity + */ + private Entity newEntity(String name, int type) + { + Entity e = new Entity(name, type, null); + entityHash.put(e.name, e); + return e; + } + + private void unexpected(Exception ex) + { + throw new Error("This should never happen, report a bug", ex); + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java b/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java new file mode 100644 index 000000000..75e7afb4d --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/DTDConstants.java @@ -0,0 +1,292 @@ +/* DTDConstants.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +/** + * <p>This class defines the SGML basic types, used for describing HTML 4.01 + * at <a href="http://www.w3.org/TR/html4/types.html" + * >http://www.w3.org/TR/html4/types.html</a>. Not all constants, + * defined here, are actually used in HTML 4.01 SGML specification. Some others + * are defined just as part of the required implementation. + * </p> + * <p> + * If you need more information about SGML DTD documents, + * the author suggests to read SGML tutorial on + * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html" + * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>. + * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>, + * Oxford University Press, 688 p, ISBN: 0198537379. + * </p> + * + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public interface DTDConstants +{ + /* ----- The data types, used in HTML 4.01 SGML definition: ---- */ + + /** + * The CDATA (Character data) constant, specifes the content model, + * consisting of characters only. In SGML for HTML 4.01, the character + * entities must be replaced by characters, the line feeds must be + * ignored and any number of the subsequent carriage returns or tabs + * must be replaced by a single space. + */ + int CDATA = 1; + + /** + * The EMPTY constant, means the element with no content. + */ + int EMPTY = 17; + + /** + * The ID constant, means that the token is the unique identifier. + * This identifier can be referenced by attribute with value of IDREF. + * The identifier must begin with letter, followed by any number of + * letters, digits, hyphens, underscores, colons and periods. + */ + int ID = 4; + + /** + * The IDREF constant, specifies reference to a valid ID within + * the document. + */ + int IDREF = 5; + + /** + * The IDREFS constant, a space separated list of IDREFs + */ + int IDREFS = 6; + + /** + * The NAME constant, means the token that + * must begin with letter, followed by any number of + * letters, digits, hyphens, underscores, colons and periods. + */ + int NAME = 7; + + /** + * The NAMES constant, specifies a space separated of NAMEs. + */ + int NAMES = 8; + + /** + * The NMTOKEN constant, specifies the attribute, consisting of + * characters that can be either digits or alphabetic characters). + */ + int NMTOKEN = 9; + + /** + * The NMTOKENS constant, specifies a list of NMTOKENs. + */ + int NMTOKENS = 10; + + /** + * The NOTATION constant, a previously defined data type. + */ + int NOTATION = 11; + + /** + * The NUMBER constant (means that the attribute consists of at least + * one decimal digit). + */ + int NUMBER = 12; + + /** + * The NUMBERS constant, specifies a space separated list of NUMBERs. + */ + int NUMBERS = 13; + + /** + * The NUTOKEN constant. + */ + int NUTOKEN = 14; + + /** + * The NUTOKENS constant. + */ + int NUTOKENS = 15; + + /* ------- + The entity scope constants. + As these four constants are combined with the bitwise OR, + they are defined in the hexadecimal notation. + The reason of setting the two bits at once (for PUBLIC and SYSTEM) + is probably historical. ----- */ + + /** + * The PUBLIC constant, specifies the public entity. The PUBLIC entities + * are assumed to be known to many systems so that a full declaration + * need not be transmitted. For example, + * <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0//EN"> + */ + int PUBLIC = 0xA; + + /** + * The SYSTEM constant, specifies the system entitiy. The system entities + * are assumed to be known but require the clear identifer + * (like the file path), where they can be found in the system. + * For example, <code> + * <DOCTYPE html SYSTEM "/path/to/file.dtd"> </code>. + */ + int SYSTEM = 0x11; + + /** + * The PARAMETER constant, specifies that entity is only valid + * inside SGML DTD scope. + */ + int PARAMETER = 0x40000; + + /** + * The GENERAL constant, specifies theat the entity is valid in the + * whole HTML document scope. + */ + int GENERAL = 0x10000; + + /* ---- The constants, defining if the element attribute is required, + fixed or implied. ---- */ + + /** + * The attribute modifier #REQUIRED constant, indicates that the + * value must be supplied. + */ + int REQUIRED = 2; + + /** + * The attribute modifier #FIXED constant, means that the attribute has + * the fixed value that cannot be changed. + */ + int FIXED = 1; + + /** + * The attribute modifier #IMPLIED constant, + * indicating that for this attribute the user agent must provide + * the value itself. + */ + int IMPLIED = 5; + + /** + * The attribute modifier #CURRENT constant, specifies the value + * that at any point in the document is the last value supplied for + * that element. A value is required to be supplied for the first + * occurrence of an element + */ + int CURRENT = 3; + + /** + * The attribute modifier #CONREF constant, specifies the IDREF value of + * the reference to content in another location of the document. + * The element with this attribute is empty, the content from + * that another location must be used instead. + */ + int CONREF = 4; + + /* ----- Constants, defining if the element + start and end tags are required. ---- */ + + /** + * The STARTTAG, meaning that the element needs a starting tag. + */ + int STARTTAG = 13; + + /** + * The ENDTAG constant, meaning that the element needs a closing tag. + */ + int ENDTAG = 14; + + /* ----- Other constants: ----- */ + + /** + * The ANY constant, specifies + * an attribute, consisting from arbitrary characters. + */ + int ANY = 19; + + /** + * The DEFAULT constant, specifies the default value. + */ + int DEFAULT = 131072; + + /** + * The ENTITIES constant (list of ENTITYes) + */ + int ENTITIES = 3; + + /** + * The ENTITY constant, meaning the numeric or symbolic name of some + * HTML data. + */ + int ENTITY = 2; + + /** + * The MD constant. + */ + int MD = 16; + + /** + * The MODEL constant. + */ + int MODEL = 18; + + /** + * The MS constant. + */ + int MS = 15; + + /** + * The PI (Processing Instruction) constant, specifies a processing + * instruction. Processing instructions are used to embed information + * intended for specific applications. + */ + int PI = 12; + + /** + * The RCDATA constant (Entity References and Character Data), specifies + * the content model, consisting of characters AND entities. The + * "<" is threated as an ordinary character, but + * "<code>&name;</code>" still means the general entity with + * the given name. + */ + int RCDATA = 16; + + /** + * The SDATA constant. Means that the value contains the entity name + * and the replacement value of a character entity reference. + */ + int SDATA = 11; +} diff --git a/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java b/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java new file mode 100644 index 000000000..f717d69cb --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/DocumentParser.java @@ -0,0 +1,268 @@ +/* DocumentParser.java -- A parser for HTML documents. + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import javax.swing.text.html.parser.Parser; + +import java.io.IOException; +import java.io.Reader; + +import javax.swing.text.BadLocationException; +import javax.swing.text.SimpleAttributeSet; +import javax.swing.text.html.HTMLEditorKit; + +/** + * <p>A simple error-tolerant HTML parser that uses a DTD document + * to access data on the possible tokens, arguments and syntax.</p> + * <p> The parser reads an HTML content from a Reader and calls various + * notifying methods (which should be overridden in a subclass) + * when tags or data are encountered.</p> + * <p>Some HTML elements need no opening or closing tags. The + * task of this parser is to invoke the tag handling methods also when + * the tags are not explicitly specified and must be supposed using + * information, stored in the DTD. + * For example, parsing the document + * <p><table><tr><td>a<td>b<td>c</tr> <br> + * will invoke exactly the handling methods exactly in the same order + * (and with the same parameters) as if parsing the document: <br> + * <em><html><head></head><body><table>< + * tbody></em><tr><td>a<em></td></em><td>b<em> + * </td></em><td>c<em></td></tr></em>< + * <em>/tbody></table></body></html></em></p> + * (supposed tags are given in italics). The parser also supports + * obsolete elements of HTML syntax.<p> + * </p> + * In this implementation, DocumentParser is directly derived from its + * ancestor without changes of functionality. + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class DocumentParser + extends Parser + implements DTDConstants +{ + /** + * The enclosed working parser class. + */ + private class gnuParser + extends gnu.javax.swing.text.html.parser.support.Parser + { + private gnuParser(DTD d) + { + super(d); + } + + protected final void handleComment(char[] comment) + { + parser.handleComment(comment); + callBack.handleComment(comment, hTag.where.startPosition); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + parser.handleEmptyTag(tag); + callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(), + hTag.where.startPosition + ); + } + + protected final void handleEndTag(TagElement tag) + { + parser.handleEndTag(tag); + callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition); + } + + protected final void handleError(int line, String message) + { + parser.handleError(line, message); + callBack.handleError(message, hTag.where.startPosition); + } + + protected final void handleStartTag(TagElement tag) + { + parser.handleStartTag(tag); + SimpleAttributeSet attributes = gnu.getAttributes(); + + if (tag.fictional()) + attributes.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED, + Boolean.TRUE + ); + + callBack.handleStartTag(tag.getHTMLTag(), attributes, + hTag.where.startPosition + ); + } + + protected final void handleText(char[] text) + { + parser.handleText(text); + callBack.handleText(text, hTag.where.startPosition); + } + + DTD getDTD() + { + return dtd; + } + } + + /** + * This field is used to access the identically named + * methods of the outer class. + * This is package-private to avoid an accessor method. + */ + DocumentParser parser = this; + + /** + * The callback. + * This is package-private to avoid an accessor method. + */ + HTMLEditorKit.ParserCallback callBack; + + /** + * The reference to the working class of HTML parser that is + * actually used to parse the document. + * This is package-private to avoid an accessor method. + */ + gnuParser gnu; + + /** + * Creates a new parser that uses the given DTD to access data on the + * possible tokens, arguments and syntax. There is no single - step way + * to get a default DTD; you must either refer to the implementation - + * specific packages, write your own DTD or obtain the working instance + * of parser in other way, for example, by calling + * {@link javax.swing.text.html.HTMLEditorKit#getParser()}. + * + * @param a_dtd a DTD to use. + */ + public DocumentParser(DTD a_dtd) + { + super(a_dtd); + gnu = new gnuParser(a_dtd); + } + + /** + * Parses the HTML document, calling methods of the provided + * callback. This method must be multithread - safe. + * @param reader The reader to read the HTML document from + * @param aCallback The callback that is notifyed about the presence + * of HTML elements in the document. + * @param ignoreCharSet If thrue, any charset changes during parsing + * are ignored. + * @throws java.io.IOException + */ + public void parse(Reader reader, HTMLEditorKit.ParserCallback aCallback, + boolean ignoreCharSet + ) + throws IOException + { + callBack = aCallback; + gnu.parse(reader); + + callBack.handleEndOfLineString(gnu.getEndOfLineSequence()); + try + { + callBack.flush(); + } + catch (BadLocationException ex) + { + // Convert this into the supported type of exception. + throw new IOException(ex.getMessage()); + } + } + + /** + * Handle HTML comment. The default method returns without action. + * @param comment the comment being handled + */ + protected void handleComment(char[] comment) + { + // This default implementation does nothing. + } + + /** + * Handle the tag with no content, like <br>. The method is + * called for the elements that, in accordance with the current DTD, + * has an empty content. + * @param tag the tag being handled. + * @throws javax.swing.text.ChangedCharSetException + */ + protected void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + // This default implementation does nothing. + } + + /** + * The method is called when the HTML closing tag ((like </table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param tag The tag being handled + */ + protected void handleEndTag(TagElement tag) + { + // This default implementation does nothing. + } + + /* Handle error that has occured in the given line. */ + protected void handleError(int line, String message) + { + // This default implementation does nothing. + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param tag The tag being handled + */ + protected void handleStartTag(TagElement tag) + { + // This default implementation does nothing. + } + + /** + * Handle the text section. + * @param text a section text. + */ + protected void handleText(char[] text) + { + // This default implementation does nothing. + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/Element.java b/libjava/classpath/javax/swing/text/html/parser/Element.java new file mode 100644 index 000000000..c07c07f54 --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/Element.java @@ -0,0 +1,317 @@ +/* Element.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +import java.io.Serializable; + +import java.util.BitSet; + +/** + * <p> + * Stores the element information, obtained by parsing SGML DTD + * tag <code><!ELEMENT .. ></code>. This class has no public + * constructor and can only be instantiated using the + * {@link javax.swing.text.html.parser.DTD } methods</p> + * + * <p>SGML defines elements that represent structures or + * behavior. An element typically consists of a start tag, content, and an + * end tag. Hence the elements are not tags. The HTML 4.0 definition specifies + * that some elements are not required to have the end tags. Also, some + * HTML elements (like <code><hr></code>) have no content. Element names + * are case sensitive.</p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class Element + implements DTDConstants, Serializable +{ + /** + * Package level mapper between type names and they string values. + */ + static final gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("CDATA", DTDConstants.CDATA); + add("RCDATA", DTDConstants.RCDATA); + add("EMPTY", DTDConstants.EMPTY); + add("ANY", DTDConstants.ANY); + } + }; + + /** Use serialVersionUID for interoperability. */ + private static final long serialVersionUID = -6717939384601675586L; + + /** + * The element attributes. + */ + public AttributeList atts; + + /** + * Contains refernces to elements that must NOT occur inside this element, + * at any level of hierarchy. + */ + public BitSet exclusions; + + /** + * Contains refernces to elements that must CAN occur inside this element, + * at any level of hierarchy. + */ + public BitSet inclusions; + + /** + * The content model, defining elements, entities and DTD text + * that may/may not occur inside this element. + */ + public ContentModel content; + + /** + * A field to store additional user data for this Element. + */ + public Object data; + + /** + * The element name. + */ + public String name; + + /** + * True is this element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><hr></code>are + * not required to have the end tags. + */ + public boolean oEnd; + + /** + * True is this element need not to have the starting tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><head></code> or + * <code><body></code>) are + * not required to have the start tags. + + */ + public boolean oStart; + + /** + * This field contains the unique integer identifier of this Element, + * used to refer the element (more exactly, the element flag) + * in <code>inclusions</code> and <code>exclusions</code> bit set. + */ + public int index; + + /** + * The element type, containing value, defined in DTDConstants. + * In this implementation, the element type can be + * CDATA, RCDATA, EMPTY or ANY. + */ + public int type; + + /** + * The default constructor must have package level access in this + * class. Use DTD.defineElement(..) to create an element when required. + */ + Element() + { + // Nothing to do here. + } + + /** + * Converts the string representation of the element type + * into its unique integer identifier, defined in DTDConstants. + * @param a_type A name of the type + * @return DTDConstants.CDATA, DTDConstants.RCDATA, DTDConstants.EMPTY, + * DTDConstants.ANY or null if the type name is not + * "CDATA", "RCDATA", "EMPTY" or "ANY". This function is case sensitive. + * @throws NullPointerException if <code>a_type</code> is null. + */ + public static int name2type(String a_type) + { + return mapper.get(a_type); + } + + /** + * Get the element attribute by name. + * @param attribute the attribute name, case insensitive. + * @return the correspoding attribute of this element. The class, + * for storing as attribute list, as a single attribute, is used to + * store a single attribute in this case. + * @throws NullPointerException if the attribute name is null. + */ + public AttributeList getAttribute(String attribute) + { + AttributeList a = atts; + + while (a != null && !attribute.equalsIgnoreCase(a.name)) + a = a.next; + + return a; + } + + /** + * Get the element attribute by its value. + * @param a_value the attribute value, case insensitive. + * @return the correspoding attribute of this element. The class, + * for storing as attribute list, as a single attribute, is used to + * store a single attribute in this case. If there are several + * attributes with the same value, there is no garranty, which one + * is returned. + */ + public AttributeList getAttributeByValue(String a_value) + { + AttributeList a = atts; + + if (a_value == null) + { + while (a != null) + { + if (a.value == null) + return a; + + a = a.next; + } + } + else + { + while (a != null) + { + if (a.value != null && a_value.equalsIgnoreCase(a.value)) + return a; + + a = a.next; + } + } + + return null; + } + + /** + * Get all attributes of this document as an attribute list. + * @return The attribute list. + */ + public AttributeList getAttributes() + { + return atts; + } + + /** + * Get the content model, defining elements, entities and DTD text + * that may/may not occur inside this element. + */ + public ContentModel getContent() + { + return content; + } + + /** + * Returns true for the element with no content. + * Empty elements are defined with the SGML DTD keyword "EMPTY". + * @return true if content model field (content) method is equal to + * null or its method empty() returns true. + */ + public boolean isEmpty() + { + return content == null || content.empty(); + } + + /** + * Get the unique integer identifier of this Element, + * used to refer the element (more exactly, the element flag) + * in <code>inclusions</code> and <code>exclusions</code> bit set. + * WARNING: This value may not be the same between different + * implementations. + */ + public int getIndex() + { + return index; + } + + /** + * Get the element name. + */ + public String getName() + { + return name; + } + + /** + * Get the element type. + * @return one of the values, defined DTDConstants. + * In this implementation, the element type can be + * CDATA, RCDATA, EMPTY or ANY. + */ + public int getType() + { + return type; + } + + /** + * True is this element need not to have the starting tag, false + * otherwise.s element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><hr></code>are + * not required to have the end tags. + */ + public boolean omitEnd() + { + return oEnd; + } + + /** + * True is this element need not to have the closing tag, false + * otherwise. The HTML 4.0 definition specifies + * that some elements (like <code><head></code> or + * <code><body></code>) are + * not required to have the start tags. + */ + public boolean omitStart() + { + return oStart; + } + + /** + * Returns the name of this element. + */ + public String toString() + { + return name; + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/Entity.java b/libjava/classpath/javax/swing/text/html/parser/Entity.java new file mode 100644 index 000000000..d40fb94f3 --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/Entity.java @@ -0,0 +1,183 @@ +/* Entity.java -- Stores information, obtained by parsing SGML DTL + * <!ENTITY % .. > tag + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.support.gnuStringIntMapper; + +/** + * <p>Stores information, obtained by parsing SGML DTL + * <!ENTITY % .. > tag.</p> + * <p> + * The entity defines some kind of macro that can be used elsewhere in + * the document. + * When the macro is referred to by the name in the DTD, it is expanded into + * a string + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public final class Entity + implements DTDConstants +{ + /** + * Package level mapper between type names and they string values. + */ + final static gnuStringIntMapper mapper = + new gnuStringIntMapper() + { + protected void create() + { + add("ANY", DTDConstants.ANY); + add("CDATA", DTDConstants.CDATA); + add("PUBLIC", DTDConstants.PUBLIC); + add("SDATA", DTDConstants.SDATA); + add("PI", DTDConstants.PI); + add("STARTTAG", DTDConstants.STARTTAG); + add("ENDTAG", DTDConstants.ENDTAG); + add("MS", DTDConstants.MS); + add("MD", DTDConstants.MD); + add("SYSTEM", DTDConstants.SYSTEM); + } + }; + + /** + * The entity name. + */ + public String name; + + /** + * The entity data + */ + public char[] data; + + /** + * The entity type. + */ + public int type; + + /** + * String representation of the entity data. + */ + private String sdata; + + /** + * Create a new entity + * @param a_name the entity name + * @param a_type the entity type + * @param a_data the data replacing the entity reference + */ + public Entity(String a_name, int a_type, char[] a_data) + { + name = a_name; + type = a_type; + data = a_data; + } + + /** + * Converts a given string to the corresponding entity type. + * @return a value, defined in DTDConstants (one of + * PUBLIC, CDATA, SDATA, PI, STARTTAG, ENDTAG, MS, MD, SYSTEM) + * or CDATA if the parameter is not a valid entity type. + */ + public static int name2type(String an_entity) + { + int r = mapper.get(an_entity); + return (r == 0) ? DTDConstants.CDATA : r; + } + + /** + * Get the entity data. + */ + public char[] getData() + { + return data; + } + + /** + * Returns true for general entities. Each general entity can be + * referenced as <code>&entity-name;</code>. Such entities are + * defined by the SGML DTD tag + * <code><!ENTITY <i>name</i> "<i>value</i>"></code>. The general + * entities can be used anywhere in the document. + */ + public boolean isGeneral() + { + return (type & DTDConstants.GENERAL) != 0; + } + + /** + * Get the entity name. + */ + public String getName() + { + return name; + } + + /** + * Returns true for parameter entities. Each parameter entity can be + * referenced as <code>&entity-name;</code>. Such entities are + * defined by the SGML DTD tag + * <code><!ENTITY % <i>name</i> "<i>value</i>"></code>. The parameter + * entities can be used only in SGML context. + */ + public boolean isParameter() + { + return (type & DTDConstants.PARAMETER) != 0; + } + + /** + * Returns a data as String + */ + public String getString() + { + if (sdata == null) + sdata = new String(data); + + return sdata; + } + + /** + * Get the entity type. + * @return the value of the {@link #type}. + */ + public int getType() + { + return type; + } + +} diff --git a/libjava/classpath/javax/swing/text/html/parser/Parser.java b/libjava/classpath/javax/swing/text/html/parser/Parser.java new file mode 100644 index 000000000..f3faa2524 --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/Parser.java @@ -0,0 +1,446 @@ +/* Parser.java -- HTML parser + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import java.io.IOException; +import java.io.Reader; + +import javax.swing.text.ChangedCharSetException; +import javax.swing.text.SimpleAttributeSet; + +/* + * FOR DEVELOPERS: To avoid regression, please run the package test + * textsuite/javax.swing.text.html.parser/AllParserTests after your + * modifications. + */ + +/** + * <p>A simple error-tolerant HTML parser that uses a DTD document + * to access data on the possible tokens, arguments and syntax.</p> + * <p> The parser reads an HTML content from a Reader and calls various + * notifying methods (which should be overridden in a subclass) + * when tags or data are encountered.</p> + * <p>Some HTML elements need no opening or closing tags. The + * task of this parser is to invoke the tag handling methods also when + * the tags are not explicitly specified and must be supposed using + * information, stored in the DTD. + * For example, parsing the document + * <p><table><tr><td>a<td>b<td>c</tr> <br> + * will invoke exactly the handling methods exactly in the same order + * (and with the same parameters) as if parsing the document: <br> + * <em><html><head></head><body><table>< + * tbody></em><tr><td>a<em></td></em><td>b<em> + * </td></em><td>c<em></td></tr></em>< + * <em>/tbody></table></body></html></em></p> + * (supposed tags are given in italics). The parser also supports + * obsolete elements of HTML syntax.<p> + * </p> + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class Parser + implements DTDConstants +{ + /** + * The document template description that will be used to parse the documents. + */ + protected DTD dtd; + + /** + * The value of this field determines whether or not the Parser will be + * strict in enforcing SGML compatibility. The default value is false, + * stating that the parser should do everything to parse and get at least + * some information even from the incorrectly written HTML input. + */ + protected boolean strict; + + /** + * The package level reference to the working HTML parser in this + * implementation. + */ + final gnu.javax.swing.text.html.parser.support.Parser gnu; + + /** + * Creates a new parser that uses the given DTD to access data on the + * possible tokens, arguments and syntax. There is no single - step way + * to get a default DTD; you must either refer to the implementation - + * specific packages, write your own DTD or obtain the working instance + * of parser in other way, for example, by calling + * {@link javax.swing.text.html.HTMLEditorKit#getParser() }. + * @param a_dtd A DTD to use. + */ + public Parser(DTD a_dtd) + { + dtd = a_dtd; + + final Parser j = this; + + gnu = + new gnu.javax.swing.text.html.parser.support.Parser(dtd) + { + protected final void handleComment(char[] comment) + { + j.handleComment(comment); + } + + protected final void handleEOFInComment() + { + j.handleEOFInComment(); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + j.handleEmptyTag(tag); + } + + protected final void handleStartTag(TagElement tag) + { + j.handleStartTag(tag); + } + + protected final void handleEndTag(TagElement tag) + { + j.handleEndTag(tag); + } + + protected final void handleError(int line, String message) + { + j.handleError(line, message); + } + + protected final void handleText(char[] text) + { + j.handleText(text); + } + + protected final void handleTitle(char[] title) + { + j.handleTitle(title); + } + + protected final void markFirstTime(Element element) + { + j.markFirstTime(element); + } + + protected final void startTag(TagElement tag) + throws ChangedCharSetException + { + j.startTag(tag); + } + + protected final void endTag(boolean omitted) + { + j.endTag(omitted); + } + + protected TagElement makeTag(Element element) + { + return j.makeTag(element); + } + + protected TagElement makeTag(Element element, boolean isSupposed) + { + return j.makeTag(element, isSupposed); + } + }; + } + + /** + * Parse the HTML text, calling various methods in response to the + * occurence of the corresponding HTML constructions. + * @param reader The reader to read the source HTML from. + * @throws IOException If the reader throws one. + */ + public synchronized void parse(Reader reader) + throws IOException + { + gnu.parse(reader); + } + + /** + * Parses DTD markup declaration. Currently returns without action. + * @return null. + * @throws java.io.IOException + */ + public String parseDTDMarkup() + throws IOException + { + return gnu.parseDTDMarkup(); + } + + /** + * Parse DTD document declarations. Currently only parses the document + * type declaration markup. + * @param strBuff + * @return true if this is a valid DTD markup declaration. + * @throws IOException + */ + protected boolean parseMarkupDeclarations(StringBuffer strBuff) + throws IOException + { + return gnu.parseMarkupDeclarations(strBuff); + } + + /** + * Get the attributes of the current tag. + * @return The attribute set, representing the attributes of the current tag. + */ + protected SimpleAttributeSet getAttributes() + { + return gnu.getAttributes(); + } + + /** + * Get the number of the document line being parsed. + * @return The current line. + */ + protected int getCurrentLine() + { + return gnu.hTag.where.beginLine; + } + + /** + * Get the current position in the document being parsed. + * @return The current position. + */ + protected int getCurrentPos() + { + return gnu.hTag.where.startPosition; + } + + /** + * The method is called when the HTML end (closing) tag is found or if + * the parser concludes that the one should be present in the + * current position. The method is called immediatly + * before calling the handleEndTag(). + * @param omitted True if the tag is no actually present in the document, + * but is supposed by the parser (like </html> at the end of the + * document). + */ + protected void endTag(boolean omitted) + { + // This default implementation does nothing. + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to handleError, also providing the number of the + * current line. + */ + protected void error(String msg) + { + gnu.error(msg); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error (msg+": '"+invalid+"'"). + */ + protected void error(String msg, String invalid) + { + gnu.error(msg, invalid); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error (parm1+" "+ parm2+" "+ parm3). + */ + protected void error(String parm1, String parm2, String parm3) + { + gnu.error(parm1, parm2, parm3); + } + + /** + * Invokes the error handler. The default method in this implementation + * finally delegates the call to error + * (parm1+" "+ parm2+" "+ parm3+" "+ parm4). + */ + protected void error(String parm1, String parm2, String parm3, String parm4) + { + gnu.error(parm1, parm2, parm3, parm4); + } + + /** + * In this implementation, this is never called and returns without action. + */ + protected void flushAttributes() + { + gnu.flushAttributes(); + } + + /** + * Handle HTML comment. The default method returns without action. + * @param comment The comment being handled + */ + protected void handleComment(char[] comment) + { + // This default implementation does nothing. + } + + /** + * This is additionally called in when the HTML content terminates + * without closing the HTML comment. This can only happen if the + * HTML document contains errors (for example, the closing --;gt is + * missing. The default method calls the error handler. + */ + protected void handleEOFInComment() + { + gnu.error("Unclosed comment"); + } + + /** + * Handle the tag with no content, like <br>. The method is + * called for the elements that, in accordance with the current DTD, + * has an empty content. + * @param tag The tag being handled. + * @throws javax.swing.text.ChangedCharSetException + */ + protected void handleEmptyTag(TagElement tag) + throws ChangedCharSetException + { + // This default implementation does nothing. + } + + /** + * The method is called when the HTML closing tag ((like </table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param tag The tag being handled + */ + protected void handleEndTag(TagElement tag) + { + // This default implementation does nothing. + } + + /* Handle error that has occured in the given line. */ + protected void handleError(int line, String message) + { + // This default implementation does nothing. + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. + * @param tag The tag being handled + */ + protected void handleStartTag(TagElement tag) + { + // This default implementation does nothing. + } + + /** + * Handle the text section. + * <p> For non-preformatted section, the parser replaces + * \t, \r and \n by spaces and then multiple spaces + * by a single space. Additionaly, all whitespace around + * tags is discarded. + * </p> + * <p> For pre-formatted text (inside TEXAREA and PRE), the parser preserves + * all tabs and spaces, but removes <b>one</b> bounding \r, \n or \r\n, + * if it is present. Additionally, it replaces each occurence of \r or \r\n + * by a single \n.</p> + * + * @param text A section text. + */ + protected void handleText(char[] text) + { + // This default implementation does nothing. + } + + /** + * Handle HTML <title> tag. This method is invoked when + * both title starting and closing tags are already behind. + * The passed argument contains the concatenation of all + * title text sections. + * @param title The title text. + */ + protected void handleTitle(char[] title) + { + // This default implementation does nothing. + } + + /** + * Constructs the tag from the given element. In this implementation, + * this is defined, but never called. + * @param element the base element of the tag. + * @return the tag + */ + protected TagElement makeTag(Element element) + { + return makeTag(element, false); + } + + /** + * Constructs the tag from the given element. + * @param element the tag base {@link javax.swing.text.html.parser.Element} + * @param isSupposed true if the tag is not actually present in the + * html input, but the parser supposes that it should to occur in + * the current location. + * @return the tag + */ + protected TagElement makeTag(Element element, boolean isSupposed) + { + return new TagElement(element, isSupposed); + } + + /** + * This is called when the tag, representing the given element, + * occurs first time in the document. + * @param element + */ + protected void markFirstTime(Element element) + { + // This default implementation does nothing. + } + + /** + * The method is called when the HTML opening tag ((like <table>) + * is found or if the parser concludes that the one should be present + * in the current position. The method is called immediately before + * calling the handleStartTag. + * @param tag The tag + */ + protected void startTag(TagElement tag) + throws ChangedCharSetException + { + // This default implementation does nothing. + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java b/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java new file mode 100644 index 000000000..cdd339b8f --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/ParserDelegator.java @@ -0,0 +1,207 @@ +/* ParserDelegator.java -- Delegator for ParserDocument. + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package javax.swing.text.html.parser; + +import gnu.javax.swing.text.html.parser.HTML_401F; + +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; + +import javax.swing.text.BadLocationException; +import javax.swing.text.SimpleAttributeSet; +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.HTMLEditorKit.ParserCallback; + +/** + * This class instantiates and starts the working instance of + * html parser, being responsible for providing the default DTD. + * + * @author Audrius Meskauskas (AudriusA@Bioinformatics.org) + */ +public class ParserDelegator + extends javax.swing.text.html.HTMLEditorKit.Parser + implements Serializable +{ + private class gnuParser + extends gnu.javax.swing.text.html.parser.support.Parser + { + private static final long serialVersionUID = 1; + + private gnuParser(DTD d) + { + super(d); + } + + protected final void handleComment(char[] comment) + { + callBack.handleComment(comment, hTag.where.startPosition); + } + + protected final void handleEmptyTag(TagElement tag) + throws javax.swing.text.ChangedCharSetException + { + callBack.handleSimpleTag(tag.getHTMLTag(), getAttributes(), + hTag.where.startPosition + ); + } + + protected final void handleEndTag(TagElement tag) + { + callBack.handleEndTag(tag.getHTMLTag(), hTag.where.startPosition); + } + + protected final void handleError(int line, String message) + { + callBack.handleError(message, hTag.where.startPosition); + } + + protected final void handleStartTag(TagElement tag) + { + SimpleAttributeSet attributes = gnu.getAttributes(); + + if (tag.fictional()) + attributes.addAttribute(ParserCallback.IMPLIED, Boolean.TRUE); + + callBack.handleStartTag(tag.getHTMLTag(), attributes, + hTag.where.startPosition + ); + } + + protected final void handleText(char[] text) + { + callBack.handleText(text, hTag.where.startPosition); + } + + DTD getDTD() + { + // Accessing the inherited gnu.javax.swing.text.html.parser.support.Parser + // field. super. is a workaround, required to support JDK1.3's javac. + return super.dtd; + } + } + + /** + * Use serialVersionUID for interoperability. + */ + private static final long serialVersionUID = -1276686502624777206L; + + private static DTD dtd = HTML_401F.getInstance(); + + /** + * The callback. + * This is package-private to avoid an accessor method. + */ + HTMLEditorKit.ParserCallback callBack; + + /** + * The reference to the working class of HTML parser that is + * actually used to parse the document. + * This is package-private to avoid an accessor method. + */ + gnuParser gnu; + + /** + * Parses the HTML document, calling methods of the provided + * callback. This method must be multithread - safe. + * @param reader The reader to read the HTML document from + * @param a_callback The callback that is notifyed about the presence + * of HTML elements in the document. + * @param ignoreCharSet If thrue, any charset changes during parsing + * are ignored. + * @throws java.io.IOException + */ + public void parse(Reader reader, HTMLEditorKit.ParserCallback a_callback, + boolean ignoreCharSet + ) + throws IOException + { + callBack = a_callback; + + if (gnu == null || !dtd.equals(gnu.getDTD())) + { + gnu = new gnuParser(dtd); + } + + gnu.parse(reader); + + callBack.handleEndOfLineString(gnu.getEndOfLineSequence()); + try + { + callBack.flush(); + } + catch (BadLocationException ex) + { + // Convert this into the supported type of exception. + throw new IOException(ex.getMessage()); + } + } + + /** + * Calling this method instructs that, if not specified directly, + * the documents will be parsed using the default + * DTD of the implementation. + */ + protected static void setDefaultDTD() + { + dtd = HTML_401F.getInstance(); + } + + /** + * Registers the user - written DTD under the given name, also + * making it default for the subsequent parsings. This has effect on + * all subsequent calls to the parse(...) . If you need to specify + * your DTD locally, simply {@link javax.swing.text.html.parser.Parser} + * instead. + * @param a_dtd The DTD that will be used to parse documents by this class. + * @param name The name of this DTD. + * @return No standard is specified on which instance of DTD must be + * returned by this method, and it is recommended to leave the returned + * value without consideration. This implementation returns the DTD + * that was previously set as the default DTD, or the implementations + * default DTD if none was set. + */ + protected static DTD createDTD(DTD a_dtd, String name) + { + DTD.putDTDHash(name, a_dtd); + + DTD dtd_prev = dtd; + dtd = a_dtd; + return dtd_prev; + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/TagElement.java b/libjava/classpath/javax/swing/text/html/parser/TagElement.java new file mode 100644 index 000000000..4558b15eb --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/TagElement.java @@ -0,0 +1,142 @@ +/* TagElement.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package javax.swing.text.html.parser; + +import javax.swing.text.html.HTML; + +/** + * The SGML element, defining a single html tag. + * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) + */ +public class TagElement +{ + /** + * The Element the tag was constructed from. + */ + private final Element element; + + /** + * The coresponding HTML tag, assigned once in constructor. + */ + private final HTML.Tag tag; + + /** + * The 'fictional' flag. + */ + private final boolean fictional; + + /** + * Creates the html tag element from the defintion, stored in the + * given element. Sets the flag 'fictional' to false. + * @param an_element + */ + public TagElement(Element an_element) + { + this(an_element, false); + } + + /** + * Creates the html tag element from the defintion, stored in the + * given element, setting the flag 'fictional' to the given value. + */ + public TagElement(Element an_element, boolean is_fictional) + { + element = an_element; + fictional = is_fictional; + + HTML.Tag t = HTML.getTag(element.getName()); + + if (t != null) + tag = t; + else + tag = new HTML.UnknownTag(element.getName()); + } + + /** + * Get the element from that the tag was constructed. + */ + public Element getElement() + { + return element; + } + + /** + * Get the corresponding HTML tag. This is either one of the + * pre-defined HTML tags or the instance of the UnknownTag with the + * element name. + */ + public HTML.Tag getHTMLTag() + { + return tag; + } + + /** + * Calls isPreformatted() for the corresponding html tag and returns + * the obtained value. + */ + public boolean isPreformatted() + { + return tag.isPreformatted(); + } + + /** + * Calls breaksFlow() for the corresponding html tag and returns + * the obtained value. + */ + public boolean breaksFlow() + { + return tag.breaksFlow(); + } + + /** + * Get the value of the flag 'fictional'. + */ + public boolean fictional() + { + return fictional; + } + + /** + * Returns string representation of this object. + */ + public String toString() + { + return getElement() + (fictional ? "?" : ""); + } +} diff --git a/libjava/classpath/javax/swing/text/html/parser/package.html b/libjava/classpath/javax/swing/text/html/parser/package.html new file mode 100644 index 000000000..5d5157fb2 --- /dev/null +++ b/libjava/classpath/javax/swing/text/html/parser/package.html @@ -0,0 +1,50 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- package.html - describes classes in javax.swing.text.html package. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. --> + +<html> +<head><title>GNU Classpath - javax.swing.text.html.parser</title></head> + +<body> +<p> Provides the DTD driven for web browsers, + web robots, web page content analysers, web editors and + other applications applications working with Hypertext + Markup Language (HTML). +</p> + +</body> +</html> |