From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree This parser currently implements the SAX1 Parser API, but
+ * it may not continue to do so in the future.
+ *
+ * @author Written by David Megginson (version 1.2a from Microstar)
+ * @author Updated by David Brownell <dbrownell@users.sourceforge.net>
+ * @see org.xml.sax.Parser
+ */
+final public class SAXDriver
+ implements Locator, Attributes2, XMLReader, Parser, AttributeList
+{
+
+ // Shared do-nothing handler. Every handler field below defaults to it, and
+ // the public getters report it to callers as "nothing registered" (null).
+ private final DefaultHandler2 base = new DefaultHandler2();
+ // Low-level parser; a fresh instance is created by parse(InputSource).
+ private XmlParser parser;
+
+ // Registered handlers; the setters substitute "base" for null.
+ private EntityResolver entityResolver = base;
+ // Non-null only when the registered resolver also implements EntityResolver2.
+ private EntityResolver2 resolver2 = null;
+ private ContentHandler contentHandler = base;
+ private DTDHandler dtdHandler = base;
+ private ErrorHandler errorHandler = base;
+ private DeclHandler declHandler = base;
+ private LexicalHandler lexicalHandler = base;
+
+ // Name of the element most recently passed to startElement(); used when
+ // looking up attribute types.
+ private String elementName;
+ // System IDs pushed by startExternalEntity(); the top entry is what
+ // getSystemId() reports.
+ private Stack entityStack;
+
+ // one vector (of object/struct): faster, smaller
+ private List attributesList;
+
+ // Feature flags; see getFeature() and setFeature() for the feature URIs.
+ private boolean namespaces = true;
+ private boolean xmlNames = false;
+ private boolean extGE = true;
+ private boolean extPE = true;
+ private boolean resolveAll = true;
+ private boolean useResolver2 = true;
+
+ // package private to allow (read-only) access in XmlParser
+ boolean stringInterning = true;
+
+ // Number of attributes collected for the pending element.
+ private int attributeCount;
+ // True once attribute() has been called for the pending element.
+ private boolean attributes;
+ // Scratch array handed to NamespaceSupport.processName().
+ private String[] nsTemp;
+ // Namespace context; created by parse() when "namespaces" is enabled.
+ private NamespaceSupport prefixStack;
+
+ //
+ // Constructor.
+ //
+
+ /**
+  * Creates a driver whose per-parse state has been freshly initialized.
+  */
+ public SAXDriver()
+ {
+   // All mutable parsing state is set up in one place.
+   reset();
+ }
+
+ // Discards everything collected for the current (or previous) document:
+ // pending attributes, the entity stack and the namespace context.
+ private void reset()
+ {
+   // attribute bookkeeping
+   attributesList = Collections.synchronizedList(new ArrayList());
+   attributeCount = 0;
+   attributes = false;
+
+   // element / entity / namespace tracking
+   elementName = null;
+   entityStack = new Stack();
+   nsTemp = new String[3];
+   prefixStack = null;
+ }
+
+
+ //
+ // Implementation of org.xml.sax.Parser.
+ //
+
+ /**
+  * SAX1: Sets the locale used for diagnostics. Only locales whose
+  * language is English ("en") are accepted.
+  *
+  * @param locale The locale for which diagnostics will be generated
+  * @exception SAXException if the locale's language is not English
+  */
+ public void setLocale(Locale locale)
+   throws SAXException
+ {
+   if (!"en".equals(locale.getLanguage()))
+   {
+     throw new SAXException ("AElfred2 only supports English locales.");
+   }
+ }
+
+ /**
+  * SAX2: Returns the object used when resolving external
+  * entities during parsing (both general and parameter entities),
+  * or null when the application has not registered one.
+  */
+ public EntityResolver getEntityResolver()
+ {
+   // The internal default handler is never exposed to the application.
+   if (entityResolver == base)
+   {
+     return null;
+   }
+   return entityResolver;
+ }
+
+ /**
+ * SAX1, SAX2: Set the entity resolver for this parser.
+ * @param handler The object to receive entity events.
+ */
+ public void setEntityResolver(EntityResolver resolver)
+ {
+ if (resolver instanceof EntityResolver2)
+ {
+ resolver2 = (EntityResolver2) resolver;
+ }
+ else
+ {
+ resolver2 = null;
+ }
+ if (resolver == null)
+ {
+ resolver = base;
+ }
+ entityResolver = resolver;
+ }
+
+ /**
+  * SAX2: Returns the object used to process declarations related
+  * to notations and unparsed entities, or null when the application
+  * has not registered one.
+  */
+ public DTDHandler getDTDHandler()
+ {
+   if (dtdHandler == base)
+   {
+     return null;
+   }
+   return dtdHandler;
+ }
+
+ /**
+  * SAX1, SAX2: Set the DTD handler for this parser.
+  *
+  * @param handler The object to receive DTD events; null restores the
+  *  built-in default.
+  */
+ public void setDTDHandler(DTDHandler handler)
+ {
+   this.dtdHandler = (handler == null) ? base : handler;
+ }
+
+
+ /**
+  * SAX1: Set the document handler for this parser. If a
+  * content handler was set, this document handler will supplant it.
+  * The parser is set to report all XML 1.0 names rather than to
+  * filter out "xmlns" attributes (the "namespace-prefixes" feature
+  * is set to true).
+  *
+  * @deprecated SAX2 programs should use the XMLReader interface
+  *  and a ContentHandler.
+  *
+  * @param handler The object to receive document events.
+  */
+ public void setDocumentHandler(DocumentHandler handler)
+ {
+   // SAX1 clients expect to see every attribute, xmlns ones included.
+   xmlNames = true;
+   // SAX2 callbacks are translated back to SAX1 by the adapter.
+   contentHandler = new Adapter(handler);
+ }
+
+ /**
+  * SAX2: Returns the object used to report the logical
+  * content of an XML document, or null when the application has not
+  * registered one.
+  */
+ public ContentHandler getContentHandler()
+ {
+   if (contentHandler == base)
+   {
+     return null;
+   }
+   return contentHandler;
+ }
+
+ /**
+  * SAX2: Assigns the object used to report the logical
+  * content of an XML document. If a document handler was set,
+  * this content handler will supplant it (but XML 1.0 style name
+  * reporting may remain enabled).
+  *
+  * @param handler The object to receive content events; null restores
+  *  the built-in default.
+  */
+ public void setContentHandler(ContentHandler handler)
+ {
+   contentHandler = (handler == null) ? base : handler;
+ }
+
+ /**
+  * SAX1, SAX2: Set the error handler for this parser.
+  *
+  * @param handler The object to receive error events; null restores the
+  *  built-in default.
+  */
+ public void setErrorHandler(ErrorHandler handler)
+ {
+   this.errorHandler = (handler == null) ? base : handler;
+ }
+
+ /**
+  * SAX2: Returns the object used to receive callbacks for XML
+  * errors of all levels (fatal, nonfatal, warning), or null when the
+  * application has not registered one (the built-in default handler is
+  * not exposed).
+  */
+ public ErrorHandler getErrorHandler()
+ {
+ return (errorHandler == base) ? null : errorHandler;
+ }
+
+ /**
+  * SAX1, SAX2: Auxiliary API to parse an XML document, used mostly
+  * when no URI is available.
+  * If you want anything useful to happen, you should set
+  * at least one type of handler.
+  *
+  * <p>Per-parse state is always released before this method returns,
+  * even when the content handler's endDocument() callback throws, so
+  * the driver can be reconfigured and reused afterwards.
+  *
+  * @param source The XML input source. Don't set 'encoding' unless
+  *  you know for a fact that it's correct.
+  * @see #setEntityResolver
+  * @see #setDTDHandler
+  * @see #setContentHandler
+  * @see #setErrorHandler
+  * @exception SAXException The handlers may throw any SAXException,
+  *  and the parser normally throws SAXParseException objects.
+  * @exception IOException IOExceptions are normally thrown through
+  *  the parser if there are problems reading the source document.
+  * @exception IllegalStateException if both the "namespaces" and the
+  *  "namespace-prefixes" features are disabled.
+  */
+ public void parse(InputSource source)
+   throws SAXException, IOException
+ {
+   synchronized (base)
+   {
+     // Reject the illegal feature combination before any per-parse
+     // state exists, so a rejected call leaves the driver idle.
+     if (!namespaces && !xmlNames)
+     {
+       throw new IllegalStateException();
+     }
+
+     parser = new XmlParser();
+     if (namespaces)
+     {
+       prefixStack = new NamespaceSupport();
+     }
+     parser.setHandler(this);
+
+     try
+     {
+       Reader r = source.getCharacterStream();
+       InputStream in = source.getByteStream();
+
+       parser.doParse(source.getSystemId(),
+                      source.getPublicId(),
+                      r,
+                      in,
+                      source.getEncoding());
+     }
+     // The three rethrows keep these types from being wrapped below.
+     catch (SAXException e)
+     {
+       throw e;
+     }
+     catch (IOException e)
+     {
+       throw e;
+     }
+     catch (RuntimeException e)
+     {
+       throw e;
+     }
+     catch (Exception e)
+     {
+       throw new SAXParseException(e.getMessage(), this, e);
+     }
+     finally
+     {
+       try
+       {
+         contentHandler.endDocument();
+       }
+       finally
+       {
+         // Must run even if endDocument() throws.
+         reset();
+         // A lingering parser would make setFeature() fail with
+         // "not while parsing" for the rest of the driver's life.
+         parser = null;
+       }
+     }
+   }
+ }
+
+ /**
+  * SAX1, SAX2: Preferred API to parse an XML document, using a
+  * system identifier (URI).
+  *
+  * @param systemId The system identifier of the document to parse.
+  */
+ public void parse(String systemId)
+   throws SAXException, IOException
+ {
+   InputSource source = new InputSource(systemId);
+   parse(source);
+ }
+
+ //
+ // Implementation of SAX2 "XMLReader" interface
+ //
+ // URI prefix shared by the feature flags handled in getFeature()/setFeature().
+ static final String FEATURE = "http://xml.org/sax/features/";
+ // URI prefix shared by the properties handled in getProperty()/setProperty().
+ static final String PROPERTY = "http://xml.org/sax/properties/";
+
+ /**
+  * SAX2: Tells the value of the specified feature flag.
+  *
+  * @param featureId The full URI of the feature.
+  * @return the current state of the flag
+  * @exception SAXNotRecognizedException thrown if the feature flag
+  *  is neither built in, nor yet assigned.
+  * @exception SAXNotSupportedException thrown for "is-standalone" when
+  *  no parser has been created yet.
+  */
+ public boolean getFeature(String featureId)
+   throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+   // Every flag known here lives under the standard SAX feature prefix.
+   if (featureId == null || !featureId.startsWith(FEATURE))
+   {
+     throw new SAXNotRecognizedException(featureId);
+   }
+   String flag = featureId.substring(FEATURE.length());
+
+   if ("validation".equals(flag))
+   {
+     return false;
+   }
+   // external entities (both types) are optionally included
+   else if ("external-general-entities".equals(flag))
+   {
+     return extGE;
+   }
+   else if ("external-parameter-entities".equals(flag))
+   {
+     return extPE;
+   }
+   // element/attribute names are as written in document; no mangling
+   else if ("namespace-prefixes".equals(flag))
+   {
+     return xmlNames;
+   }
+   // report element/attribute namespaces?
+   else if ("namespaces".equals(flag))
+   {
+     return namespaces;
+   }
+   // all PEs and GEs are reported
+   else if ("lexical-handler/parameter-entities".equals(flag))
+   {
+     return true;
+   }
+   // default is true
+   else if ("string-interning".equals(flag))
+   {
+     return stringInterning;
+   }
+   // EXTENSIONS 1.1: always returns isSpecified info
+   else if ("use-attributes2".equals(flag))
+   {
+     return true;
+   }
+   // meaningful between startDocument/endDocument
+   else if ("is-standalone".equals(flag))
+   {
+     if (parser == null)
+     {
+       throw new SAXNotSupportedException(featureId);
+     }
+     return parser.isStandalone();
+   }
+   // optionally don't absolutize URIs in declarations
+   else if ("resolve-dtd-uris".equals(flag))
+   {
+     return resolveAll;
+   }
+   // optionally use resolver2 interface methods, if possible
+   else if ("use-entity-resolver2".equals(flag))
+   {
+     return useResolver2;
+   }
+
+   throw new SAXNotRecognizedException(featureId);
+ }
+
+ // package private: hands back the declaration handler as stored, i.e.
+ // the built-in default rather than null when none was registered
+ DeclHandler getDeclHandler()
+ {
+   return declHandler;
+ }
+
+ // package private: current value of the "resolve-dtd-uris" feature
+ boolean resolveURIs()
+ {
+   return resolveAll;
+ }
+
+ /**
+  * SAX2: Returns the specified property.
+  *
+  * @param propertyId The full URI of the property.
+  * @return the registered handler, or null when the application has not
+  *  registered one
+  * @exception SAXNotRecognizedException thrown if the property value
+  *  is neither built in, nor yet stored.
+  */
+ public Object getProperty(String propertyId)
+   throws SAXNotRecognizedException
+ {
+   Object current;
+
+   if ((PROPERTY + "declaration-handler").equals(propertyId))
+   {
+     current = declHandler;
+   }
+   else if ((PROPERTY + "lexical-handler").equals(propertyId))
+   {
+     current = lexicalHandler;
+   }
+   else
+   {
+     // unknown properties
+     throw new SAXNotRecognizedException(propertyId);
+   }
+
+   // the built-in default is never exposed to the application
+   return (current == base) ? null : current;
+ }
+
+ /**
+  * SAX2: Sets the state of feature flags in this parser. Some
+  * built-in feature flags are mutable.
+  *
+  * @param featureId The full URI of the feature.
+  * @param value The requested state.
+  * @exception SAXNotRecognizedException thrown if the feature is
+  *  unknown, or is known but cannot be changed to the requested value.
+  * @exception SAXNotSupportedException thrown if a change is requested
+  *  while a parser instance exists.
+  */
+ public void setFeature(String featureId, boolean value)
+   throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+   // Features with a defined value, we just change it if we can.
+   // The lookup also rejects unknown feature names.
+   if (getFeature (featureId) == value)
+   {
+     return;
+   }
+   if (parser != null)
+   {
+     throw new SAXNotSupportedException("not while parsing");
+   }
+
+   if ((FEATURE + "namespace-prefixes").equals(featureId))
+   {
+     // in this implementation, this only affects xmlns reporting
+     xmlNames = value;
+     // forcibly prevent illegal parser state
+     if (!value)
+     {
+       namespaces = true;
+     }
+   }
+   else if ((FEATURE + "namespaces").equals(featureId))
+   {
+     namespaces = value;
+     // forcibly prevent illegal parser state
+     if (!value)
+     {
+       xmlNames = true;
+     }
+   }
+   else if ((FEATURE + "external-general-entities").equals(featureId))
+   {
+     extGE = value;
+   }
+   else if ((FEATURE + "external-parameter-entities").equals(featureId))
+   {
+     extPE = value;
+   }
+   else if ((FEATURE + "resolve-dtd-uris").equals(featureId))
+   {
+     resolveAll = value;
+   }
+   else if ((FEATURE + "use-entity-resolver2").equals(featureId))
+   {
+     useResolver2 = value;
+   }
+   else
+   {
+     // NOTE(review): reached only for recognized, read-only features;
+     // SAX specifies SAXNotSupportedException for that case. Kept as is
+     // because callers may depend on the current exception type.
+     throw new SAXNotRecognizedException(featureId);
+   }
+ }
+
+ /**
+  * SAX2: Assigns the specified property. Like SAX1 handlers,
+  * these may be changed at any time.
+  *
+  * @param propertyId The full URI of the property.
+  * @param value The handler to install; null restores the built-in
+  *  default.
+  * @exception SAXNotRecognizedException thrown if the property is unknown.
+  * @exception SAXNotSupportedException thrown if the value does not
+  *  implement the interface the property requires.
+  */
+ public void setProperty(String propertyId, Object value)
+   throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+   // see if the property is recognized
+   getProperty(propertyId);
+
+   // Properties with a defined value, we just change it if we can.
+   if ((PROPERTY + "declaration-handler").equals(propertyId))
+   {
+     if (value == null)
+     {
+       declHandler = base;
+     }
+     else if (value instanceof DeclHandler)
+     {
+       declHandler = (DeclHandler) value;
+     }
+     else
+     {
+       throw new SAXNotSupportedException(propertyId);
+     }
+     return;
+   }
+
+   if ((PROPERTY + "lexical-handler").equals(propertyId))
+   {
+     if (value == null)
+     {
+       lexicalHandler = base;
+     }
+     else if (value instanceof LexicalHandler)
+     {
+       lexicalHandler = (LexicalHandler) value;
+     }
+     else
+     {
+       throw new SAXNotSupportedException(propertyId);
+     }
+     return;
+   }
+
+   throw new SAXNotSupportedException(propertyId);
+ }
+
+ //
+ // This is where the driver receives XmlParser callbacks and translates
+ // them into SAX callbacks. Some more callbacks have been added for
+ // SAX2 support.
+ //
+
+ // Parser callback: announces the document to the content handler,
+ // giving it this driver as its Locator first.
+ void startDocument()
+   throws SAXException
+ {
+   ContentHandler handler = contentHandler;
+
+   handler.setDocumentLocator(this);
+   handler.startDocument();
+   // start with no pending attributes
+   attributesList.clear();
+ }
+
+ // Parser callback: forwards a skipped entity to the content handler.
+ void skippedEntity(String name)
+   throws SAXException
+ {
+   contentHandler.skippedEntity(name);
+ }
+
+ // Parser callback: asks an EntityResolver2 for an external subset.
+ // Returns null without asking when no such resolver is registered, when
+ // its use is disabled, or when external parameter entities are excluded.
+ InputSource getExternalSubset(String name, String baseURI)
+   throws SAXException, IOException
+ {
+   boolean mayAsk = resolver2 != null && useResolver2 && extPE;
+
+   if (mayAsk)
+   {
+     return resolver2.getExternalSubset(name, baseURI);
+   }
+   return null;
+ }
+
+ // Parser callback: resolves an external entity. Returns null when the
+ // entity's kind (parameter or general) is excluded by the feature flags;
+ // otherwise reports startEntity, consults the registered resolver, and
+ // falls back to the supplied source when the resolver returns null.
+ InputSource resolveEntity(boolean isPE, String name,
+                           InputSource in, String baseURI)
+   throws SAXException, IOException
+ {
+   // external entities might be skipped
+   boolean included = isPE ? extPE : extGE;
+   if (!included)
+   {
+     return null;
+   }
+
+   // ... or not
+   lexicalHandler.startEntity(name);
+
+   InputSource source;
+   if (resolver2 == null || !useResolver2)
+   {
+     // Plain resolver: absolutize first, tolerating odd URIs only when
+     // the application supplied its own resolver.
+     boolean lenient = (entityResolver != base);
+     in.setSystemId(absolutize(baseURI, in.getSystemId(), lenient));
+     source = entityResolver.resolveEntity(in.getPublicId(),
+                                           in.getSystemId());
+   }
+   else
+   {
+     source = resolver2.resolveEntity(name, in.getPublicId(),
+                                      baseURI, in.getSystemId());
+     if (source == null)
+     {
+       in.setSystemId(absolutize(baseURI, in.getSystemId(), false));
+     }
+   }
+   if (source == null)
+   {
+     source = in;
+   }
+
+   startExternalEntity(name, source.getSystemId(), true);
+   return source;
+ }
+
+ // absolutize a system ID relative to the specified base URI
+ // (temporarily) package-visible for external entity decls
+ //
+ // When "nice" is true, a system ID that cannot be turned into a URL is
+ // returned unchanged after a warning; otherwise the
+ // MalformedURLException is propagated.
+ String absolutize(String baseURI, String systemId, boolean nice)
+   throws MalformedURLException, SAXException
+ {
+   // FIXME normalize system IDs -- when?
+   // - Convert to UTF-8
+   // - Map reserved and non-ASCII characters to %HH
+
+   try
+   {
+     if (baseURI != null)
+     {
+       URL context = new URL(baseURI);
+       return new URL(context, systemId).toString();
+     }
+
+     if (XmlParser.uriWarnings)
+     {
+       warn ("No base URI; hope this SYSTEM id is absolute: "
+             + systemId);
+     }
+     return new URL(systemId).toString();
+   }
+   catch (MalformedURLException e)
+   {
+     // sometimes sysids for notations or unparsed entities
+     // aren't really URIs...
+     if (nice)
+     {
+       warn("Can't absolutize SYSTEM id: " + e.getMessage());
+       return systemId;
+     }
+
+     // Let unknown URI schemes pass through unless we need
+     // the JVM to map them to i/o streams for us...
+     throw e;
+   }
+ }
+
+ // Parser callback: records the system ID of an external entity that is
+ // being entered. When stackOnly is true the startEntity event is not
+ // reported here.
+ void startExternalEntity(String name, String systemId, boolean stackOnly)
+   throws SAXException
+ {
+   // A null systemId is deliberately not warned about: the application
+   // has the option of not setting it, and Sun's JAXP or Xerces seems
+   // to ignore this case.
+   boolean report = !stackOnly; // spliced [dtd] needs startEntity
+
+   if (report)
+   {
+     lexicalHandler.startEntity(name);
+   }
+   entityStack.push(systemId);
+ }
+
+ // Parser callback: leaves an external entity. The endEntity event is
+ // reported for everything except the "[document]" pseudo-entity.
+ void endExternalEntity(String name)
+   throws SAXException
+ {
+   boolean isDocument = "[document]".equals(name);
+
+   if (!isDocument)
+   {
+     lexicalHandler.endEntity(name);
+   }
+   entityStack.pop();
+ }
+
+ // Parser callback: reports the start of an internal entity expansion.
+ void startInternalEntity(String name)
+   throws SAXException
+ {
+   lexicalHandler.startEntity(name);
+ }
+
+ // Parser callback: reports the end of an internal entity expansion.
+ void endInternalEntity(String name)
+   throws SAXException
+ {
+   lexicalHandler.endEntity(name);
+ }
+
+ // Parser callback: reports the document type declaration as the start
+ // of the DTD.
+ //
+ // The "name" is a declaration and should be given to the DeclHandler
+ // (but sax2 doesn't). The IDs for the external subset are lexical
+ // details, as are the contents of the internal subset; but sax2
+ // doesn't provide the internal subset "pre-parse".
+ void doctypeDecl(String name, String publicId, String systemId)
+   throws SAXException
+ {
+   lexicalHandler.startDTD(name, publicId, systemId);
+ }
+
+ // Parser callback: reports a notation declaration. The system ID is
+ // absolutized against baseUri when "resolve-dtd-uris" is enabled and a
+ // system ID is present.
+ void notationDecl(String name, String publicId, String systemId,
+                   String baseUri)
+   throws SAXException
+ {
+   try
+   {
+     String reported = systemId;
+     if (resolveAll && systemId != null)
+     {
+       reported = absolutize(baseUri, systemId, true);
+     }
+     dtdHandler.notationDecl(name, publicId, reported);
+   }
+   catch (IOException e)
+   {
+     // "can't happen"
+     throw new SAXParseException(e.getMessage(), this, e);
+   }
+ }
+
+ // Parser callback: reports an unparsed entity declaration. The system
+ // ID is absolutized against baseUri when "resolve-dtd-uris" is enabled.
+ void unparsedEntityDecl(String name, String publicId, String systemId,
+                         String baseUri, String notation)
+   throws SAXException
+ {
+   try
+   {
+     String reported = systemId;
+     if (resolveAll)
+     {
+       reported = absolutize(baseUri, systemId, true);
+     }
+     dtdHandler.unparsedEntityDecl(name, publicId, reported, notation);
+   }
+   catch (IOException e)
+   {
+     // "can't happen"
+     throw new SAXParseException(e.getMessage(), this, e);
+   }
+ }
+
+ // Parser callback: reports the end of the DTD.
+ void endDoctype()
+   throws SAXException
+ {
+   lexicalHandler.endDTD();
+ }
+
+ // Validates a namespace declaration against the reserved "xml" and
+ // "xmlns" bindings, then records it in the prefix stack and reports
+ // startPrefixMapping. A correct declaration of the "xml" prefix is
+ // accepted silently and not recorded.
+ private void declarePrefix(String prefix, String uri)
+   throws SAXException
+ {
+   // many versions of nwalsh docbook stylesheets
+   // have bogus URLs; so this can't be an error...
+   if (uri.length() != 0 && uri.indexOf(':') < 1)
+   {
+     warn("relative URI for namespace: " + uri);
+   }
+
+   // FIXME: char [0] must be ascii alpha; chars [1..index]
+   // must be ascii alphanumeric or in "+-." [RFC 2396]
+
+   // Namespace constraint: the xml prefix and
+   // http://www.w3.org/XML/1998/namespace may only be bound to each other
+   boolean xmlPrefix = prefix.equals("xml");
+   boolean xmlUri = uri.equals("http://www.w3.org/XML/1998/namespace");
+   if (xmlPrefix != xmlUri)
+   {
+     fatal("xml is by definition bound to the namespace name " +
+           "http://www.w3.org/XML/1998/namespace");
+   }
+
+   // declaring the xmlns prefix is illegal, but declaring the xml
+   // prefix (correctly) is legal...
+   if (xmlPrefix && xmlUri)
+   {
+     return;
+   }
+
+   // likewise the xmlns prefix and http://www.w3.org/2000/xmlns/
+   boolean xmlnsPrefix = prefix.equals("xmlns");
+   boolean xmlnsUri = uri.equals("http://www.w3.org/2000/xmlns/");
+   if (xmlnsPrefix != xmlnsUri)
+   {
+     fatal("http://www.w3.org/2000/xmlns/ is by definition bound" +
+           " to prefix xmlns");
+   }
+
+   // even if the uri is http://www.w3.org/2000/xmlns/
+   // it is illegal to declare it
+   if (xmlnsPrefix && xmlnsUri)
+   {
+     fatal ("declaring the xmlns prefix is illegal");
+   }
+
+   String interned = uri.intern();
+   prefixStack.declarePrefix(prefix, interned);
+   contentHandler.startPrefixMapping(prefix, interned);
+ }
+
+ // Parser callback: receives one attribute of the element about to be
+ // started. With "namespaces" enabled, namespace declarations are
+ // processed immediately and are only kept as attributes when
+ // "namespace-prefixes" is enabled as well.
+ //
+ // The checks are the same whether or not names are interned: a
+ // declaration with an empty prefix ("xmlns:") is always fatal, and
+ // equals() gives the same answer as an identity test on interned names.
+ void attribute(String qname, String value, boolean isSpecified)
+   throws SAXException
+ {
+   if (!attributes)
+   {
+     // first attribute of this element: open its namespace scope
+     attributes = true;
+     if (namespaces)
+     {
+       prefixStack.pushContext();
+     }
+   }
+
+   // process namespace decls immediately;
+   // then maybe forget this as an attribute
+   if (namespaces)
+   {
+     boolean isDeclaration = false;
+
+     // default NS declaration?
+     if ("xmlns".equals(qname))
+     {
+       declarePrefix("", value);
+       isDeclaration = true;
+     }
+     // NS prefix declaration?
+     else if (qname.indexOf(':') == 5 && qname.startsWith("xmlns"))
+     {
+       String prefix = qname.substring(6);
+
+       if (prefix.length() == 0)
+       {
+         fatal("missing prefix " +
+               "in namespace declaration attribute");
+       }
+       if (value.length() == 0)
+       {
+         verror("missing URI in namespace declaration attribute: "
+                + qname);
+       }
+       else
+       {
+         declarePrefix(prefix, value);
+       }
+       isDeclaration = true;
+     }
+
+     if (isDeclaration && !xmlNames)
+     {
+       return;
+     }
+   }
+
+   // remember this attribute ...
+   attributeCount++;
+
+   // attribute type comes from querying parser's DTD records
+   attributesList.add(new Attribute(qname, value, isSpecified));
+ }
+
+ // Parser callback: starts an element. All attributes (and therefore all
+ // namespace declarations) have been seen by now, so attribute names are
+ // resolved against the namespace context before the event is reported.
+ // The pending attribute list is emptied afterwards.
+ void startElement(String elname)
+   throws SAXException
+ {
+   ContentHandler handler = contentHandler;
+
+   //
+   // NOTE: this implementation of namespace support adds something
+   // like six percent to parsing CPU time, in a large (~50 MB)
+   // document that doesn't use namespaces at all. (Measured by PC
+   // sampling, with a bug where endElement processing was omitted.)
+   // [Measurement referred to older implementation, older JVM ...]
+   //
+   // It ought to become notably faster in such cases. Most
+   // costs are the prefix stack calling Hashtable.get() (2%),
+   // String.hashCode() (1.5%) and about 1.3% each for pushing
+   // the context, and two chunks of name processing.
+   //
+
+   if (namespaces)
+   {
+     if (!attributes)
+     {
+       // attribute() never ran, so the scope has not been opened yet
+       prefixStack.pushContext();
+     }
+     else
+     {
+       // now we can patch up namespace refs; we saw all the
+       // declarations, so now we'll do the Right Thing
+       for (Iterator itt = attributesList.iterator(); itt.hasNext(); )
+       {
+         Attribute attribute = (Attribute) itt.next();
+         String qname = attribute.name;
+
+         // default NS declaration?
+         boolean defaultDecl = stringInterning
+           ? ("xmlns" == qname)
+           : "xmlns".equals(qname);
+         if (defaultDecl)
+         {
+           continue;
+         }
+
+         //Illegal in the new Namespaces Draft
+         //should it be only in 1.1 docs??
+         if (qname.equals (":"))
+         {
+           fatal("namespace names consisting of a single colon " +
+                 "character are invalid");
+         }
+
+         // NS prefix declaration?
+         if (qname.indexOf(':') == 5 && qname.startsWith("xmlns"))
+         {
+           continue;
+         }
+
+         // it's not a NS decl; patch namespace info items
+         if (prefixStack.processName(qname, nsTemp, true) == null)
+         {
+           fatal("undeclared attribute prefix in: " + qname);
+         }
+         else
+         {
+           attribute.nameSpace = nsTemp[0];
+           attribute.localName = nsTemp[1];
+         }
+       }
+     }
+   }
+
+   // save element name so attribute callbacks work
+   elementName = elname;
+   if (!namespaces)
+   {
+     handler.startElement("", "", elname, this);
+   }
+   else
+   {
+     if (prefixStack.processName(elname, nsTemp, false) == null)
+     {
+       fatal("undeclared element prefix in: " + elname);
+       nsTemp[0] = nsTemp[1] = "";
+     }
+     handler.startElement(nsTemp[0], nsTemp[1], elname, this);
+   }
+   // elementName = null;
+
+   // elements with no attributes are pretty common!
+   if (attributes)
+   {
+     attributesList.clear();
+     attributeCount = 0;
+     attributes = false;
+   }
+ }
+
+ // Parser callback: ends an element. With namespaces enabled, also ends
+ // every prefix mapping declared on the element and closes its scope.
+ void endElement(String elname)
+   throws SAXException
+ {
+   ContentHandler handler = contentHandler;
+
+   if (namespaces)
+   {
+     prefixStack.processName(elname, nsTemp, false);
+     handler.endElement(nsTemp[0], nsTemp[1], elname);
+
+     for (Enumeration declared = prefixStack.getDeclaredPrefixes();
+          declared.hasMoreElements(); )
+     {
+       handler.endPrefixMapping((String) declared.nextElement());
+     }
+     prefixStack.popContext();
+   }
+   else
+   {
+     handler.endElement("", "", elname);
+   }
+ }
+
+ // Parser callback: reports the start of a CDATA section.
+ void startCDATA()
+   throws SAXException
+ {
+   lexicalHandler.startCDATA();
+ }
+
+ // Parser callback: forwards character data to the content handler.
+ void charData(char[] ch, int start, int length)
+   throws SAXException
+ {
+   contentHandler.characters(ch, start, length);
+ }
+
+ // Parser callback: reports the end of a CDATA section.
+ void endCDATA()
+   throws SAXException
+ {
+   lexicalHandler.endCDATA();
+ }
+
+ // Parser callback: forwards ignorable whitespace to the content handler.
+ void ignorableWhitespace(char[] ch, int start, int length)
+   throws SAXException
+ {
+   contentHandler.ignorableWhitespace(ch, start, length);
+ }
+
+ // Parser callback: forwards a processing instruction to the content
+ // handler.
+ void processingInstruction(String target, String data)
+   throws SAXException
+ {
+   contentHandler.processingInstruction(target, data);
+ }
+
+ // Parser callback: reports a comment, but only when the application
+ // registered its own lexical handler.
+ void comment(char[] ch, int start, int length)
+   throws SAXException
+ {
+   if (lexicalHandler == base)
+   {
+     return;
+   }
+   lexicalHandler.comment(ch, start, length);
+ }
+
+ void fatal(String message)
+ throws SAXException
+ {
+ SAXParseException fatal;
+
+ fatal = new SAXParseException(message, this);
+ errorHandler.fatalError(fatal);
+
+ // Even if the application can continue ... we can't!
+ throw fatal;
+ }
+
+ // We can safely report a few validity errors that
+ // make layered SAX2 DTD validation more conformant.
+ // The error goes to ErrorHandler.error(); parsing continues unless the
+ // handler throws.
+ void verror(String message)
+   throws SAXException
+ {
+   errorHandler.error(new SAXParseException(message, this));
+ }
+
+ // Reports a warning, located at the current parse position, to the
+ // error handler.
+ void warn(String message)
+   throws SAXException
+ {
+   errorHandler.warning(new SAXParseException(message, this));
+ }
+
+ //
+ // Implementation of org.xml.sax.Attributes.
+ //
+
+ /**
+  * SAX1 AttributeList, SAX2 Attributes method
+  * (don't invoke on parser);
+  *
+  * @return the number of attributes currently held
+  */
+ public int getLength()
+ {
+   return attributesList.size();
+ }
+
+ /**
+ * SAX2 Attributes method (don't invoke on parser);
+ */
+ public String getURI(int index)
+ {
+ if (index < 0 || index >= attributesList.size())
+ {
+ return null;
+ }
+ return ((Attribute) attributesList.get(index)).nameSpace;
+ }
+
+ /**
+  * SAX2 Attributes method (don't invoke on parser);
+  *
+  * @return the attribute's local name (never null for a valid index),
+  *  or null when the index is out of range
+  */
+ public String getLocalName(int index)
+ {
+   if (index < 0 || index >= attributesList.size())
+   {
+     return null;
+   }
+   Attribute attr = (Attribute) attributesList.get(index);
+
+   // FIXME attr.localName is sometimes null, why?
+   // (startElement() skips namespace declaration attributes when it
+   // fills in local names, so those keep the constructor's null.)
+   if (attr.localName == null && namespaces)
+   {
+     // XXX fix this here for now
+     int colon = attr.name.indexOf(':');
+     if (colon == -1)
+     {
+       attr.localName = attr.name;
+     }
+     else
+     {
+       attr.localName = attr.name.substring(colon + 1);
+     }
+   }
+
+   if (attr.localName == null)
+   {
+     return "";
+   }
+   return attr.localName;
+ }
+
+ /**
+  * SAX2 Attributes method (don't invoke on parser);
+  *
+  * @return the attribute's qualified name ("" if it has none), or null
+  *  when the index is out of range
+  */
+ public String getQName(int index)
+ {
+   if (index < 0 || index >= attributesList.size())
+   {
+     return null;
+   }
+   String qname = ((Attribute) attributesList.get(index)).name;
+   if (qname == null)
+   {
+     return "";
+   }
+   return qname;
+ }
+
+ /**
+  * SAX1 AttributeList method (don't invoke on parser);
+  * the SAX1 name is the qualified name.
+  */
+ public String getName(int index)
+ {
+   return getQName(index);
+ }
+
+ /**
+  * SAX1 AttributeList, SAX2 Attributes method
+  * (don't invoke on parser);
+  *
+  * @return the attribute's declared type; "CDATA" when the parser has
+  *  no type for it; "NMTOKEN" for enumerated types; or null when the
+  *  index is out of range
+  */
+ public String getType(int index)
+ {
+   if (index < 0 || index >= attributesList.size())
+   {
+     return null;
+   }
+   String type = parser.getAttributeType(elementName, getQName(index));
+   if (type == null)
+   {
+     return "CDATA";
+   }
+   // ... use DeclHandler.attributeDecl to see enumerations
+   // Compare by value: identity only works if the parser happens to
+   // return the very same interned literal.
+   if ("ENUMERATION".equals(type))
+   {
+     return "NMTOKEN";
+   }
+   return type;
+ }
+
+ /**
+ * SAX1 AttributeList, SAX2 Attributes method
+ * (don't invoke on parser);
+ */
+ public String getValue(int index)
+ {
+ if (index < 0 || index >= attributesList.size())
+ {
+ return null;
+ }
+ return ((Attribute) attributesList.get(index)).value;
+ }
+
+ /**
+  * SAX2 Attributes method (don't invoke on parser);
+  *
+  * @return the index of the first attribute with the given namespace
+  *  URI and local name, or -1 when there is none
+  */
+ public int getIndex(String uri, String local)
+ {
+   int count = getLength();
+
+   for (int i = 0; i < count; i++)
+   {
+     // the local name is only looked at once the URI matches
+     if (getURI(i).equals(uri) && getLocalName(i).equals(local))
+     {
+       return i;
+     }
+   }
+   return -1;
+ }
+
+ /**
+  * SAX2 Attributes method (don't invoke on parser);
+  *
+  * @return the index of the first attribute with the given qualified
+  *  name, or -1 when there is none
+  */
+ public int getIndex(String xmlName)
+ {
+   int count = getLength();
+   int i = 0;
+
+   while (i < count)
+   {
+     if (getQName(i).equals(xmlName))
+     {
+       return i;
+     }
+     i++;
+   }
+   return -1;
+ }
+
+ /**
+  * SAX2 Attributes method (don't invoke on parser);
+  *
+  * @return the type of the named attribute, or null when there is no
+  *  such attribute
+  */
+ public String getType(String uri, String local)
+ {
+   int index = getIndex(uri, local);
+
+   return (index < 0) ? null : getType(index);
+ }
+
+ /**
+  * SAX1 AttributeList, SAX2 Attributes method
+  * (don't invoke on parser);
+  *
+  * @return the type of the named attribute, or null when there is no
+  *  such attribute
+  */
+ public String getType(String xmlName)
+ {
+   int index = getIndex(xmlName);
+
+   return (index < 0) ? null : getType(index);
+ }
+
+ /**
+  * SAX Attributes method (don't invoke on parser);
+  *
+  * @return the value of the named attribute, or null when there is no
+  *  such attribute
+  */
+ public String getValue(String uri, String local)
+ {
+   int index = getIndex(uri, local);
+
+   return (index < 0) ? null : getValue(index);
+ }
+
+ /**
+  * SAX1 AttributeList, SAX2 Attributes method
+  * (don't invoke on parser);
+  *
+  * @return the value of the named attribute, or null when there is no
+  *  such attribute
+  */
+ public String getValue(String xmlName)
+ {
+   int index = getIndex(xmlName);
+
+   return (index < 0) ? null : getValue(index);
+ }
+
+ //
+ // Implementation of org.xml.sax.ext.Attributes2
+ //
+
+ /** @return false unless the attribute was declared in the DTD.
+ * @throws java.lang.ArrayIndexOutOfBoundsException
+ * When the supplied index does not identify an attribute.
+ */
+ public boolean isDeclared(int index)
+ {
+ if (index < 0 || index >= attributeCount)
+ {
+ throw new ArrayIndexOutOfBoundsException();
+ }
+ String type = parser.getAttributeType(elementName, getQName(index));
+ return (type != null);
+ }
+
+ /**
+  * SAX-ext Attributes2 method (don't invoke on parser);
+  *
+  * @return false unless the attribute was declared in the DTD.
+  * @throws java.lang.IllegalArgumentException
+  *  When the supplied names do not identify an attribute.
+  */
+ public boolean isDeclared(String qName)
+ {
+   if (getIndex(qName) < 0)
+   {
+     throw new IllegalArgumentException();
+   }
+   // a declared attribute is one the parser has a type for
+   return parser.getAttributeType(elementName, qName) != null;
+ }
+
+ /**
+  * SAX-ext Attributes2 method (don't invoke on parser);
+  *
+  * @return false unless the attribute was declared in the DTD.
+  * @throws java.lang.IllegalArgumentException
+  *  When the supplied names do not identify an attribute.
+  */
+ public boolean isDeclared(String uri, String localName)
+ {
+   int index = getIndex(uri, localName);
+
+   // An unknown name is an argument error, not an index error; without
+   // this check the -1 would surface as ArrayIndexOutOfBoundsException.
+   if (index < 0)
+   {
+     throw new IllegalArgumentException();
+   }
+   return isDeclared(index);
+ }
+
+ /**
+  * SAX-ext Attributes2 method (don't invoke on parser);
+  *
+  * @return true when the attribute was flagged as specified by the
+  *  parser when it was reported.
+  * @throws java.lang.ArrayIndexOutOfBoundsException
+  *  When the supplied index does not identify an attribute.
+  */
+ public boolean isSpecified(int index)
+ {
+   // Check explicitly so the exception type is the one Attributes2
+   // specifies, whatever the backing list would throw.
+   if (index < 0 || index >= attributesList.size())
+   {
+     throw new ArrayIndexOutOfBoundsException();
+   }
+   return ((Attribute) attributesList.get(index)).specified;
+ }
+
+ /**
+  * SAX-ext Attributes2 method (don't invoke on parser);
+  *
+  * @return true when the named attribute was flagged as specified by
+  *  the parser when it was reported.
+  * @throws java.lang.IllegalArgumentException
+  *  When the supplied names do not identify an attribute.
+  */
+ public boolean isSpecified(String uri, String local)
+ {
+   int index = getIndex (uri, local);
+
+   // An unknown name must not reach the index-based lookup as -1.
+   if (index < 0)
+   {
+     throw new IllegalArgumentException();
+   }
+   return isSpecified(index);
+ }
+
+ /**
+  * SAX-ext Attributes2 method (don't invoke on parser);
+  *
+  * @return true when the named attribute was flagged as specified by
+  *  the parser when it was reported.
+  * @throws java.lang.IllegalArgumentException
+  *  When the supplied name does not identify an attribute.
+  */
+ public boolean isSpecified(String xmlName)
+ {
+   int index = getIndex (xmlName);
+
+   // An unknown name must not reach the index-based lookup as -1.
+   if (index < 0)
+   {
+     throw new IllegalArgumentException();
+   }
+   return isSpecified(index);
+ }
+
+ //
+ // Implementation of org.xml.sax.Locator.
+ //
+
+ /**
+  * SAX Locator method (don't invoke on parser);
+  * public identifiers are not tracked, so this is always null.
+  */
+ public String getPublicId()
+ {
+   // FIXME track public IDs too
+   return null;
+ }
+
+ /**
+ * SAX Locator method (don't invoke on parser);
+ */
+ public String getSystemId()
+ {
+ if (entityStack.empty())
+ {
+ return null;
+ }
+ else
+ {
+ return (String) entityStack.peek();
+ }
+ }
+
+ /**
+  * SAX Locator method (don't invoke on parser);
+  * the line number is whatever the underlying parser reports.
+  */
+ public int getLineNumber()
+ {
+   return parser.getLineNumber();
+ }
+
+ /**
+  * SAX Locator method (don't invoke on parser);
+  * the column number is whatever the underlying parser reports.
+  */
+ public int getColumnNumber()
+ {
+   return parser.getColumnNumber();
+ }
+
+ // adapter between SAX2 content handler and SAX1 document handler callbacks:
+ // events with a SAX1 equivalent are forwarded using the qualified name,
+ // events without one (prefix mappings, skipped entities) are dropped
+ private static class Adapter
+   implements ContentHandler
+ {
+
+   // the SAX1 handler that receives the translated events
+   private final DocumentHandler target;
+
+   Adapter(DocumentHandler dh)
+   {
+     target = dh;
+   }
+
+   //
+   // document level
+   //
+
+   public void setDocumentLocator(Locator l)
+   {
+     target.setDocumentLocator(l);
+   }
+
+   public void startDocument()
+     throws SAXException
+   {
+     target.startDocument();
+   }
+
+   public void endDocument()
+     throws SAXException
+   {
+     target.endDocument();
+   }
+
+   //
+   // elements
+   //
+
+   public void startElement(String namespace,
+                            String local,
+                            String name,
+                            Attributes attrs)
+     throws SAXException
+   {
+     // the attributes object is expected to implement the SAX1
+     // interface as well
+     AttributeList sax1Attrs = (AttributeList) attrs;
+     target.startElement(name, sax1Attrs);
+   }
+
+   public void endElement(String u, String l, String name)
+     throws SAXException
+   {
+     target.endElement(name);
+   }
+
+   //
+   // content
+   //
+
+   public void characters(char[] buf, int offset, int len)
+     throws SAXException
+   {
+     target.characters(buf, offset, len);
+   }
+
+   public void ignorableWhitespace(char[] buf, int offset, int len)
+     throws SAXException
+   {
+     target.ignorableWhitespace(buf, offset, len);
+   }
+
+   public void processingInstruction(String target, String data)
+     throws SAXException
+   {
+     // the parameter shadows the field of the same name
+     this.target.processingInstruction(target, data);
+   }
+
+   //
+   // SAX2-only events
+   //
+
+   public void startPrefixMapping(String prefix, String uri)
+   {
+     /* ignored */
+   }
+
+   public void endPrefixMapping(String prefix)
+   {
+     /* ignored */
+   }
+
+   public void skippedEntity(String name)
+   {
+     /* ignored */
+   }
+ }
+
+ // One attribute of the pending element. The namespace fields are filled
+ // in later by SAXDriver.startElement(); until then the namespace is ""
+ // and the local name is null.
+ private static class Attribute
+ {
+
+   // qualified name, as passed to the constructor
+   String name;
+   String value;
+   // namespace URI; "" until resolved
+   String nameSpace = "";
+   // local part of the name; null until resolved
+   String localName;
+   // the "specified" flag supplied by the parser
+   boolean specified;
+
+   Attribute(String name, String value, boolean specified)
+   {
+     this.name = name;
+     this.value = value;
+     this.specified = specified;
+   }
+
+ }
+
+}
diff --git a/libjava/classpath/gnu/xml/aelfred2/XmlParser.java b/libjava/classpath/gnu/xml/aelfred2/XmlParser.java
new file mode 100644
index 000000000..813593d93
--- /dev/null
+++ b/libjava/classpath/gnu/xml/aelfred2/XmlParser.java
@@ -0,0 +1,5831 @@
+/* XmlParser.java --
+ Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version.
+
+Partly derived from code which carried the following notice:
+
+ Copyright (c) 1997, 1998 by Microstar Software Ltd.
+
+ AElfred is free for both commercial and non-commercial use and
+ redistribution, provided that Microstar's copyright and disclaimer are
+ retained intact. You are free to modify AElfred for your own use and
+ to redistribute AElfred with your modifications, provided that the
+ modifications are clearly documented.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ merchantability or fitness for a particular purpose. Please use it AT
+ YOUR OWN RISK.
+*/
+
+package gnu.xml.aelfred2;
+
+import gnu.java.security.action.GetPropertyAction;
+
+import java.io.BufferedInputStream;
+import java.io.CharConversionException;
+import java.io.EOFException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.security.AccessController;
+
+import java.util.Iterator;
+import java.util.HashMap;
+import java.util.LinkedList;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+
+/**
+ * Parse an XML document and return parse events through call-backs.
+ * Only one thread at a time may use this parser; since it is
+ * private to this package, post-parse cleanup is done by the caller,
+ * which MUST NOT REUSE the parser (just null it).
+ *
+ * <p>Before parsing starts, the five predefined entities are declared.
+ * Their replacement texts are character references, not the bare
+ * characters: replacement text is pushed back onto the input and scanned
+ * again, so a bare "&" or "<" would be read as markup.
+ *
+ * <p>The supplied reader and stream, and the parser's own input stream,
+ * are closed when this method exits, whether or not parsing succeeded.
+ *
+ * @param systemId Absolute URI of the document; should never be null,
+ * but may be so iff a reader or a stream is provided.
+ * @param publicId The public identifier of the document, or null.
+ * @param reader A character stream; must be null if stream isn't.
+ * @param stream A byte input stream; must be null if reader isn't.
+ * @param encoding The suggested encoding, or null if unknown.
+ * @exception java.lang.Exception Basically SAXException or IOException
+ * @exception IllegalStateException if no callback handler has been set
+ */
+ // package private
+ void doParse(String systemId, String publicId, Reader reader,
+              InputStream stream, String encoding)
+   throws Exception
+ {
+   if (handler == null)
+     {
+       throw new IllegalStateException("no callback handler");
+     }
+
+   initializeVariables();
+
+   // predeclare the built-in entities here (replacement texts)
+   // we don't need to intern(), since we're guaranteed literals
+   // are always (globally) interned.
+   // The values are character references so that re-scanning the
+   // replacement text yields the character as data, never as markup.
+   setInternalEntity("amp", "&#38;");
+   setInternalEntity("lt", "&#60;");
+   setInternalEntity("gt", "&#62;");
+   setInternalEntity("apos", "&#39;");
+   setInternalEntity("quot", "&#34;");
+
+   try
+     {
+       // NOTE(review): the comment that used to sit here said pushURL
+       // should run first so the locator is correct in startDocument.
+       // The code reports startDocument before pushing the document
+       // entity; the order is left exactly as found -- confirm intent.
+       // pushURL might report an IO or encoding exception.
+       handler.startDocument();
+       pushURL(false, "[document]",
+               // default baseURI: null
+               new ExternalIdentifiers(publicId, systemId, null),
+               reader, stream, encoding, false);
+
+       parseDocument();
+     }
+   catch (EOFException e)
+     {
+       //empty input
+       error("empty document, with no root element.");
+     }
+   finally
+     {
+       // Best-effort cleanup: failures while closing are deliberately
+       // ignored so they cannot mask an exception from the parse itself.
+       if (reader != null)
+         {
+           try
+             {
+               reader.close();
+             }
+           catch (IOException e)
+             {
+               /* ignore */
+             }
+         }
+       if (stream != null)
+         {
+           try
+             {
+               stream.close();
+             }
+           catch (IOException e)
+             {
+               /* ignore */
+             }
+         }
+       if (is != null)
+         {
+           try
+             {
+               is.close();
+             }
+           catch (IOException e)
+             {
+               /* ignore */
+             }
+         }
+       scratch = null;
+     }
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Error reporting.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+  * Report a fatal error, decorating the message with the text that was
+  * found and/or the text that was expected, when those are supplied.
+  * @param message The error message.
+  * @param textFound The text that caused the error (or null).
+  * @param textExpected The text that should have appeared (or null).
+  * @exception SAXException always; thrown here if the handler's fatal
+  *  callback returns instead of throwing.
+  * @see SAXDriver#error
+  * @see #line
+  */
+ private void error(String message, String textFound, String textExpected)
+   throws SAXException
+ {
+   String detail = message;
+
+   if (textFound != null)
+     {
+       detail += " (found \"" + textFound + "\")";
+     }
+   if (textExpected != null)
+     {
+       detail += " (expected \"" + textExpected + "\")";
+     }
+
+   handler.fatal(detail);
+
+   // "can't happen"
+   throw new SAXException(detail);
+ }
+
+ /**
+ * Report a serious error.
+ * @param message The error message.
+ * @param textFound The text that caused the error (or null).
+ */
+ private void error(String message, char textFound, String textExpected)
+ throws SAXException
+ {
+ error(message, Character.toString(textFound), textExpected);
+ }
+
+ /**
+ * Report typical case fatal errors.
+ * The message is handed unchanged to the handler's fatal callback;
+ * unlike the three-argument variants, nothing is thrown here afterwards.
+ * @param message The error message.
+ * @exception SAXException NOTE(review): presumably raised by the
+ * handler's fatal callback -- confirm against SAXDriver.
+ */
+ private void error(String message)
+ throws SAXException
+ {
+ handler.fatal(message);
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Major syntactic productions.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+ * Parse an XML document.
+ * This is the top-level parsing function for a single XML
+ * document. As a minimum, a well-formed document must have
+ * a document element, and a valid document must have a prolog
+ * (one with doctype) as well.
+ *
+ * The method works in two phases: first the prolog and the document
+ * element (running out of input there is reported as a premature end
+ * of file), then any trailing miscellaneous markup. After that, any
+ * further character is an error and end-of-input is the normal exit.
+ */
+ private void parseDocument()
+ throws Exception
+ {
+ try
+ { // added by MHK
+ boolean sawDTD = parseProlog();
+ require('<');
+ parseElement(!sawDTD); // with no DTD, the root element may ask the handler for an external subset
+ }
+ catch (EOFException ee)
+ { // added by MHK
+ error("premature end of file", "[EOF]", null);
+ }
+
+ try
+ {
+ parseMisc(); //skip all white, PIs, and comments
+ char c = readCh(); //if this doesn't throw an exception...
+ error("unexpected characters after document end", c, null);
+ }
+ catch (EOFException e)
+ {
+ return; // running out of input here is the expected, successful outcome
+ }
+ }
+
+ static final char[] startDelimComment = { '<', '!', '-', '-' }; // the characters "<!--"
+ static final char[] endDelimComment = { '-', '-' }; // the characters "--"
+
+ /**
+ * Parse a CDATA section.
+ * Reads input up to the CDATA end delimiter, then flushes the data
+ * buffer.
+ * NOTE: the '<![CDATA[' has already been read.
+ */
+ private void parseCDSect()
+ throws Exception
+ {
+ parseUntil(endDelimCDATA);
+ dataBufferFlush();
+ }
+
+ /**
+ * Parse the prolog of an XML document.
+ * We do not look for the XML declaration here, because it was
+ * handled by pushURL ().
+ * @see pushURL
+ * @return true if a DTD was read.
+ */
+ private boolean parseProlog()
+ throws Exception
+ {
+ parseMisc();
+
+ if (tryRead("
+ * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+ * [24] VersionInfo ::= S 'version' Eq
+ * ("'" VersionNum "'" | '"' VersionNum '"' )
+ * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*
+ * [32] SDDecl ::= S 'standalone' Eq
+ * ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' )
+ * [80] EncodingDecl ::= S 'encoding' Eq
+ * ( "'" EncName "'" | "'" EncName "'" )
+ * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+ *
+ /**
+  * Sets up decoding state for the named encoding.
+  * It is also used after autodetection, at which point only very
+  * limited adjustments to the encoding may be used (switching between
+  * related builtin decoders).
+  *
+  * <p>The name is upper-cased with a fixed locale before it is compared
+  * against the known encoding names, so the result does not depend on
+  * the default locale of the JVM (with a Turkish default locale, for
+  * instance, "iso-8859-1" would otherwise not upper-case to "ISO-8859-1").
+  *
+  * <p>When no builtin decoder applies, an InputStreamReader is created
+  * over the current input stream and the source type is switched to
+  * reader input.
+  *
+  * @param encodingName The name of the encoding specified by the user.
+  * @exception IOException if the encoding isn't supported either
+  *  internally to this parser, or by the hosting JVM.
+  * @exception SAXException if the name contradicts an autodetected
+  *  16-bit or 32-bit encoding.
+  * @see #parseXMLDecl
+  * @see #parseTextDecl
+  */
+ private void setupDecoding(String encodingName)
+   throws SAXException, IOException
+ {
+   // Locale-independent case folding: encoding names are ASCII.
+   encodingName = encodingName.toUpperCase(java.util.Locale.ENGLISH);
+
+   // ENCODING_EXTERNAL indicates an encoding that wasn't
+   // autodetected ... we can use builtin decoders, or
+   // ones from the JVM (InputStreamReader).
+
+   // Otherwise we can only tweak what was autodetected, and
+   // only for single byte (ASCII derived) builtin encodings.
+
+   // ASCII-derived encodings
+   if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL)
+     {
+       if (encodingName.equals("ISO-8859-1")
+           || encodingName.equals("8859_1")
+           || encodingName.equals("ISO8859_1"))
+         {
+           encoding = ENCODING_ISO_8859_1;
+           return;
+         }
+       else if (encodingName.equals("US-ASCII")
+                || encodingName.equals("ASCII"))
+         {
+           encoding = ENCODING_ASCII;
+           return;
+         }
+       else if (encodingName.equals("UTF-8")
+                || encodingName.equals("UTF8"))
+         {
+           encoding = ENCODING_UTF_8;
+           return;
+         }
+       else if (encoding != ENCODING_EXTERNAL)
+         {
+           // used to start with a new reader ...
+           throw new UnsupportedEncodingException(encodingName);
+         }
+       // else fallthrough ...
+       // it's ASCII-ish and something other than a builtin
+     }
+
+   // Unicode and such
+   if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21)
+     {
+       if (!(encodingName.equals("ISO-10646-UCS-2")
+             || encodingName.equals("UTF-16")
+             || encodingName.equals("UTF-16BE")
+             || encodingName.equals("UTF-16LE")))
+         {
+           error("unsupported Unicode encoding", encodingName, "UTF-16");
+         }
+       return;
+     }
+
+   // four byte encodings
+   if (encoding == ENCODING_UCS_4_1234
+       || encoding == ENCODING_UCS_4_4321
+       || encoding == ENCODING_UCS_4_2143
+       || encoding == ENCODING_UCS_4_3412)
+     {
+       // Strictly: "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists
+       if (!encodingName.equals("ISO-10646-UCS-4"))
+         {
+           error("unsupported 32-bit encoding", encodingName,
+                 "ISO-10646-UCS-4");
+         }
+       return;
+     }
+
+   // assert encoding == ENCODING_EXTERNAL
+   // if (encoding != ENCODING_EXTERNAL)
+   // throw new RuntimeException ("encoding = " + encoding);
+
+   if (encodingName.equals("UTF-16BE"))
+     {
+       encoding = ENCODING_UCS_2_12;
+       return;
+     }
+   if (encodingName.equals("UTF-16LE"))
+     {
+       encoding = ENCODING_UCS_2_21;
+       return;
+     }
+
+   // We couldn't use the builtin decoders at all. But we can try to
+   // create a reader, since we haven't messed up buffering. Tweak
+   // the encoding name if necessary.
+
+   if (encodingName.equals("UTF-16")
+       || encodingName.equals("ISO-10646-UCS-2"))
+     {
+       encodingName = "Unicode";
+     }
+   // Ignoring all the EBCDIC aliases here
+
+   reader = new InputStreamReader(is, encodingName);
+   sourceType = INPUT_READER;
+ }
+
+ /**
+ * Parse miscellaneous markup outside the document element and DOCTYPE
+ * declaration.
+ * (The Reading toplevel PE references is handled as a lexical issue
+ * by the caller, as is whitespace.
+ */
+ private void parseMarkupdecl()
+ throws Exception
+ {
+ char[] saved = null;
+ boolean savedPE = expandPE;
+
+ // prevent "<%foo;" and ensures saved entity is right
+ require('<');
+ unread('<');
+ expandPE = false;
+
+ if (tryRead(" 0)
+ {
+ parseConditionalSect(saved);
+ }
+ else
+ {
+ error("conditional sections illegal in internal subset");
+ }
+ }
+ else
+ {
+ error("expected markup declaration");
+ }
+
+ // VC: Proper Decl/PE Nesting
+ if (readBuffer != saved)
+ {
+ handler.verror("Illegal Declaration/PE nesting");
+ }
+ }
+
+ /**
+ * Parse an element, with its tags.
+ * (The '<' has already been read.)
+ * NOTE: this method actually chains onto parseContent (), if necessary,
+ * and parseContent () will take care of calling parseETag ().
+ *
+ * The current element name and content type are saved on entry and
+ * restored on exit, so nested elements do not disturb the parent's state.
+ *
+ * @param maybeGetSubset when true, the handler is asked for an external
+ * subset for this element; if it supplies one, that subset is parsed
+ * before the element's attributes are read.
+ */
+ private void parseElement(boolean maybeGetSubset)
+ throws Exception
+ {
+ String gi;
+ char c;
+ int oldElementContent = currentElementContent;
+ String oldElement = currentElement;
+ ElementDecl element;
+
+ // This is the (global) counter for the
+ // array of specified attributes.
+ tagAttributePos = 0;
+
+ // Read the element type name.
+ gi = readNmtoken(true);
+
+ // If we saw no DTD, and this is the document root element,
+ // let the application modify the input stream by providing one.
+ if (maybeGetSubset)
+ {
+ InputSource subset = handler.getExternalSubset(gi,
+ handler.getSystemId());
+ if (subset != null)
+ {
+ String publicId = subset.getPublicId();
+ String systemId = subset.getSystemId();
+
+ handler.warn("modifying document by adding DTD");
+ handler.doctypeDecl(gi, publicId, systemId);
+ pushString(null, ">"); // sentinel: reading this '>' back marks the end of the pushed subset
+
+ // NOTE: [dtd] is so we say what SAX2 expects,
+ // though it's misleading (subset, not entire dtd)
+ pushURL(true, "[dtd]",
+ new ExternalIdentifiers(publicId, systemId, null),
+ subset.getCharacterStream(),
+ subset.getByteStream(),
+ subset.getEncoding(),
+ false);
+
+ // Loop until we end up back at '>'
+ while (true)
+ {
+ doReport = expandPE = true;
+ skipWhitespace();
+ doReport = expandPE = false;
+ if (tryRead('>'))
+ {
+ break;
+ }
+ else
+ {
+ expandPE = true;
+ parseMarkupdecl();
+ expandPE = false;
+ }
+ }
+
+ // the ">" string isn't popped yet
+ if (inputStack.size() != 1)
+ {
+ error("external subset has unmatched '>'");
+ }
+
+ handler.endDoctype();
+ }
+ }
+
+ // Determine the current content type.
+ currentElement = gi;
+ element = (ElementDecl) elementInfo.get(gi);
+ currentElementContent = getContentType(element, CONTENT_ANY);
+
+ // Read the attributes, if any.
+ // After this loop, "c" is the closing delimiter.
+ boolean white = tryWhitespace();
+ c = readCh();
+ while (c != '/' && c != '>')
+ {
+ unread(c);
+ if (!white)
+ {
+ error("need whitespace between attributes");
+ }
+ parseAttribute(gi);
+ white = tryWhitespace();
+ c = readCh();
+ }
+
+ // Supply any defaulted attributes.
+ Iterator atts = declaredAttributes(element);
+ if (atts != null)
+ {
+ String aname;
+loop:
+ while (atts.hasNext())
+ {
+ aname = (String) atts.next();
+ // See if it was specified.
+ for (int i = 0; i < tagAttributePos; i++)
+ {
+ if (tagAttributes[i] == aname) // identity test; NOTE(review): presumably both names are interned -- confirm
+ {
+ continue loop;
+ }
+ }
+ // ... or has a default
+ String value = getAttributeDefaultValue(gi, aname);
+
+ if (value == null)
+ {
+ continue;
+ }
+ handler.attribute(aname, value, false); // false: reported as defaulted, not specified
+ }
+ }
+
+ // Figure out if this is a start tag
+ // or an empty element, and dispatch an
+ // event accordingly.
+ switch (c)
+ {
+ case '>':
+ handler.startElement(gi);
+ parseContent();
+ break;
+ case '/':
+ require('>');
+ handler.startElement(gi);
+ handler.endElement(gi);
+ break;
+ }
+
+ // Restore the previous state.
+ currentElement = oldElement;
+ currentElementContent = oldElementContent;
+ }
+
+ /**
+ * Parse an end tag.
+ * The name read must match the current element; optional whitespace
+ * and the closing '>' follow, and then the end of the element is
+ * reported to the handler.
+ * NOTE: parseContent () chains to here, we already read the
+ * "</".
+ */
+ private void parseETag()
+ throws Exception
+ {
+ require(currentElement);
+ skipWhitespace();
+ require('>');
+ handler.endElement(currentElement);
+ // not re-reporting any SAXException re bogus end tags,
+ // even though that diagnostic might be clearer ...
+ }
+
+ /**
+ * Parse the content of an element.
+ * NOTE: consumes ETtag.
+ */
+ private void parseContent()
+ throws Exception
+ {
+ char c;
+
+ while (true)
+ {
+ // consume characters (or ignorable whitspace) until delimiter
+ parseCharData();
+
+ // Handle delimiters
+ c = readCh();
+ switch (c)
+ {
+ case '&': // Found "&"
+ c = readCh();
+ if (c == '#')
+ {
+ parseCharRef();
+ }
+ else
+ {
+ unread(c);
+ parseEntityRef(true);
+ }
+ isDirtyCurrentElement = true;
+ break;
+
+ case '<': // Found "<"
+ dataBufferFlush();
+ c = readCh();
+ switch (c)
+ {
+ case '!': // Found "
+ * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
+ *
+ * NOTE: the '<!ELEMENT' has already been read.
+ */
+ private void parseElementDecl()
+ throws Exception
+ {
+ String name;
+
+ requireWhitespace();
+ // Read the element type name.
+ name = readNmtoken(true);
+
+ requireWhitespace();
+ // Read the content model.
+ parseContentspec(name);
+
+ skipWhitespace();
+ require('>');
+ }
+
+ /**
+ * Parse a parenthesised group of content particles in a content model.
+ * The particles must all be joined by the same separator (',' or '|');
+ * the group may be followed by one occurrence indicator ('?', '*' or
+ * '+'). Separators, the closing ')' and the indicator are appended to
+ * the data buffer as they are read.
+ * NOTE: the opening '(' and S have already been read.
+ *
+ * @param saved Buffer for entity that should have the terminal ')'
+ */
+ private void parseElements(char[] saved)
+ throws Exception
+ {
+ char c;
+ char sep;
+
+ // Parse the first content particle
+ skipWhitespace();
+ parseCp();
+
+ // Check for end or for a separator.
+ skipWhitespace();
+ c = readCh();
+ switch (c)
+ {
+ case ')':
+ // VC: Proper Group/PE Nesting
+ if (readBuffer != saved)
+ {
+ handler.verror("Illegal Group/PE nesting");
+ }
+
+ dataBufferAppend(')');
+ c = readCh();
+ switch (c)
+ {
+ case '*':
+ case '+':
+ case '?':
+ dataBufferAppend(c);
+ break;
+ default:
+ unread(c);
+ }
+ return; // single-particle group: nothing more to parse
+ case ',': // Register the separator.
+ case '|':
+ sep = c;
+ dataBufferAppend(c);
+ break;
+ default:
+ error("bad separator in content model", c, null);
+ return;
+ }
+
+ // Parse the rest of the content model.
+ while (true)
+ {
+ skipWhitespace();
+ parseCp();
+ skipWhitespace();
+ c = readCh();
+ if (c == ')')
+ {
+ // VC: Proper Group/PE Nesting
+ if (readBuffer != saved)
+ {
+ handler.verror("Illegal Group/PE nesting");
+ }
+
+ dataBufferAppend(')');
+ break;
+ }
+ else if (c != sep) // every separator must match the first one seen
+ {
+ error("bad separator in content model", c, null);
+ return;
+ }
+ else
+ {
+ dataBufferAppend(c);
+ }
+ }
+
+ // Check for the occurrence indicator.
+ c = readCh();
+ switch (c)
+ {
+ case '?':
+ case '*':
+ case '+':
+ dataBufferAppend(c);
+ return;
+ default:
+ unread(c);
+ return;
+ }
+ }
+
+ /**
+ * Parse a content particle.
+ * NOTE: the '<!ATTLIST' has already been read.
+ */
+ private void parseAttlistDecl()
+ throws Exception
+ {
+ String elementName;
+
+ requireWhitespace();
+ elementName = readNmtoken(true);
+ boolean white = tryWhitespace();
+ while (!tryRead('>'))
+ {
+ if (!white)
+ {
+ error("whitespace required before attribute definition");
+ }
+ parseAttDef(elementName);
+ white = tryWhitespace();
+ }
+ }
+
+ /**
+  * Parse an enumeration: tokens separated by '|' and closed by ')'.
+  * The whole group, including the opening '(', is appended to the data
+  * buffer with whitespace removed.
+  * NOTE: the '(' has already been read.
+  *
+  * @param isNames passed through to the token reader for every token.
+  */
+ private void parseEnumeration(boolean isNames)
+   throws Exception
+ {
+   dataBufferAppend('(');
+
+   // First token is mandatory.
+   skipWhitespace();
+   dataBufferAppend(readNmtoken(isNames));
+   skipWhitespace();
+
+   // Any further tokens each follow a '|'.
+   for (;;)
+     {
+       if (tryRead(')'))
+         {
+           break;
+         }
+       require('|');
+       dataBufferAppend('|');
+       skipWhitespace();
+       dataBufferAppend(readNmtoken(isNames));
+       skipWhitespace();
+     }
+
+   dataBufferAppend(')');
+ }
+
+ /**
+ * Parse a notation type for an attribute.
+ * Requires whitespace and an opening '(', then reads the list of
+ * notation names as an enumeration of names.
+ * NOTE: the 'NOTATION' has already been read
+ */
+ private void parseNotationType()
+ throws Exception
+ {
+ requireWhitespace();
+ require('(');
+
+ parseEnumeration(true);
+ }
+
+ /**
+  * Parse a conditional section.
+  * An INCLUDE section has its markup declarations parsed up to the
+  * closing "]]&gt;".  An IGNORE section is skipped without parameter
+  * entity expansion, counting nested "&lt;![" ... "]]&gt;" pairs so that
+  * the matching terminator is found.
+  * NOTE: the '&lt;![' has already been read.
+  *
+  * @param saved the read buffer that was current when the section
+  *  started; if a different buffer is current at the '[', a validity
+  *  error (improper conditional section/PE nesting) is reported.
+  */
+ private void parseConditionalSect(char[] saved)
+   throws Exception
+ {
+   skipWhitespace();
+   if (tryRead("INCLUDE"))
+     {
+       skipWhitespace();
+       require('[');
+       // VC: Proper Conditional Section/PE Nesting
+       if (readBuffer != saved)
+         {
+           handler.verror("Illegal Conditional Section/PE nesting");
+         }
+       skipWhitespace();
+       while (!tryRead("]]>"))
+         {
+           parseMarkupdecl();
+           skipWhitespace();
+         }
+     }
+   else if (tryRead("IGNORE"))
+     {
+       skipWhitespace();
+       require('[');
+       // VC: Proper Conditional Section/PE Nesting
+       if (readBuffer != saved)
+         {
+           handler.verror("Illegal Conditional Section/PE nesting");
+         }
+       char c;
+       expandPE = false;
+       // "nest" is the number of sections still open; we start inside one.
+       for (int nest = 1; nest > 0; )
+         {
+           c = readCh();
+           switch (c)
+             {
+             case '<':
+               if (tryRead("!["))
+                 {
+                   nest++;
+                 }
+               break;
+             case ']':
+               if (tryRead("]>"))
+                 {
+                   nest--;
+                 }
+             }
+         }
+       expandPE = true;
+     }
+   else
+     {
+       error("conditional section must begin with INCLUDE or IGNORE");
+     }
+ }
+
+ /**
+ * Read and interpret a character reference, flushing the data buffer
+ * afterwards. Shorthand for the one-argument variant called with true.
+ */
+ private void parseCharRef()
+ throws SAXException, IOException
+ {
+ parseCharRef(true /* do flushDataBuffer by default */);
+ }
+
+ /**
+ * Try to read a character reference without consuming data from buffer.
+ * NOTE: the '' has already been read.
+ */
+ private void tryReadCharRef()
+ throws SAXException, IOException
+ {
+ int value = 0;
+ char c;
+
+ if (tryRead('x'))
+ {
+loop1:
+ while (true)
+ {
+ c = readCh();
+ if (c == ';')
+ {
+ break loop1;
+ }
+ else
+ {
+ int n = Character.digit(c, 16);
+ if (n == -1)
+ {
+ error("illegal character in character reference", c, null);
+ break loop1;
+ }
+ value *= 16;
+ value += n;
+ }
+ }
+ }
+ else
+ {
+loop2:
+ while (true)
+ {
+ c = readCh();
+ if (c == ';')
+ {
+ break loop2;
+ }
+ else
+ {
+ int n = Character.digit(c, 10);
+ if (n == -1)
+ {
+ error("illegal character in character reference", c, null);
+ break loop2;
+ }
+ value *= 10;
+ value += n;
+ }
+ }
+ }
+
+ // check for character refs being legal XML
+ if ((value < 0x0020
+ && ! (value == '\n' || value == '\t' || value == '\r'))
+ || (value >= 0xD800 && value <= 0xDFFF)
+ || value == 0xFFFE || value == 0xFFFF
+ || value > 0x0010ffff)
+ {
+ error("illegal XML character reference U+"
+ + Integer.toHexString(value));
+ }
+
+ // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
+ // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
+ if (value > 0x0010ffff)
+ {
+ // too big for surrogate
+ error("character reference " + value + " is too large for UTF-16",
+ Integer.toString(value), null);
+ }
+
+ }
+
+ /**
+ * Read and interpret a character reference.
+ * NOTE: the '' has already been read.
+ */
+ private void parseCharRef(boolean doFlush)
+ throws SAXException, IOException
+ {
+ int value = 0;
+ char c;
+
+ if (tryRead('x'))
+ {
+loop1:
+ while (true)
+ {
+ c = readCh();
+ if (c == ';')
+ {
+ break loop1;
+ }
+ else
+ {
+ int n = Character.digit(c, 16);
+ if (n == -1)
+ {
+ error("illegal character in character reference", c, null);
+ break loop1;
+ }
+ value *= 16;
+ value += n;
+ }
+ }
+ }
+ else
+ {
+loop2:
+ while (true)
+ {
+ c = readCh();
+ if (c == ';')
+ {
+ break loop2;
+ }
+ else
+ {
+ int n = Character.digit(c, 10);
+ if (n == -1)
+ {
+ error("illegal character in character reference", c, null);
+ break loop2;
+ }
+ value *= 10;
+ value += c - '0';
+ }
+ }
+ }
+
+ // check for character refs being legal XML
+ if ((value < 0x0020
+ && ! (value == '\n' || value == '\t' || value == '\r'))
+ || (value >= 0xD800 && value <= 0xDFFF)
+ || value == 0xFFFE || value == 0xFFFF
+ || value > 0x0010ffff)
+ {
+ error("illegal XML character reference U+"
+ + Integer.toHexString(value));
+ }
+
+ // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
+ // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
+ if (value <= 0x0000ffff)
+ {
+ // no surrogates needed
+ dataBufferAppend((char) value);
+ }
+ else if (value <= 0x0010ffff)
+ {
+ value -= 0x10000;
+ // > 16 bits, surrogate needed
+ dataBufferAppend((char) (0xd800 | (value >> 10)));
+ dataBufferAppend((char) (0xdc00 | (value & 0x0003ff)));
+ }
+ else
+ {
+ // too big for surrogate
+ error("character reference " + value + " is too large for UTF-16",
+ Integer.toString(value), null);
+ }
+ if (doFlush)
+ {
+ dataBufferFlush();
+ }
+ }
+
+ /**
+ * Parse and expand an entity reference.
+ * The action depends on the entity type: an undeclared entity is a
+ * validity error (and possibly a skipped entity) or a fatal error; an
+ * internal entity has its value pushed as input and pre-scanned for
+ * well-formed references; an external text entity is pushed by URL; an
+ * unparsed (NDATA) entity is always an error.
+ * NOTE: the '&' has already been read.
+ * @param externalAllowed External entities are allowed here.
+ */
+ private void parseEntityRef(boolean externalAllowed)
+ throws SAXException, IOException
+ {
+ String name;
+
+ name = readNmtoken(true);
+ require(';');
+ switch (getEntityType(name))
+ {
+ case ENTITY_UNDECLARED:
+ // NOTE: XML REC describes amazingly convoluted handling for
+ // this case. Nothing as meaningful as being a WFness error
+ // unless the processor might _legitimately_ not have seen a
+ // declaration ... which is what this implements.
+ String message;
+
+ message = "reference to undeclared general entity " + name;
+ if (skippedPE && !docIsStandalone)
+ {
+ handler.verror(message);
+ // we don't know this entity, and it might be external...
+ if (externalAllowed)
+ {
+ handler.skippedEntity(name);
+ }
+ }
+ else
+ {
+ error(message);
+ }
+ break;
+ case ENTITY_INTERNAL:
+ pushString(name, getEntityValue(name));
+
+ //workaround for possible input pop before marking
+ //the buffer reading position
+ char t = readCh();
+ unread(t);
+ int bufferPosMark = readBufferPos;
+
+ int end = readBufferPos + getEntityValue(name).length();
+ for (int k = readBufferPos; k < end; k++) // look-ahead pass over the replacement text only
+ {
+ t = readCh();
+ if (t == '&')
+ {
+ t = readCh();
+ if (t == '#')
+ {
+ //try to match a character ref
+ tryReadCharRef();
+
+ //everything has been read
+ if (readBufferPos >= end)
+ {
+ break;
+ }
+ k = readBufferPos;
+ continue;
+ }
+ else if (Character.isLetter(t))
+ {
+ //looks like an entity ref
+ unread(t);
+ readNmtoken(true);
+ require(';');
+
+ //everything has been read
+ if (readBufferPos >= end)
+ {
+ break;
+ }
+ k = readBufferPos;
+ continue;
+ }
+ error(" malformed entity reference");
+ }
+
+ }
+ readBufferPos = bufferPosMark; // rewind so the replacement text is parsed normally afterwards
+ break;
+ case ENTITY_TEXT:
+ if (externalAllowed)
+ {
+ pushURL(false, name, getEntityIds(name),
+ null, null, null, true);
+ }
+ else
+ {
+ error("reference to external entity in attribute value.",
+ name, null);
+ }
+ break;
+ case ENTITY_NDATA:
+ if (externalAllowed)
+ {
+ error("unparsed entity reference in content", name, null);
+ }
+ else
+ {
+ error("reference to external entity in attribute value.",
+ name, null);
+ }
+ break;
+ default:
+ throw new RuntimeException(); // unknown entity type: internal inconsistency
+ }
+ }
+
+ /**
+ * Parse and expand a parameter entity reference.
+ * The entity is looked up under its name prefixed with "%". Unless the
+ * reference occurs inside a literal, the replacement is padded with one
+ * space on each side. An undeclared entity is reported as a validity
+ * error only. Entity types other than the three handled below are
+ * silently ignored by this switch.
+ * NOTE: the '%' has already been read.
+ */
+ private void parsePEReference()
+ throws SAXException, IOException
+ {
+ String name;
+
+ name = "%" + readNmtoken(true);
+ require(';');
+ switch (getEntityType(name))
+ {
+ case ENTITY_UNDECLARED:
+ // VC: Entity Declared
+ handler.verror("reference to undeclared parameter entity " + name);
+
+ // we should disable handling of all subsequent declarations
+ // unless this is a standalone document (info discarded)
+ break;
+ case ENTITY_INTERNAL:
+ if (inLiteral)
+ {
+ pushString(name, getEntityValue(name));
+ }
+ else
+ {
+ pushString(name, ' ' + getEntityValue(name) + ' ');
+ }
+ break;
+ case ENTITY_TEXT:
+ if (!inLiteral)
+ {
+ pushString(null, " "); // pushed before the entity, so it is read after it (trailing pad)
+ }
+ pushURL(true, name, getEntityIds(name), null, null, null, true);
+ if (!inLiteral)
+ {
+ pushString(null, " "); // pushed after the entity, so it is read first (leading pad)
+ }
+ break;
+ }
+ }
+
+ /**
+ * Parse an entity declaration.
+ * Handles general and parameter entities (the latter are stored under
+ * a name prefixed with "%"), with either a literal value (internal
+ * entity) or external identifiers, optionally followed by NDATA and a
+ * notation name for general entities. External entities are neither
+ * recorded nor reported while the skippedPE flag is set.
+ * NOTE: the '<!ENTITY' has already been read.
+ */
+ private void parseEntityDecl()
+ throws Exception
+ {
+ boolean peFlag = false;
+ int flags = 0; // literal-reading flags; never changed in this method
+
+ // Check for a parameter entity.
+ expandPE = false;
+ requireWhitespace();
+ if (tryRead('%'))
+ {
+ peFlag = true;
+ requireWhitespace();
+ }
+ expandPE = true;
+
+ // Read the entity name, and prepend
+ // '%' if necessary.
+ String name = readNmtoken(true);
+ //NE08
+ if (name.indexOf(':') >= 0)
+ {
+ error("Illegal character(':') in entity name ", name, null);
+ }
+ if (peFlag)
+ {
+ name = "%" + name;
+ }
+
+ // Read the entity value.
+ requireWhitespace();
+ char c = readCh();
+ unread (c); // peek only: the quote is consumed again by readLiteral
+ if (c == '"' || c == '\'')
+ {
+ // Internal entity ... replacement text has expanded refs
+ // to characters and PEs, but not to general entities
+ String value = readLiteral(flags);
+ setInternalEntity(name, value);
+ }
+ else
+ {
+ // Read the external IDs
+ ExternalIdentifiers ids = readExternalIds(false, false);
+
+ // Check for NDATA declaration.
+ boolean white = tryWhitespace();
+ if (!peFlag && tryRead("NDATA"))
+ {
+ if (!white)
+ {
+ error("whitespace required before NDATA");
+ }
+ requireWhitespace();
+ String notationName = readNmtoken(true);
+ if (!skippedPE)
+ {
+ setExternalEntity(name, ENTITY_NDATA, ids, notationName);
+ handler.unparsedEntityDecl(name, ids.publicId, ids.systemId,
+ ids.baseUri, notationName);
+ }
+ }
+ else if (!skippedPE)
+ {
+ setExternalEntity(name, ENTITY_TEXT, ids, null);
+ handler.getDeclHandler()
+ .externalEntityDecl(name, ids.publicId,
+ handler.resolveURIs()
+ // FIXME: ASSUMES not skipped
+ // "false" forces error on bad URI
+ ? handler.absolutize(ids.baseUri,
+ ids.systemId,
+ false)
+ : ids.systemId);
+ }
+ }
+
+ // Finish the declaration.
+ skipWhitespace();
+ require('>');
+ }
+
+ /**
+ * Parse a notation declaration.
+ * NOTE: the '<!NOTATION' has already been read.
+ */
+ private void parseNotationDecl()
+ throws Exception
+ {
+ String nname;
+ ExternalIdentifiers ids;
+
+ requireWhitespace();
+ nname = readNmtoken(true);
+ //NE08
+ if (nname.indexOf(':') >= 0)
+ {
+ error("Illegal character(':') in notation name ", nname, null);
+ }
+ requireWhitespace();
+
+ // Read the external identifiers.
+ ids = readExternalIds(true, false);
+
+ // Register the notation.
+ setNotation(nname, ids);
+
+ skipWhitespace();
+ require('>');
+ }
+
+ /**
+ * Require a string to appear, or throw an exception.
+ * Precondition: Entity expansion is not required.
+ * Precondition: data buffer has no characters that
+ * will get sent to the application.
+ * (The start of the data buffer is reused as scratch space for the
+ * delimiter's characters whenever the delimiter fits into it.)
+ *
+ * @param delim the exact text that must come next in the input.
+ */
+ private void require(String delim)
+ throws SAXException, IOException
+ {
+ int length = delim.length();
+ char[] ch;
+
+ if (length < dataBuffer.length)
+ {
+ ch = dataBuffer; // borrow the data buffer instead of allocating
+ delim.getChars(0, length, ch, 0);
+ }
+ else
+ {
+ ch = delim.toCharArray();
+ }
+
+ if (USE_CHEATS && length <= (readBufferLength - readBufferPos)) // fast path: whole delimiter is already buffered
+ {
+ int offset = readBufferPos;
+
+ for (int i = 0; i < length; i++, offset++)
+ {
+ if (ch[i] != readBuffer[offset])
+ {
+ error ("required string", null, delim);
+ }
+ }
+ readBufferPos = offset;
+
+ }
+ else
+ {
+ for (int i = 0; i < length; i++) // slow path: check one character at a time
+ {
+ require(ch[i]);
+ }
+ }
+ }
+
+ /**
+ * Require a character to appear, or throw an exception.
+ */
+ private void require(char delim)
+ throws SAXException, IOException
+ {
+ char c = readCh();
+
+ if (c != delim)
+ {
+ error("required character", c, Character.toString(delim));
+ }
+ }
+
+ /**
+ * Create an interned string from a character array.
+ * Ælfred uses this method to create an interned version
+ * of all names and name tokens, so that it can test equality
+ * with == instead of String.equals ().
+ *
+ * This is much more efficient than constructing a non-interned
+ * string first, and then interning it.
+ *
+ * Each slot of the symbol table holds a bucket: a flat array of
+ * {char[], String} pairs, scanned in order up to the first empty entry.
+ *
+ * @param ch an array of characters for building the string.
+ * @param start the starting position in the array.
+ * @param length the number of characters to place in the string.
+ * @return an interned string.
+ * @see #intern (String)
+ * @see java.lang.String#intern
+ */
+ public String intern(char[] ch, int start, int length)
+ {
+ int index = 0;
+ int hash = 0;
+ Object[] bucket;
+
+ // Generate a hash code. This is a widely used string hash,
+ // often attributed to Brian Kernighan.
+ for (int i = start; i < start + length; i++)
+ {
+ hash = 31 * hash + ch[i];
+ }
+ hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; // clear the sign bit so the table index is never negative
+
+ // Get the bucket -- consists of {array,String} pairs
+ if ((bucket = symbolTable[hash]) == null)
+ {
+ // first string in this bucket
+ bucket = new Object[8];
+
+ // Search for a matching tuple, and
+ // return the string if we find one.
+ }
+ else
+ {
+ while (index < bucket.length)
+ {
+ char[] chFound = (char[]) bucket[index];
+
+ // Stop when we hit an empty entry.
+ if (chFound == null)
+ {
+ break;
+ }
+
+ // If they're the same length, check for a match.
+ if (chFound.length == length)
+ {
+ for (int i = 0; i < chFound.length; i++)
+ {
+ // continue search on failure
+ if (ch[start + i] != chFound[i])
+ {
+ break;
+ }
+ else if (i == length - 1)
+ {
+ // That's it, we have a match!
+ return (String) bucket[index + 1];
+ }
+ }
+ }
+ index += 2; // step to the next {char[], String} pair
+ }
+ // Not found -- we'll have to add it.
+
+ // Do we have to grow the bucket?
+ bucket = (Object[]) extendArray(bucket, bucket.length, index);
+ }
+ symbolTable[hash] = bucket;
+
+ // OK, add it to the end of the bucket -- "local" interning.
+ // Intern "globally" to let applications share interning benefits.
+ // That is, "!=" and "==" work on our strings, not just equals().
+ String s = new String(ch, start, length).intern();
+ bucket[index] = s.toCharArray();
+ bucket[index + 1] = s;
+ return s;
+ }
+
+ /**
+ * Ensure the capacity of an array, allocating a new one if
+ * necessary. Usually extends only for name hash collisions.
+ */
+ private Object extendArray(Object array, int currentSize, int requiredSize)
+ {
+ if (requiredSize < currentSize)
+ {
+ return array;
+ }
+ else
+ {
+ Object newArray = null;
+ int newSize = currentSize * 2;
+
+ if (newSize <= requiredSize)
+ {
+ newSize = requiredSize + 1;
+ }
+
+ if (array instanceof char[])
+ {
+ newArray = new char[newSize];
+ }
+ else if (array instanceof Object[])
+ {
+ newArray = new Object[newSize];
+ }
+ else
+ {
+ throw new RuntimeException();
+ }
+
+ System.arraycopy(array, 0, newArray, 0, currentSize);
+ return newArray;
+ }
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // XML query routines.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+  * Report the standalone flag recorded for the document.
+  *
+  * @return the current value of the docIsStandalone field.
+  */
+ boolean isStandalone()
+ {
+   return this.docIsStandalone;
+ }
+
+ //
+ // Elements
+ //
+
+ /**
+  * Pick the content type of an element declaration, falling back to
+  * a caller-supplied default.
+  *
+  * @param element the declaration record, possibly null.
+  * @param defaultType value to use when there is no record or its
+  *        content type is still CONTENT_UNDECLARED.
+  * @return the recorded content type, or defaultType.
+  */
+ private int getContentType(ElementDecl element, int defaultType)
+ {
+   if (element != null && element.contentType != CONTENT_UNDECLARED)
+     {
+       return element.contentType;
+     }
+   return defaultType;
+ }
+
+ /**
+  * Look up the content type of an element.
+  *
+  * @param name The element type name.
+  * @return An integer constant representing the content type;
+  *         CONTENT_UNDECLARED when nothing is recorded for the name.
+  * @see #CONTENT_UNDECLARED
+  * @see #CONTENT_ANY
+  * @see #CONTENT_EMPTY
+  * @see #CONTENT_MIXED
+  * @see #CONTENT_ELEMENTS
+  */
+ public int getElementContentType(String name)
+ {
+   return getContentType((ElementDecl) elementInfo.get(name),
+                         CONTENT_UNDECLARED);
+ }
+
+ /**
+  * Register or update the record kept for an element type.
+  * Does nothing while skippedPE is set.
+  *
+  * @param name element type name, used as the key in elementInfo.
+  * @param contentType content type constant; CONTENT_UNDECLARED marks
+  *        a call that only supplies attributes (setAttribute () calls
+  *        it that way).
+  * @param contentModel content model string stored with the type.
+  * @param attributes attribute table for the element, may be null.
+  * @throws SAXException declared; handler.verror () is called when a
+  *         content type is supplied for an element that already has one.
+  */
+ private void setElement(String name, int contentType,
+                         String contentModel, HashMap attributes)
+   throws SAXException
+ {
+   if (skippedPE)
+     {
+       return;
+     }
+
+   ElementDecl known = (ElementDecl) elementInfo.get(name);
+
+   if (known == null)
+     {
+       // First mention of this element type: record everything given.
+       ElementDecl created = new ElementDecl();
+       created.contentType = contentType;
+       created.contentModel = contentModel;
+       created.attributes = attributes;
+       elementInfo.put(name, created);
+     }
+   else if (contentType == CONTENT_UNDECLARED)
+     {
+       // Attribute-only registration for an element we already know.
+       if (attributes != null)
+         {
+           known.attributes = attributes;
+         }
+     }
+   else if (known.contentType == CONTENT_UNDECLARED)
+     {
+       // Element declaration arriving after an attribute-only record.
+       known.contentType = contentType;
+       known.contentModel = contentModel;
+     }
+   else
+     {
+       // VC: Unique Element Type Declaration
+       handler.verror("multiple declarations for element type: "
+                      + name);
+     }
+ }
+
+ /**
+  * Look up the attribute table recorded for an element type.
+  *
+  * @param name element type name.
+  * @return the attributes field of the element's record, or null when
+  *         no record exists for the name.
+  */
+ private HashMap getElementAttributes(String name)
+ {
+   ElementDecl element = (ElementDecl) elementInfo.get(name);
+   if (element == null)
+     {
+       return null;
+     }
+   return element.attributes;
+ }
+
+ //
+ // Attributes
+ //
+
+ /**
+ * Get the declared attributes for an element type.
+ * @param elname The name of the element type.
+ * @return An iterator over all the attributes declared for
+ * a specific element type. The results will be valid only
+ * after the DTD (if any) has been parsed.
+ * @see #getAttributeType
+ * @see #getAttributeEnumeration
+ * @see #getAttributeDefaultValueType
+ * @see #getAttributeDefaultValue
+ * @see #getAttributeExpandedValue
+ */
+ private Iterator declaredAttributes(ElementDecl element)
+ {
+ HashMap attlist;
+
+ if (element == null)
+ {
+ return null;
+ }
+ if ((attlist = element.attributes) == null)
+ {
+ return null;
+ }
+ return attlist.keySet().iterator();
+ }
+
+ /**
+  * Get the declared attributes for an element type.
+  *
+  * @param elname The name of the element type.
+  * @return An iterator over the names of all the attributes recorded
+  *         for the element type, or null when none are recorded.
+  *         The results will be valid only after the DTD (if any)
+  *         has been parsed.
+  * @see #getAttributeType
+  * @see #getAttributeEnumeration
+  * @see #getAttributeDefaultValueType
+  * @see #getAttributeDefaultValue
+  */
+ public Iterator declaredAttributes(String elname)
+ {
+   ElementDecl element = (ElementDecl) elementInfo.get(elname);
+   return declaredAttributes(element);
+ }
+
+ /**
+  * Retrieve the declared type of an attribute.
+  *
+  * @param name The name of the associated element.
+  * @param aname The name of the attribute.
+  * @return The type string stored with the declaration, or null
+  *         indicating an undeclared attribute.
+  */
+ public String getAttributeType(String name, String aname)
+ {
+   AttributeDecl decl = getAttribute(name, aname);
+   if (decl == null)
+     {
+       return null;
+     }
+   return decl.type;
+ }
+
+ /**
+  * Retrieve the allowed values for an enumerated attribute type.
+  *
+  * @param name The name of the associated element.
+  * @param aname The name of the attribute.
+  * @return The enumeration string stored with the declaration, or
+  *         null when the attribute is not declared.
+  */
+ public String getAttributeEnumeration(String name, String aname)
+ {
+   AttributeDecl decl = getAttribute(name, aname);
+   if (decl == null)
+     {
+       return null;
+     }
+   return decl.enumeration;
+ }
+
+ /**
+  * Retrieve the default value of a declared attribute.
+  *
+  * @param name The name of the associated element.
+  * @param aname The name of the attribute.
+  * @return The value stored with the declaration (which may itself
+  *         be null), or null if the attribute is not declared.
+  */
+ public String getAttributeDefaultValue(String name, String aname)
+ {
+   AttributeDecl decl = getAttribute(name, aname);
+   if (decl == null)
+     {
+       return null;
+     }
+   return decl.value;
+ }
+
+ /*
+
+// FIXME: Leaving this in, until W3C finally resolves the confusion
+// between parts of the XML 2nd REC about when entity declarations
+// are guaranteed to be known. Current code matches what section 5.1
+// (conformance) describes, but some readings of the self-contradicting
+// text in 4.1 (the "Entity Declared" WFC and VC) seem to expect that
+// attribute expansion/normalization must be deferred in some cases
+// (just TRY to identify them!).
+
+ * Retrieve the expanded value of a declared attribute.
+ * General entities (and char refs) will be expanded (once).
+ * @param name The name of the associated element.
+ * @param aname The name of the attribute.
+ * @return The expanded default value, or null if the attribute was
+ * #IMPLIED or simply undeclared
+ * @see #getAttributeDefaultValue
+ public String getAttributeExpandedValue (String name, String aname)
+ throws Exception
+ {
+ AttributeDecl attribute = getAttribute (name, aname);
+
+ if (attribute == null) {
+ return null;
+ } else if (attribute.defaultValue == null && attribute.value != null) {
+ // we MUST use the same buf for both quotes else the literal
+ // can't be properly terminated
+ char buf [] = new char [1];
+ int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE;
+ String type = getAttributeType (name, aname);
+
+ if (type != "CDATA" && type != null)
+ flags |= LIT_NORMALIZE;
+ buf [0] = '"';
+ pushCharArray (null, buf, 0, 1);
+ pushString (null, attribute.value);
+ pushCharArray (null, buf, 0, 1);
+ attribute.defaultValue = readLiteral (flags);
+ }
+ return attribute.defaultValue;
+ }
+ */
+
+ /**
+  * Retrieve the default value mode of a declared attribute.
+  *
+  * @param name The name of the associated element.
+  * @param aname The name of the attribute.
+  * @return The valueType stored with the declaration, or
+  *         ATTRIBUTE_DEFAULT_UNDECLARED when there is no declaration.
+  * @see #ATTRIBUTE_DEFAULT_SPECIFIED
+  * @see #ATTRIBUTE_DEFAULT_IMPLIED
+  * @see #ATTRIBUTE_DEFAULT_REQUIRED
+  * @see #ATTRIBUTE_DEFAULT_FIXED
+  */
+ public int getAttributeDefaultValueType(String name, String aname)
+ {
+   AttributeDecl decl = getAttribute(name, aname);
+   if (decl == null)
+     {
+       return ATTRIBUTE_DEFAULT_UNDECLARED;
+     }
+   return decl.valueType;
+ }
+
+ /**
+  * Register an attribute declaration for later retrieval.
+  * Does nothing while skippedPE is set.  The first declaration of an
+  * attribute wins; later ones for the same name are ignored.
+  *
+  * @param elName name of the element type the attribute belongs to.
+  * @param name attribute name, used as the key in the element's table.
+  * @param type declared type string.
+  * @param enumeration enumeration string stored with the declaration.
+  * @param value default value string.
+  * @param valueType default value mode constant.
+  * @throws SAXException declared because setElement () declares it.
+  *         (Narrowed from the former raw Exception; callers that
+  *         catch or declare Exception are unaffected.)
+  */
+ private void setAttribute(String elName, String name, String type,
+                           String enumeration, String value, int valueType)
+   throws SAXException
+ {
+   if (skippedPE)
+     {
+       return;
+     }
+
+   // Create a new table if the element has none yet.
+   HashMap attlist = getElementAttributes(elName);
+   if (attlist == null)
+     {
+       attlist = new HashMap();
+     }
+
+   // ignore multiple attribute declarations!
+   if (attlist.get(name) != null)
+     {
+       // warn ...
+       return;
+     }
+
+   AttributeDecl attribute = new AttributeDecl();
+   attribute.type = type;
+   attribute.value = value;
+   attribute.valueType = valueType;
+   attribute.enumeration = enumeration;
+   attlist.put(name, attribute);
+
+   // save; CONTENT_UNDECLARED keeps setElement () from touching any
+   // content type already recorded for the element
+   setElement(elName, CONTENT_UNDECLARED, null, attlist);
+ }
+
+ /**
+  * Retrieve the attribute declaration for the given element name and
+  * attribute name.
+  *
+  * @param elName element type name.
+  * @param name attribute name.
+  * @return the stored declaration, or null when the element has no
+  *         attribute table or the table has no such entry.
+  */
+ private AttributeDecl getAttribute(String elName, String name)
+ {
+   HashMap attlist = getElementAttributes(elName);
+   if (attlist == null)
+     {
+       return null;
+     }
+   return (AttributeDecl) attlist.get(name);
+ }
+
+ //
+ // Entities
+ //
+
+ /**
+  * Find the type of an entity.
+  *
+  * @param ename The name of the entity.
+  * @return An integer constant representing the entity type;
+  *         ENTITY_UNDECLARED when no entity is registered under the name.
+  * @see #ENTITY_UNDECLARED
+  * @see #ENTITY_INTERNAL
+  * @see #ENTITY_NDATA
+  * @see #ENTITY_TEXT
+  */
+ public int getEntityType(String ename)
+ {
+   EntityInfo entity = (EntityInfo) entityInfo.get(ename);
+   if (entity == null)
+     {
+       return ENTITY_UNDECLARED;
+     }
+   return entity.type;
+ }
+
+ /**
+  * Return an external entity's identifiers.
+  *
+  * @param ename The name of the external entity.
+  * @return The ids stored with the entity (public identifier, system
+  *         identifier, and base URI), or null when no entity is
+  *         registered under the name.
+  * @see #getEntityType
+  */
+ public ExternalIdentifiers getEntityIds(String ename)
+ {
+   EntityInfo entity = (EntityInfo) entityInfo.get(ename);
+   if (entity == null)
+     {
+       return null;
+     }
+   return entity.ids;
+ }
+
+ /**
+  * Return an internal entity's replacement text.
+  *
+  * @param ename The name of the internal entity.
+  * @return The value stored with the entity, or null when no entity
+  *         is registered under the name.
+  * @see #getEntityType
+  */
+ public String getEntityValue(String ename)
+ {
+   EntityInfo entity = (EntityInfo) entityInfo.get(ename);
+   if (entity == null)
+     {
+       return null;
+     }
+   return entity.value;
+ }
+
+ /**
+ * Register an entity declaration for later retrieval.
+ */
+ private void setInternalEntity(String eName, String value)
+ throws SAXException
+ {
+ if (skippedPE)
+ {
+ return;
+ }
+
+ if (entityInfo.get(eName) == null)
+ {
+ EntityInfo entity = new EntityInfo();
+ entity.type = ENTITY_INTERNAL;
+ entity.value = value;
+ entityInfo.put(eName, entity);
+ }
+ if (handler.stringInterning)
+ {
+ if ("lt" == eName || "gt" == eName || "quot" == eName
+ || "apos" == eName || "amp" == eName)
+ {
+ return;
+ }
+ }
+ else
+ {
+ if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName)
+ || "apos".equals(eName) || "amp".equals(eName))
+ {
+ return;
+ }
+ }
+ handler.getDeclHandler().internalEntityDecl(eName, value);
+ }
+
+ /**
+  * Register an external entity declaration for later retrieval.
+  * Only the first declaration for a name is stored.
+  *
+  * @param eName entity name.
+  * @param eClass value stored as the entity's type.
+  * @param ids external identifiers of the entity.
+  * @param nName notation name stored with the entity, may be null.
+  */
+ private void setExternalEntity(String eName, int eClass,
+                                ExternalIdentifiers ids, String nName)
+ {
+   if (entityInfo.get(eName) != null)
+     {
+       return;
+     }
+
+   EntityInfo entity = new EntityInfo();
+   entity.type = eClass;
+   entity.ids = ids;
+   entity.notationName = nName;
+   entityInfo.put(eName, entity);
+ }
+
+ //
+ // Notations.
+ //
+
+ /**
+  * Report a notation declaration, checking for duplicates.
+  * Does nothing while skippedPE is set.  The declaration is passed to
+  * the handler before the duplicate check, so duplicates are reported
+  * as well.
+  *
+  * @param nname notation name.
+  * @param ids external identifiers of the notation.
+  * @throws SAXException declared; handler.verror () is called for a
+  *         name that was registered before.
+  */
+ private void setNotation(String nname, ExternalIdentifiers ids)
+   throws SAXException
+ {
+   if (skippedPE)
+     {
+       return;
+     }
+
+   handler.notationDecl(nname, ids.publicId, ids.systemId, ids.baseUri);
+
+   if (notationInfo.get(nname) != null)
+     {
+       // VC: Unique Notation Name
+       handler.verror("Duplicate notation name decl: " + nname);
+       return;
+     }
+   notationInfo.put(nname, nname);
+ }
+
+ //
+ // Location.
+ //
+
+ /**
+  * Return the current line number.
+  *
+  * @return the value of the line counter for the current input.
+  */
+ public int getLineNumber()
+ {
+   return this.line;
+ }
+
+ /**
+  * Return the current column number.
+  *
+  * @return the value of the column counter for the current input.
+  */
+ public int getColumnNumber()
+ {
+   return this.column;
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // High-level I/O.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+ * Read a single character from the readBuffer.
+ * The readDataChunk () method maintains the buffer.
+ * If we hit the end of an entity, try to pop the stack and
+ * keep going.
+ * (This approach doesn't really enforce XML's rules about
+ * entity boundaries, but this is not currently a validating
+ * parser).
+ * This routine also attempts to keep track of the current
+ * position in external entities, but it's not entirely accurate:
+ * a newline bumps the line counter and zeroes the column, any other
+ * character that is returned bumps the column.
+ * When a '%' is read while expandPE is set, parsePEReference () is
+ * called and the character that follows is returned instead.
+ * @return The next available input character.
+ * @throws SAXException declared; error () is invoked for characters
+ * that fail the legality test and for a PE reference read
+ * while peIsError is set.
+ * @throws IOException declared; presumably propagated from
+ * readDataChunk () or popInput () -- confirm.
+ * @see #unread (char)
+ * @see #readDataChunk
+ * @see #readBuffer
+ * @see #line
+ */
+ private char readCh()
+ throws SAXException, IOException
+ {
+ // As long as there's nothing in the
+ // read buffer, try reading more data
+ // (for an external entity) or popping
+ // the entity stack (for either).
+ while (readBufferPos >= readBufferLength)
+ {
+ switch (sourceType)
+ {
+ case INPUT_READER:
+ case INPUT_STREAM:
+ readDataChunk();
+ // Still nothing: drop back to the enclosing input, and refill
+ // from there if that input has nothing buffered either.
+ while (readBufferLength < 1)
+ {
+ popInput();
+ if (readBufferLength < 1)
+ {
+ readDataChunk();
+ }
+ }
+ break;
+
+ default:
+
+ // Any other source type has no stream to refill from.
+ popInput();
+ break;
+ }
+ }
+
+ char c = readBuffer[readBufferPos++];
+
+ if (c == '\n')
+ {
+ line++;
+ column = 0;
+ }
+ else
+ {
+ if (c == '<')
+ {
+ /* the most common return to parseContent () ... NOP */
+ }
+ // Rejected: controls below 0x20 other than tab and CR, anything
+ // above 0xFFFD, and -- only when xmlVersion is XML_11 -- the range
+ // 0x7f..0x9f except 0x85.  (Newline never reaches this test.)
+ else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
+ || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
+ && xmlVersion == XML_11))
+ {
+ error("illegal XML character U+" + Integer.toHexString(c));
+ }
+
+ // If we're in the DTD and in a context where PEs get expanded,
+ // do so ... 1/14/2000 errata identify those contexts. There
+ // are also spots in the internal subset where PE refs are fatal
+ // errors, hence yet another flag.
+ else if (c == '%' && expandPE)
+ {
+ if (peIsError)
+ {
+ error("PE reference within decl in internal subset.");
+ }
+ parsePEReference();
+ // The '%' itself is not counted as a column; the recursive call
+ // does the bookkeeping for the character it returns.
+ return readCh();
+ }
+ column++;
+ }
+
+ return c;
+ }
+
+ /**
+  * Push a single character back onto the current input.
+  * Normally the character is written back into readBuffer just
+  * before the read position.  When the read position is already at
+  * the start of the buffer, the character is pushed as a new
+  * one-character input via pushString () instead.
+  * Unreading a newline decrements the line counter and sets the
+  * column to -1.
+  *
+  * @param c The character to push back.
+  * @throws SAXException declared; pushString () declares it.
+  * @see #readCh
+  * @see #unread (char[])
+  * @see #readBuffer
+  */
+ private void unread(char c)
+   throws SAXException
+ {
+   if (c == '\n')
+     {
+       line--;
+       column = -1;
+     }
+
+   // Boundary condition: no room in front of the read position.
+   if (readBufferPos <= 0)
+     {
+       pushString(null, Character.toString(c));
+       return;
+     }
+
+   // Normal condition.
+   readBuffer[--readBufferPos] = c;
+ }
+
+ /**
+  * Push a char array back onto the current input.
+  * NOTE: you must never push back characters that you
+  * haven't actually read: use pushString () instead.
+  * When fewer characters are unread than the read position has
+  * advanced, the position is simply moved back and the buffer is not
+  * rewritten; otherwise the characters are pushed as a new input.
+  * Each newline among them decrements the line counter and sets
+  * the column to -1.
+  *
+  * @param ch the characters that were read.
+  * @param length how many leading characters of ch to push back.
+  * @throws SAXException declared; pushCharArray () declares it.
+  * @see #readCh
+  * @see #unread (char)
+  * @see #readBuffer
+  * @see #pushString
+  */
+ private void unread(char[] ch, int length)
+   throws SAXException
+ {
+   int i = 0;
+   while (i < length)
+     {
+       if (ch[i] == '\n')
+         {
+           line--;
+           column = -1;
+         }
+       i++;
+     }
+
+   if (length >= readBufferPos)
+     {
+       pushCharArray(null, ch, 0, length);
+       return;
+     }
+   readBufferPos -= length;
+ }
+
+ /**
+  * Push, or skip, a new external input source.
+  * The source will be some kind of parsed entity, such as a PE
+  * (including the external DTD subset) or content for the body.
+  *
+  * @param isPE true for a parameter entity: the data buffer is then
+  *        not flushed first, and skipping the entity sets skippedPE.
+  * @param ename name of the entity being pushed.
+  * @param ids public identifier, system identifier and base URI.
+  * @param reader character stream to read from; used only when
+  *        doResolve is false.
+  * @param stream byte stream to read from; used only when
+  *        doResolve is false.
+  * @param encoding encoding label for the byte stream; used only
+  *        when doResolve is false.
+  * @param doResolve true to ask the handler to resolve the entity
+  *        (a null answer skips it); false to use the streams given.
+  * @throws SAXException declared; error () is invoked for an input
+  *         source with two streams, with no stream and no system
+  *         ID, or with an unsupported text encoding.
+  * @throws IOException declared; opening or reading the input is
+  *         done here.
+  * @see SAXDriver#resolveEntity
+  * @see #pushString
+  * @see #sourceType
+  * @see #pushInput
+  * @see #detectEncoding
+  * @see #readBuffer
+  */
+ private void pushURL(boolean isPE,
+                      String ename,
+                      ExternalIdentifiers ids,
+                      Reader reader,
+                      InputStream stream,
+                      String encoding,
+                      boolean doResolve)
+   throws SAXException, IOException
+ {
+   boolean ignoreEncoding;
+   String systemId;
+   InputSource source;
+
+   if (!isPE)
+     {
+       dataBufferFlush();
+     }
+
+   scratch.setPublicId(ids.publicId);
+   scratch.setSystemId(ids.systemId);
+
+   // See if we should skip or substitute the entity.
+   // If we're not skipping, resolving reports startEntity()
+   // and updates the (handler's) stack of URIs.
+   if (doResolve)
+     {
+       // assert (stream == null && reader == null && encoding == null)
+       source = handler.resolveEntity(isPE, ename, scratch, ids.baseUri);
+       if (source == null)
+         {
+           handler.warn("skipping entity: " + ename);
+           handler.skippedEntity(ename);
+           if (isPE)
+             {
+               skippedPE = true;
+             }
+           return;
+         }
+
+       // We might be using alternate IDs/encoding.  A null system ID
+       // is accepted here: the application has the option of not
+       // setting it provided that it has set the character/byte stream.
+       systemId = source.getSystemId();
+     }
+   else
+     {
+       // "[document]", or "[dtd]" via getExternalSubset()
+       scratch.setCharacterStream(reader);
+       scratch.setByteStream(stream);
+       scratch.setEncoding(encoding);
+       source = scratch;
+       systemId = ids.systemId;
+       if (handler.stringInterning)
+         {
+           handler.startExternalEntity(ename, systemId,
+                                       "[document]" == ename);
+         }
+       else
+         {
+           handler.startExternalEntity(ename, systemId,
+                                       "[document]".equals(ename));
+         }
+     }
+
+   // we may have been given I/O streams directly
+   if (source.getCharacterStream() != null)
+     {
+       if (source.getByteStream() != null)
+         {
+           error("InputSource has two streams!");
+         }
+       reader = source.getCharacterStream();
+     }
+   else if (source.getByteStream() != null)
+     {
+       encoding = source.getEncoding();
+       if (encoding == null)
+         {
+           stream = source.getByteStream();
+         }
+       else
+         {
+           try
+             {
+               reader = new InputStreamReader(source.getByteStream(),
+                                              encoding);
+             }
+           catch (IOException e)
+             {
+               // The JVM has no decoder for this label: fall back to the
+               // raw bytes and let setupDecoding () deal with the label.
+               stream = source.getByteStream();
+             }
+         }
+     }
+   else if (systemId == null)
+     {
+       error("InputSource has no URI!");
+     }
+   scratch.setCharacterStream(null);
+   scratch.setByteStream(null);
+   scratch.setEncoding(null);
+
+   // Push the existing status.
+   pushInput(ename);
+
+   // Create a new read buffer.
+   // (Note the four-character margin)
+   readBuffer = new char[READ_BUFFER_MAX + 4];
+   readBufferPos = 0;
+   readBufferLength = 0;
+   readBufferOverflow = -1;
+   is = null;
+   line = 1;
+   column = 0;
+   currentByteCount = 0;
+
+   // If there's an explicit character stream, just
+   // ignore encoding declarations.
+   if (reader != null)
+     {
+       sourceType = INPUT_READER;
+       this.reader = reader;
+       tryEncodingDecl(true);
+       return;
+     }
+
+   // Else we handle the conversion, and need to ensure
+   // it's done right.
+   sourceType = INPUT_STREAM;
+   if (stream != null)
+     {
+       is = stream;
+     }
+   else
+     {
+       // We have to open our own stream to the URL.
+       URL url = new URL(systemId);
+
+       externalEntity = url.openConnection();
+       externalEntity.connect();
+       is = externalEntity.getInputStream();
+     }
+
+   // If we get to here, there must be
+   // an InputStream available.
+   if (!is.markSupported())
+     {
+       is = new BufferedInputStream(is);
+     }
+
+   // Get any external encoding label.
+   if (encoding == null && externalEntity != null)
+     {
+       // External labels can be untrustworthy; filesystems in
+       // particular often have the wrong default for content
+       // that wasn't locally originated.  Those we autodetect.
+       if (!"file".equals(externalEntity.getURL().getProtocol()))
+         {
+           // application/xml;charset=something;otherAttr=...
+           // ... with many variants on 'something'
+           String contentType = externalEntity.getContentType();
+
+           // MHK code (fix for Saxon 5.5.1/007):
+           // protect against a null content type
+           int charsetPos = (contentType == null)
+             ? -1
+             : contentType.indexOf("charset");
+
+           // RFC 2376 sez MIME text defaults to ASCII, but since the
+           // JDK will create a MIME type out of thin air, we always
+           // autodetect when there's no explicit charset attribute.
+           if (charsetPos >= 0)
+             {
+               encoding = charsetLabel(contentType, charsetPos);
+               if (encoding == null)
+                 {
+                   handler.warn("ignoring illegal MIME attribute: "
+                                + contentType);
+                 }
+             }
+           // else: encoding stays null, i.e. autodetect
+         }
+     }
+
+   // if we got an external encoding label, use it ...
+   if (encoding != null)
+     {
+       this.encoding = ENCODING_EXTERNAL;
+       setupDecoding(encoding);
+       ignoreEncoding = true;
+     }
+   // ... else autodetect from first bytes.
+   else
+     {
+       detectEncoding();
+       ignoreEncoding = false;
+     }
+
+   // Read any XML or text declaration.
+   // If we autodetected, it may tell us the "real" encoding.
+   try
+     {
+       tryEncodingDecl(ignoreEncoding);
+     }
+   catch (UnsupportedEncodingException x)
+     {
+       encoding = x.getMessage();
+
+       // if we don't handle the declared encoding,
+       // try letting a JVM InputStreamReader do it
+       try
+         {
+           if (sourceType != INPUT_STREAM)
+             {
+               throw x;
+             }
+
+           is.reset();
+           readBufferPos = 0;
+           readBufferLength = 0;
+           readBufferOverflow = -1;
+           line = 1;
+           currentByteCount = column = 0;
+
+           sourceType = INPUT_READER;
+           this.reader = new InputStreamReader(is, encoding);
+           is = null;
+
+           tryEncodingDecl(true);
+         }
+       catch (IOException e)
+         {
+           error("unsupported text encoding",
+                 encoding,
+                 null);
+         }
+     }
+ }
+
+ /**
+  * Extract the value of the charset attribute from a MIME content
+  * type such as {@code text/xml; charset="UTF-8" (comment)}.
+  * Only the charset attribute is looked at: the media type in front
+  * of it and any attributes after it are dropped before the value is
+  * located.  (The previous inline code cut the string at the first
+  * ';' instead, which removed the charset attribute itself.)
+  *
+  * @param contentType the complete content type.
+  * @param charsetPos index of "charset" within contentType.
+  * @return the trimmed charset value with comment and quotes
+  *         removed, or null when no non-empty "=value" follows.
+  */
+ private static String charsetLabel(String contentType, int charsetPos)
+ {
+   // only this one attribute
+   String attribute = contentType.substring(charsetPos);
+   int end = attribute.indexOf(';');
+   if (end > 0)
+     {
+       attribute = attribute.substring(0, end);
+     }
+
+   // "charset" is seven characters long; the '=' has to follow it.
+   int equals = attribute.indexOf('=', 7);
+   if (equals < 0)
+     {
+       return null;
+     }
+   String label = attribute.substring(equals + 1);
+
+   // attributes can have comment fields (RFC 822)
+   int comment = label.indexOf('(');
+   if (comment > 0)
+     {
+       label = label.substring(0, comment);
+     }
+
+   // ... and values may be quoted; an unterminated quote is left as is
+   int open = label.indexOf('"');
+   if (open >= 0)
+     {
+       int close = label.indexOf('"', open + 1);
+       if (close > open)
+         {
+           label = label.substring(open + 1, close);
+         }
+     }
+
+   label = label.trim();
+   return (label.length() == 0) ? null : label;
+ }
+
+ /**
+  * Check for an encoding declaration.  This is the second part of the
+  * XML encoding autodetection algorithm, relying on detectEncoding to
+  * get to the point that this part can read any encoding declaration
+  * in the document (using only US-ASCII characters).
+  *
+  * Because this part starts to fill parser buffers with this data,
+  * it's tricky to setup a reader so that Java's built-in decoders can be
+  * used for the character encodings that aren't built in to this parser
+  * (such as EUC-JP, KOI8-R, Big5, etc).
+  *
+  * @param ignoreEncoding passed through to the declaration parsers.
+  * @return whatever parseTextDecl () or parseXMLDecl () returns, or
+  *         null when the input does not start with a declaration.
+  * @throws SAXException declared by the routines called here.
+  * @throws IOException declared by the routines called here.
+  * @see #detectEncoding
+  */
+ private String tryEncodingDecl(boolean ignoreEncoding)
+   throws SAXException, IOException
+ {
+   // NOTE(review): the two opening conditions and the inputStack test
+   // were reconstructed from the surviving branch structure after
+   // markup stripping destroyed the original line -- confirm against
+   // the upstream source.
+
+   // Read the XML/text declaration.
+   if (tryRead("<?xml"))
+     {
+       if (tryWhitespace())
+         {
+           // Inputs pushed on top of another one carry a text
+           // declaration; the outermost one an XML declaration.
+           if (inputStack.size() > 0)
+             {
+               return parseTextDecl(ignoreEncoding);
+             }
+           else
+             {
+               return parseXMLDecl(ignoreEncoding);
+             }
+         }
+       else
+         {
+           // <?xml-stylesheet ...?> or similar: not a declaration,
+           // so put the five characters back in reverse order.
+           unread('l');
+           unread('m');
+           unread('x');
+           unread('?');
+           unread('<');
+         }
+     }
+   return null;
+ }
+
+ /**
+ * Attempt to detect the encoding of an entity.
+ * The trick here (as suggested in the XML standard) is that
+ * any entity not in UTF-8, or in UCS-2 with a byte-order mark,
+ * must begin with an XML declaration or an encoding
+ * declaration; we simply have to look for "<?xml" in various
+ * encodings.
+ * This method has no way to distinguish among 8-bit encodings.
+ * Instead, it sets up for UTF-8, then (possibly) revises its assumption
+ * later in setupDecoding (). Any ASCII-derived 8-bit encoding
+ * should work, but most will be rejected later by setupDecoding ().
+ * @see #tryEncoding (byte[], byte, byte, byte, byte)
+ * @see #tryEncoding (byte[], byte, byte)
+ * @see #setupDecoding
+ */
+ private void detectEncoding()
+ throws SAXException, IOException
+ {
+ byte[] signature = new byte[4];
+
+ // Read the first four bytes for
+ // autodetection.
+ is.mark(4);
+ is.read(signature);
+ is.reset();
+
+ //
+ // FIRST: four byte encodings (who uses these?)
+ //
+ if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
+ (byte) 0x00, (byte) 0x3c))
+ {
+ // UCS-4 must begin with "Utility routine for detectEncoding ().
+ * Always looks for some part of "Looks for a UCS-2 byte-order mark.
+ * Utility routine for detectEncoding ().
+ * @param sig The first four bytes read.
+ * @param b1 The first byte of the signature
+ * @param b2 The second byte of the signature
+ * @see #detectEncoding
+ */
+ private static boolean tryEncoding(byte[] sig, byte b1, byte b2)
+ {
+   // Both leading bytes of the signature have to agree.
+   if (sig[0] != b1)
+     {
+       return false;
+     }
+   return sig[1] == b2;
+ }
+
+ /**
+  * Push a string onto the input as a new internal source.
+  * It is useful either as the expansion of an internal entity,
+  * or for backtracking during the parse.
+  * The actual work is done by pushCharArray ().
+  *
+  * @param ename entity name handed on to pushCharArray (); callers in
+  *        this file pass null when merely backtracking.
+  * @param s The string to push back onto input.
+  * @throws SAXException declared; pushCharArray () declares it.
+  * @see #pushCharArray
+  */
+ private void pushString(String ename, String s)
+   throws SAXException
+ {
+   pushCharArray(ename, s.toCharArray(), 0, s.length());
+ }
+
+ /**
+  * Push a new internal input source.
+  * This method is useful for expanding an internal entity,
+  * or for unreading a string of characters.  The given array itself
+  * becomes the readBuffer (it is not copied), instead of characters
+  * converted from an input byte stream.
+  *
+  * @param ename entity name; when non-null and doReport is set, the
+  *        data buffer is flushed and the start of the internal
+  *        entity is reported to the handler.
+  * @param ch The char array to push.
+  * @param start value for the new read position.
+  * @param length value for the new readBufferLength.
+  * @throws SAXException declared by the routines called here.
+  * @see #pushString
+  * @see #pushURL
+  * @see #readBuffer
+  * @see #sourceType
+  * @see #pushInput
+  */
+ private void pushCharArray(String ename, char[] ch, int start, int length)
+   throws SAXException
+ {
+   // Push the existing status
+   pushInput(ename);
+
+   if (ename != null && doReport)
+     {
+       dataBufferFlush();
+       handler.startInternalEntity(ename);
+     }
+
+   // Switch over to the array as the current input.
+   readBuffer = ch;
+   readBufferPos = start;
+   readBufferLength = length;
+   readBufferOverflow = -1;
+   sourceType = INPUT_INTERNAL;
+ }
+
+ /**
+ * Save the current input source onto the stack.
+ * This method saves all of the global variables associated with
+ * the current input source, so that they can be restored when a new
+ * input source has finished. It also tests for entity recursion.
+ * The method saves the following global variables onto a stack
+ * using a fixed-length array:
+ * This method restores all of the global variables associated with
+ * the current input source.
+ * @exception java.io.EOFException
+ * If there are no more entries on the input stack.
+ * @see #pushInput
+ * @see #sourceType
+ * @see #externalEntity
+ * @see #readBuffer
+ * @see #readBufferPos
+ * @see #readBufferLength
+ * @see #line
+ * @see #encoding
+ */
+ private void popInput()
+ throws SAXException, IOException
+ {
+ String ename = (String) entityStack.removeLast();
+
+ if (ename != null && doReport)
+ {
+ dataBufferFlush();
+ }
+ switch (sourceType)
+ {
+ case INPUT_STREAM:
+ handler.endExternalEntity(ename);
+ is.close();
+ break;
+ case INPUT_READER:
+ handler.endExternalEntity(ename);
+ reader.close();
+ break;
+ case INPUT_INTERNAL:
+ if (ename != null && doReport)
+ {
+ handler.endInternalEntity(ename);
+ }
+ break;
+ }
+
+ // Throw an EOFException if there
+ // is nothing else to pop.
+ if (inputStack.isEmpty())
+ {
+ throw new EOFException("no more input");
+ }
+
+ Input input = (Input) inputStack.removeLast();
+
+ sourceType = input.sourceType;
+ externalEntity = input.externalEntity;
+ readBuffer = input.readBuffer;
+ readBufferPos = input.readBufferPos;
+ readBufferLength = input.readBufferLength;
+ line = input.line;
+ encoding = input.encoding;
+ readBufferOverflow = input.readBufferOverflow;
+ is = input.is;
+ currentByteCount = input.currentByteCount;
+ column = input.column;
+ reader = input.reader;
+ }
+
+ /**
+ * Return true if we can read the expected character.
+ * Note that the character will be removed from the input stream
+ * on success, but will be put back on failure. Do not attempt to
+ * read the character again if the method succeeds.
+ * @param delim The character that should appear next. For a
+ * insensitive match, you must supply this in upper-case.
+ * @return true if the character was successfully read, or false if
+ * it was not.
+ * @see #tryRead (String)
+ */
+ private boolean tryRead(char delim)
+ throws SAXException, IOException
+ {
+ char c;
+
+ // Read the character
+ c = readCh();
+
+ // Test for a match, and push the character
+ // back if the match fails.
+ if (c == delim)
+ {
+ return true;
+ }
+ else
+ {
+ unread(c);
+ return false;
+ }
+ }
+
+ /**
+ * Return true if we can read the expected string.
+ * This is simply a convenience method.
+ * Note that the string will be removed from the input stream
+ * on success, but will be put back on failure. Do not attempt to
+ * read the string again if the method succeeds.
+ * This method will push back a character rather than an
+ * array whenever possible (probably the majority of cases).
+ * @param delim The string that should appear next.
+ * @return true if the string was successfully read, or false if
+ * it was not.
+ * @see #tryRead (char)
+ */
+ private boolean tryRead(String delim)
+ throws SAXException, IOException
+ {
+ return tryRead(delim.toCharArray());
+ }
+
+ private boolean tryRead(char[] ch)
+ throws SAXException, IOException
+ {
+ char c;
+
+ // Compare the input, character-
+ // by character.
+
+ for (int i = 0; i < ch.length; i++)
+ {
+ c = readCh();
+ if (c != ch[i])
+ {
+ unread(c);
+ if (i != 0)
+ {
+ unread(ch, i);
+ }
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Return true if we can read some whitespace.
+ * This is simply a convenience method.
+ * This method will push back a character rather than an
+ * array whenever possible (probably the majority of cases).
+ * @return true if whitespace was found.
+ */
+ private boolean tryWhitespace()
+ throws SAXException, IOException
+ {
+ char c;
+ c = readCh();
+ if (isWhitespace(c))
+ {
+ skipWhitespace();
+ return true;
+ }
+ else
+ {
+ unread(c);
+ return false;
+ }
+ }
+
+ /**
+ * Read all data until we find the specified string.
+ * This is useful for scanning CDATA sections and PIs.
+ * This is inefficient right now, since it calls tryRead ()
+ * for every character.
+ * @param delim The string delimiter
+ * @see #tryRead (String, boolean)
+ * @see #readCh
+ */
+ private void parseUntil(String delim)
+ throws SAXException, IOException
+ {
+ parseUntil(delim.toCharArray());
+ }
+
+ private void parseUntil(char[] delim)
+ throws SAXException, IOException
+ {
+ char c;
+ int startLine = line;
+
+ try
+ {
+ while (!tryRead(delim))
+ {
+ c = readCh();
+ dataBufferAppend(c);
+ }
+ }
+ catch (EOFException e)
+ {
+ error("end of input while looking for delimiter "
+ + "(started on line " + startLine
+ + ')', null, new String(delim));
+ }
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Low-level I/O.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+ * Prefetch US-ASCII XML/text decl from input stream into read buffer.
+ * Doesn't buffer more than absolutely needed, so that when an encoding
+ * decl says we need to create an InputStreamReader, we can discard our
+ * buffer and reset(). Caller knows the first chars of the decl exist
+ * in the input stream.
+ */
+ private void prefetchASCIIEncodingDecl()
+ throws SAXException, IOException
+ {
+ int ch;
+ readBufferPos = readBufferLength = 0;
+
+ is.mark(readBuffer.length);
+ while (true)
+ {
+ ch = is.read();
+ readBuffer[readBufferLength++] = (char) ch;
+ switch (ch)
+ {
+ case (int) '>':
+ return;
+ case -1:
+ error("file ends before end of XML or encoding declaration.",
+ null, "?>");
+ }
+ if (readBuffer.length == readBufferLength)
+ {
+ error("unfinished XML or encoding declaration");
+ }
+ }
+ }
+
+ /**
+  * Read a chunk of data from an external input source into readBuffer.
+  * Character streams are read directly; byte streams are first read
+  * into the rawReadBuffer and then decoded by the encoding handler.
+  * Carriage returns are normalized by filterCR () on the way.
+  * @see #encoding
+  * @see #rawReadBuffer
+  * @see #readBuffer
+  * @see #filterCR
+  * @see #copyUtf8ReadBuffer
+  * @see #copyIso8859_1ReadBuffer
+  * @see #copyUcs2ReadBuffer
+  * @see #copyUcs4ReadBuffer
+  */
+ private void readDataChunk()
+   throws SAXException, IOException
+ {
+   int count;
+
+   // See if we have any overflow (filterCR sets for CR at end)
+   if (readBufferOverflow > -1)
+     {
+       readBuffer[0] = (char) readBufferOverflow;
+       readBufferOverflow = -1;
+       readBufferPos = 1;
+       sawCR = true;
+     }
+   else
+     {
+       readBufferPos = 0;
+       sawCR = false;
+     }
+
+   // input from a character stream.
+   if (sourceType == INPUT_READER)
+     {
+       count = reader.read(readBuffer,
+                           readBufferPos, READ_BUFFER_MAX - readBufferPos);
+       // A negative count means end of input: nothing new was appended.
+       readBufferLength = readBufferPos + (count < 0 ? 0 : count);
+
+       // Rewind before filtering so that a CR carried over from the
+       // previous chunk (now at index 0) is filtered and delivered too;
+       // leaving the position at 1 would skip that line end entirely.
+       readBufferPos = 0;
+       if (readBufferLength > 0)
+         {
+           filterCR(count >= 0);
+         }
+       sawCR = false;
+       return;
+     }
+
+   // Read as many bytes as possible into the raw buffer.
+   count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX);
+
+   // Dispatch to an encoding-specific reader method to populate
+   // the readBuffer. In most parser speed profiles, these routines
+   // show up at the top of the CPU usage chart.
+   if (count > 0)
+     {
+       switch (encoding)
+         {
+         // one byte builtins
+         case ENCODING_ASCII:
+           copyIso8859_1ReadBuffer(count, (char) 0x0080);
+           break;
+         case ENCODING_UTF_8:
+           copyUtf8ReadBuffer(count);
+           break;
+         case ENCODING_ISO_8859_1:
+           copyIso8859_1ReadBuffer(count, (char) 0);
+           break;
+
+         // two byte builtins
+         case ENCODING_UCS_2_12:
+           copyUcs2ReadBuffer(count, 8, 0);
+           break;
+         case ENCODING_UCS_2_21:
+           copyUcs2ReadBuffer(count, 0, 8);
+           break;
+
+         // four byte builtins
+         case ENCODING_UCS_4_1234:
+           copyUcs4ReadBuffer(count, 24, 16, 8, 0);
+           break;
+         case ENCODING_UCS_4_4321:
+           copyUcs4ReadBuffer(count, 0, 8, 16, 24);
+           break;
+         case ENCODING_UCS_4_2143:
+           copyUcs4ReadBuffer(count, 16, 24, 0, 8);
+           break;
+         case ENCODING_UCS_4_3412:
+           copyUcs4ReadBuffer(count, 8, 0, 24, 16);
+           break;
+         }
+     }
+   else
+     {
+       readBufferLength = readBufferPos;
+     }
+
+   readBufferPos = 0;
+
+   // Filter out all carriage returns if we've seen any
+   // (including any saved from a previous read)
+   if (sawCR)
+     {
+       filterCR(count >= 0);
+       sawCR = false;
+
+       // must actively report EOF, lest some CRs get lost.
+       if (readBufferLength == 0 && count >= 0)
+         {
+           readDataChunk();
+         }
+     }
+
+   if (count > 0)
+     {
+       currentByteCount += count;
+     }
+ }
+
+ /**
+ * Filter carriage returns in the read buffer, compacting it in place.
+ * CRLF becomes LF; CR becomes LF. A CR in the last position is held back in readBufferOverflow when moreData is true, since its LF may still arrive.
+ * @param moreData true iff more data might come from the same source
+ * @see #readDataChunk
+ * @see #readBuffer
+ * @see #readBufferOverflow
+ */
+ private void filterCR(boolean moreData)
+ {
+ int i, j; // i: next write index, j: current read index
+
+ readBufferOverflow = -1;
+
+loop:
+ for (i = j = readBufferPos; j < readBufferLength; i++, j++)
+ {
+ switch (readBuffer[j])
+ {
+ case '\r':
+ if (j == readBufferLength - 1)
+ {
+ if (moreData)
+ {
+ readBufferOverflow = '\r'; // readDataChunk () puts it back at the front of the next chunk
+ readBufferLength--;
+ }
+ else // CR at end of buffer and no more data: it is a line end by itself
+ {
+ readBuffer[i++] = '\n';
+ }
+ break loop;
+ }
+ else if (readBuffer[j + 1] == '\n')
+ {
+ j++; // CRLF pair: step over the CR so only one LF is written
+ }
+ readBuffer[i] = '\n';
+ break;
+
+ case '\n':
+ default:
+ readBuffer[i] = readBuffer[j];
+ break;
+ }
+ }
+ readBufferLength = i; // characters kept after compaction
+ }
+
+ /**
+ * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
+ * When readDataChunk () calls this method, the raw bytes are in
+ * rawReadBuffer, and the final characters will appear in
+ * readBuffer, starting at readBufferPos; readBufferLength is set to the end of the output.
+ * Note that as of Unicode 3.1, good practice became a requirement,
+ * so that each Unicode character has exactly one UTF-8 representation.
+ * @param count The number of bytes to convert.
+ * @see #readDataChunk
+ * @see #rawReadBuffer
+ * @see #readBuffer
+ * @see #getNextUtf8Byte
+ */
+ private void copyUtf8ReadBuffer(int count)
+ throws SAXException, IOException
+ {
+ int i = 0; // read index into rawReadBuffer
+ int j = readBufferPos; // write index into readBuffer
+ int b1;
+ char c = 0;
+
+ /*
+ // check once, so the runtime won't (if it's smart enough)
+ if (count < 0 || count > rawReadBuffer.length)
+ throw new ArrayIndexOutOfBoundsException (Integer.toString (count));
+ */
+
+ while (i < count)
+ {
+ b1 = rawReadBuffer[i++]; // signed byte: a negative value means the high bit is set
+
+ // Determine whether we are dealing
+ // with a one-, two-, three-, or four-
+ // byte sequence.
+ if (b1 < 0)
+ {
+ if ((b1 & 0xe0) == 0xc0)
+ {
+ // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
+ c = (char) (((b1 & 0x1f) << 6)
+ | getNextUtf8Byte(i++, count));
+ if (c < 0x0080)
+ {
+ encodingError("Illegal two byte UTF-8 sequence",
+ c, 0);
+ }
+
+ //Sec 2.11 (XML 1.1 end-of-line handling)
+ // [1] the two-character sequence #xD #xA
+ // [2] the two-character sequence #xD #x85 -- NOTE(review): sawCR is never cleared in this method, so it means "a CR was seen earlier in this chunk", not "the previous char was CR"; the test is also not gated on XML_11. Confirm a lone #x85 cannot be dropped here.
+ if ((c == 0x0085 || c == 0x000a) && sawCR)
+ {
+ continue;
+ }
+
+ // Sec 2.11
+ // [3] the single character #x85
+
+ if (c == 0x0085 && xmlVersion == XML_11)
+ {
+ readBuffer[j++] = '\r'; // NOTE(review): c (#x85) is still stored by the common code below and sawCR is not set here -- confirm intended
+ }
+ }
+ else if ((b1 & 0xf0) == 0xe0)
+ {
+ // 3-byte sequence:
+ // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
+ // most CJKV characters
+ c = (char) (((b1 & 0x0f) << 12) |
+ (getNextUtf8Byte(i++, count) << 6) |
+ getNextUtf8Byte(i++, count));
+ //sec 2.11
+ //[4] the single character #x2028 (stored as CR so that filterCR () turns it into LF)
+ if (c == 0x2028 && xmlVersion == XML_11)
+ {
+ readBuffer[j++] = '\r';
+ sawCR = true;
+ continue;
+ }
+ if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff))
+ {
+ encodingError("Illegal three byte UTF-8 sequence",
+ c, 0);
+ }
+ }
+ else if ((b1 & 0xf8) == 0xf0)
+ {
+ // 4-byte sequence: 110110wwwwzzzzyy + 110111yyyyxxxxxx
+ // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
+ // (uuuuu = wwww + 1)
+ // "Surrogate Pairs" ... from the "Astral Planes"
+ // Unicode 3.1 assigned the first characters there
+ int iso646 = b1 & 07; // octal 07: the three payload bits of the lead byte
+ iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
+ iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
+ iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
+
+ if (iso646 <= 0xffff)
+ {
+ encodingError("Illegal four byte UTF-8 sequence",
+ iso646, 0);
+ }
+ else
+ {
+ if (iso646 > 0x0010ffff)
+ {
+ encodingError("UTF-8 value out of range for Unicode",
+ iso646, 0);
+ }
+ iso646 -= 0x010000;
+ readBuffer[j++] = (char) (0xd800 | (iso646 >> 10));
+ readBuffer[j++] = (char) (0xdc00 | (iso646 & 0x03ff));
+ continue;
+ }
+ }
+ else
+ {
+ // The five and six byte encodings aren't supported;
+ // they exceed the Unicode (and XML) range. (A byte of the form 10xxxxxx, i.e. a continuation byte with no lead byte, also reaches this branch.)
+ encodingError("unsupported five or six byte UTF-8 sequence",
+ 0xff & b1, i);
+ // NOTREACHED
+ c = 0;
+ }
+ }
+ else
+ {
+ // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
+ // (US-ASCII character, "common" case, one branch to here)
+ c = (char) b1;
+ }
+ readBuffer[j++] = c;
+ if (c == '\r')
+ {
+ sawCR = true;
+ }
+ }
+ // How many characters have we read?
+ readBufferLength = j;
+ }
+
+ /**
+ * Return the next byte value in a UTF-8 sequence.
+ * If it is not possible to get a byte from the current
+ * entity, throw an exception.
+ * @param pos The current position in the rawReadBuffer.
+ * @param count The number of bytes in the rawReadBuffer
+ * @return The significant six bits of a non-initial byte in
+ * a UTF-8 sequence.
+ * @exception EOFException If the sequence is incomplete.
+ */
+ private int getNextUtf8Byte(int pos, int count)
+ throws SAXException, IOException
+ {
+ int val;
+
+ // Take a character from the buffer
+ // or from the actual input stream.
+ if (pos < count)
+ {
+ val = rawReadBuffer[pos];
+ }
+ else
+ {
+ val = is.read();
+ if (val == -1)
+ {
+ encodingError("unfinished multi-byte UTF-8 sequence at EOF",
+ -1, pos);
+ }
+ }
+
+ // Check for the correct bits at the start.
+ if ((val & 0xc0) != 0x80)
+ {
+ encodingError("bad continuation of multi-byte UTF-8 sequence",
+ val, pos + 1);
+ }
+
+ // Return the significant bits.
+ return (val & 0x3f);
+ }
+
+ /**
+ * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into
+ * UTF-16 characters.
+ *
+ * When readDataChunk () calls this method, the raw bytes are in
+ * rawReadBuffer, and the final characters will appear in
+ * readBuffer.
+ *
+ * @param count The number of bytes to convert.
+ * @param mask For ASCII conversion, 0x7f; else, 0xff.
+ * @see #readDataChunk
+ * @see #rawReadBuffer
+ * @see #readBuffer
+ */
+ private void copyIso8859_1ReadBuffer(int count, char mask)
+ throws IOException
+ {
+ int i, j;
+ for (i = 0, j = readBufferPos; i < count; i++, j++)
+ {
+ char c = (char) (rawReadBuffer[i] & 0xff);
+ if ((c & mask) != 0)
+ {
+ throw new CharConversionException("non-ASCII character U+"
+ + Integer.toHexString(c));
+ }
+ if (c == 0x0085 && xmlVersion == XML_11)
+ {
+ c = '\r';
+ }
+ readBuffer[j] = c;
+ if (c == '\r')
+ {
+ sawCR = true;
+ }
+ }
+ readBufferLength = j;
+ }
+
+ /**
+ * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters
+ * (as used in Java string manipulation).
+ *
+ * When readDataChunk () calls this method, the raw bytes are in
+ * rawReadBuffer, and the final characters will appear in
+ * readBuffer.
+ * @param count The number of bytes to convert.
+ * @param shift1 The number of bits to shift byte 1.
+ * @param shift2 The number of bits to shift byte 2
+ * @see #readDataChunk
+ * @see #rawReadBuffer
+ * @see #readBuffer
+ */
+ private void copyUcs2ReadBuffer(int count, int shift1, int shift2)
+ throws SAXException
+ {
+ int j = readBufferPos;
+
+ if (count > 0 && (count % 2) != 0)
+ {
+ encodingError("odd number of bytes in UCS-2 encoding", -1, count);
+ }
+ // The loops are faster with less internal brancing; hence two
+ if (shift1 == 0)
+ { // "UTF-16-LE"
+ for (int i = 0; i < count; i += 2)
+ {
+ char c = (char) (rawReadBuffer[i + 1] << 8);
+ c |= 0xff & rawReadBuffer[i];
+ readBuffer[j++] = c;
+ if (c == '\r')
+ {
+ sawCR = true;
+ }
+ }
+ }
+ else
+ { // "UTF-16-BE"
+ for (int i = 0; i < count; i += 2)
+ {
+ char c = (char) (rawReadBuffer[i] << 8);
+ c |= 0xff & rawReadBuffer[i + 1];
+ readBuffer[j++] = c;
+ if (c == '\r')
+ {
+ sawCR = true;
+ }
+ }
+ }
+ readBufferLength = j;
+ }
+
+ /**
+  * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
+  *
+  * When readDataChunk () calls this method, the raw bytes are in
+  * rawReadBuffer, and the final characters will appear in readBuffer.
+  * Values up to 0xffff become a single char; values from 0x00010000 to
+  * 0x0010ffff become a surrogate pair. Anything else (including values
+  * with the top bit set) is reported through encodingError ().
+  *
+  * @param count The number of bytes to convert.
+  * @param shift1 The number of bits to shift byte 1.
+  * @param shift2 The number of bits to shift byte 2.
+  * @param shift3 The number of bits to shift byte 3.
+  * @param shift4 The number of bits to shift byte 4.
+  * @see #readDataChunk
+  * @see #rawReadBuffer
+  * @see #readBuffer
+  */
+ private void copyUcs4ReadBuffer(int count, int shift1, int shift2,
+                                 int shift3, int shift4)
+   throws SAXException
+ {
+   int j = readBufferPos;
+
+   if (count > 0 && (count % 4) != 0)
+     {
+       encodingError("number of bytes in UCS-4 encoding " +
+                     "not divisible by 4",
+                     -1, count);
+     }
+   for (int i = 0; i < count; i += 4)
+     {
+       int value = (((rawReadBuffer[i] & 0xff) << shift1) |
+                    ((rawReadBuffer[i + 1] & 0xff) << shift2) |
+                    ((rawReadBuffer[i + 2] & 0xff) << shift3) |
+                    ((rawReadBuffer[i + 3] & 0xff) << shift4));
+       if (value < 0 || value > 0x0010ffff)
+         {
+           // A set top bit makes the int negative; that is out of range too.
+           encodingError("UCS-4 value out of range for Unicode",
+                         value, i);
+         }
+       else if (value <= 0x0000ffff)
+         {
+           readBuffer[j++] = (char) value;
+           if (value == (int) '\r')
+             {
+               sawCR = true;
+             }
+         }
+       else
+         {
+           // Supplementary plane: same pair construction as the UTF-8 path.
+           value -= 0x010000;
+           readBuffer[j++] = (char) (0xd800 | (value >> 10));
+           readBuffer[j++] = (char) (0xdc00 | (value & 0x03ff));
+         }
+     }
+   readBufferLength = j;
+ }
+
+ /**
+ * Report a character encoding error.
+ */
+ private void encodingError(String message, int value, int offset)
+ throws SAXException
+ {
+ if (value != -1)
+ {
+ message = message + " (character code: 0x" +
+ Integer.toHexString(value) + ')';
+ error(message);
+ }
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Local Variables.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+  * Restore all per-parse state to its initial values, allocating new buffers and tables.
+  */
+ private void initializeVariables()
+ {
+   // Position tracking starts at the first line.
+   line = 1;
+   column = 0;
+
+   // Empty data and name buffers.
+   dataBuffer = new char[DATA_BUFFER_INITIAL];
+   dataBufferPos = 0;
+   nameBuffer = new char[NAME_BUFFER_INITIAL];
+   nameBufferPos = 0;
+
+   // Empty DTD tables.
+   notationInfo = new HashMap();
+   entityInfo = new HashMap();
+   elementInfo = new HashMap();
+   skippedPE = false;
+
+   // No element is open yet.
+   currentElementContent = CONTENT_UNDECLARED;
+   currentElement = null;
+
+   // No input source is open yet.
+   sourceType = INPUT_NONE;
+   externalEntity = null;
+   inputStack = new LinkedList();
+   entityStack = new LinkedList();
+
+   // Attribute scratch space and raw input buffer.
+   tagAttributes = new String[100];
+   tagAttributePos = 0;
+   rawReadBuffer = new byte[READ_BUFFER_MAX];
+   readBufferOverflow = -1;
+
+   // Fresh scratch InputSource and symbol table.
+   scratch = new InputSource();
+   symbolTable = new Object[SYMBOL_TABLE_LENGTH][];
+
+   // All parsing-mode flags start cleared.
+   inCDATA = false;
+   doReport = false;
+   peIsError = false;
+   expandPE = false;
+   inLiteral = false;
+ }
+
+ static class ExternalIdentifiers
+ {
+   // Plain holder for the three identifiers; any of them may be null.
+   String publicId, systemId, baseUri;
+
+   /** Creates a holder with every identifier left null. */
+   ExternalIdentifiers()
+   {
+     this(null, null, null);
+   }
+
+   /** Creates a holder carrying the given identifiers as-is. */
+   ExternalIdentifiers(String publicId, String systemId, String baseUri)
+   {
+     this.baseUri = baseUri;
+     this.systemId = systemId;
+     this.publicId = publicId;
+   }
+ }
+
+ static class EntityInfo
+ {
+   // Plain holder for the properties of one entity; no behavior of its own.
+   String value;
+   String notationName;
+   ExternalIdentifiers ids;
+   int type; // presumably an ENTITY_* constant -- TODO confirm
+
+ }
+
+ static class AttributeDecl
+ {
+   // Plain holder for one attribute declaration; it has no behavior of
+   // its own and every field starts at its Java default (null / 0).
+   String type, value;
+   int valueType;
+   String enumeration;
+   String defaultValue;
+
+ }
+
+ static class ElementDecl
+ {
+   // Plain holder for one element declaration; no behavior of its own.
+   String contentModel;
+   int contentType; // presumably a CONTENT_* constant -- TODO confirm
+   HashMap attributes; // raw map; key/value types are not visible here
+
+ }
+
+ static class Input
+ {
+   // Plain holder with one slot per piece of input-reading state; the
+   // field names mirror the parser's own same-named fields.
+   int sourceType, encoding;
+
+   // Handles on the underlying source.
+   URLConnection externalEntity;
+   InputStream is;
+   Reader reader;
+
+   // Read buffer plus the position bookkeeping that goes with it.
+   char[] readBuffer;
+   int readBufferPos, readBufferLength, readBufferOverflow;
+   int line, column;
+   int currentByteCount;
+ }
+
+}
diff --git a/libjava/classpath/gnu/xml/aelfred2/XmlReader.java b/libjava/classpath/gnu/xml/aelfred2/XmlReader.java
new file mode 100644
index 000000000..e0a047612
--- /dev/null
+++ b/libjava/classpath/gnu/xml/aelfred2/XmlReader.java
@@ -0,0 +1,373 @@
+/* XmlReader.java --
+ Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.xml.aelfred2;
+
+import java.io.IOException;
+import java.util.Locale;
+
+import org.xml.sax.*;
+import org.xml.sax.ext.*;
+
+import gnu.xml.pipeline.EventFilter;
+import gnu.xml.pipeline.ValidationConsumer;
+
+
+/**
+ * This SAX2 parser optionally layers a validator over the Ælfred2
+ * SAX2 parser. While this will not evaluate every XML validity constraint,
+ * it does support all the validity constraints that are of any real utility
+ * outside the strict SGML-compatible world. See the documentation for the
+ * SAXDriver class for information about the SAX2 features and properties
+ * that are supported, and documentation for the ValidationConsumer for
+ * information about what validity constraints may not be supported.
+ * (Ælfred2 tests some of those, even in non-validating mode, to
+ * achieve better conformance.)
+ *
+ * Note that due to its internal construction, you can't change most
+ * handlers until parse() returns. This diverges slightly from SAX, which
+ * expects later binding to be supported. Early binding involves less
+ * runtime overhead, which is an issue for event pipelines as used inside
+ * this parser. Rather than relying on the parser to handle late binding
+ * to your own handlers, do it yourself.
+ *
+ * @see SAXDriver
+ * @see gnu.xml.pipeline.ValidationConsumer
+ *
+ * @author David Brownell
+ */
+public final class XmlReader
+ implements XMLReader
+{
+
+ static class FatalErrorHandler
+ extends DefaultHandler2
+ {
+
+ public void error(SAXParseException e)
+ throws SAXException
+ {
+ throw e;
+ }
+
+ }
+
+ private SAXDriver aelfred2 = new SAXDriver();
+ private EventFilter filter = new EventFilter();
+ private boolean isValidating;
+ private boolean active;
+
+ /**
+ * Constructs a SAX Parser.
+ */
+ public XmlReader()
+ {
+ }
+
+ /**
+ * Constructs a SAX Parser, optionally treating validity errors
+ * as if they were fatal errors.
+ */
+ public XmlReader(boolean invalidIsFatal)
+ {
+ if (invalidIsFatal)
+ {
+ setErrorHandler(new FatalErrorHandler());
+ }
+ }
+
+ /**
+ * SAX2: Returns the object used to report the logical
+ * content of an XML document.
+ */
+ public ContentHandler getContentHandler()
+ {
+ return filter.getContentHandler();
+ }
+
+ /**
+ * SAX2: Assigns the object used to report the logical
+ * content of an XML document.
+ * @exception IllegalStateException if called mid-parse
+ */
+ public void setContentHandler(ContentHandler handler)
+ {
+ if (active)
+ {
+ throw new IllegalStateException("already parsing");
+ }
+ filter.setContentHandler(handler);
+ }
+
+ /**
+ * SAX2: Returns the object used to process declarations related
+ * to notations and unparsed entities.
+ */
+ public DTDHandler getDTDHandler()
+ {
+ return filter.getDTDHandler();
+ }
+
+ /**
+ * SAX1 Assigns DTD handler
+ * @exception IllegalStateException if called mid-parse
+ */
+ public void setDTDHandler(DTDHandler handler)
+ {
+ if (active)
+ {
+ throw new IllegalStateException("already parsing");
+ }
+ filter.setDTDHandler(handler);
+ }
+
+ /**
+ * SAX2: Returns the object used when resolving external
+ * entities during parsing (both general and parameter entities).
+ */
+ public EntityResolver getEntityResolver()
+ {
+ return aelfred2.getEntityResolver();
+ }
+
+ /**
+ * SAX1 Assigns parser's entity resolver
+ */
+ public void setEntityResolver(EntityResolver handler)
+ {
+ aelfred2.setEntityResolver(handler);
+ }
+
+ /**
+ * SAX2: Returns the object used to receive callbacks for XML
+ * errors of all levels (fatal, nonfatal, warning); this is never null;
+ */
+ public ErrorHandler getErrorHandler()
+ {
+ return aelfred2.getErrorHandler();
+ }
+
+ /**
+ * SAX1 Assigns error handler
+ * @exception IllegalStateException if called mid-parse
+ */
+ public void setErrorHandler(ErrorHandler handler)
+ {
+ if (active)
+ {
+ throw new IllegalStateException("already parsing");
+ }
+ aelfred2.setErrorHandler(handler);
+ }
+
+ /**
+ * SAX2: Assigns the specified property.
+ * @exception IllegalStateException if called mid-parse
+ */
+ public void setProperty(String propertyId, Object value)
+ throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+ if (active)
+ {
+ throw new IllegalStateException("already parsing");
+ }
+ if (getProperty(propertyId) != value)
+ {
+ filter.setProperty(propertyId, value);
+ }
+ }
+
+ /**
+ * SAX2: Returns the specified property.
+ */
+ public Object getProperty(String propertyId)
+ throws SAXNotRecognizedException
+ {
+ if ((SAXDriver.PROPERTY + "declaration-handler").equals(propertyId)
+ || (SAXDriver.PROPERTY + "lexical-handler").equals(propertyId))
+ {
+ return filter.getProperty(propertyId);
+ }
+ throw new SAXNotRecognizedException(propertyId);
+ }
+
+ private void forceValidating()
+ throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+ aelfred2.setFeature(SAXDriver.FEATURE + "namespace-prefixes",
+ true);
+ aelfred2.setFeature(SAXDriver.FEATURE + "external-general-entities",
+ true);
+ aelfred2.setFeature(SAXDriver.FEATURE + "external-parameter-entities",
+ true);
+ }
+
+ /**
+ * SAX2: Sets the state of features supported in this parser.
+ * Note that this parser requires reporting of namespace prefixes when
+ * validating.
+ */
+ public void setFeature(String featureId, boolean state)
+ throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+ boolean value = getFeature(featureId);
+
+ if (state == value)
+ {
+ return;
+ }
+
+ if ((SAXDriver.FEATURE + "validation").equals(featureId))
+ {
+ if (active)
+ {
+ throw new SAXNotSupportedException("already parsing");
+ }
+ if (state)
+ {
+ forceValidating();
+ }
+ isValidating = state;
+ }
+ else
+ {
+ aelfred2.setFeature(featureId, state);
+ }
+ }
+
+ /**
+ * SAX2: Tells whether this parser supports the specified feature.
+ * At this time, this directly parallels the underlying SAXDriver,
+ * except that validation is optionally supported.
+ *
+ * @see SAXDriver
+ */
+ public boolean getFeature(String featureId)
+ throws SAXNotRecognizedException, SAXNotSupportedException
+ {
+ if ((SAXDriver.FEATURE + "validation").equals(featureId))
+ {
+ return isValidating;
+ }
+
+ return aelfred2.getFeature(featureId);
+ }
+
+ /**
+ * SAX1: Sets the locale used for diagnostics; currently,
+ * only locales using the English language are supported.
+ * @param locale The locale for which diagnostics will be generated
+ */
+ public void setLocale(Locale locale)
+ throws SAXException
+ {
+ aelfred2.setLocale(locale);
+ }
+
+ /**
+ * SAX1: Preferred API to parse an XML document, using a
+ * system identifier (URI).
+ */
+ public void parse(String systemId)
+ throws SAXException, IOException
+ {
+ parse(new InputSource(systemId));
+ }
+
+ /**
+ * SAX1: Underlying API to parse an XML document, used
+ * directly when no URI is available. When this is invoked,
+ * and the parser is set to validate, some features will be
+ * automatically reset to appropriate values: for reporting
+ * namespace prefixes, and incorporating external entities.
+ *
+ * @param source The XML input source.
+ *
+ * @exception IllegalStateException if called mid-parse
+ * @exception SAXException The handlers may throw any SAXException,
+ * and the parser normally throws SAXParseException objects.
+ * @exception IOException IOExceptions are normally through through
+ * the parser if there are problems reading the source document.
+ */
+ public void parse(InputSource source)
+ throws SAXException, IOException
+ {
+ EventFilter next;
+ boolean nsdecls;
+
+ synchronized (aelfred2)
+ {
+ if (active)
+ {
+ throw new IllegalStateException("already parsing");
+ }
+ active = true;
+ }
+
+ // set up the output pipeline
+ if (isValidating)
+ {
+ forceValidating();
+ next = new ValidationConsumer(filter);
+ }
+ else
+ {
+ next = filter;
+ }
+
+ // connect pipeline and error handler
+ // don't let _this_ call to bind() affect xmlns* attributes
+ nsdecls = aelfred2.getFeature(SAXDriver.FEATURE + "namespace-prefixes");
+ EventFilter.bind(aelfred2, next);
+ if (!nsdecls)
+ {
+ aelfred2.setFeature(SAXDriver.FEATURE + "namespace-prefixes",
+ false);
+ }
+
+ // parse, clean up
+ try
+ {
+ aelfred2.parse(source);
+ }
+ finally
+ {
+ active = false;
+ }
+ }
+
+}
diff --git a/libjava/classpath/gnu/xml/aelfred2/package.html b/libjava/classpath/gnu/xml/aelfred2/package.html
new file mode 100644
index 000000000..e20425844
--- /dev/null
+++ b/libjava/classpath/gnu/xml/aelfred2/package.html
@@ -0,0 +1,506 @@
+
+
+ This package contains Ælfred2, which includes an
+enhanced SAX2-compatible version of the Ælfred
+non-validating XML parser, a modular (and hence optional)
+DTD validating parser, and modular (and hence optional)
+JAXP glue to those.
+Use these like any other SAX2 parsers. Ælfred is an XML parser written in the Java programming language.
+
+ In most Java applets and applications, XML should not be the central
+feature; instead, XML is the means to another end, such as loading
+configuration information, reading meta-data, or parsing transactions. When an XML parser is only a single component of a much larger
+program, it cannot be large, slow, or resource-intensive. With Java
+applets, in particular, code size is a significant issue. The standard
+modem is still not operating at 56 Kbaud, or sometimes even with data
+compression. Assuming an uncompressed 28.8 Kbaud modem, only about
+3 KBytes can be downloaded in one second; compression often doubles
+that speed, but a V.90 modem may not provide another doubling. When
+used with embedded processors, similar size concerns apply. Ælfred is designed for easy and efficient use over the Internet,
+based on the following principles: As you can see from this list, Ælfred is designed for production
+use, but neither validation nor perfect conformance was a requirement.
+Good validating parsers exist, including one in this package,
+and you should use them as appropriate. (See conformance reviews
+available at http://www.xml.com)
+ One of the main goals of Ælfred2 was to significantly improve
+conformance, while not significantly affecting the other goals stated above.
+Since the only use of this parser is with SAX, some classes could be
+removed, and so the overall size of Ælfred was actually reduced.
+Subsequent performance work produced a notable speedup (over twenty
+percent on larger files). That is, the tradeoffs between speed, size, and
+conformance were re-targeted towards conformance and support of newer APIs
+(SAX2), with a positive performance impact. The role anticipated for this version of Ælfred is as a
+lightweight Free Software SAX parser that can be used in essentially every
+Java program where the handful of conformance violations (noted below)
+are acceptable.
+That certainly includes applets, and
+nowadays one must also mention embedded systems as being even more
+size-critical.
+At this writing, all parsers that are more conformant are
+significantly larger, even when counting the optional
+validation support in this version of Ælfred. Ælfred the Great (AElfred in ASCII) was King of Wessex, and
+some say King of England, at the time of his death in 899 AD.
+Ælfred introduced a wide-spread literacy program in the hope that
+his people would learn to read English, at least, if Latin was too
+difficult for them. This Ælfred hopes to bring another sort of
+literacy to Java, using XML, at least, if full SGML is too difficult. The initial Æ ligature ("AE") is also a reminder that XML is
+not limited to ASCII. The Ælfred parser currently builds in support for a handful
+of input encodings. Of course these include UTF-8 and UTF-16, which
+all XML parsers are required to support: If you use any encoding other than UTF-8 or UTF-16 you should
+make sure to label your data appropriately: Encodings accessed through Note that if you are using the Euro symbol with a fixed length
+eight bit encoding, you should probably be using the encoding label
+iso-8859-15 or, with a Microsoft OS, cp-1252.
+Of course, UTF-8 and UTF-16 handle the Euro symbol directly.
+ Known conformance issues should be of negligible importance for
+most applications, and include: When tested against the July 12, 1999 version of the OASIS
+XML Conformance test suite, an earlier version passed 1057 of 1067 tests.
+That contrasts with the original version, which passed 867. The
+current parser is top-ranked in terms of conformance, as is its
+validating sibling (which has some additional conformance violations
+imposed on it by SAX2 API deficiencies as well as some of the more
+curious SGML layering artifacts found in the XML specification). The XML 1.0 specification itself was not without problems,
+and after some delays the W3C has come out with a revised
+"second edition" specification. While that doesn't resolve all
+the problems identified in the XML specification, many of the most
+egregious problems have been resolved. (You still need to drink
+magic Kool-Aid before some DTD-related issues make sense.)
+To the extent possible, this parser conforms to that second
+edition specification, and does well against corrected versions
+of the OASIS/NIST XML conformance test cases. See http://xmlconf.sourceforge.net
+for more information about SAX2/XML conformance testing.
+The software in this package is distributed under the GNU General Public
+License (with a special exception described below).
+
+A copy of GNU General Public License (GPL) is included in this distribution,
+in the file COPYING. If you do not have the source code, it is available at:
+
+ http://www.gnu.org/software/classpath/
+ Some of this documentation was modified from the original
+Ælfred README.txt file. All of it has been updated.
+ *
+ *
+ *
+ *
+ *
+ * Name
+ * Notes
+ *
+ *
+ * (URL)/external-general-entities
+ * Value defaults to true
+ * (URL)/external-parameter-entities
+ * Value defaults to true
+ * (URL)/is-standalone
+ * (PRELIMINARY) Returns true iff the document's parsing
+ * has started (some non-error event after startDocument()
+ * was reported) and the document's standalone flag is set.
+ * (URL)/namespace-prefixes
+ * Value defaults to false (but XML 1.0 names are
+ * always reported)
+ * (URL)/lexical-handler/parameter-entities
+ * Value is fixed at true
+ * (URL)/namespaces
+ * Value defaults to true
+ * (URL)/resolve-dtd-uris
+ * (PRELIMINARY) Value defaults to true
+ * (URL)/string-interning
+ * Value is fixed at true
+ * (URL)/use-attributes2
+ * (PRELIMINARY) Value is fixed at true
+ * (URL)/use-entity-resolver2
+ * (PRELIMINARY) Value defaults to true
+ *
+ * (URL)/validation
+ * Value is fixed at false
+ *
+ *
+ * (URL)/declaration-handler
+ * A declaration handler may be provided.
+ * (URL)/lexical-handler
+ * A lexical handler may be provided. SAXDriver
class as your entry point, as all
+ * internal parser interfaces are subject to change.
+ *
+ * @author Written by David Megginson <dmeggins@microstar.com>
+ * (version 1.2a with bugfixes)
+ * @author Updated by David Brownell <dbrownell@users.sourceforge.net>
+ * @see SAXDriver
+ */
+final class XmlParser
+{
+
+ // avoid slow per-character readCh()
+ private final static boolean USE_CHEATS = true;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Constants.
+ ////////////////////////////////////////////////////////////////////////
+
+ //
+ // Constants for element content type.
+ //
+
+ /**
+ * Constant: an element has not been declared.
+ * @see #getElementContentType
+ */
+ public final static int CONTENT_UNDECLARED = 0;
+
+ /**
+ * Constant: the element has a content model of ANY.
+ * @see #getElementContentType
+ */
+ public final static int CONTENT_ANY = 1;
+
+ /**
+ * Constant: the element has declared content of EMPTY.
+ * @see #getElementContentType
+ */
+ public final static int CONTENT_EMPTY = 2;
+
+ /**
+ * Constant: the element has mixed content.
+ * @see #getElementContentType
+ */
+ public final static int CONTENT_MIXED = 3;
+
+ /**
+ * Constant: the element has element content.
+ * @see #getElementContentType
+ */
+ public final static int CONTENT_ELEMENTS = 4;
+
+
+ //
+ // Constants for the entity type.
+ //
+
+ /**
+ * Constant: the entity has not been declared.
+ * @see #getEntityType
+ */
+ public final static int ENTITY_UNDECLARED = 0;
+
+ /**
+ * Constant: the entity is internal.
+ * @see #getEntityType
+ */
+ public final static int ENTITY_INTERNAL = 1;
+
+ /**
+ * Constant: the entity is external, non-parsable data.
+ * @see #getEntityType
+ */
+ public final static int ENTITY_NDATA = 2;
+
+ /**
+ * Constant: the entity is external XML data.
+ * @see #getEntityType
+ */
+ public final static int ENTITY_TEXT = 3;
+
+ //
+ // Attribute type constants are interned literal strings.
+ //
+
+ //
+ // Constants for supported encodings. "external" is just a flag.
+ //
+ private final static int ENCODING_EXTERNAL = 0;
+ private final static int ENCODING_UTF_8 = 1;
+ private final static int ENCODING_ISO_8859_1 = 2;
+ private final static int ENCODING_UCS_2_12 = 3;
+ private final static int ENCODING_UCS_2_21 = 4;
+ private final static int ENCODING_UCS_4_1234 = 5;
+ private final static int ENCODING_UCS_4_4321 = 6;
+ private final static int ENCODING_UCS_4_2143 = 7;
+ private final static int ENCODING_UCS_4_3412 = 8;
+ private final static int ENCODING_ASCII = 9;
+
+ //
+ // Constants for attribute default value.
+ //
+
+ /**
+ * Constant: the attribute is not declared.
+ * @see #getAttributeDefaultValueType
+ */
+ public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
+
+ /**
+ * Constant: the attribute has a literal default value specified.
+ * @see #getAttributeDefaultValueType
+ * @see #getAttributeDefaultValue
+ */
+ public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
+
+ /**
+ * Constant: the attribute was declared #IMPLIED.
+ * @see #getAttributeDefaultValueType
+ */
+ public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
+
+ /**
+ * Constant: the attribute was declared #REQUIRED.
+ * @see #getAttributeDefaultValueType
+ */
+ public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
+
+ /**
+ * Constant: the attribute was declared #FIXED.
+ * @see #getAttributeDefaultValueType
+ * @see #getAttributeDefaultValue
+ */
+ public final static int ATTRIBUTE_DEFAULT_FIXED = 34;
+
+ //
+ // Constants for input.
+ //
+ private final static int INPUT_NONE = 0;
+ private final static int INPUT_INTERNAL = 1;
+ private final static int INPUT_STREAM = 3;
+ private final static int INPUT_READER = 5;
+
+ //
+ // Flags for reading literals.
+ //
+ // expand general entity refs (attribute values in dtd and content)
+ private final static int LIT_ENTITY_REF = 2;
+ // normalize this value (space chars) (attributes, public ids)
+ private final static int LIT_NORMALIZE = 4;
+ // literal is an attribute value
+ private final static int LIT_ATTRIBUTE = 8;
+ // don't expand parameter entities
+ private final static int LIT_DISABLE_PE = 16;
+ // don't expand [or parse] character refs
+ private final static int LIT_DISABLE_CREF = 32;
+ // don't parse general entity refs
+ private final static int LIT_DISABLE_EREF = 64;
+ // literal is a public ID value
+ private final static int LIT_PUBID = 256;
+
+ //
+ // Flags affecting PE handling in DTDs (if expandPE is true).
+ // PEs expand with space padding, except inside literals.
+ //
+ private final static int CONTEXT_NORMAL = 0;
+ private final static int CONTEXT_LITERAL = 1;
+
+ // Emit warnings for relative URIs with no base URI.
+ static boolean uriWarnings;
+ static
+ {
+ String key = "gnu.xml.aelfred2.XmlParser.uriWarnings";
+ GetPropertyAction a = new GetPropertyAction(key);
+ uriWarnings = "true".equals(AccessController.doPrivileged(a));
+ }
+
+ //
+ // The current XML handler interface.
+ //
+ private SAXDriver handler;
+
+ //
+ // I/O information.
+ //
+ private Reader reader; // current reader
+ private InputStream is; // current input stream
+ private int line; // current line number
+ private int column; // current column number
+ private int sourceType; // type of input source (presumably one of INPUT_*)
+ private LinkedList inputStack; // stack of input sources
+ private URLConnection externalEntity; // current external entity
+ private int encoding; // current character encoding (one of ENCODING_*)
+ private int currentByteCount; // bytes read from current source
+ private InputSource scratch; // temporary
+
+ //
+ // Buffers for decoded but unparsed character input.
+ //
+ private char[] readBuffer;
+ private int readBufferPos;
+ private int readBufferLength;
+ private int readBufferOverflow; // overflow from last data chunk.
+
+ //
+ // Buffer for undecoded raw byte input.
+ //
+ private final static int READ_BUFFER_MAX = 16384;
+ private byte[] rawReadBuffer;
+
+
+ //
+ // Buffer for attribute values, char refs, DTD stuff.
+ //
+ private static int DATA_BUFFER_INITIAL = 4096;
+ private char[] dataBuffer;
+ private int dataBufferPos;
+
+ //
+ // Buffer for parsed names.
+ //
+ private static int NAME_BUFFER_INITIAL = 1024;
+ private char[] nameBuffer;
+ private int nameBufferPos;
+
+ //
+ // Save any standalone flag
+ //
+ private boolean docIsStandalone;
+
+ //
+ // Hashtables for DTD information on elements, entities, and notations.
+ // Populated until we start ignoring decls (because of skipping a PE)
+ //
+ private HashMap elementInfo;
+ private HashMap entityInfo;
+ private HashMap notationInfo;
+ private boolean skippedPE;
+
+ //
+ // Element type currently in force.
+ //
+ private String currentElement;
+ private int currentElementContent;
+
+ //
+ // Stack of entity names, to detect recursion.
+ //
+ private LinkedList entityStack;
+
+ //
+ // PE expansion is enabled in most chunks of the DTD, not all.
+ // When it's enabled, literals are treated differently.
+ //
+ private boolean inLiteral;
+ private boolean expandPE;
+ private boolean peIsError;
+
+ //
+ // can't report entity expansion inside two constructs:
+ // - attribute expansions (internal entities only)
+ // - markup declarations (parameter entities only)
+ //
+ private boolean doReport;
+
+ //
+ // Symbol table, for caching interned names.
+ //
+ // These show up wherever XML names or nmtokens are used: naming elements,
+ // attributes, PIs, notations, entities, and enumerated attribute values.
+ //
+ // NOTE: This hashtable doesn't grow. The default size is intended to be
+ // rather large for most documents. Example: one snapshot of the DocBook
+ // XML 4.1 DTD used only about 350 such names. As a rule, only pathological
+ // documents (ones that don't reuse names) should ever see much collision.
+ //
+ // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing.
+ // "2039" keeps the hash table size at about two memory pages on typical
+ // 32 bit hardware.
+ //
+ private final static int SYMBOL_TABLE_LENGTH = 2039;
+
+ private Object[][] symbolTable;
+
+ //
+ // Hash table of attributes found in current start tag.
+ //
+ private String[] tagAttributes;
+ private int tagAttributePos;
+
+ //
+ // Utility flag: have we noticed a CR while reading the last
+ // data chunk? If so, we will have to go back and normalise
+ // CR or CR/LF line ends.
+ //
+ private boolean sawCR;
+
+ //
+ // Utility flag: are we in CDATA? If so, whitespace isn't ignorable.
+ //
+ private boolean inCDATA;
+
+ //
+ // Xml version.
+ //
+ private static final int XML_10 = 0;
+ private static final int XML_11 = 1;
+ private int xmlVersion = XML_10;
+
+ //////////////////////////////////////////////////////////////////////
+ // Constructors.
+ ////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Construct a new parser with no associated handler.
+ * The constructor itself performs no initialization; the handler is
+ * supplied separately through {@link #setHandler}.
+ * @see #setHandler
+ * @see #parse
+ */
+ // package private
+ XmlParser()
+ {
+ }
+
+ /**
+ * Set the handler that will receive parsing events.
+ * The reference is stored as-is; no validation is performed here.
+ * @param handler The handler to receive callback events.
+ * @see #parse
+ */
+ // package private
+ void setHandler(SAXDriver handler)
+ {
+ this.handler = handler;
+ }
+
+ /**
+ * Parse an XML document from the character stream, byte stream, or URI
+ * that you provide (in that order of preference). Any URI that you
+ * supply will become the base URI for resolving relative URI, and may
+ * be used to acquire a reader or byte stream.
+ *
+ *
+ * [1] document ::= prolog element Misc*
+ *
+ *
+ * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
+ *
+ * <p>(The <code>&lt;!--</code> has already been read.)
+ */
+ private void parseComment()
+ throws Exception
+ {
+ char c;
+ boolean saved = expandPE;
+
+ expandPE = false;
+ parseUntil(endDelimComment);
+ require('>');
+ expandPE = saved;
+ handler.comment(dataBuffer, 0, dataBufferPos);
+ dataBufferPos = 0;
+ }
+
+ // Delimiters that open and close a processing instruction: parseMisc()
+ // tests for startDelimPI and parsePI() scans up to endDelimPI.
+ static final char[] startDelimPI = { '<', '?' };
+ static final char[] endDelimPI = { '?', '>' };
+
+ /**
+ * Parse a processing instruction and do a call-back.
+ *
+ * [16] PI ::= '<?' PITarget
+ * (S (Char* - (Char* '?>' Char*)))?
+ * '?>'
+ * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') )
+ *
+ * <?
has already been read.)
+ */
+ private void parsePI()
+ throws SAXException, IOException
+ {
+ String name;
+ boolean saved = expandPE;
+
+ expandPE = false;
+ name = readNmtoken(true);
+ //NE08
+ if (name.indexOf(':') >= 0)
+ {
+ error("Illegal character(':') in processing instruction name ",
+ name, null);
+ }
+ if ("xml".equalsIgnoreCase(name))
+ {
+ error("Illegal processing instruction target", name, null);
+ }
+ if (!tryRead(endDelimPI))
+ {
+ requireWhitespace();
+ parseUntil(endDelimPI);
+ }
+ expandPE = saved;
+ handler.processingInstruction(name, dataBufferToString());
+ }
+
+ // Character sequence "]]>" (the CDEnd production).
+ static final char[] endDelimCDATA = { ']', ']', '>' };
+
+ // Set by parseCharData() once it has seen character data that is not
+ // pure whitespace; while set, parseCharData() no longer reports
+ // whitespace in element-only content as ignorable.  Where the flag is
+ // cleared is not visible in this part of the file.
+ private boolean isDirtyCurrentElement;
+
+ /**
+ * Parse a CDATA section.
+ *
+ * [18] CDSect ::= CDStart CData CDEnd
+ * [19] CDStart ::= '<![CDATA['
+ * [20] CData ::= (Char* - (Char* ']]>' Char*))
+ * [21] CDEnd ::= ']]>'
+ *
+ *
+ * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
+ *
+ * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
+ * @return the encoding in the declaration, uppercased; or null
+ * @see #parseTextDecl
+ * @see #setupDecoding
+ */
+ // Parses the XML declaration at the start of the document entity.
+ //
+ // Reads the mandatory version, then the optional encoding and
+ // standalone pseudo-attributes, and finally the closing "?>".
+ //
+ // ignoreEncoding: when true, an encoding declaration is still read and
+ //   returned, but setupDecoding() is not called for it.
+ // Returns the encoding name exactly as read from the declaration, or
+ //   null when the declaration has no encoding pseudo-attribute.
+ // Side effects: sets xmlVersion, and sets docIsStandalone to true when
+ //   standalone="yes" is declared.
+ private String parseXMLDecl(boolean ignoreEncoding)
+   throws SAXException, IOException
+ {
+   String version;
+   String encodingName = null;
+   int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
+
+   // Read the version.
+   require("version");
+   parseEq();
+   checkLegalVersion(version = readLiteral(flags));
+   if (!version.equals("1.0"))
+     {
+       if (version.equals("1.1"))
+         {
+           handler.warn("expected XML version 1.0, not: " + version);
+           xmlVersion = XML_11;
+         }
+       else
+         {
+           error("illegal XML version", version, "1.0 or 1.1");
+         }
+     }
+   else
+     {
+       xmlVersion = XML_10;
+     }
+
+   // Try reading an encoding declaration.
+   boolean white = tryWhitespace();
+
+   if (tryRead("encoding"))
+     {
+       if (!white)
+         {
+           error("whitespace required before 'encoding='");
+         }
+       parseEq();
+       encodingName = readLiteral(flags);
+       if (!ignoreEncoding)
+         {
+           setupDecoding(encodingName);
+         }
+     }
+
+   // Try reading a standalone declaration.  If no encoding was present,
+   // the whitespace seen after the version still counts as the required
+   // separator, so "white" is only refreshed after an encoding.
+   if (encodingName != null)
+     {
+       white = tryWhitespace();
+     }
+   if (tryRead("standalone"))
+     {
+       if (!white)
+         {
+           error("whitespace required before 'standalone='");
+         }
+       parseEq();
+       String standalone = readLiteral(flags);
+       if ("yes".equals(standalone))
+         {
+           docIsStandalone = true;
+         }
+       else if (!"no".equals(standalone))
+         {
+           error("standalone flag must be 'yes' or 'no'");
+         }
+     }
+
+   skipWhitespace();
+   require("?>");
+
+   return encodingName;
+ }
+
+ /**
+ * Parse a text declaration.
+ *
+ * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
+ * [80] EncodingDecl ::= S 'encoding' Eq
+ * ( '"' EncName '"' | "'" EncName "'" )
+ * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+ *
+ * <?xml
' and whitespace have already been read.)
+ * @return the encoding in the declaration, uppercased; or null
+ * @see #parseXMLDecl
+ * @see #setupDecoding
+ */
+ private String parseTextDecl(boolean ignoreEncoding)
+ throws SAXException, IOException
+ {
+ String encodingName = null;
+ int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
+
+ // Read an optional version.
+ if (tryRead ("version"))
+ {
+ String version;
+ parseEq();
+ checkLegalVersion(version = readLiteral(flags));
+
+ if (version.equals("1.1"))
+ {
+ if (xmlVersion == XML_10)
+ {
+ error("external subset has later version number.", "1.0",
+ version);
+ }
+ handler.warn("expected XML version 1.0, not: " + version);
+ xmlVersion = XML_11;
+ }
+ else if (!version.equals("1.0"))
+ {
+ error("illegal XML version", version, "1.0 or 1.1");
+ }
+ requireWhitespace();
+ }
+
+ // Read the encoding.
+ require("encoding");
+ parseEq();
+ encodingName = readLiteral(flags);
+ if (!ignoreEncoding)
+ {
+ setupDecoding(encodingName);
+ }
+ skipWhitespace();
+ require("?>");
+
+ return encodingName;
+ }
+
+ /**
+ * Sets up internal state so that we can decode an entity using the
+ * specified encoding. This is used when we start to read an entity
+ * and we have been given knowledge of its encoding before we start to
+ * read any data (e.g. from a SAX input source or from a MIME type).
+ *
+ *
+ * [27] Misc ::= Comment | PI | S
+ *
+ */
+ // Consumes any run of whitespace, processing instructions and comments
+ // (production [27] Misc), dispatching each PI to parsePI() and each
+ // comment to parseComment().  Returns at the first construct that is
+ // neither, leaving it unread.
+ private void parseMisc()
+   throws Exception
+ {
+   for (;;)
+     {
+       skipWhitespace();
+       if (tryRead(startDelimPI))
+         {
+           parsePI();
+           continue;
+         }
+       if (tryRead(startDelimComment))
+         {
+           parseComment();
+           continue;
+         }
+       // Neither a PI nor a comment: the Misc* run is over.
+       return;
+     }
+ }
+
+ /**
+ * Parse a document type declaration.
+ * <pre>
+ * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
+ * ('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'
+ * </pre>
+ * <p>(The <code>&lt;!DOCTYPE</code> has already been read.)
+ * <p>The declaration is reported through <code>handler.doctypeDecl</code>,
+ * the internal subset (if any) is parsed first, then the external subset
+ * (declared, or supplied by <code>handler.getExternalSubset</code>), and
+ * finally <code>handler.endDoctype</code> is called.
+ */
+ private void parseDoctypedecl()
+ throws Exception
+ {
+ String rootName;
+ ExternalIdentifiers ids;
+
+ // Read the document type name.
+ requireWhitespace();
+ rootName = readNmtoken(true);
+
+ // Read the External subset's IDs
+ skipWhitespace();
+ ids = readExternalIds(false, true);
+
+ // report (a) declaration of name, (b) lexical info (ids)
+ handler.doctypeDecl(rootName, ids.publicId, ids.systemId);
+
+ // Internal subset is parsed first, if present
+ skipWhitespace();
+ if (tryRead('['))
+ {
+
+ // loop until the subset ends
+ while (true)
+ {
+ // PE expansion and reporting are enabled only while skipping the
+ // whitespace between declarations.
+ doReport = expandPE = true;
+ skipWhitespace();
+ doReport = expandPE = false;
+ if (tryRead(']'))
+ {
+ break; // end of subset
+ }
+ else
+ {
+ // WFC, PEs in internal subset (only between decls)
+ peIsError = expandPE = true;
+ parseMarkupdecl();
+ peIsError = expandPE = false;
+ }
+ }
+ }
+ skipWhitespace();
+ require('>');
+
+ // Read the external subset, if any
+ InputSource subset;
+
+ // With no declared system ID, give the handler a chance to supply
+ // an external subset of its own.
+ if (ids.systemId == null)
+ {
+ subset = handler.getExternalSubset(rootName,
+ handler.getSystemId());
+ }
+ else
+ {
+ subset = null;
+ }
+ if (ids.systemId != null || subset != null)
+ {
+ // Push a ">" sentinel beneath the subset so that the loop below
+ // terminates when the subset's input has been used up.
+ pushString(null, ">");
+
+ // NOTE: [dtd] is so we say what SAX2 expects,
+ // though it's misleading (subset, not entire dtd)
+ if (ids.systemId != null)
+ {
+ pushURL(true, "[dtd]", ids, null, null, null, true);
+ }
+ else
+ {
+ handler.warn("modifying document by adding external subset");
+ pushURL(true, "[dtd]",
+ new ExternalIdentifiers(subset.getPublicId(),
+ subset.getSystemId(),
+ null),
+ subset.getCharacterStream(),
+ subset.getByteStream(),
+ subset.getEncoding(),
+ false);
+ }
+
+ // Loop until we end up back at '>'
+ while (true)
+ {
+ doReport = expandPE = true;
+ skipWhitespace();
+ doReport = expandPE = false;
+ if (tryRead('>'))
+ {
+ break;
+ }
+ else
+ {
+ // Unlike the internal subset, peIsError is not set here.
+ expandPE = true;
+ parseMarkupdecl();
+ expandPE = false;
+ }
+ }
+
+ // the ">" string isn't popped yet
+ // so exactly one entry must remain on the input stack; anything
+ // else means the '>' that ended the loop came from the subset itself.
+ if (inputStack.size() != 1)
+ {
+ error("external subset has unmatched '>'");
+ }
+ }
+
+ // done dtd
+ handler.endDoctype();
+ expandPE = false;
+ doReport = true;
+ }
+
+ /**
+ * Parse a markup declaration in the internal or external DTD subset.
+ *
+ * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl
+ * | NotationDecl | PI | Comment
+ * [30] extSubsetDecl ::= (markupdecl | conditionalSect
+ * | PEReference | S) *
+ *
+ *
+ * [39] element ::= EmptyElementTag | STag content ETag
+ * [40] STag ::= '<' Name (S Attribute)* S? '>'
+ * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
+ *
+ *
+ * [41] Attribute ::= Name Eq AttValue
+ *
+ * @param name The name of the attribute's element.
+ * @see SAXDriver#attribute
+ */
+ private void parseAttribute(String name)
+ throws Exception
+ {
+ String aname;
+ String type;
+ String value;
+ int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
+
+ // Read the attribute name.
+ aname = readNmtoken(true);
+ type = getAttributeType(name, aname);
+
+ // Parse '='
+ parseEq();
+
+ // Read the value, normalizing whitespace
+ // unless it is CDATA.
+ if (handler.stringInterning)
+ {
+ if (type == "CDATA" || type == null)
+ {
+ value = readLiteral(flags);
+ }
+ else
+ {
+ value = readLiteral(flags | LIT_NORMALIZE);
+ }
+ }
+ else
+ {
+ if (type == null || type.equals("CDATA"))
+ {
+ value = readLiteral(flags);
+ }
+ else
+ {
+ value = readLiteral(flags | LIT_NORMALIZE);
+ }
+ }
+
+ // WFC: no duplicate attributes
+ for (int i = 0; i < tagAttributePos; i++)
+ {
+ if (aname.equals(tagAttributes [i]))
+ {
+ error("duplicate attribute", aname, null);
+ }
+ }
+
+ // Inform the handler about the
+ // attribute.
+ handler.attribute(aname, value, true);
+ dataBufferPos = 0;
+
+ // Note that the attribute has been
+ // specified.
+ if (tagAttributePos == tagAttributes.length)
+ {
+ String newAttrib[] = new String[tagAttributes.length * 2];
+ System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);
+ tagAttributes = newAttrib;
+ }
+ tagAttributes[tagAttributePos++] = aname;
+ }
+
+ /**
+ * Parse an equals sign surrounded by optional whitespace.
+ * <pre>
+ * [25] Eq ::= S? '=' S?
+ * </pre>
+ * Whitespace on either side is skipped; the '=' itself is mandatory
+ * and is checked with <code>require('=')</code>.
+ */
+ private void parseEq()
+ throws SAXException, IOException
+ {
+ skipWhitespace();
+ require('=');
+ skipWhitespace();
+ }
+
+ /**
+ * Parse an end tag.
+ *
+ * [42] ETag ::= '' Name S? '>'
+ *
+ *
+ * [43] content ::= (element | CharData | Reference
+ * | CDSect | PI | Comment)*
+ * [67] Reference ::= EntityRef | CharRef
+ *
+ *
+ * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
+ *
+ */
+ private void parseContentspec(String name)
+ throws Exception
+ {
+ // FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ...
+ if (tryRead("EMPTY"))
+ {
+ setElement(name, CONTENT_EMPTY, null, null);
+ if (!skippedPE)
+ {
+ handler.getDeclHandler().elementDecl(name, "EMPTY");
+ }
+ return;
+ }
+ else if (tryRead("ANY"))
+ {
+ setElement(name, CONTENT_ANY, null, null);
+ if (!skippedPE)
+ {
+ handler.getDeclHandler().elementDecl(name, "ANY");
+ }
+ return;
+ }
+ else
+ {
+ String model;
+ char[] saved;
+
+ require('(');
+ saved = readBuffer;
+ dataBufferAppend('(');
+ skipWhitespace();
+ if (tryRead("#PCDATA"))
+ {
+ dataBufferAppend("#PCDATA");
+ parseMixed(saved);
+ model = dataBufferToString();
+ setElement(name, CONTENT_MIXED, model, null);
+ }
+ else
+ {
+ parseElements(saved);
+ model = dataBufferToString();
+ setElement(name, CONTENT_ELEMENTS, model, null);
+ }
+ if (!skippedPE)
+ {
+ handler.getDeclHandler().elementDecl(name, model);
+ }
+ }
+ }
+
+ /**
+ * Parse an element-content model.
+ *
+ * [47] elements ::= (choice | seq) ('?' | '*' | '+')?
+ * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')'
+ * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
+ *
+ *
+ *
+ * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
+ *
+ */
+ private void parseCp()
+ throws Exception
+ {
+ if (tryRead('('))
+ {
+ dataBufferAppend('(');
+ parseElements(readBuffer);
+ }
+ else
+ {
+ dataBufferAppend(readNmtoken(true));
+ char c = readCh();
+ switch (c)
+ {
+ case '?':
+ case '*':
+ case '+':
+ dataBufferAppend(c);
+ break;
+ default:
+ unread(c);
+ break;
+ }
+ }
+ }
+
+ /**
+ * Parse mixed content.
+ *
+ * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*'
+ * | '(' S? ('#PCDATA') S? ')'
+ *
+ *
+ * @param saved Buffer for entity that should have the terminal ')'
+ */
+ private void parseMixed(char[] saved)
+ throws Exception
+ {
+ // Check for PCDATA alone.
+ skipWhitespace();
+ if (tryRead(')'))
+ {
+ // VC: Proper Group/PE Nesting
+ if (readBuffer != saved)
+ {
+ handler.verror("Illegal Group/PE nesting");
+ }
+
+ dataBufferAppend(")*");
+ tryRead('*');
+ return;
+ }
+
+ // Parse mixed content.
+ skipWhitespace();
+ while (!tryRead(")"))
+ {
+ require('|');
+ dataBufferAppend('|');
+ skipWhitespace();
+ dataBufferAppend(readNmtoken(true));
+ skipWhitespace();
+ }
+
+ // VC: Proper Group/PE Nesting
+ if (readBuffer != saved)
+ {
+ handler.verror("Illegal Group/PE nesting");
+ }
+
+ require('*');
+ dataBufferAppend(")*");
+ }
+
+ /**
+ * Parse an attribute list declaration.
+ *
+ * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+ *
+ *
+ * [53] AttDef ::= S Name S AttType S DefaultDecl
+ *
+ */
+ private void parseAttDef(String elementName)
+ throws Exception
+ {
+ String name;
+ String type;
+ String enumer = null;
+
+ // Read the attribute name.
+ name = readNmtoken(true);
+
+ // Read the attribute type.
+ requireWhitespace();
+ type = readAttType();
+
+ // Get the string of enumerated values if necessary.
+ if (handler.stringInterning)
+ {
+ if ("ENUMERATION" == type || "NOTATION" == type)
+ {
+ enumer = dataBufferToString();
+ }
+ }
+ else
+ {
+ if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
+ {
+ enumer = dataBufferToString();
+ }
+ }
+
+ // Read the default value.
+ requireWhitespace();
+ parseDefault(elementName, name, type, enumer);
+ }
+
+ /**
+ * Parse the attribute type.
+ *
+ * [54] AttType ::= StringType | TokenizedType | EnumeratedType
+ * [55] StringType ::= 'CDATA'
+ * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
+ * | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
+ * [57] EnumeratedType ::= NotationType | Enumeration
+ *
+ */
+ private String readAttType()
+ throws Exception
+ {
+ if (tryRead('('))
+ {
+ parseEnumeration(false);
+ return "ENUMERATION";
+ }
+ else
+ {
+ String typeString = readNmtoken(true);
+ if (handler.stringInterning)
+ {
+ if ("NOTATION" == typeString)
+ {
+ parseNotationType();
+ return typeString;
+ }
+ else if ("CDATA" == typeString
+ || "ID" == typeString
+ || "IDREF" == typeString
+ || "IDREFS" == typeString
+ || "ENTITY" == typeString
+ || "ENTITIES" == typeString
+ || "NMTOKEN" == typeString
+ || "NMTOKENS" == typeString)
+ {
+ return typeString;
+ }
+ }
+ else
+ {
+ if ("NOTATION".equals(typeString))
+ {
+ parseNotationType();
+ return typeString;
+ }
+ else if ("CDATA".equals(typeString)
+ || "ID".equals(typeString)
+ || "IDREF".equals(typeString)
+ || "IDREFS".equals(typeString)
+ || "ENTITY".equals(typeString)
+ || "ENTITIES".equals(typeString)
+ || "NMTOKEN".equals(typeString)
+ || "NMTOKENS".equals(typeString))
+ {
+ return typeString;
+ }
+ }
+ error("illegal attribute type", typeString, null);
+ return null;
+ }
+ }
+
+ /**
+ * Parse an enumeration.
+ *
+ * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
+ *
+ *
+ * [58] NotationType ::= 'NOTATION' S '(' S? NameNtoks
+ * (S? '|' S? name)* S? ')'
+ *
+ *
+ * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
+ * | (('#FIXED' S)? AttValue)
+ *
+ */
+ private void parseDefault(String elementName, String name,
+ String type, String enumer)
+ throws Exception
+ {
+ int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
+ String value = null;
+ int flags = LIT_ATTRIBUTE;
+ boolean saved = expandPE;
+ String defaultType = null;
+
+ // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace
+ // chars to spaces (doesn't matter when that's done if it doesn't
+ // interfere with char refs expanding to whitespace).
+
+ if (!skippedPE)
+ {
+ flags |= LIT_ENTITY_REF;
+ if (handler.stringInterning)
+ {
+ if ("CDATA" != type)
+ {
+ flags |= LIT_NORMALIZE;
+ }
+ }
+ else
+ {
+ if (!"CDATA".equals(type))
+ {
+ flags |= LIT_NORMALIZE;
+ }
+ }
+ }
+
+ expandPE = false;
+ if (tryRead('#'))
+ {
+ if (tryRead("FIXED"))
+ {
+ defaultType = "#FIXED";
+ valueType = ATTRIBUTE_DEFAULT_FIXED;
+ requireWhitespace();
+ value = readLiteral(flags);
+ }
+ else if (tryRead("REQUIRED"))
+ {
+ defaultType = "#REQUIRED";
+ valueType = ATTRIBUTE_DEFAULT_REQUIRED;
+ }
+ else if (tryRead("IMPLIED"))
+ {
+ defaultType = "#IMPLIED";
+ valueType = ATTRIBUTE_DEFAULT_IMPLIED;
+ }
+ else
+ {
+ error("illegal keyword for attribute default value");
+ }
+ }
+ else
+ {
+ value = readLiteral(flags);
+ }
+ expandPE = saved;
+ setAttribute(elementName, name, type, enumer, value, valueType);
+ if (handler.stringInterning)
+ {
+ if ("ENUMERATION" == type)
+ {
+ type = enumer;
+ }
+ else if ("NOTATION" == type)
+ {
+ type = "NOTATION " + enumer;
+ }
+ }
+ else
+ {
+ if ("ENUMERATION".equals(type))
+ {
+ type = enumer;
+ }
+ else if ("NOTATION".equals(type))
+ {
+ type = "NOTATION " + enumer;
+ }
+ }
+ if (!skippedPE)
+ {
+ handler.getDeclHandler().attributeDecl(elementName, name, type,
+ defaultType, value);
+ }
+ }
+
+ /**
+ * Parse a conditional section.
+ *
+ * [61] conditionalSect ::= includeSect || ignoreSect
+ * [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
+ * extSubsetDecl ']]>'
+ * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
+ * ignoreSectContents* ']]>'
+ * [64] ignoreSectContents ::= Ignore
+ * ('<![' ignoreSectContents* ']]>' Ignore )*
+ * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* )
+ *
+ *
+ * [66] CharRef ::= '' [0-9]+ ';' | '' [0-9a-fA-F]+ ';'
+ *
+ *
+ * [66] CharRef ::= '' [0-9]+ ';' | '' [0-9a-fA-F]+ ';'
+ *
+ *
+ * [68] EntityRef ::= '&' Name ';'
+ *
+ *
+ * [69] PEReference ::= '%' Name ';'
+ *
+ *
+ * [70] EntityDecl ::= GEDecl | PEDecl
+ * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
+ * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
+ * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
+ * [74] PEDef ::= EntityValue | ExternalID
+ * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
+ * | 'PUBLIC' S PubidLiteral S SystemLiteral
+ * [76] NDataDecl ::= S 'NDATA' S Name
+ *
+ *
+ * [82] NotationDecl ::= '<!NOTATION' S Name S
+ * (ExternalID | PublicID) S? '>'
+ * [83] PublicID ::= 'PUBLIC' S PubidLiteral
+ *
+ *
+ * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
+ *
+ */
+ // Scans character data directly out of readBuffer (production [14])
+ // and reports it to the handler, either as ignorable whitespace or as
+ // ordinary character data.  Scanning stops at '&' or '<'; finding
+ // "]]>" in the text is an error.
+ private void parseCharData()
+ throws Exception
+ {
+ char c;
+ // 0: ran off the end of the buffer; 1: stopped cleanly at '&' or '<';
+ // 2: stopped because "]]>" was found.
+ int state = 0;
+ boolean pureWhite = false;
+
+ // assert (dataBufferPos == 0);
+
+ // are we expecting pure whitespace? it might be dirty...
+ if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement)
+ {
+ pureWhite = true;
+ }
+
+ // always report right out of readBuffer
+ // to minimize (pointless) buffer copies
+ while (true)
+ {
+ // Position changes are accumulated locally and applied to
+ // line/column once per buffer scan.
+ int lineAugment = 0;
+ int columnAugment = 0;
+ int i;
+
+loop:
+ for (i = readBufferPos; i < readBufferLength; i++)
+ {
+ switch (c = readBuffer[i])
+ {
+ case '\n':
+ lineAugment++;
+ columnAugment = 0;
+ // pureWhite unmodified
+ break;
+ case '\r': // should not happen!!
+ case '\t':
+ case ' ':
+ // pureWhite unmodified
+ columnAugment++;
+ break;
+ case '&':
+ case '<':
+ columnAugment++;
+ // pureWhite unmodified
+ // CLEAN end of text sequence
+ state = 1;
+ break loop;
+ case ']':
+ // that's not a whitespace char, and
+ // can not terminate pure whitespace either
+ pureWhite = false;
+ // "]]>" is only detected when all three characters are
+ // already in the buffer.
+ if ((i + 2) < readBufferLength)
+ {
+ if (readBuffer [i + 1] == ']'
+ && readBuffer [i + 2] == '>')
+ {
+ // ERROR end of text sequence
+ state = 2;
+ break loop;
+ }
+ }
+ else
+ {
+ // FIXME missing two end-of-buffer cases
+ }
+ columnAugment++;
+ break;
+ default:
+ // Reject characters outside the legal range; the C1 control
+ // range (except U+0085) is rejected only for XML 1.1.
+ if ((c < 0x0020 || c > 0xFFFD)
+ || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
+ && xmlVersion == XML_11))
+ {
+ error("illegal XML character U+"
+ + Integer.toHexString(c));
+ }
+ // that's not a whitespace char
+ pureWhite = false;
+ columnAugment++;
+ }
+ }
+
+ // report text thus far
+ if (lineAugment > 0)
+ {
+ line += lineAugment;
+ column = columnAugment;
+ }
+ else
+ {
+ column += columnAugment;
+ }
+
+ // report characters/whitespace
+ int length = i - readBufferPos;
+
+ if (length != 0)
+ {
+ if (pureWhite)
+ {
+ handler.ignorableWhitespace(readBuffer,
+ readBufferPos, length);
+ }
+ else
+ {
+ handler.charData(readBuffer, readBufferPos, length);
+ }
+ readBufferPos = i;
+ }
+
+ // A terminating construct was found: stop scanning.
+ if (state != 0)
+ {
+ break;
+ }
+
+ // fill next buffer from this entity, or
+ // pop stack and continue with previous entity
+ unread(readCh());
+ }
+ // Remember that this element has had non-whitespace text reported.
+ if (!pureWhite)
+ {
+ isDirtyCurrentElement = true;
+ }
+ // finish, maybe with error
+ // (the loop only exits with state 1 or 2, so this fires for "]]>")
+ if (state != 1) // finish, no error
+ {
+ error("character data may not contain ']]>'");
+ }
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // High-level reading and scanning methods.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+ * Require whitespace characters.
+ */
+ private void requireWhitespace()
+ throws SAXException, IOException
+ {
+ char c = readCh();
+ if (isWhitespace(c))
+ {
+ skipWhitespace();
+ }
+ else
+ {
+ error("whitespace required", c, null);
+ }
+ }
+
+ /**
+ * Skip whitespace characters.
+ * <pre>
+ * [3] S ::= (#x20 | #x9 | #xd | #xa)+
+ * </pre>
+ * A fast path scans the current read buffer directly; if the buffer is
+ * exhausted, or a '%' is met while parameter-entity expansion is
+ * enabled, the method falls back to reading character by character.
+ */
+ private void skipWhitespace()
+ throws SAXException, IOException
+ {
+ // Start with a little cheat. Most of
+ // the time, the white space will fall
+ // within the current read buffer; if
+ // not, then fall through.
+ if (USE_CHEATS)
+ {
+ int lineAugment = 0;
+ int columnAugment = 0;
+
+loop:
+ for (int i = readBufferPos; i < readBufferLength; i++)
+ {
+ switch (readBuffer[i])
+ {
+ case ' ':
+ case '\t':
+ case '\r':
+ columnAugment++;
+ break;
+ case '\n':
+ lineAugment++;
+ columnAugment = 0;
+ break;
+ case '%':
+ // A possible PE reference: leave it to the slow path.
+ if (expandPE)
+ {
+ break loop;
+ }
+ // else fall through...
+ default:
+ // First non-whitespace character: commit the new buffer
+ // position and the accumulated line/column changes.
+ readBufferPos = i;
+ if (lineAugment > 0)
+ {
+ line += lineAugment;
+ column = columnAugment;
+ }
+ else
+ {
+ column += columnAugment;
+ }
+ return;
+ }
+ }
+ }
+
+ // OK, do it the slow way.
+ // readBufferPos was not advanced above, so reading restarts from the
+ // original position (readCh() presumably tracks line/column itself).
+ char c = readCh ();
+ while (isWhitespace(c))
+ {
+ c = readCh();
+ }
+ unread(c);
+ }
+
+ /**
+ * Read a name or (when parsing an enumeration) name token.
+ *
+ * [5] Name ::= (Letter | '_' | ':') (NameChar)*
+ * [7] Nmtoken ::= (NameChar)+
+ *
+ * Two strategies are used.  When USE_CHEATS is set, the current read
+ * buffer is scanned in place for a terminating character and the
+ * token is interned straight out of that buffer.  If no terminator is
+ * found inside the buffer, or a '%' is met while expandPE is set, the
+ * method falls back to reading one character at a time through
+ * readCh() and accumulating the token in nameBuffer.
+ *
+ * The character-class checks only approximate Appendix B of the XML
+ * specification, using the Unicode identifier start/part tests plus
+ * explicit exclusion tables.
+ *
+ * @param isName true to apply the name-start test to the first
+ *   character (Name); false to treat every character as a name
+ *   character (Nmtoken).
+ * @return the token, as returned by intern().
+ */
+ private String readNmtoken(boolean isName)
+ throws SAXException, IOException
+ {
+ char c;
+
+ // Fast path: look for the end of the token inside the read buffer.
+ if (USE_CHEATS)
+ {
+loop:
+ for (int i = readBufferPos; i < readBufferLength; i++)
+ {
+ c = readBuffer[i];
+ switch (c)
+ {
+ case '%':
+ if (expandPE)
+ {
+ // Give up on the fast path; the slow path goes through readCh().
+ break loop;
+ }
+ // else fall through...
+
+ // What may legitimately come AFTER a name/nmtoken?
+ case '<': case '>': case '&':
+ case ',': case '|': case '*': case '+': case '?':
+ case ')':
+ case '=':
+ case '\'': case '"':
+ case '[':
+ case ' ': case '\t': case '\r': case '\n':
+ case ';':
+ case '/':
+ // Terminator found: the token is readBuffer[start .. i-1].
+ int start = readBufferPos;
+ if (i == start)
+ {
+ error("name expected", readBuffer[i], null);
+ }
+ readBufferPos = i;
+ return intern(readBuffer, start, i - start);
+
+ default:
+ // FIXME ... per IBM's OASIS test submission, these:
+ // ? U+06dd
+ // Combining U+309B
+ // These switches are kind of ugly, but at least we won't
+ // have to go over the whole list for each char.
+ // Only the first character of a Name is checked against the
+ // exclusion tables below; they are keyed on the high byte
+ // (c & 0xff00) and then on bits 4-7 (c2 = c & 0x00f0).
+ if (isName && i == readBufferPos)
+ {
+ char c2 = (char) (c & 0x00f0);
+ switch (c & 0xff00)
+ {
+ //starting with 01
+ case 0x0100:
+ switch (c2)
+ {
+ case 0x0030:
+ if (c == 0x0132 || c == 0x0133 || c == 0x013f)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x0040:
+ if (c == 0x0140 || c == 0x0149)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x00c0:
+ if (c == 0x01c4 || c == 0x01cc)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x00f0:
+ if (c == 0x01f1 || c == 0x01f3)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x00b0:
+ // NOTE(review): this test can never succeed, because 0x01f1
+ // and 0x01f3 have (c & 0x00f0) == 0x00f0, not 0x00b0.  It
+ // looks like a copy of the previous case; the intended
+ // code points are not evident from here -- confirm.
+ if (c == 0x01f1 || c == 0x01f3)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ default:
+ if (c == 0x017f)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ }
+
+ break;
+ //starting with 11
+ case 0x1100:
+ switch (c2)
+ {
+ case 0x0000:
+ if (c == 0x1104 || c == 0x1108 ||
+ c == 0x110a || c == 0x110d)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x0030:
+ if (c == 0x113b || c == 0x113f)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x0040:
+ if (c == 0x1141 || c == 0x114d
+ || c == 0x114f )
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x0050:
+ if (c == 0x1151 || c == 0x1156)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x0060:
+ if (c == 0x1162 || c == 0x1164
+ || c == 0x1166 || c == 0x116b
+ || c == 0x116f)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ case 0x00b0:
+ // NOTE(review): the 0x116f comparison is unreachable in this
+ // case (its bits 4-7 are 0x60, and it is already handled
+ // above); possibly a different code point was meant.
+ if (c == 0x11b6 || c == 0x11b9
+ || c == 0x11bb || c == 0x116f)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ break;
+ default:
+ if (c == 0x1174 || c == 0x119f
+ || c == 0x11ac || c == 0x11c3
+ || c == 0x11f1)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ }
+ break;
+ default:
+ if (c == 0x0e46 || c == 0x1011
+ || c == 0x212f || c == 0x0587
+ || c == 0x0230 )
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ }
+ }
+ // punt on exact tests from Appendix A; approximate
+ // them using the Unicode ID start/part rules
+ if (i == readBufferPos && isName)
+ {
+ if (!Character.isUnicodeIdentifierStart(c)
+ && c != ':' && c != '_')
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(c));
+ }
+ }
+ else if (!Character.isUnicodeIdentifierPart(c)
+ && c != '-' && c != ':' && c != '_' && c != '.'
+ && !isExtender(c))
+ {
+ error("Not a name character, U+"
+ + Integer.toHexString(c));
+ }
+ }
+ }
+ }
+
+ // Slow path: reached when the fast path is disabled, was abandoned
+ // at a '%', or ran off the end of the read buffer.  readBufferPos
+ // was not advanced above, so the token is read again from its start.
+ nameBufferPos = 0;
+
+ // Read characters one at a time until a terminator is seen.
+ while (true)
+ {
+ c = readCh();
+ switch (c)
+ {
+ // Terminators; on this path '%' always ends the token.
+ case '%':
+ case '<': case '>': case '&':
+ case ',': case '|': case '*': case '+': case '?':
+ case ')':
+ case '=':
+ case '\'': case '"':
+ case '[':
+ case ' ': case '\t': case '\n': case '\r':
+ case ';':
+ case '/':
+ // Push the terminator back for the caller.
+ unread(c);
+ if (nameBufferPos == 0)
+ {
+ error ("name expected");
+ }
+ // punt on exact tests from Appendix A, but approximate them
+ if (isName
+ && !Character.isUnicodeIdentifierStart(nameBuffer[0])
+ && ":_".indexOf(nameBuffer[0]) == -1)
+ {
+ error("Not a name start character, U+"
+ + Integer.toHexString(nameBuffer[0]));
+ }
+ String s = intern(nameBuffer, 0, nameBufferPos);
+ nameBufferPos = 0;
+ return s;
+ default:
+ // punt on exact tests from Appendix A, but approximate them
+
+ // The first character of a Name is exempt here; it is checked
+ // with the name-start test once the terminator is reached.
+ if ((nameBufferPos != 0 || !isName)
+ && !Character.isUnicodeIdentifierPart(c)
+ && ":-_.".indexOf(c) == -1
+ && !isExtender(c))
+ {
+ error("Not a name character, U+"
+ + Integer.toHexString(c));
+ }
+ // Grow the accumulation buffer when it is full.
+ if (nameBufferPos >= nameBuffer.length)
+ {
+ nameBuffer =
+ (char[]) extendArray(nameBuffer,
+ nameBuffer.length, nameBufferPos);
+ }
+ nameBuffer[nameBufferPos++] = c;
+ }
+ }
+ }
+
+ /**
+ * Test whether a character is listed by the Extender production.
+ *
+ * @param c The character to test.
+ * @return true if the character is one of the extender code points.
+ */
+ private static boolean isExtender(char c)
+ {
+   // [88] Extender ::= ...
+   switch (c)
+     {
+     // Isolated code points.
+     case 0x00b7: case 0x02d0: case 0x02d1: case 0x0387:
+     case 0x0640: case 0x0e46: case 0x0ec6: case 0x3005:
+       return true;
+     default:
+       // Remaining members are three contiguous ranges.
+       if (0x3031 <= c && c <= 0x3035)
+         {
+           return true;
+         }
+       if (0x309d <= c && c <= 0x309e)
+         {
+           return true;
+         }
+       return 0x30fc <= c && c <= 0x30fe;
+     }
+ }
+
+ /**
+ * Read a literal. With matching single or double quotes as
+ * delimiters (and not embedded!) this is used to parse:
+ *
+ * [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ...
+ * [10] AttValue ::= ... ([^<&] | Reference)* ...
+ * [11] SystemLiteral ::= ... (URLchar - "'")* ...
+ * [12] PubidLiteral ::= ... (PubidChar - "'")* ...
+ *
+ * as well as the quoted strings in XML and text declarations
+ * (for version, encoding, and standalone) which have their
+ * own constraints.
+ *
+ * The literal is accumulated in the data buffer.  While it is being
+ * read, inLiteral is set and doReport is cleared; expandPE is cleared
+ * as well when LIT_DISABLE_PE is given.  expandPE and doReport are
+ * put back to their previous values, and inLiteral is cleared,
+ * before returning normally.
+ *
+ * @param flags Bitwise OR of LIT_* constants.  The bits examined here
+ *   are LIT_DISABLE_PE, LIT_ATTRIBUTE, LIT_PUBID, LIT_DISABLE_CREF,
+ *   LIT_ENTITY_REF, LIT_DISABLE_EREF and LIT_NORMALIZE.
+ * @return the literal's text taken from the data buffer, or null when
+ *   the first character read is not a quote and error() returns.
+ */
+ private String readLiteral(int flags)
+ throws SAXException, IOException
+ {
+ char delim, c;
+ int startLine = line;
+ boolean saved = expandPE;
+ boolean savedReport = doReport;
+
+ // Find the first delimiter.
+ delim = readCh();
+ if (delim != '"' && delim != '\'')
+ {
+ error("expected '\"' or \"'\"", delim, null);
+ return null;
+ }
+ inLiteral = true;
+ if ((flags & LIT_DISABLE_PE) != 0)
+ {
+ expandPE = false;
+ }
+ doReport = false;
+
+ // Each level of input source has its own buffer; remember
+ // ours, so we won't read the ending delimiter from any
+ // other input source, regardless of entity processing.
+ char[] ourBuf = readBuffer;
+
+ // Read the literal.
+ try
+ {
+ c = readCh();
+ // NOTE(review): ampRead is assigned below but never read anywhere
+ // in this method.
+ boolean ampRead = false;
+loop:
+ // Stop only at the delimiter that comes from our own input buffer.
+ while (! (c == delim && readBuffer == ourBuf))
+ {
+ switch (c)
+ {
+ // attributes and public ids are normalized
+ // in almost the same ways
+ case '\n':
+ case '\r':
+ if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
+ {
+ c = ' ';
+ }
+ break;
+ case '\t':
+ if ((flags & LIT_ATTRIBUTE) != 0)
+ {
+ c = ' ';
+ }
+ break;
+ case '&':
+ c = readCh();
+ // Char refs are expanded immediately, except for
+ // all the cases where it's deferred.
+ if (c == '#')
+ {
+ if ((flags & LIT_DISABLE_CREF) != 0)
+ {
+ // Keep the '&' as data.  This break leaves the switch, so
+ // the '#' held in c is appended by the code after it.
+ dataBufferAppend('&');
+ break;
+ }
+ parseCharRef(false /* Do not do flushDataBuffer */);
+
+ // exotic WFness risk: this is an entity literal,
+ // dataBuffer [dataBufferPos - 1] == '&', and
+ // following chars are a _partial_ entity/char ref
+
+ // It looks like an entity ref ...
+ }
+ else
+ {
+ unread(c);
+ // Expand it?
+ if ((flags & LIT_ENTITY_REF) > 0)
+ {
+ parseEntityRef(false);
+ // NOTE(review): this converts the whole read buffer to a
+ // String only to set the unused ampRead flag.
+ if (String.valueOf(readBuffer).equals("&"))
+ {
+ ampRead = true;
+ }
+ //Is it just data?
+ }
+ else if ((flags & LIT_DISABLE_EREF) != 0)
+ {
+ dataBufferAppend('&');
+
+ // OK, it will be an entity ref -- expanded later.
+ }
+ else
+ {
+ // Copy the reference through verbatim as "&name;".
+ String name = readNmtoken(true);
+ require(';');
+ dataBufferAppend('&');
+ dataBufferAppend(name);
+ dataBufferAppend(';');
+ }
+ }
+ // The reference has been dealt with; do not append c itself.
+ c = readCh();
+ continue loop;
+
+ case '<':
+ // and why? Perhaps so "&foo;" expands the same
+ // inside and outside an attribute?
+ if ((flags & LIT_ATTRIBUTE) != 0)
+ {
+ error("attribute values may not contain '<'");
+ }
+ break;
+
+ // We don't worry about case '%' and PE refs, readCh does.
+
+ default:
+ break;
+ }
+ dataBufferAppend(c);
+ c = readCh();
+ }
+ }
+ catch (EOFException e)
+ {
+ error("end of input while looking for delimiter (started on line "
+ + startLine + ')', null, Character.toString(delim));
+ }
+ // Not in a finally block: these restores are skipped whenever an
+ // exception propagates out of the code above.
+ inLiteral = false;
+ expandPE = saved;
+ doReport = savedReport;
+
+ // Normalise whitespace if necessary.
+ if ((flags & LIT_NORMALIZE) > 0)
+ {
+ dataBufferNormalize();
+ }
+
+ // Return the value.
+ return dataBufferToString();
+ }
+
+ /**
+ * Try reading external identifiers.
+ * A system identifier is not required for notations.
+ * @param inNotation Are we parsing a notation decl?
+ * @param isSubset Parsing external subset decl (may be omitted)?
+ * @return A three-member String array containing the identifiers,
+ * or nulls. Order: public, system, baseURI.
+ */
+ private ExternalIdentifiers readExternalIds(boolean inNotation,
+ boolean isSubset)
+ throws Exception
+ {
+ char c;
+ ExternalIdentifiers ids = new ExternalIdentifiers();
+ int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
+
+ if (tryRead("PUBLIC"))
+ {
+ requireWhitespace();
+ ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags);
+ if (inNotation)
+ {
+ skipWhitespace();
+ c = readCh();
+ unread(c);
+ if (c == '"' || c == '\'')
+ {
+ ids.systemId = readLiteral(flags);
+ }
+ }
+ else
+ {
+ requireWhitespace();
+ ids.systemId = readLiteral(flags);
+ }
+
+ for (int i = 0; i < ids.publicId.length(); i++)
+ {
+ c = ids.publicId.charAt(i);
+ if (c >= 'a' && c <= 'z')
+ {
+ continue;
+ }
+ if (c >= 'A' && c <= 'Z')
+ {
+ continue;
+ }
+ if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(c) != -1)
+ {
+ continue;
+ }
+ error("illegal PUBLIC id character U+"
+ + Integer.toHexString(c));
+ }
+ }
+ else if (tryRead("SYSTEM"))
+ {
+ requireWhitespace();
+ ids.systemId = readLiteral(flags);
+ }
+ else if (!isSubset)
+ {
+ error("missing SYSTEM or PUBLIC keyword");
+ }
+
+ if (ids.systemId != null)
+ {
+ if (ids.systemId.indexOf('#') != -1)
+ {
+ handler.verror("SYSTEM id has a URI fragment: " + ids.systemId);
+ }
+ ids.baseUri = handler.getSystemId();
+ if (ids.baseUri == null && uriWarnings)
+ {
+ handler.warn("No base URI; hope URI is absolute: "
+ + ids.systemId);
+ }
+ }
+
+ return ids;
+ }
+
+ /**
+ * Test if a character is whitespace.
+ *
+ * [3] S ::= (#x20 | #x9 | #xd | #xa)+
+ *
+ * @param c The character to test.
+ * @return true if the character is whitespace.
+ */
+ private final boolean isWhitespace(char c)
+ {
+ if (c > 0x20)
+ {
+ return false;
+ }
+ if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d)
+ {
+ return true;
+ }
+ return false; // illegal ...
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // Utility routines.
+ //////////////////////////////////////////////////////////////////////
+
+ /**
+ * Add a character to the data buffer.
+ */
+ private void dataBufferAppend(char c)
+ {
+ // Expand buffer if necessary.
+ if (dataBufferPos >= dataBuffer.length)
+ {
+ dataBuffer = (char[]) extendArray(dataBuffer,
+ dataBuffer.length, dataBufferPos);
+ }
+ dataBuffer[dataBufferPos++] = c;
+ }
+
+ /**
+ * Append every character of a string to the data buffer.
+ */
+ private void dataBufferAppend(String s)
+ {
+   char[] chars = s.toCharArray();
+   dataBufferAppend(chars, 0, chars.length);
+ }
+
+ /**
+ * Append (part of) a character array to the data buffer.
+ */
+ private void dataBufferAppend(char[] ch, int start, int length)
+ {
+ dataBuffer = (char[]) extendArray(dataBuffer, dataBuffer.length,
+ dataBufferPos + length);
+
+ System.arraycopy(ch, start, dataBuffer, dataBufferPos, length);
+ dataBufferPos += length;
+ }
+
+ /**
+ * Normalise space characters in the data buffer, in place: leading
+ * and trailing spaces are dropped and every interior run of spaces
+ * is collapsed to one space.
+ */
+ private void dataBufferNormalize()
+ {
+   int from = 0;
+   int limit = dataBufferPos;
+
+   // Trim spaces from both ends.
+   while (from < limit && dataBuffer[from] == ' ')
+     {
+       from++;
+     }
+   while (limit > from && dataBuffer[limit - 1] == ' ')
+     {
+       limit--;
+     }
+
+   // Compact to the left.  A space is only written once the next
+   // non-space character is seen, which collapses each run to one.
+   int to = 0;
+   boolean spacePending = false;
+   for (; from < limit; from++)
+     {
+       char ch = dataBuffer[from];
+       if (ch == ' ')
+         {
+           spacePending = true;
+           continue;
+         }
+       if (spacePending)
+         {
+           dataBuffer[to++] = ' ';
+           spacePending = false;
+         }
+       dataBuffer[to++] = ch;
+     }
+
+   // The new length is <= the old one.
+   dataBufferPos = to;
+ }
+
+ /**
+ * Convert the data buffer to a string.
+ */
+ private String dataBufferToString()
+ {
+ String s = new String(dataBuffer, 0, dataBufferPos);
+ dataBufferPos = 0;
+ return s;
+ }
+
+ /**
+ * Hand the buffered character data to the handler and empty the
+ * buffer.  Inside element-only content (and outside CDATA) a buffer
+ * holding nothing but whitespace is reported as ignorable whitespace;
+ * everything else is reported as character data.
+ */
+ private void dataBufferFlush()
+ throws SAXException
+ {
+   if (dataBufferPos <= 0)
+     {
+       return;
+     }
+
+   boolean ignorable = false;
+   if (currentElementContent == CONTENT_ELEMENTS && !inCDATA)
+     {
+       // We can't just trust the buffer to be whitespace, there
+       // are (error) cases when it isn't
+       ignorable = true;
+       for (int i = 0; i < dataBufferPos; i++)
+         {
+           if (!isWhitespace(dataBuffer[i]))
+             {
+               ignorable = false;
+               break;
+             }
+         }
+     }
+
+   if (ignorable)
+     {
+       handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos);
+     }
+   else
+     {
+       handler.charData(dataBuffer, 0, dataBufferPos);
+     }
+   dataBufferPos = 0;
+ }
+
+ /**
+ * Require a string to appear, or throw an exception.
+ * == instead of String.equals ().
+ *
+ *
+ *
+ * @param ename The name of the entity (if any) causing the new input.
+ * @see #popInput
+ * @see #sourceType
+ * @see #externalEntity
+ * @see #readBuffer
+ * @see #readBufferPos
+ * @see #readBufferLength
+ * @see #line
+ * @see #encoding
+ */
+ private void pushInput(String ename)
+ throws SAXException
+ {
+   // Check for entity recursion: the entity being opened must not
+   // already be on the stack of open entities.  Names are compared by
+   // value (equals) rather than by identity, so the check still holds
+   // if a name ever reaches this method without having been interned.
+   if (ename != null)
+     {
+       Iterator entities = entityStack.iterator();
+       while (entities.hasNext())
+         {
+           String e = (String) entities.next();
+           if (ename.equals(e))
+             {
+               error("recursive reference to entity", ename, null);
+             }
+         }
+     }
+   // The name (possibly null) is recorded even when no input snapshot
+   // is taken below.
+   entityStack.addLast(ename);
+
+   // Don't bother if there is no current input.
+   if (sourceType == INPUT_NONE)
+     {
+       return;
+     }
+
+   // Set up a snapshot of the current
+   // input source.
+   Input input = new Input();
+
+   input.sourceType = sourceType;
+   input.externalEntity = externalEntity;
+   input.readBuffer = readBuffer;
+   input.readBufferPos = readBufferPos;
+   input.readBufferLength = readBufferLength;
+   input.line = line;
+   input.encoding = encoding;
+   input.readBufferOverflow = readBufferOverflow;
+   input.is = is;
+   input.currentByteCount = currentByteCount;
+   input.column = column;
+   input.reader = reader;
+
+   // Push it onto the stack.
+   inputStack.addLast(input);
+ }
+
+ /**
+ * Restore a previous input source.
+ *
+
+
+
+
About Ælfred
+
+Design Principles
+
+
+
+
+
+About the Name Ælfred
+
+Character Encodings
+
+
+
+
+
+
+<?xml version="1.0" encoding="ISO-8859-15"?>
+
+
+java.io.InputStreamReader
+are now fully supported for both external labels (such as MIME types)
+and internal types (as shown above).
+There is one limitation in the support for internal labels:
+the encodings must be derived from the US-ASCII encoding;
+the EBCDIC family of encodings is not recognized.
+Note that Java defines its
+own encoding names, which don't always correspond to the standard
+Internet encoding names defined by the IETF/IANA, and that Java
+may even require use of nonstandard encoding names.
+Please report
+such problems; some of them can be worked around in this parser,
+and many can be worked around by using external labels.
+Known Conformance Violations
+
+
+
+
+
+Copyright and distribution terms
+
+
+ Linking this library statically or dynamically with other modules is
+ making a combined work based on this library. Thus, the terms and
+ conditions of the GNU General Public License cover the whole
+ combination.
+
+ As a special exception, the copyright holders of this library give you
+ permission to link this library with independent modules to produce an
+ executable, regardless of the license terms of these independent
+ modules, and to copy and distribute the resulting executable under
+ terms of your choice, provided that you also meet, for each linked
+ independent module, the terms and conditions of the license of that
+ module. An independent module is a module which is not derived from
+ or based on this library. If you modify this library, you may extend
+ this exception to your version of the library, but you are not
+ obligated to do so. If you do not wish to do so, delete this
+ exception statement from your version.
+
+ Parts derived from code which carried the following notice:
+
+ Copyright (c) 1997, 1998 by Microstar Software Ltd.
+
+ AElfred is free for both commercial and non-commercial use and
+ redistribution, provided that Microstar's copyright and disclaimer are
+ retained intact. You are free to modify AElfred for your own use and
+ to redistribute AElfred with your modifications, provided that the
+ modifications are clearly documented.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ merchantability or fitness for a particular purpose. Please use it AT
+ YOUR OWN RISK.
+
+
+
+As noted above, Microstar has not updated this parser since
+the summer of 1998, when it released version 1.2a on its web site.
+This release is intended to benefit the developer community by
+refocusing the API on SAX2, and improving conformance to the extent
+that most developers should not need to use another XML parser.
+
+The code has been cleaned up (referring to the XML 1.0 spec in
+all the production numbers in
+comments, rather than some preliminary draft, for one example) and
+has been sped up a bit as well.
+JAXP support has been added, although developers are still
+strongly encouraged to use the SAX2 APIs directly.
+
+
+The original version of Ælfred did not support the
+SAX2 APIs.
+
+This version supports the SAX2 APIs, exposing the standard
+boolean feature descriptors. It supports the "DeclHandler" property
+to provide access to all DTD declarations not already exposed
+through the SAX1 API. The "LexicalHandler" property is supported,
+exposing entity boundaries (including the unnamed external subset) and
+things like comments and CDATA boundaries. SAX1 compatibility is
+currently provided.
+
+
+In the 'pipeline' package in this same software distribution is an
+XML Validation component
+using any full SAX2 event stream (including all document type declarations)
+to validate. There is now an XmlReader class
+which combines that class and this enhanced Ælfred parser, creating
+an optionally validating SAX2 parser.
+
+As noted in the documentation for that validating component, certain
+validity constraints can't reliably be tested by a layered validator.
+These include all constraints relying on
+layering violations (exposing XML at the level of tokens or below,
+required since XML isn't a context-free grammar), some that
+SAX2 doesn't support, and a few others. The resulting validating
+parser is conformant enough for most applications that aren't doing
+strange SGML tricks with DTDs.
+Moreover, that validating filter can be used without
+a parser ... any application component that emits SAX event streams
+can DTD-validate its output on demand.
+
+You'll have noticed that the original version of Ælfred
+had small size as a top goal. Ælfred2 normally includes a
+DTD validation layer, but you can package without that.
+Similarly, JAXP factory support is available but optional.
+Then the main added costs due to this revision are for
+supporting the SAX2 API itself; DTD validation is as
+cleanly layered as allowed by SAX2.
+
+Bugs fixed in Ælfred2 include:
+
+Other bugs may also have been fixed.
+
+For better overall validation support, some of the validity
+constraints that can't be verified using the SAX2 event stream
+are now reported directly by Ælfred2.
+ + + -- cgit v1.2.3