/* XMLParser.java -- Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Classpath; see the file COPYING. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Linking this library statically or dynamically with other modules is making a combined work based on this library. Thus, the terms and conditions of the GNU General Public License cover the whole combination. As a special exception, the copyright holders of this library give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you may extend this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. Partly derived from code which carried the following notice: Copyright (c) 1997, 1998 by Microstar Software Ltd. AElfred is free for both commercial and non-commercial use and redistribution, provided that Microstar's copyright and disclaimer are retained intact. 
You are free to modify AElfred for your own use and to redistribute AElfred with your modifications, provided that the modifications are clearly documented. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of merchantability or fitness for a particular purpose. Please use it AT YOUR OWN RISK. */ package gnu.xml.stream; import gnu.java.lang.CPStringBuilder; import java.io.BufferedInputStream; import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.Map; import java.util.NoSuchElementException; import java.util.StringTokenizer; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; import javax.xml.namespace.QName; import javax.xml.stream.Location; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLReporter; import javax.xml.stream.XMLResolver; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import gnu.java.net.CRLFInputStream; import gnu.classpath.debug.TeeInputStream; import gnu.classpath.debug.TeeReader; /** * An XML parser. * This parser supports the following additional StAX properties: * * * * * * * * * * *

 * gnu.xml.stream.stringInterning (Boolean): indicates whether markup strings will be interned.
 * gnu.xml.stream.xmlBase (Boolean): indicates whether XML Base processing will be performed.
 * gnu.xml.stream.baseURI (String): returns the base URI of the current event.

* * @see http://www.w3.org/TR/REC-xml/ * @see http://www.w3.org/TR/xml11/ * @see http://www.w3.org/TR/REC-xml-names * @see http://www.w3.org/TR/xml-names11 * @see http://www.w3.org/TR/xmlbase/ * * @author Chris Burdess */ public class XMLParser implements XMLStreamReader, NamespaceContext { // -- parser state machine states -- private static final int INIT = 0; // start state private static final int PROLOG = 1; // in prolog private static final int CONTENT = 2; // in content private static final int EMPTY_ELEMENT = 3; // empty element state private static final int MISC = 4; // in Misc (after root element) // -- parameters for parsing literals -- private final static int LIT_ENTITY_REF = 2; private final static int LIT_NORMALIZE = 4; private final static int LIT_ATTRIBUTE = 8; private final static int LIT_DISABLE_PE = 16; private final static int LIT_DISABLE_CREF = 32; private final static int LIT_DISABLE_EREF = 64; private final static int LIT_PUBID = 256; // -- types of attribute values -- final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; final static int ATTRIBUTE_DEFAULT_FIXED = 34; // -- additional event types -- final static int START_ENTITY = 50; final static int END_ENTITY = 51; /** * The current input. */ private Input input; /** * Stack of inputs representing XML general entities. * The input representing the XML input stream or reader is always the * first element in this stack. */ private LinkedList inputStack = new LinkedList(); /** * Stack of start-entity events to be reported. */ private LinkedList startEntityStack = new LinkedList(); /** * Stack of end-entity events to be reported. */ private LinkedList endEntityStack = new LinkedList(); /** * Current parser state within the main state machine. */ private int state = INIT; /** * The (type of the) current event. 
*/ private int event; /** * The element name stack. The first element in this stack will be the * root element. */ private LinkedList stack = new LinkedList(); /** * Stack of namespace contexts. These are maps specifying prefix-to-URI * mappings. The first element in this stack is the most recent namespace * context (i.e. the other way around from the element name stack). */ private LinkedList namespaces = new LinkedList(); /** * The base-URI stack. This holds the base URI context for each element. * The first element in this stack is the most recent context (i.e. the * other way around from the element name stack). */ private LinkedList bases = new LinkedList(); /** * The list of attributes for the current element, in the order defined in * the XML stream. */ private ArrayList attrs = new ArrayList(); /** * Buffer for text and character data. */ private StringBuffer buf = new StringBuffer(); /** * Buffer for NMTOKEN strings (markup). */ private StringBuffer nmtokenBuf = new StringBuffer(); /** * Buffer for string literals. (e.g. attribute values) */ private StringBuffer literalBuf = new StringBuffer(); /** * Temporary Unicode character buffer used during character data reads. */ private int[] tmpBuf = new int[1024]; /** * The element content model for the current element. */ private ContentModel currentContentModel; /** * The validation stack. This holds lists of the elements seen for each * element, in order to determine whether the names and order of these * elements match the content model for the element. The last entry in * this stack represents the current element. */ private LinkedList validationStack; /** * These sets contain the IDs and the IDREFs seen in the document, to * ensure that IDs are unique and that each IDREF refers to an ID in the * document. */ private HashSet ids, idrefs; /** * The target and data associated with the current processing instruction * event. 
*/ private String piTarget, piData; /** * The XML version declared in the XML declaration. */ private String xmlVersion; /** * The encoding declared in the XML declaration. */ private String xmlEncoding; /** * The standalone value declared in the XML declaration. */ private Boolean xmlStandalone; /** * The document type definition. */ Doctype doctype; /** * State variables for determining parameter-entity expansion. */ private boolean expandPE, peIsError; /** * Whether this is a validating parser. */ private final boolean validating; /** * Whether strings representing markup will be interned. */ private final boolean stringInterning; /** * If true, CDATA sections will be merged with adjacent text nodes into a * single event. */ private final boolean coalescing; /** * Whether to replace general entity references with their replacement * text automatically during parsing. * Otherwise entity-reference events will be issued. */ private final boolean replaceERefs; /** * Whether to support external entities. */ private final boolean externalEntities; /** * Whether to support DTDs. */ private final boolean supportDTD; /** * Whether to support XML namespaces. If true, namespace information will * be available. Otherwise namespaces will simply be reported as ordinary * attributes. */ private final boolean namespaceAware; /** * Whether to support XML Base. If true, URIs specified in xml:base * attributes will be honoured when resolving external entities. */ private final boolean baseAware; /** * Whether to report extended event types (START_ENTITY and END_ENTITY) * in addition to the standard event types. Used by the SAX parser. */ private final boolean extendedEventTypes; /** * The reporter to receive parsing warnings. */ final XMLReporter reporter; /** * Callback interface for resolving external entities. 
*/ final XMLResolver resolver; // -- Constants for testing the next kind of markup event -- private static final String TEST_START_ELEMENT = "<"; private static final String TEST_END_ELEMENT = ""; private static final String TEST_END_COMMENT = "--"; private static final String TEST_END_PI = "?>"; private static final String TEST_END_CDATA = "]]>"; /** * The general entities predefined by the XML specification. */ private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap(); static { PREDEFINED_ENTITIES.put("amp", "&"); PREDEFINED_ENTITIES.put("lt", "<"); PREDEFINED_ENTITIES.put("gt", ">"); PREDEFINED_ENTITIES.put("apos", "'"); PREDEFINED_ENTITIES.put("quot", "\""); } /** * Creates a new XML parser for the given input stream. * This constructor should be used where possible, as it allows the * encoding of the XML data to be correctly determined from the stream. * @param in the input stream * @param systemId the URL from which the input stream was retrieved * (necessary if there are external entities to be resolved) * @param validating if the parser is to be a validating parser * @param namespaceAware if the parser should support XML Namespaces * @param coalescing if CDATA sections should be merged into adjacent text * nodes * @param replaceERefs if entity references should be automatically * replaced by their replacement text (otherwise they will be reported as * entity-reference events) * @param externalEntities if external entities should be loaded * @param supportDTD if support for the XML DTD should be enabled * @param baseAware if the parser should support XML Base to resolve * external entities * @param stringInterning whether strings will be interned during parsing * @param reporter the reporter to receive warnings during processing * @param resolver the callback interface used to resolve external * entities */ public XMLParser(InputStream in, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean 
replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver) { this.validating = validating; this.namespaceAware = namespaceAware; this.coalescing = coalescing; this.replaceERefs = replaceERefs; this.externalEntities = externalEntities; this.supportDTD = supportDTD; this.baseAware = baseAware; this.stringInterning = stringInterning; this.extendedEventTypes = extendedEventTypes; this.reporter = reporter; this.resolver = resolver; if (validating) { validationStack = new LinkedList(); ids = new HashSet(); idrefs = new HashSet(); } String debug = System.getProperty("gnu.xml.debug.input"); if (debug != null) { try { File file = File.createTempFile(debug, ".xml"); in = new TeeInputStream(in, new FileOutputStream(file)); } catch (IOException e) { RuntimeException e2 = new RuntimeException(); e2.initCause(e); throw e2; } } systemId = canonicalize(systemId); pushInput(new Input(in, null, null, systemId, null, null, false, true)); } /** * Creates a new XML parser for the given character stream. * This constructor is only available for compatibility with the JAXP * APIs, which permit XML to be parsed from a character stream. Because * the encoding specified by the character stream may conflict with that * specified in the XML declaration, this method should be avoided where * possible. 
* @param in the input stream * @param systemId the URL from which the input stream was retrieved * (necessary if there are external entities to be resolved) * @param validating if the parser is to be a validating parser * @param namespaceAware if the parser should support XML Namespaces * @param coalescing if CDATA sections should be merged into adjacent text * nodes * @param replaceERefs if entity references should be automatically * replaced by their replacement text (otherwise they will be reported as * entity-reference events) * @param externalEntities if external entities should be loaded * @param supportDTD if support for the XML DTD should be enabled * @param baseAware if the parser should support XML Base to resolve * external entities * @param stringInterning whether strings will be interned during parsing * @param reporter the reporter to receive warnings during processing * @param resolver the callback interface used to resolve external * entities */ public XMLParser(Reader reader, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver) { this.validating = validating; this.namespaceAware = namespaceAware; this.coalescing = coalescing; this.replaceERefs = replaceERefs; this.externalEntities = externalEntities; this.supportDTD = supportDTD; this.baseAware = baseAware; this.stringInterning = stringInterning; this.extendedEventTypes = extendedEventTypes; this.reporter = reporter; this.resolver = resolver; if (validating) { validationStack = new LinkedList(); ids = new HashSet(); idrefs = new HashSet(); } String debug = System.getProperty("gnu.xml.debug.input"); if (debug != null) { try { File file = File.createTempFile(debug, ".xml"); reader = new TeeReader(reader, new FileWriter(file)); } catch (IOException e) { RuntimeException e2 = new 
RuntimeException(); e2.initCause(e); throw e2; } } systemId = canonicalize(systemId); pushInput(new Input(null, reader, null, systemId, null, null, false, true)); } // -- NamespaceContext -- public String getNamespaceURI(String prefix) { if (XMLConstants.XML_NS_PREFIX.equals(prefix)) return XMLConstants.XML_NS_URI; if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; for (Iterator i = namespaces.iterator(); i.hasNext(); ) { LinkedHashMap ctx = (LinkedHashMap) i.next(); String namespaceURI = (String) ctx.get(prefix); if (namespaceURI != null) return namespaceURI; } return null; } public String getPrefix(String namespaceURI) { if (XMLConstants.XML_NS_URI.equals(namespaceURI)) return XMLConstants.XML_NS_PREFIX; if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) return XMLConstants.XMLNS_ATTRIBUTE; for (Iterator i = namespaces.iterator(); i.hasNext(); ) { LinkedHashMap ctx = (LinkedHashMap) i.next(); if (ctx.containsValue(namespaceURI)) { for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); String uri = (String) entry.getValue(); if (uri.equals(namespaceURI)) return (String) entry.getKey(); } } } return null; } public Iterator getPrefixes(String namespaceURI) { if (XMLConstants.XML_NS_URI.equals(namespaceURI)) return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator(); if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator(); LinkedList acc = new LinkedList(); for (Iterator i = namespaces.iterator(); i.hasNext(); ) { LinkedHashMap ctx = (LinkedHashMap) i.next(); if (ctx.containsValue(namespaceURI)) { for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); String uri = (String) entry.getValue(); if (uri.equals(namespaceURI)) acc.add(entry.getKey()); } } } return acc.iterator(); } // -- XMLStreamReader -- public void close() throws 
XMLStreamException { stack = null; namespaces = null; bases = null; buf = null; attrs = null; doctype = null; inputStack = null; validationStack = null; ids = null; idrefs = null; } public NamespaceContext getNamespaceContext() { return this; } public int getAttributeCount() { return attrs.size(); } public String getAttributeLocalName(int index) { Attribute a = (Attribute) attrs.get(index); return a.localName; } public String getAttributeNamespace(int index) { String prefix = getAttributePrefix(index); return getNamespaceURI(prefix); } public String getAttributePrefix(int index) { Attribute a = (Attribute) attrs.get(index); return a.prefix; } public QName getAttributeName(int index) { Attribute a = (Attribute) attrs.get(index); String namespaceURI = getNamespaceURI(a.prefix); return new QName(namespaceURI, a.localName, a.prefix); } public String getAttributeType(int index) { Attribute a = (Attribute) attrs.get(index); return a.type; } private String getAttributeType(String elementName, String attName) { if (doctype != null) { AttributeDecl att = doctype.getAttributeDecl(elementName, attName); if (att != null) return att.type; } return "CDATA"; } public String getAttributeValue(int index) { Attribute a = (Attribute) attrs.get(index); return a.value; } public String getAttributeValue(String namespaceURI, String localName) { for (Iterator i = attrs.iterator(); i.hasNext(); ) { Attribute a = (Attribute) i.next(); if (a.localName.equals(localName)) { String uri = getNamespaceURI(a.prefix); if ((uri == null && namespaceURI == null) || (uri != null && uri.equals(namespaceURI))) return a.value; } } return null; } boolean isAttributeDeclared(int index) { if (doctype == null) return false; Attribute a = (Attribute) attrs.get(index); String qn = ("".equals(a.prefix)) ? 
a.localName : a.prefix + ":" + a.localName; String elementName = buf.toString(); return doctype.isAttributeDeclared(elementName, qn); } public String getCharacterEncodingScheme() { return xmlEncoding; } public String getElementText() throws XMLStreamException { if (event != XMLStreamConstants.START_ELEMENT) throw new XMLStreamException("current event must be START_ELEMENT"); CPStringBuilder elementText = new CPStringBuilder(); int depth = stack.size(); while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth) { switch (next()) { case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.SPACE: elementText.append(buf.toString()); } } return elementText.toString(); } public String getEncoding() { return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding; } public int getEventType() { return event; } public String getLocalName() { switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: String qName = buf.toString(); int ci = qName.indexOf(':'); String localName = (ci == -1) ? qName : qName.substring(ci + 1); if (stringInterning) localName = localName.intern(); return localName; default: return null; } } public Location getLocation() { return input; } public QName getName() { switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: String qName = buf.toString(); int ci = qName.indexOf(':'); String localName = (ci == -1) ? qName : qName.substring(ci + 1); if (stringInterning) localName = localName.intern(); String prefix = (ci == -1) ? (namespaceAware ? 
XMLConstants.DEFAULT_NS_PREFIX : null) : qName.substring(0, ci); if (stringInterning && prefix != null) prefix = prefix.intern(); String namespaceURI = getNamespaceURI(prefix); return new QName(namespaceURI, localName, prefix); default: return null; } } public int getNamespaceCount() { if (!namespaceAware || namespaces.isEmpty()) return 0; switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); return ctx.size(); default: return 0; } } public String getNamespacePrefix(int index) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); int count = 0; for (Iterator i = ctx.keySet().iterator(); i.hasNext(); ) { String prefix = (String) i.next(); if (count++ == index) return prefix; } return null; } public String getNamespaceURI() { switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: String qName = buf.toString(); int ci = qName.indexOf(':'); if (ci == -1) return null; String prefix = qName.substring(0, ci); return getNamespaceURI(prefix); default: return null; } } public String getNamespaceURI(int index) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); int count = 0; for (Iterator i = ctx.values().iterator(); i.hasNext(); ) { String uri = (String) i.next(); if (count++ == index) return uri; } return null; } public String getPIData() { return piData; } public String getPITarget() { return piTarget; } public String getPrefix() { switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: String qName = buf.toString(); int ci = qName.indexOf(':'); String prefix = (ci == -1) ? (namespaceAware ? 
XMLConstants.DEFAULT_NS_PREFIX : null) : qName.substring(0, ci); if (stringInterning && prefix != null) prefix = prefix.intern(); return prefix; default: return null; } } public Object getProperty(String name) throws IllegalArgumentException { if (name == null) throw new IllegalArgumentException("name is null"); if (XMLInputFactory.ALLOCATOR.equals(name)) return null; if (XMLInputFactory.IS_COALESCING.equals(name)) return coalescing ? Boolean.TRUE : Boolean.FALSE; if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name)) return namespaceAware ? Boolean.TRUE : Boolean.FALSE; if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name)) return replaceERefs ? Boolean.TRUE : Boolean.FALSE; if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name)) return externalEntities ? Boolean.TRUE : Boolean.FALSE; if (XMLInputFactory.IS_VALIDATING.equals(name)) return Boolean.FALSE; if (XMLInputFactory.REPORTER.equals(name)) return reporter; if (XMLInputFactory.RESOLVER.equals(name)) return resolver; if (XMLInputFactory.SUPPORT_DTD.equals(name)) return supportDTD ? Boolean.TRUE : Boolean.FALSE; if ("gnu.xml.stream.stringInterning".equals(name)) return stringInterning ? Boolean.TRUE : Boolean.FALSE; if ("gnu.xml.stream.xmlBase".equals(name)) return baseAware ? Boolean.TRUE : Boolean.FALSE; if ("gnu.xml.stream.baseURI".equals(name)) return getXMLBase(); return null; } public String getText() { return buf.toString(); } public char[] getTextCharacters() { return buf.toString().toCharArray(); } public int getTextCharacters(int sourceStart, char[] target, int targetStart, int length) throws XMLStreamException { length = Math.min(sourceStart + buf.length(), length); int sourceEnd = sourceStart + length; buf.getChars(sourceStart, sourceEnd, target, targetStart); return length; } public int getTextLength() { return buf.length(); } public int getTextStart() { return 0; } public String getVersion() { return (xmlVersion == null) ? 
"1.0" : xmlVersion; } public boolean hasName() { switch (event) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: return true; default: return false; } } public boolean hasText() { switch (event) { case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.SPACE: return true; default: return false; } } public boolean isAttributeSpecified(int index) { Attribute a = (Attribute) attrs.get(index); return a.specified; } public boolean isCharacters() { return (event == XMLStreamConstants.CHARACTERS); } public boolean isEndElement() { return (event == XMLStreamConstants.END_ELEMENT); } public boolean isStandalone() { return Boolean.TRUE.equals(xmlStandalone); } public boolean isStartElement() { return (event == XMLStreamConstants.START_ELEMENT); } public boolean isWhiteSpace() { return (event == XMLStreamConstants.SPACE); } public int nextTag() throws XMLStreamException { do { switch (next()) { case XMLStreamConstants.START_ELEMENT: case XMLStreamConstants.END_ELEMENT: case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.SPACE: case XMLStreamConstants.COMMENT: case XMLStreamConstants.PROCESSING_INSTRUCTION: break; default: throw new XMLStreamException("Unexpected event type: " + event); } } while (event != XMLStreamConstants.START_ELEMENT && event != XMLStreamConstants.END_ELEMENT); return event; } public void require(int type, String namespaceURI, String localName) throws XMLStreamException { if (event != type) throw new XMLStreamException("Current event type is " + event); if (event == XMLStreamConstants.START_ELEMENT || event == XMLStreamConstants.END_ELEMENT) { String ln = getLocalName(); if (!ln.equals(localName)) throw new XMLStreamException("Current local-name is " + ln); String uri = getNamespaceURI(); if ((uri == null && namespaceURI != null) || (uri != null && !uri.equals(namespaceURI))) throw new XMLStreamException("Current namespace URI is " + uri); } } public boolean standaloneSet() { return (xmlStandalone != null); } 
public boolean hasNext() throws XMLStreamException { return (event != XMLStreamConstants.END_DOCUMENT && event != -1); } public int next() throws XMLStreamException { if (event == XMLStreamConstants.END_ELEMENT) { // Pop namespace context if (namespaceAware && !namespaces.isEmpty()) namespaces.removeFirst(); // Pop base context if (baseAware && !bases.isEmpty()) bases.removeFirst(); } if (!startEntityStack.isEmpty()) { String entityName = (String) startEntityStack.removeFirst(); buf.setLength(0); buf.append(entityName); event = START_ENTITY; return extendedEventTypes ? event : next(); } else if (!endEntityStack.isEmpty()) { String entityName = (String) endEntityStack.removeFirst(); buf.setLength(0); buf.append(entityName); event = END_ENTITY; return extendedEventTypes ? event : next(); } try { if (!input.initialized) input.init(); switch (state) { case CONTENT: if (tryRead(TEST_END_ELEMENT)) { readEndElement(); if (stack.isEmpty()) state = MISC; event = XMLStreamConstants.END_ELEMENT; } else if (tryRead(TEST_COMMENT)) { readComment(false); event = XMLStreamConstants.COMMENT; } else if (tryRead(TEST_PI)) { readPI(false); event = XMLStreamConstants.PROCESSING_INSTRUCTION; } else if (tryRead(TEST_CDATA)) { readCDSect(); event = XMLStreamConstants.CDATA; } else if (tryRead(TEST_START_ELEMENT)) { state = readStartElement(); event = XMLStreamConstants.START_ELEMENT; } else { // Check for character reference or predefined entity mark(8); int c = readCh(); if (c == 0x26) // '&' { c = readCh(); if (c == 0x23) // '#' { reset(); event = readCharData(null); } else { // entity reference reset(); readCh(); // & readReference(); String ref = buf.toString(); String text = (String) PREDEFINED_ENTITIES.get(ref); if (text != null) { event = readCharData(text); } else if (replaceERefs && !isUnparsedEntity(ref)) { // this will report a start-entity event boolean external = false; if (doctype != null) { Object entity = doctype.getEntity(ref); if (entity instanceof ExternalIds) external 
= true; } expandEntity(ref, false, external); event = next(); } else { event = XMLStreamConstants.ENTITY_REFERENCE; } } } else { reset(); event = readCharData(null); if (validating && doctype != null) validatePCData(buf.toString()); } } break; case EMPTY_ELEMENT: String elementName = (String) stack.removeLast(); buf.setLength(0); buf.append(elementName); state = stack.isEmpty() ? MISC : CONTENT; event = XMLStreamConstants.END_ELEMENT; if (validating && doctype != null) endElementValidationHook(); break; case INIT: // XMLDecl? if (tryRead(TEST_XML_DECL)) readXMLDecl(); input.finalizeEncoding(); event = XMLStreamConstants.START_DOCUMENT; state = PROLOG; break; case PROLOG: // Misc* (doctypedecl Misc*)? skipWhitespace(); if (doctype == null && tryRead(TEST_DOCTYPE_DECL)) { readDoctypeDecl(); event = XMLStreamConstants.DTD; } else if (tryRead(TEST_COMMENT)) { readComment(false); event = XMLStreamConstants.COMMENT; } else if (tryRead(TEST_PI)) { readPI(false); event = XMLStreamConstants.PROCESSING_INSTRUCTION; } else if (tryRead(TEST_START_ELEMENT)) { state = readStartElement(); event = XMLStreamConstants.START_ELEMENT; } else { int c = readCh(); error("no root element: U+" + Integer.toHexString(c)); } break; case MISC: // Comment | PI | S skipWhitespace(); if (tryRead(TEST_COMMENT)) { readComment(false); event = XMLStreamConstants.COMMENT; } else if (tryRead(TEST_PI)) { readPI(false); event = XMLStreamConstants.PROCESSING_INSTRUCTION; } else { if (event == XMLStreamConstants.END_DOCUMENT) throw new NoSuchElementException(); int c = readCh(); if (c != -1) error("Only comments and PIs may appear after " + "the root element"); event = XMLStreamConstants.END_DOCUMENT; } break; default: event = -1; } return event; } catch (IOException e) { XMLStreamException e2 = new XMLStreamException(); e2.initCause(e); throw e2; } } // package private /** * Returns the current element name. 
*/ String getCurrentElement() { return (String) stack.getLast(); } // private private void mark(int limit) throws IOException { input.mark(limit); } private void reset() throws IOException { input.reset(); } private int read() throws IOException { return input.read(); } private int read(int[] b, int off, int len) throws IOException { return input.read(b, off, len); } /** * Parsed character read. */ private int readCh() throws IOException, XMLStreamException { int c = read(); if (expandPE && c == 0x25) // '%' { if (peIsError) error("PE reference within decl in internal subset."); expandPEReference(); return readCh(); } return c; } /** * Reads the next character, ensuring it is the character specified. * @param delim the character to match * @exception XMLStreamException if the next character is not the * specified one */ private void require(char delim) throws IOException, XMLStreamException { mark(1); int c = readCh(); if (delim != c) { reset(); error("required character (got U+" + Integer.toHexString(c) + ")", new Character(delim)); } } /** * Reads the next few characters, ensuring they match the string specified. * @param delim the string to match * @exception XMLStreamException if the next characters do not match the * specified string */ private void require(String delim) throws IOException, XMLStreamException { char[] chars = delim.toCharArray(); int len = chars.length; mark(len); int off = 0; do { int l2 = read(tmpBuf, off, len - off); if (l2 == -1) { reset(); error("EOF before required string", delim); } off += l2; } while (off < len); for (int i = 0; i < chars.length; i++) { if (chars[i] != tmpBuf[i]) { reset(); error("required string", delim); } } } /** * Try to read a single character. On failure, reset the stream. * @param delim the character to test * @return true if the character matched delim, false otherwise. 
*/ private boolean tryRead(char delim) throws IOException, XMLStreamException { mark(1); int c = readCh(); if (delim != c) { reset(); return false; } return true; } /** * Tries to read the specified characters. * If successful, the stream is positioned after the last character, * otherwise it is reset. * @param test the string to test * @return true if the characters matched the test string, false otherwise. */ private boolean tryRead(String test) throws IOException { char[] chars = test.toCharArray(); int len = chars.length; mark(len); int count = 0; int l2 = read(tmpBuf, 0, len); if (l2 == -1) { reset(); return false; } count += l2; // check the characters we received first before doing additional reads for (int i = 0; i < count; i++) { if (chars[i] != tmpBuf[i]) { reset(); return false; } } while (count < len) { // force read int c = read(); if (c == -1) { reset(); return false; } tmpBuf[count] = (char) c; // check each character as it is read if (chars[count] != tmpBuf[count]) { reset(); return false; } count++; } return true; } /** * Reads characters until the specified test string is encountered. * @param delim the string delimiting the end of the characters */ private void readUntil(String delim) throws IOException, XMLStreamException { int startLine = input.line; try { while (!tryRead(delim)) { int c = readCh(); if (c == -1) throw new EOFException(); else if (input.xml11) { if (!isXML11Char(c) || isXML11RestrictedChar(c)) error("illegal XML 1.1 character", "U+" + Integer.toHexString(c)); } else if (!isChar(c)) error("illegal XML character", "U+" + Integer.toHexString(c)); buf.append(Character.toChars(c)); } } catch (EOFException e) { error("end of input while looking for delimiter "+ "(started on line " + startLine + ')', delim); } } /** * Reads any whitespace characters. 
* @return true if whitespace characters were read, false otherwise */ private boolean tryWhitespace() throws IOException, XMLStreamException { boolean white; boolean ret = false; do { mark(1); int c = readCh(); while (c == -1 && inputStack.size() > 1) { popInput(); c = readCh(); } white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); if (white) ret = true; } while (white); reset(); return ret; } /** * Skip over any whitespace characters. */ private void skipWhitespace() throws IOException, XMLStreamException { boolean white; do { mark(1); int c = readCh(); while (c == -1 && inputStack.size() > 1) { popInput(); c = readCh(); } white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); } while (white); reset(); } /** * Try to read as many whitespace characters as are available. * @exception XMLStreamException if no whitespace characters were seen */ private void requireWhitespace() throws IOException, XMLStreamException { if (!tryWhitespace()) error("whitespace required"); } /** * Returns the current base URI for resolving external entities. */ String getXMLBase() { if (baseAware) { for (Iterator i = bases.iterator(); i.hasNext(); ) { String base = (String) i.next(); if (base != null) return base; } } return input.systemId; } /** * Push the specified text input source. */ private void pushInput(String name, String text, boolean report, boolean normalize) throws IOException, XMLStreamException { // Check for recursion if (name != null && !"".equals(name)) { for (Iterator i = inputStack.iterator(); i.hasNext(); ) { Input ctx = (Input) i.next(); if (name.equals(ctx.name)) error("entities may not be self-recursive", name); } } else report = false; pushInput(new Input(null, new StringReader(text), input.publicId, input.systemId, name, input.inputEncoding, report, normalize)); } /** * Push the specified external input source. 
*/ private void pushInput(String name, ExternalIds ids, boolean report, boolean normalize) throws IOException, XMLStreamException { if (!externalEntities) return; String url = canonicalize(absolutize(input.systemId, ids.systemId)); // Check for recursion for (Iterator i = inputStack.iterator(); i.hasNext(); ) { Input ctx = (Input) i.next(); if (url.equals(ctx.systemId)) error("entities may not be self-recursive", url); if (name != null && !"".equals(name) && name.equals(ctx.name)) error("entities may not be self-recursive", name); } if (name == null || "".equals(name)) report = false; InputStream in = null; if (resolver != null) { Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(), null); if (obj instanceof InputStream) in = (InputStream) obj; } if (in == null) in = resolve(url); if (in == null) error("unable to resolve external entity", (ids.systemId != null) ? ids.systemId : ids.publicId); pushInput(new Input(in, null, ids.publicId, url, name, null, report, normalize)); input.init(); if (tryRead(TEST_XML_DECL)) readTextDecl(); input.finalizeEncoding(); } /** * Push the specified input source (general entity) onto the input stack. */ private void pushInput(Input input) { if (input.report) startEntityStack.addFirst(input.name); inputStack.addLast(input); if (this.input != null) input.xml11 = this.input.xml11; this.input = input; } /** * Returns a canonicalized version of the specified URL. * This is largely to work around a problem with the specification of * file URLs. */ static String canonicalize(String url) { if (url == null) return null; if (url.startsWith("file:") && !url.startsWith("file://")) url = "file://" + url.substring(5); return url; } /** * "Absolutize" a URL. This resolves a relative URL into an absolute one. 
* @param base the current base URL * @param href the (absolute or relative) URL to resolve */ public static String absolutize(String base, String href) { if (href == null) return null; int ci = href.indexOf(':'); if (ci > 1 && isURLScheme(href.substring(0, ci))) { // href is absolute already return href; } if (base == null) base = ""; else { int i = base.lastIndexOf('/'); if (i != -1) base = base.substring(0, i + 1); else base = ""; } if ("".equals(base)) { // assume file URL relative to current directory base = System.getProperty("user.dir"); if (base.charAt(0) == '/') base = base.substring(1); base = "file:///" + base.replace(File.separatorChar, '/'); if (!base.endsWith("/")) base += "/"; } // We can't use java.net.URL here to do the parsing, as it searches for // a protocol handler. A protocol handler may not be registered for the // URL scheme here. Do it manually. // // Set aside scheme and host portion of base URL String basePrefix = null; ci = base.indexOf(':'); if (ci > 1 && isURLScheme(base.substring(0, ci))) { if (base.length() > (ci + 3) && base.charAt(ci + 1) == '/' && base.charAt(ci + 2) == '/') { int si = base.indexOf('/', ci + 3); if (si == -1) base = null; else { basePrefix = base.substring(0, si); base = base.substring(si); } } else base = null; } if (base == null) // unknown or malformed base URL, use href return href; if (href.startsWith("/")) // absolute href pathname return (basePrefix == null) ? href : basePrefix + href; // relative href pathname if (!base.endsWith("/")) { int lsi = base.lastIndexOf('/'); if (lsi == -1) base = "/"; else base = base.substring(0, lsi + 1); } while (href.startsWith("../") || href.startsWith("./")) { if (href.startsWith("../")) { // strip last path component from base int lsi = base.lastIndexOf('/', base.length() - 2); if (lsi > -1) base = base.substring(0, lsi + 1); href = href.substring(3); // strip ../ prefix } else { href = href.substring(2); // strip ./ prefix } } return (basePrefix == null) ? 
base + href : basePrefix + base + href; } /** * Indicates whether the specified characters match the scheme portion of * a URL. * @see RFC 1738 section 2.1 */ private static boolean isURLScheme(String text) { int len = text.length(); for (int i = 0; i < len; i++) { char c = text.charAt(i); if (c == '+' || c == '.' || c == '-') continue; if (c < 65 || (c > 90 && c < 97) || c > 122) return false; } return true; } /** * Returns an input stream for the given URL. */ static InputStream resolve(String url) throws IOException { try { return new URL(url).openStream(); } catch (MalformedURLException e) { return null; } catch (IOException e) { IOException e2 = new IOException("error resolving " + url); e2.initCause(e); throw e2; } } /** * Pops the current input source (general entity) off the stack. */ private void popInput() { Input old = (Input) inputStack.removeLast(); if (old.report) endEntityStack.addFirst(old.name); input = (Input) inputStack.getLast(); } /** * Parse an entity text declaration. */ private void readTextDecl() throws IOException, XMLStreamException { final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; requireWhitespace(); if (tryRead("version")) { readEq(); String v = readLiteral(flags, false); if ("1.0".equals(v)) input.xml11 = false; else if ("1.1".equals(v)) { Input i1 = (Input) inputStack.getFirst(); if (!i1.xml11) error("external entity specifies later version number"); input.xml11 = true; } else throw new XMLStreamException("illegal XML version: " + v); requireWhitespace(); } require("encoding"); readEq(); String enc = readLiteral(flags, false); skipWhitespace(); require("?>"); input.setInputEncoding(enc); } /** * Parse the XML declaration. 
*/ private void readXMLDecl() throws IOException, XMLStreamException { final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; requireWhitespace(); require("version"); readEq(); xmlVersion = readLiteral(flags, false); if ("1.0".equals(xmlVersion)) input.xml11 = false; else if ("1.1".equals(xmlVersion)) input.xml11 = true; else throw new XMLStreamException("illegal XML version: " + xmlVersion); boolean white = tryWhitespace(); if (tryRead("encoding")) { if (!white) error("whitespace required before 'encoding='"); readEq(); xmlEncoding = readLiteral(flags, false); white = tryWhitespace(); } if (tryRead("standalone")) { if (!white) error("whitespace required before 'standalone='"); readEq(); String standalone = readLiteral(flags, false); if ("yes".equals(standalone)) xmlStandalone = Boolean.TRUE; else if ("no".equals(standalone)) xmlStandalone = Boolean.FALSE; else error("standalone flag must be 'yes' or 'no'", standalone); } skipWhitespace(); require("?>"); if (xmlEncoding != null) input.setInputEncoding(xmlEncoding); } /** * Parse the DOCTYPE declaration. 
*/ private void readDoctypeDecl() throws IOException, XMLStreamException { if (!supportDTD) error("parser was configured not to support DTDs"); requireWhitespace(); String rootName = readNmtoken(true); skipWhitespace(); ExternalIds ids = readExternalIds(false, true); doctype = this.new Doctype(rootName, ids.publicId, ids.systemId); // Parse internal subset first skipWhitespace(); if (tryRead('[')) { while (true) { expandPE = true; skipWhitespace(); expandPE = false; if (tryRead(']')) break; else readMarkupdecl(false); } } skipWhitespace(); require('>'); // Parse external subset if (ids.systemId != null && externalEntities) { pushInput("", ">", false, false); pushInput("[dtd]", ids, true, true); // loop until we get back to ">" while (true) { expandPE = true; skipWhitespace(); expandPE = false; mark(1); int c = readCh(); if (c == 0x3e) // '>' break; else if (c == -1) popInput(); else { reset(); expandPE = true; readMarkupdecl(true); expandPE = true; } } if (inputStack.size() != 2) error("external subset has unmatched '>'"); popInput(); } checkDoctype(); if (validating) validateDoctype(); // Make rootName available for reading buf.setLength(0); buf.append(rootName); } /** * Checks the well-formedness of the DTD. */ private void checkDoctype() throws XMLStreamException { // TODO check entity recursion } /** * Parse the markupdecl production. 
*/ private void readMarkupdecl(boolean inExternalSubset) throws IOException, XMLStreamException { boolean saved = expandPE; mark(1); require('<'); reset(); expandPE = false; if (tryRead(TEST_ELEMENT_DECL)) { expandPE = saved; readElementDecl(); } else if (tryRead(TEST_ATTLIST_DECL)) { expandPE = saved; readAttlistDecl(); } else if (tryRead(TEST_ENTITY_DECL)) { expandPE = saved; readEntityDecl(inExternalSubset); } else if (tryRead(TEST_NOTATION_DECL)) { expandPE = saved; readNotationDecl(inExternalSubset); } else if (tryRead(TEST_PI)) { readPI(true); expandPE = saved; } else if (tryRead(TEST_COMMENT)) { readComment(true); expandPE = saved; } else if (tryRead("")) { readMarkupdecl(inExternalSubset); skipWhitespace(); } } else if (tryRead("IGNORE")) { skipWhitespace(); require('['); expandPE = false; for (int nesting = 1; nesting > 0; ) { int c = readCh(); switch (c) { case 0x3c: // '<' if (tryRead("![")) nesting++; break; case 0x5d: // ']' if (tryRead("]>")) nesting--; break; case -1: throw new EOFException(); } } expandPE = saved; } else error("conditional section must begin with INCLUDE or IGNORE"); } else error("expected markup declaration"); } /** * Parse the elementdecl production. */ private void readElementDecl() throws IOException, XMLStreamException { requireWhitespace(); boolean saved = expandPE; expandPE = (inputStack.size() > 1); String name = readNmtoken(true); expandPE = saved; requireWhitespace(); readContentspec(name); skipWhitespace(); require('>'); } /** * Parse the contentspec production. 
*/
  private void readContentspec(String elementName)
      throws IOException, XMLStreamException {
    if (tryRead("EMPTY")) {
      doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
      return;
    }
    if (tryRead("ANY")) {
      doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
      return;
    }
    ContentModel model;
    // acc accumulates the textual form of the content model as it is read.
    CPStringBuilder acc = new CPStringBuilder();
    require('(');
    acc.append('(');
    skipWhitespace();
    if (tryRead("#PCDATA")) {
      // mixed content
      acc.append("#PCDATA");
      MixedContentModel mm = new MixedContentModel();
      model = mm;
      skipWhitespace();
      if (tryRead(')')) {
        acc.append(")");
        if (tryRead('*')) {
          // Record the occurrence indicator in the text as well, as is
          // done for every other content model form.
          acc.append('*');
          mm.min = 0;
          mm.max = -1;
        }
      } else {
        while (!tryRead(")")) {
          require('|');
          acc.append('|');
          skipWhitespace();
          String name = readNmtoken(true);
          acc.append(name);
          mm.addName(name);
          skipWhitespace();
        }
        // Mixed content that names elements must be followed by '*'.
        require('*');
        acc.append(")*");
        mm.min = 0;
        mm.max = -1;
      }
    } else {
      model = readElements(acc);
    }
    doctype.addElementDecl(elementName, acc.toString(), model);
  }

  /**
   * Parses an element content model.
   * The opening parenthesis has already been consumed by the caller.
   * @param acc receives the textual form of what is read
   * @return the model built from the content particles
   */
  private ElementContentModel readElements(CPStringBuilder acc)
      throws IOException, XMLStreamException {
    ElementContentModel model = new ElementContentModel();

    // Parse first content particle
    skipWhitespace();
    model.addContentParticle(readContentParticle(acc));

    // End or separator
    skipWhitespace();
    int c = readCh();
    if (c != 0x29) { // not ')': more particles follow
      int separator;
      switch (c) {
        case 0x7c: // '|'
          model.or = true;
          // fall through
        case 0x2c: // ','
          separator = c;
          acc.append(Character.toChars(c));
          break;
        default:
          error("bad separator in content model",
                "U+" + Integer.toHexString(c));
          return model;
      }
      // Parse subsequent content particles
      while (true) {
        skipWhitespace();
        model.addContentParticle(readContentParticle(acc));
        skipWhitespace();
        c = readCh();
        if (c == 0x29) { // ')'
          break;
        }
        if (c != separator) {
          error("bad separator in content model",
                "U+" + Integer.toHexString(c));
          return model;
        }
        // Append the separator as a character; appending the int itself
        // would write its decimal value ("44", "124") into the text.
        acc.append(Character.toChars(c));
      }
    }
    acc.append(')');

    // Check for occurrence indicator
    mark(1);
    c = readCh();
    switch (c) {
      case 0x3f: // '?'
        acc.append('?');
        model.min = 0;
        model.max = 1;
        break;
      case 0x2a: // '*'
        acc.append('*');
        model.min = 0;
        model.max = -1;
        break;
      case 0x2b: // '+'
        acc.append('+');
        model.min = 1;
        model.max = -1;
        break;
      default:
        reset();
    }
    return model;
  }

  /**
   * Parse a cp production.
   * A particle is either a parenthesised group, read through
   * readElements, or a name with an optional occurrence indicator.
   */
  private ContentParticle readContentParticle(CPStringBuilder acc)
      throws IOException, XMLStreamException {
    ContentParticle cp = new ContentParticle();
    if (tryRead('(')) {
      acc.append('(');
      cp.content = readElements(acc);
      return cp;
    }
    String name = readNmtoken(true);
    acc.append(name);
    cp.content = name;
    mark(1);
    int c = readCh();
    switch (c) {
      case 0x3f: // '?'
        acc.append('?');
        cp.min = 0;
        cp.max = 1;
        break;
      case 0x2a: // '*'
        acc.append('*');
        cp.min = 0;
        cp.max = -1;
        break;
      case 0x2b: // '+'
        acc.append('+');
        cp.min = 1;
        cp.max = -1;
        break;
      default:
        reset();
    }
    return cp;
  }

  /**
   * Parse an attribute-list definition.
*/ private void readAttlistDecl() throws IOException, XMLStreamException { requireWhitespace(); boolean saved = expandPE; expandPE = (inputStack.size() > 1); String elementName = readNmtoken(true); expandPE = saved; boolean white = tryWhitespace(); while (!tryRead('>')) { if (!white) error("whitespace required before attribute definition"); readAttDef(elementName); white = tryWhitespace(); } } /** * Parse a single attribute definition. */ private void readAttDef(String elementName) throws IOException, XMLStreamException { String name = readNmtoken(true); requireWhitespace(); CPStringBuilder acc = new CPStringBuilder(); HashSet values = new HashSet(); String type = readAttType(acc, values); if (validating) { if ("ID".equals(type)) { // VC: One ID per Element Type for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); AttributeDecl decl = (AttributeDecl) entry.getValue(); if ("ID".equals(decl.type)) error("element types must not have more than one ID " + "attribute"); } } else if ("NOTATION".equals(type)) { // VC: One Notation Per Element Type for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); AttributeDecl decl = (AttributeDecl) entry.getValue(); if ("NOTATION".equals(decl.type)) error("element types must not have more than one NOTATION " + "attribute"); } // VC: No Notation on Empty Element ContentModel model = doctype.getElementModel(elementName); if (model != null && model.type == ContentModel.EMPTY) error("attributes of type NOTATION must not be declared on an " + "element declared EMPTY"); } } String enumer = null; if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) enumer = acc.toString(); else values = null; requireWhitespace(); readDefault(elementName, name, type, enumer, values); } /** * Parse an attribute type. 
*/ private String readAttType(CPStringBuilder acc, HashSet values) throws IOException, XMLStreamException { if (tryRead('(')) { readEnumeration(false, acc, values); return "ENUMERATION"; } else { String typeString = readNmtoken(true); if ("NOTATION".equals(typeString)) { readNotationType(acc, values); return typeString; } else if ("CDATA".equals(typeString) || "ID".equals(typeString) || "IDREF".equals(typeString) || "IDREFS".equals(typeString) || "ENTITY".equals(typeString) || "ENTITIES".equals(typeString) || "NMTOKEN".equals(typeString) || "NMTOKENS".equals(typeString)) return typeString; else { error("illegal attribute type", typeString); return null; } } } /** * Parse an enumeration. */ private void readEnumeration(boolean isNames, CPStringBuilder acc, HashSet values) throws IOException, XMLStreamException { acc.append('('); // first token skipWhitespace(); String token = readNmtoken(isNames); acc.append(token); values.add(token); // subsequent tokens skipWhitespace(); while (!tryRead(')')) { require('|'); acc.append('|'); skipWhitespace(); token = readNmtoken(isNames); // VC: No Duplicate Tokens if (validating && values.contains(token)) error("duplicate token", token); acc.append(token); values.add(token); skipWhitespace(); } acc.append(')'); } /** * Parse a notation type for an attribute. */ private void readNotationType(CPStringBuilder acc, HashSet values) throws IOException, XMLStreamException { requireWhitespace(); require('('); readEnumeration(true, acc, values); } /** * Parse the default value for an attribute. 
*/ private void readDefault(String elementName, String name, String type, String enumeration, HashSet values) throws IOException, XMLStreamException { int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; int flags = LIT_ATTRIBUTE; String value = null, defaultType = null; boolean saved = expandPE; if (!"CDATA".equals(type)) flags |= LIT_NORMALIZE; expandPE = false; if (tryRead('#')) { if (tryRead("FIXED")) { defaultType = "#FIXED"; valueType = ATTRIBUTE_DEFAULT_FIXED; requireWhitespace(); value = readLiteral(flags, false); } else if (tryRead("REQUIRED")) { defaultType = "#REQUIRED"; valueType = ATTRIBUTE_DEFAULT_REQUIRED; } else if (tryRead("IMPLIED")) { defaultType = "#IMPLIED"; valueType = ATTRIBUTE_DEFAULT_IMPLIED; } else error("illegal keyword for attribute default value"); } else value = readLiteral(flags, false); expandPE = saved; if (validating) { if ("ID".equals(type)) { // VC: Attribute Default Value Syntactically Correct if (value != null && !isNmtoken(value, true)) error("default value must match Name production", value); // VC: ID Attribute Default if (valueType != ATTRIBUTE_DEFAULT_REQUIRED && valueType != ATTRIBUTE_DEFAULT_IMPLIED) error("ID attributes must have a declared default of " + "#IMPLIED or #REQUIRED"); } else if (value != null) { // VC: Attribute Default Value Syntactically Correct if ("IDREF".equals(type) || "ENTITY".equals(type)) { if (!isNmtoken(value, true)) error("default value must match Name production", value); } else if ("IDREFS".equals(type) || "ENTITIES".equals(type)) { StringTokenizer st = new StringTokenizer(value); while (st.hasMoreTokens()) { String token = st.nextToken(); if (!isNmtoken(token, true)) error("default value must match Name production", token); } } else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type)) { if (!isNmtoken(value, false)) error("default value must match Nmtoken production", value); } else if ("NMTOKENS".equals(type)) { StringTokenizer st = new StringTokenizer(value); while (st.hasMoreTokens()) { 
String token = st.nextToken(); if (!isNmtoken(token, false)) error("default value must match Nmtoken production", token); } } } } // Register attribute def AttributeDecl attribute = new AttributeDecl(type, value, valueType, enumeration, values, inputStack.size() != 1); doctype.addAttributeDecl(elementName, name, attribute); } /** * Parse the EntityDecl production. */ private void readEntityDecl(boolean inExternalSubset) throws IOException, XMLStreamException { int flags = 0; // Check if parameter entity boolean peFlag = false; expandPE = false; requireWhitespace(); if (tryRead('%')) { peFlag = true; requireWhitespace(); } expandPE = true; // Read entity name String name = readNmtoken(true); if (name.indexOf(':') != -1) error("illegal character ':' in entity name", name); if (peFlag) name = "%" + name; requireWhitespace(); mark(1); int c = readCh(); reset(); if (c == 0x22 || c == 0x27) // " | ' { // Internal entity replacement text String value = readLiteral(flags | LIT_DISABLE_EREF, true); int ai = value.indexOf('&'); while (ai != -1) { int sci = value.indexOf(';', ai); if (sci == -1) error("malformed reference in entity value", value); String ref = value.substring(ai + 1, sci); int[] cp = UnicodeReader.toCodePointArray(ref); if (cp.length == 0) error("malformed reference in entity value", value); if (cp[0] == 0x23) // # { if (cp.length == 1) error("malformed reference in entity value", value); if (cp[1] == 0x78) // 'x' { if (cp.length == 2) error("malformed reference in entity value", value); for (int i = 2; i < cp.length; i++) { int x = cp[i]; if (x < 0x30 || (x > 0x39 && x < 0x41) || (x > 0x46 && x < 0x61) || x > 0x66) error("malformed character reference in entity value", value); } } else { for (int i = 1; i < cp.length; i++) { int x = cp[i]; if (x < 0x30 || x > 0x39) error("malformed character reference in entity value", value); } } } else { if (!isNameStartCharacter(cp[0], input.xml11)) error("malformed reference in entity value", value); for (int i = 1; i < 
cp.length; i++) { if (!isNameCharacter(cp[i], input.xml11)) error("malformed reference in entity value", value); } } ai = value.indexOf('&', sci); } doctype.addEntityDecl(name, value, inExternalSubset); } else { ExternalIds ids = readExternalIds(false, false); // Check for NDATA boolean white = tryWhitespace(); if (!peFlag && tryRead("NDATA")) { if (!white) error("whitespace required before NDATA"); requireWhitespace(); ids.notationName = readNmtoken(true); } doctype.addEntityDecl(name, ids, inExternalSubset); } // finish skipWhitespace(); require('>'); } /** * Parse the NotationDecl production. */ private void readNotationDecl(boolean inExternalSubset) throws IOException, XMLStreamException { requireWhitespace(); String notationName = readNmtoken(true); if (notationName.indexOf(':') != -1) error("illegal character ':' in notation name", notationName); if (validating) { // VC: Unique Notation Name ExternalIds notation = doctype.getNotation(notationName); if (notation != null) error("duplicate notation name", notationName); } requireWhitespace(); ExternalIds ids = readExternalIds(true, false); ids.notationName = notationName; doctype.addNotationDecl(notationName, ids, inExternalSubset); skipWhitespace(); require('>'); } /** * Returns a tuple {publicId, systemId}. 
*/ private ExternalIds readExternalIds(boolean inNotation, boolean isSubset) throws IOException, XMLStreamException { int c; int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; ExternalIds ids = new ExternalIds(); if (tryRead("PUBLIC")) { requireWhitespace(); ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false); if (inNotation) { skipWhitespace(); mark(1); c = readCh(); reset(); if (c == 0x22 || c == 0x27) // " | ' { String href = readLiteral(flags, false); ids.systemId = absolutize(input.systemId, href); } } else { requireWhitespace(); String href = readLiteral(flags, false); ids.systemId = absolutize(input.systemId, href); } // Check valid URI characters for (int i = 0; i < ids.publicId.length(); i++) { char d = ids.publicId.charAt(i); if (d >= 'a' && d <= 'z') continue; if (d >= 'A' && d <= 'Z') continue; if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1) continue; error("illegal PUBLIC id character", "U+" + Integer.toHexString(d)); } } else if (tryRead("SYSTEM")) { requireWhitespace(); String href = readLiteral(flags, false); ids.systemId = absolutize(input.systemId, href); } else if (!isSubset) { error("missing SYSTEM or PUBLIC keyword"); } if (ids.systemId != null && !inNotation) { if (ids.systemId.indexOf('#') != -1) error("SYSTEM id has a URI fragment", ids.systemId); } return ids; } /** * Parse the start of an element. 
* @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT) */ private int readStartElement() throws IOException, XMLStreamException { // Read element name String elementName = readNmtoken(true); attrs.clear(); // Push namespace context if (namespaceAware) { if (elementName.charAt(0) == ':' || elementName.charAt(elementName.length() - 1) == ':') error("not a QName", elementName); namespaces.addFirst(new LinkedHashMap()); } // Read element content boolean white = tryWhitespace(); mark(1); int c = readCh(); while (c != 0x2f && c != 0x3e) // '/' | '>' { // Read attribute reset(); if (!white) error("need whitespace between attributes"); readAttribute(elementName); white = tryWhitespace(); mark(1); c = readCh(); } // supply defaulted attributes if (doctype != null) { for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); String attName = (String) entry.getKey(); AttributeDecl decl = (AttributeDecl) entry.getValue(); if (validating) { switch (decl.valueType) { case ATTRIBUTE_DEFAULT_REQUIRED: // VC: Required Attribute if (decl.value == null && !attributeSpecified(attName)) error("value for " + attName + " attribute is required"); break; case ATTRIBUTE_DEFAULT_FIXED: // VC: Fixed Attribute Default for (Iterator j = attrs.iterator(); j.hasNext(); ) { Attribute a = (Attribute) j.next(); if (attName.equals(a.name) && !decl.value.equals(a.value)) error("value for " + attName + " attribute must be " + decl.value); } break; } } if (namespaceAware && attName.equals("xmlns")) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) continue; // namespace was specified } else if (namespaceAware && attName.startsWith("xmlns:")) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.containsKey(attName.substring(6))) continue; // namespace was specified } else if (attributeSpecified(attName)) continue; if (decl.value == null) continue; // VC: 
Standalone Document Declaration if (validating && decl.external && xmlStandalone == Boolean.TRUE) error("standalone must be 'no' if attributes inherit values " + "from externally declared markup declarations"); Attribute attr = new Attribute(attName, decl.type, false, decl.value); if (namespaceAware) { if (!addNamespace(attr)) attrs.add(attr); } else attrs.add(attr); } } if (baseAware) { String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base"); String base = getXMLBase(); bases.addFirst(absolutize(base, uri)); } if (namespaceAware) { // check prefix bindings int ci = elementName.indexOf(':'); if (ci != -1) { String prefix = elementName.substring(0, ci); String uri = getNamespaceURI(prefix); if (uri == null) error("unbound element prefix", prefix); else if (input.xml11 && "".equals(uri)) error("XML 1.1 unbound element prefix", prefix); } for (Iterator i = attrs.iterator(); i.hasNext(); ) { Attribute attr = (Attribute) i.next(); if (attr.prefix != null && !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix)) { String uri = getNamespaceURI(attr.prefix); if (uri == null) error("unbound attribute prefix", attr.prefix); else if (input.xml11 && "".equals(uri)) error("XML 1.1 unbound attribute prefix", attr.prefix); } } } if (validating && doctype != null) { validateStartElement(elementName); currentContentModel = doctype.getElementModel(elementName); if (currentContentModel == null) error("no element declaration", elementName); validationStack.add(new LinkedList()); } // make element name available for read buf.setLength(0); buf.append(elementName); // push element onto stack stack.addLast(elementName); switch (c) { case 0x3e: // '>' return CONTENT; case 0x2f: // '/' require('>'); return EMPTY_ELEMENT; } return -1; // to satisfy compiler } /** * Indicates whether the specified attribute name was specified for the * current element. 
*/ private boolean attributeSpecified(String attName) { for (Iterator j = attrs.iterator(); j.hasNext(); ) { Attribute a = (Attribute) j.next(); if (attName.equals(a.name)) return true; } return false; } /** * Parse an attribute. */ private void readAttribute(String elementName) throws IOException, XMLStreamException { // Read attribute name String attributeName = readNmtoken(true); String type = getAttributeType(elementName, attributeName); readEq(); // Read literal final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; String value = (type == null || "CDATA".equals(type)) ? readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false); // add attribute event Attribute attr = this.new Attribute(attributeName, type, true, value); if (namespaceAware) { if (attributeName.charAt(0) == ':' || attributeName.charAt(attributeName.length() - 1) == ':') error("not a QName", attributeName); else if (attributeName.equals("xmlns")) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) error("duplicate default namespace"); } else if (attributeName.startsWith("xmlns:")) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.containsKey(attributeName.substring(6))) error("duplicate namespace", attributeName.substring(6)); } else if (attrs.contains(attr)) error("duplicate attribute", attributeName); } else if (attrs.contains(attr)) error("duplicate attribute", attributeName); if (validating && doctype != null) { // VC: Attribute Value Type AttributeDecl decl = doctype.getAttributeDecl(elementName, attributeName); if (decl == null) error("attribute must be declared", attributeName); if ("ENUMERATION".equals(decl.type)) { // VC: Enumeration if (!decl.values.contains(value)) error("value does not match enumeration " + decl.enumeration, value); } else if ("ID".equals(decl.type)) { // VC: ID if (!isNmtoken(value, true)) error("ID values must match the Name production"); if (ids.contains(value)) 
error("Duplicate ID", value); ids.add(value); } else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type)) { StringTokenizer st = new StringTokenizer(value); while (st.hasMoreTokens()) { String token = st.nextToken(); // VC: IDREF if (!isNmtoken(token, true)) error("IDREF values must match the Name production"); idrefs.add(token); } } else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type)) { StringTokenizer st = new StringTokenizer(value); while (st.hasMoreTokens()) { String token = st.nextToken(); // VC: Name Token if (!isNmtoken(token, false)) error("NMTOKEN values must match the Nmtoken production"); } } else if ("ENTITY".equals(decl.type)) { // VC: Entity Name if (!isNmtoken(value, true)) error("ENTITY values must match the Name production"); Object entity = doctype.getEntity(value); if (entity == null || !(entity instanceof ExternalIds) || ((ExternalIds) entity).notationName == null) error("ENTITY values must match the name of an unparsed " + "entity declared in the DTD"); } else if ("NOTATION".equals(decl.type)) { if (!decl.values.contains(value)) error("NOTATION values must match a declared notation name", value); // VC: Notation Attributes ExternalIds notation = doctype.getNotation(value); if (notation == null) error("NOTATION values must match the name of a notation " + "declared in the DTD", value); } } if (namespaceAware) { if (!addNamespace(attr)) attrs.add(attr); } else attrs.add(attr); } /** * Determines whether the specified attribute is a namespace declaration, * and adds it to the current namespace context if so. Returns false if * the attribute is an ordinary attribute. 
*/ private boolean addNamespace(Attribute attr) throws XMLStreamException { if ("xmlns".equals(attr.name)) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null) error("Duplicate default namespace declaration"); if (XMLConstants.XML_NS_URI.equals(attr.value)) error("can't bind XML namespace"); ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value); return true; } else if ("xmlns".equals(attr.prefix)) { LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); if (ctx.get(attr.localName) != null) error("Duplicate namespace declaration for prefix", attr.localName); if (XMLConstants.XML_NS_PREFIX.equals(attr.localName)) { if (!XMLConstants.XML_NS_URI.equals(attr.value)) error("can't redeclare xml prefix"); else return false; // treat as attribute } if (XMLConstants.XML_NS_URI.equals(attr.value)) error("can't bind non-xml prefix to XML namespace"); if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName)) error("can't redeclare xmlns prefix"); if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value)) error("can't bind non-xmlns prefix to XML Namespace namespace"); if ("".equals(attr.value) && !input.xml11) error("illegal use of 1.1-style prefix unbinding in 1.0 document"); ctx.put(attr.localName, attr.value); return true; } return false; } /** * Parse a closing tag. */ private void readEndElement() throws IOException, XMLStreamException { // pop element off stack String expected = (String) stack.removeLast(); require(expected); skipWhitespace(); require('>'); // Make element name available buf.setLength(0); buf.append(expected); if (validating && doctype != null) endElementValidationHook(); } /** * Validate the end of an element. * Called on an end-element or empty element if validating. 
*/ private void endElementValidationHook() throws XMLStreamException { validateEndElement(); validationStack.removeLast(); if (stack.isEmpty()) currentContentModel = null; else { String parent = (String) stack.getLast(); currentContentModel = doctype.getElementModel(parent); } } /** * Parse a comment. */ private void readComment(boolean inDTD) throws IOException, XMLStreamException { boolean saved = expandPE; expandPE = false; buf.setLength(0); readUntil(TEST_END_COMMENT); require('>'); expandPE = saved; if (inDTD) doctype.addComment(buf.toString()); } /** * Parse a processing instruction. */ private void readPI(boolean inDTD) throws IOException, XMLStreamException { boolean saved = expandPE; expandPE = false; piTarget = readNmtoken(true); if (piTarget.indexOf(':') != -1) error("illegal character in PI target", new Character(':')); if ("xml".equalsIgnoreCase(piTarget)) error("illegal PI target", piTarget); if (tryRead(TEST_END_PI)) piData = null; else { if (!tryWhitespace()) error("whitespace required between PI target and data"); buf.setLength(0); readUntil(TEST_END_PI); piData = buf.toString(); } expandPE = saved; if (inDTD) doctype.addPI(piTarget, piData); } /** * Parse an entity reference. */ private void readReference() throws IOException, XMLStreamException { buf.setLength(0); String entityName = readNmtoken(true); require(';'); buf.setLength(0); buf.append(entityName); } /** * Read an CDATA section. */ private void readCDSect() throws IOException, XMLStreamException { buf.setLength(0); readUntil(TEST_END_CDATA); } /** * Read character data. 
* @return the type of text read (CHARACTERS or SPACE) */ private int readCharData(String prefix) throws IOException, XMLStreamException { boolean white = true; buf.setLength(0); if (prefix != null) buf.append(prefix); boolean done = false; boolean entities = false; while (!done) { // Block read mark(tmpBuf.length); int len = read(tmpBuf, 0, tmpBuf.length); if (len == -1) { if (inputStack.size() > 1) { popInput(); // report end-entity done = true; } else throw new EOFException(); } for (int i = 0; i < len && !done; i++) { int c = tmpBuf[i]; switch (c) { case 0x20: case 0x09: case 0x0a: case 0x0d: buf.append(Character.toChars(c)); break; // whitespace case 0x26: // '&' reset(); read(tmpBuf, 0, i); // character reference? mark(3); c = readCh(); // & c = readCh(); if (c == 0x23) // '#' { mark(1); c = readCh(); boolean hex = (c == 0x78); // 'x' if (!hex) reset(); char[] ch = readCharacterRef(hex ? 16 : 10); buf.append(ch, 0, ch.length); for (int j = 0; j < ch.length; j++) { switch (ch[j]) { case 0x20: case 0x09: case 0x0a: case 0x0d: break; // whitespace default: white = false; } } } else { // entity reference reset(); c = readCh(); // & String entityName = readNmtoken(true); require(';'); String text = (String) PREDEFINED_ENTITIES.get(entityName); if (text != null) buf.append(text); else { pushInput("", "&" + entityName + ";", false, false); done = true; break; } } // continue processing i = -1; mark(tmpBuf.length); len = read(tmpBuf, 0, tmpBuf.length); if (len == -1) { if (inputStack.size() > 1) { popInput(); done = true; } else throw new EOFException(); } entities = true; break; // end of text sequence case 0x3e: // '>' int l = buf.length(); if (l > 1 && buf.charAt(l - 1) == ']' && buf.charAt(l - 2) == ']') error("Character data may not contain unescaped ']]>'"); buf.append(Character.toChars(c)); break; case 0x3c: // '<' reset(); // read i characters int count = 0, remaining = i; do { int r = read(tmpBuf, 0, remaining); count += r; remaining -= r; } while (count < 
i); i = len; if (coalescing && tryRead(TEST_CDATA)) readUntil(TEST_END_CDATA); // read CDATA section into buf else done = true; // end of text sequence break; default: if (input.xml11) { if (!isXML11Char(c) || isXML11RestrictedChar(c)) error("illegal XML 1.1 character", "U+" + Integer.toHexString(c)); } else if (!isChar(c)) error("illegal XML character", "U+" + Integer.toHexString(c)); white = false; buf.append(Character.toChars(c)); } } // if text buffer >= 2MB, return it as a chunk // to avoid excessive memory use if (buf.length() >= 2097152) done = true; } if (entities) normalizeCRLF(buf); return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS; } /** * Expands the specified entity. */ private void expandEntity(String name, boolean inAttr, boolean normalize) throws IOException, XMLStreamException { if (doctype != null) { Object value = doctype.getEntity(name); if (value != null) { if (xmlStandalone == Boolean.TRUE) { // VC: Standalone Document Declaration if (doctype.isEntityExternal(name)) error("reference to external entity in standalone document"); else if (value instanceof ExternalIds) { ExternalIds ids = (ExternalIds) value; if (ids.notationName != null && doctype.isNotationExternal(ids.notationName)) error("reference to external notation in " + "standalone document"); } } if (value instanceof String) { String text = (String) value; if (inAttr && text.indexOf('<') != -1) error("< in attribute value"); pushInput(name, text, !inAttr, normalize); } else if (inAttr) error("reference to external entity in attribute value", name); else pushInput(name, (ExternalIds) value, !inAttr, normalize); return; } } error("reference to undeclared entity", name); } /** * Indicates whether the specified entity is unparsed. 
*/
  private boolean isUnparsedEntity(String name)
  {
    if (doctype != null)
      {
        Object value = doctype.getEntity(name);
        // Only an ExternalIds entry that names a notation counts as unparsed
        if (value != null && value instanceof ExternalIds)
          return ((ExternalIds) value).notationName != null;
      }
    return false;
  }

  /**
   * Read an equals sign, allowing whitespace on either side of it.
   */
  private void readEq()
    throws IOException, XMLStreamException
  {
    skipWhitespace();
    require('=');
    skipWhitespace();
  }

  /**
   * Character read for reading literals.
   * When the current input is exhausted and it is not the outermost one,
   * it is dropped from the input stack and reading continues from the
   * input below it. This method never returns -1: end of the outermost
   * input raises an EOFException.
   * @param recognizePEs whether to recognize parameter-entity references
   * @return the character read
   */
  private int literalReadCh(boolean recognizePEs)
    throws IOException, XMLStreamException
  {
    int c = recognizePEs ? readCh() : read();
    while (c == -1)
      {
        if (inputStack.size() > 1)
          {
            inputStack.removeLast();
            input = (Input) inputStack.getLast();
            // Don't issue end-entity
            c = recognizePEs ? readCh() : read();
          }
        else
          throw new EOFException();
      }
    return c;
  }

  /**
   * Read a string literal.
   * The literal must start with a single or double quote and ends at the
   * matching quote read at the same input-stack depth, so a quote that
   * comes from an expanded entity does not terminate it.
   * @param flags a combination of the LIT_* flags controlling whitespace
   * normalization and which kinds of reference are recognized
   * @param recognizePEs whether to recognize parameter-entity references
   * @return the literal's value, without the delimiting quotes
   */
  private String readLiteral(int flags, boolean recognizePEs)
    throws IOException, XMLStreamException
  {
    boolean saved = expandPE;
    int delim = readCh();
    if (delim != 0x27 && delim != 0x22)
      error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
    literalBuf.setLength(0);
    if ((flags & LIT_DISABLE_PE) != 0)
      expandPE = false;
    boolean entities = false;
    // Depth at which the opening quote was read
    int inputStackSize = inputStack.size();
    do
      {
        int c = literalReadCh(recognizePEs);
        if (c == delim && inputStackSize == inputStack.size())
          break;
        switch (c)
          {
          case 0x0a:
          case 0x0d:
            if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
              c = 0x20; // normalize to space
            break;
          case 0x09:
            if ((flags & LIT_ATTRIBUTE) != 0)
              c = 0x20; // normalize to space
            break;
          case 0x26: // '&'
            mark(2);
            c = readCh();
            if (c == 0x23) // '#'
              {
                if ((flags & LIT_DISABLE_CREF) != 0)
                  {
                    // Character references disabled: keep the '&' literally
                    reset();
                    c = 0x26; // '&'
                  }
                else
                  {
                    mark(1);
                    c = readCh();
                    boolean hex = (c == 0x78); // 'x'
                    if (!hex)
                      reset();
                    char[] ref = readCharacterRef(hex ? 16 : 10);
                    for (int i = 0; i < ref.length; i++)
                      literalBuf.append(ref[i]);
                    entities = true;
                    // Already appended; skip the append after the switch
                    continue;
                  }
              }
            else
              {
                if ((flags & LIT_DISABLE_EREF) != 0)
                  {
                    // Entity references disabled: keep the '&' literally
                    reset();
                    c = 0x26; // '&'
                  }
                else
                  {
                    reset();
                    String entityName = readNmtoken(true);
                    require(';');
                    String text =
                      (String) PREDEFINED_ENTITIES.get(entityName);
                    if (text != null)
                      literalBuf.append(text);
                    else
                      expandEntity(entityName,
                                   (flags & LIT_ATTRIBUTE) != 0,
                                   true);
                    entities = true;
                    continue;
                  }
              }
            break;
          case 0x3c: // '<'
            if ((flags & LIT_ATTRIBUTE) != 0)
              error("attribute values may not contain '<'");
            break;
          case -1:
            // NOTE(review): literalReadCh never returns -1, so this case
            // looks unreachable - confirm before relying on it
            if (inputStack.size() > 1)
              {
                popInput();
                continue;
              }
            throw new EOFException();
          default:
            // NOTE(review): "c > 0xfffd" also rejects code points at or
            // above U+10000, which readCharData accepts via isChar -
            // confirm whether supplementary characters can reach here
            if ((c < 0x0020 || c > 0xfffd) ||
                (c >= 0xd800 && c < 0xdc00) ||
                (input.xml11 && (c >= 0x007f) &&
                 (c <= 0x009f) && (c != 0x0085)))
              error("illegal character", "U+" + Integer.toHexString(c));
          }
        literalBuf.append(Character.toChars(c));
      }
    while (true);
    expandPE = saved;
    // Character references may have introduced CR LF pairs
    if (entities)
      normalizeCRLF(literalBuf);
    if ((flags & LIT_NORMALIZE) > 0)
      literalBuf = normalize(literalBuf);
    return literalBuf.toString();
  }

  /**
   * Performs attribute-value normalization of the text buffer.
   * This discards leading and trailing whitespace, and replaces sequences
   * of whitespace with a single space.
   * Only the space character is tested for here.
   * @param buf the text to normalize; it is not modified
   * @return a new buffer holding the normalized text
   */
  private StringBuffer normalize(StringBuffer buf)
  {
    StringBuffer acc = new StringBuffer();
    int len = buf.length();
    // 0: nothing emitted yet, 1: space(s) pending, 2: last char was text
    int avState = 0;
    for (int i = 0; i < len; i++)
      {
        char c = buf.charAt(i);
        if (c == ' ')
          avState = (avState == 0) ? 0 : 1;
        else
          {
            // A pending run of spaces is emitted as one space, and only
            // when more text follows
            if (avState == 1)
              acc.append(' ');
            acc.append(c);
            avState = 2;
          }
      }
    return acc;
  }

  /**
   * Replace any CR/LF pairs in the buffer with LF.
   * This may be necessary if combinations of CR or LF were declared as
   * (character) entity references in the input.
*/ private void normalizeCRLF(StringBuffer buf) { int len = buf.length() - 1; for (int i = 0; i < len; i++) { char c = buf.charAt(i); if (c == '\r' && buf.charAt(i + 1) == '\n') { buf.deleteCharAt(i--); len--; } } } /** * Parse and expand a parameter entity reference. */ private void expandPEReference() throws IOException, XMLStreamException { String name = readNmtoken(true, new StringBuffer()); require(';'); mark(1); // ensure we don't reset to before the semicolon if (doctype != null) { String entityName = "%" + name; Object entity = doctype.getEntity(entityName); if (entity != null) { if (xmlStandalone == Boolean.TRUE) { if (doctype.isEntityExternal(entityName)) error("reference to external parameter entity in " + "standalone document"); } if (entity instanceof String) { pushInput(name, (String) entity, false, input.normalize); //pushInput(name, " " + (String) entity + " "); } else { //pushInput("", " "); pushInput(name, (ExternalIds) entity, false, input.normalize); //pushInput("", " "); } } else error("reference to undeclared parameter entity", name); } else error("reference to parameter entity without doctype", name); } /** * Parse the digits in a character reference. 
* @param base the base of the digits (10 or 16) */ private char[] readCharacterRef(int base) throws IOException, XMLStreamException { CPStringBuilder b = new CPStringBuilder(); for (int c = readCh(); c != 0x3b && c != -1; c = readCh()) b.append(Character.toChars(c)); try { int ord = Integer.parseInt(b.toString(), base); if (input.xml11) { if (!isXML11Char(ord)) error("illegal XML 1.1 character reference " + "U+" + Integer.toHexString(ord)); } else { if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d)) || (ord >= 0xd800 && ord <= 0xdfff) || ord == 0xfffe || ord == 0xffff || ord > 0x0010ffff) error("illegal XML character reference " + "U+" + Integer.toHexString(ord)); } return Character.toChars(ord); } catch (NumberFormatException e) { error("illegal characters in character reference", b.toString()); return null; } } /** * Parses an NMTOKEN or Name production. * @param isName if a Name, otherwise an NMTOKEN */ private String readNmtoken(boolean isName) throws IOException, XMLStreamException { return readNmtoken(isName, nmtokenBuf); } /** * Parses an NMTOKEN or Name production using the specified buffer. * @param isName if a Name, otherwise an NMTOKEN * @param buf the character buffer to use */ private String readNmtoken(boolean isName, StringBuffer buf) throws IOException, XMLStreamException { buf.setLength(0); int c = readCh(); if (isName) { if (!isNameStartCharacter(c, input.xml11)) error("not a name start character", "U+" + Integer.toHexString(c)); } else { if (!isNameCharacter(c, input.xml11)) error("not a name character", "U+" + Integer.toHexString(c)); } buf.append(Character.toChars(c)); do { mark(1); c = readCh(); switch (c) { case 0x25: // '%' case 0x3c: // '<' case 0x3e: // '>' case 0x26: // '&' case 0x2c: // ',' case 0x7c: // '|' case 0x2a: // '*' case 0x2b: // '+' case 0x3f: // '?' 
case 0x29: // ')' case 0x3d: // '=' case 0x27: // '\'' case 0x22: // '"' case 0x5b: // '[' case 0x20: // ' ' case 0x09: // '\t' case 0x0a: // '\n' case 0x0d: // '\r' case 0x3b: // ';' case 0x2f: // '/' case -1: reset(); return intern(buf.toString()); default: if (!isNameCharacter(c, input.xml11)) error("not a name character", "U+" + Integer.toHexString(c)); else buf.append(Character.toChars(c)); } } while (true); } /** * Indicates whether the specified Unicode character is an XML 1.1 Char. */ public static boolean isXML11Char(int c) { return ((c >= 0x0001 && c <= 0xD7FF) || (c >= 0xE000 && c < 0xFFFE) || (c >= 0x10000 && c <= 0x10FFFF)); } /** * Indicates whether the specified Unicode character is an XML 1.1 * RestrictedChar. */ public static boolean isXML11RestrictedChar(int c) { return ((c >= 0x0001 && c <= 0x0008) || (c >= 0x000B && c <= 0x000C) || (c >= 0x000E && c <= 0x001F) || (c >= 0x007F && c <= 0x0084) || (c >= 0x0086 && c <= 0x009F)); } /** * Indicates whether the specified text matches the Name or Nmtoken * production. */ private boolean isNmtoken(String text, boolean isName) { try { int[] cp = UnicodeReader.toCodePointArray(text); if (cp.length == 0) return false; if (isName) { if (!isNameStartCharacter(cp[0], input.xml11)) return false; } else { if (!isNameCharacter(cp[0], input.xml11)) return false; } for (int i = 1; i < cp.length; i++) { if (!isNameCharacter(cp[i], input.xml11)) return false; } return true; } catch (IOException e) { return false; } } /** * Indicates whether the specified Unicode character is a Name start * character. 
*/
  public static boolean isNameStartCharacter(int c, boolean xml11)
  {
    if (!xml11)
      return c == 0x5f || c == 0x3a || isLetter(c);

    // XML 1.1 NameStartChar ranges
    return (c >= 0x0041 && c <= 0x005a)
      || (c >= 0x0061 && c <= 0x007a)
      || c == 0x3a
      || c == 0x5f
      || (c >= 0xC0 && c <= 0xD6)
      || (c >= 0xD8 && c <= 0xF6)
      || (c >= 0xF8 && c <= 0x2FF)
      || (c >= 0x370 && c <= 0x37D)
      || (c >= 0x37F && c <= 0x1FFF)
      || (c >= 0x200C && c <= 0x200D)
      || (c >= 0x2070 && c <= 0x218F)
      || (c >= 0x2C00 && c <= 0x2FEF)
      || (c >= 0x3001 && c <= 0xD7FF)
      || (c >= 0xF900 && c <= 0xFDCF)
      || (c >= 0xFDF0 && c <= 0xFFFD)
      || (c >= 0x10000 && c <= 0xEFFFF);
  }

  /**
   * Indicates whether the specified Unicode character is a Name non-initial
   * character.
   */
  public static boolean isNameCharacter(int c, boolean xml11)
  {
    if (!xml11)
      return c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a
        || isLetter(c) || isDigit(c)
        || isCombiningChar(c) || isExtender(c);

    // XML 1.1 NameChar: every name start character, plus '-', '.',
    // the ASCII digits, U+00B7, U+0300-U+036F and U+203F-U+2040
    return isNameStartCharacter(c, true)
      || (c >= 0x0030 && c <= 0x0039)
      || c == 0x2d
      || c == 0x2e
      || c == 0xB7
      || (c >= 0x300 && c <= 0x36F)
      || (c >= 0x203F && c <= 0x2040);
  }

  /**
   * Indicates whether the specified Unicode character matches the Letter
   * production.
*/
  public static boolean isLetter(int c)
  {
    // First test: the code points treated as BaseChar
    if ((c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
        (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
        (c >= 0x00F8 && c <= 0x00FF) || (c >= 0x0100 && c <= 0x0131) ||
        (c >= 0x0134 && c <= 0x013E) || (c >= 0x0141 && c <= 0x0148) ||
        (c >= 0x014A && c <= 0x017E) || (c >= 0x0180 && c <= 0x01C3) ||
        (c >= 0x01CD && c <= 0x01F0) || (c >= 0x01F4 && c <= 0x01F5) ||
        (c >= 0x01FA && c <= 0x0217) || (c >= 0x0250 && c <= 0x02A8) ||
        (c >= 0x02BB && c <= 0x02C1) || c == 0x0386 ||
        (c >= 0x0388 && c <= 0x038A) || c == 0x038C ||
        (c >= 0x038E && c <= 0x03A1) || (c >= 0x03A3 && c <= 0x03CE) ||
        (c >= 0x03D0 && c <= 0x03D6) || c == 0x03DA || c == 0x03DC ||
        c == 0x03DE || c == 0x03E0 ||
        (c >= 0x03E2 && c <= 0x03F3) || (c >= 0x0401 && c <= 0x040C) ||
        (c >= 0x040E && c <= 0x044F) || (c >= 0x0451 && c <= 0x045C) ||
        (c >= 0x045E && c <= 0x0481) || (c >= 0x0490 && c <= 0x04C4) ||
        (c >= 0x04C7 && c <= 0x04C8) || (c >= 0x04CB && c <= 0x04CC) ||
        (c >= 0x04D0 && c <= 0x04EB) || (c >= 0x04EE && c <= 0x04F5) ||
        (c >= 0x04F8 && c <= 0x04F9) || (c >= 0x0531 && c <= 0x0556) ||
        c == 0x0559 ||
        (c >= 0x0561 && c <= 0x0586) || (c >= 0x05D0 && c <= 0x05EA) ||
        (c >= 0x05F0 && c <= 0x05F2) || (c >= 0x0621 && c <= 0x063A) ||
        (c >= 0x0641 && c <= 0x064A) || (c >= 0x0671 && c <= 0x06B7) ||
        (c >= 0x06BA && c <= 0x06BE) || (c >= 0x06C0 && c <= 0x06CE) ||
        (c >= 0x06D0 && c <= 0x06D3) || c == 0x06D5 ||
        (c >= 0x06E5 && c <= 0x06E6) || (c >= 0x0905 && c <= 0x0939) ||
        c == 0x093D ||
        (c >= 0x0958 && c <= 0x0961) || (c >= 0x0985 && c <= 0x098C) ||
        (c >= 0x098F && c <= 0x0990) || (c >= 0x0993 && c <= 0x09A8) ||
        (c >= 0x09AA && c <= 0x09B0) || c == 0x09B2 ||
        (c >= 0x09B6 && c <= 0x09B9) || (c >= 0x09DC && c <= 0x09DD) ||
        (c >= 0x09DF && c <= 0x09E1) || (c >= 0x09F0 && c <= 0x09F1) ||
        (c >= 0x0A05 && c <= 0x0A0A) || (c >= 0x0A0F && c <= 0x0A10) ||
        (c >= 0x0A13 && c <= 0x0A28) || (c >= 0x0A2A && c <= 0x0A30) ||
        (c >= 0x0A32 && c <= 0x0A33) || (c >= 0x0A35 && c <= 0x0A36) ||
        (c >= 0x0A38 && c <= 0x0A39) || (c >= 0x0A59 && c <= 0x0A5C) ||
        c == 0x0A5E ||
        (c >= 0x0A72 && c <= 0x0A74) || (c >= 0x0A85 && c <= 0x0A8B) ||
        c == 0x0A8D ||
        (c >= 0x0A8F && c <= 0x0A91) || (c >= 0x0A93 && c <= 0x0AA8) ||
        (c >= 0x0AAA && c <= 0x0AB0) || (c >= 0x0AB2 && c <= 0x0AB3) ||
        (c >= 0x0AB5 && c <= 0x0AB9) || c == 0x0ABD || c == 0x0AE0 ||
        (c >= 0x0B05 && c <= 0x0B0C) || (c >= 0x0B0F && c <= 0x0B10) ||
        (c >= 0x0B13 && c <= 0x0B28) || (c >= 0x0B2A && c <= 0x0B30) ||
        (c >= 0x0B32 && c <= 0x0B33) || (c >= 0x0B36 && c <= 0x0B39) ||
        c == 0x0B3D ||
        (c >= 0x0B5C && c <= 0x0B5D) || (c >= 0x0B5F && c <= 0x0B61) ||
        (c >= 0x0B85 && c <= 0x0B8A) || (c >= 0x0B8E && c <= 0x0B90) ||
        (c >= 0x0B92 && c <= 0x0B95) || (c >= 0x0B99 && c <= 0x0B9A) ||
        c == 0x0B9C ||
        (c >= 0x0B9E && c <= 0x0B9F) || (c >= 0x0BA3 && c <= 0x0BA4) ||
        (c >= 0x0BA8 && c <= 0x0BAA) || (c >= 0x0BAE && c <= 0x0BB5) ||
        (c >= 0x0BB7 && c <= 0x0BB9) || (c >= 0x0C05 && c <= 0x0C0C) ||
        (c >= 0x0C0E && c <= 0x0C10) || (c >= 0x0C12 && c <= 0x0C28) ||
        (c >= 0x0C2A && c <= 0x0C33) || (c >= 0x0C35 && c <= 0x0C39) ||
        (c >= 0x0C60 && c <= 0x0C61) || (c >= 0x0C85 && c <= 0x0C8C) ||
        (c >= 0x0C8E && c <= 0x0C90) || (c >= 0x0C92 && c <= 0x0CA8) ||
        (c >= 0x0CAA && c <= 0x0CB3) || (c >= 0x0CB5 && c <= 0x0CB9) ||
        c == 0x0CDE ||
        (c >= 0x0CE0 && c <= 0x0CE1) || (c >= 0x0D05 && c <= 0x0D0C) ||
        (c >= 0x0D0E && c <= 0x0D10) || (c >= 0x0D12 && c <= 0x0D28) ||
        (c >= 0x0D2A && c <= 0x0D39) || (c >= 0x0D60 && c <= 0x0D61) ||
        (c >= 0x0E01 && c <= 0x0E2E) || c == 0x0E30 ||
        (c >= 0x0E32 && c <= 0x0E33) || (c >= 0x0E40 && c <= 0x0E45) ||
        (c >= 0x0E81 && c <= 0x0E82) || c == 0x0E84 ||
        (c >= 0x0E87 && c <= 0x0E88) || c == 0x0E8A || c == 0x0E8D ||
        (c >= 0x0E94 && c <= 0x0E97) || (c >= 0x0E99 && c <= 0x0E9F) ||
        (c >= 0x0EA1 && c <= 0x0EA3) || c == 0x0EA5 || c == 0x0EA7 ||
        (c >= 0x0EAA && c <= 0x0EAB) || (c >= 0x0EAD && c <= 0x0EAE) ||
        c == 0x0EB0 ||
        (c >= 0x0EB2 && c <= 0x0EB3) || c == 0x0EBD ||
        (c >= 0x0EC0 && c <= 0x0EC4) || (c >= 0x0F40 && c <= 0x0F47) ||
        (c >= 0x0F49 && c <= 0x0F69) || (c >= 0x10A0 && c <= 0x10C5) ||
        (c >= 0x10D0 && c <= 0x10F6) || c == 0x1100 ||
        (c >= 0x1102 && c <= 0x1103) || (c >= 0x1105 && c <= 0x1107) ||
        c == 0x1109 ||
        (c >= 0x110B && c <= 0x110C) || (c >= 0x110E && c <= 0x1112) ||
        c == 0x113C || c == 0x113E || c == 0x1140 || c == 0x114C ||
        c == 0x114E || c == 0x1150 ||
        (c >= 0x1154 && c <= 0x1155) || c == 0x1159 ||
        (c >= 0x115F && c <= 0x1161) || c == 0x1163 || c == 0x1165 ||
        c == 0x1167 || c == 0x1169 ||
        (c >= 0x116D && c <= 0x116E) || (c >= 0x1172 && c <= 0x1173) ||
        c == 0x1175 || c == 0x119E || c == 0x11A8 || c == 0x11AB ||
        (c >= 0x11AE && c <= 0x11AF) || (c >= 0x11B7 && c <= 0x11B8) ||
        c == 0x11BA ||
        (c >= 0x11BC && c <= 0x11C2) || c == 0x11EB || c == 0x11F0 ||
        c == 0x11F9 ||
        (c >= 0x1E00 && c <= 0x1E9B) || (c >= 0x1EA0 && c <= 0x1EF9) ||
        (c >= 0x1F00 && c <= 0x1F15) || (c >= 0x1F18 && c <= 0x1F1D) ||
        (c >= 0x1F20 && c <= 0x1F45) || (c >= 0x1F48 && c <= 0x1F4D) ||
        (c >= 0x1F50 && c <= 0x1F57) || c == 0x1F59 || c == 0x1F5B ||
        c == 0x1F5D ||
        (c >= 0x1F5F && c <= 0x1F7D) || (c >= 0x1F80 && c <= 0x1FB4) ||
        (c >= 0x1FB6 && c <= 0x1FBC) || c == 0x1FBE ||
        (c >= 0x1FC2 && c <= 0x1FC4) || (c >= 0x1FC6 && c <= 0x1FCC) ||
        (c >= 0x1FD0 && c <= 0x1FD3) || (c >= 0x1FD6 && c <= 0x1FDB) ||
        (c >= 0x1FE0 && c <= 0x1FEC) || (c >= 0x1FF2 && c <= 0x1FF4) ||
        (c >= 0x1FF6 && c <= 0x1FFC) || c == 0x2126 ||
        (c >= 0x212A && c <= 0x212B) || c == 0x212E ||
        (c >= 0x2180 && c <= 0x2182) || (c >= 0x3041 && c <= 0x3094) ||
        (c >= 0x30A1 && c <= 0x30FA) || (c >= 0x3105 && c <= 0x312C) ||
        (c >= 0xAC00 && c <= 0xD7A3))
      return true; // BaseChar
    // Second test: the code points treated as Ideographic
    if ((c >= 0x4e00 && c <= 0x9fa5) || c == 0x3007 ||
        (c >= 0x3021 && c <= 0x3029))
      return true; // Ideographic
    // Neither table matched
    return false;
  }

  /**
   * Indicates whether the specified Unicode character matches the Digit
   * production.
*/ public static boolean isDigit(int c) { return ((c >= 0x0030 && c <= 0x0039) || (c >= 0x0660 && c <= 0x0669) || (c >= 0x06F0 && c <= 0x06F9) || (c >= 0x0966 && c <= 0x096F) || (c >= 0x09E6 && c <= 0x09EF) || (c >= 0x0A66 && c <= 0x0A6F) || (c >= 0x0AE6 && c <= 0x0AEF) || (c >= 0x0B66 && c <= 0x0B6F) || (c >= 0x0BE7 && c <= 0x0BEF) || (c >= 0x0C66 && c <= 0x0C6F) || (c >= 0x0CE6 && c <= 0x0CEF) || (c >= 0x0D66 && c <= 0x0D6F) || (c >= 0x0E50 && c <= 0x0E59) || (c >= 0x0ED0 && c <= 0x0ED9) || (c >= 0x0F20 && c <= 0x0F29)); } /** * Indicates whether the specified Unicode character matches the * CombiningChar production. */ public static boolean isCombiningChar(int c) { return ((c >= 0x0300 && c <= 0x0345) || (c >= 0x0360 && c <= 0x0361) || (c >= 0x0483 && c <= 0x0486) || (c >= 0x0591 && c <= 0x05A1) || (c >= 0x05A3 && c <= 0x05B9) || (c >= 0x05BB && c <= 0x05BD) || c == 0x05BF || (c >= 0x05C1 && c <= 0x05C2) || c == 0x05C4 || (c >= 0x064B && c <= 0x0652) || c == 0x0670 || (c >= 0x06D6 && c <= 0x06DC) || (c >= 0x06DD && c <= 0x06DF) || (c >= 0x06E0 && c <= 0x06E4) || (c >= 0x06E7 && c <= 0x06E8) || (c >= 0x06EA && c <= 0x06ED) || (c >= 0x0901 && c <= 0x0903) || c == 0x093C || (c >= 0x093E && c <= 0x094C) || c == 0x094D || (c >= 0x0951 && c <= 0x0954) || (c >= 0x0962 && c <= 0x0963) || (c >= 0x0981 && c <= 0x0983) || c == 0x09BC || c == 0x09BE || c == 0x09BF || (c >= 0x09C0 && c <= 0x09C4) || (c >= 0x09C7 && c <= 0x09C8) || (c >= 0x09CB && c <= 0x09CD) || c == 0x09D7 || (c >= 0x09E2 && c <= 0x09E3) || c == 0x0A02 || c == 0x0A3C || c == 0x0A3E || c == 0x0A3F || (c >= 0x0A40 && c <= 0x0A42) || (c >= 0x0A47 && c <= 0x0A48) || (c >= 0x0A4B && c <= 0x0A4D) || (c >= 0x0A70 && c <= 0x0A71) || (c >= 0x0A81 && c <= 0x0A83) || c == 0x0ABC || (c >= 0x0ABE && c <= 0x0AC5) || (c >= 0x0AC7 && c <= 0x0AC9) || (c >= 0x0ACB && c <= 0x0ACD) || (c >= 0x0B01 && c <= 0x0B03) || c == 0x0B3C || (c >= 0x0B3E && c <= 0x0B43) || (c >= 0x0B47 && c <= 0x0B48) || (c >= 0x0B4B && c <= 0x0B4D) || 
(c >= 0x0B56 && c <= 0x0B57) || (c >= 0x0B82 && c <= 0x0B83) || (c >= 0x0BBE && c <= 0x0BC2) || (c >= 0x0BC6 && c <= 0x0BC8) || (c >= 0x0BCA && c <= 0x0BCD) || c == 0x0BD7 || (c >= 0x0C01 && c <= 0x0C03) || (c >= 0x0C3E && c <= 0x0C44) || (c >= 0x0C46 && c <= 0x0C48) || (c >= 0x0C4A && c <= 0x0C4D) || (c >= 0x0C55 && c <= 0x0C56) || (c >= 0x0C82 && c <= 0x0C83) || (c >= 0x0CBE && c <= 0x0CC4) || (c >= 0x0CC6 && c <= 0x0CC8) || (c >= 0x0CCA && c <= 0x0CCD) || (c >= 0x0CD5 && c <= 0x0CD6) || (c >= 0x0D02 && c <= 0x0D03) || (c >= 0x0D3E && c <= 0x0D43) || (c >= 0x0D46 && c <= 0x0D48) || (c >= 0x0D4A && c <= 0x0D4D) || c == 0x0D57 || c == 0x0E31 || (c >= 0x0E34 && c <= 0x0E3A) || (c >= 0x0E47 && c <= 0x0E4E) || c == 0x0EB1 || (c >= 0x0EB4 && c <= 0x0EB9) || (c >= 0x0EBB && c <= 0x0EBC) || (c >= 0x0EC8 && c <= 0x0ECD) || (c >= 0x0F18 && c <= 0x0F19) || c == 0x0F35 || c == 0x0F37 || c == 0x0F39 || c == 0x0F3E || c == 0x0F3F || (c >= 0x0F71 && c <= 0x0F84) || (c >= 0x0F86 && c <= 0x0F8B) || (c >= 0x0F90 && c <= 0x0F95) || c == 0x0F97 || (c >= 0x0F99 && c <= 0x0FAD) || (c >= 0x0FB1 && c <= 0x0FB7) || c == 0x0FB9 || (c >= 0x20D0 && c <= 0x20DC) || c == 0x20E1 || (c >= 0x302A && c <= 0x302F) || c == 0x3099 || c == 0x309A); } /** * Indicates whether the specified Unicode character matches the Extender * production. */ public static boolean isExtender(int c) { return (c == 0x00B7 || c == 0x02D0 || c == 0x02D1 || c == 0x0387 || c == 0x0640 || c == 0x0E46 || c == 0x0EC6 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035) || (c >= 0x309D && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)); } /** * Indicates whether the specified Unicode character matches the Char * production. */ public static boolean isChar(int c) { return (c >= 0x20 && c < 0xd800) || (c >= 0xe00 && c < 0xfffe) || (c >= 0x10000 && c < 0x110000) || c == 0xa || c == 0x9 || c == 0xd; } /** * Interns the specified text or not, depending on the value of * stringInterning. 
*/ private String intern(String text) { return stringInterning ? text.intern() : text; } /** * Report a parsing error. */ private void error(String message) throws XMLStreamException { error(message, null); } /** * Report a parsing error. */ private void error(String message, Object info) throws XMLStreamException { if (info != null) { if (info instanceof String) message += ": \"" + ((String) info) + "\""; else if (info instanceof Character) message += ": '" + ((Character) info) + "'"; } throw new XMLStreamException(message); } /** * Perform validation of a start-element event. */ private void validateStartElement(String elementName) throws XMLStreamException { if (currentContentModel == null) { // root element // VC: Root Element Type if (!elementName.equals(doctype.rootName)) error("root element name must match name in DTD"); return; } // VC: Element Valid switch (currentContentModel.type) { case ContentModel.EMPTY: error("child element found in empty element", elementName); break; case ContentModel.ELEMENT: LinkedList ctx = (LinkedList) validationStack.getLast(); ctx.add(elementName); break; case ContentModel.MIXED: MixedContentModel mm = (MixedContentModel) currentContentModel; if (!mm.containsName(elementName)) error("illegal element for content model", elementName); break; } } /** * Perform validation of an end-element event. */ private void validateEndElement() throws XMLStreamException { if (currentContentModel == null) { // root element // VC: IDREF if (!idrefs.containsAll(ids)) error("IDREF values must match the value of some ID attribute"); return; } // VC: Element Valid switch (currentContentModel.type) { case ContentModel.ELEMENT: LinkedList ctx = (LinkedList) validationStack.getLast(); ElementContentModel ecm = (ElementContentModel) currentContentModel; validateElementContent(ecm, ctx); break; } } /** * Perform validation of character data. 
*/ private void validatePCData(String text) throws XMLStreamException { // VC: Element Valid switch (currentContentModel.type) { case ContentModel.EMPTY: error("character data found in empty element", text); break; case ContentModel.ELEMENT: boolean white = true; int len = text.length(); for (int i = 0; i < len; i++) { char c = text.charAt(i); if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { white = false; break; } } if (!white) error("character data found in element with element content", text); else if (xmlStandalone == Boolean.TRUE && currentContentModel.external) // VC: Standalone Document Declaration error("whitespace in element content of externally declared " + "element in standalone document"); break; } } /** * Validates the specified validation context (list of child elements) * against the element content model for the current element. */ private void validateElementContent(ElementContentModel model, LinkedList children) throws XMLStreamException { // Use regular expression CPStringBuilder buf = new CPStringBuilder(); for (Iterator i = children.iterator(); i.hasNext(); ) { buf.append((String) i.next()); buf.append(' '); } String c = buf.toString(); String regex = createRegularExpression(model); if (!c.matches(regex)) error("element content "+model.text+" does not match expression "+regex, c); } /** * Creates the regular expression used to validate an element content * model. 
*/ private String createRegularExpression(ElementContentModel model) { if (model.regex == null) { CPStringBuilder buf = new CPStringBuilder(); buf.append('('); for (Iterator i = model.contentParticles.iterator(); i.hasNext(); ) { ContentParticle cp = (ContentParticle) i.next(); if (cp.content instanceof String) { buf.append('('); buf.append((String) cp.content); buf.append(' '); buf.append(')'); if (cp.max == -1) { if (cp.min == 0) buf.append('*'); else buf.append('+'); } else if (cp.min == 0) buf.append('?'); } else { ElementContentModel ecm = (ElementContentModel) cp.content; buf.append(createRegularExpression(ecm)); } if (model.or && i.hasNext()) buf.append('|'); } buf.append(')'); if (model.max == -1) { if (model.min == 0) buf.append('*'); else buf.append('+'); } else if (model.min == 0) buf.append('?'); model.regex = buf.toString(); } return model.regex; } /** * Performs validation of a document type declaration event. */ void validateDoctype() throws XMLStreamException { for (Iterator i = doctype.entityIterator(); i.hasNext(); ) { Map.Entry entry = (Map.Entry) i.next(); Object entity = entry.getValue(); if (entity instanceof ExternalIds) { ExternalIds ids = (ExternalIds) entity; if (ids.notationName != null) { // VC: Notation Declared ExternalIds notation = doctype.getNotation(ids.notationName); if (notation == null) error("Notation name must match the declared name of a " + "notation", ids.notationName); } } } } /** * Simple test harness for reading an XML file. 
* args[0] is the filename of the XML file * If args[1] is "-x", enable XInclude processing */ public static void main(String[] args) throws Exception { boolean validating = false; boolean namespaceAware = false; boolean xIncludeAware = false; int pos = 0; while (pos < args.length && args[pos].startsWith("-")) { if ("-x".equals(args[pos])) xIncludeAware = true; else if ("-v".equals(args[pos])) validating = true; else if ("-n".equals(args[pos])) namespaceAware = true; pos++; } if (pos >= args.length) { System.out.println("Syntax: XMLParser [-n] [-v] [-x] [ [...]]"); System.out.println("\t-n: use namespace aware mode"); System.out.println("\t-v: use validating parser"); System.out.println("\t-x: use XInclude aware mode"); System.exit(2); } while (pos < args.length) { XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]), absolutize(null, args[pos]), validating, // validating namespaceAware, // namespaceAware true, // coalescing, true, // replaceERefs true, // externalEntities true, // supportDTD true, // baseAware true, // stringInterning true, // extendedEventTypes null, null); XMLStreamReader reader = p; if (xIncludeAware) reader = new XIncludeFilter(p, args[pos], true, true, true); try { int event; //do while (reader.hasNext()) { event = reader.next(); Location loc = reader.getLocation(); System.out.print(loc.getLineNumber() + ":" + loc.getColumnNumber() + " "); switch (event) { case XMLStreamConstants.START_DOCUMENT: System.out.println("START_DOCUMENT version=" + reader.getVersion() + " encoding=" + reader.getEncoding()); break; case XMLStreamConstants.END_DOCUMENT: System.out.println("END_DOCUMENT"); break; case XMLStreamConstants.START_ELEMENT: System.out.println("START_ELEMENT " + reader.getName()); int l = reader.getNamespaceCount(); for (int i = 0; i < l; i++) System.out.println("\tnamespace " + reader.getNamespacePrefix(i) + "='" + reader.getNamespaceURI(i)+"'"); l = reader.getAttributeCount(); for (int i = 0; i < l; i++) 
System.out.println("\tattribute " + reader.getAttributeName(i) + "='" + reader.getAttributeValue(i) + "'"); break; case XMLStreamConstants.END_ELEMENT: System.out.println("END_ELEMENT " + reader.getName()); break; case XMLStreamConstants.CHARACTERS: System.out.println("CHARACTERS '" + encodeText(reader.getText()) + "'"); break; case XMLStreamConstants.CDATA: System.out.println("CDATA '" + encodeText(reader.getText()) + "'"); break; case XMLStreamConstants.SPACE: System.out.println("SPACE '" + encodeText(reader.getText()) + "'"); break; case XMLStreamConstants.DTD: System.out.println("DTD " + reader.getText()); break; case XMLStreamConstants.ENTITY_REFERENCE: System.out.println("ENTITY_REFERENCE " + reader.getText()); break; case XMLStreamConstants.COMMENT: System.out.println("COMMENT '" + encodeText(reader.getText()) + "'"); break; case XMLStreamConstants.PROCESSING_INSTRUCTION: System.out.println("PROCESSING_INSTRUCTION " + reader.getPITarget() + " " + reader.getPIData()); break; case START_ENTITY: System.out.println("START_ENTITY " + reader.getText()); break; case END_ENTITY: System.out.println("END_ENTITY " + reader.getText()); break; default: System.out.println("Unknown event: " + event); } } } catch (XMLStreamException e) { Location l = reader.getLocation(); System.out.println("At line "+l.getLineNumber()+ ", column "+l.getColumnNumber()+ " of "+l.getSystemId()); throw e; } pos++; } } /** * Escapes control characters in the specified text. For debugging. */ private static String encodeText(String text) { CPStringBuilder b = new CPStringBuilder(); int len = text.length(); for (int i = 0; i < len; i++) { char c = text.charAt(i); switch (c) { case '\t': b.append("\\t"); break; case '\n': b.append("\\n"); break; case '\r': b.append("\\r"); break; default: b.append(c); } } return b.toString(); } /** * An attribute instance. */ class Attribute { /** * Attribute name. */ final String name; /** * Attribute type as declared in the DTD, or CDATA otherwise. 
*/ final String type; /** * Whether the attribute was specified or defaulted. */ final boolean specified; /** * The attribute value. */ final String value; /** * The namespace prefix. */ final String prefix; /** * The namespace local-name. */ final String localName; Attribute(String name, String type, boolean specified, String value) { this.name = name; this.type = type; this.specified = specified; this.value = value; int ci = name.indexOf(':'); if (ci == -1) { prefix = null; localName = intern(name); } else { prefix = intern(name.substring(0, ci)); localName = intern(name.substring(ci + 1)); } } public boolean equals(Object other) { if (other instanceof Attribute) { Attribute a = (Attribute) other; if (namespaceAware) { if (!a.localName.equals(localName)) return false; String auri = getNamespaceURI(a.prefix); String uri = getNamespaceURI(prefix); if (uri == null && (auri == null || (input.xml11 && "".equals(auri)))) return true; if (uri != null) { if ("".equals(uri) && input.xml11 && "".equals(auri)) return true; return uri.equals(auri); } return false; } else return a.name.equals(name); } return false; } public String toString() { CPStringBuilder buf = new CPStringBuilder(getClass().getName()); buf.append('['); buf.append("name="); buf.append(name); if (value != null) { buf.append(",value="); buf.append(value); } if (type != null) { buf.append(",type="); buf.append(type); } if (specified) buf.append(",specified"); buf.append(']'); return buf.toString(); } } /** * Representation of a DTD. */ class Doctype { /** * Name of the root element. */ final String rootName; /** * Public ID, if any, of external subset. */ final String publicId; /** * System ID (URL), if any, of external subset. */ final String systemId; /** * Map of element names to content models. */ private final LinkedHashMap elements = new LinkedHashMap(); /** * Map of element names to maps of attribute declarations. 
*/ private final LinkedHashMap attlists = new LinkedHashMap(); /** * Map of entity names to entities (String or ExternalIds). */ private final LinkedHashMap entities = new LinkedHashMap(); /** * Map of notation names to ExternalIds. */ private final LinkedHashMap notations = new LinkedHashMap(); /** * Map of anonymous keys to comments. */ private final LinkedHashMap comments = new LinkedHashMap(); /** * Map of anonymous keys to processing instructions (String[2] * containing {target, data}). */ private final LinkedHashMap pis = new LinkedHashMap(); /** * List of keys to all markup entries in the DTD. */ private final LinkedList entries = new LinkedList(); /** * Set of the entities defined in the external subset. */ private final HashSet externalEntities = new HashSet(); /** * Set of the notations defined in the external subset. */ private final HashSet externalNotations = new HashSet(); /** * Counter for making anonymous keys. */ private int anon = 1; /** * Constructor. */ Doctype(String rootName, String publicId, String systemId) { this.rootName = rootName; this.publicId = publicId; this.systemId = systemId; } /** * Adds an element declaration. * @param name the element name * @param text the content model text * @param model the parsed content model */ void addElementDecl(String name, String text, ContentModel model) { if (elements.containsKey(name)) return; model.text = text; model.external = (inputStack.size() != 1); elements.put(name, model); entries.add("E" + name); } /** * Adds an attribute declaration. 
* @param ename the element name * @param aname the attribute name * @param decl the attribute declaration details */ void addAttributeDecl(String ename, String aname, AttributeDecl decl) { LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); if (attlist == null) { attlist = new LinkedHashMap(); attlists.put(ename, attlist); } else if (attlist.containsKey(aname)) return; attlist.put(aname, decl); String key = "A" + ename; if (!entries.contains(key)) entries.add(key); } /** * Adds an entity declaration. * @param name the entity name * @param text the entity replacement text * @param inExternalSubset if we are in the exernal subset */ void addEntityDecl(String name, String text, boolean inExternalSubset) { if (entities.containsKey(name)) return; entities.put(name, text); entries.add("e" + name); if (inExternalSubset) externalEntities.add(name); } /** * Adds an entity declaration. * @param name the entity name * @param ids the external IDs * @param inExternalSubset if we are in the exernal subset */ void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset) { if (entities.containsKey(name)) return; entities.put(name, ids); entries.add("e" + name); if (inExternalSubset) externalEntities.add(name); } /** * Adds a notation declaration. * @param name the notation name * @param ids the external IDs * @param inExternalSubset if we are in the exernal subset */ void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset) { if (notations.containsKey(name)) return; notations.put(name, ids); entries.add("n" + name); if (inExternalSubset) externalNotations.add(name); } /** * Adds a comment. */ void addComment(String text) { String key = Integer.toString(anon++); comments.put(key, text); entries.add("c" + key); } /** * Adds a processing instruction. 
*/ void addPI(String target, String data) { String key = Integer.toString(anon++); pis.put(key, new String[] {target, data}); entries.add("p" + key); } /** * Returns the content model for the specified element. * @param name the element name */ ContentModel getElementModel(String name) { return (ContentModel) elements.get(name); } /** * Returns the attribute definition for the given attribute * @param ename the element name * @param aname the attribute name */ AttributeDecl getAttributeDecl(String ename, String aname) { LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); return (attlist == null) ? null : (AttributeDecl) attlist.get(aname); } /** * Indicates whether the specified attribute was declared in the DTD. * @param ename the element name * @param aname the attribute name */ boolean isAttributeDeclared(String ename, String aname) { LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); return (attlist == null) ? false : attlist.containsKey(aname); } /** * Returns an iterator over the entries in the attribute list for the * given element. * @param ename the element name */ Iterator attlistIterator(String ename) { LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); return (attlist == null) ? Collections.EMPTY_LIST.iterator() : attlist.entrySet().iterator(); } /** * Returns the entity (String or ExternalIds) for the given entity name. */ Object getEntity(String name) { return entities.get(name); } /** * Indicates whether the specified entity was declared in the external * subset. */ boolean isEntityExternal(String name) { return externalEntities.contains(name); } /** * Returns an iterator over the entity map entries. */ Iterator entityIterator() { return entities.entrySet().iterator(); } /** * Returns the notation IDs for the given notation name. */ ExternalIds getNotation(String name) { return (ExternalIds) notations.get(name); } /** * Indicates whether the specified notation was declared in the external * subset. 
*/
    boolean isNotationExternal(String name)
    {
      return externalNotations.contains(name);
    }

    /**
     * Returns the comment associated with the specified (anonymous) key.
     * The key is the generated number as a string, without the "c" prefix
     * used in the list of markup entries.
     */
    String getComment(String key)
    {
      return (String) comments.get(key);
    }

    /**
     * Returns the processing instruction associated with the specified
     * (anonymous) key.
     * The key is the generated number as a string, without the "p" prefix
     * used in the list of markup entries.
     * @return a String[2] containing {target, data}, or null if there is
     * no entry for the key
     */
    String[] getPI(String key)
    {
      return (String[]) pis.get(key);
    }

    /**
     * Returns an iterator over the keys of the markup entries in this DTD,
     * in the order declared.
     * Each key is a one-character prefix identifying the kind of entry
     * ("E" element, "A" attribute list, "e" entity, "n" notation,
     * "c" comment, "p" processing instruction) followed by the declared
     * name or the anonymous key.
     */
    Iterator entryIterator()
    {
      return entries.iterator();
    }

  }

  /**
   * Combination of an ExternalID and an optional NDataDecl.
   */
  class ExternalIds
  {

    /**
     * The public ID.
     */
    String publicId;

    /**
     * The system ID.
     */
    String systemId;

    /**
     * The notation name declared with the NDATA keyword.
     */
    String notationName;
  }

  /**
   * A content model.
   */
  abstract class ContentModel
  {
    // Values for the type field.
    static final int EMPTY = 0;
    static final int ANY = 1;
    static final int ELEMENT = 2;
    static final int MIXED = 3;

    // Occurrence bounds, both 1 by default. When a regular expression is
    // built from an element content model, a min of 0 is rendered as
    // optional and a max of -1 as unbounded repetition.
    int min;
    int max;
    // One of EMPTY, ANY, ELEMENT or MIXED.
    final int type;
    // Content model text; assigned when the element declaration is added
    // to the Doctype.
    String text;
    // Assigned when the element declaration is added to the Doctype.
    boolean external;

    ContentModel(int type)
    {
      this.type = type;
      min = 1;
      max = 1;
    }

  }

  /**
   * The EMPTY content model.
   */
  class EmptyContentModel
    extends ContentModel
  {

    EmptyContentModel()
    {
      super(ContentModel.EMPTY);
      min = 0;
      max = 0;
    }

  }

  /**
   * The ANY content model.
   */
  class AnyContentModel
    extends ContentModel
  {

    AnyContentModel()
    {
      super(ContentModel.ANY);
      min = 0;
      max = -1;
    }

  }

  /**
   * An element content model.
   */
  class ElementContentModel
    extends ContentModel
  {

    // The ContentParticle members of this group, in declaration order.
    LinkedList contentParticles;
    // If true the particles are joined with '|' when the regular
    // expression is built; otherwise they are simply concatenated.
    boolean or;
    String regex; // regular expression cache

    ElementContentModel()
    {
      super(ContentModel.ELEMENT);
      contentParticles = new LinkedList();
    }

    // Appends a particle to this group.
    void addContentParticle(ContentParticle cp)
    {
      contentParticles.add(cp);
    }

  }

  // One member of an element content model, with its own occurrence
  // bounds (same conventions as ContentModel.min and ContentModel.max).
  class ContentParticle
  {

    int min = 1;
    int max = 1;
    Object content; // Name (String) or ElementContentModel

  }

  /**
   * A mixed content model.
*/
  class MixedContentModel
    extends ContentModel
  {

    // Names added via addName.
    private HashSet names = new HashSet();

    MixedContentModel()
    {
      super(ContentModel.MIXED);
    }

    // Records a name in this model.
    void addName(String name)
    {
      names.add(name);
    }

    // Tells whether the name has been added to this model.
    boolean containsName(String name)
    {
      boolean known = names.contains(name);
      return known;
    }

  }

  /**
   * The declaration of an attribute.
   */
  class AttributeDecl
  {

    /**
     * Type of the attribute (CDATA, ID, etc).
     */
    final String type;

    /**
     * Default value of the attribute.
     */
    final String value;

    /**
     * Kind of value (#FIXED, #IMPLIED, etc).
     */
    final int valueType;

    /**
     * Text of the enumeration.
     */
    final String enumeration;

    /**
     * Tokens of the enumeration.
     */
    final HashSet values;

    /**
     * True if the declaration occurred in the external subset.
     */
    final boolean external;

    AttributeDecl(String type, String value,
                  int valueType, String enumeration,
                  HashSet values, boolean external)
    {
      this.external = external;
      this.values = values;
      this.enumeration = enumeration;
      this.valueType = valueType;
      this.value = value;
      this.type = type;
    }

  }

  /**
   * An XML input source.
*/ static class Input implements Location { int line = 1, markLine; int column, markColumn; int offset, markOffset; final String publicId, systemId, name; final boolean report; // report start- and end-entity final boolean normalize; // normalize CR, etc to LF InputStream in; Reader reader; UnicodeReader unicodeReader; boolean initialized; boolean encodingDetected; String inputEncoding; boolean xml11; Input(InputStream in, Reader reader, String publicId, String systemId, String name, String inputEncoding, boolean report, boolean normalize) { if (inputEncoding == null) inputEncoding = "UTF-8"; this.inputEncoding = inputEncoding; this.publicId = publicId; this.systemId = systemId; this.name = name; this.report = report; this.normalize = normalize; if (in != null) { if (reader != null) throw new IllegalStateException("both byte and char streams "+ "specified"); if (normalize) in = new CRLFInputStream(in); in = new BufferedInputStream(in); this.in = in; } else { this.reader = normalize ? new CRLFReader(reader) : reader; unicodeReader = new UnicodeReader(this.reader); } initialized = false; } // -- Location -- public int getCharacterOffset() { return offset; } public int getColumnNumber() { return column; } public int getLineNumber() { return line; } public String getPublicId() { return publicId; } public String getSystemId() { return systemId; } void init() throws IOException { if (initialized) return; if (in != null) detectEncoding(); initialized = true; } void mark(int len) throws IOException { markOffset = offset; markLine = line; markColumn = column; if (unicodeReader != null) unicodeReader.mark(len); else in.mark(len); } /** * Character read. */ int read() throws IOException { offset++; int ret = (unicodeReader != null) ? 
unicodeReader.read() : in.read(); if (normalize && (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028)))) { // Normalize CR etc to LF ret = 0x0a; } // Locator handling if (ret == 0x0a) { line++; column = 0; } else column++; return ret; } /** * Block read. */ int read(int[] b, int off, int len) throws IOException { int ret; if (unicodeReader != null) { ret = unicodeReader.read(b, off, len); } else { byte[] b2 = new byte[len]; ret = in.read(b2, 0, len); if (ret != -1) { String s = new String(b2, 0, ret, inputEncoding); int[] c = UnicodeReader.toCodePointArray(s); ret = c.length; System.arraycopy(c, 0, b, off, ret); } } if (ret != -1) { // Locator handling for (int i = 0; i < ret; i++) { int c = b[off + i]; if (normalize && (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028)))) { // Normalize CR etc to LF c = 0x0a; b[off + i] = c; } if (c == 0x0a) { line++; column = 0; } else column++; } } return ret; } void reset() throws IOException { if (unicodeReader != null) unicodeReader.reset(); else in.reset(); offset = markOffset; line = markLine; column = markColumn; } // Detection of input encoding private static final int[] SIGNATURE_UCS_4_1234 = new int[] { 0x00, 0x00, 0x00, 0x3c }; private static final int[] SIGNATURE_UCS_4_4321 = new int[] { 0x3c, 0x00, 0x00, 0x00 }; private static final int[] SIGNATURE_UCS_4_2143 = new int[] { 0x00, 0x00, 0x3c, 0x00 }; private static final int[] SIGNATURE_UCS_4_3412 = new int[] { 0x00, 0x3c, 0x00, 0x00 }; private static final int[] SIGNATURE_UCS_2_12 = new int[] { 0xfe, 0xff }; private static final int[] SIGNATURE_UCS_2_21 = new int[] { 0xff, 0xfe }; private static final int[] SIGNATURE_UCS_2_12_NOBOM = new int[] { 0x00, 0x3c, 0x00, 0x3f }; private static final int[] SIGNATURE_UCS_2_21_NOBOM = new int[] { 0x3c, 0x00, 0x3f, 0x00 }; private static final int[] SIGNATURE_UTF_8 = new int[] { 0x3c, 0x3f, 0x78, 0x6d }; private static final int[] SIGNATURE_UTF_8_BOM = new int[] { 0xef, 0xbb, 0xbf }; /** * Detect the input encoding. 
*/
    private void detectEncoding()
      throws IOException
    {
      // Peek at the first four bytes without consuming them.
      int[] signature = new int[4];
      in.mark(4);
      for (int i = 0; i < 4; i++)
        signature[i] = in.read();
      in.reset();

      // 4-byte encodings
      if (equals(SIGNATURE_UCS_4_1234, signature))
        {
          in.read();
          in.read();
          in.read();
          in.read();
          setInputEncoding("UTF-32BE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_4_4321, signature))
        {
          in.read();
          in.read();
          in.read();
          in.read();
          setInputEncoding("UTF-32LE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_4_2143, signature) ||
               equals(SIGNATURE_UCS_4_3412, signature))
        throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");

      // 2-byte encodings
      else if (equals(SIGNATURE_UCS_2_12, signature))
        {
          // Consume the byte-order mark.
          in.read();
          in.read();
          setInputEncoding("UTF-16BE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_2_21, signature))
        {
          // Consume the byte-order mark.
          in.read();
          in.read();
          setInputEncoding("UTF-16LE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
        {
          //setInputEncoding("UTF-16BE");
          throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
        }
      else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
        {
          //setInputEncoding("UTF-16LE");
          throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
        }
      // ASCII-derived encodings
      else if (equals(SIGNATURE_UTF_8, signature))
        {
          // UTF-8 input encoding implied, TextDecl
        }
      else if (equals(SIGNATURE_UTF_8_BOM, signature))
        {
          // Consume the byte-order mark.
          in.read();
          in.read();
          in.read();
          setInputEncoding("UTF-8");
          encodingDetected = true;
        }
    }

    /**
     * Tests whether b2 starts with the values in b1.
     * Only the first b1.length elements of b2 are examined.
     */
    private static boolean equals(int[] b1, int[] b2)
    {
      for (int i = 0; i < b1.length; i++)
        {
          if (b1[i] != b2[i])
            return false;
        }
      return true;
    }

    /**
     * Changes the input encoding and sets up the character reader for it.
     * Nothing happens if the encoding equals the current one, or if it is
     * "UTF-16" and the current encoding already starts with "UTF-16".
     * @param encoding the encoding name
     * @throws UnsupportedEncodingException if an encoding was already
     * detected from the byte stream and the given one differs from it
     */
    void setInputEncoding(String encoding)
      throws IOException
    {
      if (encoding.equals(inputEncoding))
        return;
      if ("UTF-16".equalsIgnoreCase(encoding) &&
          inputEncoding.startsWith("UTF-16"))
        return;
      if (encodingDetected)
        {
          // Encoding names are matched case-insensitively, so "utf-8"
          // does not conflict with a detected "UTF-8".
          if (encoding.equalsIgnoreCase(inputEncoding))
            return;
          throw new UnsupportedEncodingException("document is not in its " +
                                                 "declared encoding " +
                                                 inputEncoding +
                                                 ": " + encoding);
        }
      inputEncoding = encoding;
      finalizeEncoding();
    }

    /**
     * Creates the character reader over the byte stream using the current
     * input encoding, then marks the input. Does nothing if a reader
     * already exists.
     */
    void finalizeEncoding()
      throws IOException
    {
      if (reader != null)
        return;
      reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
      unicodeReader = new UnicodeReader(reader);
      mark(1);
    }

  }

}