/* XMLParser.java --
Copyright (C) 2005, 2015 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version.
Partly derived from code which carried the following notice:
Copyright (c) 1997, 1998 by Microstar Software Ltd.
AElfred is free for both commercial and non-commercial use and
redistribution, provided that Microstar's copyright and disclaimer are
retained intact. You are free to modify AElfred for your own use and
to redistribute AElfred with your modifications, provided that the
modifications are clearly documented.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
merchantability or fitness for a particular purpose. Please use it AT
YOUR OWN RISK.
*/
package gnu.xml.stream;
import gnu.java.lang.CPStringBuilder;
import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.StringTokenizer;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import gnu.java.net.CRLFInputStream;
import gnu.classpath.debug.TeeInputStream;
import gnu.classpath.debug.TeeReader;
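/**
* An XML parser.
* This parser supports the following additional StAX properties:
* <table>
* <tr><td>gnu.xml.stream.stringInterning</td>
* <td>Boolean</td>
* <td>Indicates whether markup strings will be interned</td></tr>
* <tr><td>gnu.xml.stream.xmlBase</td>
* <td>Boolean</td>
* <td>Indicates whether XML Base processing will be performed</td></tr>
* <tr><td>gnu.xml.stream.baseURI</td>
* <td>String</td>
* <td>Returns the base URI of the current event</td></tr>
* </table>
*
* @see <a href="http://www.w3.org/TR/REC-xml/">http://www.w3.org/TR/REC-xml/</a>
* @see <a href="http://www.w3.org/TR/xml11/">http://www.w3.org/TR/xml11/</a>
* @see <a href="http://www.w3.org/TR/REC-xml-names">http://www.w3.org/TR/REC-xml-names</a>
* @see <a href="http://www.w3.org/TR/xml-names11">http://www.w3.org/TR/xml-names11</a>
* @see <a href="http://www.w3.org/TR/xmlbase/">http://www.w3.org/TR/xmlbase/</a>
*
* @author Chris Burdess
*/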
public class XMLParser
implements XMLStreamReader, NamespaceContext
{
// -- parser state machine states --
// Values taken by the private "state" field of this class. They are
// consecutive integers 0..4 and are private to the parser.
private static final int INIT = 0; // start state (initial value of the state field)
private static final int PROLOG = 1; // in prolog
private static final int CONTENT = 2; // in content
private static final int EMPTY_ELEMENT = 3; // empty element state
private static final int MISC = 4; // in Misc (after root element)
// -- parameters for parsing literals --
// Every value below is a distinct power of two (bits 1 and 128 are not
// used), so these look like flags meant to be OR-ed into a single int
// argument. NOTE(review): the code that consumes them is outside this
// chunk; the per-flag meanings are suggested only by the names -- confirm
// against the literal-reading code before relying on them.
private final static int LIT_ENTITY_REF = 2;
private final static int LIT_NORMALIZE = 4;
private final static int LIT_ATTRIBUTE = 8;
private final static int LIT_DISABLE_PE = 16;
private final static int LIT_DISABLE_CREF = 32;
private final static int LIT_DISABLE_EREF = 64;
private final static int LIT_PUBID = 256;
// -- types of attribute values --
// Package-private (no access modifier), so other classes in
// gnu.xml.stream can refer to them. The IMPLIED / REQUIRED / FIXED names
// appear to mirror the DTD attribute default keywords -- TODO confirm
// where these codes are assigned.
final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
final static int ATTRIBUTE_DEFAULT_FIXED = 34;
// -- additional event types --
// Package-private event codes defined by this parser itself.
// NOTE(review): the values 50/51 are presumably chosen to stay clear of
// the event codes declared in javax.xml.stream.XMLStreamConstants --
// verify before adding further codes.
final static int START_ENTITY = 50;
final static int END_ENTITY = 51;
/**
* The current input, i.e. the {@code Input} the parser is reading from
* at the moment. Not initialised at declaration, so it is {@code null}
* until assigned.
* NOTE(review): {@code Input} is declared outside this chunk.
*/
private Input input;
/**
* Stack of inputs representing XML general entities.
* The input representing the XML input stream or reader is always the
* first element in this stack.
* NOTE(review): declared as a raw {@code Deque}; the elements are
* presumably {@code Input} instances -- confirm against the push/pop
* sites.
*/
private Deque inputStack = new LinkedList();
/**
* Stack of start-entity events to be reported.
* NOTE(review): raw {@code Deque}; the element type is not visible in
* this chunk. Presumably these entries back the package-private
* {@code START_ENTITY} event type -- confirm.
*/
private Deque startEntityStack = new LinkedList();
/**
* Stack of end-entity events to be reported.
* NOTE(review): raw {@code Deque}; the element type is not visible in
* this chunk. Presumably these entries back the package-private
* {@code END_ENTITY} event type -- confirm.
*/
private Deque endEntityStack = new LinkedList();
/**
* Current parser state within the main state machine.
* Initialised to {@code INIT}; the other state constants declared in
* this class are {@code PROLOG}, {@code CONTENT}, {@code EMPTY_ELEMENT}
* and {@code MISC}.
*/
private int state = INIT;
/**
* The (type of the) current event.
* Has no explicit initialiser, so it holds the Java default of 0 until
* the parser assigns an event code.
*/
private int event;
/**
* The element name stack. The first element in this stack will be the
* root element.
* NOTE(review): raw {@code Deque}; how element names are represented
* (e.g. as strings) is not visible in this chunk.
*/
private Deque stack = new LinkedList();
/**
* Stack of namespace contexts. These are maps specifying prefix-to-URI
* mappings. The first element in this stack is the most recent namespace
* context (i.e. the other way around from the element name stack).
*/
private Deque