// balxml_minireader.h                                                -*-C++-*-

// ----------------------------------------------------------------------------
//                                   NOTICE
//
// This component is not up to date with current BDE coding standards, and
// should not be used as an example for new development.
// ----------------------------------------------------------------------------

#ifndef INCLUDED_BALXML_MINIREADER
#define INCLUDED_BALXML_MINIREADER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide light-weight implementation of 'balxml::Reader' protocol.
//
//@CLASSES:
//   balxml::MiniReader: light-weight 'balxml::Reader' implementation
//
//@SEE_ALSO: balxml_reader, balxml_errorinfo
//
//@DESCRIPTION: The 'balxml::MiniReader' class is a light-weight
// implementation of the 'balxml::Reader' interface.  The API acts as a cursor
// going forward on the document stream and stopping at each node in the way.
// The current node refers to the node on which the reader is positioned.  The
// user's code keeps control of the progress and simply calls a 'read'
// function repeatedly to progress to each node in sequence in document order.
// This provides a far more standard, easy to use and powerful API than the
// existing SAX.
//
// Data Validation
// - - - - - - - -
// The 'balxml::MiniReader' 'class' is not a validating reader
// ('balxml::ValidatingReader').  As a result while parsing data it does not
// make an attempt to ensure the correctness of either the data or the
// structure of the incoming XML.  The 'class' accepts characters as element
// data that the XML standard considers invalid.  For example the '&' and '<'
// characters in element data will parse without error.  Similarly, it does not
// return an error if the read data does not conform to its specified schema.
// To get stricter data validation, clients should use a concrete
// implementation of a validating reader (such as 'a_xercesc::Reader') instead.
//
///Usage
///-----
// For this example, we will use 'balxml::MiniReader' to read each node in an
// XML document.  We do not care about whitespace, so we use the following
// utility function to skip over any whitespace nodes.  This makes our example
// more portable to other implementations of the 'balxml::Reader' protocol that
// handle whitespace differently from 'balxml::MiniReader'.
//..
//  int advancePastWhiteSpace(balxml::Reader& reader) {
//      const char *whiteSpace = "\n\r\t ";
//      const char *value = 0;
//      int         type = 0;
//      int         rc = 0;
//
//      do {
//          rc    = reader.advanceToNextNode();
//          value = reader.nodeValue();
//          type  = reader.nodeType();
//      } while(0 == rc &&
//              (type == balxml::Reader::BAEXML_NODE_TYPE_WHITESPACE ||
//               (type == balxml::Reader::BAEXML_NODE_TYPE_TEXT &&
//                bsl::strlen(value) == bsl::strspn(value, whiteSpace))));
//
//      assert( reader.nodeType() !=
//                                balxml::Reader::BAEXML_NODE_TYPE_WHITESPACE);
//
//      return rc;
//  }
//..
// The main program parses an XML string using a 'balxml::MiniReader'.
//..
//  int main()
//  {
//..
// The following string describes xml for a very simple user directory.  The
// top level element contains one xml namespace attribute, with one embedded
// entry describing a user.
//..
//      const char TEST_XML_STRING[] =
//         "<?xml version='1.0' encoding='UTF-8'?>\n"
//         "<directory-entry xmlns:dir="
//                                "'http://bloomberg.com/schemas/directory'>\n"
//         "    <name>John Smith</name>\n"
//         "    <phone dir:phonetype='cell'>212-318-2000</phone>\n"
//         "    <address/>\n"
//         "</directory-entry>\n";
//..
// In order to read the XML, we first need to construct a
// 'balxml::NamespaceRegistry' object, a 'balxml::PrefixStack' object, and a
// 'balxml::MiniReader' object, which is a concrete implementation of the
// 'balxml::Reader' protocol.
//..
//      balxml::NamespaceRegistry namespaces;
//      balxml::PrefixStack prefixStack(&namespaces);
//      balxml::MiniReader miniReader; balxml::Reader& reader = miniReader;
//
//      assert(!reader.isOpen());
//..
// The reader uses a 'balxml::PrefixStack' to manage namespace prefixes so we
// need to set it before we call open.
//..
//      reader.setPrefixStack(&prefixStack);
//      assert(reader.prefixStack());
//      assert(reader.prefixStack() == &prefixStack);
//..
// Now we call the 'open' method to set up the reader for parsing using the
// data contained in the XML string.
//..
//      reader.open(TEST_XML_STRING, sizeof(TEST_XML_STRING) -1, 0, "UTF-8");
//..
// Confirm that the 'balxml::Reader' has opened properly.
//..
//      assert( reader.isOpen());
//      assert(!bsl::strncmp(reader.documentEncoding(), "UTF-8", 5));
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_NONE);
//      assert(!reader.nodeName());
//      assert(!reader.nodeHasValue());
//      assert(!reader.nodeValue());
//      assert(!reader.nodeDepth());
//      assert(!reader.numAttributes());
//      assert(!reader.isEmptyElement());
//..
// Advance through all the nodes and assert all information contained at each
// node is correct.
//
// Assert the next node's document type is xml.
//..
//      int rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() ==
//                           balxml::Reader::BAEXML_NODE_TYPE_XML_DECLARATION);
//      assert(!bsl::strcmp(reader.nodeName(), "xml"));
//      assert( reader.nodeHasValue());
//      assert(!bsl::strcmp(reader.nodeValue(),
//                          "version='1.0' encoding='UTF-8'"));
//      assert( reader.nodeDepth() == 1);
//      assert(!reader.numAttributes());
//      assert(!reader.isEmptyElement());
//      assert( 0 == rc);
//      assert( reader.nodeDepth() == 1);
//..
// Advance to the top level element, which has one attribute, the xml
// namespace.  Assert the namespace information has been added correctly to the
// prefix stack.
//..
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "directory-entry"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 1);
//      assert( reader.numAttributes() == 1);
//      assert(!reader.isEmptyElement());
//
//      assert(!bsl::strcmp(prefixStack.lookupNamespacePrefix("dir"), "dir"));
//      assert(prefixStack.lookupNamespaceId("dir") == 0);
//      assert(!bsl::strcmp(prefixStack.lookupNamespaceUri("dir"),
//                          "http://bloomberg.com/schemas/directory"));
//..
// The XML being read contains one entry describing a user.  Advance to the
// user's name and assert all information can be read correctly.
//..
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "name"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 2);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//
//      rc = reader.advanceToNextNode();
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_TEXT);
//      assert( reader.nodeHasValue());
//      assert(!bsl::strcmp(reader.nodeValue(), "John Smith"));
//      assert( reader.nodeDepth() == 3);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//
//      rc = reader.advanceToNextNode();
//      assert( 0 == rc);
//      assert( reader.nodeType() ==
//                               balxml::Reader::BAEXML_NODE_TYPE_END_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "name"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 2);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//..
// Advance to the user's phone number and assert all information can be read
// correctly.
//..
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "phone"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 2);
//      assert( reader.numAttributes() == 1);
//      assert(!reader.isEmptyElement());
//..
// The phone node has one attribute, look it up and assert the
// 'balxml::ElementAttribute' contains valid information and that the prefix
// returns the correct namespace URI from the prefix stack.
//..
//      balxml::ElementAttribute elemAttr;
//
//      rc = reader.lookupAttribute(&elemAttr, 0);
//      assert( 0 == rc);
//      assert(!elemAttr.isNull());
//      assert(!bsl::strcmp(elemAttr.qualifiedName(), "dir:phonetype"));
//      assert(!bsl::strcmp(elemAttr.value(), "cell"));
//      assert(!bsl::strcmp(elemAttr.prefix(), "dir"));
//      assert(!bsl::strcmp(elemAttr.localName(), "phonetype"));
//      assert(!bsl::strcmp(elemAttr.namespaceUri(),
//                          "http://bloomberg.com/schemas/directory"));
//      assert( elemAttr.namespaceId() == 0);
//
//      assert(!bsl::strcmp(prefixStack.lookupNamespaceUri(elemAttr.prefix()),
//                          elemAttr.namespaceUri()));
//
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_TEXT);
//      assert( reader.nodeHasValue());
//      assert(!bsl::strcmp(reader.nodeValue(), "212-318-2000"));
//      assert( reader.nodeDepth() == 3);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() ==
//                               balxml::Reader::BAEXML_NODE_TYPE_END_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "phone"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 2);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//..
// Advance to the user's address and assert all information can be read
// correctly.
//..
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() == balxml::Reader::BAEXML_NODE_TYPE_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "address"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 2);
//      assert( reader.numAttributes() == 0);
//      assert( reader.isEmptyElement());
//..
// Advance to the end element.
//..
//      rc = advancePastWhiteSpace(reader);
//      assert( 0 == rc);
//      assert( reader.nodeType() ==
//                               balxml::Reader::BAEXML_NODE_TYPE_END_ELEMENT);
//      assert(!bsl::strcmp(reader.nodeName(), "directory-entry"));
//      assert(!reader.nodeHasValue());
//      assert( reader.nodeDepth() == 1);
//      assert( reader.numAttributes() == 0);
//      assert(!reader.isEmptyElement());
//..
// Close the reader.
//..
//      reader.close();
//      assert(!reader.isOpen());
//
//      return 0;
//  }
//..

#include <balscm_version.h>

#include <balxml_reader.h>
#include <balxml_elementattribute.h>
#include <balxml_namespaceregistry.h>
#include <balxml_prefixstack.h>

#include <bslma_allocator.h>

#include <bsl_cstring.h>
#include <bsl_cstddef.h>
#include <bsl_cstdlib.h>
#include <bsl_fstream.h>
#include <bsl_string.h>
#include <bsl_vector.h>

namespace BloombergLP  {
namespace balxml {

                              // ================
                              // class MiniReader
                              // ================

class MiniReader :  public Reader {
    // This 'class' provides a concrete and efficient implementation of the
    // 'Reader' protocol.

  private:
    // PRIVATE TYPES
    enum {
        // Sizing constants for the reader's buffer and expected element
        // nesting.  NOTE(review): presumably the 'bufSize' constructor hint
        // is constrained using 'k_MIN_BUFSIZE' and 'k_MAX_BUFSIZE' -- confirm
        // against the implementation file.

        k_MIN_BUFSIZE     = 1024,        // MIN - 1 KB
        k_MAX_BUFSIZE     = 1024 * 128,  // MAX - 128 KB
        k_DEFAULT_BUFSIZE = 1024 * 8,    // DEFAULT - 8 KB
        k_DEFAULT_DEPTH   = 20           // Average expected depth
    };                                   // to minimize allocations

    typedef ElementAttribute Attribute;
    typedef bsl::vector<Attribute> AttributeVector;

    struct Node;
    friend struct Node;
    struct Node {
        // This 'struct' aggregates the information kept for a single XML
        // node: its type, name components, value, namespace, flags, and
        // attributes, together with position bookkeeping.

        enum {
            // NOTE(review): presumably the bit values stored in 'd_flags' --
            // confirm against the implementation file.

            k_NODE_NO_FLAGS = 0x0000,
            k_NODE_EMPTY    = 0x0001
        };

        NodeType         d_type;           // type of this node
        const char      *d_qualifiedName;  // qualified name
        const char      *d_prefix;         // namespace prefix
        const char      *d_localName;      // local name
        const char      *d_value;          // node value
        int              d_namespaceId;    // namespace id
        const char      *d_namespaceUri;   // namespace URI
        int              d_flags;          // flags (see NOTE on the enum above)
        AttributeVector  d_attributes;     // attributes of this node
        size_t           d_attrCount;
            // NOTE(review): presumably the number of entries of
            // 'd_attributes' currently in use -- confirm.
        size_t           d_namespaceCount;
            // NOTE(review): presumably the number of namespace declarations
            // seen on this node -- confirm.
        int              d_startPos;
        int              d_endPos;
            // NOTE(review): presumably the start and end offsets of this node
            // in the input -- confirm.

        Node(bslma::Allocator *basicAllocator = 0);
            // Create a node, optionally specifying a 'basicAllocator'.
        Node(const Node& other, bslma::Allocator *basicAllocator = 0);
            // Create a node from the specified 'other' node, optionally
            // specifying a 'basicAllocator'.

        void reset();
            // Reset this node.  NOTE(review): the exact post-state is defined
            // in the implementation file.
        void swap(Node& other);
            // NOTE(review): presumably exchanges the contents of this node
            // with the specified 'other' -- confirm.
        void addAttribute(const Attribute& attr);
            // NOTE(review): presumably adds the specified 'attr' to
            // 'd_attributes' -- confirm.
    };

    typedef bsl::pair<bsl::string, int> Element;

    typedef bsl::vector<Element> ElementVector;

    enum State {
        // Enumerate the parsing states of the reader; this is the type of the
        // 'd_state' data member.

        ST_INITIAL,   // Initial state after successful open
        ST_TAG_BEGIN, // Current position - next symbol after '<'
        ST_TAG_END,   // Current position - next symbol after '>'
        ST_EOF,       // End of Data is reached successfully
        ST_ERROR,     // Parser error : prevents from further scanning
        ST_CLOSED     // close method has been called
    };

    enum Flags {
        // Bit flags describing the input status.  NOTE(review): presumably
        // combined into the reader's 'd_flags' data member -- confirm against
        // the implementation file.

        FLG_READ_EOF    = 0x0001,  // End of input data
        FLG_ROOT_CLOSED = 0x0002   // Root closed
    };

    enum StringType {
        // The return value of 'searchCommentCDataOrEndElementName' and
        // 'searchElementName', indicating what kind of node the function has
        // found.

        e_STRINGTYPE_NONE,
        e_STRINGTYPE_COMMENT,
        e_STRINGTYPE_CDATA,
        e_STRINGTYPE_START_ELEMENT,
        e_STRINGTYPE_END_ELEMENT
    };

  private:
    // PRIVATE DATA
    bslma::Allocator         *d_allocator;
    State                     d_state;
    int                       d_flags;
    int                       d_readSize;
    bsl::vector<char>         d_parseBuf;
    int                       d_streamOffset;

    bsl::ifstream             d_stream;
    bsl::streambuf           *d_streamBuf;
    const char *              d_memStream;      // memory buffer to decode from
    size_t                    d_memSize;        // memory buffer size

    char                     *d_startPtr;
    char                     *d_endPtr;
    char                     *d_scanPtr;        // pointer used to traverse the
                                                // input

    char                     *d_markPtr;        // pointer to the previous node
                                                // value

    char                     *d_attrNamePtr;
    char                     *d_attrValPtr;

    int                       d_lineNum;      // current line number

    int                       d_lineOffset;   // offset at the beginning of
                                              // current line

    ErrorInfo                 d_errorInfo;
    XmlResolverFunctor        d_resolver;

    NamespaceRegistry         d_ownNamespaces;
    PrefixStack               d_ownPrefixes;
    PrefixStack              *d_prefixes;

    Node                      d_currentNode;
    size_t                    d_activeNodesCount;  // active nodes count
    ElementVector             d_activeNodes;       // active nodes stack

    bsl::string               d_baseURL;
    bsl::string               d_encoding;
    bsl::string               d_dummyStr;

    unsigned int              d_options;      // option flags for the reader

  private:
    // NOT IMPLEMENTED
    MiniReader(const MiniReader&);             // = delete;
    MiniReader& operator=(const MiniReader&);  // = delete;

    // PRIVATE MANIPULATORS
    Node&       currentNode();
    const Node& currentNode() const;

    int setError(ErrorInfo::Severity error, const bsl::string &msg);

    int setParseError(const char *errText,
                      const char *startFragment,
                      const char *endFragment);

    // HIGH LEVEL PARSING PRIMITIVES

    void  preAdvance();
    const bsl::string& findNamespace(const char *prefix) const;
    const bsl::string& findNamespace(const bsl::string &prefix) const;
    int   checkPrefixes();

    void pushElementName();
        // Push the 'currentNode()'s data onto the 'd_activeNodes' stack.

    int   scanNode();
        // Scan the node at the current position.
    int   updateAttributes();
    int   updateElementInfo();

    int   addAttribute();

    int   scanAttributes();
    int   scanEndElementRaw();
        // Scan an end element without updating the element info.

    int   scanEndElement();
    int   scanExclaimConstruct();
    int   scanOpenTag();
    int   scanProcessingInstruction();
    int   scanStartElement();
    int   scanText();

    StringType searchCommentCDataOrEndElementName(const bsl::string& name);
        // Scan the input for a comment, a CDATA section, the specified element
        // 'name', or the end tag corresponding to 'name'.  Stop at the first
        // instance of either one of those strings and update the internal read
        // pointer (d_scanPtr) to point to the next character after the string
        // read.  Return the string type found.

    StringType searchElementName(const bsl::string& name);
        // Scan the input for the specified element 'name', or the end tag
        // corresponding to 'name'.  Stop at the first instance and update the
        // internal read pointer (d_scanPtr) to point to the next character
        // after the string read.  Return the string type found.  Notice that
        // this method (unlike 'searchCommentCDataOrEndElementName') does not
        // return 'e_STRINGTYPE_COMMENT' or 'e_STRINGTYPE_CDATA'.

    // LOW LEVEL PARSING PRIMITIVES
    const char *rebasePointer(const char *ptr, const char *newBase);
    void  rebasePointers(const char *newBase, size_t newLength);

    int   readInput();
    int   doOpen(const char *url, const char *encoding);

    int   peekChar();
        // Return the character at the current position, and zero if the end of
        // stream was reached.

    int   readAtLeast(bsl::ptrdiff_t number);
        // Call 'readInput' until there are at least the specified 'number' of
        // characters in the buffer.  Return zero if 'number' characters cannot
        // be read, and return a positive value otherwise.

    int   getChar();
        // Return the character at the current position and then advance the
        // current position.  If the end of stream is reached the return value
        // is zero.  The behavior is undefined if this method is called once
        // the end is reached.

    int   getCharAndSet(char ch);
        // Set the specified symbol 'ch' at the current position.  Return the
        // original character at the current position, and advance the current
        // position.  If the end of stream is reached the return value is zero.
        // The behavior is undefined if this method is called once the end is
        // reached.

    bool  checkForNewLine();
        // Check if the current symbol is NL and adjust line number
        // information.  Return 'true' if it was NL, otherwise 'false'

    int   skipSpaces();
        // Skip spaces and set the current position to first non space
        // character or to end if there is no non space found symbol.  Return
        // the character at the new current position.

    int   scanForSymbol(char symbol);
        // Scan for the specified 'symbol' and set the current position to the
        // found symbol.  Return the character at the new current position.  If
        // the symbol is not found, the current position is set to end and
        // returned value is zero.

    int   scanForSymbolOrSpace(char symbol1, char symbol2);
    int   scanForSymbolOrSpace(char symbol);
        // Scan for one of the specified 'symbol', 'symbol1', or 'symbol2'
        // characters or any space character and set the current position to
        // the found symbol.  Return the character at the new current position.
        // If no such symbol is found, the current position is set to end and
        // the returned value is zero.

    int   scanForString(const char * str);
        // Scan for the required string and set the current position to the
        // first character of the found string.  Return the character at the
        // new current position.  If there were no symbols found, the current
        // position is set to end and returned value is zero.

    bool skipIfMatch(const char *str);
        // Compare the content of the buffer, starting from the current
        // position, with the specified string 'str'.  If matches, advance the
        // current position by the length of 'str' and return 'true'; otherwise
        // return 'false' and the current position is unmodified.

  public:
    // PUBLIC CREATORS
    virtual ~MiniReader();

    explicit MiniReader(bslma::Allocator *basicAllocator = 0);
    explicit MiniReader(int bufSize, bslma::Allocator *basicAllocator = 0);
        // Construct a reader with the optionally specified 'bufSize'.  The
        // instantiated MiniReader will utilize a memory buffer of 'bufSize'
        // while reading the input document.  Optionally specify a
        // 'basicAllocator' used to supply memory.  If 'basicAllocator' is 0,
        // the currently installed default allocator is used.  Note that
        // 'bufSize' is a hint, which may be modified or ignored if it is not
        // within a "sane" range.

    //------------------------------------------------
    // INTERFACE Reader
    //------------------------------------------------

    // MANIPULATORS - SETUP METHODS
    virtual void setPrefixStack(PrefixStack *prefixes);
        // Set the prefix stack to the stack at the specified 'prefixes'
        // address or disable prefix stack support if 'prefixes' == 0.  This
        // stack is used to push and pop namespace prefixes as the parse
        // progresses, so that, at any point, the stack will reflect the set of
        // active prefixes for the current node.  It is legitimate to pass a
        // stack that already contains prefixes; these prefixes shall be
        // preserved when 'close' is called, i.e., the prefix stack shall be
        // returned to the stack depth it had when 'setPrefixStack' was called.
        // The behavior is undefined if this method is called after calling
        // 'open' and before calling 'close'.

    virtual void setResolver(XmlResolverFunctor resolver);
        // Set the external XML resource resolver to the specified 'resolver'.
        // The XML resource resolver is used by the reader to find and open
        // external resources (see the 'XmlResolverFunctor' typedef for more
        // details).  The XML resource resolver remains valid; it is not
        // affected by a call to 'close' and should be available until the
        // reader is destroyed.  The behavior is undefined if this method is
        // called after calling 'open' and before calling 'close'.

    // MANIPULATORS - OPEN/CLOSE AND NAVIGATION METHODS
    virtual int open(const char *filename, const char *encoding = 0);
        // Set up the reader for parsing using the data contained in the XML
        // file described by the specified 'filename', and set the encoding
        // value to the optionally specified 'encoding' ("ASCII", "UTF-8",
        // etc).  Return 0 on success and non-zero otherwise.  The encoding
        // passed to 'Reader::open' will take effect only when there is no
        // encoding information in the original document, i.e., the encoding
        // information obtained from the XML file described by the 'filename'
        // trumps all.  If there is no encoding provided within the document
        // and 'encoding' is null or a blank string is passed, then set the
        // encoding to the default "UTF-8".  It is an error to 'open' a reader
        // that is already open.  Note that the reader will not be on a valid
        // node until 'advanceToNextNode' is called.

    virtual int open(const char  *buffer,
                     bsl::size_t  size,
                     const char  *url = 0,
                     const char  *encoding = 0);
        // Set up the reader for parsing using the data contained in the
        // specified (XML) 'buffer' of the specified 'size', set the base URL
        // to the optionally specified 'url' and set the encoding value to the
        // optionally specified 'encoding' ("ASCII", "UTF-8", etc).  Return 0
        // on success and non-zero otherwise.  If 'url' is null or a blank
        // string is passed, then base URL will be empty.  The encoding passed
        // to 'Reader::open' will take effect only when there is no encoding
        // information in the original document, i.e., the encoding information
        // obtained from the (XML) 'buffer' trumps all.  If there is no
        // encoding provided within the document and 'encoding' is null or a
        // blank string is passed, then set the encoding to the default
        // "UTF-8".  It is an error to 'open' a reader that is already open.
        // Note that the reader will not be on a valid node until
        // 'advanceToNextNode' is called.

    virtual int open(bsl::streambuf *stream,
                     const char     *url = 0,
                     const char     *encoding = 0);
        // Set up the reader for parsing using the data contained in the
        // specified (XML) 'stream', set the base URL to the optionally
        // specified 'url' and set the encoding value to the optionally
        // specified 'encoding' ("ASCII", "UTF-8", etc).  Return 0 on success
        // and non-zero otherwise.  If 'url' is null or a blank string is
        // passed, then base URL will be empty.  The encoding passed to
        // 'Reader::open' will take effect only when there is no encoding
        // information in the original document, i.e., the encoding information
        // obtained from the (XML) 'stream' trumps all.  If there is no
        // encoding provided within the document and 'encoding' is null or a
        // blank string is passed, then set the encoding to the default
        // "UTF-8".  It is an error to 'open' a reader that is already open.
        // Note that the reader will not be on a valid node until
        // 'advanceToNextNode' is called.

    virtual void close();
        // Close the reader.  Most, but not all state is reset.  Specifically,
        // the XML resource resolver and the prefix stack remain.  The prefix
        // stack shall be returned to the stack depth it had when
        // 'setPrefixStack' was called.  Call the method 'open' to reuse the
        // reader.  Note that 'close' invalidates all strings and data
        // structures obtained via 'Reader' accessors.  E.g., the pointer
        // returned from 'nodeName' for this node will not be valid once
        // 'close' is called.

    virtual int advanceToEndNode();
        // Skip all the sub elements of the current node and position the
        // reader on its corresponding end node.  While skipping, ensure that
        // the elements being skipped are well-formed and do not contain any
        // parsing errors.  Return 0 on successful skip, and a negative number
        // otherwise (error).  The behavior is undefined unless
        // 'Reader::BAEXML_NODE_TYPE_ELEMENT == nodeType()'.  Note that each
        // call to 'advanceToEndNode' invalidates strings and data structures
        // returned when 'Reader' accessors were called for the "prior node".
        // E.g., the pointer returned from 'nodeName' for this node will not be
        // valid once 'advanceToEndNode' is called.  Note that this method
        // leaves the reader pointing to an end node, so calling
        // 'advanceToEndNode' again immediately after will not advance the
        // reader further (first call 'advanceToNextNode' before calling
        // 'advanceToEndNode' again).

    virtual int advanceToEndNodeRaw();
        // Skip all the sub elements of the current node and position the
        // reader on its corresponding end node, and (unlike
        // 'advanceToEndNode') perform no checks to ensure that the elements
        // being skipped are well-formed and that they do not contain any
        // parsing errors.  Return 0 on successful skip, and a negative number
        // otherwise (error).  The behavior is undefined unless
        // 'Reader::BAEXML_NODE_TYPE_ELEMENT == nodeType()'.  Note that each
        // call to 'advanceToEndNodeRaw' invalidates strings and data
        // structures returned when 'Reader' accessors were called for the
        // "prior node".  E.g., the pointer returned from 'nodeName' for this
        // node will not be valid once 'advanceToEndNodeRaw' is called.  Note
        // that this method leaves the reader pointing to an end node, so
        // calling 'advanceToEndNodeRaw' again immediately after will not
        // advance the reader further (first call 'advanceToNextNode' before
        // calling 'advanceToEndNodeRaw' again).

    virtual int advanceToEndNodeRawBare();
        // Skip all the sub elements of the current node and position the
        // reader on its corresponding end node, and (unlike
        // 'advanceToEndNode') perform no checks to ensure that the elements
        // being skipped are well-formed and that they do not contain any
        // parsing errors.  Unlike 'advanceToEndNodeRaw', this method does not
        // expect (allow) comments or CDATA nodes in the input XML; in other
        // words, it expects "bare" XML.  Return 0 on successful skip, and a
        // negative number otherwise (error).  The behavior is undefined unless
        // 'Reader::BAEXML_NODE_TYPE_ELEMENT == nodeType()'.  The behavior is
        // also undefined if the input XML contains comment or CDATA nodes.
        // Note that each call to 'advanceToEndNodeRawBare' invalidates strings
        // and data structures returned when 'Reader' accessors were called for
        // the "prior node".  E.g., the pointer returned from 'nodeName' for
        // this node will not be valid once 'advanceToEndNodeRawBare' is
        // called.  Note that this method leaves the reader pointing to an end
        // node, so calling 'advanceToEndNodeRawBare' again immediately after
        // will not advance the reader further (first call 'advanceToNextNode'
        // before calling 'advanceToEndNodeRawBare' again).

    virtual int advanceToNextNode();
        // Move to the next node in the data stream created by 'open', thus
        // allowing the node's properties to be queried via the 'Reader'
        // accessors.  Return 0 on successful read, 1 if there are no more
        // nodes to read, and a negative number otherwise.  Note that each call
        // to 'advanceToNextNode' invalidates strings and data structures
        // returned when 'Reader' accessors were called for the "prior node".
        // E.g., the pointer returned from 'nodeName' for this node will not be
        // valid once 'advanceToNextNode' is called.  Note that the reader will
        // not be on a valid node until the first call to 'advanceToNextNode'
        // after the reader is opened.

    virtual int lookupAttribute(ElementAttribute *attribute, int index) const;
        // Find the attribute at the specified 'index' in the current node, and
        // fill in the specified 'attribute' structure.  Return 0 on success, 1
        // if no attribute is found at the 'index', and a negative value
        // otherwise.  The strings that were filled into the 'attribute'
        // structure become invalid the next time 'advanceToNextNode' or
        // 'close' is called.

    virtual int lookupAttribute(ElementAttribute *attribute,
                                const char       *qname) const;
        // Find the attribute with the specified 'qname' (qualified name) in
        // the current node, and fill in the specified 'attribute' structure.
        // Return 0 on success, 1 if there is no attribute found with 'qname',
        // and a negative value otherwise.  The strings that were filled into
        // the 'attribute' structure become invalid the next time
        // 'advanceToNextNode' or 'close' is called.

    virtual int lookupAttribute(ElementAttribute *attribute,
                                const char       *localName,
                                const char       *namespaceUri) const;
        // Find the attribute with the specified 'localName' and specified
        // 'namespaceUri' in the current node, and fill in the specified
        // 'attribute' structure.  Return 0 on success, 1 if there is no
        // attribute found with 'localName' and 'namespaceUri', and a negative
        // value otherwise.  If 'namespaceUri' == 0 or a blank string is
        // passed, then the document's default namespace will be used.  The
        // strings that were filled into the 'attribute' structure become
        // invalid the next time 'advanceToNextNode' or 'close' is called.

    virtual int lookupAttribute(ElementAttribute *attribute,
                                const char       *localName,
                                int               namespaceId) const;
        // Find the attribute with the specified 'localName' and specified
        // 'namespaceId' in the current node, and fill in the specified
        // 'attribute' structure.  Return 0 on success, 1 if there is no
        // attribute found with 'localName' and 'namespaceId', and a negative
        // value otherwise.  If 'namespaceId' == -1, then the document's
        // default namespace will be used.  The strings that were filled into
        // the 'attribute' structure become invalid the next time
        // 'advanceToNextNode' or 'close' is called.

    virtual void setOptions(unsigned int flags);
        // Set the options to the flags in the specified 'flags'.  The options
        // for the reader are persistent, i.e., the options are not reset by
        // 'close'.  The behavior is undefined if this method is called after
        // calling 'open' and before calling 'close'.

    // ACCESSORS
    virtual const char *documentEncoding() const;
        // Return the document encoding or NULL on error.  The returned pointer
        // is owned by this object and must not be modified or deallocated by
        // the caller.  The returned pointer becomes invalid when 'close' is
        // called or the reader is destroyed.

    virtual XmlResolverFunctor resolver() const;
        // Return the external XML resource resolver.

    virtual bool isOpen() const;
        // Return true if 'open' was called successfully and 'close' has not
        // yet been called and false otherwise.

    virtual const ErrorInfo& errorInfo() const;
        // Return a reference to the non-modifiable error information for this
        // reader.  The returned value becomes invalid when 'close' is called
        // or the reader is destroyed.

    virtual int getLineNumber() const;
        // Return the current line number within the input stream.  The current
        // line is the last line for which the reader has not yet seen a
        // newline.  Lines are counted starting at one from the time a stream
        // is provided to 'open'.  Return 0 if not available.  Note that a
        // derived-class implementation is not required to count lines and may
        // just return 0.

    virtual int getColumnNumber() const;
        // Return the current column number within the input stream.  The
        // current column number is the number of characters since the last
        // newline was read by the reader plus one, i.e., the first column of
        // each line is column number one.  Return 0 if not available.  Note
        // that a derived-class implementation is not required to count
        // columns and may just return 0.

    virtual PrefixStack *prefixStack() const;
        // Return a pointer to the modifiable prefix stack that is used by this
        // reader to manage namespace prefixes or 0 if namespace support is
        // disabled.  The behavior is undefined if the returned prefix stack is
        // augmented in any way after calling 'open' and before calling
        // 'close'.

    virtual NodeType nodeType() const;
        // Return the node type of the current node if the reader 'isOpen' and
        // has not encountered an error and 'Reader::NONE' otherwise.

    virtual const char *nodeName() const;
        // Return the qualified name of the current node if the current node
        // has a name and NULL otherwise.  The returned pointer is owned by
        // this object and must not be modified or deallocated by the caller.
        // The returned pointer becomes invalid upon the next
        // 'advanceToNextNode', when 'close' is called or the reader is
        // destroyed.

    virtual const char *nodeLocalName() const;
        // Return the local name of the current node if the current node has a
        // local name and NULL otherwise.  The returned pointer is owned by
        // this object and must not be modified or deallocated by the caller.
        // The returned pointer becomes invalid upon the next
        // 'advanceToNextNode', when 'close' is called or the reader is
        // destroyed.

    virtual const char *nodePrefix() const;
        // Return the prefix name of the current node if the current node has a
        // prefix name and NULL otherwise.  The returned pointer is owned by
        // this object and must not be modified or deallocated by the caller.
        // The returned pointer becomes invalid upon the next
        // 'advanceToNextNode', when 'close' is called or the reader is
        // destroyed.

    virtual int nodeNamespaceId() const;
        // Return the namespace ID of the current node if the current node has
        // a namespace id and a negative number otherwise.

    virtual const char *nodeNamespaceUri() const;
        // Return the namespace URI name of the current node if the current
        // node has a namespace URI and NULL otherwise.  The returned pointer
        // is owned by this object and must not be modified or deallocated by
        // the caller.  The returned pointer becomes invalid upon the next
        // 'advanceToNextNode', when 'close' is called or the reader is
        // destroyed.

    virtual const char *nodeBaseUri() const;
        // Return the base URI name of the current node if the current node has
        // a base URI and NULL otherwise.  The returned pointer is owned by
        // this object and must not be modified or deallocated by the caller.
        // The returned pointer becomes invalid upon the next
        // 'advanceToNextNode', when 'close' is called or the reader is
        // destroyed.

    virtual bool nodeHasValue() const;
        // Return true if the current node has a value and false otherwise.

    virtual const char *nodeValue() const;
        // Return the value of the current node if the current node has a value
        // and NULL otherwise.  The returned pointer is owned by this object
        // and must not be modified or deallocated by the caller.  The returned
        // pointer becomes invalid upon the next 'advanceToNextNode', when
        // 'close' is called or the reader is destroyed.

    virtual int nodeDepth() const;
        // Return the nesting depth of the current node in the XML document.
        // The root node has depth 0.

    virtual int numAttributes() const;
        // Return the number of attributes for the current node if that node
        // has attributes and 0 otherwise.

    virtual bool isEmptyElement() const;
        // Return true if the current node is an element (i.e., node type is
        // 'NODE_TYPE_ELEMENT') that ends with '/>'; and false otherwise.
        // Note that '<a/>' will be considered empty but '<a></a>' will not.

    virtual unsigned int options() const;
        // Return the option flags.

    // ACCESSORS
    // SPECIFIC FOR MiniReader
    int getCurrentPosition() const;
        // Return the current scanner position as an offset from the beginning
        // of the document.

    int nodeStartPosition() const;
        // Return the byte position within the document corresponding to the
        // first byte of the current node.

    int nodeEndPosition() const;
        // Return the byte position within the document corresponding to the
        // byte following the last byte of the current node.

};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                              // ----------------
                              // class MiniReader
                              // ----------------

inline
MiniReader::Node& MiniReader::currentNode()
{
    // Return a modifiable reference to the 'd_currentNode' member.

    return d_currentNode;
}

inline
const MiniReader::Node& MiniReader::currentNode() const
{
    // Return a non-modifiable reference to the 'd_currentNode' member.

    return d_currentNode;
}

inline
int MiniReader::peekChar()
{
    // Return the character under the scan pointer without advancing past it.
    // 'readInput' is invoked only when the scan pointer has reached
    // 'd_endPtr'; if that call yields 0, this function returns 0 as well
    // (presumably meaning that no further input is available).

    const bool exhausted = d_scanPtr >= d_endPtr;

    if (exhausted && 0 == readInput()) {
        return 0;                                                     // RETURN
    }

    return *d_scanPtr;
}

inline
int MiniReader::getChar()
{
    // Return the character under the scan pointer and advance past it.
    // 'readInput' is invoked only when the scan pointer has reached
    // 'd_endPtr'; if that call yields 0, this function returns 0 and the
    // scan pointer is not advanced here.

    const bool exhausted = d_scanPtr >= d_endPtr;

    if (exhausted && 0 == readInput()) {
        return 0;                                                     // RETURN
    }

    const int ch = *d_scanPtr;   // fetch first ...
    ++d_scanPtr;                 // ... then step to the next character
    return ch;
}

inline
bool MiniReader::checkForNewLine()
{
    // Return 'true', after updating the line bookkeeping, if the character
    // under the scan pointer is a newline; otherwise return 'false' and
    // change nothing.

    if (*d_scanPtr != '\n') {
        return false;                                                 // RETURN
    }

    // A newline starts a new line: bump the line counter and record the
    // position just past the newline in 'd_lineOffset'.

    ++d_lineNum;
    d_lineOffset = getCurrentPosition() + 1;

    return true;
}

inline
int MiniReader::getCharAndSet(char ch)
{
    // Return the character under the scan pointer, overwrite it in place
    // with the specified 'ch', and advance past it.  If 'peekChar' returns
    // 0, return 0 without modifying the buffer or the scan pointer.

    const int current = peekChar();

    if (0 == current) {
        return current;                                               // RETURN
    }

    // The newline check runs after 'peekChar' (which may call 'readInput')
    // and before the original character is replaced.

    checkForNewLine();

    *d_scanPtr = ch;
    ++d_scanPtr;

    return current;
}

inline
const char *MiniReader::rebasePointer(const char *ptr, const char *newBase)
{
    // If the specified 'ptr' is non-null and lies within the closed range
    // '[d_markPtr, d_endPtr]', return the address at the same offset from
    // the specified 'newBase' as 'ptr' has from 'd_markPtr'; otherwise
    // return 'ptr' unchanged.

    const bool withinBuffer = ptr && d_markPtr <= ptr && ptr <= d_endPtr;

    return withinBuffer ? newBase + (ptr - d_markPtr) : ptr;
}

inline
int MiniReader::getCurrentPosition() const
{
    // The position is 'd_streamOffset' plus the distance of the scan pointer
    // from 'd_startPtr', narrowed to 'int'.
    // NOTE(review): presumably 'd_streamOffset' is the document offset that
    // corresponds to 'd_startPtr' -- confirm against 'readInput'.

    return static_cast<int>(d_streamOffset + (d_scanPtr - d_startPtr));
}

inline
int MiniReader::nodeStartPosition() const
{
    // Return the 'd_startPos' field recorded in the current node.

    return currentNode().d_startPos;
}

inline
int MiniReader::nodeEndPosition() const
{
    // Return the 'd_endPos' field recorded in the current node.

    return currentNode().d_endPos;
}

}  // close package namespace
}  // close enterprise namespace

#endif  // INCLUDED_BALXML_MINIREADER

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------