// balxml_validatingreader.h                                          -*-C++-*-

// ----------------------------------------------------------------------------
//                                   NOTICE
//
// This component is not up to date with current BDE coding standards, and
// should not be used as an example for new development.
// ----------------------------------------------------------------------------

#ifndef INCLUDED_BALXML_VALIDATINGREADER
#define INCLUDED_BALXML_VALIDATINGREADER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide a common reader protocol for parsing and validating XML.
//
//@CLASSES:
//  balxml::ValidatingReader: reader protocol for parsing and validating XML
//
//@SEE_ALSO: balxml_reader
//
//@DESCRIPTION: This component represents an abstract class
// 'balxml::ValidatingReader' - an XML reader that provides data validation
// against DTD and/or XML Schemas (XSD).  The 'balxml::ValidatingReader'
// inherits from the 'balxml::Reader' interface and is therefore fully
// compliant with it.  In addition, 'balxml::ValidatingReader' provides
// additional methods to control the validation.  The 'enableValidation'
// method specifies what type of validation the reader should perform.
// Setting 'validationFlag' to 'false' produces a non-validating reader.
// Setting it to 'true' forces the reader to perform the validation of input
// XML data against XSD schemas.
//
///Schema Location and obtaining Schemas
///-------------------------------------
// In validating mode the reader should be able to obtain external XSD schemas.
// 'balxml::ValidatingReader' requires that all schema sources must be
// represented in the form of 'bsl::streambuf' objects.  According to the W3C
// standard, information about external XSD schemas can be defined in three
// places:
//
//: o In an instance document, the attribute 'xsi:schemaLocation' provides
//:   hints from the author to a processor regarding the location of schema
//:   documents.
//:   The 'schemaLocation' attribute value consists of one or more pairs of
//:   URI references, separated by white space.  The first member of each pair
//:   is a namespace name, and the second member of the pair is a hint
//:   describing where to find an appropriate schema document for that
//:   namespace.  The presence of these hints does not require the processor to
//:   obtain or use the cited schema documents, and the processor is free to
//:   use other schemas obtained by any suitable means.  For example, XercesC
//:   has a property 'XercesSchemaExternalSchemaLocation' that informs the
//:   parser about available schemas in exactly the same format as the
//:   attribute 'schemaLocation' in the document instance.
//
// Example:
//..
//  <purchaseReport
//      xmlns="http://www.example.com/Report"
//      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
//      xsi:schemaLocation="http://www.example.com/Report
//                          http://www.example.com/Report.xsd"
//      period="P3M" periodEnding="1999-12-31">
//..
//
//: o In a schema, the 'include' element has a required 'schemaLocation'
//:   attribute, and it contains a URI reference which must identify a schema
//:   document.
//:
//: o Also in a schema, the 'import' element has optional 'namespace' and
//:   'schemaLocation' attributes.  If present, the 'schemaLocation' attribute
//:   is understood in a way which parallels the interpretation of
//:   'xsi:schemaLocation' in the first case above.  Specifically, it provides
//:   a hint from the author to a processor regarding the location of a schema
//:   document that the author warrants supplies the required components for
//:   the namespace identified by the 'namespace' attribute.
//
// For all mentioned cases, having the URI reference which identifies a schema
// and an optional namespace, the processor (parser) should obtain a
// 'bsl::streambuf' object for the schema.  For this purpose the
// 'balxml::ValidatingReader' interface defines a two-level schema resolution
// process:
//
//: 1 The reader (parser) must look up the schema in the internal cache.
//:   If the schema is found, it must be used.
//:
//: 2 Otherwise the reader must use the associated resolver to obtain the
//:   schema (see 'balxml::Reader::XmlResolverFunctor').
//
// Both the schema cache and resolver should be set up before the method 'open'
// is called.
//
///Schema Cache
///------------
// 'balxml::ValidatingReader' provides two abstract methods to maintain the
// schema cache:
//
//: o 'addSchema', add a schema to the cache
//: o 'removeSchemas', clear the cache and remove all schemas
//
///Thread Safety
///-------------
// This component does not provide any functions that present a thread safety
// issue, since the 'balxml::ValidatingReader' class is abstract and cannot be
// instantiated.  There is no guarantee that any specific derived class will
// provide a thread-safe implementation.
//
///Usage
///-----
// In this example, we will create a validating parser that parses a document
// and validates it against a schema.
//..
//  #include <a_xercesc_reader.h>
//
//  #include <balxml_validatingreader.h>
//  #include <balxml_errorinfo.h>
//  #include <iostream>
//  #include <sstream>
//..
// The following string describes an XSD schema for the documents we are going
// to parse:
//..
// const char TEST_XSD_STRING[] = // "<?xml version='1.0' encoding='UTF-8'?>" // "<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'" // " xmlns='http://bloomberg.com/schemas/directory'" // " targetNamespace='http://bloomberg.com/schemas/directory'" // " elementFormDefault='qualified'" // " attributeFormDefault='qualified' >" // " " // "<xsd:complexType name='entryType'>" // " <xsd:sequence>" // " <xsd:element name='name' type='xsd:string'/>" // " <xsd:element name='phone'>" // " <xsd:complexType>" // " <xsd:simpleContent>" // " <xsd:extension base='xsd:string'>" // " <xsd:attribute name='phonetype' type='xsd:string'/>" // " </xsd:extension>" // " </xsd:simpleContent>" // " </xsd:complexType>" // " </xsd:element>" // " <xsd:element name='address' type='xsd:string'/>" // " </xsd:sequence>" // "</xsd:complexType>" // " " // "<xsd:element name='directory-entry' type='entryType'/>" // "</xsd:schema>"; //.. // The following string describes correct XML for a conforming schema. The // top-level element contains one XML namespace attribute, with one embedded // entry describing a user: //.. // const char TEST_GOOD_XML_STRING[] = // "<?xml version='1.0' encoding='UTF-8'?>\n" // "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n" // " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n" // " xsi:schemaLocation='http://bloomberg.com/schemas/directory \n" // " aaa.xsd' >\n" // " <name>John Smith</name>\n" // " <phone dir:phonetype='cell'>212-318-2000</phone>\n" // " <address/>\n" // "</directory-entry>\n"; //.. // The following string describes invalid XML. More specifically, the XML // document is well-formed, but does not conform to our schema: //.. 
//  const char TEST_BAD_XML_STRING[] =
//     "<?xml version='1.0' encoding='UTF-8'?>\n"
//     "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n"
//     " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n"
//     " xsi:schemaLocation='http://bloomberg.com/schemas/directory \n"
//     " aaa.xsd' >\n"
//     " <name>John Smith</name>\n"
//     " <phone dir:phonetype='cell'>212-318-2000</phone>\n"
//     "</directory-entry>\n";
//..
// Now we define a 'parse' method for parsing an XML document and validating
// it against an XSD schema:
//..
//int parse(balxml::ValidatingReader *reader,
//          const char               *xmlData,
//          const char               *xsdSchema)
//{
//..
// In order to read the XML, we first need to construct a
// 'balxml::NamespaceRegistry' object, a 'balxml::PrefixStack' object, and a
// 'TestReader' object, where 'TestReader' is a derived implementation of
// 'balxml::ValidatingReader'.
//..
//    balxml::NamespaceRegistry namespaces;
//    balxml::PrefixStack prefixStack(&namespaces);
//
//    ASSERT(!reader->isOpen());
//..
// The reader uses a 'balxml::PrefixStack' to manage namespace prefixes so we
// need to set it before we call 'open'.
//..
//    reader->setPrefixStack(&prefixStack);
//    ASSERT(reader->prefixStack() == &prefixStack);
//..
// Set up validation:
//..
//    reader->removeSchemas();
//
//    reader->enableValidation(true);
//    ASSERT(reader->validationFlag());
//
//    bsl::istringstream schemaStream(xsdSchema);
//    reader->addSchema("aaa.xsd", schemaStream.rdbuf());
//..
// Now we call the 'open' method to set up the reader for parsing using the
// data contained in the XML string.
//..
//    int rc = reader->open(xmlData, bsl::strlen(xmlData), 0, "UTF-8");
//    ASSERT(rc == 0);
//..
// Confirm that the 'balxml::Reader' has opened properly:
//..
//    ASSERT(reader->isOpen());
//..
// Do the actual document reading:
//..
//    while(1) {
//        rc = reader->advanceToNextNode ();
//        if (rc != 0) {
//            break;
//        }
//..
// process current node here
//..
//    }
//..
// Clean up and close the reader.
//..
//    reader->close();
//    ASSERT(!reader->isOpen());
//
//    reader->setPrefixStack(0);
//    ASSERT(reader->prefixStack() == 0);
//
//    return rc;
//}
//..
// The main program parses an XML string using the TestReader
//..
//int usageExample()
//{
//    a_xercesc::Reader reader;
//
//    int rc = parse(&reader, TEST_GOOD_XML_STRING, TEST_XSD_STRING);
//..
// Normal end of data
//..
//    ASSERT(rc==1);
//
//    rc = parse(&reader, TEST_BAD_XML_STRING, TEST_XSD_STRING);
//..
// Parser error - document validation failed
//..
//    ASSERT(rc==-1);
//
//    return 0;
//}
//..

#include <balscm_version.h>

#include <balxml_reader.h>

namespace BloombergLP {
namespace balxml {

                           // ======================
                           // class ValidatingReader
                           // ======================

class ValidatingReader : public Reader {
    // This class defines a protocol (pure abstract interface) for XML readers
    // that can validate the documents they parse.  It extends the 'Reader'
    // protocol with methods to switch validation on and off
    // ('enableValidation', 'validationFlag') and to maintain a cache of
    // pre-resolved schemas ('addSchema', 'removeSchemas').

  public:
    // CREATORS
    virtual ~ValidatingReader();
        // Destroy this object.

    // MANIPULATORS
    virtual void enableValidation(bool validationFlag) = 0;
        // Enable XML validation if the specified 'validationFlag' is 'true',
        // and disable it otherwise.  A validating reader is often faster if
        // validation is disabled.  This operation does not take effect until
        // the next call to the 'open' method.

    virtual int addSchema(const char *location, bsl::streambuf *schema) = 0;
        // Associate the specified 'location' with the XSD document in the
        // specified 'schema' stream.  Return 0 on success and a non-zero
        // value on error.  This method may be called more than once to add
        // multiple location-schema associations to a pre-resolved schema
        // cache.  During XML validation, a reference to a schema with a
        // specific location will be looked up in the schema cache.  Only if
        // this resolution fails is the schema resolver used to find the
        // external schema.  (See 'setResolver' in the 'Reader' base class.)
        // The effects of calling this method when the reader is already open
        // are not specified, and a derived class implementation may treat it
        // as an error.  The behavior is undefined unless 'schema' is a
        // repositionable stream (i.e., it must be possible to read from the
        // stream, then seek to the beginning of the stream and read the same
        // bytes again).

    virtual void removeSchemas() = 0;
        // Remove all location-to-schema associations that were added using
        // 'addSchema'.  This method should be called each time parsing is
        // finished and the reader is going to be re-used for parsing other
        // documents with different schemas.  Note that calling 'close' on the
        // reader *does* *not* remove the schemas.

    // ACCESSORS
    virtual bool validationFlag() const = 0;
        // Return 'true' if the reader has validation turned on, and 'false'
        // otherwise.
};

}  // close package namespace

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------