BDE 4.14.0 Production release
Loading...
Searching...
No Matches
balxml_validatingreader.h
Go to the documentation of this file.
1/// @file balxml_validatingreader.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// balxml_validatingreader.h -*-C++-*-
8#ifndef INCLUDED_BALXML_VALIDATINGREADER
9#define INCLUDED_BALXML_VALIDATINGREADER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup balxml_validatingreader balxml_validatingreader
15/// @brief Provide a common reader protocol for parsing and validating XML.
16/// @addtogroup bal
17/// @{
18/// @addtogroup balxml
19/// @{
20/// @addtogroup balxml_validatingreader
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#balxml_validatingreader-purpose"> Purpose</a>
25/// * <a href="#balxml_validatingreader-classes"> Classes </a>
26/// * <a href="#balxml_validatingreader-description"> Description </a>
27/// * <a href="#balxml_validatingreader-schema-location-and-obtaining-schemas"> Schema Location and obtaining Schemas </a>
28/// * <a href="#balxml_validatingreader-schema-cache"> Schema Cache </a>
29/// * <a href="#balxml_validatingreader-thread-safety"> Thread Safety </a>
30/// * <a href="#balxml_validatingreader-usage"> Usage </a>
31/// * <a href="#balxml_validatingreader-example-1-basic-usage"> Example 1: Basic Usage </a>
32///
33/// # Purpose {#balxml_validatingreader-purpose}
34/// Provide a common reader protocol for parsing and validating XML.
35///
36/// # Classes {#balxml_validatingreader-classes}
37///
38/// - balxml::ValidatingReader: reader protocol for parsing and validating XML
39///
40/// @see balxml_reader
41///
42/// # Description {#balxml_validatingreader-description}
43/// This component provides an abstract class,
44/// `balxml::ValidatingReader` - an XML reader that provides data validation
45/// against DTD and/or XML Schemas (XSD). The `balxml::ValidatingReader`
46/// inherits from the `balxml::Reader` interface and is therefore fully compliant
47/// with it. In addition, `balxml::ValidatingReader` provides additional
48/// methods to control the validation. The `enableValidation` method specifies
49/// what type of validation the reader should perform. Setting `validationFlag`
50/// to `false` produces a non-validating reader. Setting it to `true` forces
51/// the reader to perform validation of input XML data against XSD schemas.
52///
53/// ## Schema Location and obtaining Schemas {#balxml_validatingreader-schema-location-and-obtaining-schemas}
54///
55///
56/// In validating mode the reader should be able to obtain external XSD schemas.
57/// `balxml::ValidatingReader` requires that all schema sources must be
58/// represented in the form of `bsl::streambuf` objects. According to the W3C
59/// standard, information about external XSD schemas can be defined in three
60/// places:
61///
62/// * In an instance document, the attribute `xsi:schemaLocation` provides
63/// hints from the author to a processor regarding the location of schema
64/// documents. The `schemaLocation` attribute value consists of one or more
65/// pairs of URI references, separated by white space. The first member of
66/// each pair is a namespace name, and the second member of the pair is a
67/// hint describing where to find an appropriate schema document for that
68/// namespace. The presence of these hints does not require the processor to
69/// obtain or use the cited schema documents, and the processor is free to
70/// use other schemas obtained by any suitable means. For example, XercesC
71/// has a property, XercesSchemaExternalSchemaLocation, that informs the parser
72/// about available schemas in exactly the same format as the attribute
73/// `schemaLocation` in the document instance.
74///
75/// Example:
76/// @code
77/// <purchaseReport
78/// xmlns="http://www.example.com/Report"
79/// xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
80/// xsi:schemaLocation="http://www.example.com/Report
81/// http://www.example.com/Report.xsd"
82/// period="P3M" periodEnding="1999-12-31">
83/// @endcode
84///
85/// * In a schema, the `include` element has a required `schemaLocation`
86/// attribute, and it contains a URI reference which must identify a schema
87/// document.
88/// * Also in a schema, the `import` element has optional `namespace` and
89/// `schemaLocation` attributes. If present, the `schemaLocation` attribute
90/// is understood in a way which parallels the interpretation of
91/// `xsi:schemaLocation` in the first item above. Specifically, it provides a hint from the
92/// author to a processor regarding the location of a schema document that
93/// the author warrants supplies the required components for the namespace
94/// identified by the namespace attribute.
95///
96/// For all mentioned cases, having the URI reference which identifies a schema
97/// and an optional namespace, the processor (parser) should obtain a
98/// `bsl::streambuf` object for the schema. For this purpose, the
99/// `balxml::ValidatingReader` interface defines a two-level schema
100/// resolution process:
101///
102/// 1. The reader (parser) must look up the schema in its internal cache. If the schema
103/// is found, it must be used.
104/// 2. Otherwise, the reader must use the associated resolver to obtain the schema (see
105/// `balxml::Reader::XmlResolverFunctor`).
106///
107/// Both the schema cache and resolver should be set up before the method `open`
108/// is called.
109///
110/// ## Schema Cache {#balxml_validatingreader-schema-cache}
111///
112///
113/// `balxml::ValidatingReader` provides two abstract methods to maintain the
114/// schema cache:
115///
116/// * `addSchema`, add a schema to the cache
117/// * `removeSchemas`, clear the cache and remove all schemas
118///
119/// ## Thread Safety {#balxml_validatingreader-thread-safety}
120///
121///
122/// This component does not provide any functions that present a thread safety
123/// issue, since the `balxml::ValidatingReader` class is abstract and cannot be
124/// instantiated. There is no guarantee that any specific derived class will
125/// provide a thread-safe implementation.
126///
127/// ## Usage {#balxml_validatingreader-usage}
128///
129///
130/// This section illustrates intended use of this component.
131///
132/// ### Example 1: Basic Usage {#balxml_validatingreader-example-1-basic-usage}
133///
134///
135/// In this example, we will create a validating parser that parses and
136/// validates a document against the schema.
137/// @code
138/// #include <a_xercesc_reader.h>
139///
140/// #include <balxml_validatingreader.h>
141/// #include <balxml_errorinfo.h>
142/// #include <iostream>
143/// #include <sstream>
144/// @endcode
145/// The following string describes an XSD schema for the documents we are going
146/// to parse:
147/// @code
148/// const char TEST_XSD_STRING[] =
149/// "<?xml version='1.0' encoding='UTF-8'?>"
150/// "<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'"
151/// " xmlns='http://bloomberg.com/schemas/directory'"
152/// " targetNamespace='http://bloomberg.com/schemas/directory'"
153/// " elementFormDefault='qualified'"
154/// " attributeFormDefault='qualified' >"
155/// " "
156/// "<xsd:complexType name='entryType'>"
157/// " <xsd:sequence>"
158/// " <xsd:element name='name' type='xsd:string'/>"
159/// " <xsd:element name='phone'>"
160/// " <xsd:complexType>"
161/// " <xsd:simpleContent>"
162/// " <xsd:extension base='xsd:string'>"
163/// " <xsd:attribute name='phonetype' type='xsd:string'/>"
164/// " </xsd:extension>"
165/// " </xsd:simpleContent>"
166/// " </xsd:complexType>"
167/// " </xsd:element>"
168/// " <xsd:element name='address' type='xsd:string'/>"
169/// " </xsd:sequence>"
170/// "</xsd:complexType>"
171/// " "
172/// "<xsd:element name='directory-entry' type='entryType'/>"
173/// "</xsd:schema>";
174/// @endcode
175/// The following string describes correct XML that conforms to the schema. The
176/// top-level element contains one XML namespace attribute, with one embedded
177/// entry describing a user:
178/// @code
179/// const char TEST_GOOD_XML_STRING[] =
180/// "<?xml version='1.0' encoding='UTF-8'?>\n"
181/// "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n"
182/// " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n"
183/// " xsi:schemaLocation='http://bloomberg.com/schemas/directory \n"
184/// " aaa.xsd' >\n"
185/// " <name>John Smith</name>\n"
186/// " <phone dir:phonetype='cell'>212-318-2000</phone>\n"
187/// " <address/>\n"
188/// "</directory-entry>\n";
189/// @endcode
190/// The following string describes invalid XML. More specifically, the XML
191/// document is well-formed, but does not conform to our schema:
192/// @code
193/// const char TEST_BAD_XML_STRING[] =
194/// "<?xml version='1.0' encoding='UTF-8'?>\n"
195/// "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n"
196/// " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n"
197/// " xsi:schemaLocation='http://bloomberg.com/schemas/directory \n"
198/// " aaa.xsd' >\n"
199/// " <name>John Smith</name>\n"
200/// " <phone dir:phonetype='cell'>212-318-2000</phone>\n"
201/// "</directory-entry>\n";
202/// @endcode
203/// Now we define a `parse` method for parsing an XML document and validating
204/// against an XSD schema:
205/// @code
206/// int parse(balxml::ValidatingReader *reader,
207/// const char *xmlData,
208/// const char *xsdSchema)
209/// {
210/// @endcode
211/// In order to read the XML, we first need to construct a
212/// `balxml::NamespaceRegistry` object, a `balxml::PrefixStack` object, and a
213/// `TestReader` object, where `TestReader` is a derived implementation of
214/// @ref balxml_validatingreader .
215/// @code
216/// balxml::NamespaceRegistry namespaces;
217/// balxml::PrefixStack prefixStack(&namespaces);
218///
219/// ASSERT(!reader->isOpen());
220/// @endcode
221/// The reader uses a `balxml::PrefixStack` to manage namespace prefixes so we
222/// need to set it before we call open.
223/// @code
224/// reader->setPrefixStack(&prefixStack);
225/// ASSERT(reader->prefixStack() == &prefixStack);
226/// @endcode
227/// Set up validation
228/// @code
229/// reader->removeSchemas();
230///
231/// reader->enableValidation(true);
232/// ASSERT(reader->validationFlag());
233///
234/// bsl::istringstream schemaStream(xsdSchema);
235/// reader->addSchema("aaa.xsd", schemaStream.rdbuf());
236/// @endcode
237/// Now we call the `open` method to set up the reader for parsing using the data
238/// contained in the XML string.
239/// @code
240/// int rc = reader->open(xmlData, bsl::strlen(xmlData), 0, "UTF-8");
241/// ASSERT(rc == 0);
242/// @endcode
243/// Confirm that the `balxml::ValidatingReader` has opened properly
244/// @code
245/// ASSERT(reader->isOpen());
246/// @endcode
247/// Do actual document reading
248/// @code
249/// while(1) {
250/// rc = reader->advanceToNextNode ();
251/// if (rc != 0) {
252/// break;
253/// }
254/// @endcode
255/// process current node here
256/// @code
257/// }
258/// @endcode
259/// Cleanup and close the reader.
260/// @code
261/// reader->close();
262/// ASSERT(!reader->isOpen());
263///
264/// reader->setPrefixStack(0);
265/// ASSERT(reader->prefixStack() == 0);
266///
267/// return rc;
268/// }
269/// @endcode
270/// The main program parses an XML string using an `a_xercesc::Reader`
271/// @code
272/// int usageExample()
273/// {
274/// a_xercesc::Reader reader;
275///
276/// int rc = parse(&reader, TEST_GOOD_XML_STRING, TEST_XSD_STRING);
277/// @endcode
278/// Normal end of data
279/// @code
280/// ASSERT(rc==1);
281///
282/// rc = parse(&reader, TEST_BAD_XML_STRING, TEST_XSD_STRING);
283/// @endcode
284/// Parser error - document validation failed
285/// @code
286/// ASSERT(rc==-1);
287///
288/// return 0;
289/// }
290/// @endcode
291/// @}
292/** @} */
293/** @} */
294
295/** @addtogroup bal
296 * @{
297 */
298/** @addtogroup balxml
299 * @{
300 */
301/** @addtogroup balxml_validatingreader
302 * @{
303 */
304
305#include <balscm_version.h>
306
307#include <balxml_reader.h>
308
309#include <bsls_keyword.h>
310
311
312
313namespace balxml {
314 // ======================
315 // class ValidatingReader
316 // ======================
317
318/// Abstract protocol for an XML reader that can validate its input against XSD schemas. It extends the `Reader` protocol with methods to enable or disable validation and to maintain a cache of pre-resolved schemas.
319///
320/// See @ref balxml_validatingreader
321class ValidatingReader : public Reader {
322
323 public:
324 // CREATORS
325
326 /// Destroy this object.
327 ~ValidatingReader() BSLS_KEYWORD_OVERRIDE;
328
329 // MANIPULATORS
330
331 /// Enable XML validation if the specified `validationFlag` is true,
332 /// and disable it otherwise. A validating reader is often
333 /// faster if validation is disabled. This operation does not take
334 /// effect until the next call to the `open` method.
335 virtual void enableValidation(bool validationFlag) = 0;
336
337 /// Associate the specified `location` with the XSD document in the
338 /// specified `schema` stream. Return 0 on success and non-zero on
339 /// error. This method may be called more than once to add multiple
340 /// location-schema associations to a pre-resolved schema cache. During
341 /// XML validation, a reference to a schema with a specific location
342 /// will be looked up in the schema cache. Only if this resolution
343 /// fails is the schema resolver used to find the external schema.
344 /// (See `setResolver` in the `Reader` base class.) The effects of
345 /// calling this method when the reader is already open are not specified
346 /// and a derived class implementation may treat it as an error. The
347 /// behavior is undefined unless `schema` is a repositionable stream
348 /// (i.e., it must be possible to read from the stream, then seek to the
349 /// beginning of the stream and read the same bytes again).
350 virtual int addSchema(const char *location, bsl::streambuf *schema) = 0;
351
352 /// Remove all location-to-schema associations that were added using
353 /// `addSchema`. This method should be called each time after parsing
354 /// is finished and the reader would be re-used for parsing other
355 /// documents with different schemas. Note that calling `close` on the
356 /// reader *does* *not* remove the schemas.
357 virtual void removeSchemas() = 0;
358
359 // ACCESSORS
360
361 /// Return true if the reader has validation turned on, and false otherwise.
362 virtual bool validationFlag() const = 0;
363};
364
365} // close package namespace
366
367
368#endif
369
370// ----------------------------------------------------------------------------
371// Copyright 2015 Bloomberg Finance L.P.
372//
373// Licensed under the Apache License, Version 2.0 (the "License");
374// you may not use this file except in compliance with the License.
375// You may obtain a copy of the License at
376//
377// http://www.apache.org/licenses/LICENSE-2.0
378//
379// Unless required by applicable law or agreed to in writing, software
380// distributed under the License is distributed on an "AS IS" BASIS,
381// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
382// See the License for the specific language governing permissions and
383// limitations under the License.
384// ----------------------------- END-OF-FILE ----------------------------------
385
386/** @} */
387/** @} */
388/** @} */
Definition balxml_reader.h:835
Definition balxml_validatingreader.h:321
virtual void removeSchemas()=0
~ValidatingReader() BSLS_KEYWORD_OVERRIDE
Destroy this object.
virtual int addSchema(const char *location, bsl::streambuf *schema)=0
virtual void enableValidation(bool validationFlag)=0
virtual bool validationFlag() const =0
Return true if the reader has validation turned on false otherwise.
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
#define BSLS_KEYWORD_OVERRIDE
Definition bsls_keyword.h:653
Definition balxml_base64parser.h:150
Definition bdlb_printmethods.h:283