BDE 4.14.0 Production release
Loading...
Searching...
No Matches
balxml_minireader.h
Go to the documentation of this file.
1/// @file balxml_minireader.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// balxml_minireader.h -*-C++-*-
8#ifndef INCLUDED_BALXML_MINIREADER
9#define INCLUDED_BALXML_MINIREADER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup balxml_minireader balxml_minireader
15/// @brief Provide light-weight implementation of `balxml::Reader` protocol.
16/// @addtogroup bal
17/// @{
18/// @addtogroup balxml
19/// @{
20/// @addtogroup balxml_minireader
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#balxml_minireader-purpose"> Purpose</a>
25/// * <a href="#balxml_minireader-classes"> Classes </a>
26/// * <a href="#balxml_minireader-description"> Description </a>
27/// * <a href="#balxml_minireader-usage"> Usage </a>
28/// * <a href="#balxml_minireader-example-1-basic-usage"> Example 1: Basic Usage </a>
29///
30/// # Purpose {#balxml_minireader-purpose}
31/// Provide light-weight implementation of `balxml::Reader` protocol.
32///
33/// # Classes {#balxml_minireader-classes}
34///
35/// - balxml::MiniReader: light-weight `balxml::Reader` implementation
36///
37/// @see balxml_reader, balxml_errorinfo
38///
39/// # Description {#balxml_minireader-description}
40/// The `balxml::MiniReader` class is a light-weight
41/// implementation of the `balxml::Reader` interface. The API acts as a cursor
42/// going forward on the document stream and stopping at each node on the way.
43/// The current node refers to the node on which the reader is positioned. The
44/// user's code keeps control of the progress and simply calls a `read`
45/// function repeatedly to progress to each node in sequence in document order.
46/// This provides a far more standard, easy to use and powerful API than the
47/// existing SAX.
48///
49/// Data Validation
50/// - - - - - - - -
51/// The `balxml::MiniReader` `class` is not a validating reader
52/// (`balxml::ValidatingReader`). As a result while parsing data it does not
53/// make an attempt to ensure the correctness of either the data or the
54/// structure of the incoming XML. The `class` accepts characters as element
55/// data that the XML standard considers invalid. For example the `&` and `<`
56/// characters in element data will parse without error. Similarly, it does not
57/// return an error if the read data does not conform to its specified schema.
58/// To get stricter data validation, clients should use a concrete
59/// implementation of a validating reader (such as `a_xercesc::Reader`) instead.
60///
61/// ## Usage {#balxml_minireader-usage}
62///
63///
64/// This section illustrates intended use of this component.
65///
66/// ### Example 1: Basic Usage {#balxml_minireader-example-1-basic-usage}
67///
68///
69/// For this example, we will use `balxml::MiniReader` to read each node in an
70/// XML document. We do not care about whitespace, so we use the following
71/// utility function to skip over any whitespace nodes. This makes our example
72/// more portable to other implementations of the `balxml::Reader` protocol that
73/// handle whitespace differently from `balxml::MiniReader`.
74/// @code
75/// int advancePastWhiteSpace(balxml::Reader& reader) {
76/// const char *whiteSpace = "\n\r\t ";
77/// const char *value = 0;
78/// int type = 0;
79/// int rc = 0;
80///
81/// do {
82/// rc = reader.advanceToNextNode();
83/// value = reader.nodeValue();
84/// type = reader.nodeType();
85/// } while(0 == rc &&
86/// (type == balxml::Reader::e_NODE_TYPE_WHITESPACE ||
87/// (type == balxml::Reader::e_NODE_TYPE_TEXT &&
88/// bsl::strlen(value) == bsl::strspn(value, whiteSpace))));
89///
90/// assert( reader.nodeType() !=
91/// balxml::Reader::e_NODE_TYPE_WHITESPACE);
92///
93/// return rc;
94/// }
95/// @endcode
96/// The main program parses an XML string using `balxml::MiniReader`.
97/// @code
98/// int main()
99/// {
100/// @endcode
101/// The following string describes xml for a very simple user directory. The
102/// top level element contains one xml namespace attribute, with one embedded
103/// entry describing a user.
104/// @code
105/// const char TEST_XML_STRING[] =
106/// "<?xml version='1.0' encoding='UTF-8'?>\n"
107/// "<directory-entry xmlns:dir="
108/// "'http://bloomberg.com/schemas/directory'>\n"
109/// " <name>John Smith</name>\n"
110/// " <phone dir:phonetype='cell'>212-318-2000</phone>\n"
111/// " <address/>\n"
112/// "</directory-entry>\n";
113/// @endcode
114/// In order to read the XML, we first need to construct a
115/// `balxml::NamespaceRegistry` object, a `balxml::PrefixStack` object, and a
116/// `balxml::MiniReader` object, where `balxml::MiniReader` is a concrete
117/// implementation of the `balxml::Reader` protocol (see @ref balxml_reader ).
118/// @code
119/// balxml::NamespaceRegistry namespaces;
120/// balxml::PrefixStack prefixStack(&namespaces);
121/// balxml::MiniReader miniReader; balxml::Reader& reader = miniReader;
122///
123/// assert(!reader.isOpen());
124/// @endcode
125/// The reader uses a `balxml::PrefixStack` to manage namespace prefixes so we
126/// need to set it before we call open.
127/// @code
128/// reader.setPrefixStack(&prefixStack);
129/// assert(reader.prefixStack());
130/// assert(reader.prefixStack() == &prefixStack);
131/// @endcode
132/// Now we call the `open` method to set up the reader for parsing using the data
133/// contained in the XML string.
134/// @code
135/// reader.open(TEST_XML_STRING, sizeof(TEST_XML_STRING) -1, 0, "UTF-8");
136/// @endcode
137/// Confirm that the `balxml::Reader` has opened properly.
138/// @code
139/// assert( reader.isOpen());
140/// assert(!bsl::strncmp(reader.documentEncoding(), "UTF-8", 5));
141/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_NONE);
142/// assert(!reader.nodeName());
143/// assert(!reader.nodeHasValue());
144/// assert(!reader.nodeValue());
145/// assert(!reader.nodeDepth());
146/// assert(!reader.numAttributes());
147/// assert(!reader.isEmptyElement());
148/// @endcode
149/// Advance through all the nodes and assert all information contained at each
150/// node is correct.
151///
152/// Assert the next node's document type is xml.
153/// @code
154/// int rc = advancePastWhiteSpace(reader);
155/// assert( 0 == rc);
156/// assert( reader.nodeType() ==
157/// balxml::Reader::e_NODE_TYPE_XML_DECLARATION);
158/// assert(!bsl::strcmp(reader.nodeName(), "xml"));
159/// assert( reader.nodeHasValue());
160/// assert(!bsl::strcmp(reader.nodeValue(),
161/// "version='1.0' encoding='UTF-8'"));
162/// assert( reader.nodeDepth() == 1);
163/// assert(!reader.numAttributes());
164/// assert(!reader.isEmptyElement());
165/// assert( 0 == rc);
166/// assert( reader.nodeDepth() == 1);
167/// @endcode
168/// Advance to the top level element, which has one attribute, the xml
169/// namespace. Assert the namespace information has been added correctly to the
170/// prefix stack.
171/// @code
172/// rc = advancePastWhiteSpace(reader);
173/// assert( 0 == rc);
174/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_ELEMENT);
175/// assert(!bsl::strcmp(reader.nodeName(), "directory-entry"));
176/// assert(!reader.nodeHasValue());
177/// assert( reader.nodeDepth() == 1);
178/// assert( reader.numAttributes() == 1);
179/// assert(!reader.isEmptyElement());
180///
181/// assert(!bsl::strcmp(prefixStack.lookupNamespacePrefix("dir"), "dir"));
182/// assert(prefixStack.lookupNamespaceId("dir") == 0);
183/// assert(!bsl::strcmp(prefixStack.lookupNamespaceUri("dir"),
184/// "http://bloomberg.com/schemas/directory"));
185/// @endcode
186/// The XML being read contains one entry describing a user; advance to the user's
187/// name and assert all information can be read correctly.
188/// @code
189/// rc = advancePastWhiteSpace(reader);
190/// assert( 0 == rc);
191/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_ELEMENT);
192/// assert(!bsl::strcmp(reader.nodeName(), "name"));
193/// assert(!reader.nodeHasValue());
194/// assert( reader.nodeDepth() == 2);
195/// assert( reader.numAttributes() == 0);
196/// assert(!reader.isEmptyElement());
197///
198/// rc = reader.advanceToNextNode();
199/// assert( 0 == rc);
200/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_TEXT);
201/// assert( reader.nodeHasValue());
202/// assert(!bsl::strcmp(reader.nodeValue(), "John Smith"));
203/// assert( reader.nodeDepth() == 3);
204/// assert( reader.numAttributes() == 0);
205/// assert(!reader.isEmptyElement());
206///
207/// rc = reader.advanceToNextNode();
208/// assert( 0 == rc);
209/// assert( reader.nodeType() ==
210/// balxml::Reader::e_NODE_TYPE_END_ELEMENT);
211/// assert(!bsl::strcmp(reader.nodeName(), "name"));
212/// assert(!reader.nodeHasValue());
213/// assert( reader.nodeDepth() == 2);
214/// assert( reader.numAttributes() == 0);
215/// assert(!reader.isEmptyElement());
216/// @endcode
217/// Advance to the user's phone number and assert all information can be read
218/// correctly.
219/// @code
220/// rc = advancePastWhiteSpace(reader);
221/// assert( 0 == rc);
222/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_ELEMENT);
223/// assert(!bsl::strcmp(reader.nodeName(), "phone"));
224/// assert(!reader.nodeHasValue());
225/// assert( reader.nodeDepth() == 2);
226/// assert( reader.numAttributes() == 1);
227/// assert(!reader.isEmptyElement());
228/// @endcode
229/// The phone node has one attribute, look it up and assert the
230/// `balxml::ElementAttribute` contains valid information and that the prefix
231/// returns the correct namespace URI from the prefix stack.
232/// @code
233/// balxml::ElementAttribute elemAttr;
234///
235/// rc = reader.lookupAttribute(&elemAttr, 0);
236/// assert( 0 == rc);
237/// assert(!elemAttr.isNull());
238/// assert(!bsl::strcmp(elemAttr.qualifiedName(), "dir:phonetype"));
239/// assert(!bsl::strcmp(elemAttr.value(), "cell"));
240/// assert(!bsl::strcmp(elemAttr.prefix(), "dir"));
241/// assert(!bsl::strcmp(elemAttr.localName(), "phonetype"));
242/// assert(!bsl::strcmp(elemAttr.namespaceUri(),
243/// "http://bloomberg.com/schemas/directory"));
244/// assert( elemAttr.namespaceId() == 0);
245///
246/// assert(!bsl::strcmp(prefixStack.lookupNamespaceUri(elemAttr.prefix()),
247/// elemAttr.namespaceUri()));
248///
249/// rc = advancePastWhiteSpace(reader);
250/// assert( 0 == rc);
251/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_TEXT);
252/// assert( reader.nodeHasValue());
253/// assert(!bsl::strcmp(reader.nodeValue(), "212-318-2000"));
254/// assert( reader.nodeDepth() == 3);
255/// assert( reader.numAttributes() == 0);
256/// assert(!reader.isEmptyElement());
257///
258/// rc = advancePastWhiteSpace(reader);
259/// assert( 0 == rc);
260/// assert( reader.nodeType() ==
261/// balxml::Reader::e_NODE_TYPE_END_ELEMENT);
262/// assert(!bsl::strcmp(reader.nodeName(), "phone"));
263/// assert(!reader.nodeHasValue());
264/// assert( reader.nodeDepth() == 2);
265/// assert( reader.numAttributes() == 0);
266/// assert(!reader.isEmptyElement());
267/// @endcode
268/// Advance to the user's address and assert all information can be read
269/// correctly.
270/// @code
271/// rc = advancePastWhiteSpace(reader);
272/// assert( 0 == rc);
273/// assert( reader.nodeType() == balxml::Reader::e_NODE_TYPE_ELEMENT);
274/// assert(!bsl::strcmp(reader.nodeName(), "address"));
275/// assert(!reader.nodeHasValue());
276/// assert( reader.nodeDepth() == 2);
277/// assert( reader.numAttributes() == 0);
278/// assert( reader.isEmptyElement());
279/// @endcode
280/// Advance to the end element.
281/// @code
282/// rc = advancePastWhiteSpace(reader);
283/// assert( 0 == rc);
284/// assert( reader.nodeType() ==
285/// balxml::Reader::e_NODE_TYPE_END_ELEMENT);
286/// assert(!bsl::strcmp(reader.nodeName(), "directory-entry"));
287/// assert(!reader.nodeHasValue());
288/// assert( reader.nodeDepth() == 1);
289/// assert( reader.numAttributes() == 0);
290/// assert(!reader.isEmptyElement());
291/// @endcode
292/// Close the reader.
293/// @code
294/// reader.close();
295/// assert(!reader.isOpen());
296///
297/// return 0;
298/// }
299/// @endcode
300/// @}
301/** @} */
302/** @} */
303
304/** @addtogroup bal
305 * @{
306 */
307/** @addtogroup balxml
308 * @{
309 */
310/** @addtogroup balxml_minireader
311 * @{
312 */
313
314#include <balscm_version.h>
315
316#include <balxml_reader.h>
319#include <balxml_prefixstack.h>
320
321#include <bslma_allocator.h>
322
323#include <bsls_keyword.h>
324
325#include <bsl_cstring.h>
326#include <bsl_cstddef.h>
327#include <bsl_cstdlib.h>
328#include <bsl_fstream.h>
329#include <bsl_string.h>
330#include <bsl_vector.h>
331
332
333namespace balxml {
334
335 // ================
336 // class MiniReader
337 // ================
338
339/// This `class` provides a concrete and efficient implementation of the
340/// `Reader` protocol.
341///
342/// See @ref balxml_minireader
343class MiniReader : public Reader {
344
345 private:
346 // PRIVATE TYPES
347 enum {
348 k_MIN_BUFSIZE = 1024, // MIN - 1 KB
349 k_MAX_BUFSIZE = 1024 * 128, // MAX - 128 KB
350 k_DEFAULT_BUFSIZE = 1024 * 8, // DEFAULT - 8 KB
351 k_DEFAULT_DEPTH = 20 // Average expected depth
352 }; // to minimize allocations
353
356
357 struct Node;
358 friend struct Node;
// Record holding the properties of a single node: its type, the parts of its
// name, its value, namespace information, flags, and its attributes.  The
// reader keeps one instance as `d_currentNode`.
359 struct Node {
360 enum {
361 k_NODE_NO_FLAGS = 0x0000, // no flag bits set
362 k_NODE_EMPTY = 0x0001 // NOTE(review): presumably marks an empty element ("<a/>") -- confirm
363 };
364
365 NodeType d_type; // node type (a `NodeType` value)
366 const char *d_qualifiedName; // name parts and value are plain C-string pointers;
367 const char *d_prefix; // ownership of the pointed-to text is not visible
368 const char *d_localName; // in this header
369 const char *d_value;
370 int d_namespaceId;
371 const char *d_namespaceUri;
372 int d_flags; // NOTE(review): presumably a combination of the `k_NODE_*` values above -- confirm
373 AttributeVector d_attributes;
374 size_t d_attrCount; // NOTE(review): kept separately from `d_attributes.size()`; reason not visible here -- confirm
375 size_t d_namespaceCount;
376 int d_startPos; // NOTE(review): presumably start/end offsets of the node
377 int d_endPos; // within the input -- confirm in the implementation
378
// Both constructors accept an optional `basicAllocator` (default 0); the
// second one additionally takes an existing `Node` as `other`.
379 Node(bslma::Allocator *basicAllocator = 0);
380 Node(const Node& other, bslma::Allocator *basicAllocator = 0);
381
// Mutators; their exact effects are defined in the implementation file.
382 void reset();
383 void swap(Node& other);
384 void addAttribute(const Attribute& attr);
385 };
386
387 typedef bsl::pair<bsl::string, int> Element;
388
389 typedef bsl::vector<Element> ElementVector;
390
// State of the reader/parser; the current value is held in the `d_state`
// data member.
391 enum State {
392 ST_INITIAL, // Initial state after successful open
393 ST_TAG_BEGIN, // Current position - next symbol after '<'
394 ST_TAG_END, // Current position - next symbol after '>'
395 ST_EOF, // End of Data is reached successfully
396 ST_ERROR, // Parser error : prevents from further scanning
397 ST_CLOSED // close method has been called
398 };
399
// Single-bit flag values.
// NOTE(review): presumably OR-ed into the `d_flags` data member -- confirm in
// the implementation.
400 enum Flags {
401 FLG_READ_EOF = 0x0001, // End of input data
402 FLG_ROOT_CLOSED = 0x0002 // Root closed
403 };
404
405 enum StringType {
406 // The return type of `searchCommentCDataOrEndElementName` and
407 // `searchElementName`; the value says what the function has found.
408

409 e_STRINGTYPE_NONE,
410 e_STRINGTYPE_COMMENT,
411 e_STRINGTYPE_CDATA,
412 e_STRINGTYPE_START_ELEMENT,
413 e_STRINGTYPE_END_ELEMENT
414 };
415
416 private:
417 // PRIVATE DATA
418 bslma::Allocator *d_allocator;
419 State d_state;
420 int d_flags;
421 int d_readSize;
422 bsl::vector<char> d_parseBuf;
423 int d_streamOffset;
424
425 bsl::ifstream d_stream;
426 bsl::streambuf *d_streamBuf;
427 const char * d_memStream; // memory buffer to decode from
428 size_t d_memSize; // memory buffer size
429
430 char *d_startPtr;
431 char *d_endPtr;
432 char *d_scanPtr; // pointer used to traverse the
433 // input
434
435 char *d_markPtr; // pointer to the previous node
436 // value
437
438 char *d_attrNamePtr;
439 char *d_attrValPtr;
440
441 int d_lineNum; // current line number
442
443 int d_lineOffset; // offset at the beginning of
444 // current line
445
446 ErrorInfo d_errorInfo;
447 XmlResolverFunctor d_resolver;
448
449 NamespaceRegistry d_ownNamespaces;
450 PrefixStack d_ownPrefixes;
451 PrefixStack *d_prefixes;
452
453 Node d_currentNode;
454 size_t d_activeNodesCount; // active nodes count
455 ElementVector d_activeNodes; // active nodes stack
456
457 bsl::string d_baseURL;
458 bsl::string d_encoding;
459 bsl::string d_dummyStr;
460
461 unsigned int d_options; // option flags for the reader
462
463 private:
464 // NOT IMPLEMENTED
465 MiniReader(const MiniReader&); // = delete;
466 MiniReader& operator=(const MiniReader&); // = delete;
467
468 // PRIVATE MANIPULATORS
469 Node& currentNode();
470 const Node& currentNode() const;
471
472 int setError(ErrorInfo::Severity error, const bsl::string &msg);
473
474 int setParseError(const char *errText,
475 const char *startFragment,
476 const char *endFragment);
477
478 // HIGH LEVEL PARSING PRIMITIVES
479
480 void preAdvance();
481 const bsl::string& findNamespace(const char *prefix) const;
482 const bsl::string& findNamespace(const bsl::string &prefix) const;
483 int checkPrefixes();
484
485 /// Push the `currentNode()`s data onto the `d_activeNodes` stack.
486 void pushElementName();
487
/// Scan the node at the current position.
488 int scanNode();
489 int updateAttributes();
490 int updateElementInfo();
491
492 int addAttribute();
493
494
495 int scanAttributes();
496
/// Scan an end element without updating the element info.
497 int scanEndElementRaw();
498
499 int scanEndElement();
500 int scanExclaimConstruct();
501 int scanOpenTag();
502 int scanProcessingInstruction();
503 int scanStartElement();
504
505 int scanText();
506
507 /// Scan the input for a comment, a CDATA section, the specified element
508 /// `name`, or the end tag corresponding to `name`. Stop at the first
509 /// instance of either one of those strings and update the internal read
510 /// pointer (d_scanPtr) to point to the next character after the string
511 /// read. Return the string type found.
512 StringType searchCommentCDataOrEndElementName(const bsl::string& name);
513
514 /// Scan the input for the specified element `name`, or the end tag
515 /// corresponding to `name`. Stop at the first instance and update the
516 /// internal read pointer (d_scanPtr) to point to the next character
517 /// after the string read. Return the string type found. Notice that
518 /// this method (unlike `searchCommentCDataOrEndElementName`) does not
519 /// return `e_STRINGTYPE_COMMENT` or `e_STRINGTYPE_CDATA`.
520 StringType searchElementName(const bsl::string& name);
521
522 // LOW LEVEL PARSING PRIMITIVES
523 const char *rebasePointer(const char *ptr, const char *newBase);
524 void rebasePointers(const char *newBase, size_t newLength);
525
526 int readInput();
527 int doOpen(const char *url, const char *encoding);
528
529 /// Return the character at the current position, and zero if the end of
530 /// stream was reached.
531 int peekChar();
532
533 /// Call `readInput` until there are at least the specified `number` of
534 /// characters in the buffer. Return zero if `number` characters cannot
535 /// be read, and return a positive value otherwise.
536 int readAtLeast(bsl::ptrdiff_t number);
537
538 /// Return the character at the current position and then advance the
539 /// current position. If the end of stream is reached the return value
540 /// is zero. The behavior is undefined if this method is called once
541 /// the end is reached.
542 int getChar();
543
544 /// Set the specified symbol `ch` at the current position. Return the
545 /// original character at the current position, and advance the current
546 /// position. If the end of stream is reached the return value is zero.
547 /// The behavior is undefined if this method is called once the end is
548 /// reached.
549 int getCharAndSet(char ch);
550
551 /// Check if the current symbol is NL and adjust line number
552 /// information. Return `true` if it was NL, otherwise `false`
553 bool checkForNewLine();
554
555 /// Skip spaces and set the current position to first non space
556 /// character or to end if there is no non space found symbol. Return
557 /// the character at the new current position.
558 int skipSpaces();
559
560 /// Scan for the specified `symbol` and set the current position to the
561 /// found symbol. Return the character at the new current position. If
562 /// the symbol is not found, the current position is set to end and
563 /// returned value is zero.
564 int scanForSymbol(char symbol);
565
566 int scanForSymbolOrSpace(char symbol1, char symbol2);
567 /// Scan one of the specified `symbol`, `symbol1`, or `symbol2`
568 /// characters or any space character and set the current position to
569 /// the found symbol. Return the character at the new current position.
570 /// If there were no symbols found, the current position is set to end
571 /// and returned value is zero.
572 int scanForSymbolOrSpace(char symbol);
573
574 /// Scan for the required string and set the current position to the
575 /// first character of the found string. Return the character at the
576 /// new current position. If there were no symbols found, the current
577 /// position is set to end and returned value is zero.
578 int scanForString(const char * str);
579
580 /// Compare the content of the buffer, starting from the current
581 /// position, with the specified string `str`. If matches, advance the
582 /// current position by the length of `str` and return `true`; otherwise
583 /// return `false` and the current position is unmodified.
584 bool skipIfMatch(const char *str);
585
586 public:
587 // PUBLIC CREATORS
589
590 explicit MiniReader(bslma::Allocator *basicAllocator = 0);
591 /// Construct a reader with the optionally specified `bufSize`. The
592 /// instantiated MiniReader will utilize a memory buffer of `bufSize`
593 /// while reading the input document. Optionally specify a
594 /// `basicAllocator` used to supply memory. If `basicAllocator` is 0,
595 /// the currently installed default allocator is used. Note that
596 /// `bufSize` is a hint, which may be modified or ignored if it is not
597 /// within a "sane" range.
598 explicit MiniReader(int bufSize, bslma::Allocator *basicAllocator = 0);
599
600 //------------------------------------------------
601 // INTERFACE Reader
602 //------------------------------------------------
603
604 // MANIPULATORS - SETUP METHODS
605
606 /// Set the prefix stack to the stack at the specified `prefixes`
607 /// address or disable prefix stack support if `prefixes` == 0. This
608 /// stack is used to push and pop namespace prefixes as the parse
609 /// progresses, so that, at any point, the stack will reflect the set of
610 /// active prefixes for the current node. It is legitimate to pass a
611 /// stack that already contains prefixes, these prefixes shall be
612 /// preserved when `close` is called, i.e., the prefix stack shall be
613 /// returned to the stack depth it had when `setPrefixStack` was called.
614 /// The behavior is undefined if this method is called after calling
615 /// `open` and before calling `close`.
617
618 /// Set the external XML resource resolver to the specified `resolver`.
619 /// The XML resource resolver is used by the @ref balxml_reader to find and
620 /// open an external resources (See the `XmlResolverFunctor` typedef for
621 /// more details). The XML resource resolver remains valid; it is not
622 /// affected by a call to `close` and should be available until the
623 /// reader is destroyed. The behavior is undefined if this method is
624 /// called after calling `open` and before calling `close`.
626
627 // MANIPULATORS - OPEN/CLOSE AND NAVIGATION METHODS
628
629 /// Set up the reader for parsing using the data contained in the XML
630 /// file described by the specified `filename`, and set the encoding
631 /// value to the optionally specified `encoding` ("ASCII", "UTF-8",
632 /// etc). Returns 0 on success and non-zero otherwise. The encoding
633 /// passed to `Reader::open` will take effect only when there is no
634 /// encoding information in the original document, i.e., the encoding
635 /// information obtained from the XML file described by the `filename`
636 /// trumps all. If there is no encoding provided within the document
637 /// and `encoding` is null or a blank string is passed, then set the
638 /// encoding to the default "UTF-8". It is an error to `open` a reader
639 /// that is already open. Note that the reader will not be on a valid
640 /// node until `advanceToNextNode` is called.
641 int open(const char *filename,
642 const char *encoding = 0) BSLS_KEYWORD_OVERRIDE;
643
644 /// Set up the reader for parsing using the data contained in the
645 /// specified (XML) `buffer` of the specified `size`, set the base URL
646 /// to the optionally specified `url` and set the encoding value to the
647 /// optionally specified `encoding` ("ASCII", "UTF-8", etc). Return 0
648 /// on success and non-zero otherwise. If `url` is null or a blank
649 /// string is passed, then base URL will be empty. The encoding passed
650 /// to `Reader::open` will take effect only when there is no encoding
651 /// information in the original document, i.e., the encoding information
652 /// obtained from the (XML) `buffer` trumps all. If there is no
653 /// encoding provided within the document and `encoding` is null or a
654 /// blank string is passed, then set the encoding to the default
655 /// "UTF-8". It is an error to `open` a reader that is already open.
656 /// Note that the reader will not be on a valid node until
657 /// `advanceToNextNode` is called.
658 int open(const char *buffer,
659 bsl::size_t size,
660 const char *url = 0,
661 const char *encoding = 0) BSLS_KEYWORD_OVERRIDE;
662
663 /// Set up the reader for parsing using the data contained in the
664 /// specified (XML) `stream`, set the base URL to the optionally
665 /// specified `url` and set the encoding value to the optionally
666 /// specified `encoding` ("ASCII", "UTF-8", etc). Return 0 on success
667 /// and non-zero otherwise. If `url` is null or a blank string is
668 /// passed, then base URL will be empty. The encoding passed to
669 /// `Reader::open` will take effect only when there is no encoding
670 /// information in the original document, i.e., the encoding information
671 /// obtained from the (XML) `stream` trumps all. If there is no
672 /// encoding provided within the document and `encoding` is null or a
673 /// blank string is passed, then set the encoding to the default
674 /// "UTF-8". It is an error to `open` a reader that is already open.
675 /// Note that the reader will not be on a valid node until
676 /// `advanceToNextNode` is called.
677 int open(bsl::streambuf *stream,
678 const char *url = 0,
679 const char *encoding = 0) BSLS_KEYWORD_OVERRIDE;
680
681 /// Close the reader. Most, but not all state is reset. Specifically,
682 /// the XML resource resolver and the prefix stack remain. The prefix
683 /// stack shall be returned to the stack depth it had when
684 /// `setPrefixStack` was called. Call the method `open` to reuse the
685 /// reader. Note that `close` invalidates all strings and data
686 /// structures obtained via `Reader` accessors. E.g., the pointer
687 /// returned from `nodeName` for this node will not be valid once
688 /// `close` is called.
690
691 /// Skip all the sub elements of the current node and position the
692 /// reader on its corresponding end node. While skipping ensure that
693 /// the elements being skipped are well-formed and do not contain any
694 /// parsing errors. Return 0 on successful skip, and a negative number
695 /// otherwise (error). The behavior is undefined unless
696 /// `balxml::Reader::e_NODE_TYPE_ELEMENT == node.type()`. Note that
697 /// each call to `advanceToEndNode` invalidates strings and data
698 /// structures returned when `Reader` accessors were called for the
699 /// "prior node". E.g., the pointer returned from `nodeName` for this
700 /// node won't be valid once `advanceToEndNode` is called. Note that
701 /// this method leaves the reader pointing to an end node, so calling
702 /// one of the `advanceToEndNode` immediately after will not advance the
703 /// reader further (first call `advanceToNextNode` before calling the
704 /// `advanceToEndNode` function again).
705 virtual int advanceToEndNode();
706
707 /// Skip all the sub elements of the current node and position the
708 /// reader on its corresponding end node, and (unlike
709 /// `advanceToNextNode`) perform no checks to ensure that the elements
710 /// being skipped are well-formed and that they do not contain any
711 /// parsing errors. Return 0 on successful skip, and a negative number
712 /// otherwise (error). The behavior is undefined unless
713 /// `balxml::Reader::e_NODE_TYPE_ELEMENT == node.type()`. Note that
714 /// each call to `advanceToEndNodeRaw` invalidates strings and data
715 /// structures returned when `Reader` accessors were called for the
716 /// "prior node". E.g., the pointer returned from `nodeName` for this
717 /// node will not be valid once `advanceToEndNodeRaw` is called. Note
718 /// that this method leaves the reader pointing to an end node, so
719 /// calling one of the `advanceToEndNodeRaw` immediately after will not
720 /// advance the reader further (first call `advanceToNextNode` before
721 /// calling the `advanceToEndNodeRaw` function again).
722 virtual int advanceToEndNodeRaw();
723
724 /// Skip all the sub elements of the current node and position the
725 /// reader on its corresponding end node, and (unlike
726 /// `advanceToNextNode`) perform no checks to ensure that the elements
727 /// being skipped are well-formed and that they do not contain any
728 /// parsing errors. Unlike `advanceToEndNodeRaw` this method does not
729 /// expect (allow) comments or CDATA nodes in the input XML, in other
730 /// words it is expecting "bare" XML. Return 0 on successful skip, and
731 /// a negative number otherwise (error). The behavior is undefined
732 /// unless `balxml::Reader::e_NODE_TYPE_ELEMENT == node.type()`. The
733 /// behavior is also undefined if the input XML contains comment or
734 /// CDATA nodes. Note that each call to `advanceToEndNodeRawBare`
735 /// invalidates strings and data structures returned when `Reader`
736 /// accessors were called for the "prior node". E.g., the pointer
737 /// returned from `nodeName` for this node will not be valid once
738 /// `advanceToEndNodeRawBare` is called. Note that this method leaves
739 /// the reader pointing to an end node, so calling one of the
740 /// `advanceToEndNodeRawBare` immediately after will not advance the
741 /// reader further (first call `advanceToNextNode` before calling the
742 /// `advanceToEndNodeRawBare` function again).
744
745 /// Move to the next node in the data stream created by `open`, thus
746 /// allowing the node's properties to be queried via the `Reader`
747 /// accessors. Return 0 on successful read, 1 if there are no more
748 /// nodes to read, and a negative number otherwise. Note that each call
749 /// to `advanceToNextNode` invalidates strings and data structures
750 /// returned when `Reader` accessors were called for the "prior node".
751 /// E.g., the pointer returned from `nodeName` for this node will not be
752 /// valid once `advanceToNextNode` is called. Note that the reader will
753 /// not be on a valid node until the first call to `advanceToNextNode`
754 /// after the reader is opened.
756
757 /// Find the attribute at the specified `index` in the current node, and
758 /// fill in the specified `attribute` structure. Return 0 on success, 1
759 /// if no attribute is found at the `index`, and a negative value
760 /// otherwise. The strings that were filled into the `attribute`
761 /// structure are invalid upon the next `advanceToNextNode` or `close`
762 /// is called.
764 int index) const BSLS_KEYWORD_OVERRIDE;
765
766 /// Find the attribute with the specified `qname` (qualified name) in
767 /// the current node, and fill in the specified `attribute` structure.
768 /// Return 0 on success, 1 if there is no attribute found with `qname`,
769 /// and a negative value otherwise. The strings that were filled into
770 /// the `attribute` structure are invalid upon the next
771 /// `advanceToNextNode` or `close` is called.
773 const char *qname) const BSLS_KEYWORD_OVERRIDE;
774
775 /// Find the attribute with the specified `localName` and specified
776 /// `namespaceUri` in the current node, and fill in the specified
777 /// `attribute` structure. Return 0 on success, 1 if there is no
778 /// attribute found with `localName` and `namespaceUri`, and a negative
779 /// value otherwise. If `namespaceUri` == 0 or a blank string is
780 /// passed, then the document's default namespace will be used. The
781 /// strings that were filled into the `attribute` structure are invalid
782 /// upon the next `advanceToNextNode` or `close` is called.
784 const char *localName,
785 const char *namespaceUri) const
787
788 /// Find the attribute with the specified `localName` and specified
789 /// `namespaceId` in the current node, and fill in the specified
790 /// `attribute` structure. Return 0 on success, 1 if there is no
791 /// attribute found with `localName` and `namespaceId`, and a negative
792 /// value otherwise. If `namespaceId` == -1, then the document's
793 /// default namespace will be used. The strings that were filled into
794 /// the `attribute` structure are invalid upon the next
795 /// `advanceToNextNode` or `close` is called.
797 const char *localName,
798 int namespaceId) const
800
801 /// Set the options to the flags in the specified `flags`. The options
802 /// for the reader are persistent, i.e., the options are not reset by
803 /// `close`. The behavior is undefined if this method is called after
804 /// calling `open` and before calling `close`.
805 void setOptions(unsigned int flags) BSLS_KEYWORD_OVERRIDE;
806
807 // ACCESSORS
808
809 /// Return the document encoding or NULL on error. The returned pointer
810 /// is owned by this object and must not be modified or deallocated by
811 /// the caller. The returned pointer becomes invalid when `close` is
812 /// called or the reader is destroyed.
814
815 /// Return the external XML resource resolver.
817
818 /// Return true if `open` was called successfully and `close` has not
819 /// yet been called and false otherwise.
821
822 /// Return a reference to the non-modifiable error information for this
823 /// reader. The returned value becomes invalid when `close` is called
824 /// or the reader is destroyed.
826
827 /// Return the current line number within the input stream. The current
828 /// line is the last line for which the reader has not yet seen a
829 /// newline. Lines are counted starting at one from the time a stream
830 /// is provided to `open`. Return 0 if not available. Note that a
831 /// derived-class implementation is not required to count lines and may
832 /// just return 0.
834
835 /// Return the current column number within the input stream. The
836 /// current column number is the number of characters since the last
837 /// newline was read by the reader plus one, i.e., the first column of
838 /// each line is column number one. Return 0 if not available. Note
839 /// that a derived-class implementation is not required to count
840 /// columns and may just return 0.
842
843 /// Return a pointer to the modifiable prefix stack that is used by this
844 /// reader to manage namespace prefixes or 0 if namespace support is
845 /// disabled. The behavior is undefined if the returned prefix stack is
846 /// augmented in any way after calling `open` and before calling
847 /// `close`.
849
850 /// Return the node type of the current node if the reader `isOpen` and
851 /// has not encountered an error and `Reader::NONE` otherwise.
853
854 /// Return the qualified name of the current node if the current node
855 /// has a name and NULL otherwise. The returned pointer is owned by
856 /// this object and must not be modified or deallocated by the caller.
857 /// The returned pointer becomes invalid upon the next
858 /// `advanceToNextNode`, when `close` is called or the reader is
859 /// destroyed.
860 const char *nodeName() const BSLS_KEYWORD_OVERRIDE;
861
862 /// Return the local name of the current node if the current node has a
863 /// local name and NULL otherwise. The returned pointer is owned by
864 /// this object and must not be modified or deallocated by the caller.
865 /// The returned pointer becomes invalid upon the next
866 /// `advanceToNextNode`, when `close` is called or the reader is
867 /// destroyed.
869
870 /// Return the prefix name of the current node if the current node has a
871 /// prefix name and NULL otherwise. The returned pointer is owned by
872 /// this object and must not be modified or deallocated by the caller.
873 /// The returned pointer becomes invalid upon the next
874 /// `advanceToNextNode`, when `close` is called or the reader is
875 /// destroyed.
877
878 /// Return the namespace ID of the current node if the current node has
879 /// a namespace id and a negative number otherwise.
881
882 /// Return the namespace URI name of the current node if the current
883 /// node has a namespace URI and NULL otherwise. The returned pointer
884 /// is owned by this object and must not be modified or deallocated by
885 /// the caller. The returned pointer becomes invalid upon the next
886 /// `advanceToNextNode`, when `close` is called or the reader is
887 /// destroyed.
889
890 /// Return the base URI name of the current node if the current node has
891 /// a base URI and NULL otherwise. The returned pointer is owned by
892 /// this object and must not be modified or deallocated by the caller.
893 /// The returned pointer becomes invalid upon the next
894 /// `advanceToNextNode`, when `close` is called or the reader is
895 /// destroyed.
897
898 /// Return true if the current node has a value and false otherwise.
900
901 /// Return the value of the current node if the current node has a value
902 /// and NULL otherwise. The returned pointer is owned by this object
903 /// and must not be modified or deallocated by the caller. The returned
904 /// pointer becomes invalid upon the next `advanceToNextNode`, when
905 /// `close` is called or the reader is destroyed.
906 const char *nodeValue() const BSLS_KEYWORD_OVERRIDE;
907
908 /// Return the nesting depth of the current node in the XML document.
909 /// The root node has depth 0.
911
912 /// Return the number of attributes for the current node if that node
913 /// has attributes and 0 otherwise.
915
916 /// Return true if the current node is an element (i.e., node type is
917 /// `NODE_TYPE_ELEMENT`) that ends with `/>`; and false otherwise.
918 /// Note that `<a/>` will be considered empty but `<a></a>` will not.
920
921 /// Return the option flags.
922 unsigned int options() const BSLS_KEYWORD_OVERRIDE;
923
924 // ACCESSORS
925 // SPECIFIC FOR MiniReader
926
927 /// Return the current scanner position as an offset from the beginning
928 /// of the document.
929 int getCurrentPosition() const;
930
931 /// Return the byte position within the document corresponding to the
932 /// first byte of the current node.
933 int nodeStartPosition() const;
934
935 /// Return the byte position within the document corresponding to the
936 /// byte following after the last byte of the current node.
937 int nodeEndPosition() const;
938
939};
940
941// ============================================================================
942// INLINE DEFINITIONS
943// ============================================================================
944
945 // ----------------
946 // class MiniReader
947 // ----------------
948
// Return a modifiable reference to this reader's `d_currentNode` member.
949inline
950MiniReader::Node& MiniReader::currentNode()
951{
952 return d_currentNode;
953}
954
// Return a non-modifiable reference to this reader's `d_currentNode` member
// (the `const` overload of `currentNode`).
955inline
956const MiniReader::Node& MiniReader::currentNode() const
957{
958 return d_currentNode;
959}
960
// Return the character at the current scan position without consuming it.
// If the scan pointer has reached `d_endPtr`, `readInput` is called first,
// and 0 is returned when that call returns 0.
// NOTE(review): presumably a 0 result from `readInput` means that no further
// input is available -- confirm against its definition.
961inline
962int MiniReader::peekChar()
963{
964 if (d_scanPtr >= d_endPtr) {
965 if (readInput() == 0) {
966 return 0; // RETURN
967 }
968 }
969
// The scan pointer is deliberately left where it is.
970 return *d_scanPtr;
971}
972
// Return the character at the current scan position and advance the scan
// pointer past it.  If the scan pointer has reached `d_endPtr`, `readInput`
// is called first, and 0 is returned (without advancing) when that call
// returns 0.  Unlike `getCharAndSet`, this function does not call
// `checkForNewLine`.
973inline
974int MiniReader::getChar()
975{
976 if (d_scanPtr >= d_endPtr) {
977 if (readInput() == 0) {
978 return 0; // RETURN
979 }
980 }
981 return *d_scanPtr++;
982}
983
// If the character at the current scan position is a newline, increment
// `d_lineNum`, set `d_lineOffset` to the position just past the newline
// (`getCurrentPosition() + 1`), and return `true`; otherwise change nothing
// and return `false`.  The scan pointer itself is not advanced.  Note that
// `*d_scanPtr` is read without comparing against `d_endPtr`; the caller
// visible here (`getCharAndSet`) calls `peekChar` first.
984inline
985bool MiniReader::checkForNewLine()
986{
987 if (*d_scanPtr == '\n') {
988 ++d_lineNum;
989 d_lineOffset = getCurrentPosition() + 1;
990
991 return true; // RETURN
992 }
993
994 return false;
995}
996
// Read the character at the current scan position, overwrite it in the
// buffer with the specified `ch`, and advance the scan pointer.  Return the
// character that was read, or 0 if `peekChar` returned 0, in which case the
// buffer and the scan pointer are left untouched.
997inline
998int MiniReader::getCharAndSet(char ch)
999{
1000 //checkForNewLine(); // modify line, column
1001
1002 int rc = peekChar(); // get current char
1003
1004 if (rc != 0) {
// Line bookkeeping must inspect the original character, so it runs before
// the character is overwritten below.
1005 checkForNewLine();
1006 *d_scanPtr++ = ch; // replace, advance position
1007 }
1008 return rc;
1009}
1010
// Translate the specified `ptr` so that it refers to the same offset
// relative to the specified `newBase` as it currently has relative to
// `d_markPtr`.  The translation is applied only if `ptr` is non-null and
// lies in the closed range `[d_markPtr, d_endPtr]`; any other `ptr`
// (including null) is returned unchanged.
// NOTE(review): presumably used when the buffered data is moved to a new
// location -- confirm against the callers.
1011inline
1012const char *MiniReader::rebasePointer(const char *ptr, const char *newBase)
1013{
1014 if (ptr && ptr >= d_markPtr && ptr <= d_endPtr) {
1015 return newBase + (ptr - d_markPtr); // RETURN
1016 }
1017 return ptr;
1018}
1019
// Body of `getCurrentPosition` (the signature, listing line 1021, is absent
// from this rendering).  Return `d_streamOffset` plus the distance of the
// scan pointer from `d_startPtr`, converted to `int`.
1020inline
1022{
1023 return static_cast<int>(d_streamOffset + (d_scanPtr - d_startPtr));
1024}
1025
// Body of `nodeStartPosition` (the signature, listing line 1027, is absent
// from this rendering).  Return the `d_startPos` field of the current node.
1026inline
1028{
1029 return currentNode().d_startPos;
1030}
1031
// Body of `nodeEndPosition` (the signature, listing line 1033, is absent
// from this rendering).  Return the `d_endPos` field of the current node.
1032inline
1034{
1035 return currentNode().d_endPos;
1036}
1037
1038} // close package namespace
1039
1040
1041#endif // INCLUDED_BALXML_MINIREADER
1042
1043// ----------------------------------------------------------------------------
1044// Copyright 2015 Bloomberg Finance L.P.
1045//
1046// Licensed under the Apache License, Version 2.0 (the "License");
1047// you may not use this file except in compliance with the License.
1048// You may obtain a copy of the License at
1049//
1050// http://www.apache.org/licenses/LICENSE-2.0
1051//
1052// Unless required by applicable law or agreed to in writing, software
1053// distributed under the License is distributed on an "AS IS" BASIS,
1054// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1055// See the License for the specific language governing permissions and
1056// limitations under the License.
1057// ----------------------------- END-OF-FILE ----------------------------------
1058
1059/** @} */
1060/** @} */
1061/** @} */
Definition balxml_elementattribute.h:289
Definition balxml_errorinfo.h:353
Severity
Definition balxml_errorinfo.h:358
Definition balxml_minireader.h:343
~MiniReader() BSLS_KEYWORD_OVERRIDE
int advanceToNextNode() BSLS_KEYWORD_OVERRIDE
bool nodeHasValue() const BSLS_KEYWORD_OVERRIDE
Return true if the current node has a value and false otherwise.
void close() BSLS_KEYWORD_OVERRIDE
int open(const char *filename, const char *encoding=0) BSLS_KEYWORD_OVERRIDE
int getCurrentPosition() const
Definition balxml_minireader.h:1021
const char * nodeValue() const BSLS_KEYWORD_OVERRIDE
unsigned int options() const BSLS_KEYWORD_OVERRIDE
Return the option flags.
const char * documentEncoding() const BSLS_KEYWORD_OVERRIDE
NodeType nodeType() const BSLS_KEYWORD_OVERRIDE
XmlResolverFunctor resolver() const BSLS_KEYWORD_OVERRIDE
Return the external XML resource resolver.
const char * nodePrefix() const BSLS_KEYWORD_OVERRIDE
virtual int advanceToEndNodeRaw()
const char * nodeBaseUri() const BSLS_KEYWORD_OVERRIDE
const char * nodeNamespaceUri() const BSLS_KEYWORD_OVERRIDE
const ErrorInfo & errorInfo() const BSLS_KEYWORD_OVERRIDE
int getColumnNumber() const BSLS_KEYWORD_OVERRIDE
const char * nodeName() const BSLS_KEYWORD_OVERRIDE
friend struct Node
Definition balxml_minireader.h:358
int nodeDepth() const BSLS_KEYWORD_OVERRIDE
int nodeNamespaceId() const BSLS_KEYWORD_OVERRIDE
const char * nodeLocalName() const BSLS_KEYWORD_OVERRIDE
void setResolver(XmlResolverFunctor resolver) BSLS_KEYWORD_OVERRIDE
void setPrefixStack(PrefixStack *prefixes) BSLS_KEYWORD_OVERRIDE
virtual int advanceToEndNode()
int nodeEndPosition() const
Definition balxml_minireader.h:1033
int numAttributes() const BSLS_KEYWORD_OVERRIDE
bool isOpen() const BSLS_KEYWORD_OVERRIDE
int lookupAttribute(ElementAttribute *attribute, int index) const BSLS_KEYWORD_OVERRIDE
virtual int advanceToEndNodeRawBare()
int getLineNumber() const BSLS_KEYWORD_OVERRIDE
int nodeStartPosition() const
Definition balxml_minireader.h:1027
bool isEmptyElement() const BSLS_KEYWORD_OVERRIDE
void setOptions(unsigned int flags) BSLS_KEYWORD_OVERRIDE
PrefixStack * prefixStack() const BSLS_KEYWORD_OVERRIDE
Definition balxml_prefixstack.h:137
Definition balxml_reader.h:835
bsl::function< StreamBufPtr(const char *location, const char *namespaceUri)> XmlResolverFunctor
Definition balxml_reader.h:920
NodeType
Definition balxml_reader.h:839
Definition bslstl_string.h:1281
Forward declaration.
Definition bslstl_function.h:934
Definition bslstl_pair.h:1210
Definition bslstl_vector.h:1025
Definition bslma_allocator.h:457
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
#define BSLS_KEYWORD_OVERRIDE
Definition bsls_keyword.h:653
Definition balxml_base64parser.h:150
Definition bdlb_printmethods.h:283
Definition balxml_encoderoptions.h:68