// balxml_formatter_compactimpl.h -*-C++-*- #ifndef INCLUDED_BALXML_FORMATTER_COMPACTIMPL #define INCLUDED_BALXML_FORMATTER_COMPACTIMPL #include <bsls_ident.h> BSLS_IDENT("$Id: $") //@PURPOSE: Provide a minimal-whitespace implementation for 'balxml_formatter'. // //@CLASSES: // balxml::Formatter_CompactImplState: state of formatter state machine // balxml::Formatter_CompactImplStateId: labels for formatter state // balxml::Formatter_CompactImplUtil: actions of formatter state machine // //@DESCRIPTION: This private, subordinate component to 'balxml_formatter' // provides an in-core value semantic attribute class, // 'balxml::Formatter_CompactImplState', and a utility 'struct', // 'balxml::Formatter_CompactImplUtil', that implements XML printing operations // using the state value type. These two classes work in conjunction to // implement a state machine for printing an XML document having minimal // whitespace, given a sequence of tokens to emit. The class // 'balxml::Formatter_CompactImplStateId' enumerates the set of labels for // distinct states of 'balxml::Formatter_CompactImplState', upon which most // control-flow decisions of 'balxml::Formatter_CompactImplUtil' are // based. #include <balscm_version.h> #include <balxml_encoderoptions.h> #include <balxml_formatterwhitespacetype.h> #include <balxml_typesprintutil.h> #include <bsl_ostream.h> #include <bsl_string_view.h> namespace BloombergLP { namespace balxml { // =================================== // struct Formatter_CompactImplStateId // =================================== struct Formatter_CompactImplStateId { // This 'struct' provides a namespace for enumerating a set of labels for // distinct states of 'Formatter_CompactImplState'. // TYPES enum Enum { e_AT_START, // This state indicates that the current write position of the // formatter is at the start of the document. The formatter is // only allowed to add an XML header when in this state. e_IN_TAG, // This state indicates that the current write position of the // formatter is immediately after the name of an opening tag, or // otherwise immediately after the value of an attribute of an // opening tag. In this state, most token printing operations, // other than adding attributes, need to emit a ">" character to // close the currently open tag before emitting their content. For // example: //.. // 1| <someTag // `---------^ // // * Note that there is no '>' character yet //.. // or: //.. // 1| <someTag attr="value" otherAttr="42" // `-------------------------------------^ //.. e_FIRST_DATA_BETWEEN_TAGS, // This state indicates that the current write position of the // formatter is either 1) after a closing tag, or 2) after a // complete opening tag and before any data for the tag. In this // state, data printing operations do *not* need to emit delimiting // whitespace for their content. Note that comments are not // considered data. For example: //.. // 1| <someTag> // `----------^ //.. // or: //.. // 1| <someTag><!-- comment --> // `--------------------------^ //.. // or: //.. // 1| <someTag></someTag> // `--------------------^ //.. e_TRAILING_DATA_BETWEEN_TAGS // This state indicates that the current write position of the // formatter is after one or more data tokens for the // currently-open tag. In this state, data printing operations // need to emit delimiting whitespace for their content. For // example: //.. // 1| <someTag>data // `--------------^ //.. // or: //.. // 1| <someTag>some list data // `------------------------^ //.. }; }; // ================================ // class Formatter_CompactImplState // ================================ class Formatter_CompactImplState { // This class provides an in-core, value-semantic attribute type that // maintains all of the state information needed to print an XML document // with minimal whitespace using the operations provided by // 'Formatter_CompactImplUtil'. public: // TYPES typedef Formatter_CompactImplStateId Id; private: // DATA Id::Enum d_id; // the canonical "state" in the state machine, upon which most // control-flow decisions are based when printing int d_indentLevel; // the current open XML tag nesting depth int d_initialIndentLevel; // the XML tag nesting depth to use at the start of the document, and // the value to which 'd_indentLevel' is set upon 'reset', which no // printing operations modify int d_spacesPerLevel; // an option that the formatter does not use in compact mode, but // which is accessible to clients in any mode int d_column; // an approximation of the current column number, which does not take // into account the number of characters used to print any data, and // which the formatter does not use in compact mode, but is accessible // to clients in any mode public: // CREATORS Formatter_CompactImplState(); // Create a 'Formatter_CompactImplState' having an 'id' attribute of // 'Id::e_AT_START', and 'indentLevel', 'initialIndentLevel', // 'spacesPerLevel', and 'column' attributes of 0. Formatter_CompactImplState(int indentLevel, int spacesPerLevel); // Create a 'Formatter_CompactImplState' having an 'id' attribute of // 'Id::e_AT_START', the specified 'indentLevel' and 'spacesPerLevel', // as well as an 'initialIndentLevel' attribute equal to 'indentLevel', // and a 'column' attribute of 0. Formatter_CompactImplState(Id::Enum id, int indentLevel, int initialIndentLevel, int spacesPerLevel, int column); // Create a 'Formatter_CompactImplState' having the specified 'id', // 'indentLevel', 'initialIndentLevel', 'spacesPerLevel', and 'column' // attributes. // MANIPULATORS int& column(); // Return a reference providing modifiable access to the 'column' // attribute of this object. Id::Enum& id(); // Return a reference providing modifiable access to the 'id' attribute // of this object. int& indentLevel(); // Return a reference providing modifiable access to the 'indentLevel' // attribute of this object. int& initialIndentLevel(); // Return a reference providing modifiable access to the // 'initialIndentLevel' attribute of this object. int& spacesPerLevel(); // Return a reference providing modifiable access to the // 'spacesPerLevel' attribute of this object. // ACCESSORS const int& column() const; // Return a reference providing non-modifiable access to the 'column' // attribute of this object. const Id::Enum& id() const; // Return a reference providing non-modifiable access to the 'id' // attribute of this object. const int& indentLevel() const; // Return a reference providing non-modifiable access to the // 'indentLevel' attribute of this object. const int& initialIndentLevel() const; // Return a reference providing non-modifiable access to the // 'initialIndentLevel' attribute of this object. const int& spacesPerLevel() const; // Return a reference providing non-modifiable access to the // 'spacesPerLevel' attribute of this object. }; // ================================ // struct Formatter_CompactImplUtil // ================================ struct Formatter_CompactImplUtil { // This utility 'struct' provides a namespace for a suite of operations // used to pretty-print XML documents given a sequence of tokens to emit. // Together with 'Formatter_CompactImplState', this 'struct' provides an // implementation of a state machine for such pretty-printing. // TYPES typedef Formatter_CompactImplState State; typedef Formatter_CompactImplStateId StateId; typedef FormatterWhitespaceType WhitespaceType; private: // PRIVATE CLASS METHODS static void addCommentImpl(bsl::ostream& stream, State *state, const bsl::string_view& comment, const bsl::string_view& openMarker, const bsl::string_view& closeMarker); // Write the specified 'openMarker', 'comment', and 'closeMarker' into // the specified 'stream', with formatting depending on the specified // 'state', and update the 'state' accordingly. Note that if an // element-opening tag is not completed with a '>', this function will // add '>'. public: // CLASS METHODS template <class VALUE_TYPE> static bsl::ostream& addAttribute( bsl::ostream& stream, State *state, const bsl::string_view& name, const VALUE_TYPE& value, int valueFormattingMode = 0, const EncoderOptions& encoderOptions = EncoderOptions()); // Add an attribute of the specified 'name' and specified 'value' to // the currently open element in the specified 'stream', with // formatting depending on the specified 'state', and update the // 'state' accordingly. Return the 'stream'. 'value' can be of the // following types: 'char', 'short', 'int', 'bsls::Types::Int64', // 'float', 'double', 'bsl::string', 'bdlt::Datetime', 'bdlt::Date', // and 'bdlt::Time'. Precede this name="value" pair with a single // space. Wrap line (write the attribute on next line with proper // indentation), if the length of name="value" is too long. Optionally // specify a 'valueFormattingMode' and 'encoderOptions' to control the // formatting of 'value'. If 'value' is of type 'bsl::string', it is // truncated at any invalid UTF-8 byte-sequence or any control // character. The list of invalid control characters includes // characters in the range '[0x00, 0x20)' and '0x7F' (DEL) but does not // include '0x9', '0xA', and '0x0D'. The five special characters: // apostrophe, double quote, ampersand, less than, and greater than are // escaped in the output XML. If 'value' is of type 'char', it is cast // to a signed byte value with a range '[ -128 .. 127 ]'. The // behavior is undefined unless the last manipulator was 'openElement' // or 'addAttribute'. static bsl::ostream& addBlankLine(bsl::ostream& stream, State *state); // Insert one or two newline characters into the specified 'stream' // stream such that a blank line results, depending on the specified // 'state', and update the 'state' accordingly. Return the 'stream'. // If the last output was a newline, then only one newline is added, // otherwise two newlines are added. If following a call to // 'openElement', or 'addAttribute', add a closing '>' to the opened // tag. static bsl::ostream& addComment( bsl::ostream& stream, State *state, const bsl::string_view& comment, bool forceNewline = true); // !DEPRECATED!: Use 'addValidComment' instead. // // Write the specified 'comment' into the specified 'stream', with // formatting depending on the specified 'state', and update the // 'state' accordingly. Return the 'stream'. The optionally specified // 'forceNewline', if true, forces to start a new line solely for the // comment if it's not on a new line already. Otherwise, comments // continue on current line. If an element-opening tag is not // completed with a '>', 'addComment' will add '>'. template <class VALUE_TYPE> static bsl::ostream& addData( bsl::ostream& stream, State *state, const VALUE_TYPE& value, int formattingMode = 0, const EncoderOptions& encoderOptions = EncoderOptions()); // Add the specified 'value' as the data content to the specified // 'stream', with formatting depending on the specified 'state', and // update 'state' accordingly. Return the 'stream'. 'value' can be of // the following types: 'char', 'short', 'int', 'bsls::Types::Int64', // 'float', 'double', 'bsl::string', 'bdlt::Datetime', 'bdlt::Date', // and 'bdlt::Time'. Perform no line-wrapping or indentation as if the // whitespace constraint were always 'BAEXML_PRESERVE_WHITESPACE' in // 'openElement', with the only exception that an initial newline and // an initial indent is added when 'openElement' specifies // 'BAEXML_NEWLINE_INDENT' option. If 'value' is of type // 'bsl::string', it is truncated at any invalid UTF-8 byte-sequence or // any control character. The list of invalid control characters // includes characters in the range '[0x00, 0x20)' and '0x7F' (DEL) but // does not include '0x9', '0xA', and '0x0D'. The five special // characters: apostrophe, double quote, ampersand, less than, and // greater than are escaped in the output XML. If 'value' is of type // 'char', it is cast to a signed byte value with a range of '[ -128 .. // 127 ]'. Optionally specify the 'formattingMode' and // 'encoderOptions' to specify the format used to encode 'value'. The // behavior is undefined if the call is made when there are no opened // elements. template <class TYPE> static bsl::ostream& addElementAndData( bsl::ostream& stream, State *state, const bsl::string_view& name, const TYPE& value, int formattingMode = 0, const EncoderOptions& encoderOptions = EncoderOptions()); // Add element of the specified 'name' and the specified 'value' as the // data content to the specified 'stream', with formatting depending on // the specified 'state' and the optionally specified 'encoderOptions', // and update 'state' accordingly. Return the 'stream'. This has the // same effect as calling the following sequence: 'openElement(name); // addData(value), closeElement(name);'. Optionally specify the // 'formattingMode'. static bsl::ostream& addHeader(bsl::ostream& stream, State *state, const bsl::string_view& encoding); // Add XML header with optionally specified 'encoding' to the specified // 'stream', with formatting depending on the specified 'state', and // update 'state' accordingly. Return the 'stream'. Version is always // "1.0". The behavior is undefined unless 'addHeader' is the first // manipulator (with the exception of 'rawOutputStream') after // construction or 'reset'. template <class VALUE_TYPE> static bsl::ostream& addListData( bsl::ostream& stream, State *state, const VALUE_TYPE& value, int formattingMode = 0, const EncoderOptions& encoderOptions = EncoderOptions()); // Add the specified 'value' as the data content to the specified // 'stream', with formatting depending on the specified 'state', and // update 'state' accordingly. Return the 'stream'. 'value' can be of // the following types: 'char', 'short', 'int', 'bsls::Types::Int64', // 'float', 'double', 'bsl::string', 'bdlt::Datetime', 'bdlt::Date', // and 'bdlt::Time'. Prefix the 'value' with a space('0x20') unless // the data being added is the first data on a line. When adding the // data makes the line too long, perform line-wrapping and indentation // as determined by the whitespace constraint used when the current // element is opened with 'openElement'. If 'value' is of type // 'bsl::string', it is truncated at any invalid UTF-8 byte-sequence or // any control character. The list of invalid control characters // includes characters in the range '[0x00, 0x20)' and '0x7F' (DEL) but // does not include '0x9', '0xA', and '0x0D'. The five special // characters: apostrophe, double quote, ampersand, less than, and // greater than are escaped in the output XML. If 'value' is of type // 'char', it is cast to a signed byte value with a range of '[ -128 .. // 127 ]'. Optionally specify the 'formattingMode' and // 'encoderOptions' to specify the format used to encode 'value'. The // behavior is undefined if the call is made when there are no opened // elements. static bsl::ostream& addNewline(bsl::ostream& stream, State *state); // Insert a literal newline into the XML output of the specified // 'stream', with formatting depending on the specified 'state', and // update 'state' accordingly. Return the 'stream'. If following a // call to 'openElement', or 'addAttribute', add a closing '>' to the // opened tag. static int addValidComment( bsl::ostream& stream, State *state, const bsl::string_view& comment, bool forceNewline = true, bool omitEnclosingWhitespace = false); // Write the specified 'comment' into the specified 'stream', with // formatting depending on the specified 'state', and update the // 'state' accordingly. If the optionally specified 'forceNewline' is // 'true' then a new line is inserted for comments not already on a new // line. Also optionally specify an 'omitEnclosingWhitespace' that // specifies if a space character should be omitted before and after // 'comment'. If 'omitEnclosingWhitespace' is not specified then a // space character is inserted before and after 'comment'. Return 0 on // success, and non-zero value otherwise. Note that a non-zero return // value is returned if either 'comment' contains '--' or if // 'omitEnclosingWhitespace' is 'true' and 'comment' ends with '-'. // Also note that if an element-opening tag is not completed with a // '>', 'addValidComment' will add '>'. static bsl::ostream& closeElement(bsl::ostream& stream, State *state, const bsl::string_view& name); // Decrement the indent level and add the closing tag for the element // of the specified 'name' to the specified 'stream', with formatting // depending on the specified 'state', and update 'state' accordingly. // Return the 'stream'. If the element does not have content, write // '/>' and a newline into stream. Otherwise, write '</name>' and a // newline. If this '</name>' does not share the same line with data, // or it follows another element's closing tag, indent properly before // writing '</name>' and the newline. If 'name' is root element, flush // the output stream. The behavior is undefined if 'name' is not the // most recently opened element that's yet to be closed. static bsl::ostream& flush(bsl::ostream& stream, State *state); // Insert the closing '>' if there is an incomplete tag, and flush the // specified output 'stream', with formatting depending on the // specified 'state', and update 'state' accordingly. Return the // 'stream'. static bsl::ostream& openElement(bsl::ostream& stream, State *state, const bsl::string_view& name, WhitespaceType::Enum whitespaceMode = WhitespaceType::e_PRESERVE_WHITESPACE); // Open an element of the specified 'name' at current indent level with // the optionally specified whitespace constraint 'whitespaceMode' for // its textual data to the specified 'stream', with formatting // depending on the specified 'state', and update 'state' accordingly, // incrementing the indent level. Return the 'stream'. // 'whitespaceMode' constrains how textual data is written with // 'addListData' for the current element, but not its nested elements. // The behavior is undefined if 'openElement' is called after the root // element is closed and there is no subsequent call to 'reset'. static void reset(State *state); // Reset the specified formatter 'state' such that it can be used to // format a new XML document as if the formatter were just constructed. }; // ============================================================================ // INLINE DEFINITIONS // ============================================================================ // -------------------------------- // class Formatter_CompactImplState // -------------------------------- // CREATORS inline Formatter_CompactImplState::Formatter_CompactImplState() : d_id() , d_indentLevel() , d_initialIndentLevel() , d_spacesPerLevel() , d_column() { } inline Formatter_CompactImplState::Formatter_CompactImplState(int indentLevel, int spacesPerLevel) : d_id() , d_indentLevel(indentLevel) , d_initialIndentLevel(indentLevel) , d_spacesPerLevel(spacesPerLevel) , d_column() { } inline Formatter_CompactImplState::Formatter_CompactImplState( Id::Enum id, int indentLevel, int initialIndentLevel, int spacesPerLevel, int column) : d_id(id) , d_indentLevel(indentLevel) , d_initialIndentLevel(initialIndentLevel) , d_spacesPerLevel(spacesPerLevel) , d_column(column) { } // MANIPULATORS inline int& Formatter_CompactImplState::column() { return d_column; } inline Formatter_CompactImplStateId::Enum& Formatter_CompactImplState::id() { return d_id; } inline int& Formatter_CompactImplState::indentLevel() { return d_indentLevel; } inline int& Formatter_CompactImplState::initialIndentLevel() { return d_initialIndentLevel; } inline int& Formatter_CompactImplState::spacesPerLevel() { return d_spacesPerLevel; } // ACCESSORS inline const int& Formatter_CompactImplState::column() const { return d_column; } inline const Formatter_CompactImplStateId::Enum& Formatter_CompactImplState::id() const { return d_id; } inline const int& Formatter_CompactImplState::indentLevel() const { return d_indentLevel; } inline const int& Formatter_CompactImplState::initialIndentLevel() const { return d_initialIndentLevel; } inline const int& Formatter_CompactImplState::spacesPerLevel() const { return d_spacesPerLevel; } // -------------------------------- // struct Formatter_CompactImplUtil // -------------------------------- // CLASS METHODS template <class VALUE_TYPE> bsl::ostream& Formatter_CompactImplUtil::addAttribute( bsl::ostream& stream, State *state, const bsl::string_view& name, const VALUE_TYPE& value, int formattingMode, const EncoderOptions& encoderOptions) { stream << ' ' << name << "=\""; TypesPrintUtil::print(stream, value, formattingMode, &encoderOptions); stream << '"'; // Minimum output if value is empty. state->column() += static_cast<int>(name.length()) + 4; return stream; } template <class VALUE_TYPE> bsl::ostream& Formatter_CompactImplUtil::addData( bsl::ostream& stream, State *state, const VALUE_TYPE& value, int formattingMode, const EncoderOptions& encoderOptions) { // Step 1: Print a sequence of conditional tokens to 'stream' and update // the column number and indentation level of 'state'. if (StateId::e_IN_TAG == state->id()) { stream << '>'; state->column() += 1; } TypesPrintUtil::print(stream, value, formattingMode, &encoderOptions); state->column() += 1; // Step 2: Update the ID of 'state'. state->id() = StateId::e_TRAILING_DATA_BETWEEN_TAGS; return stream; } template <class TYPE> bsl::ostream& Formatter_CompactImplUtil::addElementAndData( bsl::ostream& stream, State *state, const bsl::string_view& name, const TYPE& value, int formattingMode, const EncoderOptions& encoderOptions) { openElement(stream, state, name, WhitespaceType::e_PRESERVE_WHITESPACE); addData(stream, state, value, formattingMode, encoderOptions); closeElement(stream, state, name); return stream; } template <class VALUE_TYPE> bsl::ostream& Formatter_CompactImplUtil::addListData( bsl::ostream& stream, State *state, const VALUE_TYPE& value, int formattingMode, const EncoderOptions& encoderOptions) { // Step 1: Print a sequence of conditional tokens to 'stream' and update // the column number and indentation level of 'state'. if (StateId::e_IN_TAG == state->id()) { stream << '>'; state->column() += 1; } if (StateId::e_FIRST_DATA_BETWEEN_TAGS != state->id() && StateId::e_IN_TAG != state->id()) { stream << ' '; state->column() += 1; } TypesPrintUtil::print(stream, value, formattingMode, &encoderOptions); state->column() += 1; // assume value is not empty // Step 2: Update the ID of 'state'. state->id() = StateId::e_TRAILING_DATA_BETWEEN_TAGS; return stream; } inline void Formatter_CompactImplUtil::reset(State *state) { state->column() = 0; state->id() = StateId::e_AT_START; state->indentLevel() = state->initialIndentLevel(); } } // close package namespace } // close enterprise namespace #endif // INCLUDED_BALXML_FORMATTER_COMPACTIMPL // ---------------------------------------------------------------------------- // Copyright 2021 Bloomberg Finance L.P. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ----------------------------- END-OF-FILE ----------------------------------