doxygen/bde_api_prod/bdlde__hexencoder_8h_source.html

/// @file bdlde_hexencoder.h

///

/// The content of this file has been pre-processed for Doxygen.

///


// bdlde_hexencoder.h                                                 -*-C++-*-

#ifndef INCLUDED_BDLDE_HEXENCODER

#define INCLUDED_BDLDE_HEXENCODER


#include <bsls_ident.h>

BSLS_IDENT("$Id: $")


/// @defgroup bdlde_hexencoder bdlde_hexencoder

/// @brief  Provide mechanism for encoding text into hexadecimal.

/// @addtogroup bdl

/// @{

/// @addtogroup bdlde

/// @{

/// @addtogroup bdlde_hexencoder

/// @{

///

/// <h1> Outline </h1>

/// * <a href="#bdlde_hexencoder-purpose"> Purpose</a>

/// * <a href="#bdlde_hexencoder-classes"> Classes </a>

/// * <a href="#bdlde_hexencoder-description"> Description </a>

///   * <a href="#bdlde_hexencoder-hex-encoding"> Hex Encoding </a>

///   * <a href="#bdlde_hexencoder-hex-decoding"> Hex Decoding </a>

///   * <a href="#bdlde_hexencoder-usage"> Usage </a>

///     * <a href="#bdlde_hexencoder-example-1-basic-usage-of-bdlde-hexencoder"> Example 1: Basic Usage of bdlde::HexEncoder </a>

///

/// # Purpose {#bdlde_hexencoder-purpose}

/// Provide mechanism for encoding text into hexadecimal.

///

/// # Classes {#bdlde_hexencoder-classes}

///

/// -  bdlde::HexEncoder: mechanism for encoding text into hexadecimal

///

/// @see  bdlde_hexdecoder

///

/// # Description {#bdlde_hexencoder-description}

/// This component provides a class, `bdlde::HexEncoder`, for

/// encoding plain text into its hexadecimal representation.

///

/// `bdlde::HexEncoder` and `bdlde::HexDecoder` provide a pair of template

/// functions (each parameterized separately on both input and output iterators)

/// that can be used respectively to encode and to decode byte sequences of

/// arbitrary length into and from the printable Hex representation.

///

/// Each instance of either the encoder or decoder retains the state of the

/// conversion from one supplied input to the next, enabling the processing of

/// segmented input -- i.e., processing resumes where it left off with the next

/// invocation on new input.  Instance methods are provided for both the

/// encoder and decoder to (1) assert the end of input, (2) determine whether

/// the input so far is currently acceptable, and (3) indicate whether a

/// non-recoverable error has occurred.

///

/// ## Hex Encoding {#bdlde_hexencoder-hex-encoding}

///

///

/// The data stream is processed one byte at a time from left to right.  Each

/// byte

/// @code

///     7 6 5 4 3 2 1 0

///    +-+-+-+-+-+-+-+-+

///    |               |

///    +-+-+-+-+-+-+-+-+

///     `------v------'

///           Byte

/// @endcode

/// is segmented into two intermediate 4-bit quantities.

/// @code

///     3 2 1 0 3 2 1 0

///    +-+-+-+-+-+-+-+-+

///    |       |       |

///    +-+-+-+-+-+-+-+-+

///     `--v--' `--v--'

///      char0   char1

/// @endcode

/// Each 4-bit quantity is in turn used as an index into the following character

/// table to generate an 8-bit character.

/// @code

///    =================

///    *  Hex Alphabet *

///    -----------------

///    Val Enc  Val Enc

///    --- ---  --- ---

///      0 '0'    8 '8'

///      1 '1'    9 '9'

///      2 '2'   10 'A'

///      3 '3'   11 'B'

///      4 '4'   12 'C'

///      5 '5'   13 'D'

///      6 '6'   14 'E'

///      7 '7'   15 'F'

///    =================

/// @endcode

/// Depending on its settings, the encoder represents values from 10 to 15 as

/// uppercase (`A`-`F`) or lowercase (`a`-`f`) letters.

///

/// Input values of increasing length along with their corresponding Hex

/// encodings are illustrated below:

/// @code

///       Data: /* nothing */

///   Encoding: /* nothing */

///

///       Data: "0"     (0011 0000)

///   Encoding: 30

///

///       Data: "01"    (0011 0000 0011 0001)

///   Encoding: 3031

///

///       Data: "01A"   (0011 0000 0011 0001 0100 0001)

///   Encoding: 303141

///

///       Data: "01A?"  (0011 0000 0011 0001 0100 0001 0011 1111)

///   Encoding: 3031413F

/// @endcode

///

/// ## Hex Decoding {#bdlde_hexencoder-hex-decoding}

///

///

/// The data stream is processed two bytes at a time from left to right.  Each

/// sequence of two 8-bit quantities

/// @code

///     7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0

///    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

///    |               |               |

///    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

///     `------v------' `------v------'

///          Byte0           Byte1

/// @endcode

/// is segmented into four intermediate 4-bit quantities.

/// @code

///     3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0

///    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

///    |       |       |       |       |

///    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

///     `--v--' `--v--' `--v--' `--v--'

///     chunk0   chunk1  chunk2  chunk3

/// @endcode

/// The second and fourth chunks are combined to get the resulting 8-bit

/// character.

///

/// Whitespace characters are ignored.  On any non-alphabet character the

/// decoder reports an error.  In order for a Hex encoding to be valid the

/// length of the input data (excluding any whitespace characters) must be a

/// multiple of two.

///

/// Input values of increasing length along with their corresponding Hex

/// encodings are illustrated below (note that the encoded whitespace character

/// is skipped and the resulting string does not contain it):

/// @code

///       Data: /* nothing */

///   Encoding: /* nothing */

///

///       Data: "4"       (0000 0100)

///   Encoding: /* nothing */

///

///       Data: "41"      (0000 0100 0000 0001)

///   Encoding: A

///

///       Data: "412"     (0000 0100 0000 0001 0000 0010)

///   Encoding: A

///

///       Data: "4120"    (0000 0100 0000 0001 0000 0010 0000 0000)

///   Encoding: A

///

///       Data: "41203"   (0000 0100 0000 0001 0000 0010 0000 0000

///                        0000 0011)

///   Encoding: A

///

///       Data: "41203F"  (0000 0100 0000 0001 0000 0010 0000 0000

///                        0000 0011 0000 1111)

///   Encoding: A?

/// @endcode

///

/// ## Usage {#bdlde_hexencoder-usage}

///

///

/// This section illustrates intended use of this component.

///

/// ### Example 1: Basic Usage of bdlde::HexEncoder {#bdlde_hexencoder-example-1-basic-usage-of-bdlde-hexencoder}

///

///

/// The following example shows using a `bdlde::HexEncoder` object to encode

/// bytes into a hexadecimal format.  For dependency reasons, a more complete

/// example, showing both encoding and decoding can be found in

/// @ref bdlde_hexdecoder .

///

/// In the example below, we implement a function `streamEncoder`, that reads

/// text from `bsl::istream`, encodes that text into hex representation, and

/// writes the encoded text to a `bsl::ostream`.  `streamEncoder` returns 0 on

/// success and a negative value if the input data could not be successfully

/// encoded or if there is an I/O error.

/// @code

/// /// Read the entire contents of the specified input stream `is`, convert

/// /// the input plain text to hex representation, and write the encoded

/// /// text to the specified output stream `os`.  Return 0 on success, and

/// /// a negative value otherwise.

/// int streamEncoder(bsl::ostream& os, bsl::istream& is)

/// {

///     enum {

///         SUCCESS      =  0,

///         ENCODE_ERROR = -1,

///         IO_ERROR     = -2

///     };

/// @endcode

/// First, we create an encoder object, create buffers for storing data, and

/// start a loop that runs while the input stream contains some data:

/// @code

///     bdlde::HexEncoder converter;

///

///     const int INBUFFER_SIZE  = 1 << 10;

///     const int OUTBUFFER_SIZE = 1 << 10;

///

///     char inputBuffer[INBUFFER_SIZE];

///     char outputBuffer[OUTBUFFER_SIZE];

///

///     char *output    = outputBuffer;

///     char *outputEnd = outputBuffer + sizeof outputBuffer;

///

///     while (is.good()) {  // input stream not exhausted

/// @endcode

/// On each iteration we read some data from the input stream:

/// @code

///         is.read(inputBuffer, sizeof inputBuffer);

///

///         const char *input    = inputBuffer;

///         const char *inputEnd = input + is.gcount();

///

///         while (input < inputEnd) { // input encoding not complete

///

///             int numOut;

///             int numIn;

/// @endcode

/// Convert obtained text using `bdlde::HexEncoder`:

/// @code

///             int status = converter.convert(

///                                      output,

///                                      &numOut,

///                                      &numIn,

///                                      input,

///                                      inputEnd,

///                                      static_cast<int>(outputEnd - output));

///             if (status < 0) {

///                 return ENCODE_ERROR;                              // RETURN

///             }

///

///             output += numOut;

///             input  += numIn;

/// @endcode

/// And write encoded text to the output stream:

/// @code

///             if (output == outputEnd) {  // output buffer full; write data

///                 os.write(outputBuffer, sizeof outputBuffer);

///                 if (os.fail()) {

///                     return IO_ERROR;                              // RETURN

///                 }

///                 output = outputBuffer;

///             }

///         }

///     }

///

///     while (1) {

///         int numOut = 0;

/// @endcode

/// Then, we need to store the unhandled symbol (if there is one) to the output

/// buffer and complete the work of our encoder:

/// @code

///         int more = converter.endConvert(

///                                     output,

///                                     &numOut,

///                                     static_cast<int>(outputEnd - output));

///         if (more < 0) {

///             return ENCODE_ERROR;                                  // RETURN

///         }

///

///         output += numOut;

///

///         if (!more) { // no more output

///             break;

///         }

///

///         assert(output == outputEnd);  // output buffer is full

///

///         os.write(outputBuffer, sizeof outputBuffer);  // write buffer

///         if (os.fail()) {

///             return IO_ERROR;                                      // RETURN

///         }

///         output = outputBuffer;

///     }

///

///     if (output > outputBuffer) {

///         os.write(outputBuffer, output - outputBuffer);

///     }

///

///     return is.eof() && os.good() ? SUCCESS : IO_ERROR;

/// }

/// @endcode

/// Next, to demonstrate how our function works we need to create a stream with

/// data to encode.  Assume that we have some character buffer,

/// `BLOOMBERG_NEWS`, and a function, `streamDecoder` mirroring the work of the

/// `streamEncoder`.  Below, we encode this string into hexadecimal

/// format:

/// @code

/// bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS,

///                                         strlen(BLOOMBERG_NEWS)));

/// bsl::stringstream  outStream;

/// bsl::stringstream  backInStream;

/// @endcode

/// Then, we use our function to encode text:

/// @code

/// assert(0 == streamEncoder(outStream, inStream));

/// @endcode

/// This example does *not* decode the resulting hexadecimal text; for a

/// more complete example, see @ref bdlde_hexdecoder .

/// @}

/** @} */

/** @} */


/** @addtogroup bdl

 * @{

 */

/** @addtogroup bdlde

 * @{

 */

/** @addtogroup bdlde_hexencoder

 * @{

 */


#include <bdlscm_version.h>


#include <bsls_assert.h>


namespace bdlde {


                       // ================

                       // class HexEncoder

                       // ================


/// This class implements a mechanism capable of converting data of

/// arbitrary length to its corresponding Hex representation.

///

/// See @ref bdlde_hexencoder


class HexEncoder {


    // PRIVATE TYPES


    /// Symbolic state values for the encoder.  The current value is stored

    /// in `d_state`.

    enum States {

        e_ERROR_STATE   = -1,  // input is irreparably invalid

        e_INPUT_STATE   =  0,  // general input state

        e_DONE_STATE    =  1   // any additional input is error

    };


    // DATA

    int         d_state;          // current state of this object (one of the
                                  // `States` enumerators)


    char        d_deferred;       // retained output character (second hex
                                  // digit of the last consumed byte), or 0
                                  // if nothing is pending


    int         d_outputLength;   // total number of output characters


    bool        d_upperCaseFlag;  // flag to indicate if uppercase letters are

                                  // used


    const char *d_encodeTable_p;  // hexadecimal alphabet, indexed by a 4-bit
                                  // value


    // NOT IMPLEMENTED
    // The copy operations are declared in the private section so that
    // objects of this class cannot be copied by clients.

    HexEncoder(const HexEncoder&);

    HexEncoder& operator=(const HexEncoder&);


  public:

    // CREATORS


    /// Create a Hex encoder in the initial state.  Optionally specify the

    /// `upperCaseLetters` to indicate if values from 10 to 15 are encoded

    /// as uppercase letters (`A`-`F`) or as lowercase letters (`a`-`f`).

    explicit HexEncoder(bool upperCaseLetters = true);


    /// Destroy this object.

     ~HexEncoder() = default;


    // MANIPULATORS


    /// Append to the buffer addressed by the specified `out` pending

    /// character (if there is such) up to the optionally specified

    /// `maxNumOut` limit (default is negative, meaning no limit).  When

    /// there is no pending output and `maxNumOut` is still not reached,

    /// begin to consume and encode a sequence of input characters starting

    /// at the specified `begin` position, up to but not including the

    /// specified `end` position.  Any resulting output is written to the

    /// `out` buffer up to the (cumulative) `maxNumOut` limit.  If

    /// `maxNumOut` limit is reached, no further input will be consumed.

    /// Load into the (optionally) specified `numOut` and `numIn` the number

    /// of output bytes produced and input bytes consumed, respectively.

    /// Return a non-negative value on success and a negative value

    /// otherwise.  A successful return status indicates the number of

    /// characters that would be output if `endConvert` were called

    /// subsequently with no output limit.  These bytes *may* be available

    /// for output if this method is called with a sufficiently large

    /// `maxNumOut`.  Note that calling this method after `endConvert` has

    /// been invoked without an intervening `reset` call will place this

    /// instance in an error state, and return an error status (-2 on the

    /// first such call, and -1 once this object is already in the error

    /// state).  Note also

    /// that it is recommended that after all calls to `convert` are

    /// finished, the `endConvert` method be called to complete the encoding

    /// of any unprocessed input characters.  The behavior is undefined

    /// unless `numOut` and `numIn`, when supplied, are non-null.

    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>

    int convert(OUTPUT_ITERATOR out,

                INPUT_ITERATOR  begin,

                INPUT_ITERATOR  end);

    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>

    int convert(OUTPUT_ITERATOR  out,

                int             *numOut,

                int             *numIn,

                INPUT_ITERATOR   begin,

                INPUT_ITERATOR   end,

                int              maxNumOut = -1);


    /// Terminate encoding for this encoder; write any retained output

    /// (e.g., from a previous call to `convert` with a non-zero output

    /// limit argument) to the specified `out` buffer.  Optionally specify

    /// the `maxNumOut` limit on the number of bytes to output; if

    /// `maxNumOut` is negative, no limit is imposed.  Load into the

    /// (optionally) specified `numOut` the number of output bytes produced.

    /// Return a non-negative value on success and a negative value

    /// otherwise.  A successful return status indicates the number of

    /// characters that would be output if `endConvert` were called

    /// subsequently with no output limit.  Any retained bytes are available

    /// on a subsequent call to `endConvert`.  Once this method is called,

    /// no additional input may be supplied without an intervening call to

    /// `reset`; once this method returns a zero status, a subsequent call

    /// will place this encoder in the error state, and return an error

    /// status.  The behavior is undefined unless `numOut`, when supplied,

    /// is non-null.

    template <class OUTPUT_ITERATOR>

    int endConvert(OUTPUT_ITERATOR out);

    template <class OUTPUT_ITERATOR>

    int endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut = -1);


    /// Reset this encoder to its initial state (i.e., as if no input had

    /// been consumed).

    void reset();


    // ACCESSORS


    /// Return `true` if the input read so far by this encoder is considered

    /// syntactically complete, and `false` otherwise.

    bool isAcceptable() const;


    /// Return `true` if this encoder is in the done state (i.e.,

    /// `endConvert` has been called and any additional input will result in

    /// an error), and if there is no pending output, and `false` otherwise.

    bool isDone() const;


    /// Return `true` if there is no possibility of achieving an

    /// "acceptable" result, and `false` otherwise.  Note that for an

    /// encoder, no input can cause an error; the possible errors result

    /// either from a call to the `convert` method after the `endConvert`

    /// method is called the first time, or from a call to the `endConvert`

    /// method after the `endConvert` method has returned successfully.

    bool isError() const;


    /// Return `true` if this encoder is in the initial state (i.e., as if

    /// no input had been consumed), and `false` otherwise.

    bool isInitialState() const;


    /// Return `true` if this encoder represents values from 10 to 15 as

    /// uppercase letters (`A`-`F`), and `false` if these values are

    /// represented as lowercase letters (`a`-`f`).

    bool isUpperCase() const;


    /// Return the number of characters that would be output if `endConvert`

    /// were called with no output limit.

    int numOutputPending() const;


    /// Return the total length of the output emitted by this encoder

    /// (possibly after one or more calls to the `convert` or the

    /// `endConvert` methods) since its initial construction or the latest

    /// `reset`.

    int outputLength() const;

};


// ============================================================================

//                             INLINE DEFINITIONS

// ============================================================================


// MANIPULATORS

/// Convenience overload: encode the range `[begin, end)` to `out` with no
/// output limit, discarding the produced/consumed counts.  Return the status
/// of the six-argument `convert` overload to which this call is forwarded.
template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int HexEncoder::convert(OUTPUT_ITERATOR out,
                        INPUT_ITERATOR  begin,
                        INPUT_ITERATOR  end)
{
    // The full overload asserts that both count addresses are non-null, so
    // provide throw-away storage for them.
    int ignoredCounts[2];

    const int unlimited = -1;  // negative limit means "no output limit"

    return convert(out,
                   &ignoredCounts[0],
                   &ignoredCounts[1],
                   begin,
                   end,
                   unlimited);
}


/// Write to the specified `out` the character retained by a previous call
/// (if any), then encode bytes from the range `[begin, end)` as pairs of
/// hexadecimal characters, stopping once `maxNumOut` characters have been
/// written (a negative `maxNumOut` imposes no limit).  Load into `numOut` the
/// number of characters written and into `numIn` the number of input bytes
/// consumed.  Return the number of characters still retained by this encoder
/// (0 or 1) on success.  If this encoder is already in the error state return
/// -1; if `endConvert` has already been called, enter the error state and
/// return -2.  In both failure cases `*numOut` and `*numIn` are set to 0.
/// A byte whose first hex digit has been written but whose second digit did
/// not fit is counted as consumed; its second digit is retained and emitted
/// first by the next `convert` or `endConvert` call.
template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int HexEncoder::convert(OUTPUT_ITERATOR  out,
                        int             *numOut,
                        int             *numIn,
                        INPUT_ITERATOR   begin,
                        INPUT_ITERATOR   end,
                        int              maxNumOut)
{
    BSLS_ASSERT(numOut);
    BSLS_ASSERT(numIn);

    *numOut = 0;
    *numIn  = 0;

    if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
        // Distinguish "input supplied after `endConvert`" (-2) from "already
        // in the error state" (-1); either way the encoder is now in error.
        const int rv = e_DONE_STATE == d_state ? -2 : -1;
        d_state = e_ERROR_STATE;
        return rv;                                                    // RETURN
    }

    if (0 == maxNumOut) {
        // No room for output: nothing is written or consumed, but a retained
        // character (if any) is still pending and must be reported so that
        // the status agrees with `numOutputPending`.
        return d_deferred ? 1 : 0;                                    // RETURN
    }

    int numConsumed = 0;
    int numEmitted  = 0;

    // First, output the pending symbol left over from the previous call.  It
    // belongs to a byte that was already reported as consumed.

    if (d_deferred) {
        *out = d_deferred;
        ++out;
        ++numEmitted;
        d_deferred = 0;
    }

    // Then handle new input, one byte (two output characters) per iteration.

    while (begin != end && numEmitted != maxNumOut) {
        // Work on an unsigned value so that the nibble extraction is well
        // defined for bytes with the high bit set.
        const unsigned char byte = static_cast<unsigned char>(*begin);
        ++numConsumed;

        *out = d_encodeTable_p[byte >> 4];
        ++out;
        ++numEmitted;

        const char lowDigit = d_encodeTable_p[byte & 0x0f];

        if (numEmitted != maxNumOut) {
            *out = lowDigit;
            ++out;
            ++numEmitted;
            ++begin;
        }
        else {
            // Output limit reached between the two digits: retain the second
            // one.  The loop condition terminates the loop on the next test.
            d_deferred = lowDigit;
        }
    }

    *numOut         = numEmitted;
    *numIn          = numConsumed;
    d_outputLength += numEmitted;

    return d_deferred ? 1 : 0;
}


/// Convenience overload: terminate encoding, writing any retained character
/// to `out` with no output limit and discarding the output count.  Return the
/// status of the three-argument `endConvert` overload.
template <class OUTPUT_ITERATOR>
int HexEncoder::endConvert(OUTPUT_ITERATOR out)
{
    // The full overload asserts a non-null count address; supply a scratch
    // variable whose value is never inspected.
    int ignoredNumOut;

    const int unlimited = -1;  // negative limit means "no output limit"

    return endConvert(out, &ignoredNumOut, unlimited);
}


/// Terminate encoding: put this encoder in the done state and, unless
/// `maxNumOut` is 0, write the retained character (if any) to the specified
/// `out`.  Load into `numOut` the number of characters written (0 or 1) on
/// every path, including failures.  Return 0 if nothing remains pending, 1 if
/// a retained character could not be written because `maxNumOut` is 0, and
/// -1 if this encoder is in the error state or if this method is called
/// again after encoding has already completed with nothing pending (in which
/// case the encoder enters the error state).
template <class OUTPUT_ITERATOR>
int HexEncoder::endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut)
{
    BSLS_ASSERT(numOut);

    // Nothing has been produced yet; set this up front so that callers never
    // observe an indeterminate count on the early-return paths below.
    *numOut = 0;

    if (e_ERROR_STATE == d_state) {
        return -1;                                                    // RETURN
    }

    if (e_DONE_STATE == d_state && !d_deferred) {
        // A redundant call after a completed `endConvert` is an error.
        d_state = e_ERROR_STATE;
        return -1;                                                    // RETURN
    }

    d_state = e_DONE_STATE;

    if (!d_deferred) {
        return 0;                                                     // RETURN
    }

    if (0 == maxNumOut) {
        // No room: the retained character stays pending for a later call.
        return 1;                                                     // RETURN
    }

    *out       = d_deferred;
    *numOut    = 1;
    d_deferred = 0;
    ++d_outputLength;

    return 0;
}


/// Return this encoder to the input state, dropping any retained character
/// and zeroing the running output count.  The letter-case setting and the
/// encode table are left untouched.
inline
void HexEncoder::reset()
{
    d_outputLength = 0;
    d_deferred     = 0;
    d_state        = e_INPUT_STATE;
}


// ACCESSORS

/// Return `true` if this encoder is in the input state with no retained
/// character, and `false` otherwise.
inline
bool HexEncoder::isAcceptable() const
{
    if (e_INPUT_STATE != d_state) {
        return false;                                                 // RETURN
    }
    return 0 == d_deferred;
}


/// Return `true` if this encoder is in the done state with no retained
/// character, and `false` otherwise.
inline
bool HexEncoder::isDone() const
{
    if (e_DONE_STATE != d_state) {
        return false;                                                 // RETURN
    }
    return 0 == d_deferred;
}


/// Return `true` if this encoder is in the error state, and `false`
/// otherwise.
inline
bool HexEncoder::isError() const
{
    const bool inErrorState = (d_state == e_ERROR_STATE);
    return inErrorState;
}


/// Return `true` if this encoder is in the input state and has emitted no
/// output, and `false` otherwise.
inline
bool HexEncoder::isInitialState() const
{
    if (e_INPUT_STATE != d_state) {
        return false;                                                 // RETURN
    }
    return 0 == d_outputLength;
}


/// Return the value of the uppercase flag held by this encoder.
inline
bool HexEncoder::isUpperCase() const
{
    const bool usesUpperCase = d_upperCaseFlag;
    return usesUpperCase;
}


/// Return 1 if this encoder is holding a retained output character, and 0
/// otherwise.
inline
int HexEncoder::numOutputPending() const
{
    if (d_deferred) {
        return 1;                                                     // RETURN
    }
    return 0;
}


/// Return the running count of output characters recorded by this encoder.
inline
int HexEncoder::outputLength() const
{
    const int totalEmitted = d_outputLength;
    return totalEmitted;
}


}  // close package namespace


#endif


// ----------------------------------------------------------------------------

// Copyright 2022 Bloomberg Finance L.P.

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

//

//     http://www.apache.org/licenses/LICENSE-2.0

//

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.

// ----------------------------- END-OF-FILE ----------------------------------


/** @} */

/** @} */

/** @} */

bsls_assert.h

bsls_ident.h

bdlde::HexEncoder
Definition bdlde_hexencoder.h:347

bdlde::HexEncoder::isAcceptable
bool isAcceptable() const
Definition bdlde_hexencoder.h:613

bdlde::HexEncoder::outputLength
int outputLength() const
Definition bdlde_hexencoder.h:649

bdlde::HexEncoder::isUpperCase
bool isUpperCase() const
Definition bdlde_hexencoder.h:637

bdlde::HexEncoder::isError
bool isError() const
Definition bdlde_hexencoder.h:625

bdlde::HexEncoder::convert
int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end)
Definition bdlde_hexencoder.h:490

bdlde::HexEncoder::isDone
bool isDone() const
Definition bdlde_hexencoder.h:619

bdlde::HexEncoder::HexEncoder
HexEncoder(bool upperCaseLetters=true)

bdlde::HexEncoder::~HexEncoder
~HexEncoder()=default
Destroy this object.

bdlde::HexEncoder::isInitialState
bool isInitialState() const
Definition bdlde_hexencoder.h:631

bdlde::HexEncoder::reset
void reset()
Definition bdlde_hexencoder.h:604

bdlde::HexEncoder::numOutputPending
int numOutputPending() const
Definition bdlde_hexencoder.h:643

bdlde::HexEncoder::endConvert
int endConvert(OUTPUT_ITERATOR out)
Definition bdlde_hexencoder.h:565

BSLS_ASSERT
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804

BSLS_IDENT
#define BSLS_IDENT(str)
Definition bsls_ident.h:195

bdlde
Definition bdlde_base64alphabet.h:118