// bdlde_hexencoder.h                                                 -*-C++-*-
#ifndef INCLUDED_BDLDE_HEXENCODER
#define INCLUDED_BDLDE_HEXENCODER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide automata converting to hex encodings.
//
//@CLASSES:
//  bdlde::HexEncoder: automaton for hex encoding
//
//@SEE_ALSO: bdlde_hexdecoder
//
//@DESCRIPTION: This component provides a class, 'bdlde::HexEncoder', for
// encoding plain text into its hexadecimal representation.
//
// 'bdlde::HexEncoder' and 'bdlde::HexDecoder' provide a pair of template
// functions (each parameterized separately on both input and output iterators)
// that can be used respectively to encode and to decode byte sequences of
// arbitrary length into and from the printable Hex representation.
//
// Each instance of either the encoder or decoder retains the state of the
// conversion from one supplied input to the next, enabling the processing of
// segmented input -- i.e., processing resumes where it left off with the next
// invocation on new input.  Instance methods are provided for both the
// encoder and decoder to (1) assert the end of input, (2) determine whether
// the input so far is currently acceptable, and (3) indicate whether a
// non-recoverable error has occurred.
//
///Hex Encoding
///------------
// The data stream is processed one byte at a time from left to right.  Each
// byte
//..
//      7 6 5 4 3 2 1 0
//     +-+-+-+-+-+-+-+-+
//     |               |
//     +-+-+-+-+-+-+-+-+
//      `------v------'
//            Byte
//..
// is segmented into two intermediate 4-bit quantities.
//..
//      3 2 1 0 3 2 1 0
//     +-+-+-+-+-+-+-+-+
//     |       |       |
//     +-+-+-+-+-+-+-+-+
//      `--v--' `--v--'
//       char0   char1
//..
// Each 4-bit quantity is in turn used as an index into the following character
// table to generate an 8-bit character.
//..
//                     =================
//                     *  Hex Alphabet *
//                     -----------------
//                     Val Enc   Val Enc
//                     --- ---   --- ---
//                       0 '0'     8 '8'
//                       1 '1'     9 '9'
//                       2 '2'    10 'A'
//                       3 '3'    11 'B'
//                       4 '4'    12 'C'
//                       5 '5'    13 'D'
//                       6 '6'    14 'E'
//                       7 '7'    15 'F'
//                     =================
//..
// Depending on its settings, the encoder represents values from 10 to 15 as
// uppercase ('A'-'F') or lowercase ('a'-'f') letters.
//
// Input values of increasing length along with their corresponding Hex
// encodings are illustrated below:
//..
//  Data: /* nothing */
//  Encoding: /* nothing */
//
//  Data: "0"     (0011 0000)
//  Encoding: 30
//
//  Data: "01"    (0011 0000 0011 0001)
//  Encoding: 3031
//
//  Data: "01A"   (0011 0000 0011 0001 0100 0001)
//  Encoding: 303141
//
//  Data: "01A?"  (0011 0000 0011 0001 0100 0001 0011 1111)
//  Encoding: 3031413F
//..
//
///Hex Decoding
///------------
// The data stream is processed two bytes at a time from left to right.  Each
// sequence of two 8-bit quantities
//..
//      7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     |               |               |
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//      `------v------' `------v------'
//            Byte0           Byte1
//..
// is segmented into four intermediate 4-bit quantities.
//..
//      3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     |       |       |       |       |
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//      `--v--' `--v--' `--v--' `--v--'
//      chunk0  chunk1  chunk2  chunk3
//..
// The second and fourth chunks are combined to get the resulting 8-bit
// character.
//
// Whitespace characters are ignored.  On any non-alphabet character the
// decoder reports an error.  In order for a Hex encoding to be valid the
// length of the input data (excluding any whitespace characters) must be a
// multiple of two.
//
// Input values of increasing length along with their corresponding Hex
// encodings are illustrated below (note that the encoded whitespace character
// is skipped and the resulting string does not contain it):
//..
//  Data: /* nothing */
//  Encoding: /* nothing */
//
//  Data: "4"       (0000 0100)
//  Encoding: /* nothing */
//
//  Data: "41"      (0000 0100 0000 0001)
//  Encoding: A
//
//  Data: "412"     (0000 0100 0000 0001 0000 0010)
//  Encoding: A
//
//  Data: "4120"    (0000 0100 0000 0001 0000 0010 0000 0000)
//  Encoding: A
//
//  Data: "41203"   (0000 0100 0000 0001 0000 0010 0000 0000
//                   0000 0011)
//  Encoding: A
//
//  Data: "41203F"  (0000 0100 0000 0001 0000 0010 0000 0000
//                   0000 0011 0000 1111)
//  Encoding: A?
//..
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Basic Usage of 'bdlde::HexEncoder'
///- - - - - - - - - - - - - - - - - - - - - - -
// The following example shows how to use a 'bdlde::HexEncoder' object to
// implement a function, 'streamEncoder', that reads text from 'bsl::istream',
// encodes that text into hex representation, and writes the encoded text to a
// 'bsl::ostream'.  'streamEncoder' returns 0 on success and a negative value
// if the input data could not be successfully encoded or if there is an I/O
// error.
//..
//  int streamEncoder(bsl::ostream& os, bsl::istream& is)
//      // Read the entire contents of the specified input stream 'is', convert
//      // the input plain text to hex representation, and write the encoded
//      // text to the specified output stream 'os'.  Return 0 on success, and
//      // a negative value otherwise.
//  {
//      enum {
//          SUCCESS      =  0,
//          ENCODE_ERROR = -1,
//          IO_ERROR     = -2
//      };
//..
// First we create an object, create buffers for storing data, and start a
// loop that runs while the input stream contains some data:
//..
//      bdlde::HexEncoder converter;
//
//      const int INBUFFER_SIZE  = 1 << 10;
//      const int OUTBUFFER_SIZE = 1 << 10;
//
//      char inputBuffer[INBUFFER_SIZE];
//      char outputBuffer[OUTBUFFER_SIZE];
//
//      char *output    = outputBuffer;
//      char *outputEnd = outputBuffer + sizeof outputBuffer;
//
//      while (is.good()) {  // input stream not exhausted
//..
// On each iteration we read some data from the input stream: //.. // is.read(inputBuffer, sizeof inputBuffer); // // const char *input = inputBuffer; // const char *inputEnd = input + is.gcount(); // // while (input < inputEnd) { // input encoding not complete // // int numOut; // int numIn; //.. // Convert obtained text using 'bdlde::HexEncoder': //.. // int status = converter.convert( // output, // &numOut, // &numIn, // input, // inputEnd, // static_cast<int>(outputEnd - output)); // if (status < 0) { // return ENCODE_ERROR; // RETURN // } // // output += numOut; // input += numIn; //.. // And write encoded text to the output stream: //.. // if (output == outputEnd) { // output buffer full; write data // os.write(outputBuffer, sizeof outputBuffer); // if (os.fail()) { // return IO_ERROR; // RETURN // } // output = outputBuffer; // } // } // } // // while (1) { // int numOut = 0; //.. // Then, we need to store the unhandled symbol (if there is one) to the output // buffer and complete the work of our encoder: //.. // int more = converter.endConvert( // output, // &numOut, // static_cast<int>(outputEnd - output)); // if (more < 0) { // return ENCODE_ERROR; // RETURN // } // // output += numOut; // // if (!more) { // no more output // break; // } // // assert(output == outputEnd); // output buffer is full // // os.write(outputBuffer, sizeof outputBuffer); // write buffer // if (os.fail()) { // return IO_ERROR; // RETURN // } // output = outputBuffer; // } // // if (output > outputBuffer) { // os.write(outputBuffer, output - outputBuffer); // } // // return is.eof() && os.good() ? SUCCESS : IO_ERROR; // } //.. // Next, to demonstrate how our function works we need to create a stream with // data to encode. Assume that we have some character buffer, // 'BLOOMBERG_NEWS', and a function, 'streamDecoder' mirroring the work of the // 'streamEncoder': //.. 
// bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS, // sizeof(BLOOMBERG_NEWS))); // bsl::stringstream outStream; // bsl::stringstream backInStream; //.. // Then, we use our function to encode text: //.. // assert(0 == streamEncoder(outStream, inStream)); //.. // Now, we decode this text back using mirror function: //.. // assert(0 == streamDecoder(backInStream, outStream)); //.. // Finally, we observe that the output fully matches the original text: //.. // assert(0 == strcmp(BLOOMBERG_NEWS, backInStream.str().c_str())); //.. #include <bdlscm_version.h> #include <bsls_assert.h> namespace BloombergLP { namespace bdlde { // ================ // class HexEncoder // ================ class HexEncoder { // This class implements a mechanism capable of converting data of // arbitrary length to its corresponding Hex representation. // PRIVATE TYPES enum States { // Symbolic state values for the encoder e_ERROR_STATE = -1, // input is irreparably invalid e_INPUT_STATE = 0, // general input state e_DONE_STATE = 1 // any additional input is error }; // DATA int d_state; // current state of this object char d_deferred; // retained output character int d_outputLength; // total number of output characters bool d_upperCaseFlag; // flag to indicate if uppercase letters are // used const char *d_encodeTable_p; // hexadecimal alphabet // NOT IMPLEMENTED HexEncoder(const HexEncoder&); HexEncoder& operator=(const HexEncoder&); public: // CREATORS explicit HexEncoder(bool upperCaseLetters = true); // Create a Hex encoder in the initial state. Optionally specify the // 'upperCaseLetters' to indicate if values from 10 to 15 are encoded // as uppercase letters('A'-'F') or as lowercase letters('a'-'f'). // ~HexEncoder() = default; // Destroy this object. 
// MANIPULATORS template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end); template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int convert(OUTPUT_ITERATOR out, int *numOut, int *numIn, INPUT_ITERATOR begin, INPUT_ITERATOR end, int maxNumOut = -1); // Append to the buffer addressed by the specified 'out' pending // character (if there is such) up to the optionally specified // 'maxNumOut' limit (default is negative, meaning no limit). When // there is no pending output and 'maxNumOut' is still not reached, // begin to consume and encode a sequence of input characters starting // at the specified 'begin' position, up to but not including the // specified 'end' position. Any resulting output is written to the // 'out' buffer up to the (cumulative) 'maxNumOut' limit. If // 'maxNumOut' limit is reached, no further input will be consumed. // Load into the (optionally) specified 'numOut' and 'numIn' the number // of output bytes produced and input bytes consumed, respectively. // Return a non-negative value on success and a negative value // otherwise. A successful return status indicates the number of // characters that would be output if 'endConvert' were called // subsequently with no output limit. These bytes *may* be available // for output if this method is called with a sufficiently large // 'maxNumOut'. Note that calling this method after 'endConvert' has // been invoked without an intervening 'reset' call will place this // instance in an error state, and return an error status. Note also // that it is recommended that after all calls to 'convert' are // finished, the 'endConvert' method be called to complete the encoding // of any unprocessed input characters. 
template <class OUTPUT_ITERATOR> int endConvert(OUTPUT_ITERATOR out); template <class OUTPUT_ITERATOR> int endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut = -1); // Terminate encoding for this encoder; write any retained output // (e.g., from a previous call to 'convert' with a non-zero output // limit argument) to the specified 'out' buffer. Optionally specify // the 'maxNumOut' limit on the number of bytes to output; if // 'maxNumOut' is negative, no limit is imposed. Load into the // (optionally) specified 'numOut' the number of output bytes produced. // Return a non-negative value on success and a negative value // otherwise. A successful return status indicates the number of // characters that would be output if 'endConvert' were called // subsequently with no output limit. Any retained bytes are available // on a subsequent call to 'endConvert'. Once this method is called, // no additional input may be supplied without an intervening call to // 'reset'; once this method returns a zero status, a subsequent call // will place this encoder in the error state, and return an error // status. void reset(); // Reset this encoder to its initial state (i.e., as if no input had // been consumed). // ACCESSORS bool isAcceptable() const; // Return 'true' if the input read so far by this encoder is considered // syntactically complete, and 'false' otherwise. bool isDone() const; // Return 'true' if this encoder is in the done state (i.e., // 'endConvert' has been called and any additional input will result in // an error), and if there is no pending output, and 'false' otherwise. bool isError() const; // Return 'true' if there is no possibility of achieving an // "acceptable" result, and 'false' otherwise. 
Note that for an // encoder, no input can cause an error; the possible errors result // either from a call to the 'convert' method after the 'endConvert' // method is called the first time, or from a call to the 'endConvert' // method after the 'endConvert' method has returned successfully. bool isInitialState() const; // Return 'true' if this encoder is in the initial state (i.e., as if // no input had been consumed), and 'false' otherwise. bool isUpperCase() const; // Return 'true' if this encoder represents values from 10 to 15 as // uppercase letters('A'-'F'), and 'false' if these values are // represented as lowercase letters('a'-'f'). int numOutputPending() const; // Return the number of characters that would be output if 'endConvert' // were called with no output limit. int outputLength() const; // Return the total length of the output emitted by this encoder // (possibly after one or more calls to the 'convert' or the 'input' // methods) since its initial construction or the latest 'reset'. }; // ============================================================================ // INLINE DEFINITIONS // ============================================================================ // MANIPULATORS template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int HexEncoder::convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end) { int dummyNumOut; int dummyNumIn; return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1); } template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int HexEncoder::convert(OUTPUT_ITERATOR out, int *numOut, int *numIn, INPUT_ITERATOR begin, INPUT_ITERATOR end, int maxNumOut) { BSLS_ASSERT(numOut); BSLS_ASSERT(numIn); if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) { int rv = e_DONE_STATE == d_state ? 
-2 : -1; d_state = e_ERROR_STATE; *numOut = 0; *numIn = 0; return rv; // RETURN } if (0 == maxNumOut) { *numOut = 0; *numIn = 0; return 0; // RETURN } int numConsumed = 0; int numEmitted = 0; // First we need to output pending symbol left over from the previous call. if (d_deferred) { *out = d_deferred; ++out; ++numEmitted; d_deferred = 0; } // Then we can handle new input. while (begin != end && numEmitted != maxNumOut) { if (d_deferred) { *out = d_deferred; ++out; ++numEmitted; d_deferred = 0; ++begin; } else { const char digit = static_cast<char>(*begin); ++numConsumed; *out = d_encodeTable_p[(digit >> 4) & 0x0f]; ++out; ++numEmitted; d_deferred = d_encodeTable_p[digit & 0x0f]; } } *numOut = numEmitted; d_outputLength += numEmitted; *numIn = numConsumed; return d_deferred ? 1 : 0; } template <class OUTPUT_ITERATOR> int HexEncoder::endConvert(OUTPUT_ITERATOR out) { int dummyNumOut; return endConvert(out, &dummyNumOut, -1); } template <class OUTPUT_ITERATOR> int HexEncoder::endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut) { BSLS_ASSERT(numOut); if (e_ERROR_STATE == d_state) { return -1; // RETURN } if (e_DONE_STATE == d_state && !d_deferred) { d_state = e_ERROR_STATE; return -1; // RETURN } d_state = e_DONE_STATE; if (d_deferred) { if (0 == maxNumOut) { return 1; // RETURN } else { *out = d_deferred; *numOut = 1; d_deferred = 0; d_outputLength++; } } return 0; } inline void HexEncoder::reset() { d_state = e_INPUT_STATE; d_deferred = 0; d_outputLength = 0; } // ACCESSORS inline bool HexEncoder::isAcceptable() const { return e_INPUT_STATE == d_state && 0 == d_deferred; } inline bool HexEncoder::isDone() const { return e_DONE_STATE == d_state && 0 == d_deferred; } inline bool HexEncoder::isError() const { return e_ERROR_STATE == d_state; } inline bool HexEncoder::isInitialState() const { return e_INPUT_STATE == d_state && 0 == d_outputLength; } inline bool HexEncoder::isUpperCase() const { return d_upperCaseFlag; } inline int HexEncoder::numOutputPending() 
const { return d_deferred ? 1 : 0; } inline int HexEncoder::outputLength() const { return d_outputLength; } } // close package namespace } // close enterprise namespace #endif // ---------------------------------------------------------------------------- // Copyright 2022 Bloomberg Finance L.P. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ----------------------------- END-OF-FILE ----------------------------------