// bdlde_hexdecoder.h -*-C++-*- #ifndef INCLUDED_BDLDE_HEXDECODER #define INCLUDED_BDLDE_HEXDECODER #include <bsls_ident.h> BSLS_IDENT("$Id: $") //@PURPOSE: Provide automata converting from hex encodings. // //@CLASSES: // bdlde::HexDecoder: automata for hex decoding // //@SEE_ALSO: bdlde_hexencoder // //@DESCRIPTION: This component provides a class, 'bdlde::HexDecoder', for // decoding hexadecimal representation into plain text. // // 'bdlde::HexEncoder' and 'bdlde::HexDecoder' provide a pair of template // functions (each parameterized separately on both input and output iterators) // that can be used respectively to encode and to decode byte sequences of // arbitrary length into and from the printable Hex representation. // // Each instance of either the encoder or decoder retains the state of the // conversion from one supplied input to the next, enabling the processing of // segmented input -- i.e., processing resumes where it left off with the next // invocation on new input. Instance methods are provided for both the // encoder and decoder to (1) assert the end of input, (2) determine whether // the input so far is currently acceptable, and (3) indicate whether a // non-recoverable error has occurred. ///Hex Encoding ///------------ // The data stream is processed one byte at a time from left to right. Each // byte //.. // 7 6 5 4 3 2 1 0 // +-+-+-+-+-+-+-+-+ // | | // +-+-+-+-+-+-+-+-+ // `------v------' // Byte //.. // is segmented into two intermediate 4-bit quantities. //.. // 3 2 1 0 3 2 1 0 // +-+-+-+-+-+-+-+-+ // | | | // +-+-+-+-+-+-+-+-+ // `--v--' `--v--' // char0 char1 //.. // Each 4-bit quantity is in turn used as an index into the following character // table to generate an 8-bit character. //.. // ================= // * Hex Alphabet * // ----------------- // Val Enc Val Enc // --- --- --- --- // 0 '0' 8 '8' // 1 '1' 9 '9' // 2 '2' 10 'A' // 3 '3' 11 'B' // 4 '4' 12 'C' // 5 '5' 13 'D' // 6 '6' 14 'E' // 7 '7' 15 'F' // ================= //.. 
// Depending on its settings, the encoder represents values from 10 to 15 as // uppercase ('A'-'F') or lowercase letters ('a'-'f'). // // Input values of increasing length along with their corresponding Hex // encodings are illustrated below: //.. // Data: /* nothing */ // Encoding: /* nothing */ // // Data: "0" (0011 0000) // Encoding: 30 // // Data: "01" (0011 0000 0011 0001) // Encoding: 3031 // // Data: "01A" (0011 0000 0011 0001 0100 0001) // Encoding: 303141 // // Data: "01A?" (0011 0000 0011 0001 0100 0001 0011 1111) // Encoding: 3031413F //.. // ///Hex Decoding ///------------ // The data stream is processed two bytes at a time from left to right. Each // sequence of two 8-bit quantities //.. // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // `------v------' `------v------' // Byte0 Byte1 //.. // is segmented into four intermediate 4-bit quantities. //.. // 3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | | | | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // `--v--' `--v--' `--v--' `--v--' // chunk0 chunk1 chunk2 chunk3 //.. // The second and fourth chunks are combined to get the resulting 8-bit // character. // // Whitespace characters are ignored. On any non-alphabet character the // decoder reports an error. In order for a Hex encoding to be valid the // length of the input data (excluding any whitespace characters) must be a // multiple of two. // // Input values of increasing length along with their corresponding Hex // encodings are illustrated below (note that the encoded whitespace character // is skipped and the resulting string does not contain it): //..
// Data: /* nothing */ // Encoding: /* nothing */ // // Data: "4" (0000 0100) // Encoding: /* nothing */ // // Data: "41" (0000 0100 0000 0001) // Encoding: A // // Data: "412" (0000 0100 0000 0001 0000 0010) // Encoding: A // // Data: "4120" (0000 0100 0000 0001 0000 0010 0000 0000) // Encoding: A // // Data: "41203" (0000 0100 0000 0001 0000 0010 0000 0000 // 0000 0011) // Encoding: A // // Data: "41203F" (0000 0100 0000 0001 0000 0010 0000 0000 // 0000 0011 0000 1111) // Encoding: A? //.. // ///Usage ///----- // This section illustrates intended use of this component. // ///Example 1: Basic Usage of 'bdlde::HexDecoder' ///- - - - - - - - - - - - - - - - - - - - - - - // The following example shows how to use a 'bdlde::HexDecoder' object to // implement a function, 'streamDecoder', that reads hex representation from // 'bsl::istream', decodes that text, and writes the decoded text to a // 'bsl::ostream'. 'streamDecoder' returns 0 on success and a negative value // if the input data could not be successfully decoded or if there is an I/O // error. //.. // int streamDecoder(bsl::ostream& os, bsl::istream& is) // // Read the entire contents of the specified input stream 'is', convert // // the input hex encoding into plain text, and write the decoded text // // to the specified output stream 'os'. Return 0 on success, and a // // negative value otherwise. // { // enum { // SUCCESS = 0, // DECODE_ERROR = -1, // IO_ERROR = -2 // }; //.. // First we create an object, create buffers for storing data, and start a loop // that runs while the input stream contains some data: //.. // bdlde::HexDecoder converter; // // const int INBUFFER_SIZE = 1 << 10; // const int OUTBUFFER_SIZE = 1 << 10; // // char inputBuffer[INBUFFER_SIZE]; // char outputBuffer[OUTBUFFER_SIZE]; // // char *output = outputBuffer; // char *outputEnd = outputBuffer + sizeof outputBuffer; // // while (is.good()) { // input stream not exhausted //..
// On each iteration we read some data from the input stream: //.. // is.read(inputBuffer, sizeof inputBuffer); // // const char *input = inputBuffer; // const char *inputEnd = input + is.gcount(); // // while (input < inputEnd) { // input encoding not complete // // int numOut = 0; // int numIn = 0; //.. // Convert obtained text using 'bdlde::HexDecoder': //.. // int status = converter.convert( // output, // &numOut, // &numIn, // input, // inputEnd, // static_cast<int>(outputEnd - output)); // if (status < 0) { // return DECODE_ERROR; // RETURN // } // // output += numOut; // input += numIn; //.. // And write decoded text to the output stream: //.. // if (output == outputEnd) { // output buffer full; write data // os.write(outputBuffer, sizeof outputBuffer); // if (os.fail()) { // return IO_ERROR; // RETURN // } // output = outputBuffer; // } // } // } // // if (output > outputBuffer) { // os.write (outputBuffer, output - outputBuffer); // } //.. // Then we need to complete the work of our decoder: //.. // int more = converter.endConvert(); // if (more < 0) { // return DECODE_ERROR; // RETURN // } // // return is.eof() && os.good() ? SUCCESS : IO_ERROR; // } //.. // Next, to demonstrate how our function works we need to create a stream with // encoded data. Assume that we have some character buffer, 'BLOOMBERG_NEWS', // and a function, 'streamEncoder' mirroring the work of the 'streamDecoder': //.. // bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS, // sizeof(BLOOMBERG_NEWS))); // bsl::stringstream outStream; // bsl::stringstream backInStream; // // assert(0 == streamEncoder(outStream, inStream)); //.. // Now, we use our function to decode text: //.. // assert(0 == streamDecoder(backInStream, outStream)); //.. // Finally, we observe that the output fully matches the original text: //.. // assert(0 == strcmp(BLOOMBERG_NEWS, backInStream.str().c_str())); //.. 
#include <bdlscm_version.h>

#include <bsls_assert.h>

#include <bsl_iterator.h>

namespace BloombergLP {
namespace bdlde {

class HexDecoder {
    // This mechanism class converts hex-encoded text of arbitrary length back
    // into the bytes it represents.  The conversion state is kept between
    // calls, so the input may be supplied in several segments.

    // PRIVATE TYPES
    enum States {
        // Symbolic values identifying the state of the decoder.

        e_ERROR_STATE = -1,  // input is irreparably invalid
        e_INPUT_STATE =  0,  // general input state
        e_DONE_STATE  =  1   // any additional input is an error
    };

    // DATA
    int         d_state;          // current state of this object

    char        d_firstDigit;     // first (left) hex digit of the pair being
                                  // decoded; 0 when no digit is pending

    int         d_outputLength;   // total number of output characters

    const char *d_decodeTable_p;  // character code table

    // PRIVATE CLASS METHODS
    static bool isSpace(char character);
        // Return 'true' if the specified 'character' is whitespace (i.e.,
        // space, tab, CR, NL, VT, or FF), and 'false' otherwise.

    static bool isXdigit(char character);
        // Return 'true' if the specified 'character' is a hex digit ('0'-'9',
        // 'A'-'F', or 'a'-'f'), and 'false' otherwise.

    // NOT IMPLEMENTED
    HexDecoder(const HexDecoder&);
    HexDecoder& operator=(const HexDecoder&);

  public:
    // CREATORS
    HexDecoder();
        // Create a Hex decoder in the initial state.

    // ~HexDecoder() = default;
        // Destroy this object.

    // MANIPULATORS
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR out,
                INPUT_ITERATOR  begin,
                INPUT_ITERATOR  end);
        // Decode the sequence of input characters starting at the specified
        // 'begin' position, up to but not including the specified 'end'
        // position, writing the result to the specified 'out' with no limit
        // on the amount of output.  Return 0 on success and a negative value
        // otherwise.  This overload behaves like the six-argument overload
        // below, except that the counts of produced and consumed characters
        // are not reported.

    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR  out,
                int             *numOut,
                int             *numIn,
                INPUT_ITERATOR   begin,
                INPUT_ITERATOR   end,
                int              maxNumOut = -1);
        // Append to the buffer addressed by the specified 'out' all pending
        // output (if there is any) up to the optionally specified 'maxNumOut'
        // limit (default is negative, meaning no limit).  When there is no
        // pending output and the 'maxNumOut' is still not reached, begin to
        // consume and decode a sequence of input characters starting at the
        // specified 'begin' position, up to but not including the specified
        // 'end' position.  Any resulting output is written to the 'out' buffer
        // up to the (cumulative) 'maxNumOut' limit.  If 'maxNumOut' limit is
        // reached, no further input will be consumed.  Load into the
        // specified 'numOut' and 'numIn' the number of output bytes produced
        // and input bytes consumed, respectively.  Return 0 on success and a
        // negative value otherwise.  Note that calling this method after
        // 'endConvert' has been invoked without an intervening 'reset' call
        // will place this instance in an error state, and return an error
        // status.

    int endConvert();
        // Terminate decoding for this decoder.  Return 0 on success, and a
        // negative value otherwise.

    void reset();
        // Reset this decoder to its initial state (i.e., as if no input had
        // been consumed).

    // ACCESSORS
    bool isAcceptable() const;
        // Return 'true' if the input read so far by this decoder is considered
        // syntactically complete and all resulting output has been emitted;
        // return 'false' otherwise.  Note that there must not be any
        // unprocessed characters accumulated in the input buffer of this
        // decoder.

    bool isDone() const;
        // Return 'true' if this decoder is in the done state (i.e.,
        // 'endConvert' has been called and any additional input will result in
        // an error), and if there is no pending output; return 'false'
        // otherwise.

    bool isError() const;
        // Return 'true' if this decoder has encountered an irrecoverable error
        // and 'false' otherwise.  An irrecoverable error is one for which
        // there is no subsequent possibility of achieving an "acceptable"
        // result (as defined by the 'isAcceptable' method).

    bool isInitialState() const;
        // Return 'true' if this decoder is in the initial state (i.e., as if
        // no input had been consumed) and 'false' otherwise.

    bool isMaximal() const;
        // Return 'true' if the input to this decoder is maximal (i.e., the
        // input contains an end-of-input sentinel, signaling that no further
        // input should be expected).  *Always* returns 'false' for Hex
        // decoders since the encoding scheme does not specify an end-of-input
        // sentinel.

    int outputLength() const;
        // Return the total length of the output emitted by this decoder
        // (possibly after several calls to 'convert') since its initial
        // construction or the latest 'reset'.
};

// ============================================================================
//                          INLINE DEFINITIONS
// ============================================================================

// PRIVATE CLASS METHODS
inline
bool HexDecoder::isSpace(char character)
{
    // Work on the unsigned character code so that values above 0x7F can never
    // compare as negative.

    const unsigned char code = static_cast<unsigned char>(character);

    // HT (0x09), LF (0x0A), VT (0x0B), FF (0x0C), CR (0x0D), or SP (0x20).

    return (0x09 <= code && code <= 0x0D) || 0x20 == code;
}

inline
bool HexDecoder::isXdigit(char character)
{
    // Work on the unsigned character code so that values above 0x7F can never
    // compare as negative.

    const unsigned char code = static_cast<unsigned char>(character);

    return (0x30 <= code && code <= 0x39)    // '0' .. '9'
        || (0x41 <= code && code <= 0x46)    // 'A' .. 'F'
        || (0x61 <= code && code <= 0x66);   // 'a' .. 'f'
}
MANIPULATORS template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int HexDecoder::convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end) { int dummyNumOut; int dummyNumIn; return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1); } template <class OUTPUT_ITERATOR, class INPUT_ITERATOR> int HexDecoder::convert(OUTPUT_ITERATOR out, int *numOut, int *numIn, INPUT_ITERATOR begin, INPUT_ITERATOR end, int maxNumOut) { BSLS_ASSERT(numOut); BSLS_ASSERT(numIn); if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) { int rv = e_DONE_STATE == d_state ? -2 : -1; d_state = e_ERROR_STATE; *numOut = 0; *numIn = 0; return rv; // RETURN } if (0 == maxNumOut) { *numOut = 0; *numIn = 0; return 0; // RETURN } INPUT_ITERATOR originalBegin = begin; int numEmitted = 0; while (begin != end && numEmitted != maxNumOut) { const char digit = static_cast<char>(*begin); ++begin; if (!isSpace(digit)) { if (!isXdigit(digit)) { *numOut = numEmitted; d_outputLength += numEmitted; *numIn = static_cast<int>(bsl::distance(originalBegin, begin)); d_state = e_ERROR_STATE; return -1; // RETURN } if (0 == d_firstDigit) { d_firstDigit = digit; } else { char value = static_cast<char>( (d_decodeTable_p[static_cast<int>(d_firstDigit)] << 4) | (d_decodeTable_p[static_cast<int>(digit )])); *out = value; ++out; ++numEmitted; d_firstDigit = 0; } } } *numOut = numEmitted; d_outputLength += numEmitted; *numIn = static_cast<int>(bsl::distance(originalBegin, begin)); return 0; } inline void HexDecoder::reset() { d_state = e_INPUT_STATE; d_firstDigit = 0; d_outputLength = 0; } // ACCESSORS inline bool HexDecoder::isAcceptable() const { return e_INPUT_STATE == d_state && !d_firstDigit; } inline bool HexDecoder::isDone() const { return e_DONE_STATE == d_state; } inline bool HexDecoder::isError() const { return e_ERROR_STATE == d_state; } inline bool HexDecoder::isInitialState() const { return e_INPUT_STATE == d_state && 0 == d_outputLength && !d_firstDigit; } inline bool 
HexDecoder::isMaximal() const { return false; } inline int HexDecoder::outputLength() const { return d_outputLength; } } // close package namespace } // close enterprise namespace #endif // ---------------------------------------------------------------------------- // Copyright 2022 Bloomberg Finance L.P. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ----------------------------- END-OF-FILE ----------------------------------