BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlde_hexdecoder.h
Go to the documentation of this file.
1/// @file bdlde_hexdecoder.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlde_hexdecoder.h -*-C++-*-
8#ifndef INCLUDED_BDLDE_HEXDECODER
9#define INCLUDED_BDLDE_HEXDECODER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlde_hexdecoder bdlde_hexdecoder
15/// @brief Provide mechanism for decoding text from hexadecimal.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlde
19/// @{
20/// @addtogroup bdlde_hexdecoder
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlde_hexdecoder-purpose"> Purpose</a>
25/// * <a href="#bdlde_hexdecoder-classes"> Classes </a>
26/// * <a href="#bdlde_hexdecoder-description"> Description </a>
27/// * <a href="#bdlde_hexdecoder-hex-encoding"> Hex Encoding </a>
28/// * <a href="#bdlde_hexdecoder-hex-decoding"> Hex Decoding </a>
29/// * <a href="#bdlde_hexdecoder-usage"> Usage </a>
30/// * <a href="#bdlde_hexdecoder-example-1-basic-usage-of-bdlde-hexdecoder"> Example 1: Basic Usage of bdlde::HexDecoder </a>
31///
32/// # Purpose {#bdlde_hexdecoder-purpose}
33/// Provide mechanism for decoding text from hexadecimal.
34///
35/// # Classes {#bdlde_hexdecoder-classes}
36///
37/// - bdlde::HexDecoder: mechanism for decoding text from hexadecimal
38///
39/// @see bdlde_hexencoder
40///
41/// # Description {#bdlde_hexdecoder-description}
42/// This component provides a class, `bdlde::HexDecoder`, for
43/// decoding hexadecimal representation into plain text.
44///
45/// `bdlde::HexEncoder` and `bdlde::HexDecoder` provide a pair of template
46/// functions (each parameterized separately on both input and output iterators)
47/// that can be used respectively to encode and to decode byte sequences of
48/// arbitrary length into and from the printable Hex representation.
49///
50/// Each instance of either the encoder or decoder retains the state of the
51/// conversion from one supplied input to the next, enabling the processing of
52/// segmented input -- i.e., processing resumes where it left off with the next
53/// invocation on new input. Instance methods are provided for both the
54/// encoder and decoder to (1) assert the end of input, (2) determine whether
55/// the input so far is currently acceptable, and (3) indicate whether a
56/// non-recoverable error has occurred.
57///
58/// ## Hex Encoding {#bdlde_hexdecoder-hex-encoding}
59///
60///
61/// The data stream is processed one byte at a time from left to right. Each
62/// byte
63/// @code
64/// 7 6 5 4 3 2 1 0
65/// +-+-+-+-+-+-+-+-+
66/// | |
67/// +-+-+-+-+-+-+-+-+
68/// `------v------'
69/// Byte
70/// @endcode
71/// is segmented into two intermediate 4-bit quantities.
72/// @code
73/// 3 2 1 0 3 2 1 0
74/// +-+-+-+-+-+-+-+-+
75/// | | |
76/// +-+-+-+-+-+-+-+-+
77/// `--v--' `--v--'
78/// char0 char1
79/// @endcode
80/// Each 4-bit quantity is in turn used as an index into the following character
81/// table to generate an 8-bit character.
82/// @code
83/// =================
84/// * Hex Alphabet *
85/// -----------------
86/// Val Enc Val Enc
87/// --- --- --- ---
88/// 0 '0' 8 '8'
89/// 1 '1' 9 '9'
90/// 2 '2' 10 'A'
91/// 3 '3' 11 'B'
92/// 4 '4' 12 'C'
93/// 5 '5' 13 'D'
94/// 6 '6' 14 'E'
95/// 7 '7' 15 'F'
96/// =================
97/// @endcode
98/// Depending on its settings, the encoder represents values from 10 to 15 as
99/// uppercase (`A`-`F`) or lowercase (`a`-`f`) letters.
100///
101/// Input values of increasing length along with their corresponding Hex
102/// encodings are illustrated below:
103/// @code
104/// Data: /* nothing */
105/// Encoding: /* nothing */
106///
107/// Data: "0" (0011 0000)
108/// Encoding: 30
109///
110/// Data: "01" (0011 0000 0011 0001)
111/// Encoding: 3031
112///
113/// Data: "01A" (0011 0000 0011 0001 0100 0001)
114/// Encoding: 303141
115///
116/// Data: "01A?" (0011 0000 0011 0001 0100 0001 0011 1111)
117/// Encoding: 3031413F
118/// @endcode
119///
120/// ## Hex Decoding {#bdlde_hexdecoder-hex-decoding}
121///
122///
123/// The data stream is processed two bytes at a time from left to right. Each
124/// sequence of two 8-bit quantities
125/// @code
126/// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
127/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128/// | | |
129/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
130/// `------v------' `------v------'
131/// Byte0 Byte1
132/// @endcode
133/// is segmented into four intermediate 4-bit quantities.
134/// @code
135/// 3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0
136/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
137/// | | | | |
138/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
139/// `--v--' `--v--' `--v--' `--v--'
140/// chunk0 chunk1 chunk2 chunk3
141/// @endcode
142/// The second and fourth chunks are combined to get the resulting 8-bit
143/// character.
144///
145/// Whitespace characters are ignored. On any non-alphabet character the
146/// decoder reports an error. In order for a Hex encoding to be valid the
147/// length of the input data (excluding any whitespace characters) must be a
148/// multiple of two.
149///
150/// Input values of increasing length along with their corresponding Hex
151/// encodings are illustrated below (note that the encoded whitespace character
152/// is skipped and the resulting string does not contain it):
153/// @code
154/// Data: /* nothing */
155/// Encoding: /* nothing */
156///
157/// Data: "4" (0000 0100)
158/// Encoding: /* nothing */
159///
160/// Data: "41" (0000 0100 0000 0001)
161/// Encoding: A
162///
163/// Data: "412" (0000 0100 0000 0001 0000 0010)
164/// Encoding: A
165///
166/// Data: "4120" (0000 0100 0000 0001 0000 0010 0000 0000)
167/// Encoding: A
168///
169/// Data: "41203" (0000 0100 0000 0001 0000 0010 0000 0000
170/// 0000 0011)
171/// Encoding: A
172///
173/// Data: "41203F" (0000 0100 0000 0001 0000 0010 0000 0000
174/// 0000 0011 0000 1111)
175/// Encoding: A?
176/// @endcode
177///
178/// ## Usage {#bdlde_hexdecoder-usage}
179///
180///
181/// This section illustrates intended use of this component.
182///
183/// ### Example 1: Basic Usage of bdlde::HexDecoder {#bdlde_hexdecoder-example-1-basic-usage-of-bdlde-hexdecoder}
184///
185///
186/// The following example shows how to use a `bdlde::HexDecoder` object to
187/// implement a function, `streamDecoder`, that reads hex representation from
188/// `bsl::istream`, decodes that text, and writes the decoded text to a
189/// `bsl::ostream`. `streamDecoder` returns 0 on success and a negative value
190/// if the input data could not be successfully decoded or if there is an I/O
191/// error.
192/// @code
193/// /// Read the entire contents of the specified input stream `is`, convert
194/// /// the input hex encoding into plain text, and write the decoded text
195/// /// to the specified output stream `os`. Return 0 on success, and a
196/// /// negative value otherwise.
197/// int streamDecoder(bsl::ostream& os, bsl::istream& is)
198/// {
199/// enum {
200/// SUCCESS = 0,
201/// DECODE_ERROR = -1,
202/// IO_ERROR = -2
203/// };
204/// @endcode
205/// First we create an object, create buffers for storing data, and start a loop
206/// that runs while the input stream contains some data:
207/// @code
208/// bdlde::HexDecoder converter;
209///
210/// const int INBUFFER_SIZE = 1 << 10;
211/// const int OUTBUFFER_SIZE = 1 << 10;
212///
213/// char inputBuffer[INBUFFER_SIZE];
214/// char outputBuffer[OUTBUFFER_SIZE];
215///
216/// char *output = outputBuffer;
217/// char *outputEnd = outputBuffer + sizeof outputBuffer;
218///
219/// while (is.good()) { // input stream not exhausted
220/// @endcode
221/// On each iteration we read some data from the input stream:
222/// @code
223/// is.read(inputBuffer, sizeof inputBuffer);
224///
225/// const char *input = inputBuffer;
226/// const char *inputEnd = input + is.gcount();
227///
228/// while (input < inputEnd) { // input encoding not complete
229///
230/// int numOut = 0;
231/// int numIn = 0;
232/// @endcode
233/// Convert obtained text using `bdlde::HexDecoder`:
234/// @code
235/// int status = converter.convert(
236/// output,
237/// &numOut,
238/// &numIn,
239/// input,
240/// inputEnd,
241/// static_cast<int>(outputEnd - output));
242/// if (status < 0) {
243/// return DECODE_ERROR; // RETURN
244/// }
245///
246/// output += numOut;
247/// input += numIn;
248/// @endcode
249/// And write decoded text to the output stream:
250/// @code
251/// if (output == outputEnd) { // output buffer full; write data
252/// os.write(outputBuffer, sizeof outputBuffer);
253/// if (os.fail()) {
254/// return IO_ERROR; // RETURN
255/// }
256/// output = outputBuffer;
257/// }
258/// }
259/// }
260///
261/// if (output > outputBuffer) {
262/// os.write (outputBuffer, output - outputBuffer);
263/// }
264/// @endcode
265/// Then we need to complete the work of our decoder:
266/// @code
267/// int more = converter.endConvert();
268/// if (more < 0) {
269/// return DECODE_ERROR; // RETURN
270/// }
271///
272/// return is.eof() && os.good() ? SUCCESS : IO_ERROR;
273/// }
274/// @endcode
275/// Next, to demonstrate how our function works we need to create a stream with
276/// encoded data. Assume that we have some character string, `BLOOMBERG_NEWS`,
277/// and a function, `streamEncoder` mirroring the work of the `streamDecoder`:
278/// @code
279/// bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS,
280/// strlen(BLOOMBERG_NEWS)));
281/// bsl::stringstream outStream;
282/// bsl::stringstream backInStream;
283///
284/// assert(0 == streamEncoder(outStream, inStream));
285/// @endcode
286/// Now, we use our function to decode text:
287/// @code
288/// assert(0 == streamDecoder(backInStream, outStream));
289/// @endcode
290/// Finally, we observe that the output fully matches the original text:
291/// @code
292/// assert(0 == strcmp(BLOOMBERG_NEWS, backInStream.str().c_str()));
293/// @endcode
294/// @}
295/** @} */
296/** @} */
297
298/** @addtogroup bdl
299 * @{
300 */
301/** @addtogroup bdlde
302 * @{
303 */
304/** @addtogroup bdlde_hexdecoder
305 * @{
306 */
307
308#include <bdlscm_version.h>
309
310#include <bsls_assert.h>
311
312#include <bsl_iterator.h>
313
314
315namespace bdlde {
316
317/// This class implements a mechanism capable of converting data of
318/// arbitrary length from its corresponding Hex representation.
319///
320/// See @ref bdlde_hexdecoder
// NOTE(review): listing line 321 (presumably the `class HexDecoder {` opener)
// is absent from this extract -- confirm against the original header.
322
323 // PRIVATE TYPES
324
325 /// Symbolic state values for the decoder.
326 enum States {
327 e_ERROR_STATE = -1, // input is irreparably invalid
328 e_INPUT_STATE = 0, // general input state
329 e_DONE_STATE = 1 // any additional input is an error
330 };
331
332 // DATA
333 int d_state; // current state of this object (one of `States`)
334 char d_firstDigit; // first (left) hex digit to decode; 0 if none pending
335 int d_outputLength; // total number of output characters
336 const char *d_decodeTable_p; // character code table
337
338 // PRIVATE CLASS METHODS
339
340 /// Return `true` if the specified `character` is whitespace (i.e.,
341 /// space, tab, CR, NL, VT, or FF), and `false` otherwise.
342 static bool isSpace(char character);
343
344 /// Return `true` if the specified `character` is a hex digit, and
345 /// `false` otherwise.
346 static bool isXdigit(char character);
347
348 // NOT IMPLEMENTED
349 HexDecoder(const HexDecoder&);
350 HexDecoder& operator=(const HexDecoder&);
351
352 public:
353 // CREATORS
354
355 /// Create a Hex decoder in the initial state.
 // NOTE(review): the declaration this comment documents (listing line 356,
 // `HexDecoder()` per the member index at the end of the listing) is absent
 // from this extract.
357
358 /// Destroy this object.
359 ~HexDecoder() = default;
360
361 // MANIPULATORS
362
363 /// Append to the buffer addressed by the specified `out` all pending
364 /// output (if there is any) up to the optionally specified `maxNumOut`
365 /// limit (default is negative, meaning no limit). When there is no
366 /// pending output and the `maxNumOut` is still not reached, begin to
367 /// consume and decode a sequence of input characters starting at the
368 /// specified `begin` position, up to but not including the specified
369 /// `end` position. Any resulting output is written to the `out` buffer
370 /// up to the (cumulative) `maxNumOut` limit. If `maxNumOut` limit is
371 /// reached, no further input will be consumed. Load into the
372 /// (optionally) specified `numOut` and `numIn` the number of output
373 /// bytes produced and input bytes consumed, respectively. Return 0 on
374 /// success and a negative value otherwise. Note that calling this
375 /// method after `endConvert` has been invoked without an intervening
376 /// `reset` call will place this instance in an error state, and return
377 /// an error status.
 /// Also note that the six-argument overload asserts that `numOut` and
 /// `numIn` are non-null; use the three-argument overload when the counts
 /// are not needed.
378 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
379 int convert(OUTPUT_ITERATOR out,
380 INPUT_ITERATOR begin,
381 INPUT_ITERATOR end);
382 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
383 int convert(OUTPUT_ITERATOR out,
384 int *numOut,
385 int *numIn,
386 INPUT_ITERATOR begin,
387 INPUT_ITERATOR end,
388 int maxNumOut = -1);
389
390 /// Terminate decoding for this decoder. Return 0 on success, and a
391 /// negative value otherwise.
 // NOTE(review): the declaration this comment documents (listing line 392,
 // presumably `int endConvert();`) is absent from this extract.
393
394 /// Reset this decoder to its initial state (i.e., as if no input had
395 /// been consumed).
396 void reset();
397
398 // ACCESSORS
399
400 /// Return `true` if the input read so far by this decoder is considered
401 /// syntactically complete and all resulting output has been emitted;
402 /// return `false` otherwise. Note that there must not be any
403 /// unprocessed characters accumulated in the input buffer of this
404 /// decoder.
405 bool isAcceptable() const;
406
407 /// Return `true` if this decoder is in the done state (i.e.,
408 /// `endConvert` has been called and any additional input will result in
409 /// an error), and if there is no pending output; return `false`
410 /// otherwise.
411 bool isDone() const;
412
413 /// Return `true` if this decoder has encountered an irrecoverable error
414 /// and `false` otherwise. An irrecoverable error is one for which
415 /// there is no subsequent possibility of achieving an "acceptable"
416 /// result (as defined by the `isAcceptable` method).
417 bool isError() const;
418
419 /// Return `true` if this decoder is in the initial state (i.e., as if
420 /// no input had been consumed) and `false` otherwise.
421 bool isInitialState() const;
422
423 /// Return `true` if the input to this decoder is maximal (i.e., the
424 /// input contains an end-of-input sentinel, signaling that no further
425 /// input should be expected). *Always* returns `false` for Hex
426 /// decoders since the encoding scheme does not specify an end-of-input
427 /// sentinel.
428 bool isMaximal() const;
429
430 /// Return the total length of the output emitted by this decoder
431 /// (possibly after several calls to the `convert` or the `input`
432 /// methods) since its initial construction or the latest `reset`.
433 int outputLength() const;
434};
435
436// ============================================================================
437// INLINE DEFINITIONS
438// ============================================================================
439
440// PRIVATE CLASS METHODS
// Return `true` if `character` is one of the six ASCII whitespace bytes
// marked in the table below (HT 0x09, LF 0x0A, VT 0x0B, FF 0x0C, CR 0x0D, or
// space 0x20), and `false` otherwise.
441inline
442bool HexDecoder::isSpace(char character)
443{
 // The table has 256 slots but only the first 128 are spelled out; the
 // remaining entries are zero-initialized, so bytes 0x80-0xFF are never
 // classified as whitespace.
444 static const bool k_SPACE_TABLE[256] = {
445 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
446 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, // 00
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
448 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20
449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30
450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40
451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50
452 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60
453 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70
454 };
455
 // Index through `unsigned char` so that a negative `char` value cannot
 // produce a negative (out-of-bounds) index.
456 return k_SPACE_TABLE[static_cast<unsigned char>(character)];
457}
458
// Return `true` if `character` is an ASCII hexadecimal digit -- `0`-`9`
// (0x30-0x39), `A`-`F` (0x41-0x46), or `a`-`f` (0x61-0x66) -- and `false`
// otherwise.
459inline
460bool HexDecoder::isXdigit(char character)
461{
 // Only the first 128 of the 256 slots are listed; the rest are
 // zero-initialized, so bytes 0x80-0xFF are never accepted as hex digits.
462 static const bool k_XDIGIT_TABLE[256] = {
463 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20
467 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 30
468 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40
469 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50
470 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60
471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70
472 };
473
 // Index through `unsigned char` so that a negative `char` value cannot
 // produce a negative (out-of-bounds) index.
474 return k_XDIGIT_TABLE[static_cast<unsigned char>(character)];
475}
476
477// MANIPULATORS
// Convenience overload: decode the range `[begin, end)` into `out` with no
// output limit, discarding the produced/consumed counts. Returns whatever
// the six-argument overload returns.
478template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
479int HexDecoder::convert(OUTPUT_ITERATOR out,
480 INPUT_ITERATOR begin,
481 INPUT_ITERATOR end)
482{
 // The six-argument overload asserts non-null count pointers, so supply
 // throwaway locals; they are written by the callee before being read.
483 int dummyNumOut;
484 int dummyNumIn;
485
 // `-1` selects "no limit" on the number of output characters.
486 return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1);
487}
488
// Decode hex characters from `[begin, end)` into `out`, emitting at most
// `maxNumOut` characters (a negative `maxNumOut` means no limit). Load the
// number of characters written into `*numOut` and the number of input
// characters consumed into `*numIn`; both pointers must be non-null.
// Whitespace input is skipped (but still counted as consumed). Return 0 on
// success, -1 if an invalid character is met or the decoder was already in
// the error state, and -2 if the decoder was in the done state on entry. Any
// negative return leaves the decoder in the error state.
489template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
490int HexDecoder::convert(OUTPUT_ITERATOR out,
491 int *numOut,
492 int *numIn,
493 INPUT_ITERATOR begin,
494 INPUT_ITERATOR end,
495 int maxNumOut)
496{
497 BSLS_ASSERT(numOut);
498 BSLS_ASSERT(numIn);
499
 // No input is accepted once the decoder is done or has failed. The return
 // value distinguishes the two cases, but either way the decoder ends up in
 // the error state.
500 if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
501 int rv = e_DONE_STATE == d_state ? -2 : -1;
502 d_state = e_ERROR_STATE;
503 *numOut = 0;
504 *numIn = 0;
505 return rv; // RETURN
506 }
507
 // With a zero output budget nothing is consumed, not even whitespace.
508 if (0 == maxNumOut) {
509 *numOut = 0;
510 *numIn = 0;
511 return 0; // RETURN
512 }
513
514 INPUT_ITERATOR originalBegin = begin;
515 int numEmitted = 0;
516
 // `numEmitted` counts up from 0, so it never equals a negative
 // `maxNumOut`; in that case only `begin != end` bounds the loop.
517 while (begin != end && numEmitted != maxNumOut) {
518 const char digit = static_cast<char>(*begin);
 // Advance before validating, so the consumed count reported on error
 // includes the offending character.
519 ++begin;
520
521 if (!isSpace(digit)) {
522 if (!isXdigit(digit)) {
523 *numOut = numEmitted;
524 d_outputLength += numEmitted;
525 *numIn = static_cast<int>(bsl::distance(originalBegin, begin));
526 d_state = e_ERROR_STATE;
527 return -1; // RETURN
528 }
529
 // `d_firstDigit == 0` means "no high digit pending". This is
 // unambiguous because every character accepted by `isXdigit` is
 // non-zero. A pending digit persists across calls, which allows a
 // pair to be split between two input segments.
530 if (0 == d_firstDigit) {
531 d_firstDigit = digit;
532 }
533 else {
 // NOTE(review): `d_decodeTable_p` presumably maps a hex digit
 // character to its 0-15 value; the table is not visible here.
534 char value = static_cast<char>(
535 (d_decodeTable_p[static_cast<int>(d_firstDigit)] << 4) |
536 (d_decodeTable_p[static_cast<int>(digit )]));
537 *out = value;
538
539 ++out;
540 ++numEmitted;
541 d_firstDigit = 0;
542 }
543 }
544 }
545
546 *numOut = numEmitted;
547 d_outputLength += numEmitted;
548 *numIn = static_cast<int>(bsl::distance(originalBegin, begin));
549 return 0;
550}
551
// NOTE(review): the signature line (listing line 553) is missing from this
// extract; the member index at the end of the listing attributes it to
// `void reset()`. The body returns the decoder to the input state with no
// pending first digit and a zero output count.
552inline
554{
555 d_state = e_INPUT_STATE;
556 d_firstDigit = 0;
557 d_outputLength = 0;
558}
559
560// ACCESSORS
// NOTE(review): the signature line (listing line 562) is missing from this
// extract; the member index at the end of the listing attributes it to
// `bool isAcceptable() const`. The body reports `true` only while in the
// input state with no unpaired first digit pending.
561inline
563{
564 return e_INPUT_STATE == d_state && !d_firstDigit;
565}
566
// NOTE(review): the signature line (listing line 568) is missing from this
// extract; the member index at the end of the listing attributes it to
// `bool isDone() const`. The body reports whether the decoder is in the
// done state.
567inline
569{
570 return e_DONE_STATE == d_state;
571}
572
// NOTE(review): the signature line (listing line 574) is missing from this
// extract; the member index at the end of the listing attributes it to
// `bool isError() const`. The body reports whether the decoder is in the
// error state.
573inline
575{
576 return e_ERROR_STATE == d_state;
577}
578
// NOTE(review): the signature line (listing line 580) is missing from this
// extract; the member index at the end of the listing attributes it to
// `bool isInitialState() const`. The body reports `true` only when the
// decoder is in the input state, has emitted no output, and holds no pending
// first digit. Consumed whitespace alone therefore does not leave the
// initial state.
579inline
581{
582 return e_INPUT_STATE == d_state && 0 == d_outputLength && !d_firstDigit;
583}
584
// NOTE(review): the signature line (listing line 586) is missing from this
// extract; the member index at the end of the listing attributes it to
// `bool isMaximal() const`. The body unconditionally returns `false`.
585inline
587{
588 return false;
589}
590
// NOTE(review): the signature line (listing line 592) is missing from this
// extract; the member index at the end of the listing attributes it to
// `int outputLength() const`. The body returns the running count of emitted
// characters, which `convert` increases and `reset` zeroes.
591inline
593{
594 return d_outputLength;
595}
596
597} // close package namespace
598
599
600#endif
601
602// ----------------------------------------------------------------------------
603// Copyright 2022 Bloomberg Finance L.P.
604//
605// Licensed under the Apache License, Version 2.0 (the "License");
606// you may not use this file except in compliance with the License.
607// You may obtain a copy of the License at
608//
609// http://www.apache.org/licenses/LICENSE-2.0
610//
611// Unless required by applicable law or agreed to in writing, software
612// distributed under the License is distributed on an "AS IS" BASIS,
613// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
614// See the License for the specific language governing permissions and
615// limitations under the License.
616// ----------------------------- END-OF-FILE ----------------------------------
617
618/** @} */
619/** @} */
620/** @} */
Definition bdlde_hexdecoder.h:321
void reset()
Definition bdlde_hexdecoder.h:553
bool isInitialState() const
Definition bdlde_hexdecoder.h:580
bool isAcceptable() const
Definition bdlde_hexdecoder.h:562
int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end)
Definition bdlde_hexdecoder.h:479
HexDecoder()
Create a Hex decoder in the initial state.
int outputLength() const
Definition bdlde_hexdecoder.h:592
bool isDone() const
Definition bdlde_hexdecoder.h:568
bool isMaximal() const
Definition bdlde_hexdecoder.h:586
bool isError() const
Definition bdlde_hexdecoder.h:574
~HexDecoder()=default
Destroy this object.
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlde_base64alphabet.h:118