BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlde_hexencoder.h
Go to the documentation of this file.
1/// @file bdlde_hexencoder.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlde_hexencoder.h -*-C++-*-
8#ifndef INCLUDED_BDLDE_HEXENCODER
9#define INCLUDED_BDLDE_HEXENCODER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlde_hexencoder bdlde_hexencoder
15/// @brief Provide mechanism for encoding text into hexadecimal.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlde
19/// @{
20/// @addtogroup bdlde_hexencoder
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlde_hexencoder-purpose"> Purpose</a>
25/// * <a href="#bdlde_hexencoder-classes"> Classes </a>
26/// * <a href="#bdlde_hexencoder-description"> Description </a>
27/// * <a href="#bdlde_hexencoder-hex-encoding"> Hex Encoding </a>
28/// * <a href="#bdlde_hexencoder-hex-decoding"> Hex Decoding </a>
29/// * <a href="#bdlde_hexencoder-usage"> Usage </a>
30/// * <a href="#bdlde_hexencoder-example-1-basic-usage-of-bdlde-hexencoder"> Example 1: Basic Usage of bdlde::HexEncoder </a>
31///
32/// # Purpose {#bdlde_hexencoder-purpose}
33/// Provide mechanism for encoding text into hexadecimal.
34///
35/// # Classes {#bdlde_hexencoder-classes}
36///
37/// - bdlde::HexEncoder: mechanism for encoding text into hexadecimal
38///
39/// @see bdlde_hexdecoder
40///
41/// # Description {#bdlde_hexencoder-description}
42/// This component provides a class, `bdlde::HexEncoder`, for
43/// encoding plain text into its hexadecimal representation.
44///
45/// `bdlde::HexEncoder` and `bdlde::HexDecoder` provide a pair of template
46/// functions (each parameterized separately on both input and output iterators)
47/// that can be used respectively to encode and to decode byte sequences of
48/// arbitrary length into and from the printable Hex representation.
49///
50/// Each instance of either the encoder or decoder retains the state of the
51/// conversion from one supplied input to the next, enabling the processing of
52/// segmented input -- i.e., processing resumes where it left off with the next
53/// invocation on new input. Instance methods are provided for both the
54/// encoder and decoder to (1) assert the end of input, (2) determine whether
55/// the input so far is currently acceptable, and (3) indicate whether a
56/// non-recoverable error has occurred.
57///
58/// ## Hex Encoding {#bdlde_hexencoder-hex-encoding}
59///
60///
61/// The data stream is processed one byte at a time from left to right. Each
62/// byte
63/// @code
64/// 7 6 5 4 3 2 1 0
65/// +-+-+-+-+-+-+-+-+
66/// | |
67/// +-+-+-+-+-+-+-+-+
68/// `------v------'
69/// Byte
70/// @endcode
71/// is segmented into two intermediate 4-bit quantities.
72/// @code
73/// 3 2 1 0 3 2 1 0
74/// +-+-+-+-+-+-+-+-+
75/// | | |
76/// +-+-+-+-+-+-+-+-+
77/// `--v--' `--v--'
78/// char0 char1
79/// @endcode
80/// Each 4-bit quantity is in turn used as an index into the following character
81/// table to generate an 8-bit character.
82/// @code
83/// =================
84/// * Hex Alphabet *
85/// -----------------
86/// Val Enc Val Enc
87/// --- --- --- ---
88/// 0 '0' 8 '8'
89/// 1 '1' 9 '9'
90/// 2 '2' 10 'A'
91/// 3 '3' 11 'B'
92/// 4 '4' 12 'C'
93/// 5 '5' 13 'D'
94/// 6 '6' 14 'E'
95/// 7 '7' 15 'F'
96/// =================
97/// @endcode
98/// Depending on its settings, the encoder represents values from 10 to 15 as
99/// uppercase (`A`-`F`) or lowercase (`a`-`f`) letters.
100///
101/// Input values of increasing length along with their corresponding Hex
102/// encodings are illustrated below:
103/// @code
104/// Data: /* nothing */
105/// Encoding: /* nothing */
106///
107/// Data: "0" (0011 0000)
108/// Encoding: 30
109///
110/// Data: "01" (0011 0000 0011 0001)
111/// Encoding: 3031
112///
113/// Data: "01A" (0011 0000 0011 0001 0100 0001)
114/// Encoding: 303141
115///
116/// Data: "01A?" (0011 0000 0011 0001 0100 0001 0011 1111)
117/// Encoding: 3031413F
118/// @endcode
119///
120/// ## Hex Decoding {#bdlde_hexencoder-hex-decoding}
121///
122///
123/// The data stream is processed two bytes at a time from left to right. Each
124/// sequence of two 8-bit quantities
125/// @code
126/// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
127/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128/// | | |
129/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
130/// `------v------' `------v------'
131/// Byte0 Byte1
132/// @endcode
133/// is segmented into four intermediate 4-bit quantities.
134/// @code
135/// 3 2 1 0 3 2 1 0 3 2 1 0 3 2 1 0
136/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
137/// | | | | |
138/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
139/// `--v--' `--v--' `--v--' `--v--'
140/// chunk0 chunk1 chunk2 chunk3
141/// @endcode
142/// The second and fourth chunks are combined to get the resulting 8-bit
143/// character.
144///
145/// Whitespace characters are ignored. On any non-alphabet character the
146/// decoder reports an error. In order for a Hex encoding to be valid the
147/// length of the input data (excluding any whitespace characters) must be a
148/// multiple of two.
149///
150/// Input values of increasing length along with their corresponding Hex
151/// encodings are illustrated below (note that the encoded whitespace character
152/// is skipped and the resulting string does not contain it):
153/// @code
154/// Data: /* nothing */
155/// Encoding: /* nothing */
156///
157/// Data: "4" (0000 0100)
158/// Encoding: /* nothing */
159///
160/// Data: "41" (0000 0100 0000 0001)
161/// Encoding: A
162///
163/// Data: "412" (0000 0100 0000 0001 0000 0010)
164/// Encoding: A
165///
166/// Data: "4120" (0000 0100 0000 0001 0000 0010 0000 0000)
167/// Encoding: A
168///
169/// Data: "41203" (0000 0100 0000 0001 0000 0010 0000 0000
170/// 0000 0011)
171/// Encoding: A
172///
173/// Data: "41203F" (0000 0100 0000 0001 0000 0010 0000 0000
174/// 0000 0011 0000 1111)
175/// Encoding: A?
176/// @endcode
177///
178/// ## Usage {#bdlde_hexencoder-usage}
179///
180///
181/// This section illustrates intended use of this component.
182///
183/// ### Example 1: Basic Usage of bdlde::HexEncoder {#bdlde_hexencoder-example-1-basic-usage-of-bdlde-hexencoder}
184///
185///
186/// The following example shows using a `bdlde::HexEncoder` object to encode
187/// bytes into a hexadecimal format. For dependency reasons, a more complete
188/// example, showing both encoding and decoding can be found in
189/// @ref bdlde_hexdecoder .
190///
191/// In the example below, we implement a function `streamEncoder`, that reads
192/// text from `bsl::istream`, encodes that text into hex representation, and
193/// writes the encoded text to a `bsl::ostream`. `streamEncoder` returns 0 on
194/// success and a negative value if the input data could not be successfully
195/// encoded or if there is an I/O error.
196/// @code
197/// /// Read the entire contents of the specified input stream `is`, convert
198/// /// the input plain text to hex representation, and write the encoded
199/// /// text to the specified output stream `os`. Return 0 on success, and
200/// /// a negative value otherwise.
201/// int streamEncoder(bsl::ostream& os, bsl::istream& is)
202/// {
203/// enum {
204/// SUCCESS = 0,
205/// ENCODE_ERROR = -1,
206/// IO_ERROR = -2
207/// };
208/// @endcode
209/// First, we create an encoder object, create buffers for storing data, and
210/// start a loop that runs while the input stream contains some data:
211/// @code
212/// bdlde::HexEncoder converter;
213///
214/// const int INBUFFER_SIZE = 1 << 10;
215/// const int OUTBUFFER_SIZE = 1 << 10;
216///
217/// char inputBuffer[INBUFFER_SIZE];
218/// char outputBuffer[OUTBUFFER_SIZE];
219///
220/// char *output = outputBuffer;
221/// char *outputEnd = outputBuffer + sizeof outputBuffer;
222///
223/// while (is.good()) { // input stream not exhausted
224/// @endcode
225/// On each iteration we read some data from the input stream:
226/// @code
227/// is.read(inputBuffer, sizeof inputBuffer);
228///
229/// const char *input = inputBuffer;
230/// const char *inputEnd = input + is.gcount();
231///
232/// while (input < inputEnd) { // input encoding not complete
233///
234/// int numOut;
235/// int numIn;
236/// @endcode
237/// Convert obtained text using `bdlde::HexEncoder`:
238/// @code
239/// int status = converter.convert(
240/// output,
241/// &numOut,
242/// &numIn,
243/// input,
244/// inputEnd,
245/// static_cast<int>(outputEnd - output));
246/// if (status < 0) {
247/// return ENCODE_ERROR; // RETURN
248/// }
249///
250/// output += numOut;
251/// input += numIn;
252/// @endcode
253/// And write encoded text to the output stream:
254/// @code
255/// if (output == outputEnd) { // output buffer full; write data
256/// os.write(outputBuffer, sizeof outputBuffer);
257/// if (os.fail()) {
258/// return IO_ERROR; // RETURN
259/// }
260/// output = outputBuffer;
261/// }
262/// }
263/// }
264///
265/// while (1) {
266/// int numOut = 0;
267/// @endcode
268/// Then, we need to store the unhandled symbol (if there is one) to the output
269/// buffer and complete the work of our encoder:
270/// @code
271/// int more = converter.endConvert(
272/// output,
273/// &numOut,
274/// static_cast<int>(outputEnd - output));
275/// if (more < 0) {
276/// return ENCODE_ERROR; // RETURN
277/// }
278///
279/// output += numOut;
280///
281/// if (!more) { // no more output
282/// break;
283/// }
284///
285/// assert(output == outputEnd); // output buffer is full
286///
287/// os.write(outputBuffer, sizeof outputBuffer); // write buffer
288/// if (os.fail()) {
289/// return IO_ERROR; // RETURN
290/// }
291/// output = outputBuffer;
292/// }
293///
294/// if (output > outputBuffer) {
295/// os.write(outputBuffer, output - outputBuffer);
296/// }
297///
298/// return is.eof() && os.good() ? SUCCESS : IO_ERROR;
299/// }
300/// @endcode
301/// Next, to demonstrate how our function works we need to create a stream with
302/// data to encode. Assume that we have some character buffer,
303/// `BLOOMBERG_NEWS`, and a function, `streamDecoder` mirroring the work of the
304/// `streamEncoder`. Below, we encode this string into a hexadecimal
305/// format:
306/// @code
307/// bsl::istringstream inStream(bsl::string(BLOOMBERG_NEWS,
308/// strlen(BLOOMBERG_NEWS)));
309/// bsl::stringstream outStream;
310/// bsl::stringstream backInStream;
311/// @endcode
312/// Then, we use our function to encode text:
313/// @code
314/// assert(0 == streamEncoder(outStream, inStream));
315/// @endcode
316/// This example does *not* decode the resulting hexadecimal text; for a
317/// more complete example, see @ref bdlde_hexdecoder .
318/// @}
319/** @} */
320/** @} */
321
322/** @addtogroup bdl
323 * @{
324 */
325/** @addtogroup bdlde
326 * @{
327 */
328/** @addtogroup bdlde_hexencoder
329 * @{
330 */
331
332#include <bdlscm_version.h>
333
334#include <bsls_assert.h>
335
336
337namespace bdlde {
338
339 // ================
340 // class HexEncoder
341 // ================
342
/// This class implements a mechanism capable of converting data of
/// arbitrary length to its corresponding Hex representation.
///
/// See @ref bdlde_hexencoder
class HexEncoder {

    // PRIVATE TYPES

    /// Symbolic state values for the encoder
    enum States {
        e_ERROR_STATE = -1,  // input is irreparably invalid
        e_INPUT_STATE =  0,  // general input state
        e_DONE_STATE  =  1   // any additional input is error
    };

    // DATA
    int         d_state;          // current state of this object

    char        d_deferred;       // retained output character (0 if none)

    int         d_outputLength;   // total number of output characters

    bool        d_upperCaseFlag;  // flag to indicate if uppercase letters are
                                  // used

    const char *d_encodeTable_p;  // hexadecimal alphabet


    // NOT IMPLEMENTED
    HexEncoder(const HexEncoder&);
    HexEncoder& operator=(const HexEncoder&);

  public:
    // CREATORS

    /// Create a Hex encoder in the initial state.  Optionally specify the
    /// `upperCaseLetters` to indicate if values from 10 to 15 are encoded
    /// as uppercase letters (`A`-`F`) or as lowercase letters (`a`-`f`).
    explicit HexEncoder(bool upperCaseLetters = true);

    /// Destroy this object.
    ~HexEncoder() = default;

    // MANIPULATORS

    /// Append to the buffer addressed by the specified `out` the pending
    /// character (if there is one), up to the optionally specified
    /// `maxNumOut` limit (default is negative, meaning no limit).  When
    /// there is no pending output and `maxNumOut` is still not reached,
    /// begin to consume and encode a sequence of input characters starting
    /// at the specified `begin` position, up to but not including the
    /// specified `end` position.  Any resulting output is written to the
    /// `out` buffer up to the (cumulative) `maxNumOut` limit.  If the
    /// `maxNumOut` limit is reached, no further input will be consumed.
    /// Load into the (optionally) specified `numOut` and `numIn` the number
    /// of output bytes produced and input bytes consumed, respectively.
    /// Return a non-negative value on success and a negative value
    /// otherwise.  A successful return status indicates the number of
    /// characters that would be output if `endConvert` were called
    /// subsequently with no output limit.  These bytes *may* be available
    /// for output if this method is called with a sufficiently large
    /// `maxNumOut`.  Note that calling this method after `endConvert` has
    /// been invoked without an intervening `reset` call will place this
    /// instance in an error state, and return an error status.  Note also
    /// that it is recommended that after all calls to `convert` are
    /// finished, the `endConvert` method be called to complete the encoding
    /// of any unprocessed input characters.
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR out,
                INPUT_ITERATOR  begin,
                INPUT_ITERATOR  end);
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR  out,
                int             *numOut,
                int             *numIn,
                INPUT_ITERATOR   begin,
                INPUT_ITERATOR   end,
                int              maxNumOut = -1);

    /// Terminate encoding for this encoder; write any retained output
    /// (e.g., from a previous call to `convert` with a non-zero output
    /// limit argument) to the specified `out` buffer.  Optionally specify
    /// the `maxNumOut` limit on the number of bytes to output; if
    /// `maxNumOut` is negative, no limit is imposed.  Load into the
    /// (optionally) specified `numOut` the number of output bytes produced.
    /// Return a non-negative value on success and a negative value
    /// otherwise.  A successful return status indicates the number of
    /// characters that would be output if `endConvert` were called
    /// subsequently with no output limit.  Any retained bytes are available
    /// on a subsequent call to `endConvert`.  Once this method is called,
    /// no additional input may be supplied without an intervening call to
    /// `reset`; once this method returns a zero status, a subsequent call
    /// will place this encoder in the error state, and return an error
    /// status.
    template <class OUTPUT_ITERATOR>
    int endConvert(OUTPUT_ITERATOR out);
    template <class OUTPUT_ITERATOR>
    int endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut = -1);

    /// Reset this encoder to its initial state (i.e., as if no input had
    /// been consumed).
    void reset();

    // ACCESSORS

    /// Return `true` if the input read so far by this encoder is considered
    /// syntactically complete, and `false` otherwise.
    bool isAcceptable() const;

    /// Return `true` if this encoder is in the done state (i.e.,
    /// `endConvert` has been called and any additional input will result in
    /// an error), and if there is no pending output, and `false` otherwise.
    bool isDone() const;

    /// Return `true` if there is no possibility of achieving an
    /// "acceptable" result, and `false` otherwise.  Note that for an
    /// encoder, no input can cause an error; the possible errors result
    /// either from a call to the `convert` method after the `endConvert`
    /// method is called the first time, or from a call to the `endConvert`
    /// method after the `endConvert` method has returned successfully.
    bool isError() const;

    /// Return `true` if this encoder is in the initial state (i.e., as if
    /// no input had been consumed), and `false` otherwise.
    bool isInitialState() const;

    /// Return `true` if this encoder represents values from 10 to 15 as
    /// uppercase letters (`A`-`F`), and `false` if these values are
    /// represented as lowercase letters (`a`-`f`).
    bool isUpperCase() const;

    /// Return the number of characters that would be output if `endConvert`
    /// were called with no output limit.
    int numOutputPending() const;

    /// Return the total length of the output emitted by this encoder
    /// (possibly after one or more calls to the `convert` or the
    /// `endConvert` methods) since its initial construction or the latest
    /// `reset`.
    int outputLength() const;
};
483
484// ============================================================================
485// INLINE DEFINITIONS
486// ============================================================================
487
488// MANIPULATORS
489template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
490int HexEncoder::convert(OUTPUT_ITERATOR out,
491 INPUT_ITERATOR begin,
492 INPUT_ITERATOR end)
493{
494 int dummyNumOut;
495 int dummyNumIn;
496
497 return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1);
498}
499
500template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
501int HexEncoder::convert(OUTPUT_ITERATOR out,
502 int *numOut,
503 int *numIn,
504 INPUT_ITERATOR begin,
505 INPUT_ITERATOR end,
506 int maxNumOut)
507{
508 BSLS_ASSERT(numOut);
509 BSLS_ASSERT(numIn);
510
511 if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
512 int rv = e_DONE_STATE == d_state ? -2 : -1;
513 d_state = e_ERROR_STATE;
514 *numOut = 0;
515 *numIn = 0;
516 return rv; // RETURN
517 }
518
519 if (0 == maxNumOut) {
520 *numOut = 0;
521 *numIn = 0;
522 return 0; // RETURN
523 }
524
525 int numConsumed = 0;
526 int numEmitted = 0;
527
528 // First we need to output pending symbol left over from the previous call.
529
530 if (d_deferred) {
531 *out = d_deferred;
532 ++out;
533 ++numEmitted;
534 d_deferred = 0;
535 }
536
537 // Then we can handle new input.
538
539 while (begin != end && numEmitted != maxNumOut) {
540 if (d_deferred) {
541 *out = d_deferred;
542 ++out;
543 ++numEmitted;
544 d_deferred = 0;
545 ++begin;
546 }
547 else {
548 const char digit = static_cast<char>(*begin);
549 ++numConsumed;
550
551 *out = d_encodeTable_p[(digit >> 4) & 0x0f];
552 ++out;
553 ++numEmitted;
554 d_deferred = d_encodeTable_p[digit & 0x0f];
555 }
556 }
557
558 *numOut = numEmitted;
559 d_outputLength += numEmitted;
560 *numIn = numConsumed;
561 return d_deferred ? 1 : 0;
562}
563
564template <class OUTPUT_ITERATOR>
565int HexEncoder::endConvert(OUTPUT_ITERATOR out)
566{
567 int dummyNumOut;
568
569 return endConvert(out, &dummyNumOut, -1);
570}
571
572template <class OUTPUT_ITERATOR>
573int HexEncoder::endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut)
574{
575 BSLS_ASSERT(numOut);
576
577 if (e_ERROR_STATE == d_state) {
578 return -1; // RETURN
579 }
580
581 if (e_DONE_STATE == d_state && !d_deferred) {
582 d_state = e_ERROR_STATE;
583 return -1; // RETURN
584 }
585
586 d_state = e_DONE_STATE;
587
588 if (d_deferred) {
589 if (0 == maxNumOut) {
590 return 1; // RETURN
591 }
592 else {
593 *out = d_deferred;
594 *numOut = 1;
595 d_deferred = 0;
596 d_outputLength++;
597 }
598 }
599
600 return 0;
601}
602
603inline
605{
606 d_state = e_INPUT_STATE;
607 d_deferred = 0;
608 d_outputLength = 0;
609}
610
611// ACCESSORS
612inline
614{
615 return e_INPUT_STATE == d_state && 0 == d_deferred;
616}
617
618inline
620{
621 return e_DONE_STATE == d_state && 0 == d_deferred;
622}
623
624inline
626{
627 return e_ERROR_STATE == d_state;
628}
629
630inline
632{
633 return e_INPUT_STATE == d_state && 0 == d_outputLength;
634}
635
636inline
638{
639 return d_upperCaseFlag;
640}
641
642inline
644{
645 return d_deferred ? 1 : 0;
646}
647
648inline
650{
651 return d_outputLength;
652}
653
654} // close package namespace
655
656
657#endif
658
659// ----------------------------------------------------------------------------
660// Copyright 2022 Bloomberg Finance L.P.
661//
662// Licensed under the Apache License, Version 2.0 (the "License");
663// you may not use this file except in compliance with the License.
664// You may obtain a copy of the License at
665//
666// http://www.apache.org/licenses/LICENSE-2.0
667//
668// Unless required by applicable law or agreed to in writing, software
669// distributed under the License is distributed on an "AS IS" BASIS,
670// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
671// See the License for the specific language governing permissions and
672// limitations under the License.
673// ----------------------------- END-OF-FILE ----------------------------------
674
675/** @} */
676/** @} */
677/** @} */
Definition bdlde_hexencoder.h:347
bool isAcceptable() const
Definition bdlde_hexencoder.h:613
int outputLength() const
Definition bdlde_hexencoder.h:649
bool isUpperCase() const
Definition bdlde_hexencoder.h:637
bool isError() const
Definition bdlde_hexencoder.h:625
int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end)
Definition bdlde_hexencoder.h:490
bool isDone() const
Definition bdlde_hexencoder.h:619
HexEncoder(bool upperCaseLetters=true)
~HexEncoder()=default
Destroy this object.
bool isInitialState() const
Definition bdlde_hexencoder.h:631
void reset()
Definition bdlde_hexencoder.h:604
int numOutputPending() const
Definition bdlde_hexencoder.h:643
int endConvert(OUTPUT_ITERATOR out)
Definition bdlde_hexencoder.h:565
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlde_base64alphabet.h:118