BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlde_base64decoder.h
Go to the documentation of this file.
1/// @file bdlde_base64decoder.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlde_base64decoder.h -*-C++-*-
8#ifndef INCLUDED_BDLDE_BASE64DECODER
9#define INCLUDED_BDLDE_BASE64DECODER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlde_base64decoder bdlde_base64decoder
15/// @brief Provide automata for converting to and from Base64 encodings.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlde
19/// @{
20/// @addtogroup bdlde_base64decoder
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlde_base64decoder-purpose"> Purpose</a>
25/// * <a href="#bdlde_base64decoder-classes"> Classes </a>
26/// * <a href="#bdlde_base64decoder-description"> Description </a>
27/// * <a href="#bdlde_base64decoder-base-64-encoding"> Base 64 Encoding </a>
28/// * <a href="#bdlde_base64decoder-base-64-encoding-with-url-and-filename-safe-alphabet"> Base 64 Encoding with URL and Filename Safe Alphabet </a>
29/// * <a href="#bdlde_base64decoder-base-64-decoding"> Base 64 Decoding </a>
30/// * <a href="#bdlde_base64decoder-usage"> Usage </a>
31/// * <a href="#bdlde_base64decoder-example-1-basic-usage"> Example 1: Basic Usage </a>
32///
33/// # Purpose {#bdlde_base64decoder-purpose}
34/// Provide automata for converting to and from Base64 encodings.
35///
36/// # Classes {#bdlde_base64decoder-classes}
37///
38/// - bdlde::Base64Decoder: automata performing Base64 decoding operations
39///
40/// @see bdlde_base64encoder
41///
42/// # Description {#bdlde_base64decoder-description}
43/// This component provides a `class`, `bdlde::Base64Decoder`, which
44/// provides a pair of template functions (each parameterized separately on both
45/// input and output iterators) that can be used respectively to encode and to
46/// decode byte sequences of arbitrary length into and from the printable Base64
47/// representation described in Section 6.8 "Base64 Content Transfer Encoding"
48/// of RFC 2045, "Multipurpose Internet Mail Extensions (MIME) Part One: Format
49/// of Internet Message Bodies."
50///
51/// The `bdlde::Base64Encoder` and `bdlde::Base64Decoder` support the standard
52/// "base64" encoding (described in https://tools.ietf.org/html/rfc4648) as well
53/// as the "Base 64 Encoding with URL and Filename Safe Alphabet", or
54/// "base64url", encoding. The "base64url" encoding is very similar to "base64"
55/// but substitutes a couple characters in the encoded alphabet to avoid
56/// characters that conflict with special characters in URL syntax or filename
57/// descriptions (replacing `+` with `-`, and `/` with `_`). See
58/// {Base 64 Encoding with URL and Filename Safe Alphabet} for more information.
59///
60/// Each instance of either the encoder or decoder retains the state of the
61/// conversion from one supplied input to the next, enabling the processing of
62/// segmented input -- i.e., processing resumes where it left off with the next
63/// invocation on new input. Instance methods are provided for both the
64/// encoder and decoder to (1) assert the end of input, (2) determine whether
65/// the input so far is currently acceptable, and (3) indicate whether a
66/// non-recoverable error has occurred.
67///
68/// ## Base 64 Encoding {#bdlde_base64decoder-base-64-encoding}
69///
70///
71/// The data stream is processed three bytes at a time from left to right (a
72/// final quantum consisting of one or two bytes, as discussed below, is handled
73/// specially). Each sequence of three 8-bit quantities
74/// @code
75/// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
76/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
77/// | | | |
78/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
79/// `------v------' `------v------' `------v------'
80/// Byte2 Byte1 Byte0
81/// @endcode
82/// is segmented into four intermediate 6-bit quantities.
83/// @code
84/// 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0
85/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
86/// | | | | |
87/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
88/// `----v----' `----v----' `----v----' `----v----'
89/// char3 char2 char1 char0
90/// @endcode
91/// Each 6-bit quantity is in turn used as an index into the following character
92/// table to generate an 8-bit character. The four resulting characters hence
93/// form the encoding for the original 3-byte sequence.
94/// @code
95/// ======================================================================
96/// * The Basic BASE-64 Alphabet *
97/// ----------------------------------------------------------------------
98/// Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc
99/// --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
100/// 0 'A' 8 'I' 16 'Q' 24 'Y' 32 'g' 40 'o' 48 'w' 56 '4'
101/// 1 'B' 9 'J' 17 'R' 25 'Z' 33 'h' 41 'p' 49 'x' 57 '5'
102/// 2 'C' 10 'K' 18 'S' 26 'a' 34 'i' 42 'q' 50 'y' 58 '6'
103/// 3 'D' 11 'L' 19 'T' 27 'b' 35 'j' 43 'r' 51 'z' 59 '7'
104/// 4 'E' 12 'M' 20 'U' 28 'c' 36 'k' 44 's' 52 '0' 60 '8'
105/// 5 'F' 13 'N' 21 'V' 29 'd' 37 'l' 45 't' 53 '1' 61 '9'
106/// 6 'G' 14 'O' 22 'W' 30 'e' 38 'm' 46 'u' 54 '2' 62 '+'
107/// 7 'H' 15 'P' 23 'X' 31 'f' 39 'n' 47 'v' 55 '3' 63 '/'
108/// ======================================================================
109/// @endcode
110/// This component also supports a slightly different alphabet, "base64url",
111/// that is more appropriate if the encoded representation would be used in a
112/// file name or URL (see
113/// {Base 64 Encoding with URL and Filename Safe Alphabet}).
114///
115/// The 3-byte grouping of the input is only a design of convenience and not a
116/// requirement. When the number of bytes in the input stream is not divisible
117/// by 3, sufficient 0 bits are padded on the right to achieve an integral
118/// number of 6-bit character indices. Then one of two special cases will apply
119/// for the final processing step:
120///
121/// I) There is a single byte of data, in which case there will be two Base64
122/// encoding characters (the second of which will be one of [AQgw]) followed by
123/// two equal (`=`) signs.
124///
125/// II) There are exactly two bytes of data, in which case there will be
126/// three Base64 encoding characters (the third of which will be one of
127/// [AEIMQUYcgkosw048]) followed by a single equal (`=`) sign.
128///
129/// The MIME standard requires that the maximum line length of emitted text not
130/// exceed 76 characters exclusive of CRLF. The caller may override this
131/// default if desired.
132///
133/// Input values of increasing length along with their corresponding Base64
134/// encodings are illustrated below:
135/// @code
136/// Data: /* nothing */
137/// Encoding: /* nothing */
138///
139/// Data: 0x01
140/// Encoding: AQ==
141///
142/// Data: 0x01 0x02
143/// Encoding: AQI=
144///
145/// Data: 0x01 0x02 0x03
146/// Encoding: AQID
147///
148/// Data: 0x01 0x02 0x03 0x04
149/// Encoding: AQIDBA==
150/// @endcode
151/// In order for a Base64 encoding to be valid, the input data must be either of
152/// length a multiple of three (constituting maximal input), or have been
153/// terminated explicitly by the `endConvert` method (initiating bit padding
154/// when necessary).
155///
156/// ## Base 64 Encoding with URL and Filename Safe Alphabet {#bdlde_base64decoder-base-64-encoding-with-url-and-filename-safe-alphabet}
157///
158///
159/// The encoder and decoder in this component also support the "base64url"
160/// encoding, which is the same as standard "base64" but substitutes (a couple)
161/// characters in the alphabet that are treated as special characters when used
162/// in a URL or in a file system. The following table is technically identical
163/// to the table presented in {Base 64 Encoding}, except for the characters at
164/// values 62 and 63, which are `-` and `_` respectively.
165/// @code
166/// ======================================================================
167/// * The "URL and Filename Safe" BASE-64 Alphabet *
168/// ----------------------------------------------------------------------
169/// Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc
170/// --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
171/// 0 'A' 8 'I' 16 'Q' 24 'Y' 32 'g' 40 'o' 48 'w' 56 '4'
172/// 1 'B' 9 'J' 17 'R' 25 'Z' 33 'h' 41 'p' 49 'x' 57 '5'
173/// 2 'C' 10 'K' 18 'S' 26 'a' 34 'i' 42 'q' 50 'y' 58 '6'
174/// 3 'D' 11 'L' 19 'T' 27 'b' 35 'j' 43 'r' 51 'z' 59 '7'
175/// 4 'E' 12 'M' 20 'U' 28 'c' 36 'k' 44 's' 52 '0' 60 '8'
176/// 5 'F' 13 'N' 21 'V' 29 'd' 37 'l' 45 't' 53 '1' 61 '9'
177/// 6 'G' 14 'O' 22 'W' 30 'e' 38 'm' 46 'u' 54 '2' 62 '-'
178/// 7 'H' 15 'P' 23 'X' 31 'f' 39 'n' 47 'v' 55 '3' 63 '_'
179/// ======================================================================
180/// @endcode
181/// ## Base 64 Decoding {#bdlde_base64decoder-base-64-decoding}
182///
183///
184/// The degree to which decoding detects errors can significantly affect
185/// performance. The standard permits all non-Base64 characters to be treated
186/// as whitespace. One variant mode of this decoder does just that; the other
187/// reports an error if a bad (i.e., non-whitespace) character is detected. The
188/// mode of the instance is configurable. The standard imposes a maximum of 76
189/// characters exclusive of CRLF; however, the decoder implemented in this
190/// component will handle lines of arbitrary length.
191///
192/// The following kinds of errors can occur during decoding and are reported
193/// with the following priority:
194/// @code
195/// BAD DATA: A character (other than whitespace) that is not a member of the
196/// Base64 character set (including '='). Note that this error
197/// is detected only if the 'decoder' is explicitly configured (at
198/// construction) to do so.
199///
200/// BAD FORMAT: An '=' character precedes a valid numeric Base64 character,
201/// more than two '=' characters appear (possibly separated by
202/// non-Base64 characters), a numeric Base64 character other than
203/// [AEIMQUYcgkosw048] precedes a single terminal '=' character,
204/// or a character other than [AQgw] precedes a terminal pair of
205/// consecutive '=' characters.
206/// @endcode
207/// The `isError` method is used to detect such anomalies, and the `numIn`
208/// output parameter (indicating the number of input characters consumed)
209/// or possibly the iterator itself (for iterators with reference-semantics)
210/// identifies the offending character.
211///
212/// Note that the existence of an `=` can be used to reliably indicate the end
213/// of the valid data, but no such assurance is possible when the length (in
214/// bytes) of the initial input data sequence before encoding was evenly
215/// divisible by 3.
216///
217/// ## Usage {#bdlde_base64decoder-usage}
218///
219///
220/// This section illustrates intended use of this component.
221///
222/// ### Example 1: Basic Usage {#bdlde_base64decoder-example-1-basic-usage}
223///
224///
225/// The following example shows how to use a `bdlde::Base64Decoder` object to
226/// implement a function, `streamDecoder`, that reads text from a
227/// `bsl::istream`, decodes that text from base 64 representation, and writes
228/// the decoded text to a `bsl::ostream`. `streamDecoder` returns 0 on
229/// success and a negative value if the input data could not be successfully
230/// decoded or if there is an I/O error.
231/// @code
232/// streamdecoder.h -*-C++-*-
233///
234/// int streamDecoder(bsl::ostream& os, bsl::istream& is);
235/// // Read the entire contents of the specified input stream 'is', convert
236/// // the input base-64 encoding into plain text, and write the decoded
237/// // text to the specified output stream 'os'. Return 0 on success, and a
238/// // negative value otherwise.
239/// @endcode
240/// We will use fixed-sized input and output buffers in the implementation, but,
241/// because of the flexibility of `bsl::istream` and the output-buffer
242/// monitoring functionality of `bdlde::Base64Decoder`, the fixed buffer sizes
243/// do *not* limit the quantity of data that can be read, decoded, or written to
244/// the output stream. The implementation file is as follows.
245/// @code
246/// streamdecoder.cpp -*-C++-*-
247///
248/// #include <streamdecoder.h>
249///
250/// #include <bdlde_base64decoder.h>
251///
252/// namespace BloombergLP {
253///
254/// int streamDecoder(bsl::ostream& os, bsl::istream& is)
255/// {
256/// enum {
257/// SUCCESS = 0,
258/// DECODE_ERROR = -1,
259/// IO_ERROR = -2
260/// };
261/// @endcode
262/// We declare a `bdlde::Base64Decoder` object `converter`, which will decode
263/// the input data. Note that various internal buffers and cursors are used as
264/// needed without further comment. We read as much data as is available from
265/// the user-supplied input stream `is` *or* as much as will fit in
266/// `inputBuffer` before beginning conversion. To obtain unobstructedly the
267/// output that results from decoding the entire input stream (even in the case
268/// of errors), the base64 decoder is configured not to detect errors.
269/// @code
270/// bdlde::Base64Decoder converter(false); // Do not report errors.
271///
272/// const int INBUFFER_SIZE = 1 << 10;
273/// const int OUTBUFFER_SIZE = 1 << 10;
274///
275/// char inputBuffer[INBUFFER_SIZE];
276/// char outputBuffer[OUTBUFFER_SIZE];
277///
278/// char *output = outputBuffer;
279/// char *outputEnd = outputBuffer + sizeof outputBuffer;
280///
281/// while (is.good()) { // input stream not exhausted
282///
283/// is.read(inputBuffer, sizeof inputBuffer);
284/// @endcode
285/// With `inputBuffer` now populated, we'll use `converter` in an inner `while`
286/// loop to decode the input and write the decoded data to `outputBuffer` (via
287/// the `output` cursor). Note that if the call to `converter.convert` fails,
288/// our function terminates with a negative status.
289/// @code
290/// const char *input = inputBuffer;
291/// const char *inputEnd = input + is.gcount();
292///
293/// while (input < inputEnd) { // input encoding not complete
294///
295/// int numOut;
296/// int numIn;
297///
298/// int status = converter.convert(output, &numOut, &numIn,
299/// input, inputEnd,
300/// outputEnd - output);
301/// if (status < 0) {
302/// return DECODE_ERROR; // RETURN
303/// }
304/// @endcode
305/// If the call to `converter.convert` returns successfully, we'll see if the
306/// output buffer is full, and if so, write its contents to the user-supplied
307/// output stream `os`. Note how we use the values of `numOut` and `numIn`
308/// generated by `convert` to update the relevant cursors.
309/// @code
310/// output += numOut;
311/// input += numIn;
312///
313/// if (output == outputEnd) { // output buffer full; write data
314/// os.write (outputBuffer, sizeof outputBuffer);
315/// if (os.fail()) {
316/// return IO_ERROR; // RETURN
317/// }
318/// output = outputBuffer;
319/// }
320/// }
321/// }
322/// @endcode
323/// We have now exited both the input and the "decode" loops. `converter` may
324/// still hold decoded output characters, and so we call `converter.endConvert`
325/// to emit any retained output. To guarantee correct behavior, we call this
326/// method in an infinite loop, because it is possible that the retained output
327/// can fill the output buffer. In that case, we solve the problem by writing
328/// the contents of the output buffer to `os` within the loop. The most likely
329/// case, however, is that `endConvert` will return 0, in which case we exit the
330/// loop and write any data remaining in `outputBuffer` to `os`. As above, if
331/// `endConvert` fails, we exit the function with a negative return status.
332/// @code
333/// while (1) {
334///
335/// int numOut;
336///
337/// int more = converter.endConvert(output, &numOut, outputEnd-output);
338/// if (more < 0) {
339/// return DECODE_ERROR; // RETURN
340/// }
341///
342/// output += numOut;
343///
344/// if (!more) { // no more output
345/// break;
346/// }
347///
348/// assert (output == outputEnd); // output buffer is full
349///
350/// os.write (outputBuffer, sizeof outputBuffer); // write buffer
351/// if (os.fail()) {
352/// return IO_ERROR; // RETURN
353/// }
354/// output = outputBuffer;
355/// }
356///
357/// if (output > outputBuffer) { // still data in output buffer; write it
358/// // all
359/// os.write(outputBuffer, output - outputBuffer);
360/// }
361///
362/// return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
363/// }
364///
365/// } // Close namespace BloombergLP
366/// @endcode
367/// For ease of reading, we repeat the full content of the `streamdecoder.cpp`
368/// file without interruption.
369/// @code
370/// streamdecoder.cpp -*-C++-*-
371///
372/// #include <streamdecoder.h>
373///
374/// #include <bdlde_base64decoder.h>
375///
376/// namespace BloombergLP {
377///
378/// int streamDecoder(bsl::ostream& os, bsl::istream& is)
379/// {
380/// enum {
381/// SUCCESS = 0,
382/// DECODE_ERROR = -1,
383/// IO_ERROR = -2
384/// };
385///
386/// bdlde::Base64Decoder converter(false); // Do not report errors.
387///
388/// const int INBUFFER_SIZE = 1 << 10;
389/// const int OUTBUFFER_SIZE = 1 << 10;
390///
391/// char inputBuffer[INBUFFER_SIZE];
392/// char outputBuffer[OUTBUFFER_SIZE];
393///
394/// char *output = outputBuffer;
395/// char *outputEnd = outputBuffer + sizeof outputBuffer;
396///
397/// while (is.good()) { // input stream not exhausted
398///
399/// is.read(inputBuffer, sizeof inputBuffer);
400///
401/// const char *input = inputBuffer;
402/// const char *inputEnd = input + is.gcount();
403///
404/// while (input < inputEnd) { // input encoding not complete
405///
406/// int numOut;
407/// int numIn;
408///
409/// int status = converter.convert(output, &numOut, &numIn,
410/// input, inputEnd,
411/// outputEnd - output);
412/// if (status < 0) {
413/// return DECODE_ERROR; // RETURN
414/// }
415///
416/// output += numOut;
417/// input += numIn;
418///
419/// if (output == outputEnd) { // output buffer full; write data
420/// os.write(outputBuffer, sizeof outputBuffer);
421/// if (os.fail()) {
422/// return IO_ERROR; // RETURN
423/// }
424/// output = outputBuffer;
425/// }
426/// }
427/// }
428///
429/// while (1) {
430///
431/// int numOut;
432///
433/// int more = converter.endConvert(output, &numOut, outputEnd-output);
434/// if (more < 0) {
435/// return DECODE_ERROR; // RETURN
436/// }
437///
438/// output += numOut;
439///
440/// if (!more) { // no more output
441/// break;
442/// }
443///
444/// assert (output == outputEnd); // output buffer is full
445///
446/// os.write (outputBuffer, sizeof outputBuffer); // write buffer
447/// if (os.fail()) {
448/// return IO_ERROR; // RETURN
449/// }
450/// output = outputBuffer;
451/// }
452///
453/// if (output > outputBuffer) {
454/// os.write (outputBuffer, output - outputBuffer);
455/// }
456///
457/// return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
458/// }
459///
460/// } // Close namespace BloombergLP
461/// @endcode
462/// @}
463/** @} */
464/** @} */
465
466/** @addtogroup bdl
467 * @{
468 */
469/** @addtogroup bdlde
470 * @{
471 */
472/** @addtogroup bdlde_base64decoder
473 * @{
474 */
475
476#include <bdlscm_version.h>
477
478#include <bdlde_base64alphabet.h>
481
482#include <bslmf_assert.h>
483
484#include <bsls_alignedbuffer.h>
485#include <bsls_assert.h>
487#include <bsls_performancehint.h>
488#include <bsls_review.h>
489#include <bsls_types.h>
490
491#include <bsl_cstring.h>
492#include <bsl_cstdint.h>
493#include <bsl_iostream.h>
494
495#ifdef __SSE4_2__
496#include <emmintrin.h>
497#include <smmintrin.h>
498#include <tmmintrin.h>
499#endif
500
501
502namespace bdlde {
503
504 // ===================
505 // class Base64Decoder
506 // ===================
507
508/// This class implements a mechanism capable of converting data of
509/// arbitrary length from its corresponding Base64 representation.
510///
511/// See @ref bdlde_base64decoder
513
514 public:
515 // PUBLIC TYPES
517
518 // PUBLIC CONSTANTS
521
522 private:
523 // PRIVATE TYPES
526
527 enum State {
528 // Symbolic state values.
529
530 e_ERROR_STATE = -1, // input is irreparably invalid
531 e_INPUT_STATE = 0, // general input state
532 e_NEED_EQUAL_STATE = 1, // need an '='
533 e_SOFT_DONE_STATE = 2, // only ignorable input and 'endConvert'
534 e_DONE_STATE = 3 // any additional input is an error
535 };
536
537 // INSTANCE DATA
538 int d_outputLength; // total number of output
539 // characters
540
541 const char *const d_alphabet_p; // selected alphabet based on
542 // specified alphabet type
543
544 const bool *const d_ignorable_p; // selected table of ignorable
545 // characters based on specified
546 // error-reporting mode
547
548 unsigned d_stack; // word containing 6-bit chunks of
549 // data to be assembled into bytes
550
551 int d_bitsInStack; // number of bits in 'd_stack'
552
553 State d_state; // state of this object as defined
554 // by the 'State' enum.
555
556 const Alphabet d_alphabet; // 'e_BASIC' or 'e_URL'.
557
558 const IgnoreMode::Enum d_ignoreMode; // 'e_IGNORE_NONE',
559 // 'e_IGNORE_WHITESPACE', or
560 // 'e_IGNORE_UNRECOGNIZED'
561
562 const bool d_isPadded; // 'true' means '=' padding is
563 // required, 'false' means '=' is
564 // an error
565
566 private:
567 // NOT IMPLEMENTED
569 Base64Decoder& operator=(const Base64Decoder&);
570
571 // PRIVATE ACCESSORS
572
573 /// Return the number of bits of output there are (either already done or
574 /// to be done) since the end of the last 4-bytes of input. Note that
575 /// input to this decoder, other than ignored whitespace or garbage,
576 /// comes in 4 byte quads, each of which results in 3 bytes of output,
577 /// and this accessor is particularly useful in calculating output for
578 /// the last partial quad of input.
579 int residualBits(int bytesOutputSoFar) const;
580
581 public:
582 // CLASS METHODS
583
584 /// Return the maximum number of decoded bytes that could result from an
585 /// input byte sequence of the specified `inputLength` provided to the
586 /// `convert` and `endConvert` methods of this decoder. The behavior is
587 /// undefined unless `0 <= inputLength`. Note that the result is
588 /// independent of which options are provided to the decoder.
589 static int maxDecodedLength(int inputLength);
590
591 // CREATORS
592
593 explicit
595 /// Create a Base64 decoder with options determined by the specified
596 /// `options`.
597
598 BSLS_DEPRECATE_FEATURE("bdl", "Base64Decoder", "use options c'tor")
599 /// Create a Base64 decoder in the initial state. Unrecognized
600 /// characters (i.e., non-base64 characters other than whitespace) will
601 /// be treated as errors if the specified
602 /// `unrecognizedNonWhitespaceIsErrorFlag` is `true`, and ignored
603 /// otherwise. Optionally specify an alphabet used to decode input
604 /// characters. If `alphabet` is not specified, then the basic
605 /// alphabet, "base64", is used. Padded input is assumed.
606 ///
607 /// @deprecated Use the overload that takes `options` instead.
608 explicit
609 Base64Decoder(bool unrecognizedNonWhitespaceIsErrorFlag,
611
612 /// Destroy this object.
614
615 // MANIPULATORS
616
617 /// Decode the sequence of input characters starting at the specified
618 /// `begin` position up to, but not including, the specified `end`
619 /// position, writing any resulting output characters to the specified
620 /// `out` buffer. Optionally specify the `maxNumOut` limit on the
621 /// number of bytes to output; if `maxNumOut` is negative, no limit is
622 /// imposed. If the `maxNumOut` limit is reached, no further input will
623 /// be consumed. Load into the (optionally) specified `numOut` and
624 /// `numIn` the number of output bytes produced and input bytes
625 /// consumed, respectively. Return a non-negative value on success, -1
626 /// on an input error, and -2 if the `endConvert` method has already
627 /// been called without an intervening `resetState` call. A return
628 /// status of -1 indicates that the data at `begin` + `numIn`
629 /// constitutes an irrecoverably undecodable input sequence (i.e., the
630 /// data cannot be extended to form any valid encoding). A positive
631 /// return status indicates the number of valid processed output bytes
632 /// retained by this decoder and not written to `out` because
633 /// `maxNumOut` has been reached; these bytes are available for output
634 /// if this method is called with appropriate input. Note that it is
635 /// recommended that after all calls to `convert` are finished, the
636 /// `endConvert` method be called to complete the encoding of any
637 /// unprocessed input characters that do not complete a 3-byte sequence.
638 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
639 int convert(OUTPUT_ITERATOR out,
640 INPUT_ITERATOR begin,
641 INPUT_ITERATOR end);
642 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
643 int convert(OUTPUT_ITERATOR out,
644 int *numOut,
645 int *numIn,
646 INPUT_ITERATOR begin,
647 INPUT_ITERATOR end,
648 int maxNumOut = -1);
649
650 /// Terminate decoding for this decoder; write any retained output
651 /// (e.g., from a previous call to `convert` with a non-zero optionally
652 /// specified `maxNumOut` argument) to the specified `out` buffer;
653 /// encode any unprocessed input characters that do not complete a
654 /// 3-byte sequence. The argument `maxNumOut` is the limit on the
655 /// number of bytes to output; if `maxNumOut` is negative, no limit is
656 /// imposed. Load into the (optionally) specified `numOut` the number
657 /// of output bytes produced. Return 0 on success, the positive number
658 /// of bytes *still* retained by this decoder if the `maxNumOut` limit
659 /// was reached, and a negative value otherwise. Any retained bytes are
660 /// available on a subsequent call to `endConvert`. Once this method is
661 /// called, no additional input may be supplied without an intervening
662 /// call to `resetState`; once this method returns a zero status, a
663 /// subsequent call will place this decoder in the error state, and
664 /// return an error status.
665 template <class OUTPUT_ITERATOR>
666 int endConvert(OUTPUT_ITERATOR out);
667 template <class OUTPUT_ITERATOR>
668 int endConvert(OUTPUT_ITERATOR out,
669 int *numOut,
670 int maxNumOut = -1);
671
672 /// Reset this instance to its initial state (i.e., as if no input had
673 /// been consumed).
674 void resetState();
675
676 // ACCESSORS
677
678 /// Return the alphabet supplied at construction of this object.
679 Alphabet alphabet() const;
680
681 /// Return the `ignoreMode` state of this decoder.
682 IgnoreMode::Enum ignoreMode() const;
683
684 /// Return `true` if the input read so far is considered syntactically
685 /// complete, and `false` otherwise. Note that the number of relevant
686 /// input characters must be divisible by 4.
687 bool isAcceptable() const;
688
689 /// Return `true` if the current input is acceptable and any additional
690 /// input (including `endConvert`) would be an error, and `false`
691 /// otherwise. Note that if this decoder `isDone` then all resulting
692 /// output has been emitted to `out`.
693 bool isDone() const;
694
695 /// Return `true` if there is no possibility of achieving an
696 /// "acceptable" result, and `false` otherwise.
697 bool isError() const;
698
699 /// Return `true` if this instance is in the initial state (i.e., as
700 /// if no input had been consumed), and `false` otherwise.
701 bool isInitialState() const;
702
703 /// Return `true` if the current input is acceptable and any additional
704 /// input (other than `endConvert`) would be an error, and `false`
705 /// otherwise.
706 bool isMaximal() const;
707
708 /// Return `true` if this object is configured for padded input and
709 /// `false` otherwise.
710 bool isPadded() const;
711
713 /// Return `true` if this mechanism is currently configured to report an
714 /// error when an unrecognized character (i.e., a character other than
715 /// one of the 64 "numeric" base-64 characters, `=`, or whitespace) is
716 /// encountered, and `false` otherwise.
717 ///
718 /// @deprecated use the `ignoreMode` accessor instead.
719 bool isUnrecognizedAnError() const;
720
721 /// Return a `Base64DecoderOptions` object representing the
722 /// configuration of this decoder.
723 DecoderOptions options() const;
724
725 /// Return the total length of the output emitted thus far.
726 int outputLength() const;
727};
728
729// ============================================================================
730// INLINE DEFINITIONS
731// ============================================================================
732
733 // -------------------
734 // class Base64Decoder
735 // -------------------
736
737// PRIVATE ACCESSORS
738inline
739int Base64Decoder::residualBits(int bytesOutputSoFar) const
740{
741 BSLS_ASSERT(0 <= bytesOutputSoFar);
742 BSLS_ASSERT(0 <= d_bitsInStack);
743
744 // If one byte has been read since that last completed quad of input, the
745 // result will be 6, and it will be an error if no more input is available.
746 //
747 // If two bytes have been read since the last complete quad of input, the
748 // result will be 12, and if input is done:
749 //: o If all output has been done, 'd_stack == 0'
750 //:
751 //: o If a byte of output remains to be done, the low-order 4 bytes of
752 //: 'd_stack' should be 0.
753 //
754 // If three bytes have been read since the last complete quad of input, the
755 // result will be 18, and if input is done,
756 //: o If the last 2 bytes of output have been done, the low-order 2 bytes
757 //: of 'd_stack' should be 0.
758 //:
759 //: o If one of the last 2 bytes of output have been done, there will be 10
760 //: bits in the stack, the low-order 2 bits of which should be 0.
761 //:
762 //: o If none of the last 2 bytes of output to be done, there will be 18
763 //: bits in the stack, the low-order 2 bits of which will be 0.
764
765 int ret = ((bytesOutputSoFar % 3) * 8 + d_bitsInStack) % 24;
766 BSLS_ASSERT(e_INPUT_STATE != d_state || 0 == ret % 6);
767 return ret;
768}
769
770// CLASS METHODS
771inline
773{
774 BSLS_ASSERT(0 <= inputLength);
775
776 return (inputLength + 3) / 4 * 3;
777}
778
779// MANIPULATORS
780template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
781int Base64Decoder::convert(OUTPUT_ITERATOR out,
782 INPUT_ITERATOR begin,
783 INPUT_ITERATOR end)
784{
785 int dummyNumOut;
786 int dummyNumIn;
787
788 return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1);
789}
790
791template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
792int Base64Decoder::convert(OUTPUT_ITERATOR out,
793 int *numOut,
794 int *numIn,
795 INPUT_ITERATOR begin,
796 INPUT_ITERATOR end,
797 int maxNumOut)
798{
799 BSLS_ASSERT(numOut);
800 BSLS_ASSERT(numIn);
801
802 if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
803 int rv = e_DONE_STATE == d_state ? -2 : -1;
804 d_state = e_ERROR_STATE;
805 *numOut = 0;
806 *numIn = 0;
807 return rv; // RETURN
808 }
809
810 int numEmitted = 0;
811
812 // Emit as many output bytes as possible.
813
814 while (8 <= d_bitsInStack && numEmitted != maxNumOut) {
815 d_bitsInStack -= 8;
816 *out = static_cast<char>((d_stack >> d_bitsInStack) & 0xff);
817 ++out;
818 ++numEmitted;
819 }
820
821 // Consume as many input bytes as possible.
822
823 *numIn = 0;
824
825 if (e_INPUT_STATE == d_state) {
826 while (18 >= d_bitsInStack && begin != end) {
827 const unsigned char byte = static_cast<unsigned char>(*begin);
828
829 ++begin;
830 ++*numIn;
831
832 unsigned char converted = static_cast<unsigned char>(
833 d_alphabet_p[byte]);
834
835 if (converted < 64) {
836 d_stack = (d_stack << 6) | converted;
837 d_bitsInStack += 6;
838 if (8 <= d_bitsInStack && numEmitted != maxNumOut) {
839 d_bitsInStack -= 8;
840 *out = static_cast<char>(
841 (d_stack >> d_bitsInStack) & 0xff);
842 ++out;
843 ++numEmitted;
844 }
845 }
846 else if (!d_ignorable_p[byte]) {
847 if ('=' == byte && d_isPadded) {
848 const int residual = residualBits(
849 d_outputLength + numEmitted);
850 // 'residual' is 0, 6, 12, or 18.
851 //: o If it's 0, that's an error since no '=' should be
852 //: needed.
853 //:
854 //: o If it's 6, that's an error because an incomplete
855 //: byte has been input.
856 //:
857 //: o 12 means 2 bytes have been read, meaning we have to
858 //: do 1 byte of output (which we may have already done).
859 //: The low-order 4 bits of stack should either be
860 //: 0 or the stack should be empty.
861 //:
862 //: o 18 means 3 bytes have been read, meaning we have to
863 //: do 2 bytes of output (some or all of which we may
864 //: have already done). The low-order 2 bits of stack
865 //: should either be 0 or the stack should be empty.
866
867 const int leftOver = residual % 8;
868 d_state = 0 != (d_stack & ((1 << leftOver) - 1))
869 ? e_ERROR_STATE
870 : 12 == residual
871 ? e_NEED_EQUAL_STATE
872 : 18 == residual
873 ? e_SOFT_DONE_STATE
874 : e_ERROR_STATE;
875 d_stack >>= leftOver;
876 d_bitsInStack -= leftOver;
877 }
878 else {
879 d_state = e_ERROR_STATE;
880 }
881 break;
882 }
883 }
884 }
885
886 if (e_NEED_EQUAL_STATE == d_state) {
887 BSLS_ASSERT(d_isPadded);
888
889 while (begin != end) {
890 const unsigned char byte = static_cast<unsigned char>(*begin);
891
892 ++begin;
893 ++*numIn;
894
895 if (!d_ignorable_p[byte]) {
896 if ('=' == byte) {
897 d_state = e_SOFT_DONE_STATE;
898 }
899 else {
900 d_state = e_ERROR_STATE;
901 }
902 break;
903 }
904 }
905 }
906 if (e_SOFT_DONE_STATE == d_state) {
907 while (begin != end) {
908 const unsigned char byte = static_cast<unsigned char>(*begin);
909
910 ++begin;
911 ++*numIn;
912
913 if (!d_ignorable_p[byte]) {
914 d_state = e_ERROR_STATE;
915 break;
916 }
917 }
918 }
919
920 *numOut = numEmitted;
921 d_outputLength += numEmitted;
922
923 return e_ERROR_STATE == d_state ? -1 : d_bitsInStack / 8;
924}
925
926template<>
927inline
928int Base64Decoder::convert<char *, const char *>(
929 char *out,
930 int *numOut,
931 int *numIn,
932 const char *begin,
933 const char *end,
934 int maxNumOut)
935{
936 BSLS_ASSERT(numOut);
937 BSLS_ASSERT(numIn);
938
940 e_ERROR_STATE == d_state || e_DONE_STATE == d_state)) {
941 int rv = e_DONE_STATE == d_state ? -2 : -1;
942 d_state = e_ERROR_STATE;
943 *numOut = 0;
944 *numIn = 0;
945 return rv; // RETURN
946 }
947
948 int numEmitted = 0;
949
950 // Emit as many output bytes as possible.
951
952 if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(8 <= d_bitsInStack)) {
953 while (8 <= d_bitsInStack && numEmitted != maxNumOut) {
954 d_bitsInStack -= 8;
955 *out = static_cast<char>((d_stack >> d_bitsInStack) & 0xff);
956 ++out;
957 ++numEmitted;
958 }
959 }
960
961 // Consume as many input bytes as possible.
962
963 const char *originalBegin = begin;
964
965 if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(e_INPUT_STATE == d_state)) {
966 if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(d_bitsInStack == 0)) {
967 // Optimize for common case
968#ifdef __SSE4_2__
969 // Load 16-byte slices of LUT. Note that the entire 256-byte LUT
970 // is *not* loaded, but only the middle slices that are non-ff.
971 const __m128i *alphabetSlices =
972 reinterpret_cast<const __m128i *>(d_alphabet_p);
973 __m128i lut5 = _mm_loadu_si128(alphabetSlices + 7);
974 __m128i lut4 = _mm_loadu_si128(alphabetSlices + 6);
975 __m128i lut3 = _mm_loadu_si128(alphabetSlices + 5);
976 __m128i lut2 = _mm_loadu_si128(alphabetSlices + 4);
977 __m128i lut1 = _mm_loadu_si128(alphabetSlices + 3);
978 __m128i lut0 = _mm_loadu_si128(alphabetSlices + 2);
979
980 // Heavily inspired by techniques outlined in
981 // http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html
982
983 // xor LUT fragments together for pshufb-xor chaining below.
984 lut5 = _mm_xor_si128(lut5, lut4);
985 lut4 = _mm_xor_si128(lut4, lut3);
986 lut3 = _mm_xor_si128(lut3, lut2);
987 lut2 = _mm_xor_si128(lut2, lut1);
988 lut1 = _mm_xor_si128(lut1, lut0);
989
990 while (end - begin >= 16 && static_cast<unsigned>(numEmitted + 12)
991 <= static_cast<unsigned>(maxNumOut)) {
992 // Load 16 base64 characters (will eventually be transformed
993 // into 12 bytes)
994 __m128i x = _mm_loadu_si128(
995 reinterpret_cast<const __m128i *>(begin));
996
997 // Offset indexes to match first LUT slice at offset 0x20
998 x = _mm_subs_epi8(x, _mm_set1_epi8(0x20));
999
1000 // If indexes were < 0x20, 'x' will contain negative values
1001 // which we will check for later (minimum bounds check)
1002 __m128i tooSmall = x;
1003
1004 // Using the characters as indexes, look up the corresponding
1005 // values from the LUT. If an index is non-negative, only its
1006 // low 4 bits are considered. If an index is negative, 0 is
1007 // returned for its lookup value.
1008 __m128i decoded = _mm_shuffle_epi8(lut0, x);
1009
1010 // Advance to the next LUT slice. Note that if the previous
1011 // slice was the correct one for a given index, the index will
1012 // become negative after this, resulting in subsequent lookups
1013 // simply xor-ing 0 (harmless no-ops).
1014 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1015
1016 // Perform the next lookup using the same low 4 bits of each
1017 // non-negative index. The result is then xor-ed with the
1018 // previous lookup result. For negative indices, this is a
1019 // no-op, while for non-negative indices, the xor with the
1020 // previous LUT slice value cancels out the xor-ing done to the
1021 // LUT slices above the loop, leaving the original value from
1022 // this LUT slice.
1023 decoded = _mm_xor_si128(decoded, _mm_shuffle_epi8(lut1, x));
1024
1025 // Continue to advance to each LUT slice
1026 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1027 decoded = _mm_xor_si128(decoded, _mm_shuffle_epi8(lut2, x));
1028 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1029 decoded = _mm_xor_si128(decoded, _mm_shuffle_epi8(lut3, x));
1030 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1031 decoded = _mm_xor_si128(decoded, _mm_shuffle_epi8(lut4, x));
1032 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1033 decoded = _mm_xor_si128(decoded, _mm_shuffle_epi8(lut5, x));
1034 x = _mm_subs_epi8(x, _mm_set1_epi8(0x10));
1035
1036 // At this point, the indexes in 'x' should be negative, as
1037 // we've exhausted all populated LUT slices. If any are not,
1038 // that indicates the maximum bounds check failed.
1039
1040 // Check the minimum and maximum bounds were respected, as well
1041 // as for any 'ff' values loaded from LUT slices themselves.
1043 !_mm_testz_si128(tooSmall | decoded | ~x,
1044 _mm_set1_epi8(static_cast<char>(0x80))))) {
1045 // Unknown char; could be error or could be a character to
1046 // ignore; either way fall back to regular decoding
1047 break;
1048 }
1049
1050 // 'decoded' currently contains dwords layed out like
1051 // |00aaaaaa|00bbbbbb|00cccccc|00dddddd|. Convert to
1052 // |0000aaaa aabbbbbb|0000cccc ccdddddd| with a multiply-add.
1053 decoded = _mm_maddubs_epi16(decoded, _mm_set1_epi16(0x0140));
1054
1055 // Convert to final form of
1056 // |00000000 aaaaaabb bbbbcccc ccdddddd| with another multiply-
1057 // add. Note that each triplet of values is aligned to a byte
1058 // boundary following this operation.
1059 decoded = _mm_madd_epi16(decoded, _mm_set1_epi32(0x00011000));
1060
1061 // Take care of endianness and last four one-byte gaps by
1062 // explicitly selecting each byte we want in order.
1063 __m128i selection = _mm_set_epi64(
1064 reinterpret_cast<__m64>(0xffffffff0c0d0e08ull),
1065 reinterpret_cast<__m64>(0x090a040506000102ull));
1066 decoded = _mm_shuffle_epi8(decoded, selection);
1067
1068 // Store the result
1069 memcpy(out, &decoded, 12);
1070
1071 begin += 16;
1072 numEmitted += 12;
1073 out += 12;
1074 }
1075#endif
1076 while (end - begin >= 4 && static_cast<unsigned>(numEmitted + 3)
1077 <= static_cast<unsigned>(maxNumOut)) {
1079 uint8_t *in = reinterpret_cast<uint8_t *>(inBuffer.buffer());
1080 memcpy(in, begin, 4);
1081
1082 uint8_t x[4];
1083 x[0] = static_cast<uint8_t>(d_alphabet_p[in[0]]);
1084 x[1] = static_cast<uint8_t>(d_alphabet_p[in[1]]);
1085 x[2] = static_cast<uint8_t>(d_alphabet_p[in[2]]);
1086 x[3] = static_cast<uint8_t>(d_alphabet_p[in[3]]);
1087
1088 uint32_t x4;
1089 memcpy(&x4, x, sizeof(x4));
1090 if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(x4 & 0x80808080u)) {
1091 // Unknown char; could be error or could be a character to
1092 // ignore; either way fall back to char-by-char decoding
1093 break;
1094 }
1095
1096 out[0] = static_cast<char>((x[0] << 2) | (x[1] >> 4));
1097 out[1] = static_cast<char>((x[1] << 4) | (x[2] >> 2));
1098 out[2] = static_cast<char>((x[2] << 6) | (x[3] >> 0));
1099
1100 begin += 4;
1101 numEmitted += 3;
1102 out += 3;
1103 }
1104 }
1105
1106 while (18 >= d_bitsInStack && begin != end) {
1107 const unsigned char byte = static_cast<unsigned char>(*begin);
1108
1109 ++begin;
1110
1111 unsigned char converted = static_cast<unsigned char>(
1112 d_alphabet_p[byte]);
1113
1114 if (converted < 64) {
1115 d_stack = (d_stack << 6) | converted;
1116 d_bitsInStack += 6;
1117 if (8 <= d_bitsInStack && numEmitted != maxNumOut) {
1118 d_bitsInStack -= 8;
1119 *out = static_cast<char>(
1120 (d_stack >> d_bitsInStack) & 0xff);
1121 ++out;
1122 ++numEmitted;
1123 }
1124 }
1125 else if (!d_ignorable_p[byte]) {
1126 if ('=' == byte && d_isPadded) {
1127 const int residual = residualBits(
1128 d_outputLength + numEmitted);
1129 // 'residual' is 0, 6, 12, or 18.
1130 //: o If it's 0, that's an error since no '=' should be
1131 //: needed.
1132 //:
1133 //: o If it's 6, that's an error because an incomplete
1134 //: byte has been input.
1135 //:
1136 //: o 12 means 2 bytes have been read, meaning we have to
1137 //: do 1 byte of output (which we may have already done).
1138 //: The low-order 4 bits of stack should either be
1139 //: 0 or the stack should be empty.
1140 //:
1141 //: o 18 means 3 bytes have been read, meaning we have to
1142 //: do 2 bytes of output (some or all of which we may
1143 //: have already done). The low-order 2 bits of stack
1144 //: should either be 0 or the stack should be empty.
1145
1146 const int leftOver = residual % 8;
1147 d_state = 0 != (d_stack & ((1 << leftOver) - 1))
1148 ? e_ERROR_STATE
1149 : 12 == residual
1150 ? e_NEED_EQUAL_STATE
1151 : 18 == residual
1152 ? e_SOFT_DONE_STATE
1153 : e_ERROR_STATE;
1154 d_stack >>= leftOver;
1155 d_bitsInStack -= leftOver;
1156 }
1157 else {
1158 d_state = e_ERROR_STATE;
1159 }
1160 break;
1161 }
1162 }
1163 }
1164
1165 if (e_NEED_EQUAL_STATE == d_state) {
1166 BSLS_ASSERT(d_isPadded);
1167
1168 while (begin != end) {
1169 const unsigned char byte = static_cast<unsigned char>(*begin);
1170
1171 ++begin;
1172
1173 if (!d_ignorable_p[byte]) {
1174 if ('=' == byte) {
1175 d_state = e_SOFT_DONE_STATE;
1176 }
1177 else {
1178 d_state = e_ERROR_STATE;
1179 }
1180 break;
1181 }
1182 }
1183 }
1184 if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(e_SOFT_DONE_STATE == d_state
1185 && begin != end)) {
1186 do {
1187 const unsigned char byte = static_cast<unsigned char>(*begin);
1188
1189 ++begin;
1190
1191 if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(!d_ignorable_p[byte])) {
1192 d_state = e_ERROR_STATE;
1193 break;
1194 }
1195 } while (begin != end);
1196 }
1197
1198 *numIn = static_cast<int>(begin - originalBegin);
1199 *numOut = numEmitted;
1200 d_outputLength += numEmitted;
1201
1202 return e_ERROR_STATE == d_state ? -1 : d_bitsInStack / 8;
1203}
1204
1205template<>
1206inline
1207int Base64Decoder::convert<unsigned char *, const unsigned char *>(
1208 unsigned char *out,
1209 int *numOut,
1210 int *numIn,
1211 const unsigned char *begin,
1212 const unsigned char *end,
1213 int maxNumOut)
1214{
1215 return convert(reinterpret_cast<char *>(out),
1216 numOut,
1217 numIn,
1218 reinterpret_cast<const char *>(begin),
1219 reinterpret_cast<const char *>(end),
1220 maxNumOut);
1221}
1222
1223
1224template <class OUTPUT_ITERATOR>
1225int Base64Decoder::endConvert(OUTPUT_ITERATOR out)
1226{
1227 int dummyNumOut;
1228
1229 return endConvert(out, &dummyNumOut, -1);
1230}
1231
1232template <class OUTPUT_ITERATOR>
1233int Base64Decoder::endConvert(OUTPUT_ITERATOR out,
1234 int *numOut,
1235 int maxNumOut)
1236{
1237 BSLS_ASSERT(numOut);
1238
1239 if (!d_isPadded && e_INPUT_STATE == d_state) {
1240 const int residual = residualBits(d_outputLength);
1241 const int leftOver = residual % 8;
1242 if (BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(6 == residual ||
1243 0 != (d_stack & ((1 << leftOver) - 1)))) {
1244 d_state = e_ERROR_STATE;
1245 *numOut = 0;
1246 return -1; // RETURN
1247 }
1248 else {
1249 d_stack >>= leftOver;
1250 d_bitsInStack -= leftOver;
1251 }
1252 }
1253
1254 if (e_ERROR_STATE == d_state || e_NEED_EQUAL_STATE == d_state ||
1255 (e_DONE_STATE == d_state && 0 == d_bitsInStack) ||
1256 (d_isPadded && e_INPUT_STATE == d_state &&
1257 0 != residualBits(d_outputLength))) {
1258 d_state = e_ERROR_STATE;
1259 *numOut = 0;
1260 return -1; // RETURN
1261 }
1262
1263 BSLS_ASSERT(0 == d_bitsInStack % 8);
1264
1265 d_state = e_DONE_STATE;
1266
1267 int numEmitted;
1268 for (numEmitted = 0; 8 <= d_bitsInStack && numEmitted != maxNumOut;
1269 ++numEmitted) {
1270 d_bitsInStack -= 8;
1271 *out++ = static_cast<char>((d_stack >> d_bitsInStack) & 0xff);
1272 }
1273
1274 *numOut = numEmitted;
1275 d_outputLength += numEmitted;
1276
1277 return d_bitsInStack / 8;
1278}
1279
1280inline
1282{
1283 d_state = e_INPUT_STATE;
1284 d_outputLength = 0;
1285 d_bitsInStack = 0;
1286}
1287
1288// ACCESSORS
1289inline
1291{
1292 return d_alphabet;
1293}
1294
1295inline
1297{
1298 return d_ignoreMode;
1299}
1300
1301inline
1303{
1304 const int residual = residualBits(d_outputLength);
1305 return (0 == residual && e_INPUT_STATE == d_state) ||
1306 e_SOFT_DONE_STATE == d_state || e_DONE_STATE == d_state;
1307}
1308
1309inline
1311{
1312 return !d_bitsInStack && e_DONE_STATE == d_state;
1313}
1314
1315inline
1317{
1318 return e_ERROR_STATE == d_state;
1319}
1320
1321inline
1323{
1324 return e_INPUT_STATE == d_state
1325 && 0 == d_bitsInStack
1326 && 0 == d_outputLength;
1327}
1328
1329inline
1331{
1332 return e_SOFT_DONE_STATE == d_state
1333 || (d_bitsInStack && e_DONE_STATE == d_state);
1334}
1335
1336inline
1338{
1339 return d_isPadded;
1340}
1341
1342inline
1347
1348inline
1353
1354inline
1356{
1357 return d_outputLength;
1358}
1359
1360} // close package namespace
1361
1362
1363#endif
1364
1365// ----------------------------------------------------------------------------
1366// Copyright 2015 Bloomberg Finance L.P.
1367//
1368// Licensed under the Apache License, Version 2.0 (the "License");
1369// you may not use this file except in compliance with the License.
1370// You may obtain a copy of the License at
1371//
1372// http://www.apache.org/licenses/LICENSE-2.0
1373//
1374// Unless required by applicable law or agreed to in writing, software
1375// distributed under the License is distributed on an "AS IS" BASIS,
1376// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1377// See the License for the specific language governing permissions and
1378// limitations under the License.
1379// ----------------------------- END-OF-FILE ----------------------------------
1380
1381/** @} */
1382/** @} */
1383/** @} */
Definition bdlde_base64decoderoptions.h:222
static Base64DecoderOptions custom(IgnoreMode::Enum ignoreMode, Base64Alphabet::Enum alphabet, bool padded)
Definition bdlde_base64decoderoptions.h:397
Definition bdlde_base64decoder.h:512
bool isInitialState() const
Definition bdlde_base64decoder.h:1322
static int maxDecodedLength(int inputLength)
Definition bdlde_base64decoder.h:772
bool isError() const
Definition bdlde_base64decoder.h:1316
bool isDone() const
Definition bdlde_base64decoder.h:1310
Base64Alphabet::Enum Alphabet
Definition bdlde_base64decoder.h:516
int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end)
Definition bdlde_base64decoder.h:781
Alphabet alphabet() const
Return the alphabet supplied at construction of this object.
Definition bdlde_base64decoder.h:1290
bool isAcceptable() const
Definition bdlde_base64decoder.h:1302
IgnoreMode::Enum ignoreMode() const
Return the ignoreMode state of this decoder.
Definition bdlde_base64decoder.h:1296
int outputLength() const
Return the total length of the output emitted thus far.
Definition bdlde_base64decoder.h:1355
Base64Decoder(const Base64DecoderOptions &options)
int endConvert(OUTPUT_ITERATOR out)
Definition bdlde_base64decoder.h:1225
static const Alphabet e_BASIC
Definition bdlde_base64decoder.h:519
void resetState()
Definition bdlde_base64decoder.h:1281
bool isUnrecognizedAnError() const
Definition bdlde_base64decoder.h:1343
bool isMaximal() const
Definition bdlde_base64decoder.h:1330
DecoderOptions options() const
Definition bdlde_base64decoder.h:1349
bool isPadded() const
Definition bdlde_base64decoder.h:1337
static const Alphabet e_URL
Definition bdlde_base64decoder.h:520
Definition bsls_alignedbuffer.h:261
char * buffer()
Definition bsls_alignedbuffer.h:294
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_DEPRECATE_FEATURE(UOR, FEATURE, MESSAGE)
Definition bsls_deprecatefeature.h:319
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
#define BSLS_PERFORMANCEHINT_PREDICT_LIKELY(expr)
Definition bsls_performancehint.h:451
#define BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(expr)
Definition bsls_performancehint.h:452
Definition bdlde_base64alphabet.h:118
Enum
Definition bdlde_base64alphabet.h:135
@ e_URL
Definition bdlde_base64alphabet.h:137
@ e_BASIC
Definition bdlde_base64alphabet.h:136
Definition bdlde_base64ignoremode.h:133
Enum
Definition bdlde_base64ignoremode.h:136
@ e_IGNORE_UNRECOGNIZED
Definition bdlde_base64ignoremode.h:142