BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlde_base64encoder.h
Go to the documentation of this file.
1/// @file bdlde_base64encoder.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlde_base64encoder.h -*-C++-*-
8#ifndef INCLUDED_BDLDE_BASE64ENCODER
9#define INCLUDED_BDLDE_BASE64ENCODER
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlde_base64encoder bdlde_base64encoder
15/// @brief Provide automata for converting to and from Base64 encodings.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlde
19/// @{
20/// @addtogroup bdlde_base64encoder
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlde_base64encoder-purpose"> Purpose</a>
25/// * <a href="#bdlde_base64encoder-classes"> Classes </a>
26/// * <a href="#bdlde_base64encoder-description"> Description </a>
27/// * <a href="#bdlde_base64encoder-base-64-encoding"> Base 64 Encoding </a>
28/// * <a href="#bdlde_base64encoder-base-64-encoding-with-url-and-filename-safe-alphabet"> Base 64 Encoding with URL and Filename Safe Alphabet </a>
29/// * <a href="#bdlde_base64encoder-base-64-decoding"> Base 64 Decoding </a>
30/// * <a href="#bdlde_base64encoder-usage"> Usage </a>
31/// * <a href="#bdlde_base64encoder-example-1-basic-usage"> Example 1: Basic Usage </a>
32///
33/// # Purpose {#bdlde_base64encoder-purpose}
34/// Provide automata for converting to and from Base64 encodings.
35///
36/// # Classes {#bdlde_base64encoder-classes}
37///
38/// - bdlde::Base64Encoder: automata performing Base64 encoding operations
39///
40/// @see bdlde_base64decoder
41///
42/// # Description {#bdlde_base64encoder-description}
43/// This component provides a `class`, `bdlde::Base64Encoder`,
44/// which provides a pair of template functions (each parameterized separately
45/// on both input and output iterators) that can be used respectively to encode
46/// and to decode byte sequences of arbitrary length into and from the printable
47/// Base64 representation described in Section 6.8 "Base64 Content Transfer
48/// Encoding" of RFC 2045, "Multipurpose Internet Mail Extensions (MIME) Part
49/// One: Format of Internet Message Bodies."
50///
51/// The `bdlde::Base64Encoder` and `bdlde::Base64Decoder` support the standard
52/// "base64" encoding (described in https://tools.ietf.org/html/rfc4648) as well
53/// as the "Base 64 Encoding with URL and Filename Safe Alphabet", or
54/// "base64url", encoding. The "base64url" encoding is very similar to "base64"
55/// but substitutes a couple characters in the encoded alphabet to avoid
56/// characters that conflict with special characters in URL syntax or filename
57/// descriptions (replacing `+` with `-`, and `/` with `_`). See
58/// {Base 64 Encoding with URL and Filename Safe Alphabet} for more information.
59///
60/// Each instance of either the encoder or decoder retains the state of the
61/// conversion from one supplied input to the next, enabling the processing of
62/// segmented input -- i.e., processing resumes where it left off with the next
63/// invocation on new input. Instance methods are provided for both the encoder
64/// and decoder to (1) assert the end of input, (2) determine whether the input
65/// so far is currently acceptable, and (3) indicate whether a non-recoverable
66/// error has occurred.
67///
68/// ## Base 64 Encoding {#bdlde_base64encoder-base-64-encoding}
69///
70///
71/// The data stream is processed three bytes at a time from left to right (a
72/// final quantum consisting of one or two bytes, as discussed below, is handled
73/// specially). Each sequence of three 8-bit quantities
74/// @code
75/// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
76/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
77/// | | | |
78/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
79/// `------v------' `------v------' `------v------'
80/// Byte2 Byte1 Byte0
81/// @endcode
82/// is segmented into four intermediate 6-bit quantities.
83/// @code
84/// 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0
85/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
86/// | | | | |
87/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
88/// `----v----' `----v----' `----v----' `----v----'
89/// char3 char2 char1 char0
90/// @endcode
91/// Each 6-bit quantity is in turn used as an index into the following character
92/// table to generate an 8-bit character. The four resulting characters hence
93/// form the encoding for the original 3-byte sequence.
94/// @code
95/// ======================================================================
96/// * Table of Numeric BASE-64 Encoding Characters *
97/// ----------------------------------------------------------------------
98/// Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc
99/// --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
100/// 0 'A' 8 'I' 16 'Q' 24 'Y' 32 'g' 40 'o' 48 'w' 56 '4'
101/// 1 'B' 9 'J' 17 'R' 25 'Z' 33 'h' 41 'p' 49 'x' 57 '5'
102/// 2 'C' 10 'K' 18 'S' 26 'a' 34 'i' 42 'q' 50 'y' 58 '6'
103/// 3 'D' 11 'L' 19 'T' 27 'b' 35 'j' 43 'r' 51 'z' 59 '7'
104/// 4 'E' 12 'M' 20 'U' 28 'c' 36 'k' 44 's' 52 '0' 60 '8'
105/// 5 'F' 13 'N' 21 'V' 29 'd' 37 'l' 45 't' 53 '1' 61 '9'
106/// 6 'G' 14 'O' 22 'W' 30 'e' 38 'm' 46 'u' 54 '2' 62 '+'
107/// 7 'H' 15 'P' 23 'X' 31 'f' 39 'n' 47 'v' 55 '3' 63 '/'
108/// ======================================================================
109/// @endcode
110/// This component also supports a slightly different alphabet, "base64url",
111/// that is more appropriate if the encoded representation would be used in a
112/// file name or URL (see
113/// {Base 64 Encoding with URL and Filename Safe Alphabet}).
114///
115/// The 3-byte grouping of the input is only a design of convenience and not a
116/// requirement. When the number of bytes in the input stream is not divisible
117/// by 3, sufficient 0 bits are padded on the right to achieve an integral
118/// number of 6-bit character indices. Then one of two special cases will apply
119/// for the final processing step:
120///
121/// I) There is a single byte of data, in which case there will be two Base64
122/// encoding characters (the second of which will be one of [AQgw]) followed by
123/// two equal (`=`) signs.
124///
125/// II) There are exactly two bytes of data, in which case there will be three
126/// Base64 encoding characters (the third of which will be one of
127/// [AEIMQUYcgkosw048]) followed by a single equal (`=`) sign.
128///
129/// The MIME standard requires that the maximum line length of emitted text not
130/// exceed 76 characters exclusive of CRLF. The caller may override this
131/// default if desired.
132///
133/// Input values of increasing length along with their corresponding Base64
134/// encodings are illustrated below:
135/// @code
136/// Data: /* nothing */
137/// Encoding: /* nothing */
138///
139/// Data: 0x01
140/// Encoding: AQ==
141///
142/// Data: 0x01 0x02
143/// Encoding: AQI=
144///
145/// Data: 0x01 0x02 0x03
146/// Encoding: AQID
147///
148/// Data: 0x01 0x02 0x03 0x04
149/// Encoding: AQIDBA==
150/// @endcode
151/// In order for a Base64 encoding to be valid, the input data must be either of
152/// length a multiple of three (constituting maximal input), or have been
153/// terminated explicitly by the `endConvert` method (initiating bit padding
154/// when necessary).
155///
156/// ## Base 64 Encoding with URL and Filename Safe Alphabet {#bdlde_base64encoder-base-64-encoding-with-url-and-filename-safe-alphabet}
157///
158///
159/// The encoder and decoder in this component also support the "base64url"
160/// encoding, which is the same as standard "base64" but substitutes (a couple)
161/// characters in the alphabet that are treated as special characters when used
162/// in a URL or in a file system. The following table is technically identical
163/// to the table presented in {Base 64 Encoding}, except for the characters at
164/// values 62 and 63, which are `-` and `_` respectively.
165/// @code
166/// ======================================================================
167/// * The "URL and Filename Safe" BASE-64 Alphabet *
168/// ----------------------------------------------------------------------
169/// Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc Val Enc
170/// --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
171/// 0 'A' 8 'I' 16 'Q' 24 'Y' 32 'g' 40 'o' 48 'w' 56 '4'
172/// 1 'B' 9 'J' 17 'R' 25 'Z' 33 'h' 41 'p' 49 'x' 57 '5'
173/// 2 'C' 10 'K' 18 'S' 26 'a' 34 'i' 42 'q' 50 'y' 58 '6'
174/// 3 'D' 11 'L' 19 'T' 27 'b' 35 'j' 43 'r' 51 'z' 59 '7'
175/// 4 'E' 12 'M' 20 'U' 28 'c' 36 'k' 44 's' 52 '0' 60 '8'
176/// 5 'F' 13 'N' 21 'V' 29 'd' 37 'l' 45 't' 53 '1' 61 '9'
177/// 6 'G' 14 'O' 22 'W' 30 'e' 38 'm' 46 'u' 54 '2' 62 '-'
178/// 7 'H' 15 'P' 23 'X' 31 'f' 39 'n' 47 'v' 55 '3' 63 '_'
179/// ======================================================================
180/// @endcode
181/// ## Base 64 Decoding {#bdlde_base64encoder-base-64-decoding}
182///
183///
184/// The degree to which decoding detects errors can significantly affect
185/// performance. The standard permits all non-Base64 characters to be treated
186/// as whitespace. One variant mode of this decoder does just that; the other
187/// reports an error if a bad (i.e., non-whitespace) character is detected. The
188/// mode of the instance is configurable. The standard imposes a maximum of 76
189/// characters exclusive of CRLF; however, the decoder implemented in this
190/// component will handle lines of arbitrary length.
191///
192/// The following kinds of errors can occur during decoding and are reported
193/// with the following priority:
194/// @code
195/// BAD DATA: A character (other than whitespace) that is not a member of the
196/// Base64 character set (including '='). Note that this error
197/// is detected only if the 'decoder' is explicitly configured (at
198/// construction) to do so.
199///
200/// BAD FORMAT: An '=' character precedes a valid numeric Base64 character,
201/// more than two '=' characters appear (possibly separated by
202/// non-Base64 characters), a numeric Base64 character other than
203/// [AEIMQUYcgkosw048] precedes a single terminal '=' character,
204/// or a character other than [AQgw] precedes a terminal pair of
205/// consecutive '=' characters.
206/// @endcode
207/// The `isError` method is used to detect such anomalies, and the `numIn`
208/// output parameter (indicating the number of input characters consumed) or
209/// possibly the iterator itself (for iterators with reference-semantics)
210/// identifies the offending character.
211///
212/// Note that the existence of an `=` can be used to reliably indicate the end
213/// of the valid data, but no such assurance is possible when the length (in
214/// bytes) of the initial input data sequence before encoding was evenly
215/// divisible by 3.
216///
217/// ## Usage {#bdlde_base64encoder-usage}
218///
219///
220/// This section illustrates intended use of this component.
221///
222/// ### Example 1: Basic Usage {#bdlde_base64encoder-example-1-basic-usage}
223///
224///
225/// The following example shows how to use a `bdlde::Base64Encoder` object to
226/// implement a function, `streamEncoder`, that reads text from a
227/// `bsl::istream`, encodes that text in base 64 representation, and writes the
228/// encoded text to a `bsl::ostream`. `streamEncoder` returns 0 on success
229/// and a negative value if the input data could not be successfully encoded or
230/// if there is an I/O error.
231/// @code
232/// streamencoder.h -*-C++-*-
233///
234/// /// Read the entire contents of the specified input stream 'is', convert
235/// /// the input plain text to base 64 encoding, and write the encoded text
236/// /// to the specified output stream 'os'. Return 0 on success, and a
237/// /// negative value otherwise.
238/// int streamEncoder(bsl::ostream& os, bsl::istream& is);
239/// @endcode
240/// We will use fixed-sized input and output buffers in the implementation, but,
241/// because of the flexibility of `bsl::istream` and the output-buffer
242/// monitoring functionality of `bdlde::Base64Encoder`, the fixed buffer sizes
243/// do *not* limit the quantity of data that can be read, encoded, or written to
244/// the output stream. The implementation file is as follows.
245/// @code
246/// streamencoder.cpp -*-C++-*-
247///
248/// #include <streamencoder.h>
249///
250/// #include <bdlde_base64encoder.h>
251///
252/// namespace BloombergLP {
253///
254/// int streamEncoder(bsl::ostream& os, bsl::istream& is)
255/// {
256/// enum {
257/// SUCCESS = 0,
258/// ENCODE_ERROR = -1,
259/// IO_ERROR = -2
260/// };
261/// @endcode
262/// We declare a `bdlde::Base64Encoder` object `converter`, which will encode
263/// the input data. Note that various internal buffers and cursors are used as
264/// needed without further comment. We read as much data as is available from
265/// the user-supplied input stream `is` *or* as much as will fit in
266/// `inputBuffer` before beginning conversion.
267/// @code
268/// bdlde::Base64Encoder converter;
269///
270/// const int INBUFFER_SIZE = 1 << 10;
271/// const int OUTBUFFER_SIZE = 1 << 10;
272///
273/// char inputBuffer[INBUFFER_SIZE];
274/// char outputBuffer[OUTBUFFER_SIZE];
275///
276/// char *output = outputBuffer;
277/// char *outputEnd = outputBuffer + sizeof outputBuffer;
278///
279/// while (is.good()) { // input stream not exhausted
280///
281/// is.read(inputBuffer, sizeof inputBuffer);
282/// @endcode
283/// With `inputBuffer` now populated, we'll use `converter` in an inner `while`
284/// loop to encode the input and write the encoded data to `outputBuffer` (via
285/// the `output` cursor). Note that if the call to `converter.convert` fails,
286/// our function terminates with a negative status.
287/// @code
288/// const char *input = inputBuffer;
289/// const char *inputEnd = input + is.gcount();
290///
291/// while (input < inputEnd) { // input encoding not complete
292///
293/// int numOut;
294/// int numIn;
295///
296/// int status = converter.convert(output, &numOut, &numIn,
297/// input, inputEnd,
298/// outputEnd - output);
299/// if (status < 0) {
300/// return ENCODE_ERROR; // RETURN
301/// }
302/// @endcode
303/// If the call to `converter.convert` returns successfully, we'll see if the
304/// output buffer is full, and if so, write its contents to the user-supplied
305/// output stream `os`. Note how we use the values of `numOut` and `numIn`
306/// generated by `convert` to update the relevant cursors.
307/// @code
308/// output += numOut;
309/// input += numIn;
310///
311/// if (output == outputEnd) { // output buffer full; write data
312/// os.write (outputBuffer, sizeof outputBuffer);
313/// if (os.fail()) {
314/// return IO_ERROR; // RETURN
315/// }
316/// output = outputBuffer;
317/// }
318/// }
319/// }
320/// @endcode
321/// We have now exited both the input and the "encode" loops. `converter` may
322/// still hold encoded output characters, and so we call `converter.endConvert`
323/// to emit any retained output. To guarantee correct behavior, we call this
324/// method in an infinite loop, because it is possible that the retained output
325/// can fill the output buffer. In that case, we solve the problem by writing
326/// the contents of the output buffer to `os` within the loop. The most likely
327/// case, however, is that `endConvert` will return 0, in which case we exit the
328/// loop and write any data remaining in `outputBuffer` to `os`. As above, if
329/// `endConvert` fails, we exit the function with a negative return status.
330/// @code
331/// while (1) {
332///
333/// int numOut;
334///
335/// int more = converter.endConvert(output, &numOut, outputEnd-output);
336/// if (more < 0) {
337/// return ENCODE_ERROR; // RETURN
338/// }
339///
340/// output += numOut;
341///
342/// if (!more) { // no more output
343/// break;
344/// }
345///
346/// assert (output == outputEnd); // output buffer is full
347///
348/// os.write (outputBuffer, sizeof outputBuffer); // write buffer
349/// if (os.fail()) {
350/// return IO_ERROR; // RETURN
351/// }
352/// output = outputBuffer;
353/// }
354///
355/// if (output > outputBuffer) { // still data in output buffer; write it
356/// // all
357/// os.write(outputBuffer, output - outputBuffer);
358/// }
359///
360/// return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
361/// }
362///
363/// } // Close namespace BloombergLP
364/// @endcode
365/// For ease of reading, we repeat the full content of the `streamencoder.cpp`
366/// file without interruption.
367/// @code
368/// streamencoder.cpp -*-C++-*-
369///
370/// #include <streamencoder.h>
371///
372/// #include <bdlde_base64encoder.h>
373///
374/// namespace BloombergLP {
375///
376/// int streamEncoder(bsl::ostream& os, bsl::istream& is)
377/// {
378/// enum {
379/// SUCCESS = 0,
380/// ENCODE_ERROR = -1,
381/// IO_ERROR = -2
382/// };
383///
384/// bdlde::Base64Encoder converter;
385///
386/// const int INBUFFER_SIZE = 1 << 10;
387/// const int OUTBUFFER_SIZE = 1 << 10;
388///
389/// char inputBuffer[INBUFFER_SIZE];
390/// char outputBuffer[OUTBUFFER_SIZE];
391///
392/// char *output = outputBuffer;
393/// char *outputEnd = outputBuffer + sizeof outputBuffer;
394///
395/// while (is.good()) { // input stream not exhausted
396///
397/// is.read(inputBuffer, sizeof inputBuffer);
398///
399/// const char *input = inputBuffer;
400/// const char *inputEnd = input + is.gcount();
401///
402/// while (input < inputEnd) { // input encoding not complete
403///
404/// int numOut;
405/// int numIn;
406///
407/// int status = converter.convert(output, &numOut, &numIn,
408/// input, inputEnd,
409/// outputEnd - output);
410/// if (status < 0) {
411/// return ENCODE_ERROR; // RETURN
412/// }
413///
414/// output += numOut;
415/// input += numIn;
416///
417/// if (output == outputEnd) { // output buffer full; write data
418/// os.write(outputBuffer, sizeof outputBuffer);
419/// if (os.fail()) {
420/// return IO_ERROR; // RETURN
421/// }
422/// output = outputBuffer;
423/// }
424/// }
425/// }
426///
427/// while (1) {
428///
429/// int numOut;
430///
431/// int more = converter.endConvert(output, &numOut, outputEnd-output);
432/// if (more < 0) {
433/// return ENCODE_ERROR; // RETURN
434/// }
435///
436/// output += numOut;
437///
438/// if (!more) { // no more output
439/// break;
440/// }
441///
442/// assert (output == outputEnd); // output buffer is full
443///
444/// os.write (outputBuffer, sizeof outputBuffer); // write buffer
445/// if (os.fail()) {
446/// return IO_ERROR; // RETURN
447/// }
448/// output = outputBuffer;
449/// }
450///
451/// if (output > outputBuffer) {
452/// os.write (outputBuffer, output - outputBuffer);
453/// }
454///
455/// return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
456/// }
457///
458/// } // close namespace BloombergLP
459/// @endcode
460/// @}
461/** @} */
462/** @} */
463
464/** @addtogroup bdl
465 * @{
466 */
467/** @addtogroup bdlde
468 * @{
469 */
470/** @addtogroup bdlde_base64encoder
471 * @{
472 */
473
474#include <bdlscm_version.h>
475
476#include <bdlde_base64alphabet.h>
478
479#include <bsls_assert.h>
481#include <bsls_review.h>
482
483#include <bsl_cstddef.h>
484#include <bsl_limits.h>
485
486
487namespace bdlde {
488
489 // ===================
490 // class Base64Encoder
491 // ===================
492
493/// This class implements a mechanism capable of converting data of
494/// arbitrary length to its corresponding Base64 representation.
495///
496/// See @ref bdlde_base64encoder
498
499 // TYPES
501
502 public:
503 // PUBLIC TYPES
505
506 // PUBLIC CLASS DATA
509
510 private:
511 // PRIVATE TYPES
512 enum State {
513 // Symbolic state values.
514
515 e_ERROR_STATE = -1, // Input is irreparably invalid.
516 e_INITIAL_STATE = 0, // Ready to accept input.
517 e_DONE_STATE = 1 // Any additional input is an error.
518 };
519
520 // INSTANCE DATA
521 const int d_maxLineLength; // maximum length of output line
522 int d_lineLength; // current length of output line
523 int d_outputLength; // total number of output characters
524 unsigned d_stack; // storage of non-emitted input
525 int d_bitsInStack; // number of bits in 'd_stack'
526 const char * const d_alphabet_p; // alphabet
527 State d_state; // state as per above enum 'State'
528 const Alphabet d_alphabet; // alphabet
529 const bool d_isPadded; // is output tail-padded with '='
530
531 private:
532 // NOT IMPLEMENTED
534 Base64Encoder& operator=(const Base64Encoder&);
535
536 // PRIVATE CLASS METHODS
537
538 /// Return the expected length of output, not including CRLF's, given
539 /// the specified `options` and `inputLength`. The behavior is
540 /// undefined if `inputLength` is large enough to overflow the result.
541 static
542 bsl::size_t lengthWithoutCrlfs(const EncoderOptions& options,
543 bsl::size_t inputLength);
544
545 // PRIVATE MANIPULATORS
546
547 /// Append a soft new line to the specified `out` if necessary and then
548 /// append the specified `character` without the total number of emitted
549 /// characters equaling the specified `maxLength`. The behavior is
550 /// undefined unless the total number of emitted characters does not
551 /// equal `maxLength` at entry to this method.
552 template <class OUTPUT_ITERATOR>
553 void append(OUTPUT_ITERATOR *out, char character, int maxLength);
554
555 /// Append a soft new line to the specified `out` if necessary and then
556 /// emit an internally buffered character without the total number of
557 /// emitted characters equaling the specified `maxLength`. The
558 /// behavior is undefined unless the total number of emitted characters
559 /// does not equal `maxLength` at entry to this method and the internal
560 /// buffer contains at least one character of output.
561 template <class OUTPUT_ITERATOR>
562 void encode(OUTPUT_ITERATOR *out, int maxLength);
563
564 /// Set the state to the specified `newState`.
565 void setState(State newState);
566
567 // PRIVATE ACCESSORS
568
569 /// Return `true` if an output sequence of the specified `numBytes` from
570 /// this encoder would be an acceptable input to a `Base64Decoder`
571 /// expecting padded input, and `false` otherwise. The behavior is
572 /// undefined unless padding is enabled.
573 bool isResidualOutput(int numBytes) const;
574
575 /// Return the state of this encoder.
576 State state() const;
577
578 public:
579 // CLASS METHODS
580
581 /// Return the exact number of encoded bytes that would result from an
582 /// input byte sequence of the specified `inputLength` provided to the
583 /// `convert` method of an encoder configured with the specified
584 /// `options`. The behavior is undefined if `inputLength` is large
585 /// enough for the result to overflow a `size_t`.
586 static bsl::size_t encodedLength(const EncoderOptions& options,
587 bsl::size_t inputLength);
588
590 "encodedLength",
591 "use overload with 'options'")
592 /// Return the exact number of encoded bytes that would result from an
593 /// input byte sequence of the specified `inputLength` provided to the
594 /// `convert` method of an encoder with the maximum allowable
595 /// line-length of the output being 76 characters (as recommended by the
596 /// MIME standard). The behavior is undefined unless
597 /// `0 <= inputLength`.
598 ///
599 static
600 int encodedLength(int inputLength);
601 /// @deprecated use the overload with `options` instead.
602
605 "use overload with 'options'")
606 /// Return the exact number of encoded bytes that would result from an
607 /// input byte sequence of the specified `inputLength` provided to the
608 /// `convert` method of an encoder configured with the specified
609 /// `maxLineLength`. The behavior is undefined unless
610 /// `0 <= inputLength` and `0 <= maxLineLength`. Note that if
611 /// `maxLineLength` is 0, no CRLF characters will appear in the output.
612 /// Note also that the number of encoded bytes need not be the number of
613 /// *output* bytes.
614 ///
615 /// @deprecated use the overload with `options` instead.
616 static
617 int encodedLength(int inputLength, int maxLineLength);
618
619 /// Return the exact number of encoded lines that would result from an
620 /// input byte sequence of the specified `inputLength` provided to the
621 /// `convert` method of an encoder configured with the specified
622 /// `options`. The behavior is undefined if `inputLength` is large
623 /// enough to overflow the result.
624 static bsl::size_t encodedLines(const EncoderOptions& options,
625 bsl::size_t inputLength);
626
628 "encodedLines",
629 "use overload with 'options'")
630 /// Return the exact number of encoded lines that would result from an
631 /// input byte sequence of the specified `inputLength` provided to the
632 /// `convert` method of an encoder with the maximum allowable
633 /// line-length of the output being 76 characters (as recommended by the
634 /// MIME standard). The behavior is undefined unless
635 /// `0 <= inputLength`. Note also that the number of encoded bytes need
636 /// not be the number of *output* bytes.
637 ///
638 static
639 int encodedLines(int inputLength);
640 /// @deprecated use the overload with `options` instead.
641
643 "encodedLines",
644 "use overload with 'options'")
645 /// Return the exact number of encoded lines that would result from an
646 /// input byte sequence of the specified `inputLength` provided to the
647 /// `convert` method of an encoder configured with the specified
648 /// `maxLineLength`. The behavior is undefined unless
649 /// `0 <= inputLength` and `0 <= maxLineLength`. Note that if
650 /// `maxLineLength` is 0, no CRLF characters will appear in the output.
651 /// Note also that the number of encoded bytes need not be the number of
652 /// *output* bytes.
653 ///
654 /// @deprecated use the overload with `options` instead.
655 static
656 int encodedLines(int inputLength, int maxLineLength);
657
658 // CREATORS
659
660 explicit
662
665 "use overload with 'options'")
666 /// Create a Base64 encoder in the initial state, defaulting the maximum
667 /// allowable line-length of the output to 76 (as recommended by the
668 /// MIME standard). Optionally specify an alphabet used to encode
669 /// characters. If `alphabet` is not specified, then the basic
670 /// alphabet, "base64", is used. Note that the `convert` and
671 /// `endConvert` methods of this encoder will insert a CRLF to prevent
672 /// each line of the output from exceeding 76 characters.
673 ///
674 /// @deprecated Create and pass an `options` object instead.
675 explicit
677 /// Create a Base64 encoder in the initial state, defaulting the state
678 /// of the maximum allowable line-length, the padding, and the alphabet
679 /// according to the values of the specified `options`.
680
683 "use overload with 'options'")
684 /// Create a Base64 encoder in the initial state, setting the maximum
685 /// allowable line-length of the output to the specified
686 /// `maxLineLength`. Specifying 0 for `maxLineLength` will result in a
687 /// single output line (i.e., one with no CRLF in it). Optionally
688 /// specify an alphabet used to encode characters. If `alphabet` is not
689/// specified, then the basic alphabet, "base64", is used. The behavior
690 /// is undefined unless `0 <= maxLineLength`. Note that when
691 /// `maxLineLength` is positive, the `convert` and `endConvert` methods
692 /// of this encoder will insert a CRLF to prevent each line of the
693 /// output from exceeding `maxLineLength`.
694 ///
695 /// @deprecated Create and pass an `options` object instead.
696 explicit
698
699 /// Destroy this object.
701
702 // MANIPULATORS
703
704 /// Encode the sequence of input characters starting at the specified
705 /// `begin` position up to, but not including, the specified `end`
706 /// position, writing any resulting output characters to the specified
707 /// `out` buffer. Optionally specify the `maxNumOut` limit on the
708 /// number of bytes to output; if `maxNumOut` is negative, no limit is
709 /// imposed. If the `maxNumOut` limit is reached, no further input will
710 /// be consumed. Load into the (optionally) specified `numOut` and
711 /// `numIn` the number of output bytes produced and input bytes
712 /// consumed, respectively. Return a non-negative value on success and
713 /// a negative value otherwise. A positive return status indicates the
714 /// number of valid processed output bytes retained by this encoder and
715 /// not written to `out` because `maxNumOut` has been reached; these
716 /// bytes are available for output if this method is called with
717 /// appropriate input. Note that calling this method after `endConvert`
718 /// has been invoked without an intervening `resetState` call will place
719 /// this instance in an error state, and return an error status. Note
720 /// also that it is recommended that after all calls to `convert` are
721 /// finished, the `endConvert` method be called to complete the encoding
722 /// of any unprocessed input characters that do not complete a 3-byte
723 /// sequence.
724 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
725 int convert(OUTPUT_ITERATOR out,
726 INPUT_ITERATOR begin,
727 INPUT_ITERATOR end);
728 template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
729 int convert(OUTPUT_ITERATOR out,
730 int *numOut,
731 int *numIn,
732 INPUT_ITERATOR begin,
733 INPUT_ITERATOR end,
734 int maxNumOut = -1);
735
736 /// Terminate encoding for this encoder; write any retained output
737 /// (e.g., from a previous call to `convert`) to the specified `out`
738 /// buffer. Optionally specify the `maxNumOut` limit on the number of
739 /// bytes to output; if `maxNumOut` is negative, no limit is imposed.
740 /// Load into the (optionally) specified `numOut` the number of output
741 /// bytes produced. Return 0 if output was successfully completed and a
742 /// non-zero value otherwise. Any retained bytes are available on a
743 /// subsequent call to `endConvert`. Once this method is called, no
744 /// additional input may be supplied without an intervening call to
745 /// `resetState`; once this method returns a zero status, a subsequent
746 /// call will place this encoder in the error state, and return an error
747 /// status.
748 template <class OUTPUT_ITERATOR>
749 int endConvert(OUTPUT_ITERATOR out);
750 template <class OUTPUT_ITERATOR>
751 int endConvert(OUTPUT_ITERATOR out, int *numOut, int maxNumOut = -1);
752
753 /// Reset this instance to its initial state (i.e., as if no input had
754 /// been consumed).
755 void resetState();
756
757 // ACCESSORS
758
759 /// Return the alphabet supplied at construction of this object.
761
762 /// Return `true` if the input read so far is considered syntactically
763 /// complete and all resulting output has been emitted to `out`, and
764 /// `false` otherwise. Note that `endConvert` must be called if the
765 /// total length of all data processed is not divisible by 3.
766 bool isAcceptable() const;
767
768 /// Return `true` if the current input is acceptable and any additional
769 /// input (including `endConvert`) would be an error, and `false`
770/// otherwise. Note that if this encoder `isDone` then all resulting
771 /// output has been emitted to `out`.
772 bool isDone() const;
773
774 /// Return `true` if there is no possibility of achieving an
775 /// "acceptable" result, and `false` otherwise.
776 bool isError() const;
777
778 /// Return `true` if this instance is in the initial state (i.e., as if
779 /// no input had been consumed), and `false` otherwise.
780 bool isInitialState() const;
781
782 /// Return true if padding by `=` characters was specified at
783 /// construction of this object.
784 bool isPadded() const;
785
786 /// Return the currently installed value for the maximum line length.
787 int maxLineLength() const;
788
789 /// Return an `options` object reflecting the options this object was
790 /// configured with.
791 EncoderOptions options() const;
792
793 /// Return the total length of the output emitted thus far (including
794 /// soft line breaks where appropriate).
795 int outputLength() const;
796};
797
798// ============================================================================
799// INLINE DEFINITIONS
800// ============================================================================
801
802 // -------------------
803 // class Base64Encoder
804 // -------------------
805
806// PRIVATE CLASS METHODS
807inline
808bsl::size_t Base64Encoder::lengthWithoutCrlfs(
809 const EncoderOptions& options,
810 bsl::size_t inputLength)
811{
812 static const bsl::size_t maxSize_t = bsl::numeric_limits<
813 bsl::size_t>::max();
814 (void) maxSize_t;
815
816 if (0 == inputLength) {
817 return 0; // RETURN
818 }
819
820 const bsl::size_t numTripletsRoundedDown = (inputLength + 2) / 3 - 1;
821 const bsl::size_t numResidual = inputLength - numTripletsRoundedDown * 3;
822
823 // 'numResidual' is in the range '[ 1 .. 3 ]'. If 'numResidual' is '1'
824 // byte, that takes 2 bytes to encode, 2 bytes takes 3 bytes to encode, 3
825 // bytes takes 4 bytes to encode.
826
827 const bsl::size_t pad = options.isPadded() ? 4 : numResidual + 1;
828
829 BSLS_ASSERT(numTripletsRoundedDown <= (maxSize_t - pad) / 4);
830
831 return numTripletsRoundedDown * 4 + pad;
832}
833
834// PRIVATE MANIPULATORS
835template <class OUTPUT_ITERATOR>
836void Base64Encoder::append(OUTPUT_ITERATOR *out,
837 char value,
838 int maxLength)
839{
840 BSLS_ASSERT(out);
841
842 if (d_maxLineLength && d_lineLength >= d_maxLineLength) {
843 if (d_lineLength == d_maxLineLength) {
844 **out = '\r';
845 ++*out;
846 ++d_outputLength;
847 ++d_lineLength;
848 if (d_outputLength == maxLength) {
849 return; // RETURN
850 }
851 }
852 **out = '\n';
853 ++*out;
854 ++d_outputLength;
855 d_lineLength = 0;
856 if (d_outputLength == maxLength) {
857 return; // RETURN
858 }
859 }
860 **out = value;
861 ++*out;
862 ++d_outputLength;
863 ++d_lineLength;
864}
865
866template <class OUTPUT_ITERATOR>
867void Base64Encoder::encode(OUTPUT_ITERATOR *out, int maxLength)
868{
869 BSLS_ASSERT(out);
870
871 if (d_maxLineLength && d_lineLength >= d_maxLineLength) {
872 if (d_lineLength == d_maxLineLength) {
873 **out = '\r';
874 ++*out;
875 ++d_outputLength;
876 ++d_lineLength;
877 if (d_outputLength == maxLength) {
878 return; // RETURN
879 }
880 }
881 **out = '\n';
882 ++*out;
883 ++d_outputLength;
884 d_lineLength = 0;
885 if (d_outputLength == maxLength) {
886 return; // RETURN
887 }
888 }
889 d_bitsInStack -= 6;
890 **out = d_alphabet_p[(d_stack >> d_bitsInStack) & 0x3f];
891 ++*out;
892 ++d_outputLength;
893 ++d_lineLength;
894}
895
896inline
897void Base64Encoder::setState(State newState)
898{
899 d_state = newState;
900}
901
902// PRIVATE ACCESSORS
903inline
904bool Base64Encoder::isResidualOutput(int numBytes) const
905{
906 BSLS_ASSERT(0 <= numBytes);
907 BSLS_ASSERT(d_isPadded);
908
909 if (d_maxLineLength) {
910 const int lineSize = d_maxLineLength + 2;
911 const int linesSoFar = numBytes / lineSize;
912 const int bytesSinceLastCrlf = numBytes - linesSoFar * lineSize;
913 const int partialCrlf = d_maxLineLength < bytesSinceLastCrlf;
914 const int nonCrlfBytes = linesSoFar * d_maxLineLength +
915 bytesSinceLastCrlf - partialCrlf;
916
917 return 0 != nonCrlfBytes % 4; // RETURN
918 }
919 else {
920 return 0 != numBytes % 4; // RETURN
921 }
922}
923
924inline
925Base64Encoder::State Base64Encoder::state() const
926{
927 return d_state;
928}
929
930// CLASS METHODS
931inline
933 bsl::size_t inputLength)
934{
935 static const bsl::size_t maxSize_t = bsl::numeric_limits<
936 bsl::size_t>::max();
937 (void) maxSize_t;
938
939 if (0 == inputLength) {
940 return 0; // RETURN
941 }
942
943 const bsl::size_t length = lengthWithoutCrlfs(options, inputLength);
944 const bsl::size_t numCrlfs = 0 == options.maxLineLength()
945 ? 0
946 : (length - 1) / options.maxLineLength();
947
948
949 BSLS_ASSERT(numCrlfs <= maxSize_t / 2);
950 BSLS_ASSERT(length <= maxSize_t - numCrlfs * 2);
951
952 return length + 2 * numCrlfs;
953}
954
955inline
956int Base64Encoder::encodedLength(int inputLength, int maxLineLength)
957{
958 BSLS_ASSERT(0 <= inputLength);
960
961 return static_cast<int>(encodedLength(EncoderOptions::custom(maxLineLength,
962 e_BASIC,
963 true),
964 inputLength));
965}
966
967inline
968int Base64Encoder::encodedLength(int inputLength)
969{
970 BSLS_ASSERT(0 <= inputLength);
971
972 return static_cast<int>(encodedLength(EncoderOptions::mime(),
973 inputLength));
974}
975
976inline
978 bsl::size_t inputLength)
979{
980 return 1 +
981 (0 == options.maxLineLength()
982 ? 0
983 : lengthWithoutCrlfs(options, inputLength) / options.maxLineLength());
984}
985
986inline
987int Base64Encoder::encodedLines(int inputLength, int maxLineLength)
988{
989 BSLS_ASSERT(0 <= inputLength);
991
992 return static_cast<int>(encodedLines(EncoderOptions::custom(maxLineLength,
993 e_BASIC,
994 true),
995 inputLength));
996}
997
998inline
999int Base64Encoder::encodedLines(int inputLength)
1000{
1001 BSLS_ASSERT(0 <= inputLength);
1002
1003 return static_cast<int>(encodedLines(EncoderOptions::mime(), inputLength));
1004}
1005
1006// MANIPULATORS
1007template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
1008int Base64Encoder::convert(OUTPUT_ITERATOR out,
1009 INPUT_ITERATOR begin,
1010 INPUT_ITERATOR end)
1011{
1012 int dummyNumOut;
1013 int dummyNumIn;
1014
1015 return convert(out, &dummyNumOut, &dummyNumIn, begin, end, -1);
1016}
1017
1018template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
1019int Base64Encoder::convert(OUTPUT_ITERATOR out,
1020 int *numOut,
1021 int *numIn,
1022 INPUT_ITERATOR begin,
1023 INPUT_ITERATOR end,
1024 int maxNumOut)
1025{
1026 int dummyNumOut;
1027 if (!numOut) {
1028 numOut = &dummyNumOut;
1029 }
1030 int dummyNumIn;
1031 if (!numIn) {
1032 numIn = &dummyNumIn;
1033 }
1034
1035 if (e_ERROR_STATE == state() || e_DONE_STATE == state()) {
1036 setState(e_ERROR_STATE);
1037 *numOut = 0;
1038 *numIn = 0;
1039 return -1; // RETURN
1040 }
1041
1042 const int initialLength = d_outputLength;
1043 const int maxLength = d_outputLength + maxNumOut;
1044
1045 // Emit as many output bytes as possible.
1046
1047 while (6 <= d_bitsInStack && d_outputLength != maxLength) {
1048 encode(&out, maxLength);
1049 }
1050
1051 // Consume as many input bytes as possible.
1052
1053 int tmpNumIn = 0;
1054
1055 while (4 >= d_bitsInStack && begin != end) {
1056 const unsigned char byte = static_cast<unsigned char>(*begin);
1057
1058 ++begin;
1059 ++tmpNumIn;
1060
1061 d_stack = (d_stack << 8) | byte;
1062 d_bitsInStack += 8;
1063
1064 if (d_outputLength != maxLength) {
1065 encode(&out, maxLength);
1066 if (6 <= d_bitsInStack && d_outputLength != maxLength) {
1067 encode(&out, maxLength);
1068 }
1069 }
1070 }
1071
1072 *numIn = tmpNumIn;
1073 *numOut = d_outputLength - initialLength;
1074
1075 return 0;
1076}
1077
1078template <class OUTPUT_ITERATOR>
1079int Base64Encoder::endConvert(OUTPUT_ITERATOR out)
1080{
1081 int dummyNumOut;
1082
1083 return endConvert(out, &dummyNumOut, -1);
1084}
1085
1086template <class OUTPUT_ITERATOR>
1087int Base64Encoder::endConvert(OUTPUT_ITERATOR out,
1088 int *numOut,
1089 int maxNumOut)
1090{
1091 BSLS_ASSERT(numOut);
1092
1093 if (e_ERROR_STATE == state() || isDone()) {
1094 setState(e_ERROR_STATE);
1095 *numOut = 0;
1096 return -1; // RETURN
1097 }
1098
1099 const int initialLength = d_outputLength;
1100 const int maxLength = d_outputLength + maxNumOut;
1101
1102 // Handle trailing bits.
1103
1104 const int residualBits = d_bitsInStack % 6;
1105 if (residualBits) {
1106 const int shift = 6 - residualBits;
1107 d_stack = d_stack << shift;
1108 d_bitsInStack += shift;
1109 }
1110
1111 BSLS_ASSERT(0 == d_bitsInStack % 6);
1112
1113 // Emit as many output bytes as possible.
1114
1115 while (6 <= d_bitsInStack && d_outputLength != maxLength) {
1116 encode(&out, maxLength);
1117 }
1118
1119 // Append trailing '=' as necessary.
1120
1121 if (0 == d_bitsInStack) {
1122 while (true) {
1123 if (!d_isPadded || !isResidualOutput(d_outputLength)) {
1124 setState(e_DONE_STATE);
1125
1126 break;
1127 }
1128
1129 if (d_outputLength == maxLength) {
1130 break;
1131 }
1132
1133 append(&out, '=', maxLength);
1134 }
1135 }
1136
1137 *numOut = d_outputLength - initialLength;
1138
1139 return !isDone();
1140}
1141
1142inline
1144{
1145 setState(e_INITIAL_STATE);
1146 d_outputLength = 0;
1147 d_lineLength = 0;
1148 d_stack = 0;
1149 d_bitsInStack = 0;
1150}
1151
1152// ACCESSORS
1153inline
1155{
1156 return d_alphabet;
1157}
1158
1159inline
1161{
1162 return e_ERROR_STATE != state();
1163}
1164
1165inline
1167{
1168 return e_DONE_STATE == state()
1169 && !d_bitsInStack
1170 && (!d_isPadded || !isResidualOutput(d_outputLength));
1171}
1172
1173inline
1175{
1176 return e_ERROR_STATE == state();
1177}
1178
1179inline
1181{
1182 return 0 == d_outputLength && e_INITIAL_STATE == state();
1183}
1184
1185inline
1187{
1188 return d_isPadded;
1189}
1190
1191inline
1193{
1194 return d_maxLineLength;
1195}
1196
1197inline
1199{
1201 d_maxLineLength,
1202 alphabet(),
1203 d_isPadded);
1204}
1205
1206inline
1208{
1209 return d_outputLength;
1210}
1211} // close package namespace
1212
1213
1214
1215#endif
1216
1217// ----------------------------------------------------------------------------
1218// Copyright 2015 Bloomberg Finance L.P.
1219//
1220// Licensed under the Apache License, Version 2.0 (the "License");
1221// you may not use this file except in compliance with the License.
1222// You may obtain a copy of the License at
1223//
1224// http://www.apache.org/licenses/LICENSE-2.0
1225//
1226// Unless required by applicable law or agreed to in writing, software
1227// distributed under the License is distributed on an "AS IS" BASIS,
1228// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1229// See the License for the specific language governing permissions and
1230// limitations under the License.
1231// ----------------------------- END-OF-FILE ----------------------------------
1232
1233/** @} */
1234/** @} */
1235/** @} */
Definition bdlde_base64encoderoptions.h:214
static Base64EncoderOptions custom(int maxLineLength, Base64Alphabet::Enum alphabet, bool padded)
Definition bdlde_base64encoderoptions.h:386
bool isPadded() const
Return the value of the isPadded attribute.
Definition bdlde_base64encoderoptions.h:446
int maxLineLength() const
Return the value of the maxLineLength attribute.
Definition bdlde_base64encoderoptions.h:452
static Base64EncoderOptions mime()
Definition bdlde_base64encoderoptions.h:395
Definition bdlde_base64encoder.h:497
void resetState()
Definition bdlde_base64encoder.h:1143
static const Alphabet e_URL
Definition bdlde_base64encoder.h:508
int maxLineLength
Definition bdlde_base64encoder.h:617
static bsl::size_t encodedLength(const EncoderOptions &options, bsl::size_t inputLength)
Definition bdlde_base64encoder.h:932
static bsl::size_t encodedLines(const EncoderOptions &options, bsl::size_t inputLength)
Definition bdlde_base64encoder.h:977
int outputLength() const
Definition bdlde_base64encoder.h:1207
bool isInitialState() const
Definition bdlde_base64encoder.h:1180
static const Alphabet e_BASIC
Definition bdlde_base64encoder.h:507
int convert(OUTPUT_ITERATOR out, INPUT_ITERATOR begin, INPUT_ITERATOR end)
Definition bdlde_base64encoder.h:1008
bool isPadded() const
Definition bdlde_base64encoder.h:1186
int endConvert(OUTPUT_ITERATOR out)
Definition bdlde_base64encoder.h:1079
bool isError() const
Definition bdlde_base64encoder.h:1174
bool isAcceptable() const
Definition bdlde_base64encoder.h:1160
bool isDone() const
Definition bdlde_base64encoder.h:1166
EncoderOptions options() const
Definition bdlde_base64encoder.h:1198
Alphabet alphabet
Definition bdlde_base64encoder.h:697
BSLS_DEPRECATE_FEATURE("bdl", "encodedLength", "use overload with 'options'") static int encodedLength(int inputLength)
Base64Alphabet::Enum Alphabet
Definition bdlde_base64encoder.h:504
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_DEPRECATE_FEATURE(UOR, FEATURE, MESSAGE)
Definition bsls_deprecatefeature.h:319
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlde_base64alphabet.h:118
Definition bdlb_printmethods.h:283
Enum
Definition bdlde_base64alphabet.h:135
@ e_URL
Definition bdlde_base64alphabet.h:137
@ e_BASIC
Definition bdlde_base64alphabet.h:136