// bdlde_base64decoder.h                                              -*-C++-*-

// ----------------------------------------------------------------------------
//                                   NOTICE
//
// This component is not up to date with current BDE coding standards, and
// should not be used as an example for new development.
// ----------------------------------------------------------------------------

#ifndef INCLUDED_BDLDE_BASE64DECODER
#define INCLUDED_BDLDE_BASE64DECODER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide automata for converting to and from Base64 encodings.
//
//@CLASSES:
//  bdlde::Base64Decoder: automata performing Base64 decoding operations
//
//@SEE_ALSO: 'bdlde_base64encoder'
//
//@DESCRIPTION: This component defines a 'class', 'bdlde::Base64Decoder', which
// provides a pair of template functions (each parameterized separately on both
// input and output iterators) that can be used respectively to encode and to
// decode byte sequences of arbitrary length into and from the printable Base64
// representation described in Section 6.8 "Base64 Content Transfer Encoding"
// of RFC 2045, "Multipurpose Internet Mail Extensions (MIME) Part One: Format
// of Internet Message Bodies."
//
// The 'bdlde::Base64Encoder' and 'bdlde::Base64Decoder' support the standard
// "base64" encoding (described in https://tools.ietf.org/html/rfc4648) as well
// as the "Base 64 Encoding with URL and Filename Safe Alphabet", or
// "base64url", encoding.  The "base64url" encoding is very similar to "base64"
// but substitutes a couple characters in the encoded alphabet to avoid
// characters that conflict with special characters in URL syntax or filename
// descriptions (replacing '+' with '-', and '/' with '_').  See
// {Base 64 Encoding with URL and Filename Safe Alphabet} for more information.
//
// Each instance of either the encoder or decoder retains the state of the
// conversion from one supplied input to the next, enabling the processing of
// segmented input -- i.e., processing resumes where it left off with the next
// invocation on new input.  Instance methods are provided for both the
// encoder and decoder to (1) assert the end of input, (2) determine whether
// the input so far is currently acceptable, and (3) indicate whether a
// non-recoverable error has occurred.
//
///Base 64 Encoding
///----------------
// The data stream is processed three bytes at a time from left to right (a
// final quantum consisting of one or two bytes, as discussed below, is handled
// specially).  Each sequence of three 8-bit quantities
//..
//      7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     |               |               |               |
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//      `------v------' `------v------' `------v------'
//           Byte2           Byte1           Byte0
//..
// is segmented into four intermediate 6-bit quantities.
//..
//      5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0 5 4 3 2 1 0
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//     |           |           |           |           |
//     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//      `----v----' `----v----' `----v----' `----v----'
//         char3       char2       char1        char0
//..
// Each 6-bit quantity is in turn used as an index into the following character
// table to generate an 8-bit character.  The four resulting characters hence
// form the encoding for the original 3-byte sequence.
//..
//     ======================================================================
//     *                   The Basic BASE-64 Alphabet                       *
//     ----------------------------------------------------------------------
//     Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc
//     --- ---  --- ---  --- ---  --- ---  --- ---  --- ---  --- ---  --- ---
//       0 'A'    8 'I'   16 'Q'   24 'Y'   32 'g'   40 'o'   48 'w'   56 '4'
//       1 'B'    9 'J'   17 'R'   25 'Z'   33 'h'   41 'p'   49 'x'   57 '5'
//       2 'C'   10 'K'   18 'S'   26 'a'   34 'i'   42 'q'   50 'y'   58 '6'
//       3 'D'   11 'L'   19 'T'   27 'b'   35 'j'   43 'r'   51 'z'   59 '7'
//       4 'E'   12 'M'   20 'U'   28 'c'   36 'k'   44 's'   52 '0'   60 '8'
//       5 'F'   13 'N'   21 'V'   29 'd'   37 'l'   45 't'   53 '1'   61 '9'
//       6 'G'   14 'O'   22 'W'   30 'e'   38 'm'   46 'u'   54 '2'   62 '+'
//       7 'H'   15 'P'   23 'X'   31 'f'   39 'n'   47 'v'   55 '3'   63 '/'
//     ======================================================================
//..
// This component also supports a slightly different alphabet, "base64url",
// that is more appropriate if the encoded representation would be used in a
// file name or URL (see
// {Base 64 Encoding with URL and Filename Safe Alphabet}).
//
// The 3-byte grouping of the input is only a design of convenience and not a
// requirement.  When the number of bytes in the input stream is not divisible
// by 3, sufficient 0 bits are padded on the right to achieve an integral
// number of 6-bit character indices.  Then one of two special cases will apply
// for the final processing step:
//
// I)  There is a single byte of data, in which case there will be two Base64
// encoding characters (the second of which will be one of [AQgw]) followed by
// two equal ('=') signs.
//
// II)  There are exactly two bytes of data, in which case there will be
// three Base64 encoding characters (the third of which will be one of
// [AEIMQUYcgkosw048]) followed by a single equal ('=') sign.
//
// The MIME standard requires that the maximum line length of emitted text not
// exceed 76 characters exclusive of CRLF.  The caller may override this
// default if desired.
//
// Input values of increasing length along with their corresponding Base64
// encodings are illustrated below:
//..
//        Data: /* nothing */
//    Encoding: /* nothing */
//
//        Data: 0x01
//    Encoding: AQ==
//
//        Data: 0x01 0x02
//    Encoding: AQI=
//
//        Data: 0x01 0x02 0x03
//    Encoding: AQID
//
//        Data: 0x01 0x02 0x03 0x04
//    Encoding: AQIDBA==
//..
// In order for a Base64 encoding to be valid, the input data must be either of
// length a multiple of three (constituting maximal input), or have been
// terminated explicitly by the 'endConvert' method (initiating bit padding
// when necessary).
//
///Base 64 Encoding with URL and Filename Safe Alphabet
///----------------------------------------------------
// The encoder and decoder in this component also support the "base64url"
// encoding, which is the same as standard "base64" but substitutes (a couple)
// characters in the alphabet that are treated as special characters when used
// in a URL or in a file system.  The following table is technically identical
// to the table presented in {Base 64 Encoding}, except for the characters
// encoding the values 62 and 63, which are '-' and '_' respectively.
//..
//     ======================================================================
//     *            The "URL and Filename Safe" BASE-64 Alphabet            *
//     ----------------------------------------------------------------------
//     Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc  Val Enc
//     --- ---  --- ---  --- ---  --- ---  --- ---  --- ---  --- ---  --- ---
//       0 'A'    8 'I'   16 'Q'   24 'Y'   32 'g'   40 'o'   48 'w'   56 '4'
//       1 'B'    9 'J'   17 'R'   25 'Z'   33 'h'   41 'p'   49 'x'   57 '5'
//       2 'C'   10 'K'   18 'S'   26 'a'   34 'i'   42 'q'   50 'y'   58 '6'
//       3 'D'   11 'L'   19 'T'   27 'b'   35 'j'   43 'r'   51 'z'   59 '7'
//       4 'E'   12 'M'   20 'U'   28 'c'   36 'k'   44 's'   52 '0'   60 '8'
//       5 'F'   13 'N'   21 'V'   29 'd'   37 'l'   45 't'   53 '1'   61 '9'
//       6 'G'   14 'O'   22 'W'   30 'e'   38 'm'   46 'u'   54 '2'   62 '-'
//       7 'H'   15 'P'   23 'X'   31 'f'   39 'n'   47 'v'   55 '3'   63 '_'
//     ======================================================================
//..
///Base 64 Decoding
///----------------
// The degree to which decoding detects errors can significantly affect
// performance.  The standard permits all non-Base64 characters to be treated
// as whitespace.  One variant mode of this decoder does just that; the other
// reports an error if a bad (i.e., non-whitespace) character is detected.  The
// mode of the instance is configurable.  The standard imposes a maximum of 76
// characters exclusive of CRLF; however, the decoder implemented in this
// component will handle lines of arbitrary length.
//
// The following kinds of errors can occur during decoding and are reported
// with the following priority:
//..
// BAD DATA: A character (other than whitespace) that is not a member of the
//           Base64 character set (including '=').  Note that this error
//           is detected only if the 'decoder' is explicitly configured (at
//           construction) to do so.
//
// BAD FORMAT: An '=' character precedes a valid numeric Base64 character,
//             more than two '=' characters appear (possibly separated by
//             non-Base64 characters), a numeric Base64 character other than
//             [AEIMQUYcgkosw048] precedes a single terminal '=' character,
//             or a character other than [AQgw] precedes a terminal pair of
//             consecutive '=' characters.
//..
// The 'isError' method is used to detect such anomalies, and the 'numIn'
// output parameter (indicating the number of input characters consumed)
// or possibly the iterator itself (for iterators with reference-semantics)
// identifies the offending character.
//
// Note that the existence of an '=' can be used to reliably indicate the end
// of the valid data, but no such assurance is possible when the length (in
// bytes) of the initial input data sequence before encoding was evenly
// divisible by 3.
//
///Usage
///-----
// The following example shows how to use a 'bdlde::Base64Decoder' object to
// implement a function, 'streamDecoder', that reads text from a
// 'bsl::istream', decodes that text from base 64 representation, and writes
// the decoded text to a 'bsl::ostream'.  'streamDecoder' returns 0 on success
// and a negative value if the input data could not be successfully decoded or
// if there is an I/O error.
//..
// streamdecoder.h                      -*-C++-*-
//
// int streamDecoder(bsl::ostream& os, bsl::istream& is);
//     // Read the entire contents of the specified input stream 'is', convert
//     // the input base-64 encoding into plain text, and write the decoded
//     // text to the specified output stream 'os'.  Return 0 on success, and a
//     // negative value otherwise.
//..
// We will use fixed-sized input and output buffers in the implementation, but,
// because of the flexibility of 'bsl::istream' and the output-buffer
// monitoring functionality of 'bdlde::Base64Decoder', the fixed buffer sizes
// do *not* limit the quantity of data that can be read, decoded, or written to
// the output stream.  The implementation file is as follows.
//..
// streamdecoder.cpp                    -*-C++-*-
//
// #include <streamdecoder.h>
//
// #include <bdlde_base64decoder.h>
//
// namespace BloombergLP {
//
// int streamDecoder(bsl::ostream& os, bsl::istream& is)
// {
//     enum {
//         SUCCESS      =  0,
//         DECODE_ERROR = -1,
//         IO_ERROR     = -2
//     };
//..
// We declare a 'bdlde::Base64Decoder' object 'converter', which will decode
// the input data.  Note that various internal buffers and cursors are used as
// needed without further comment.  We read as much data as is available from
// the user-supplied input stream 'is' *or* as much as will fit in
// 'inputBuffer' before beginning conversion.  To obtain unobstructedly the
// output that results from decoding the entire input stream (even in the case
// of errors), the base64 decoder is configured not to detect errors.
//..
//     bdlde::Base64Decoder converter(false);   // Do not report errors.
//
//     const int INBUFFER_SIZE  = 1 << 10;
//     const int OUTBUFFER_SIZE = 1 << 10;
//
//     char inputBuffer[INBUFFER_SIZE];
//     char outputBuffer[OUTBUFFER_SIZE];
//
//     char *output    = outputBuffer;
//     char *outputEnd = outputBuffer + sizeof outputBuffer;
//
//     while (is.good()) {  // input stream not exhausted
//
//         is.read(inputBuffer, sizeof inputBuffer);
//..
// With 'inputBuffer' now populated, we'll use 'converter' in an inner 'while'
// loop to decode the input and write the decoded data to 'outputBuffer' (via
// the 'output' cursor).  Note that if the call to 'converter.convert' fails,
// our function terminates with a negative status.
//..
//         const char *input    = inputBuffer;
//         const char *inputEnd = input + is.gcount();
//
//         while (input < inputEnd) { // input encoding not complete
//
//             int numOut;
//             int numIn;
//
//             int status = converter.convert(output, &numOut, &numIn,
//                                            input,   inputEnd,
//                                            outputEnd - output);
//             if (status < 0) {
//                 return DECODE_ERROR;                               // RETURN
//             }
//..
// If the call to 'converter.convert' returns successfully, we'll see if the
// output buffer is full, and if so, write its contents to the user-supplied
// output stream 'os'.  Note how we use the values of 'numOut' and 'numIn'
// generated by 'convert' to update the relevant cursors.
//..
//             output += numOut;
//             input  += numIn;
//
//             if (output == outputEnd) {  // output buffer full; write data
//                 os.write (outputBuffer, sizeof outputBuffer);
//                 if (os.fail()) {
//                     return IO_ERROR;                               // RETURN
//                 }
//                 output = outputBuffer;
//             }
//         }
//     }
//..
// We have now exited both the input and the "decode" loops.  'converter' may
// still hold decoded output characters, and so we call 'converter.endConvert'
// to emit any retained output.  To guarantee correct behavior, we call this
// method in an infinite loop, because it is possible that the retained output
// can fill the output buffer.  In that case, we solve the problem by writing
// the contents of the output buffer to 'os' within the loop.  The most likely
// case, however, is that 'endConvert' will return 0, in which case we exit the
// loop and write any data remaining in 'outputBuffer' to 'os'.  As above, if
// 'endConvert' fails, we exit the function with a negative return status.
//..
//     while (1) {
//
//         int numOut;
//
//         int more = converter.endConvert(output, &numOut, outputEnd-output);
//         if (more < 0) {
//             return DECODE_ERROR;                                   // RETURN
//         }
//
//         output += numOut;
//
//         if (!more) { // no more output
//             break;
//         }
//
//         assert (output == outputEnd);  // output buffer is full
//
//         os.write (outputBuffer, sizeof outputBuffer);  // write buffer
//         if (os.fail()) {
//             return IO_ERROR;                                       // RETURN
//         }
//         output = outputBuffer;
//     }
//
//     if (output > outputBuffer) { // still data in output buffer; write it
//                                  // all
//         os.write(outputBuffer, output - outputBuffer);
//     }
//
//     return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
// }
//
// } // Close namespace BloombergLP
//..
// For ease of reading, we repeat the full content of the 'streamdecoder.cpp'
// file without interruption.
//..
// streamdecoder.cpp                    -*-C++-*-
//
// #include <streamdecoder.h>
//
// #include <bdlde_base64decoder.h>
//
// namespace BloombergLP {
//
// int streamDecoder(bsl::ostream& os, bsl::istream& is)
// {
//     enum {
//         SUCCESS      =  0,
//         DECODE_ERROR = -1,
//         IO_ERROR     = -2
//     };
//
//     bdlde::Base64Decoder converter(false);   // Do not report errors.
//
//     const int INBUFFER_SIZE  = 1 << 10;
//     const int OUTBUFFER_SIZE = 1 << 10;
//
//     char inputBuffer[INBUFFER_SIZE];
//     char outputBuffer[OUTBUFFER_SIZE];
//
//     char *output    = outputBuffer;
//     char *outputEnd = outputBuffer + sizeof outputBuffer;
//
//     while (is.good()) {  // input stream not exhausted
//
//         is.read(inputBuffer, sizeof inputBuffer);
//
//         const char *input    = inputBuffer;
//         const char *inputEnd = input + is.gcount();
//
//         while (input < inputEnd) { // input encoding not complete
//
//             int numOut;
//             int numIn;
//
//             int status = converter.convert(output, &numOut, &numIn,
//                                            input,   inputEnd,
//                                            outputEnd - output);
//             if (status < 0) {
//                 return DECODE_ERROR;                               // RETURN
//             }
//
//             output += numOut;
//             input  += numIn;
//
//             if (output == outputEnd) {  // output buffer full; write data
//                 os.write(outputBuffer, sizeof outputBuffer);
//                 if (os.fail()) {
//                     return IO_ERROR;                               // RETURN
//                 }
//                 output = outputBuffer;
//             }
//         }
//     }
//
//     while (1) {
//
//         int numOut;
//
//         int more = converter.endConvert(output, &numOut, outputEnd-output);
//         if (more < 0) {
//             return DECODE_ERROR;                                   // RETURN
//         }
//
//         output += numOut;
//
//         if (!more) { // no more output
//             break;
//         }
//
//         assert (output == outputEnd);  // output buffer is full
//
//         os.write (outputBuffer, sizeof outputBuffer);  // write buffer
//         if (os.fail()) {
//             return IO_ERROR;                                       // RETURN
//         }
//         output = outputBuffer;
//     }
//
//     if (output > outputBuffer) {
//         os.write (outputBuffer, output - outputBuffer);
//     }
//
//     return (is.eof() && os.good()) ? SUCCESS : IO_ERROR;
// }
//
// } // Close namespace BloombergLP
//..

#include <bdlscm_version.h>

#include <bdlde_base64alphabet.h>
#include <bdlde_base64decoderoptions.h>
#include <bdlde_base64ignoremode.h>

#include <bslmf_assert.h>

#include <bsls_assert.h>
#include <bsls_deprecatefeature.h>
#include <bsls_review.h>
#include <bsls_types.h>

#include <bsl_iostream.h>

namespace BloombergLP {
namespace bdlde {

                            // ===================
                            // class Base64Decoder
                            // ===================

class Base64Decoder {
    // This class implements a mechanism capable of converting data of
    // arbitrary length from its corresponding Base64 representation.  The
    // conversion state is retained between calls, so input may be supplied in
    // segments.  Note that the copy constructor and copy-assignment operator
    // are declared private and not implemented.

  public:
    // PUBLIC TYPES
    typedef Base64Alphabet::Enum Alphabet;

    // PUBLIC CONSTANTS
    static const Alphabet e_BASIC = Base64Alphabet::e_BASIC;
    static const Alphabet e_URL   = Base64Alphabet::e_URL;

  private:
    // PRIVATE TYPES
    typedef Base64DecoderOptions    DecoderOptions;
    typedef Base64IgnoreMode        IgnoreMode;

    enum State {
        // Symbolic state values.

        e_ERROR_STATE      = -1, // input is irreparably invalid
        e_INPUT_STATE      =  0, // general input state
        e_NEED_EQUAL_STATE =  1, // need an '='
        e_SOFT_DONE_STATE  =  2, // only ignorable input and 'endConvert'
        e_DONE_STATE       =  3  // any additional input is an error
    };

    // INSTANCE DATA
    int                    d_outputLength;  // total number of output
                                            // characters

    const char *const      d_alphabet_p;    // selected alphabet based on
                                            // specified alphabet type

    const bool *const      d_ignorable_p;   // selected table of ignorable
                                            // characters based on specified
                                            // error-reporting mode

    unsigned               d_stack;         // word containing 6-bit chunks of
                                            // data to be assembled into bytes

    int                    d_bitsInStack;   // number of bits in 'd_stack'

    State                  d_state;         // state of this object as defined
                                            // by the 'State' enum.

    const Alphabet         d_alphabet;      // 'e_BASIC' or 'e_URL'.

    const IgnoreMode::Enum d_ignoreMode;    // 'e_IGNORE_NONE',
                                            // 'e_IGNORE_WHITESPACE', or
                                            // 'e_IGNORE_UNRECOGNIZED'

    const bool             d_isPadded;      // 'true' means '=' padding is
                                            // required, 'false' means '=' is
                                            // an error

  private:
    // NOT IMPLEMENTED
    Base64Decoder(const Base64Decoder&);
    Base64Decoder& operator=(const Base64Decoder&);

    // PRIVATE ACCESSORS
    int residualBits(int bytesOutputSoFar) const;
        // Return the number of bits of output there are (either already done
        // or to be done) since the end of the last complete 4-byte quad of
        // input, given the specified 'bytesOutputSoFar'.  Note that input to
        // this decoder, other than ignored whitespace or garbage, comes in 4
        // byte quads, each of which results in 3 bytes of output, and this
        // accessor is particularly useful in calculating output for the last
        // partial quad of input.

  public:
    // CLASS METHODS
    static int maxDecodedLength(int inputLength);
        // Return the maximum number of decoded bytes that could result from an
        // input byte sequence of the specified 'inputLength' provided to the
        // 'convert' and 'endConvert' methods of this decoder.  The behavior is
        // undefined unless '0 <= inputLength'.  Note that the result is
        // independent of which options are provided to the decoder.

    // CREATORS
    explicit
    Base64Decoder(const Base64DecoderOptions& options);
        // Create a Base64 decoder with options determined by the specified
        // 'options'.

    BSLS_DEPRECATE_FEATURE("bdl", "Base64Decoder", "use options c'tor")
    explicit
    Base64Decoder(bool     unrecognizedNonWhitespaceIsErrorFlag,
                  Alphabet alphabet = e_BASIC);
        // Create a Base64 decoder in the initial state.  Unrecognized
        // characters (i.e., non-base64 characters other than whitespace) will
        // be treated as errors if the specified
        // 'unrecognizedNonWhitespaceIsErrorFlag' is 'true', and ignored
        // otherwise.  Optionally specify an alphabet used to decode input
        // characters.  If 'alphabet' is not specified, then the basic
        // alphabet, "base64", is used.  Padded input is assumed.
        //
        // DEPRECATED: Use the overload that takes 'options' instead.

    ~Base64Decoder();
        // Destroy this object.

    // MANIPULATORS
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR out,
                INPUT_ITERATOR  begin,
                INPUT_ITERATOR  end);
    template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
    int convert(OUTPUT_ITERATOR  out,
                int             *numOut,
                int             *numIn,
                INPUT_ITERATOR   begin,
                INPUT_ITERATOR   end,
                int              maxNumOut = -1);
        // Decode the sequence of input characters starting at the specified
        // 'begin' position up to, but not including, the specified 'end'
        // position, writing any resulting output characters to the specified
        // 'out' buffer.  Optionally specify the 'maxNumOut' limit on the
        // number of bytes to output; if 'maxNumOut' is negative, no limit is
        // imposed.  If the 'maxNumOut' limit is reached, no further input will
        // be consumed.  Load into the (optionally) specified 'numOut' and
        // 'numIn' the number of output bytes produced and input bytes
        // consumed, respectively.  Return a non-negative value on success, -1
        // on an input error, and -2 if the 'endConvert' method has already
        // been called without an intervening 'resetState' call.  A return
        // status of -1 indicates that the data at 'begin' + 'numIn'
        // constitutes an irrecoverably undecodable input sequence (i.e., the
        // data cannot be extended to form any valid encoding).  A positive
        // return status indicates the number of valid processed output bytes
        // retained by this decoder and not written to 'out' because
        // 'maxNumOut' has been reached; these bytes are available for output
        // if this method is called with appropriate input.  Note that it is
        // recommended that after all calls to 'convert' are finished, the
        // 'endConvert' method be called to complete the decoding of any
        // unprocessed input characters that do not complete a 3-byte sequence.

    template <class OUTPUT_ITERATOR>
    int endConvert(OUTPUT_ITERATOR out);
    template <class OUTPUT_ITERATOR>
    int endConvert(OUTPUT_ITERATOR  out,
                   int             *numOut,
                   int              maxNumOut = -1);
        // Terminate decoding for this decoder; write any retained output
        // (e.g., from a previous call to 'convert' with a non-zero optionally
        // specified 'maxNumOut' argument) to the specified 'out' buffer;
        // decode any unprocessed input characters that do not complete a
        // 3-byte sequence.  The argument 'maxNumOut' is the limit on the
        // number of bytes to output; if 'maxNumOut' is negative, no limit is
        // imposed.  Load into the (optionally) specified 'numOut' the number
        // of output bytes produced.  Return 0 on success, the positive number
        // of bytes *still* retained by this decoder if the 'maxNumOut' limit
        // was reached, and a negative value otherwise.  Any retained bytes are
        // available on a subsequent call to 'endConvert'.  Once this method is
        // called, no additional input may be supplied without an intervening
        // call to 'resetState'; once this method returns a zero status, a
        // subsequent call will place this decoder in the error state, and
        // return an error status.

    void resetState();
        // Reset this instance to its initial state (i.e., as if no input had
        // been consumed).

    // ACCESSORS
    Alphabet alphabet() const;
        // Return the alphabet supplied at construction of this object.

    IgnoreMode::Enum ignoreMode() const;
        // Return the 'ignoreMode' state of this decoder.

    bool isAcceptable() const;
        // Return 'true' if the input read so far is considered syntactically
        // complete, and 'false' otherwise.  Note that the number of relevant
        // input characters must be divisible by 4.

    bool isDone() const;
        // Return 'true' if the current input is acceptable and any additional
        // input (including 'endConvert') would be an error, and 'false'
        // otherwise.  Note that if this decoder 'isDone' then all resulting
        // output has been emitted to 'out'.

    bool isError() const;
        // Return 'true' if there is no possibility of achieving an
        // "acceptable" result, and 'false' otherwise.

    bool isInitialState() const;
        // Return 'true' if this instance is in the initial state (i.e., as
        // if no input had been consumed), and 'false' otherwise.

    bool isMaximal() const;
        // Return 'true' if the current input is acceptable and any additional
        // input (other than 'endConvert') would be an error, and 'false'
        // otherwise.

    bool isPadded() const;
        // Return 'true' if this object is configured for padded input and
        // 'false' otherwise.

    BSLS_DEPRECATE_FEATURE("bdl", "isUnrecognizedAnError", "use ignoreMode")
    bool isUnrecognizedAnError() const;
        // Return 'true' if this mechanism is currently configured to report an
        // error when an unrecognized character (i.e., a character other than
        // one of the 64 "numeric" base-64 characters, '=', or whitespace) is
        // encountered, and 'false' otherwise.
        //
        // DEPRECATED: use the 'ignoreMode' accessor instead.

    DecoderOptions options() const;
        // Return a 'Base64DecoderOptions' object representing the
        // configuration of this decoder.

    int outputLength() const;
        // Return the total length of the output emitted thus far.
};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                            // -------------------
                            // class Base64Decoder
                            // -------------------

// PRIVATE ACCESSORS
inline
int Base64Decoder::residualBits(int bytesOutputSoFar) const
{
    BSLS_ASSERT(0 <= bytesOutputSoFar);
    BSLS_ASSERT(0 <= d_bitsInStack);

    // Four significant input characters carry '4 * 6 == 24' bits, i.e.,
    // exactly 3 bytes of output.  The bits attributable to the current
    // (possibly partial) quad of input are therefore the bits already emitted
    // past the last 3-byte output boundary plus the bits still held in
    // 'd_stack', reduced modulo 24.
    //
    // Because every significant input character contributes 6 bits, the
    // result while in the input state is expected to be one of:
    //: o  0 -- on a quad boundary
    //: o  6 -- one character into the current quad
    //: o 12 -- two characters into the current quad
    //: o 18 -- three characters into the current quad

    const int bitsEmittedInTriple = (bytesOutputSoFar % 3) * 8;
    const int residual            = (bitsEmittedInTriple + d_bitsInStack) % 24;

    BSLS_ASSERT(e_INPUT_STATE != d_state || 0 == residual % 6);

    return residual;
}

// CLASS METHODS
inline
int Base64Decoder::maxDecodedLength(int inputLength)
{
    BSLS_ASSERT(0 <= inputLength);

    // Every group of up to 4 input characters can yield at most 3 bytes of
    // output, so the bound is 'ceil(inputLength / 4) * 3'.  The ceiling is
    // computed from the quotient and remainder, rather than as
    // '(inputLength + 3) / 4', so that no intermediate sum can overflow 'int'
    // when 'inputLength' is within 3 of the largest representable value.  The
    // result is identical for every input on which the former expression did
    // not overflow.

    const int fullQuads   = inputLength / 4;
    const int partialQuad = (0 != inputLength % 4) ? 1 : 0;

    return (fullQuads + partialQuad) * 3;
}

// MANIPULATORS
template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int Base64Decoder::convert(OUTPUT_ITERATOR out,
                           INPUT_ITERATOR  begin,
                           INPUT_ITERATOR  end)
{
    // Delegate to the six-argument overload with no limit on the amount of
    // output.  That overload requires non-null count addresses, so supply
    // throwaway locals whose values are discarded on return.

    const int k_NO_OUTPUT_LIMIT = -1;

    int ignoredNumOut;
    int ignoredNumIn;

    return convert(out,
                   &ignoredNumOut,
                   &ignoredNumIn,
                   begin,
                   end,
                   k_NO_OUTPUT_LIMIT);
}

template <class OUTPUT_ITERATOR, class INPUT_ITERATOR>
int Base64Decoder::convert(OUTPUT_ITERATOR  out,
                           int             *numOut,
                           int             *numIn,
                           INPUT_ITERATOR   begin,
                           INPUT_ITERATOR   end,
                           int              maxNumOut)
{
    // Decode the characters in the range '[begin, end)', writing decoded
    // bytes to the specified 'out' and advancing it.  Load into the specified
    // 'numOut' the number of bytes written and into the specified 'numIn' the
    // number of input characters consumed (ignorable characters and '='
    // included).  At most 'maxNumOut' bytes are written; a negative
    // 'maxNumOut' never matches the running count, so output is then
    // unlimited.  Return -1 if the decoder is (or enters) the error state,
    // -2 if it was already in the done state on entry (it is then moved to
    // the error state), and otherwise the number of whole decoded bytes still
    // buffered in 'd_stack' awaiting output.

    BSLS_ASSERT(numOut);
    BSLS_ASSERT(numIn);

    // Calling 'convert' after an error or after completion is itself an
    // error; distinguish the two cases only through the return value.

    if (e_ERROR_STATE == d_state || e_DONE_STATE == d_state) {
        int rv = e_DONE_STATE == d_state ? -2 : -1;
        d_state = e_ERROR_STATE;
        *numOut = 0;
        *numIn = 0;
        return rv;                                                    // RETURN
    }

    int numEmitted = 0;

    // Emit as many output bytes as possible.  These are bytes left buffered
    // in 'd_stack', e.g., by a previous call whose output was capped.

    while (8 <= d_bitsInStack && numEmitted != maxNumOut) {
        d_bitsInStack -= 8;
        *out = static_cast<char>((d_stack >> d_bitsInStack) & 0xff);
        ++out;
        ++numEmitted;
    }

    // Consume as many input bytes as possible.

    *numIn = 0;

    if (e_INPUT_STATE == d_state) {
        // Only accept another 6-bit value while it still fits: the stack is
        // allowed to hold at most 24 bits.

        while (18 >= d_bitsInStack && begin != end) {
            const unsigned char byte = static_cast<unsigned char>(*begin);

            ++begin;
            ++*numIn;

            // Table lookup: values below 64 are the 6-bit value encoded by
            // 'byte'; anything else marks a character outside the alphabet.

            unsigned char converted = static_cast<unsigned char>(
                                                           d_alphabet_p[byte]);

            if (converted < 64) {
                d_stack = (d_stack << 6) | converted;
                d_bitsInStack += 6;

                // When output has not been capped, fewer than 8 bits were
                // buffered before this push, so at most one whole byte can be
                // ready here.

                if (8 <= d_bitsInStack && numEmitted != maxNumOut) {
                    d_bitsInStack -= 8;
                    *out = static_cast<char>(
                                            (d_stack >> d_bitsInStack) & 0xff);
                    ++out;
                    ++numEmitted;
                }
            }
            else if (!d_ignorable_p[byte]) {
                // A non-alphabet character that is not ignorable ends the
                // input state: either a legitimate '=' (padded mode only) or
                // an error.  Ignorable characters fall through and are
                // skipped (but still counted in '*numIn').

                if ('=' == byte && d_isPadded) {
                    const int residual = residualBits(
                                                  d_outputLength + numEmitted);
                    // 'residual' is 0, 6, 12, or 18.
                    //: o If it's 0, that's an error since no '=' should be
                    //:   needed.
                    //:
                    //: o If it's 6, that's an error because an incomplete
                    //:   byte has been input.
                    //:
                    //: o 12 means 2 input characters have been read since the
                    //:   last complete quad, meaning we have to do 1 byte of
                    //:   output (which we may have already done), and a
                    //:   second '=' is still required.  The low-order 4 bits
                    //:   of the stack should be 0.
                    //:
                    //: o 18 means 3 input characters have been read since the
                    //:   last complete quad, meaning we have to do 2 bytes of
                    //:   output (some or all of which we may have already
                    //:   done).  The low-order 2 bits of the stack should be
                    //:   0.

                    // 'leftOver' is the number of trailing bits that do not
                    // belong to any output byte; they must all be 0 and are
                    // discarded from the stack below.

                    const int leftOver = residual % 8;
                    d_state = 0 != (d_stack & ((1 << leftOver) - 1))
                              ? e_ERROR_STATE
                              : 12 == residual
                              ? e_NEED_EQUAL_STATE
                              : 18 == residual
                              ? e_SOFT_DONE_STATE
                              : e_ERROR_STATE;
                    d_stack       >>= leftOver;
                    d_bitsInStack -=  leftOver;
                }
                else {
                    d_state = e_ERROR_STATE;
                }
                break;
            }
        }
    }

    // One '=' has been seen and a second is required: skip ignorable
    // characters until the second '=' (success) or anything else (error).

    if (e_NEED_EQUAL_STATE == d_state) {
        BSLS_ASSERT(d_isPadded);

        while (begin != end) {
            const unsigned char byte = static_cast<unsigned char>(*begin);

            ++begin;
            ++*numIn;

            if (!d_ignorable_p[byte]) {
                if ('=' == byte) {
                    d_state = e_SOFT_DONE_STATE;
                }
                else {
                    d_state = e_ERROR_STATE;
                }
                break;
            }
        }
    }

    // Padding is complete: only ignorable characters may follow; any other
    // character is an error.

    if (e_SOFT_DONE_STATE == d_state) {
        while (begin != end) {
            const unsigned char byte = static_cast<unsigned char>(*begin);

            ++begin;
            ++*numIn;

            if (!d_ignorable_p[byte]) {
                d_state = e_ERROR_STATE;
                break;
            }
        }
    }

    *numOut = numEmitted;
    d_outputLength += numEmitted;

    // On success, report how many whole bytes remain buffered in the stack.

    return e_ERROR_STATE == d_state ? -1 : d_bitsInStack / 8;
}

template <class OUTPUT_ITERATOR>
int Base64Decoder::endConvert(OUTPUT_ITERATOR out)
{
    // Forward to the full overload, discarding the output count.  A
    // 'maxNumOut' of -1 never equals the non-negative count of bytes
    // emitted, so the amount of output is not capped.

    int ignoredNumOut;

    const int status = endConvert(out, &ignoredNumOut, -1);
    return status;
}

template <class OUTPUT_ITERATOR>
int Base64Decoder::endConvert(OUTPUT_ITERATOR  out,
                              int             *numOut,
                              int              maxNumOut)
{
    // Terminate the decoding, writing any bytes still buffered in 'd_stack'
    // to the specified 'out' (at most 'maxNumOut' of them; a negative
    // 'maxNumOut' imposes no limit) and loading the number written into the
    // specified 'numOut'.  Return -1 (with '*numOut' set to 0 and the decoder
    // left in the error state) if the input seen so far cannot be a complete
    // encoding, and the number of whole bytes still buffered otherwise.

    BSLS_ASSERT(numOut);

    // A decoder still in the input state never saw a terminating '=', so the
    // end of the input has to be validated here.

    if (e_INPUT_STATE == d_state) {
        const int pendingBits = residualBits(d_outputLength);

        if (d_isPadded) {
            // Padded input lacking '=' must end exactly on a quad boundary.

            if (0 != pendingBits) {
                d_state = e_ERROR_STATE;
            }
        }
        else {
            // Unpadded input may end mid-quad provided that the trailing
            // bits that do not belong to any output byte are all 0; a lone
            // 6-bit character can never complete a byte.

            const int spareBits = pendingBits % 8;
            const int spareMask = (1 << spareBits) - 1;

            if (6 == pendingBits || 0 != (d_stack & spareMask)) {
                d_state = e_ERROR_STATE;
            }
            else {
                d_stack       >>= spareBits;
                d_bitsInStack -=  spareBits;
            }
        }
    }

    // It is also an error to finish while a second '=' is still awaited, or
    // to call this method again once everything has been flushed.

    const bool alreadyDrained = e_DONE_STATE == d_state && 0 == d_bitsInStack;

    if (e_ERROR_STATE      == d_state
     || e_NEED_EQUAL_STATE == d_state
     || alreadyDrained) {
        d_state = e_ERROR_STATE;
        *numOut = 0;
        return -1;                                                    // RETURN
    }

    BSLS_ASSERT(0 == d_bitsInStack % 8);

    d_state = e_DONE_STATE;

    // Flush whole bytes, most significant first, until the stack is empty or
    // the output limit is reached.

    int numEmitted = 0;
    while (8 <= d_bitsInStack && numEmitted != maxNumOut) {
        d_bitsInStack -= 8;
        *out = static_cast<char>((d_stack >> d_bitsInStack) & 0xff);
        ++out;
        ++numEmitted;
    }

    d_outputLength += numEmitted;
    *numOut         = numEmitted;

    return d_bitsInStack / 8;
}

inline
void Base64Decoder::resetState()
{
    // Return this decoder to the condition tested by 'isInitialState': no
    // buffered bits, no output produced, and ready to accept input.
    // NOTE(review): 'd_stack' itself is not cleared here; the decoding code
    // in this file only reads the low 'd_bitsInStack' bits of it -- confirm
    // that no other code depends on its value.

    d_bitsInStack  = 0;
    d_outputLength = 0;
    d_state        = e_INPUT_STATE;
}

// ACCESSORS
inline
Base64Decoder::Alphabet Base64Decoder::alphabet() const
{
    // Return the alphabet attribute ('d_alphabet') held by this decoder.

    return d_alphabet;
}

inline
Base64IgnoreMode::Enum Base64Decoder::ignoreMode() const
{
    // Return the ignore-mode attribute ('d_ignoreMode') held by this
    // decoder.

    return d_ignoreMode;
}

inline
bool Base64Decoder::isAcceptable() const
{
    const int residual = residualBits(d_outputLength);
    return (0 == residual && e_INPUT_STATE == d_state) ||
                       e_SOFT_DONE_STATE == d_state || e_DONE_STATE == d_state;
}

inline
bool Base64Decoder::isDone() const
{
    // Return 'true' if this decoder is in the done state and no bits remain
    // buffered for output.

    if (e_DONE_STATE != d_state) {
        return false;                                                 // RETURN
    }

    return 0 == d_bitsInStack;
}

inline
bool Base64Decoder::isError() const
{
    // Return 'true' if this decoder is in the error state.

    return e_ERROR_STATE == d_state;
}

inline
bool Base64Decoder::isInitialState() const
{
    // Return 'true' if this decoder is in the input state with nothing
    // buffered and nothing output so far (the condition established by
    // 'resetState').

    const bool nothingBuffered = (0 == d_bitsInStack);
    const bool nothingOutput   = (0 == d_outputLength);

    return e_INPUT_STATE == d_state && nothingBuffered && nothingOutput;
}

inline
bool Base64Decoder::isMaximal() const
{
    // Return 'true' if this decoder is in the soft-done state, or is in the
    // done state with bits still buffered for output.

    if (e_SOFT_DONE_STATE == d_state) {
        return true;                                                  // RETURN
    }

    return e_DONE_STATE == d_state && 0 != d_bitsInStack;
}

inline
bool Base64Decoder::isPadded() const
{
    // Return the padding attribute ('d_isPadded') of this decoder.  When it
    // is 'true', 'convert' treats '=' as padding rather than as an
    // unrecognized character.

    return d_isPadded;
}

inline
bool Base64Decoder::isUnrecognizedAnError() const
{
    return IgnoreMode::e_IGNORE_UNRECOGNIZED != ignoreMode();
}

inline
Base64DecoderOptions Base64Decoder::options() const
{
    // Return an options object built from this decoder's ignore mode,
    // alphabet, and padding attributes.

    const Base64IgnoreMode::Enum mode             = ignoreMode();
    const Alphabet               decodingAlphabet = alphabet();

    return DecoderOptions::custom(mode, decodingAlphabet, d_isPadded);
}

inline
int Base64Decoder::outputLength() const
{
    // Return the running total of decoded bytes emitted by 'convert' and
    // 'endConvert' ('d_outputLength'); 'resetState' sets it back to 0.

    return d_outputLength;
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------