doxygen/bde_api_prod/bdlde__charconvertutf16_8h_source.html

/// @file bdlde_charconvertutf16.h

///

/// The content of this file has been pre-processed for Doxygen.

///


// bdlde_charconvertutf16.h                                           -*-C++-*-

#ifndef INCLUDED_BDLDE_CHARCONVERTUTF16

#define INCLUDED_BDLDE_CHARCONVERTUTF16


#include <bsls_ident.h>

BSLS_IDENT("$Id: $")


/// @defgroup bdlde_charconvertutf16 bdlde_charconvertutf16

/// @brief  Provide fast, safe conversion between UTF-8 and UTF-16 encodings.

/// @addtogroup bdl

/// @{

/// @addtogroup bdlde

/// @{

/// @addtogroup bdlde_charconvertutf16

/// @{

///

/// <h1> Outline </h1>

/// * <a href="#bdlde_charconvertutf16-purpose"> Purpose</a>

/// * <a href="#bdlde_charconvertutf16-classes"> Classes </a>

/// * <a href="#bdlde_charconvertutf16-description"> Description </a>

///   * <a href="#bdlde_charconvertutf16-history-and-motivation"> History and Motivation </a>

///   * <a href="#bdlde_charconvertutf16-wstrings-and-utf-16"> WSTRINGS and UTF-16 </a>

///   * <a href="#bdlde_charconvertutf16-usage"> Usage </a>

///     * <a href="#bdlde_charconvertutf16-example-1-translation-to-fixed-length-buffers"> Example 1: Translation to Fixed-Length Buffers </a>

///     * <a href="#bdlde_charconvertutf16-example-2-translation-to-stl-containers"> Example 2: Translation to STL Containers </a>

///

/// # Purpose {#bdlde_charconvertutf16-purpose}

/// Provide fast, safe conversion between UTF-8 and UTF-16 encodings.

///

/// # Classes {#bdlde_charconvertutf16-classes}

///

/// -  bdlde::CharConvertUtf16: namespace for conversions between UTF-8 and UTF-16

///

/// # Description {#bdlde_charconvertutf16-description}

/// This component provides a suite of static functions supporting

/// the *fast* conversion of *valid* UTF-8 encoded strings to *valid* UTF-16

/// 16-bit word arrays, wstrings, and vectors, and conversion of *valid* UTF-16

/// encoded word sequences to *valid* UTF-8 byte arrays, strings, and byte

/// vectors.  Invalid byte sequences and code points forbidden by either

/// encoding are removed and (optionally) replaced by a single word or byte

/// provided by the caller.  In UTF-16 -> UTF-8 conversion, the replacement

/// must be a non-zero byte; in the other direction, it must be a single,

/// non-zero word.  The byte or word count and code point count that are

/// optionally returned through pointer arguments include the terminating null

/// code point in their count.  The byte order of the UTF-16 input or output can

/// be specified via the optional `byteOrder` argument, which is assumed to be

/// host byte order if not specified.  In functions taking UTF-8, input is in

/// the form of a `bsl::string_view` or a null-terminated `const char *`.  In

/// functions taking UTF-16, input is either in the form of a

/// `bslstl::StringRefWide` or a pointer to a null-terminated array of

/// `unsigned short` or `wchar_t`.

///

/// ## History and Motivation {#bdlde_charconvertutf16-history-and-motivation}

///

///

/// UTF-8 is an encoding that allows 32-bit character sets like Unicode

/// to be represented using (8-bit) byte strings, while allowing "standard

/// ASCII" strings to be used "as-is".  Note that UTF-8 is described in detail

/// in RFC 3629 (http://www.ietf.org/rfc/rfc3629.txt).

///

/// UTF-16 is a 16-bit encoding that allows Unicode code points up to 0x10ffff

/// to be encoded using one or two 16-bit values.  Note that UTF-16 is described

/// in detail in RFC 2781 (http://www.ietf.org/rfc/rfc2781.txt).

///

/// The functions here that translate to fixed buffers make a single pass

/// through the data.  The functions that translate to `bsl::string`s and STL

/// containers, however, like the `glib` conversion routines, make two passes: a

/// size estimation pass, after which the output container is sized

/// appropriately, and then the translation pass.

///

/// The methods that output to a `vector`, `string`, or `wstring` will all grow

/// the output object as necessary to fit the data, and in the end will exactly

/// resize the object to the output (including the terminating 0 for `vector`,

/// which is not included for `string` or `wstring`).  Note that in the case of

/// `string` or `wstring`, the terminating 0 code point is still included in the

/// code point count.

///

/// Non-minimal UTF-8 encodings of code points are reported as errors.  Octets

/// and post-conversion code points in the forbidden ranges are treated as

/// errors and removed (or replaced, if a replacement word is provided).

///

/// ## WSTRINGS and UTF-16 {#bdlde_charconvertutf16-wstrings-and-utf-16}

///

///

/// UTF-16 (or UTF-8, for that matter) can be stored in `wstring`s, but note

/// that the size of a `wstring::value_type`, also known as a `wchar_t` word,

/// varies across different platforms -- it is 4 bytes on Solaris, Linux, and

/// Darwin, and 2 bytes on AIX and Windows.  So a file of `wchar_t` words

/// written by one platform may not be readable by another.  Byte order is also

/// a consideration, and a non-host byte order can be handled by using the

/// optional `byteOrder` argument of these functions.  Another factor is that,

/// since UTF-16 words all fit in 2 bytes, using `wchar_t` to store UTF-16 is

/// very wasteful of space on many platforms.

///

/// ## Usage {#bdlde_charconvertutf16-usage}

///

///

/// This section illustrates intended use of this component.

///

/// ### Example 1: Translation to Fixed-Length Buffers {#bdlde_charconvertutf16-example-1-translation-to-fixed-length-buffers}

///

///

/// In this example, we will translate a string containing a non-ASCII code

/// point from UTF-16 to UTF-8 and back using fixed-length buffers.

///

/// First, we create a UTF-16 string spelling `ecole` in French, which begins

/// with `0xc9`, a non-ASCII `e` with an accent over it:

/// @code

/// unsigned short utf16String[] = { 0xc9, 'c', 'o', 'l', 'e', 0 };

/// @endcode

/// Then, we create a byte buffer to store the UTF-8 result of the translation

/// in, and variables to monitor counts of code points and bytes translated:

/// @code

/// char utf8String[7];

/// bsl::size_t numCodePoints, numBytes;

/// numCodePoints = numBytes = -1;    // garbage

/// @endcode

/// Next, we call `utf16ToUtf8` to do the translation:

/// @code

/// int rc = bdlde::CharConvertUtf16::utf16ToUtf8(utf8String,

///                                               sizeof(utf8String),

///                                               utf16String,

///                                               &numCodePoints,

///                                               &numBytes);

/// @endcode

/// Then, we observe that no errors or warnings occurred, and that the numbers

/// of code points and bytes were as expected.  Note that both `numCodePoints`

/// and `numBytes` include the terminating 0:

/// @code

/// assert(0 == rc);

/// assert(6 == numCodePoints);

/// assert(7 == numBytes);

/// @endcode

/// Next, we examine the length of the translated string:

/// @code

/// assert(numBytes - 1 == bsl::strlen(utf8String));

/// @endcode

/// Then, we examine the individual bytes of the translated UTF-8:

/// @code

/// assert((char)0xc3 == utf8String[0]);

/// assert((char)0x89 == utf8String[1]);

/// assert('c' ==        utf8String[2]);

/// assert('o' ==        utf8String[3]);

/// assert('l' ==        utf8String[4]);

/// assert('e' ==        utf8String[5]);

/// assert(0   ==        utf8String[6]);

/// @endcode

/// Next, in preparation for translation back to UTF-16, we create a buffer of

/// `short` values and the variable `numWords` to track the number of UTF-16

/// words occupied by the result:

/// @code

/// unsigned short secondUtf16String[6];

/// bsl::size_t numWords;

/// numCodePoints = numWords = -1;    // garbage

/// @endcode

/// Then, we do the reverse translation:

/// @code

/// rc = bdlde::CharConvertUtf16::utf8ToUtf16(secondUtf16String,

///                                           6,

///                                           utf8String,

///                                           &numCodePoints,

///                                           &numWords);

/// @endcode

/// Next, we observe that no errors or warnings were reported, and that the

/// number of code points and words were as expected.  Note that `numCodePoints`

/// and `numWords` both include the terminating 0:

/// @code

/// assert(0 == rc);

/// assert(6 == numCodePoints);

/// assert(6 == numWords);

/// @endcode

/// Now, we observe that our output is identical to the original UTF-16 string:

/// @code

/// assert(0 == bsl::memcmp(utf16String,

///                         secondUtf16String,

///                         sizeof(utf16String)));

/// @endcode

/// Finally, we examine the individual words of the reverse translation:

/// @code

/// assert(0xc9 == secondUtf16String[0]);

/// assert('c'  == secondUtf16String[1]);

/// assert('o'  == secondUtf16String[2]);

/// assert('l'  == secondUtf16String[3]);

/// assert('e'  == secondUtf16String[4]);

/// assert(0    == secondUtf16String[5]);

/// @endcode

///

/// ### Example 2: Translation to STL Containers {#bdlde_charconvertutf16-example-2-translation-to-stl-containers}

///

///

/// The following snippets of code illustrate a typical use of the

/// `bdlde::CharConvertUtf16` struct's utility functions, first converting from

/// UTF-8 to UTF-16, and then converting back to make sure the round trip

/// returns the same value, translating to STL containers in both directions.

///

/// First, we declare a string of UTF-8 containing single-, double-, triple-,

/// and quadruple-octet code points:

/// @code

/// const char utf8MultiLang[] = {

///     "Hello"                                         // -- ASCII

///     "\xce\x97"         "\xce\x95"       "\xce\xbb"  // -- Greek

///     "\xe4\xb8\xad"     "\xe5\x8d\x8e"               // -- Chinese

///     "\xe0\xa4\xad"     "\xe0\xa4\xbe"               // -- Hindi

///     "\xf2\x94\xb4\xa5" "\xf3\xb8\xac\x83" };        // -- Quad octets

/// @endcode

/// Then, we declare an `enum` summarizing the counts of code points in the

/// string and verify that the counts add up to the length of the string:

/// @code

/// enum { NUM_ASCII_CODE_POINTS   = 5,

///        NUM_GREEK_CODE_POINTS   = 3,

///        NUM_CHINESE_CODE_POINTS = 2,

///        NUM_HINDI_CODE_POINTS   = 2,

///        NUM_QUAD_CODE_POINTS    = 2 };

///

/// assert(1 * NUM_ASCII_CODE_POINTS +

///        2 * NUM_GREEK_CODE_POINTS +

///        3 * NUM_CHINESE_CODE_POINTS +

///        3 * NUM_HINDI_CODE_POINTS +

///        4 * NUM_QUAD_CODE_POINTS == bsl::strlen(utf8MultiLang));

/// @endcode

/// Next, we declare the vector where our UTF-16 output will go, and a variable

/// into which the number of code points (not bytes or words) written will be

/// stored.  It is not necessary to initialize `utf16CodePointsWritten`:

/// @code

/// bsl::vector<unsigned short> v16;

/// bsl::size_t utf16CodePointsWritten;

/// @endcode

/// Note that for performance, we should `v16.reserve(sizeof(utf8MultiLang))`,

/// but it's not strictly necessary -- the vector will automatically be grown to

/// the correct size.  Also note that if `v16` were not empty, that wouldn't be

/// a problem -- any contents will be discarded.

///

/// Then, we do the translation to UTF-16:

/// @code

/// int retVal = bdlde::CharConvertUtf16::utf8ToUtf16(&v16,

///                                                   utf8MultiLang,

///                                                   &utf16CodePointsWritten);

///

/// assert(0 == retVal);        // verify success

/// assert(0 == v16.back());    // verify null terminated

/// @endcode

/// Next, we verify that the number of code points (not bytes or words) that was

/// returned is correct:

/// @code

/// enum { EXPECTED_CODE_POINTS_WRITTEN =

///                     NUM_ASCII_CODE_POINTS + NUM_GREEK_CODE_POINTS +

///                     NUM_CHINESE_CODE_POINTS + NUM_HINDI_CODE_POINTS +

///                     NUM_QUAD_CODE_POINTS  + 1 };

///

/// assert(EXPECTED_CODE_POINTS_WRITTEN == utf16CodePointsWritten);

/// @endcode

/// Then, we verify that the number of 16-bit words written was correct.  The

/// quad octet code points each require 2 `short` words of output:

/// @code

/// enum { EXPECTED_UTF16_WORDS_WRITTEN =

///                     NUM_ASCII_CODE_POINTS + NUM_GREEK_CODE_POINTS +

///                     NUM_CHINESE_CODE_POINTS + NUM_HINDI_CODE_POINTS +

///                     NUM_QUAD_CODE_POINTS * 2 + 1 };

///

/// assert(EXPECTED_UTF16_WORDS_WRITTEN == v16.size());

/// @endcode

/// Next, we calculate and confirm the difference between the number of UTF-16

/// words output and the number of bytes input.  The ASCII code points will take

/// 1 16-bit word apiece, the Greek code points are double octets that will

/// become single `short` values, the Chinese code points are encoded as UTF-8

/// triple octets that will turn into single 16-bit words, the same for the

/// Hindi code points, and the quad code points are quadruple octets that will

/// turn into double `short` values:

/// @code

/// enum { SHRINKAGE = NUM_ASCII_CODE_POINTS   * (1-1) +

///                    NUM_GREEK_CODE_POINTS   * (2-1) +

///                    NUM_CHINESE_CODE_POINTS * (3-1) +

///                    NUM_HINDI_CODE_POINTS   * (3-1) +

///                    NUM_QUAD_CODE_POINTS    * (4-2) };

///

/// assert(v16.size() == sizeof(utf8MultiLang) - SHRINKAGE);

/// @endcode

/// Then, we go on to do the reverse `utf16ToUtf8` transform to turn it back

/// into UTF-8, and we should get a result identical to our original input.  We

/// declare a `bsl::string` for our output, and a variable to count the number

/// of code points (not bytes or words) translated:

/// @code

/// bsl::string s;

/// bsl::size_t utf8CodePointsWritten;

/// @endcode

/// Again, note that for performance, we should ideally

/// `s.reserve(3 * v16.size())` but it's not really necessary.

///

/// Now, we do the reverse transform:

/// @code

/// retVal = bdlde::CharConvertUtf16::utf16ToUtf8(&s,

///                                               v16.begin(),

///                                               &utf8CodePointsWritten);

/// @endcode

/// Finally, we verify that a successful status was returned, that the output of

/// the reverse transform was identical to the original input, and that the

/// number of code points translated was as expected:

/// @code

/// assert(0 == retVal);

/// assert(utf8MultiLang == s);

/// assert(s.length() + 1               == sizeof(utf8MultiLang));

///

/// assert(EXPECTED_CODE_POINTS_WRITTEN == utf8CodePointsWritten);

/// assert(utf16CodePointsWritten       == utf8CodePointsWritten);

/// @endcode

/// @}

/** @} */

/** @} */


/** @addtogroup bdl

 * @{

 */

/** @addtogroup bdlde

 * @{

 */

/** @addtogroup bdlde_charconvertutf16

 * @{

 */


#include <bdlscm_version.h>


#include <bdlde_byteorder.h>

#include <bdlde_charconvertstatus.h>


#include <bsls_libraryfeatures.h>


#include <bsl_cstddef.h>            // 'bsl::size_t'

#include <bsl_string.h>

#include <bsl_string_view.h>

#include <bsl_vector.h>


#include <string>

#include <vector>


namespace bdlde {

                          // =======================

                          // struct CharConvertUtf16

                          // =======================


/// This `struct` provides a namespace for a suite of static functions to

/// convert buffers or containers between UTF-8 and UTF-16.  Note that Byte

/// Order Mark (BOM) sequences are neither generated nor recognized as

/// special.  If a BOM is present in the input, it will be translated,

/// whether correct (`0xfeff`) or incorrect (`0xfffe`), into the output

/// without any special handling.


struct CharConvertUtf16 {


    // CLASS METHODS


                        // -- UTF-8 to UTF-16 Methods


    /// Return the number of words required to store the translation of the

    /// specified UTF-8 string `srcBuffer` into a null-terminated UTF-16

    /// string; the returned count includes the terminating null word.

    /// Optionally specify `endPtr`, referring to one past the last input

    /// character.  If `endPtr` is not supplied, or is 0, `srcBuffer` is

    /// treated as null-terminated.  Note that the value returned is the

    /// size `utf8ToUtf16` will require, assuming the `errorWord` argument

    /// passed to `utf8ToUtf16` is non-zero.

    static bsl::size_t computeRequiredUtf16Words(const char *srcBuffer,

                                                 const char *endPtr = 0);


    /// Load into the specified `dstString` the result of converting the

    /// specified UTF-8 `srcString` to its UTF-16 equivalent.  Optionally

    /// specify `numCodePointsWritten`, which, if not 0, indicates the

    /// location of the modifiable variable into which the number of Unicode

    /// code points written, including the terminating null character, is to

    /// be loaded.  Optionally specify an `errorWord` to be substituted, if

    /// not 0, for invalid encodings in the input string.  Optionally

    /// specify `byteOrder` to indicate the byte order of the UTF-16 output;

    /// if `byteOrder` is not specified, the output is assumed to be in host

    /// byte order.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` otherwise.  Invalid

    /// encodings are multi-byte encoding parts out of sequence, non-minimal

    /// UTF-8 encodings of code points, or code points outside the ranges

    /// that UTF-16 can validly encode (in the range `[ 1 .. 0xd7ff ]` or

    /// `[ 0xe000 .. 0x10ffff ]`).  If `errorWord` is 0, invalid input code

    /// points are ignored (i.e., produce no corresponding output).  The

    /// behavior is undefined unless `srcString` is null-terminated when

    /// specified as a `const char *`.  Note that one code point can occupy

    /// multiple UTF-16 words, and that if `srcString` is a

    /// `bsl::string_view`, it may contain embedded null bytes that will be

    /// translated to null words embedded in the output.

    static int utf8ToUtf16(

                          bsl::wstring            *dstString,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          wchar_t                  errorWord            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::wstring`; see the `bsl::wstring` overload above for the

    /// contract.

    static int utf8ToUtf16(

                          std::wstring            *dstString,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          wchar_t                  errorWord            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::pmr::wstring`; declared only when

    /// `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.  See the

    /// `bsl::wstring` overload above for the contract.

    static int utf8ToUtf16(

                       std::pmr::wstring       *dstString,

                       const bsl::string_view&  srcString,

                       bsl::size_t             *numCodePointsWritten = 0,

                       wchar_t                  errorWord            = '?',

                       ByteOrder::Enum          byteOrder = ByteOrder::e_HOST);

#endif

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `bsl::wstring`.

    static int utf8ToUtf16(

                          bsl::wstring             *dstString,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          wchar_t                   errorWord            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `std::wstring`.

    static int utf8ToUtf16(

                          std::wstring             *dstString,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          wchar_t                   errorWord            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `std::pmr::wstring`; declared only

    /// when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf8ToUtf16(std::pmr::wstring *dstString,

                           const char        *srcString,

                           bsl::size_t       *numCodePointsWritten = 0,

                           wchar_t            errorWord            = '?',

                           ByteOrder::Enum    byteOrder = ByteOrder::e_HOST);

#endif

#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    // The overloads in this conditional section load their result into

    // `char16_t`-based strings and name the optional replacement parameter

    // `errorChar`.  See the function-level documentation accompanying the

    // `wstring` overloads of `utf8ToUtf16` in this struct for the parameter,

    // return-value, and invalid-input conventions of this overload set.

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `bsl::u16string`.

    static int utf8ToUtf16(

                          bsl::u16string          *dstString,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          char16_t                 errorChar            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::u16string`.

    static int utf8ToUtf16(

                          std::u16string          *dstString,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          char16_t                 errorChar            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::pmr::u16string`; declared only when

    /// `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf8ToUtf16(

                          std::pmr::u16string      *dstString,

                          const bsl::string_view&   srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          char16_t                  errorChar            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// and loading the UTF-16 result into a `bsl::u16string`.

    static int utf8ToUtf16(

                          bsl::u16string           *dstString,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          char16_t                  errorChar            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// and loading the UTF-16 result into a `std::u16string`.

    static int utf8ToUtf16(

                          std::u16string           *dstString,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          char16_t                  errorChar            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// and loading the UTF-16 result into a `std::pmr::u16string`; declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf8ToUtf16(

                          std::pmr::u16string      *dstString,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          char16_t                  errorChar            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

#endif


    /// Load into the specified `dstVector` the result of converting the

    /// specified UTF-8 `srcString` to its UTF-16 equivalent.  Optionally

    /// specify `numCodePointsWritten`, which (if not 0) indicates the

    /// location of the modifiable variable into which the number of UTF-16

    /// code points (including the null terminator) written is to be loaded.

    /// Optionally specify an `errorWord` to be substituted (if not 0) for

    /// invalid encodings in the input string.  Invalid encodings are

    /// multi-byte encoding parts out of sequence, non-minimal UTF-8

    /// encodings, or code points outside the ranges that UTF-16 can validly

    /// encode (in the range `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0x10ffff ]`).

    /// If `errorWord` is 0, invalid input is ignored (i.e., produces no

    /// corresponding output).  Optionally specify `byteOrder` to indicate

    /// the byte order of the UTF-16 output; if `byteOrder` is not

    /// specified, the output is assumed to be in host byte order.  Any

    /// previous contents of the destination are discarded.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` otherwise.  The

    /// behavior is undefined unless `errorWord` is either 0 or a valid

    /// single-word encoded UTF-16 code point (in the range

    /// `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0xffff ]`) and `srcString` is

    /// null-terminated when specified as a `const char *`.  Note that one

    /// code point can occupy multiple 16-bit words.  Also note that the

    /// size of the result vector is always fitted to the null-terminated

    /// result, including the terminating 0.  Also note that if `srcString`

    /// is a `bsl::string_view`, it may contain embedded null bytes that

    /// will be translated to null words embedded in the output.

    static int utf8ToUtf16(

                       bsl::vector<unsigned short> *dstVector,

                       const bsl::string_view&      srcString,

                       bsl::size_t                 *numCodePointsWritten = 0,

                       unsigned short               errorWord            = '?',

                       ByteOrder::Enum              byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::vector<unsigned short>`; see the `bsl::vector` overload above

    /// for the contract.

    static int utf8ToUtf16(

                       std::vector<unsigned short> *dstVector,

                       const bsl::string_view&      srcString,

                       bsl::size_t                 *numCodePointsWritten = 0,

                       unsigned short               errorWord            = '?',

                       ByteOrder::Enum              byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a

    /// `std::pmr::vector<unsigned short>`; declared only when

    /// `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.  See the

    /// `bsl::vector` overload above for the contract.

    static int utf8ToUtf16(

              std::pmr::vector<unsigned short> *dstVector,

              const bsl::string_view&           srcString,

              bsl::size_t                      *numCodePointsWritten = 0,

              unsigned short                    errorWord            = '?',

              ByteOrder::Enum                   byteOrder = ByteOrder::e_HOST);

#endif

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `bsl::vector<unsigned short>`.

    static int utf8ToUtf16(

                       bsl::vector<unsigned short> *dstVector,

                       const char                  *srcString,

                       bsl::size_t                 *numCodePointsWritten = 0,

                       unsigned short               errorWord            = '?',

                       ByteOrder::Enum              byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `std::vector<unsigned short>`.

    static int utf8ToUtf16(

                       std::vector<unsigned short> *dstVector,

                       const char                  *srcString,

                       bsl::size_t                 *numCodePointsWritten = 0,

                       unsigned short               errorWord            = '?',

                       ByteOrder::Enum              byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `std::pmr::vector<unsigned short>`;

    /// declared only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf8ToUtf16(

              std::pmr::vector<unsigned short> *dstVector,

              const char                       *srcString,

              bsl::size_t                      *numCodePointsWritten = 0,

              unsigned short                    errorWord            = '?',

              ByteOrder::Enum                   byteOrder = ByteOrder::e_HOST);

#endif


    /// Load into the specified `dstBuffer` of the specified `dstCapacity`,

    /// the result of converting the specified UTF-8 `srcString` to its

    /// UTF-16 equivalent.  Optionally specify `numCodePointsWritten`, which

    /// (if not 0) indicates the location of the variable into which the

    /// number of UTF-16 code points (including the null terminator) written

    /// is to be loaded.  Optionally specify `numWordsWritten`, which (if

    /// not 0) indicates the location of the modifiable variable into which

    /// the number of `short` *memory words* written (including the null

    /// terminator) is to be loaded.  Optionally specify an `errorWord` to

    /// be substituted (if not 0) for invalid encodings in the input string.

    /// Invalid encodings are multi-byte encoding parts out of sequence,

    /// non-minimal UTF-8 encodings of code points, or code points outside

    /// the ranges that UTF-16 can validly encode (in the range

    /// `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0x10ffff ]`).  If `errorWord` is

    /// 0, invalid input sequences are ignored (i.e., produce no

    /// corresponding output).  Optionally specify `byteOrder` to indicate

    /// the byte order of the UTF-16 output; if `byteOrder` is not

    /// specified, the output is assumed to be in host byte order.  Return 0

    /// on success and a bit-wise or of the bits specified by

    /// `CharConvertStatus::Enum` otherwise to indicate that there were

    /// invalid input sequences or if `dstCapacity` was inadequate to store

    /// the output.  If `dstCapacity > 0` yet `dstCapacity` specifies a

    /// buffer too small to hold the output, the maximal null-terminated

    /// prefix of the properly converted result string is loaded into

    /// `dstBuffer`.  The behavior is undefined unless `dstBuffer` refers to

    /// an array of at least `dstCapacity` elements, `errorWord` is either 0

    /// or a valid single-word encoded UTF-16 code point (in the range

    /// `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0xffff ]`), and `srcString` is

    /// null-terminated when supplied as a `const char *`.  Note that if

    /// `dstCapacity` is 0, `*dstBuffer` is not modified and this function

    /// returns a value with `CharConvertStatus::k_OUT_OF_SPACE_BIT` set and

    /// 0 is written into `*numCodePointsWritten` and `*numWordsWritten` (if

    /// those pointers are non-null), since there is insufficient space for

    /// even a null terminator alone.  Also note that one code point can

    /// occupy multiple 16-bit *words*, so that `*numWordsWritten` may be

    /// greater than `*numCodePointsWritten`, and therefore that an input

    /// `srcString` of `dstCapacity` code points may not fit into

    /// `dstBuffer`; however, an input `srcString` of `dstCapacity` bytes

    /// (including null terminator, if present) will always fit (since the

    /// UTF-8 encoding of a code point requires at least as many bytes as

    /// the UTF-16 encoding requires words).  Also note that if `srcString`

    /// is a `bsl::string_view`, it may contain embedded null bytes that

    /// will be translated to null words embedded in the output.

    static int utf8ToUtf16(

                          unsigned short          *dstBuffer,

                          bsl::size_t              dstCapacity,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          bsl::size_t             *numWordsWritten      = 0,

                          unsigned short           errorWord            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into an `unsigned short` buffer of the

    /// specified `dstCapacity`.

    static int utf8ToUtf16(

                          unsigned short           *dstBuffer,

                          bsl::size_t               dstCapacity,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          bsl::size_t              *numWordsWritten      = 0,

                          unsigned short            errorWord            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);


    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a

    /// `bsl::string_view` and loading the UTF-16 result into a `wchar_t`

    /// buffer of the specified `dstCapacity`; see the `unsigned short`

    /// buffer overload above for the contract.

    static int utf8ToUtf16(

                          wchar_t                 *dstBuffer,

                          bsl::size_t              dstCapacity,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          bsl::size_t             *numWordsWritten      = 0,

                          wchar_t                  errorWord            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf8ToUtf16` taking the UTF-8 input as a `const char *`

    /// (which the contract above requires to be null-terminated) and

    /// loading the UTF-16 result into a `wchar_t` buffer of the specified

    /// `dstCapacity`; see the `unsigned short` buffer overload above for

    /// the contract.

    static int utf8ToUtf16(

                          wchar_t                  *dstBuffer,

                          bsl::size_t               dstCapacity,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          bsl::size_t              *numWordsWritten      = 0,

                          wchar_t                   errorWord            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    /// Load into the specified `dstBuffer` of the specified `dstCapacity`

    /// the null-terminated UTF-16 equivalent of the specified UTF-8

    /// `srcString`, supplied as a string view (so it need not be

    /// null-terminated and may contain embedded null bytes).  The optional

    /// arguments, return value, and preconditions are those described for

    /// the `const char *` overload writing to a `char16_t` buffer.

    static int utf8ToUtf16(

                          char16_t                *dstBuffer,

                          bsl::size_t              dstCapacity,

                          const bsl::string_view&  srcString,

                          bsl::size_t             *numCodePointsWritten = 0,

                          bsl::size_t             *numWordsWritten      = 0,

                          char16_t                 errorChar            = '?',

                          ByteOrder::Enum          byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstBuffer` of the specified `dstCapacity`,

    /// the result of converting the specified UTF-8 `srcString` to its

    /// UTF-16 equivalent.  Optionally specify `numCodePointsWritten`, which

    /// (if not 0) indicates the location of the variable into which the

    /// number of UTF-16 code points (including the terminating 0) written

    /// is to be loaded.  Optionally specify `numWordsWritten`, which (if

    /// not 0) indicates the location of the modifiable variable into which

    /// the number of `char16_t` *memory words* written (including the null

    /// terminator) is to be loaded.  Optionally specify an `errorChar` to

    /// be substituted (if not 0) for invalid encodings in the input string.

    /// Invalid encodings are multi-byte encoding parts out of sequence,

    /// non-minimal UTF-8 encodings of code points, or code points outside

    /// the ranges that UTF-16 can validly encode (in the range

    /// `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0x10ffff ]`).  Optionally specify

    /// `byteOrder` to indicate the byte order of the UTF-16 output; if

    /// `byteOrder` is not specified, the output is assumed to be in host

    /// byte order.  If `errorChar` is 0, invalid input sequences are

    /// ignored (i.e., produce no corresponding output).  Return 0 on

    /// success and a bit-wise or of the bits specified by

    /// `CharConvertStatus::Enum` otherwise to indicate that there were

    /// invalid sequences or if `dstCapacity` was inadequate to store the

    /// output.  If `dstCapacity > 0` yet `dstCapacity` specifies a buffer

    /// too small to hold the output, the maximal null-terminated prefix of

    /// the properly converted result string is loaded into `dstBuffer`.

    /// The behavior is undefined unless `dstBuffer`, if specified, refers

    /// to an array of at least `dstCapacity` elements, `errorChar` is

    /// either 0 or a valid single-word encoded UTF-16 code point (in the

    /// range `[ 1 .. 0xd7ff ]` or `[ 0xe000 .. 0xffff ]`), and `srcString`

    /// is null-terminated if supplied as a `const char *`.  Note that if

    /// `dstCapacity` is 0, `*dstBuffer` is not modified and this function

    /// returns a value with `CharConvertStatus::k_OUT_OF_SPACE_BIT` set and

    /// 0 is written into `*numCodePointsWritten` and `*numWordsWritten` (if

    /// those pointers are non-null), since there is insufficient space for

    /// even a null terminator alone.  Also note that one code point can

    /// occupy multiple 16-bit words, so that `*numWordsWritten` may be

    /// greater than `*numCodePointsWritten`, and therefore that an input

    /// `srcString` of `dstCapacity` code points may not fit into

    /// `dstBuffer`.  However, an input `srcString` of `dstCapacity` bytes

    /// (including terminating 0, if present) will always fit (since the

    /// UTF-8 encoding of a code point requires at least as many bytes as

    /// the UTF-16 encoding requires words).  Also note that if `srcString`

    /// is a `bsl::string_view`, it may contain embedded null bytes that

    /// will be translated to null words embedded in the output.

    static int utf8ToUtf16(

                          char16_t                 *dstBuffer,

                          bsl::size_t               dstCapacity,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          bsl::size_t              *numWordsWritten      = 0,

                          char16_t                  errorChar            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#endif


                        // -- UTF-16 to UTF-8 Methods


    /// Return the length, in bytes, needed for a buffer to hold the

    /// null-terminated UTF-8 string translated from the specified UTF-16

    /// string `srcBuffer` (including the terminating '\0' in the returned

    /// count).  Optionally specify `endPtr`, referring to one past the last

    /// input character.  If `endPtr` is not supplied, or is 0, treat

    /// `srcBuffer` as null-terminated.  Optionally specify `byteOrder`

    /// indicating the byte order of `srcBuffer`; if `byteOrder` is not

    /// supplied, the host byte order is used.  Note that this function will

    /// return the size `utf16ToUtf8` will require, assuming the `errorByte`

    /// argument to `utf16ToUtf8` is non-zero.

    static bsl::size_t computeRequiredUtf8Bytes(

                                              const unsigned short *srcBuffer,

                                              const unsigned short *endPtr = 0,

                                              ByteOrder::Enum       byteOrder =

                                                            ByteOrder::e_HOST);

    /// Return the number of bytes, including the terminating '\0', needed

    /// to hold the UTF-8 translation of the specified UTF-16 `srcBuffer`

    /// supplied as `wchar_t` words.  The optional `endPtr` and `byteOrder`

    /// have the meaning described for the `unsigned short` overload.

    static bsl::size_t computeRequiredUtf8Bytes(

                                              const wchar_t        *srcBuffer,

                                              const wchar_t        *endPtr = 0,

                                              ByteOrder::Enum       byteOrder =

                                                            ByteOrder::e_HOST);

#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    /// Return the number of bytes, including the terminating '\0', needed

    /// to hold the UTF-8 translation of the specified UTF-16 `srcBuffer`

    /// supplied as `char16_t` words.  The optional `endPtr` and `byteOrder`

    /// have the meaning described for the `unsigned short` overload.

    static bsl::size_t computeRequiredUtf8Bytes(

                                              const char16_t       *srcBuffer,

                                              const char16_t       *endPtr = 0,

                                              ByteOrder::Enum       byteOrder =

                                                            ByteOrder::e_HOST);

#endif


    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`,

    /// discarding any previous contents of `dstString`.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more

    /// invalid sequences were encountered in the input.

    static int utf16ToUtf8(bsl::string          *dstString,

                           const unsigned short *srcString,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstString` the result of converting the

    /// specified UTF-16 `srcString` to its UTF-8 equivalent.  Optionally

    /// specify `numCodePointsWritten`, which (if not 0) indicates the

    /// location of the modifiable variable into which the number of Unicode

    /// code points written, including the null terminator, is to be loaded,

    /// where one code point may occupy multiple bytes.  Optionally specify

    /// an `errorByte` to be substituted (if not 0) for invalid encodings in

    /// the input string.  Invalid encodings are incomplete multi-word

    /// encodings or parts of a two-word encoding out of their proper

    /// sequence.  If `errorByte` is 0, invalid input sequences are ignored

    /// (i.e., produce no corresponding output).  Any previous contents of

    /// the destination are discarded.  Optionally specify `byteOrder` to

    /// indicate the byte order of the UTF-16 input; if `byteOrder` is not

    /// specified, the input is assumed to be in host byte order.  Return 0

    /// on success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or

    /// more invalid sequences were encountered in the input.  The behavior

    /// is undefined unless `errorByte` is either 0 or a valid single-byte

    /// Unicode code point (`0 < errorByte < 0x80`) and `srcString` is

    /// null-terminated if supplied as a pointer.  Note that if `srcString`

    /// is a `bsl::wstring_view` or `bsl::u16string_view`, it may contain

    /// embedded 0 words that will be translated to null bytes embedded in

    /// the output.

    static int utf16ToUtf8(std::string          *dstString,

                           const unsigned short *srcString,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`,

    /// discarding any previous contents of `dstString`.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more

    /// invalid sequences were encountered in the input.  This overload is

    /// declared only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is

    /// defined.

    static int utf16ToUtf8(

                          std::pmr::string     *dstString,

                          const unsigned short *srcString,

                          bsl::size_t          *numCodePointsWritten = 0,

                          char                  errorByte            = '?',

                          ByteOrder::Enum       byteOrder = ByteOrder::e_HOST);

#endif

    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString` having the specified

    /// `srcLengthInWords` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                          bsl::string          *dstString,

                          const unsigned short *srcString,

                          bsl::size_t           srcLengthInWords,

                          bsl::size_t          *numCodePointsWritten = 0,

                          char                  errorByte            = '?',

                          ByteOrder::Enum       byteOrder = ByteOrder::e_HOST);

    /// Load into the specified `std::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString` having the specified

    /// `srcLengthInWords` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                          std::string          *dstString,

                          const unsigned short *srcString,

                          bsl::size_t           srcLengthInWords,

                          bsl::size_t          *numCodePointsWritten = 0,

                          char                  errorByte            = '?',

                          ByteOrder::Enum       byteOrder = ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString` having the specified

    /// `srcLengthInWords` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf16ToUtf8(

                          std::pmr::string     *dstString,

                          const unsigned short *srcString,

                          bsl::size_t           srcLengthInWords,

                          bsl::size_t          *numCodePointsWritten = 0,

                          char                  errorByte            = '?',

                          ByteOrder::Enum       byteOrder = ByteOrder::e_HOST);

#endif

    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a wide string view

    /// that may contain embedded 0 words, discarding any previous contents

    /// of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::string              *dstString,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a wide string view

    /// that may contain embedded 0 words, discarding any previous contents

    /// of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      std::string              *dstString,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a wide string view

    /// that may contain embedded 0 words, discarding any previous contents

    /// of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf16ToUtf8(

                      std::pmr::string         *dstString,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#endif

    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `wchar_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(bsl::string     *dstString,

                           const wchar_t   *srcString,

                           bsl::size_t     *numCodePointsWritten = 0,

                           char             errorByte            = '?',

                           ByteOrder::Enum  byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `wchar_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(std::string     *dstString,

                           const wchar_t   *srcString,

                           bsl::size_t     *numCodePointsWritten = 0,

                           char             errorByte            = '?',

                           ByteOrder::Enum  byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `wchar_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf16ToUtf8(std::pmr::string *dstString,

                           const wchar_t    *srcString,

                           bsl::size_t      *numCodePointsWritten = 0,

                           char              errorByte            = '?',

                           ByteOrder::Enum   byteOrder            =

                                                            ByteOrder::e_HOST);

#endif

#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a `char16_t` string

    /// view that may contain embedded 0 words, discarding any previous

    /// contents of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::string                *dstString,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a `char16_t` string

    /// view that may contain embedded 0 words, discarding any previous

    /// contents of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      std::string                *dstString,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified UTF-16 `srcString`, a `char16_t` string

    /// view that may contain embedded 0 words, discarding any previous

    /// contents of `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf16ToUtf8(

                      std::pmr::string           *dstString,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

    /// Load into the specified `bsl::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `char16_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(bsl::string     *dstString,

                           const char16_t  *srcString,

                           bsl::size_t     *numCodePointsWritten = 0,

                           char             errorByte            = '?',

                           ByteOrder::Enum  byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `char16_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(std::string     *dstString,

                           const char16_t  *srcString,

                           bsl::size_t     *numCodePointsWritten = 0,

                           char             errorByte            = '?',

                           ByteOrder::Enum  byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    /// Load into the specified `std::pmr::string` `dstString` the UTF-8

    /// equivalent of the specified null-terminated UTF-16 `srcString`

    /// supplied as `char16_t` words, discarding any previous contents of

    /// `dstString`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is defined.

    static int utf16ToUtf8(std::pmr::string *dstString,

                           const char16_t   *srcString,

                           bsl::size_t      *numCodePointsWritten = 0,

                           char              errorByte            = '?',

                           ByteOrder::Enum   byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

#endif


    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString`, discarding any previous contents of `dstVector`.

    /// Return 0 on success and `CharConvertStatus::k_INVALID_INPUT_BIT` if

    /// one or more invalid sequences were encountered in the input.

    static int utf16ToUtf8(bsl::vector<char>    *dstVector,

                           const unsigned short *srcString,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstVector` the null-terminated result of

    /// converting the specified UTF-16 `*srcString` to its UTF-8

    /// equivalent.  Optionally specify `srcLengthInWords`, the number of

    /// `unsigned short`s of input.  If `srcLengthInWords` is not specified,

    /// the input must be terminated by a null word.  Optionally specify

    /// `numCodePointsWritten`, which (if not 0) indicates the location of

    /// the modifiable variable into which the number of Unicode code points

    /// written, including the null terminator, is to be loaded, where one

    /// code point may occupy multiple bytes.  Optionally specify an

    /// `errorByte` to be substituted (if not 0) for invalid encodings in

    /// the input string.  Invalid encodings are incomplete multi-word

    /// encodings or parts of a two-word encoding out of their proper

    /// sequence.  If `errorByte` is 0, invalid input sequences are ignored

    /// (i.e., produce no corresponding output).  Optionally specify

    /// `byteOrder` to indicate the byte order of the UTF-16 input; if

    /// `byteOrder` is not specified, the input is assumed to be in host

    /// byte order.  Any previous contents of the destination are discarded.

    /// Return 0 on success and `CharConvertStatus::k_INVALID_INPUT_BIT` if

    /// one or more invalid sequences were encountered in the input.  The

    /// behavior is undefined unless either `srcLengthInWords` is passed or

    /// `srcString` is null-terminated, and `errorByte` is either 0 or a

    /// valid single-byte Unicode code point (`0 < errorByte < 0x80`).  Note

    /// that this overload has no `srcLengthInWords` parameter, so

    /// `srcString` must be null-terminated.

    static int utf16ToUtf8(std::vector<char>    *dstVector,

                           const unsigned short *srcString,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString`, discarding any previous contents of `dstVector`.

    /// Return 0 on success and `CharConvertStatus::k_INVALID_INPUT_BIT` if

    /// one or more invalid sequences were encountered in the input.  This

    /// overload is declared only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR`

    /// is defined.

    static int utf16ToUtf8(std::pmr::vector<char> *dstVector,

                           const unsigned short   *srcString,

                           bsl::size_t            *numCodePointsWritten = 0,

                           char                    errorByte            = '?',

                           ByteOrder::Enum         byteOrder            =

                                                            ByteOrder::e_HOST);

#endif

    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16 `srcString`

    /// consisting of the specified `srcLengthInWords` `unsigned short`s,

    /// discarding any previous contents of `dstVector`.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more

    /// invalid sequences were encountered in the input.

    static int utf16ToUtf8(bsl::vector<char>    *dstVector,

                           const unsigned short *srcString,

                           bsl::size_t           srcLengthInWords,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16 `srcString`

    /// consisting of the specified `srcLengthInWords` `unsigned short`s,

    /// discarding any previous contents of `dstVector`.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more

    /// invalid sequences were encountered in the input.

    static int utf16ToUtf8(std::vector<char>    *dstVector,

                           const unsigned short *srcString,

                           bsl::size_t           srcLengthInWords,

                           bsl::size_t          *numCodePointsWritten = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16 `srcString`

    /// consisting of the specified `srcLengthInWords` `unsigned short`s,

    /// discarding any previous contents of `dstVector`.  Return 0 on

    /// success and `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more

    /// invalid sequences were encountered in the input.  This overload is

    /// declared only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf16ToUtf8(std::pmr::vector<char> *dstVector,

                           const unsigned short   *srcString,

                           bsl::size_t             srcLengthInWords,

                           bsl::size_t            *numCodePointsWritten = 0,

                           char                    errorByte            = '?',

                           ByteOrder::Enum         byteOrder            =

                                                            ByteOrder::e_HOST);

#endif


    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16

    /// `srcString`, a wide string view, discarding any previous contents of

    /// `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::vector<char>        *dstVector,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16

    /// `srcString`, a wide string view, discarding any previous contents of

    /// `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      std::vector<char>        *dstVector,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16

    /// `srcString`, a wide string view, discarding any previous contents of

    /// `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf16ToUtf8(

                      std::pmr::vector<char>   *dstVector,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

#endif

    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `wchar_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::vector<char>            *dstVector,

                      const wchar_t                *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `wchar_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      std::vector<char>            *dstVector,

                      const wchar_t                *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `wchar_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf16ToUtf8(

                      std::pmr::vector<char>       *dstVector,

                      const wchar_t                *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

#endif


#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16

    /// `srcString`, a `char16_t` string view, discarding any previous

    /// contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::vector<char>          *dstVector,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstVector` the null-terminated result of

    /// converting the specified UTF-16 `srcString` to its UTF-8 equivalent.

    /// Optionally specify `numCodePointsWritten`, which (if not 0)

    /// indicates the location of the modifiable variable into which the

    /// number of Unicode code points written, including the null

    /// terminator, is to be loaded, where one code point may occupy

    /// multiple bytes.  Optionally specify an `errorByte` to be substituted

    /// (if not 0) for invalid encodings in the input string.  Invalid

    /// encodings are incomplete multi-word encodings or parts of a two-word

    /// encoding out of their proper sequence.  If `errorByte` is 0, invalid

    /// input sequences are ignored (i.e., produce no corresponding output).

    /// Optionally specify `byteOrder` to indicate the byte order of the

    /// UTF-16 input; if `byteOrder` is not specified, the input is assumed

    /// to be in host byte order.  Any previous contents of the destination

    /// are discarded.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  The behavior is undefined

    /// unless `errorByte` is either 0 or a valid single-byte Unicode code

    /// point (`0 < errorByte < 0x80`) and `srcString` is null-terminated if

    /// supplied as a pointer.  Note that if `srcString` is a

    /// `bsl::u16string_view`, it may contain embedded 0 words that will be

    /// translated to null bytes embedded in the output.

    static int utf16ToUtf8(

                      std::vector<char>          *dstVector,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified UTF-16

    /// `srcString`, a `char16_t` string view, discarding any previous

    /// contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf16ToUtf8(

                      std::pmr::vector<char>     *dstVector,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

    /// Load into the specified `bsl::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `char16_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      bsl::vector<char>            *dstVector,

                      const char16_t               *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `std::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `char16_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.

    static int utf16ToUtf8(

                      std::vector<char>            *dstVector,

                      const char16_t               *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

# ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    /// Load into the specified `std::pmr::vector<char>` `dstVector` the

    /// null-terminated UTF-8 equivalent of the specified null-terminated

    /// UTF-16 `srcString` supplied as `char16_t` words, discarding any

    /// previous contents of `dstVector`.  Return 0 on success and

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if one or more invalid

    /// sequences were encountered in the input.  This overload is declared

    /// only when `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR` is defined.

    static int utf16ToUtf8(

                      std::pmr::vector<char>       *dstVector,

                      const char16_t               *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

# endif

#endif


    /// Load into the specified `dstBuffer` of the specified `dstCapacity`

    /// the UTF-8 equivalent of the specified null-terminated UTF-16

    /// `srcString`.  Return 0 on success and otherwise a bitwise-or of

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` (invalid input sequences)

    /// and/or `CharConvertStatus::k_OUT_OF_SPACE_BIT` (out of space).

    static int utf16ToUtf8(char                 *dstBuffer,

                           bsl::size_t           dstCapacity,

                           const unsigned short *srcString,

                           bsl::size_t          *numCodePointsWritten = 0,

                           bsl::size_t          *numBytesWritten      = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstBuffer` of the specified `dstCapacity`

    /// the UTF-8 equivalent of the specified UTF-16 `srcString` having the

    /// specified `srcLengthInWords` words.  Return 0 on success and

    /// otherwise a bitwise-or of `CharConvertStatus::k_INVALID_INPUT_BIT`

    /// (invalid input sequences) and/or

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` (out of space).

    static int utf16ToUtf8(char                 *dstBuffer,

                           bsl::size_t           dstCapacity,

                           const unsigned short *srcString,

                           bsl::size_t           srcLengthInWords,

                           bsl::size_t          *numCodePointsWritten = 0,

                           bsl::size_t          *numBytesWritten      = 0,

                           char                  errorByte            = '?',

                           ByteOrder::Enum       byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstBuffer` of the specified `dstCapacity`

    /// the UTF-8 equivalent of the specified UTF-16 `srcString`, a wide

    /// string view that may contain embedded 0 words.  Return 0 on success

    /// and otherwise a bitwise-or of

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` (invalid input sequences)

    /// and/or `CharConvertStatus::k_OUT_OF_SPACE_BIT` (out of space).

    static int utf16ToUtf8(

                      char                     *dstBuffer,

                      bsl::size_t               dstCapacity,

                      const bsl::wstring_view&  srcString,

                      bsl::size_t              *numCodePointsWritten = 0,

                      bsl::size_t              *numBytesWritten      = 0,

                      char                      errorByte            = '?',

                      ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load, into the specified `dstBuffer` of the specified `dstCapacity`,

    /// the result of converting the specified UTF-16 `srcString` to its

    /// UTF-8 equivalent.  Optionally specify `numCodePointsWritten`, which

    /// (if not 0) indicates the location of the modifiable variable into

    /// which the number of Unicode code points (including the terminating

    /// 0, if any) written is to be loaded, where one code point can occupy

    /// multiple bytes.  Optionally specify `numBytesWritten`, which (if not

    /// 0) indicates the location of the modifiable variable into which the

    /// number of bytes written (including the null terminator, if any) is

    /// to be loaded.  Optionally specify an `errorByte` to be substituted

    /// (if not 0) for invalid encodings in the input string.  Invalid

    /// encodings are incomplete multi-word encodings or parts of a two-word

    /// encoding out of their proper sequence.  If `errorByte` is 0, invalid

    /// input sequences are ignored (i.e., produce no corresponding output).

    /// Optionally specify `byteOrder` to indicate the byte order of the

    /// UTF-16 input; if `byteOrder` is not specified, the input is assumed

    /// to be in host byte order.  Return 0 on success and a bitwise-or of

    /// the flags defined by `CharConvertStatus::Enum` otherwise.

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` will be set if one or more

    /// invalid sequences were encountered in the input, and

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` will be set if the output

    /// space was exhausted before conversion was complete.  The behavior is

    /// undefined unless `dstBuffer` refers to an array of at least

    /// `dstCapacity` elements, `errorByte` is either 0 or a valid

    /// single-byte Unicode code point (`0 < errorByte < 0x80`), and

    /// `srcString` is null-terminated if supplied as a pointer.  Note that

    /// if `dstCapacity` is 0, this function returns a value with

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` set and 0 is written into

    /// `*numCodePointsWritten` and `*numBytesWritten` (if those pointers

    /// are non-null), since there is insufficient space for even a null

    /// terminator alone.  Also note that since UTF-8 is a variable-length

    /// encoding, `*numBytesWritten` may be up to four times

    /// `*numCodePointsWritten`, and therefore that an input `srcString` of

    /// `dstCapacity` code points (including the terminating 0, if present)

    /// may not fit into `dstBuffer`.  A one-word (two-byte) UTF-16 code

    /// point will require one to three UTF-8 octets (bytes); a two-word

    /// (four-byte) UTF-16 code point will always require four UTF-8 octets.

    /// Also note that the amount of room needed will vary with the contents

    /// of the data and the language being translated, but never will the

    /// number of bytes output exceed three times the number of words input.

    /// Also note that, if `dstCapacity > 0`, then, after completion,

    /// `strlen(dstBuffer) + 1 == *numBytesWritten`.  Also note that if

    /// `srcString` is a string view, it may contain embedded 0

    /// words that will be translated to null bytes embedded in the output.

    static int utf16ToUtf8(

                      char                         *dstBuffer,

                      bsl::size_t                   dstCapacity,

                      const wchar_t                *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      bsl::size_t                  *numBytesWritten      = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

#if defined(BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES)

    /// Overload of `utf16ToUtf8` that takes `srcString` as a

    /// `const bsl::u16string_view&`.  This declaration is compiled only

    /// when `BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES` is defined.

    /// The optional `numCodePointsWritten` and `numBytesWritten` pointers

    /// default to 0, `errorByte` defaults to '?', and `byteOrder` defaults

    /// to `ByteOrder::e_HOST`.  NOTE(review): presumably this shares the

    /// contract documented for the `const wchar_t *` fixed-buffer overload

    /// -- confirm against the implementation.

    static int utf16ToUtf8(

                      char                       *dstBuffer,

                      bsl::size_t                 dstCapacity,

                      const bsl::u16string_view&  srcString,

                      bsl::size_t                *numCodePointsWritten = 0,

                      bsl::size_t                *numBytesWritten      = 0,

                      char                        errorByte            = '?',

                      ByteOrder::Enum             byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Overload of `utf16ToUtf8` that takes `srcString` as a

    /// `const char16_t *`.  This declaration is compiled only when

    /// `BSLS_COMPILERFEATURES_SUPPORT_UNICODE_CHAR_TYPES` is defined.  The

    /// optional `numCodePointsWritten` and `numBytesWritten` pointers

    /// default to 0, `errorByte` defaults to '?', and `byteOrder` defaults

    /// to `ByteOrder::e_HOST`.  NOTE(review): presumably `srcString` must be

    /// null-terminated and this shares the contract documented for the

    /// `const wchar_t *` fixed-buffer overload -- confirm against the

    /// implementation.

    static int utf16ToUtf8(

                      char                         *dstBuffer,

                      bsl::size_t                   dstCapacity,

                      const char16_t               *srcString,

                      bsl::size_t                  *numCodePointsWritten = 0,

                      bsl::size_t                  *numBytesWritten      = 0,

                      char                          errorByte            = '?',

                      ByteOrder::Enum               byteOrder            =

                                                            ByteOrder::e_HOST);

#endif

};


}  // close package namespace


#endif


// ----------------------------------------------------------------------------

// Copyright 2015 Bloomberg Finance L.P.

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

//

//     http://www.apache.org/licenses/LICENSE-2.0

//

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.

// ----------------------------- END-OF-FILE ----------------------------------


/** @} */

/** @} */

/** @} */

bdlde_byteorder.h

bdlde_charconvertstatus.h

bsls_ident.h

bsls_libraryfeatures.h

bsl::basic_string_view
Definition bslstl_stringview.h:441

bsl::basic_string
Definition bslstl_string.h:1281

bsl::vector
Definition bslstl_vector.h:1025

BSLS_IDENT
#define BSLS_IDENT(str)
Definition bsls_ident.h:195

bdlde
Definition bdlde_base64alphabet.h:118

bdlde::ByteOrder::Enum
Enum
Definition bdlde_byteorder.h:134

bdlde::ByteOrder::e_HOST
@ e_HOST
Definition bdlde_byteorder.h:144

bdlde::CharConvertUtf16
Definition bdlde_charconvertutf16.h:354

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(wchar_t *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::computeRequiredUtf8Bytes
static bsl::size_t computeRequiredUtf8Bytes(const wchar_t *srcBuffer, const wchar_t *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(std::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::computeRequiredUtf8Bytes
static bsl::size_t computeRequiredUtf8Bytes(const unsigned short *srcBuffer, const unsigned short *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(bsl::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::computeRequiredUtf16Words
static bsl::size_t computeRequiredUtf16Words(const char *srcBuffer, const char *endPtr=0)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(wchar_t *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(bsl::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(std::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(std::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(std::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(unsigned short *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(unsigned short *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(bsl::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(std::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(bsl::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf16ToUtf8
static int utf16ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf16::utf8ToUtf16
static int utf8ToUtf16(bsl::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)