doxygen/bde_api_prod/bdlde__charconvertutf32_8h_source.html

/// @file bdlde_charconvertutf32.h

///

/// The content of this file has been pre-processed for Doxygen.

///


// bdlde_charconvertutf32.h                                           -*-C++-*-

#ifndef INCLUDED_BDLDE_CHARCONVERTUTF32

#define INCLUDED_BDLDE_CHARCONVERTUTF32


#include <bsls_ident.h>

BSLS_IDENT("$Id: $")


/// @defgroup bdlde_charconvertutf32 bdlde_charconvertutf32

/// @brief  Provide fast, safe conversion between UTF-8 encoding and UTF-32.

/// @addtogroup bdl

/// @{

/// @addtogroup bdlde

/// @{

/// @addtogroup bdlde_charconvertutf32

/// @{

///

/// <h1> Outline </h1>

/// * <a href="#bdlde_charconvertutf32-purpose"> Purpose</a>

/// * <a href="#bdlde_charconvertutf32-classes"> Classes </a>

/// * <a href="#bdlde_charconvertutf32-description"> Description </a>

///   * <a href="#bdlde_charconvertutf32-history-and-motivation"> History and Motivation </a>

///   * <a href="#bdlde_charconvertutf32-usage"> Usage </a>

///     * <a href="#bdlde_charconvertutf32-example-1-round-trip-multi-lingual-conversion"> Example 1: Round-Trip Multi-Lingual Conversion </a>

///

/// # Purpose {#bdlde_charconvertutf32-purpose}

/// Provide fast, safe conversion between UTF-8 encoding and UTF-32.

///

/// # Classes {#bdlde_charconvertutf32-classes}

///

/// -  bdlde::CharConvertUtf32: namespace for conversion between UTF-8 and UTF-32

///

/// # Description {#bdlde_charconvertutf32-description}

/// This component provides a `struct`, `bdlde::CharConvertUtf32`,

/// that provides a suite of static functions supporting the *fast* conversion

/// of UTF-8 data to UTF-32, and vice versa.  UTF-8 input can take the form of

/// null-terminated "C" strings or `bsl::string_view`s, while UTF-32 input can

/// only take the form of null-terminated buffers of `unsigned int`.  Output can

/// be to STL vectors, `bsl::string`s (in the case of UTF-8), and fixed-length

/// buffers.  Invalid byte sequences and code points forbidden by either

/// encoding are removed and (optionally) replaced by an error byte or word

/// provided by the caller.  The byte order of the UTF-32 input or output can be

/// specified via the optional `byteOrder` argument, which is assumed to be host

/// byte order if not specified.  The byte or word count and code point count

/// that are optionally returned through pointer arguments include the

/// terminating null byte or word.

///

/// ## History and Motivation {#bdlde_charconvertutf32-history-and-motivation}

///

///

/// UTF-8 is a Unicode encoding that allows 32-bit Unicode to be represented

/// using null-terminated (8-bit) byte strings, while allowing "standard ASCII"

/// strings to be used "as-is".  Note that UTF-8 is described in detail in RFC

/// 3629 (http://www.ietf.org/rfc/rfc3629.txt).

///

/// UTF-32 is simply a name for storing raw Unicode values as sequential

/// `unsigned int` values in memory.

///

/// Valid Unicode values are in the ranges `[ 1 .. 0xd7ff ]` and

/// `[ 0xe000 .. 0x10ffff ]`.  The value `0` is used to terminate sequences.

///

/// The functions here that translate to fixed buffers make a single pass

/// through the data.  The functions that translate to `bsl::string`s and

/// `bsl::vector`s, however, like the `glib` conversion routines, make two

/// passes: a size estimation pass, after which the output container is sized

/// appropriately, and then the translation pass.

///

/// The methods that output to a `vector` or `string` will all grow the output

/// object as necessary to fit the data, and in the end will exactly resize the

/// object to the output (including the terminating 0 for `vector`, not

/// including it for `string`).  The resizing will not affect the capacity.

///

/// Non-minimal UTF-8 encodings of code points are reported as errors.  Octets

/// and post-conversion code points in the forbidden ranges are treated as

/// errors and removed if 0 is specified as `errorWord`, or replaced with

/// `errorWord` otherwise.

///

/// ## Usage {#bdlde_charconvertutf32-usage}

///

///

/// This section illustrates intended use of this component.

///

/// ### Example 1: Round-Trip Multi-Lingual Conversion {#bdlde_charconvertutf32-example-1-round-trip-multi-lingual-conversion}

///

///

/// The following snippets of code illustrate a typical use of the

/// `bdlde::CharConvertUtf32` struct's utility functions, first converting from

/// UTF-8 to UTF-32, and then converting back to make sure the round trip

/// returns the same value.

///

/// First, we declare a string of UTF-8 containing single-, double-, triple-,

/// and quadruple-octet code points:

/// @code

/// const char utf8MultiLang[] = {

///     "Hello"                                         // -- ASCII

///     "\xce\x97"         "\xce\x95"       "\xce\xbb"  // -- Greek

///     "\xe4\xb8\xad"     "\xe5\x8d\x8e"               // -- Chinese

///     "\xe0\xa4\xad"     "\xe0\xa4\xbe"               // -- Hindi

///     "\xf2\x94\xb4\xa5" "\xf3\xb8\xac\x83" };        // -- Quad octets

/// @endcode

/// Then, we declare an `enum` summarizing the counts of code points in the

/// string and verify that the counts add up to the length of the string:

/// @code

/// enum { NUM_ASCII_CODE_POINTS   = 5,

///        NUM_GREEK_CODE_POINTS   = 3,

///        NUM_CHINESE_CODE_POINTS = 2,

///        NUM_HINDI_CODE_POINTS   = 2,

///        NUM_QUAD_CODE_POINTS    = 2 };

///

/// assert(1 * NUM_ASCII_CODE_POINTS +

///        2 * NUM_GREEK_CODE_POINTS +

///        3 * NUM_CHINESE_CODE_POINTS +

///        3 * NUM_HINDI_CODE_POINTS +

///        4 * NUM_QUAD_CODE_POINTS == bsl::strlen(utf8MultiLang));

/// @endcode

/// Next, we declare the vector where our UTF-32 output will go.  Note that it

/// is not necessary to declare a separate `utf32CodePointsWritten` variable to

/// receive the number of code points written, since that number will be the

/// size of the vector when we are done.

/// @code

/// bsl::vector<unsigned int> v32;

/// @endcode

/// Note that it is a waste of time to `v32.reserve(sizeof(utf8MultiLang))`; it

/// is entirely redundant -- `v32` will automatically be grown to the correct

/// size.  Also note that if `v32` were not empty, that would not be a problem

/// -- any contents will be discarded.

///

/// Then, we do the translation to `UTF-32`:

/// @code

/// int retVal = bdlde::CharConvertUtf32::utf8ToUtf32(&v32,

///                                                   utf8MultiLang);

///

/// assert(0 == retVal);        // verify success

/// assert(0 == v32.back());    // verify null terminated

/// @endcode

/// Next, we verify that the number of code points that was returned is correct.

/// Note that in UTF-32, the number of Unicode code points written is the same

/// as the number of 32-bit words written:

/// @code

/// enum { EXPECTED_CODE_POINTS_WRITTEN =

///                 NUM_ASCII_CODE_POINTS +

///                 NUM_GREEK_CODE_POINTS +

///                 NUM_CHINESE_CODE_POINTS +

///                 NUM_HINDI_CODE_POINTS +

///                 NUM_QUAD_CODE_POINTS  + 1 };

/// assert(EXPECTED_CODE_POINTS_WRITTEN == v32.size());

/// @endcode

/// Next, we calculate and confirm the difference between the number of UTF-32

/// words output and the number of bytes input.  The ASCII bytes will take 1

/// 32-bit word apiece, the Greek code points are double octets that will become

/// single `unsigned int` values, the Chinese code points are encoded as UTF-8

/// triple octets that will turn into single 32-bit words, the same for the

/// Hindi code points, and the quad code points are quadruple octets that will

/// turn into single `unsigned int` words:

/// @code

/// enum { SHRINKAGE =

///                   NUM_ASCII_CODE_POINTS   * (1-1) +

///                   NUM_GREEK_CODE_POINTS   * (2-1) +

///                   NUM_CHINESE_CODE_POINTS * (3-1) +

///                   NUM_HINDI_CODE_POINTS   * (3-1) +

///                   NUM_QUAD_CODE_POINTS    * (4-1) };

///

/// assert(v32.size() == sizeof(utf8MultiLang) - SHRINKAGE);

/// @endcode

/// Then, we go on to do the reverse `utf32ToUtf8` transform to turn it back

/// into UTF-8, and we should get a result identical to our original input.

/// Declare a `bsl::string` for our output, and a variable to count the number

/// of code points translated:

/// @code

/// bsl::string s;

/// bsl::size_t codePointsWritten;

/// @endcode

/// Again, note that it would be a waste of time for the caller to `resize` or

/// `reserve` `s`; it will be automatically `resize`d by the translator to the

/// right length.

///

/// Now, we do the reverse transform:

/// @code

/// retVal = bdlde::CharConvertUtf32::utf32ToUtf8(&s,

///                                               v32.begin(),

///                                               &codePointsWritten);

/// @endcode

/// Finally, we verify that a successful status was returned, that the output of

/// the reverse transform was identical to the original input, and that the

/// number of code points translated was as expected:

/// @code

/// assert(0 == retVal);

/// assert(utf8MultiLang  == s);

/// assert(s.length() + 1 == sizeof(utf8MultiLang));

///

/// assert(EXPECTED_CODE_POINTS_WRITTEN == codePointsWritten);

/// assert(v32.size()                   == codePointsWritten);

/// @endcode

/// @}

/** @} */

/** @} */


/** @addtogroup bdl

 * @{

 */

/** @addtogroup bdlde

 * @{

 */

/** @addtogroup bdlde_charconvertutf32

 * @{

 */


#include <bdlscm_version.h>


#include <bdlde_byteorder.h>

#include <bdlde_charconvertstatus.h>


#include <bsl_cstddef.h>            // 'bsl::size_t'

#include <bsl_string.h>

#include <bsl_string_view.h>

#include <bsl_vector.h>


#include <bsls_libraryfeatures.h>


#include <string>                   // 'std::string', 'std::pmr::string'

#include <vector>                   // 'std::vector', 'std::pmr::vector'


namespace bdlde {

                          // =======================

                          // struct CharConvertUtf32

                          // =======================


/// This `struct` provides a namespace for a suite of static functions to

/// convert buffers between UTF-8 and UTF-32.  Byte Order Mark (BOM) code

/// points are neither generated nor recognized as special, and thus may be

/// incorrect for the actual byte order of output.  If a BOM is present in

/// the input, it will be translated, whether correct (`0xfeff`) or

/// incorrect (`0xfffe`), into the output without any special handling.


struct CharConvertUtf32 {

    // CLASS METHODS
    //
    // Note: each contract comment below follows the first declaration of an
    // overload group and describes that whole group (e.g., it refers to
    // `bsl::string_view` input or to `srcStringLength`, which are parameters
    // of the overloads declared after the comment).


                              // UTF-8 to UTF-32 Methods


    static int utf8ToUtf32(bsl::vector<unsigned int> *dstVector,

                           const char                *srcString,

                           unsigned int               errorWord = '?',

                           ByteOrder::Enum            byteOrder =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstVector` the result of converting the

    /// specified UTF-8 `srcString` to its UTF-32 equivalent.  Optionally

    /// specify `errorWord` to be substituted, if not 0, for invalid

    /// encodings in the input string.  Optionally specify `byteOrder` to

    /// indicate the byte order of the UTF-32 output; if `byteOrder` is not

    /// specified, the output is assumed to be in host byte order.  Return 0

    /// on success and `CharConvertStatus::k_INVALID_INPUT_BIT` otherwise.

    /// Invalid encodings are multi-byte encoding parts out of sequence,

    /// non-minimal UTF-8 encodings, UTF-8 encodings more than four bytes in

    /// length, or code points outside the ranges that UTF-32 can validly

    /// encode (i.e., `[ 1 .. 0xd7ff ]` and `[ 0xe000 .. 0x10ffff ]`).  If

    /// `errorWord` is 0, invalid input sequences are ignored (i.e., produce

    /// no corresponding output).  Any previous contents of the destination

    /// are discarded.  The behavior is undefined unless `srcString` is

    /// null-terminated when specified as a `const char *`, and unless

    /// `errorWord` is either 0 or a valid Unicode code point.  Note that

    /// one code point always occupies one 32-bit *word* of output; there is

    /// no `numCodePointsWritten` argument since, after the call,

    /// `dstVector->size()` will equal the number of code points written.

    /// Also note that when the input is a `bsl::string_view`, it may

    /// contain embedded nulls, which are translated to zeroes in the

    /// output.  Also note that `errorWord` is assumed to be in host byte

    /// order.

    static int utf8ToUtf32(std::vector<unsigned int> *dstVector,

                           const char                *srcString,

                           unsigned int               errorWord = '?',

                           ByteOrder::Enum            byteOrder =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    static int utf8ToUtf32(

                std::pmr::vector<unsigned int> *dstVector,

                const char                     *srcString,

                unsigned int                    errorWord = '?',

                ByteOrder::Enum                 byteOrder = ByteOrder::e_HOST);

#endif

    static int utf8ToUtf32(bsl::vector<unsigned int> *dstVector,

                           const bsl::string_view&    srcString,

                           unsigned int               errorWord = '?',

                           ByteOrder::Enum            byteOrder =

                                                            ByteOrder::e_HOST);

    static int utf8ToUtf32(std::vector<unsigned int> *dstVector,

                           const bsl::string_view&    srcString,

                           unsigned int               errorWord = '?',

                           ByteOrder::Enum            byteOrder =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    static int utf8ToUtf32(

                std::pmr::vector<unsigned int> *dstVector,

                const bsl::string_view&         srcString,

                unsigned int                    errorWord = '?',

                ByteOrder::Enum                 byteOrder = ByteOrder::e_HOST);

#endif


    static int utf8ToUtf32(

                          unsigned int             *dstBuffer,

                          bsl::size_t               dstCapacity,

                          const char               *srcString,

                          bsl::size_t              *numCodePointsWritten = 0,

                          unsigned int              errorWord            = '?',

                          ByteOrder::Enum           byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstBuffer` of the specified `dstCapacity`,

    /// the result of converting the specified UTF-8 `srcString` to its

    /// UTF-32 equivalent.  Optionally specify `numCodePointsWritten`, which

    /// (if not 0) indicates the location of the variable into which the

    /// number of Unicode code points (including the null terminator)

    /// written is to be loaded.  Optionally specify `errorWord` to be

    /// substituted (if not 0) for invalid encodings in the input string.

    /// Invalid encodings are multi-byte encoding parts out of sequence,

    /// non-minimal UTF-8 encodings, UTF-8 encodings more than four bytes in

    /// length, or code points outside the ranges that UTF-32 can validly

    /// encode (i.e., `[ 1 .. 0xd7ff ]` and `[ 0xe000 .. 0x10ffff ]`).  If

    /// `errorWord` is 0, invalid input code points are ignored (i.e.,

    /// produce no corresponding output).  Optionally specify `byteOrder` to

    /// indicate the byte order of the UTF-32 output; if `byteOrder` is not

    /// specified, the output is assumed to be in host byte order.  Return 0

    /// on success and a bit-wise OR of the masks defined by

    /// `CharConvertStatus::Enum` otherwise, where

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` will be set if one or more

    /// invalid sequences were encountered in the input, and

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` will be set if the

    /// output space was exhausted before conversion was complete.  If

    /// `dstCapacity > 0` yet `dstCapacity` specifies a buffer too small to

    /// hold the output, the maximal null-terminated prefix of the properly

    /// converted result string is loaded into `dstBuffer`.  The behavior is

    /// undefined unless `dstBuffer` refers to an array of at least

    /// `dstCapacity` elements, `srcString`, if specified as a

    /// `const char *`, is null-terminated, and `errorWord` is either 0 or a

    /// valid UTF-32 code point (in the range `[ 1 .. 0xd7ff ]` or

    /// `[ 0xe000 .. 0x10ffff ]`).  Note that if `dstCapacity` is 0,

    /// `*dstBuffer` is not modified and this function returns a value with

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` set and 0 is written

    /// into `*numCodePointsWritten` (if that pointer is not 0), since there

    /// is insufficient space for even a null terminator alone.  Also note

    /// that one Unicode code point always occupies one 32-bit *word* in

    /// UTF-32, but may occupy more than one *byte* of UTF-8, so that

    /// `*numCodePointsWritten` equals the number of *words* written.  Also

    /// note that `errorWord` is assumed to be in host byte order.

    static int utf8ToUtf32(

                       unsigned int            *dstBuffer,

                       bsl::size_t              dstCapacity,

                       const bsl::string_view&  srcString,

                       bsl::size_t             *numCodePointsWritten = 0,

                       unsigned int             errorWord            = '?',

                       ByteOrder::Enum          byteOrder = ByteOrder::e_HOST);


                           // UTF-32 to UTF-8 Methods


    static int utf32ToUtf8(bsl::string           *dstString,

                           const unsigned int    *srcString,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstString` the result of converting the

    /// specified `srcString` of `UTF-32` values to `UTF-8` and return 0 on

    /// success or `CharConvertStatus::k_INVALID_INPUT_BIT` if invalid

    /// `UTF-32` values (in the range `[0xD800 .. 0xDFFF]` or above

    /// 0x10FFFF) are encountered.  Optionally specify `srcStringLength` as

    /// the number of `UTF-32` values to be converted.  If `srcStringLength`

    /// is specified, convert that many UTF-32 values from `srcString`

    /// (including zero values), otherwise convert values up to but not

    /// including a terminating zero value.  Optionally specify

    /// `numCodePointsWritten` to receive the number of `UTF-8` code points

    /// written to `dstString`, including the null-terminator.  Optionally

    /// specify `errorByte` as the character to be written to `dstString` as

    /// the translation of invalid `UTF-32` values; if not specified, `?` is

    /// used, and if given as 0, no character is written at all.  Optionally

    /// specify `byteOrder` to determine how `UTF-32` values in `srcString`

    /// are interpreted; if not given, host byte order is used.  The

    /// behavior is undefined if `errorByte` is 0x80 or above.  Note that if

    /// you are passing the `bsl::vector<unsigned int>` obtained from a call

    /// to `utf8ToUtf32` and using `srcStringLength`, you must take care to

    /// pass `vector.size() - 1` to `srcStringLength` to avoid embedding the

    /// terminating 0.

    static int utf32ToUtf8(std::string           *dstString,

                           const unsigned int    *srcString,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    static int utf32ToUtf8(std::pmr::string   *dstString,

                           const unsigned int *srcString,

                           bsl::size_t        *numCodePointsWritten = 0,

                           unsigned char       errorByte            = '?',

                           ByteOrder::Enum     byteOrder = ByteOrder::e_HOST);

#endif

    static int utf32ToUtf8(bsl::string           *dstString,

                           const unsigned int    *srcString,

                           bsl::size_t            srcStringLength,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

    static int utf32ToUtf8(std::string           *dstString,

                           const unsigned int    *srcString,

                           bsl::size_t            srcStringLength,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING

    static int utf32ToUtf8(std::pmr::string   *dstString,

                           const unsigned int *srcString,

                           bsl::size_t         srcStringLength,

                           bsl::size_t        *numCodePointsWritten = 0,

                           unsigned char       errorByte            = '?',

                           ByteOrder::Enum     byteOrder = ByteOrder::e_HOST);

#endif


    static int utf32ToUtf8(bsl::vector<char>     *dstVector,

                           const unsigned int    *srcString,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Load into the specified `dstVector` the result of converting the

    /// specified `srcString` of `UTF-32` values to `UTF-8`, always followed by

    /// a null character, and return 0 on success or

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if invalid `UTF-32` values (in

    /// the range `[0xD800 .. 0xDFFF]` or above 0x10FFFF) are seen.  Optionally

    /// specify `srcStringLength` as the number of `UTF-32` values to be

    /// converted.  If `srcStringLength` is specified, convert that many UTF-32

    /// values from `srcString` (including zero values), otherwise convert

    /// values up to but not including a terminating zero value.  Optionally

    /// specify `numCodePointsWritten` to receive the number of `UTF-8` code

    /// points written to `dstVector`.  Optionally specify `errorByte` as the

    /// character to be written to `dstVector` as the translation of invalid

    /// `UTF-32` values; if not specified, `?` is used, and if given as 0, no

    /// character is written at all.  Optionally specify `byteOrder` to

    /// determine how `UTF-32` values in `srcString` are interpreted; if not

    /// given, host byte order is used.  The behavior is undefined if

    /// `errorByte` is 0x80 or above.  Note that if you are passing the

    /// `bsl::vector<unsigned int>` obtained from a call to `utf8ToUtf32` and

    /// using `srcStringLength`, you must take care to pass `vector.size() - 1`

    /// to `srcStringLength` to avoid embedding the terminating 0.

    static int utf32ToUtf8(std::vector<char>     *dstVector,

                           const unsigned int    *srcString,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    static int utf32ToUtf8(

                        std::pmr::vector<char> *dstVector,

                        const unsigned int     *srcString,

                        bsl::size_t            *numCodePointsWritten = 0,

                        unsigned char           errorByte            = '?',

                        ByteOrder::Enum         byteOrder = ByteOrder::e_HOST);

#endif

    static int utf32ToUtf8(bsl::vector<char>     *dstVector,

                           const unsigned int    *srcString,

                           bsl::size_t            srcStringLength,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

    static int utf32ToUtf8(std::vector<char>     *dstVector,

                           const unsigned int    *srcString,

                           bsl::size_t            srcStringLength,

                           bsl::size_t           *numCodePointsWritten = 0,

                           unsigned char          errorByte            = '?',

                           ByteOrder::Enum        byteOrder            =

                                                            ByteOrder::e_HOST);

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR

    static int utf32ToUtf8(

                        std::pmr::vector<char> *dstVector,

                        const unsigned int     *srcString,

                        bsl::size_t             srcStringLength,

                        bsl::size_t            *numCodePointsWritten = 0,

                        unsigned char           errorByte            = '?',

                        ByteOrder::Enum         byteOrder = ByteOrder::e_HOST);

#endif


    static int utf32ToUtf8(char               *dstBuffer,

                           bsl::size_t         dstCapacity,

                           const unsigned int *srcString,

                           bsl::size_t        *numCodePointsWritten = 0,

                           bsl::size_t        *numBytesWritten      = 0,

                           unsigned char       errorByte            = '?',

                           ByteOrder::Enum     byteOrder            =

                                                            ByteOrder::e_HOST);

    /// Unless `dstCapacity == 0`, load into the specified `dstBuffer` all

    /// or as many complete `UTF-8` sequences converted from the specified

    /// `srcString` of UTF-32 as will fit, along with an always-present

    /// terminating null byte, into the specified `dstCapacity` bytes, and

    /// return 0 on success or a bit-wise OR of

    /// `CharConvertStatus::k_INVALID_INPUT_BIT` if invalid `UTF-32` values

    /// (in the range `[0xD800 .. 0xDFFF]` or above 0x10FFFF) are seen and

    /// `CharConvertStatus::k_OUT_OF_SPACE_BIT` if there is insufficient

    /// room for the entire result to be written.  If `dstCapacity == 0`

    /// return `CharConvertStatus::k_OUT_OF_SPACE_BIT` without

    /// modifying `dstBuffer`.  Optionally specify `srcStringLength` as the

    /// number of `UTF-32` values to be converted.  If `srcStringLength` is

    /// specified, convert that many UTF-32 values from `srcString`

    /// (including zero values), otherwise convert values up to but not

    /// including a terminating zero value.  Optionally specify

    /// `numCodePointsWritten` to receive the number of `UTF-8` code points

    /// written to `dstBuffer`.  Optionally specify `numBytesWritten` to

    /// receive the number of bytes written to `dstBuffer`.  Optionally

    /// specify `errorByte` as the character to be written to `dstBuffer` as

    /// the translation of invalid `UTF-32` values; if not specified, `?` is

    /// used, and if given as 0, no character is written at all.  Optionally

    /// specify `byteOrder` to determine how `UTF-32` values in `srcString`

    /// are interpreted; if not given, host byte order is used.  The

    /// behavior is undefined if `errorByte` is 0x80 or above.  Note that if

    /// you are passing the `bsl::vector<unsigned int>` obtained from a call

    /// to `utf8ToUtf32` and using `srcStringLength`, you must take care to

    /// pass `vector.size() - 1` to `srcStringLength` to avoid embedding the

    /// terminating 0.

    static int utf32ToUtf8(char               *dstBuffer,

                           bsl::size_t         dstCapacity,

                           const unsigned int *srcString,

                           bsl::size_t         srcStringLength,

                           bsl::size_t        *numCodePointsWritten = 0,

                           bsl::size_t        *numBytesWritten      = 0,

                           unsigned char       errorByte            = '?',

                           ByteOrder::Enum     byteOrder            =

                                                            ByteOrder::e_HOST);

};


}  // close package namespace


#endif


// ----------------------------------------------------------------------------

// Copyright 2015 Bloomberg Finance L.P.

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

//

//     http://www.apache.org/licenses/LICENSE-2.0

//

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.

// ----------------------------- END-OF-FILE ----------------------------------


/** @} */

/** @} */

/** @} */

bdlde_byteorder.h

bdlde_charconvertstatus.h

bsls_ident.h

bsls_libraryfeatures.h

bsl::basic_string_view
Definition bslstl_stringview.h:441

bsl::basic_string
Definition bslstl_string.h:1281

bsl::vector
Definition bslstl_vector.h:1025

BSLS_IDENT
#define BSLS_IDENT(str)
Definition bsls_ident.h:195

bdlde
Definition bdlde_base64alphabet.h:118

bdlde::ByteOrder::Enum
Enum
Definition bdlde_byteorder.h:134

bdlde::ByteOrder::e_HOST
@ e_HOST
Definition bdlde_byteorder.h:144

bdlde::CharConvertUtf32
Definition bdlde_charconvertutf32.h:241

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(bsl::vector< unsigned int > *dstVector, const char *srcString, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(std::vector< char > *dstVector, const unsigned int *srcString, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(std::string *dstString, const unsigned int *srcString, bsl::size_t srcStringLength, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(bsl::vector< unsigned int > *dstVector, const bsl::string_view &srcString, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const unsigned int *srcString, bsl::size_t srcStringLength, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(std::vector< unsigned int > *dstVector, const bsl::string_view &srcString, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(bsl::vector< char > *dstVector, const unsigned int *srcString, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(unsigned int *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(unsigned int *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(std::string *dstString, const unsigned int *srcString, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(bsl::string *dstString, const unsigned int *srcString, bsl::size_t srcStringLength, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(bsl::string *dstString, const unsigned int *srcString, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(std::vector< char > *dstVector, const unsigned int *srcString, bsl::size_t srcStringLength, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf8ToUtf32
static int utf8ToUtf32(std::vector< unsigned int > *dstVector, const char *srcString, unsigned int errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(bsl::vector< char > *dstVector, const unsigned int *srcString, bsl::size_t srcStringLength, bsl::size_t *numCodePointsWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

bdlde::CharConvertUtf32::utf32ToUtf8
static int utf32ToUtf8(char *dstBuffer, bsl::size_t dstCapacity, const unsigned int *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, unsigned char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)