Quick Links:

bal | bbl | bdl | bsl

Static Public Member Functions

bdlde::CharConvertUtf16 Struct Reference

#include <bdlde_charconvertutf16.h>

List of all members.

Static Public Member Functions

static bsl::size_t computeRequiredUtf16Words (const char *srcBuffer, const char *endPtr=0)
static int utf8ToUtf16 (bsl::wstring *dstWstring, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (std::wstring *dstWstring, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (bsl::wstring *dstWstring, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (std::wstring *dstWstring, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (std::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (std::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static bsl::size_t computeRequiredUtf8Bytes (const wchar_t *srcBuffer, const wchar_t *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static bsl::size_t computeRequiredUtf8Bytes (const unsigned short *srcBuffer, const unsigned short *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (bsl::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (std::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)
static int utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte= '?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

Detailed Description

This struct provides a namespace for a suite of static functions to convert buffers or containers between UTF-8 and UTF-16. Note that Byte Order Mark (BOM) sequences are neither generated nor recognized as special. If a BOM is present in the input, it will be translated, whether correct (0xfeff) or incorrect (0xfffe), into the output without any special handling.

See Component bdlde_charconvertutf16


Member Function Documentation

static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf16Words ( const char *  srcBuffer,
const char *  endPtr = 0 
) [static]

Return the number of words required to store the translation of the specified UTF-8 string srcBuffer into a 0 terminated UTF-16 string (including the terminating 0 word in the returned count). Optionally specify endPtr, referring to one past the last input character. If endPtr is not supplied, or is 0, treat srcBuffer as 0 terminated. Note that this function will return the size utf8ToUtf16 will require, assuming the errorWord argument to utf8ToUtf16 is non-zero.

static int bdlde::CharConvertUtf16::utf8ToUtf16 ( bsl::wstring *  dstWstring,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( std::wstring *  dstWstring,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( bsl::wstring *  dstWstring,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( std::wstring *  dstWstring,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstWstring the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which, if not 0, indicates the location of the modifiable variable into which the number of Unicode code points written, including the terminating null word, is to be loaded. Optionally specify an errorWord to be substituted, if not 0, for invalid encodings in the input string. Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT otherwise. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input code points are ignored (i.e., produce no corresponding output). The behavior is undefined unless srcString is null-terminated when specified as a const char *. Note that one code point can occupy multiple 16-bit words, and that if srcString is a bslstl::StringRef, it may contain embedded null bytes that will be translated to null words embedded in the output.

static int bdlde::CharConvertUtf16::utf8ToUtf16 ( bsl::vector< unsigned short > *  dstVector,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( std::vector< unsigned short > *  dstVector,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( bsl::vector< unsigned short > *  dstVector,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( std::vector< unsigned short > *  dstVector,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstVector the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify an errorWord to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input is ignored (i.e., produces no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT otherwise. The behavior is undefined unless errorWord is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0xffff ]) and srcString is null-terminated when specified as a const char *. Note that one code point can occupy multiple 16-bit words. Also note that the size of the result vector is always fitted to the null-terminated result, including the terminating 0. Also note that if srcString is a bslstl::StringRef, it may contain embedded null bytes that will be translated to null words embedded in the output.

static int bdlde::CharConvertUtf16::utf8ToUtf16 ( unsigned short *  dstBuffer,
bsl::size_t  dstCapacity,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numWordsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( unsigned short *  dstBuffer,
bsl::size_t  dstCapacity,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numWordsWritten = 0,
unsigned short  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify numWordsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of short memory words written (including the null terminator) is to be loaded. Optionally specify an errorWord to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Return 0 on success and a bit-wise or of the bits specified by CharConvertStatus::Enum otherwise to indicate that there were invalid input sequences or if dstCapacity was inadequate to store the output. If dstCapacity > 0 yet dstCapacity specifies a buffer too small to hold the output, the maximal null-terminated prefix of the properly converted result string is loaded into dstBuffer. The behavior is undefined unless dstBuffer refers to an array of at least dstCapacity elements, errorWord is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0xffff ]), and srcString is null-terminated when supplied as a const char *. 
Note that if dstCapacity is 0, *dstBuffer is not modified and this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT set and 0 is written into *numCodePointsWritten and *numWordsWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that one code point can occupy multiple 16-bit words, so that *numWordsWritten may be greater than *numCodePointsWritten, and therefore that an input srcString of dstCapacity code points may not fit into dstBuffer. However, an input srcString of dstCapacity bytes (including null terminator, if present) will always fit (since the UTF-8 encoding of a code point requires at least as many bytes as the UTF-16 encoding requires words). Also note that if srcString is a bslstl::StringRef, it may contain embedded null bytes that will be translated to null words embedded in the output.

static int bdlde::CharConvertUtf16::utf8ToUtf16 ( wchar_t *  dstBuffer,
bsl::size_t  dstCapacity,
const bsl::string_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numWordsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf8ToUtf16 ( wchar_t *  dstBuffer,
bsl::size_t  dstCapacity,
const char *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numWordsWritten = 0,
wchar_t  errorWord = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the variable into which the number of UTF-16 code points (including the terminating 0) written is to be loaded. Optionally specify numWordsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of short memory words written (including the null terminator) is to be loaded. Optionally specify an errorWord to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. If errorWord is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Return 0 on success and a bit-wise or of the bits specified by CharConvertStatus::Enum otherwise to indicate that there were invalid sequences or if dstCapacity was inadequate to store the output. If dstCapacity > 0 yet dstCapacity specifies a buffer too small to hold the output, the maximal null-terminated prefix of the properly converted result string is loaded into dstBuffer. The behavior is undefined unless dstBuffer, if specified, refers to an array of at least dstCapacity elements, errorWord is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0xffff ]), and srcString is null-terminated if supplied as a const char *. 
Note that if dstCapacity is 0, *dstBuffer is not modified and this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT set and 0 is written into *numCodePointsWritten and *numWordsWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that one code point can occupy multiple 16-bit words, so that *numWordsWritten may be greater than *numCodePointsWritten, and therefore that an input srcString of dstCapacity code points may not fit into dstBuffer. However, an input srcString of dstCapacity bytes (including terminating 0, if present) will always fit (since the UTF-8 encoding of a code point requires at least as many bytes as the UTF-16 encoding requires words). Also note that if srcString is a bslstl::StringRef, it may contain embedded null bytes that will be translated to null words embedded in the output.

static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf8Bytes ( const wchar_t *  srcBuffer,
const wchar_t *  endPtr = 0,
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf8Bytes ( const unsigned short *  srcBuffer,
const unsigned short *  endPtr = 0,
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Return the length needed in bytes, for a buffer to hold the null-terminated UTF-8 string translated from the specified UTF-16 string srcBuffer (including the terminating \0 in the returned count). Optionally specify endPtr, referring to one past the last input character. If endPtr is not supplied, or is 0, treat srcBuffer as 0 terminated. Optionally specify byteOrder indicating the byte order of srcBuffer; if byteOrder is not supplied, the host byte order is used. Note that this function will return the size utf16ToUtf8 will require, assuming the errorByte argument to utf16ToUtf8 is non-zero.

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::string *  dstString,
const unsigned short *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::string *  dstString,
const unsigned short *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::string *  dstString,
const unsigned short *  srcString,
bsl::size_t  srcLengthInWords,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::string *  dstString,
const unsigned short *  srcString,
bsl::size_t  srcLengthInWords,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstString the result of converting the specified UTF-16 *srcString to its UTF-8 equivalent. Optionally specify srcLengthInWords, the number of unsigned shorts of input. If srcLengthInWords is not specified, the input must be terminated by a null word. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless either srcLengthInWords is passed or srcString is null-terminated, and errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80). Note that the string length will be sized to the length of the output, such that strlen(dstString->c_str()) == dstString->length().

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::string *  dstString,
const bsl::wstring_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::string *  dstString,
const bsl::wstring_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::string *  dstString,
const wchar_t *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::string *  dstString,
const wchar_t *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstString the result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Any previous contents of the destination are discarded. Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80) and srcString is null-terminated if supplied as a const wchar_t *. Note that if srcString is a bslstl::StringRefWide, it may contain embedded 0 words that will be translated to null bytes embedded in the output.

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::vector< char > *  dstVector,
const unsigned short *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::vector< char > *  dstVector,
const unsigned short *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::vector< char > *  dstVector,
const unsigned short *  srcString,
bsl::size_t  srcLengthInWords,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::vector< char > *  dstVector,
const unsigned short *  srcString,
bsl::size_t  srcLengthInWords,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstVector the null-terminated result of converting the specified UTF-16 *srcString to its UTF-8 equivalent. Optionally specify srcLengthInWords, the number of unsigned shorts of input. If srcLengthInWords is not specified, the input must be terminated by a null word. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless either srcLengthInWords is passed or srcString is null-terminated, and errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80).

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::vector< char > *  dstVector,
const bsl::wstring_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::vector< char > *  dstVector,
const bsl::wstring_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( bsl::vector< char > *  dstVector,
const wchar_t *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( std::vector< char > *  dstVector,
const wchar_t *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load into the specified dstVector the null-terminated result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80) and srcString is null-terminated if supplied as a const wchar_t *. Note that if srcString is a bslstl::StringRefWide, it may contain embedded 0 words that will be translated to null bytes embedded in the output.

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( char *  dstBuffer,
bsl::size_t  dstCapacity,
const unsigned short *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numBytesWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( char *  dstBuffer,
bsl::size_t  dstCapacity,
const unsigned short *  srcString,
bsl::size_t  srcLengthInWords,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numBytesWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load, into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-16 *srcString to its null-terminated UTF-8 equivalent. Optionally specify srcLengthInWords, the number of unsigned shorts of input. If srcLengthInWords is not specified, the input must be terminated by a null word. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points (including the terminating 0, if any) written is to be loaded, where one code point can occupy multiple bytes. Optionally specify numBytesWritten, which (if not 0) indicates the location of the modifiable variable into which the number of bytes written (including the null terminator, if any) is to be loaded. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Return 0 on success and a bitwise-or of the masks defined by CharConvertStatus::Enum otherwise, where CharConvertStatus::k_INVALID_INPUT_BIT will be set if one or more invalid sequences were encountered in the input, and CharConvertStatus::k_OUT_OF_SPACE_BIT will be set if the output space was exhausted before conversion was complete. The behavior is undefined unless dstBuffer refers to an array of at least dstCapacity elements, either srcLengthInWords is passed or srcString is null-terminated, and errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80). 
Note that if dstCapacity is 0, this function returns CharConvertStatus::k_OUT_OF_SPACE_BIT set and 0 is written into *numCodePointsWritten and *numBytesWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that since UTF-8 is a variable-length encoding, numBytesWritten may be up to four times numCodePointsWritten, and therefore that an input srcString of dstCapacity words (including the terminating 0) may not fit into dstBuffer. A one-word (two-byte) UTF-16 code point will require one to three UTF-8 octets (bytes); a two-word (four-byte) UTF-16 code point will always require four UTF-8 octets. Also note that the amount of room needed will vary with the contents of the data and the language being translated, but never will the number of bytes output exceed three times the number of short words input. Also note that, if dstCapacity > 0, then, after completion, strlen(dstBuffer) + 1 == *numBytesWritten.

static int bdlde::CharConvertUtf16::utf16ToUtf8 ( char *  dstBuffer,
bsl::size_t  dstCapacity,
const bsl::wstring_view &  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numBytesWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]
static int bdlde::CharConvertUtf16::utf16ToUtf8 ( char *  dstBuffer,
bsl::size_t  dstCapacity,
const wchar_t *  srcString,
bsl::size_t *  numCodePointsWritten = 0,
bsl::size_t *  numBytesWritten = 0,
char  errorByte = '?',
ByteOrder::Enum  byteOrder = ByteOrder::e_HOST 
) [static]

Load, into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points (including the terminating 0, if any) written is to be loaded, where one code point can occupy multiple bytes. Optionally specify numBytesWritten, which (if not 0) indicates the location of the modifiable variable into which the number of bytes written (including the null terminator, if any) is to be loaded. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Return 0 on success and a bitwise-or of the flags defined by CharConvertStatus::Enum otherwise. CharConvertStatus::k_INVALID_INPUT_BIT will be set if one or more invalid sequences were encountered in the input, and CharConvertStatus::k_OUT_OF_SPACE_BIT will be set if the output space was exhausted before conversion was complete. The behavior is undefined unless dstBuffer refers to an array of at least dstCapacity elements, errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80), and srcString is null-terminated if supplied as a const wchar_t *. Note that if dstCapacity is 0, this function returns with CharConvertStatus::k_OUT_OF_SPACE_BIT set, and 0 is written into *numCodePointsWritten and *numBytesWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone. 
Also note that since UTF-8 is a variable-length encoding, numBytesWritten may be up to four times numCodePointsWritten, and therefore that an input srcString of dstCapacity code points (including the terminating 0, if present) may not fit into dstBuffer. A one-word (two-byte) UTF-16 code point will require one to three UTF-8 octets (bytes); a two-word (four-byte) UTF-16 code point will always require four UTF-8 octets. Also note that the amount of room needed will vary with the contents of the data and the language being translated, but never will the number of bytes output exceed three times the number of words input. Also note that, if dstCapacity > 0, then, after completion, strlen(dstBuffer) + 1 == *numBytesWritten. Also note that if srcString is a bsl::wstring_view, it may contain embedded 0 words that will be translated to null bytes embedded in the output.


The documentation for this struct was generated from the following file: