#include <bdlde_charconvertutf16.h>

Static Public Member Functions
static bsl::size_t	computeRequiredUtf16Words (const char *srcBuffer, const char *endPtr=0)

static int	utf8ToUtf16 (bsl::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (std::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (bsl::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (std::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (std::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (std::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static bsl::size_t	computeRequiredUtf8Bytes (const unsigned short *srcBuffer, const unsigned short *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static bsl::size_t	computeRequiredUtf8Bytes (const wchar_t *srcBuffer, const wchar_t *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (bsl::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (std::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

static int	utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST)

Detailed Description

This struct provides a namespace for a suite of static functions to convert buffers or containers between UTF-8 and UTF-16. Note that Byte Order Mark (BOM) sequences are neither generated nor recognized as special. If a BOM is present in the input, it will be translated, whether correct (0xfeff) or incorrect (0xfffe), into the output without any special handling.

Member Function Documentation

◆ computeRequiredUtf16Words()

static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf16Words	(	const char *	srcBuffer,
		const char *	endPtr = `0`
	)

static

Return the number of words required to store the translation of the specified UTF-8 string srcBuffer into a 0 terminated UTF-16 string (including the terminating 0 word in the returned count). Optionally specify endPtr, referring to one past the last input character. If endPtr is not supplied, or is 0, treat srcBuffer as 0 terminated. Note that this function will return the size utf8ToUtf16 will require, assuming the errorWord argument to utf8ToUtf16 is non-zero.

◆ computeRequiredUtf8Bytes() [1/2]

static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf8Bytes	(	const unsigned short *	srcBuffer,
		const unsigned short *	endPtr = `0`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Return the length needed in bytes, for a buffer to hold the null-terminated UTF-8 string translated from the specified UTF-16 string srcBuffer (including the terminating '\0' in the returned count). Optionally specify endPtr, referring to one past the last input character. If endPtr is not supplied, or is 0, treat srcBuffer as 0 terminated. Optionally specify byteOrder indicating the byte order of srcBuffer; if byteOrder is not supplied, the host byte order is used. Note that this function will return the size utf16ToUtf8 will require, assuming the errorByte argument to utf16ToUtf8 is non-zero.

◆ computeRequiredUtf8Bytes() [2/2]

static bsl::size_t bdlde::CharConvertUtf16::computeRequiredUtf8Bytes	(	const wchar_t *	srcBuffer,
		const wchar_t *	endPtr = `0`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [1/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::string *	dstString,
		const bsl::wstring_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [2/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::string *	dstString,
		const unsigned short *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [3/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::string *	dstString,
		const unsigned short *	srcString,
		bsl::size_t	srcLengthInWords,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [4/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::string *	dstString,
		const wchar_t *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [5/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::vector< char > *	dstVector,
		const bsl::wstring_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [6/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::vector< char > *	dstVector,
		const unsigned short *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [7/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::vector< char > *	dstVector,
		const unsigned short *	srcString,
		bsl::size_t	srcLengthInWords,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [8/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	bsl::vector< char > *	dstVector,
		const wchar_t *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [9/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	char *	dstBuffer,
		bsl::size_t	dstCapacity,
		const bsl::wstring_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numBytesWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [10/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	char *	dstBuffer,
		bsl::size_t	dstCapacity,
		const unsigned short *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numBytesWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [11/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	char *	dstBuffer,
		bsl::size_t	dstCapacity,
		const unsigned short *	srcString,
		bsl::size_t	srcLengthInWords,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numBytesWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [12/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	char *	dstBuffer,
		bsl::size_t	dstCapacity,
		const wchar_t *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numBytesWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load, into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points (including the terminating 0, if any) written is to be loaded, where one code point can occupy multiple bytes. Optionally specify numBytesWritten, which (if not 0) indicates the location of the modifiable variable into which the number of bytes written (including the null terminator, if any) is to be loaded. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Return 0 on success and a bitwise-or of the flags defined by CharConvertStatus::Enum otherwise. CharConvertStatus::k_INVALID_INPUT_BIT will be set if one or more invalid sequences were encountered in the input, and CharConvertStatus::k_OUT_OF_SPACE_BIT will be set if the output space was exhausted before conversion was complete. The behavior is undefined unless dstBuffer refers to an array of at least dstCapacity elements, errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80), and srcString is null-terminated if supplied as a pointer. Note that if dstCapacity is 0, this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT set and 0 is written into *numCodePointsWritten and *numBytesWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone.
Also note that since UTF-8 is a variable-length encoding, numBytesWritten may be up to four times numCodePointsWritten, and therefore that an input srcString of dstCapacity code points (including the terminating 0, if present) may not fit into dstBuffer. A one-word (two-byte) UTF-16 code point will require one to three UTF-8 octets (bytes); a two-word (four-byte) UTF-16 code point will always require four UTF-8 octets. Also note that the amount of room needed will vary with the contents of the data and the language being translated, but never will the number of bytes output exceed three times the number of words input. Also note that, if dstCapacity > 0, then, after completion, strlen(dstBuffer) + 1 == *numBytesWritten. Also note that if srcString is a bsl::wstring_view, it may contain embedded 0 words that will be translated to null bytes embedded in the output.

◆ utf16ToUtf8() [13/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::string *	dstString,
		const bsl::wstring_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [14/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::string *	dstString,
		const unsigned short *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load into the specified dstString the result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Any previous contents of the destination are discarded. Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80) and srcString is null-terminated if supplied as a const wchar_t *. Note that if srcString is a bsl::wstring_view, it may contain embedded 0 words that will be translated to null bytes embedded in the output.

◆ utf16ToUtf8() [15/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::string *	dstString,
		const unsigned short *	srcString,
		bsl::size_t	srcLengthInWords,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [16/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::string *	dstString,
		const wchar_t *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [17/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::vector< char > *	dstVector,
		const bsl::wstring_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [18/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::vector< char > *	dstVector,
		const unsigned short *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load into the specified dstVector the null-terminated result of converting the specified UTF-16 srcString to its UTF-8 equivalent. Optionally specify srcLengthInWords, the number of unsigned shorts of input. If srcLengthInWords is not specified, the input must be terminated by a null word. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 input; if byteOrder is not specified, the input is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT if one or more invalid sequences were encountered in the input. The behavior is undefined unless either srcLengthInWords is passed or srcString is null-terminated, and errorByte is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80).

◆ utf16ToUtf8() [19/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::vector< char > *	dstVector,
		const unsigned short *	srcString,
		bsl::size_t	srcLengthInWords,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf16ToUtf8() [20/20]

static int bdlde::CharConvertUtf16::utf16ToUtf8	(	std::vector< char > *	dstVector,
		const wchar_t *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		char	errorByte = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [1/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	bsl::vector< unsigned short > *	dstVector,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [2/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	bsl::vector< unsigned short > *	dstVector,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [3/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	bsl::wstring *	dstString,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load into the specified dstString the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which, if not 0, indicates the location of the modifiable variable into which the number of Unicode code points written, including the terminating null character, is to be loaded. Optionally specify an errorWord to be substituted, if not 0, for invalid encodings in the input string. Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT otherwise. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges

◆ utf8ToUtf16() [4/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	bsl::wstring *	dstString,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [5/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	std::vector< unsigned short > *	dstVector,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load into the specified dstVector the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify an errorWord to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input is ignored (i.e., produces no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT otherwise. The behavior is undefined unless errorWord is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0xffff ]) and srcString is null-terminated when specified as a const char *. Note that one code point can occupy multiple 16-bit words. Also note that the size of the result vector is always fitted to the null-terminated result, including the terminating 0. Also note that if srcString is a bsl::string_view, it may contain embedded null bytes that will be translated to null words embedded in the output.

◆ utf8ToUtf16() [6/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	std::vector< unsigned short > *	dstVector,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [7/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	std::wstring *	dstString,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input code points are ignored (i.e., produce no corresponding output). The behavior is undefined unless srcString is null-terminated when specified as a const char *. Note that one code point can occupy multiple UTF-16 words, and that if srcString is a bsl::string_view, it may contain embedded null bytes that will be translated to null words embedded in the output.

◆ utf8ToUtf16() [8/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	std::wstring *	dstString,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [9/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	unsigned short *	dstBuffer,
		bsl::size_t	dstCapacity,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numWordsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [10/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	unsigned short *	dstBuffer,
		bsl::size_t	dstCapacity,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numWordsWritten = `0`,
		unsigned short	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [11/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	wchar_t *	dstBuffer,
		bsl::size_t	dstCapacity,
		const bsl::string_view &	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numWordsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

◆ utf8ToUtf16() [12/12]

static int bdlde::CharConvertUtf16::utf8ToUtf16	(	wchar_t *	dstBuffer,
		bsl::size_t	dstCapacity,
		const char *	srcString,
		bsl::size_t *	numCodePointsWritten = `0`,
		bsl::size_t *	numWordsWritten = `0`,
		wchar_t	errorWord = `'?'`,
		ByteOrder::Enum	byteOrder = `ByteOrder::e_HOST`
	)

static

Load into the specified dstBuffer of the specified dstCapacity, the result of converting the specified UTF-8 srcString to its UTF-16 equivalent. Optionally specify numCodePointsWritten, which (if not 0) indicates the location of the variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify numWordsWritten, which (if not 0) indicates the location of the modifiable variable into which the number of short memory words written (including the null terminator) is to be loaded. Optionally specify an errorWord to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0x10ffff ]). If errorWord is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder to indicate the byte order of the UTF-16 output; if byteOrder is not specified, the output is assumed to be in host byte order. Return 0 on success and a bitwise-or of the bits specified by CharConvertStatus::Enum otherwise, to indicate that there were invalid input sequences or that dstCapacity was inadequate to store the output. If dstCapacity > 0 yet dstCapacity specifies a buffer too small to hold the output, the maximal null-terminated prefix of the properly converted result string is loaded into dstBuffer. The behavior is undefined unless dstBuffer refers to an array of at least dstCapacity elements, errorWord is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ] or [ 0xe000 .. 0xffff ]), and srcString is null-terminated when supplied as a const char *.
Note that if dstCapacity is 0, *dstBuffer is not modified and this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT set and 0 is written into *numCodePointsWritten and *numWordsWritten (if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that one code point can occupy multiple 16-bit words, so that *numWordsWritten may be greater than *numCodePointsWritten, and therefore that an input srcString of dstCapacity code points may not fit into dstBuffer; however, an input srcString of dstCapacity bytes (including null terminator, if present) will always fit (since the UTF-8 encoding of a code point requires at least as many bytes as the UTF-16 encoding requires words). Also note that if srcString is a bsl::string_view, it may contain embedded null bytes that will be translated to null words embedded in the output.

The documentation for this struct was generated from the following file:

bdlde_charconvertutf16.h

Static Public Member Functions

Detailed Description

Member Function Documentation

◆ computeRequiredUtf16Words()

◆ computeRequiredUtf8Bytes() [1/2]

◆ computeRequiredUtf8Bytes() [2/2]

◆ utf16ToUtf8() [1/20]

◆ utf16ToUtf8() [2/20]

◆ utf16ToUtf8() [3/20]

◆ utf16ToUtf8() [4/20]

◆ utf16ToUtf8() [5/20]

◆ utf16ToUtf8() [6/20]

◆ utf16ToUtf8() [7/20]

◆ utf16ToUtf8() [8/20]

◆ utf16ToUtf8() [9/20]

◆ utf16ToUtf8() [10/20]

◆ utf16ToUtf8() [11/20]

◆ utf16ToUtf8() [12/20]

◆ utf16ToUtf8() [13/20]

◆ utf16ToUtf8() [14/20]

◆ utf16ToUtf8() [15/20]

◆ utf16ToUtf8() [16/20]

◆ utf16ToUtf8() [17/20]

◆ utf16ToUtf8() [18/20]

◆ utf16ToUtf8() [19/20]

◆ utf16ToUtf8() [20/20]

◆ utf8ToUtf16() [1/12]

◆ utf8ToUtf16() [2/12]

◆ utf8ToUtf16() [3/12]

◆ utf8ToUtf16() [4/12]

◆ utf8ToUtf16() [5/12]

◆ utf8ToUtf16() [6/12]

◆ utf8ToUtf16() [7/12]

◆ utf8ToUtf16() [8/12]

◆ utf8ToUtf16() [9/12]

◆ utf8ToUtf16() [10/12]

◆ utf8ToUtf16() [11/12]

◆ utf8ToUtf16() [12/12]