BDE 4.14.0 Production release
|
#include <bdlde_charconvertutf16.h>
Static Public Member Functions | |
static bsl::size_t | computeRequiredUtf16Words (const char *srcBuffer, const char *endPtr=0) |
static int | utf8ToUtf16 (bsl::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (std::wstring *dstString, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (bsl::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (std::wstring *dstString, const char *srcString, bsl::size_t *numCodePointsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (std::vector< unsigned short > *dstVector, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (bsl::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (std::vector< unsigned short > *dstVector, const char *srcString, bsl::size_t *numCodePointsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (unsigned short *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, unsigned short errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const bsl::string_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf8ToUtf16 (wchar_t *dstBuffer, bsl::size_t dstCapacity, const char *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numWordsWritten=0, wchar_t errorWord='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static bsl::size_t | computeRequiredUtf8Bytes (const unsigned short *srcBuffer, const unsigned short *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static bsl::size_t | computeRequiredUtf8Bytes (const wchar_t *srcBuffer, const wchar_t *endPtr=0, ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::string *dstString, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::string *dstString, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::string *dstString, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::vector< char > *dstVector, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::vector< char > *dstVector, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (bsl::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (std::vector< char > *dstVector, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const unsigned short *srcString, bsl::size_t srcLengthInWords, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const bsl::wstring_view &srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
static int | utf16ToUtf8 (char *dstBuffer, bsl::size_t dstCapacity, const wchar_t *srcString, bsl::size_t *numCodePointsWritten=0, bsl::size_t *numBytesWritten=0, char errorByte='?', ByteOrder::Enum byteOrder=ByteOrder::e_HOST) |
This struct
provides a namespace for a suite of static functions to convert buffers or containers between UTF-8 and UTF-16. Note that Byte Order Mark (BOM) sequences are neither generated nor recognized as special. If a BOM is present in the input, it will be translated, whether correct (0xfeff
) or incorrect (0xfffe
), into the output without any special handling.
|
static |
Return the number of words required to store the translation of the specified UTF-8 string srcBuffer
into a 0 terminated UTF-16 string (including the terminating 0 word in the returned count). Optionally specify endPtr
, referring to one past the last input character. If endPtr
is not supplied, or is 0, treat srcBuffer
as 0 terminated. Note that this function will return the size utf8ToUtf16
will require, assuming the errorWord
argument to utf8ToUtf16
is non-zero.
|
static |
Return the length needed, in bytes, for a buffer to hold the null-terminated UTF-8 string translated from the specified UTF-16 string srcBuffer
(including the terminating '\0' in the returned count). Optionally specify endPtr
, referring to one past the last input character. If endPtr
is not supplied, or is 0, treat srcBuffer
as 0 terminated. Optionally specify byteOrder
indicating the byte order of srcBuffer
; if byteOrder
is not supplied, the host byte order is used. Note that this function will return the size utf16ToUtf8
will require, assuming the errorByte
argument to utf16ToUtf8
is non-zero.
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Load, into the specified dstBuffer
of the specified dstCapacity
, the result of converting the specified UTF-16 srcString
to its UTF-8 equivalent. Optionally specify numCodePointsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points (including the terminating 0, if any) written is to be loaded, where one code point can occupy multiple bytes. Optionally specify numBytesWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of bytes written (including the null terminator, if any) is to be loaded. Optionally specify an errorByte
to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte
is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder
to indicate the byte order of the UTF-16 input; if byteOrder
is not specified, the input is assumed to be in host byte order. Return 0 on success and a bitwise-or of the flags defined by CharConvertStatus::Enum
otherwise. CharConvertStatus::k_INVALID_INPUT_BIT
will be set if one or more invalid sequences were encountered in the input, and CharConvertStatus::k_OUT_OF_SPACE_BIT
will be set if the output space was exhausted before conversion was complete. The behavior is undefined unless dstBuffer
refers to an array of at least dstCapacity
elements, errorByte
is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80
), and srcString
is null-terminated if supplied as a pointer. Note that if dstCapacity
is 0, this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT
set and 0 is written into *numCodePointsWritten
and *numBytesWritten
(if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that since UTF-8 is a variable-length encoding, numBytesWritten
may be up to four times numCodePointsWritten
, and therefore that an input srcString
of dstCapacity
code points (including the terminating 0, if present) may not fit into dstBuffer
. A one-word (two-byte) UTF-16 code point will require one to three UTF-8 octets (bytes); a two-word (four-byte) UTF-16 code point will always require four UTF-8 octets. Also note that the amount of room needed will vary with the contents of the data and the language being translated, but never will the number of bytes output exceed three times the number of words input. Also note that, if dstCapacity > 0
, then, after completion, strlen(dstBuffer) + 1 == *numBytesWritten
. Also note that if srcString
is a bsl::wstring_view
, it may contain embedded 0 words that will be translated to null bytes embedded in the output.
|
static |
|
static |
Load into the specified dstString
the result of converting the specified UTF-16 srcString
to its UTF-8 equivalent. Optionally specify numCodePointsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte
to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte
is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Any previous contents of the destination are discarded. Optionally specify byteOrder
to indicate the byte order of the UTF-16 input; if byteOrder
is not specified, the input is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT
if one or more invalid sequences were encountered in the input. The behavior is undefined unless errorByte
is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80
) and srcString
is null-terminated if supplied as a const wchar_t *
. Note that if srcString
is a bsl::wstring_view
, it may contain embedded 0 words that will be translated to null bytes embedded in the output.
|
static |
|
static |
|
static |
|
static |
Load into the specified dstVector
the null-terminated result of converting the specified UTF-16 *srcString
to its UTF-8 equivalent. Optionally specify srcLengthInWords
, the number of unsigned short words of input. If srcLengthInWords
is not specified, the input must be terminated by a null word. Optionally specify numCodePointsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of Unicode code points written, including the null terminator, is to be loaded, where one code point may occupy multiple bytes. Optionally specify an errorByte
to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are incomplete multi-word encodings or parts of a two-word encoding out of their proper sequence. If errorByte
is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder
to indicate the byte order of the UTF-16 input; if byteOrder
is not specified, the input is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT
if one or more invalid sequences were encountered in the input. The behavior is undefined unless either srcLengthInWords
is passed or srcString
is null-terminated, and errorByte
is either 0 or a valid single-byte Unicode code point (0 < errorByte < 0x80
).
|
static |
|
static |
|
static |
|
static |
|
static |
Load into the specified dstString
the result of converting the specified UTF-8 srcString
to its UTF-16 equivalent. Optionally specify numCodePointsWritten
, which, if not 0, indicates the location of the modifiable variable into which the number of Unicode code points written, including the terminating null character, is to be loaded. Optionally specify an errorWord
to be substituted, if not 0, for invalid encodings in the input string. Optionally specify byteOrder
to indicate the byte order of the UTF-16 output; if byteOrder
is not specified, the output is assumed to be in host byte order. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT
otherwise. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings, or code points outside the ranges
|
static |
|
static |
Load into the specified dstVector
the result of converting the specified UTF-8 srcString
to its UTF-16 equivalent. Optionally specify numCodePointsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify an errorWord
to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ]
or [ 0xe000 .. 0x10ffff ]
). If errorWord
is 0, invalid input is ignored (i.e., produces no corresponding output). Optionally specify byteOrder
to indicate the byte order of the UTF-16 output; if byteOrder
is not specified, the output is assumed to be in host byte order. Any previous contents of the destination are discarded. Return 0 on success and CharConvertStatus::k_INVALID_INPUT_BIT
otherwise. The behavior is undefined unless errorWord
is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ]
or [ 0xe000 .. 0xffff ]
) and srcString
is null-terminated when specified as a const char *
. Note that one code point can occupy multiple 16-bit words. Also note that the size of the result vector is always fitted to the null-terminated result, including the terminating 0. Also note that if srcString
is a bsl::string_view
, it may contain embedded null bytes that will be translated to null words embedded in the output.
|
static |
|
static |
that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ]
or [ 0xe000 .. 0x10ffff ]
). If errorWord
is 0, invalid input code points are ignored (i.e., produce no corresponding output). The behavior is undefined unless srcString
is null-terminated when specified as a const char *
. Note that one code point can occupy multiple UTF-16 words, and that if srcString
is a bsl::string_view
, it may contain embedded null bytes that will be translated to null words embedded in the output.
|
static |
|
static |
|
static |
|
static |
|
static |
Load into the specified dstBuffer
of the specified dstCapacity
, the result of converting the specified UTF-8 srcString
to its UTF-16 equivalent. Optionally specify numCodePointsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of UTF-16 code points (including the null terminator) written is to be loaded. Optionally specify numWordsWritten
, which (if not 0) indicates the location of the modifiable variable into which the number of short
memory words written (including the null terminator) is to be loaded. Optionally specify an errorWord
to be substituted (if not 0) for invalid encodings in the input string. Invalid encodings are multi-byte encoding parts out of sequence, non-minimal UTF-8 encodings of code points, or code points outside the ranges that UTF-16 can validly encode (in the range [ 1 .. 0xd7ff ]
or [ 0xe000 .. 0x10ffff ]
). If errorWord
is 0, invalid input sequences are ignored (i.e., produce no corresponding output). Optionally specify byteOrder
to indicate the byte order of the UTF-16 output; if byteOrder
is not specified, the output is assumed to be in host byte order. Return 0 on success and a bit-wise or of the bits specified by CharConvertStatus::Enum
otherwise to indicate that there were invalid input sequences or that dstCapacity
was inadequate to store the output. If dstCapacity > 0
yet dstCapacity
specifies a buffer too small to hold the output, the maximal null-terminated prefix of the properly converted result string is loaded into dstBuffer
. The behavior is undefined unless dstBuffer
refers to an array of at least dstCapacity
elements, errorWord
is either 0 or a valid single-word encoded UTF-16 code point (in the range [ 1 .. 0xd7ff ]
or [ 0xe000 .. 0xffff ]
), and srcString
is null-terminated when supplied as a const char *
. Note that if dstCapacity
is 0, *dstBuffer
is not modified and this function returns a value with CharConvertStatus::k_OUT_OF_SPACE_BIT
set and 0 is written into *numCodePointsWritten
and *numWordsWritten
(if those pointers are non-null), since there is insufficient space for even a null terminator alone. Also note that one code point can occupy multiple 16-bit words, so that *numWordsWritten
may be greater than *numCodePointsWritten
, and therefore that an input srcString
of dstCapacity
code points may not fit into dstBuffer
; however, an input srcString
of dstCapacity
bytes (including null terminator, if present) will always fit (since the UTF-8 encoding of a code point requires at least as many bytes as the UTF-16 encoding requires words). Also note that if srcString
is a bsl::string_view
, it may contain embedded null bytes that will be translated to null words embedded in the output.