// bdlb_stringviewutil.h -*-C++-*- #ifndef INCLUDED_BDLB_STRINGVIEWUTIL #define INCLUDED_BDLB_STRINGVIEWUTIL #include <bsls_ident.h> BSLS_IDENT("$Id: $") //@PURPOSE: Provide utility functions on 'bsl::string_view' containers. // //@CLASSES: // bdlb::StringViewUtil: namespace for functions on 'string_view' containers // //@SEE_ALSO: bslstl_stringview // //@DESCRIPTION: This component defines a utility 'struct', // 'bdlb::StringViewUtil', that provides a suite of functions that operate on // 'bsl::string_view' containers. // ///Synopsis of 'bsl::string_view' ///------------------------------ // The 'bsl::string_view' class provides 'bsl::string'-like access to an array // of bytes that need not be null terminated and that can have non-ASCII values // (i.e., '[128 .. 255]'). Although a 'bsl::string_view' object can itself be // changed, it cannot change its referent data (the array of bytes). The // lifetime of the referent data must exceed that of all 'bsl::string_view' // objects referring to it. Equality comparison of 'bsl::string_view' objects // compares the content of the referent data (not whether or not the object // refer to the same array of bytes). See {'bslstl_stringview'} for full // details. // ///Function Synopsis ///----------------- // The table below provides an outline of the functions provided by this // component. //.. // Function Purpose // -------------------------- ------------------------------------------------ // areEqualCaseless(SV, SV) case-insensitive equality comparison // lowerCaseCmp (SV, SV) lexical comparison of lower-case conversion // upperCaseCmp (SV, SV) lexical comparison of upper-case conversion // // ltrim(SV) exclude whitespace from left side of string // rtrim(SV) exclude whitespace from right side of string // trim(SV) exclude whitespace from both sides of string // // substr(SV, pos, num) substring, 'num' characters from 'pos' // // strstr (SV, SUBSV) find first substring in string // strstrCaseless (SV, SUBSV) find first substring in string, case insensitive // strrstr (SV, SUBSV) find last substring in string // strrstrCaseless(SV, SUBSV) find last substring in string, case insensitive // // findFirstOf (SV, ch, p) find first occurrence of any character from 'ch' // findLastOf (SV, ch, p) find last occurrence of any character from 'ch' // findFirstNotOf(SV, ch, p) find first occurrence of any char not from 'ch' // findLastNotOf (SV, ch, p) find last occurrence of any char not from 'ch' // // startsWith(SV, ch) find out if string starts with 'ch' // endsWith(SV, ch) find out if string ends with 'ch' //.. // Since 'bsl::string_view' objects know the length of the referent data these // utility functions can make certain performance improvements over the // classic, similarly named C language functions. // ///Character Encoding ///------------------ // These utilities assume ASCII encoding for character data when doing case // conversions and when determining if a character is in the whitespace // character set. // ///Caseless Comparisons /// - - - - - - - - - - // Caseless (i.e., case-insensitive) comparisons treat characters in the // sequence '[a .. z]' as equivalent to the respective characters in the // sequence '[A .. Z]'. This equivalence matches that of 'bsl::toupper'. // ///Whitespace Character Specification /// - - - - - - - - - - - - - - - - - // The following characters are classified as "whitespace": //.. // Character Description // --------- --------------- // ' ' blank-space // '\f' form-feed // '\n' newline // '\r' carriage return // '\t' horizontal tab // '\v' vertical tab //.. // This classification matches that of 'bsl::isspace'. // ///Usage ///----- // This section illustrates the intended use of this component. // ///Example 1: Trimming Whitespace //- - - - - - - - - - - - - - - - // Many applications must normalize user input by removing leading and trailing // whitespace characters to obtain the essential text that is the intended // input. Naturally, one would prefer to do this as efficiently as possible. // // Suppose the response entered by a user is captured in 'rawInput' below: //.. // const char * const rawInput = " \t\r\n Hello, world! \r\n"; // //1234 5 6 789 1234 5 6 // // 123456789ABCD // // Note lengths of whitespace and // // non-whitespace substrings for later. //.. // First, for this pedagogical example, we copy the contents at 'rawInput' for // later reference: //.. // const bsl::string copyRawInput(rawInput); //.. // Then, we create a 'bsl::string_view' object referring to the raw data. // Given a single argument of 'const char *', the constructor assumes the data // is a null-terminated string and implicitly calculates the length for the // reference: //.. // bsl::string_view text(rawInput); // // assert(rawInput == text.data()); // assert(9 + 13 + 6 == text.length()); //.. // Now, we invoke the 'bdlb::StringViewUtil::trim' method to find the "Hello, // world!" sequence in 'rawInput'. //.. // bsl::string_view textOfInterest = bdlb::StringViewUtil::trim(text); //.. // Finally, we observe the results: //.. // assert(bsl::string_view("Hello, world!") == textOfInterest); // assert(13 == textOfInterest.length()); // // assert(text.data() + 9 == textOfInterest.data()); // assert(text.length() - 9 - 6 == textOfInterest.length()); // // assert(rawInput == copyRawInput); //.. // Notice that, as expected, the 'textOfInterest' object refers to the "Hello, // world!" sub-sequence within the 'rawInput' byte array while the data at // 'rawInput' remains *unchanged*. #include <bdlscm_version.h> #include <bsls_review.h> #include <bsl_algorithm.h> // bsl::min #include <bsl_string_view.h> namespace BloombergLP { namespace bdlb { // ===================== // struct StringViewUtil // ===================== struct StringViewUtil { // This 'struct' provides a namespace for a suite of functions on // 'bsl::string_view' containers. // PUBLIC TYPES typedef bsl::string_view::size_type size_type; // Size type of string_view containers. // PUBLIC CLASS DATA static const size_type k_NPOS = bsl::string_view::npos; // Value used to denote "not-a-position", guaranteed to be outside the // 'range[0 .. bsl::string_view::max_size()]'. // CLASS METHODS // Comparison static bool areEqualCaseless(const bsl::string_view& lhs, const bsl::string_view& rhs); // Compare (the referent data of) the specified 'lhs' and 'rhs'. // Return 'true' if 'lhs' and 'rhs' are equal up to a case conversion, // and 'false' otherwise. See {Caseless Comparisons}. static int lowerCaseCmp(const bsl::string_view& lhs, const bsl::string_view& rhs); // Compare (the referent data of) the specified 'lhs' and 'rhs'. // Return 1 if, after a conversion to lower case, 'lhs' is greater than // 'rhs', 0 if 'lhs' and 'rhs' are equal up to a case conversion, and // -1 otherwise. See {Caseless Comparisons}. static int upperCaseCmp(const bsl::string_view& lhs, const bsl::string_view& rhs); // Compare (the referent data of) the specified 'lhs' and 'rhs'. // Return 1 if, after a conversion to upper case, 'lhs' is greater than // 'rhs', 0 if 'lhs' and 'rhs' are equal up to a case conversion, and // -1 otherwise. See {Caseless Comparisons}. // Trim static bsl::string_view ltrim(const bsl::string_view& string); // Return a 'bsl::string_view' object referring to the substring of // (the referent data of) the specified 'string' that excludes all // leading whitespace. See {Whitespace Character Specification}. If // 'string' consists entirely of whitespace, return a zero-length // reference to the end of 'string' (i.e., // 'bsl::string_view(string.end(), 0)'). static bsl::string_view rtrim(const bsl::string_view& string); // Return a 'bsl::string_view' object referring to the substring of // (the referent data of) the specified 'string' that excludes all // trailing whitespace. See {Whitespace Character Specification}. If // 'string' consists entirely of whitespace, return a zero-length // reference to the beginning of (the referent data of) 'string' // (i.e., 'bsl::string_view(string.data(), 0)'). static bsl::string_view trim(const bsl::string_view& string); // Return a 'bsl::string_view' object referring to the substring of // (the referent data of) the specified 'string' that excludes all // leading and trailing whitespace. See {Whitespace Character // Specification}. If 'string' consists entirely of whitespace, return // a zero-length reference to the beginning of (the referent data of) // 'string' (i.e., 'bsl::string_view(string.data(), 0)'). // Create 'subString' static bsl::string_view substr(const bsl::string_view& string, size_type position = 0, size_type numChars = k_NPOS); // Return a string whose value is the substring starting at the // optionally specified 'position' in the specified 'string', of length // the optionally specified 'numChars' or 'length() - position', // whichever is smaller. If 'position' is not specified, 0 is used // (i.e., the substring is from the beginning of this string). If // 'numChars' is not specified, 'k_NPOS' is used (i.e., the entire // suffix from 'position' to the end of the string is returned). The // behavior is undefined unless 'position' is within the string // boundaries ('0 <= position <= string.length()'). // Find 'subString' static bsl::string_view strstr(const bsl::string_view& string, const bsl::string_view& subString); // Return a 'bsl::string_view' object referring to the first occurrence // in (the referent data of) the specified 'string' at which (the // referent data of) the specified 'subString' is found, or // 'bsl::string_view()' if there is no such occurrence. If 'subString' // has zero length then a zero-length reference to the beginning of // 'string' is returned (i.e., 'bsl::string_view(string.data(), 0)'); static bsl::string_view strstrCaseless(const bsl::string_view& string, const bsl::string_view& subString); // Return a 'bsl::string_view' object referring to the first occurrence // in (the referent data of) the specified 'string' at which (the // referent data of) the specified 'subString' is found using // case-insensitive comparisons, or 'bsl::string_view()' if there is no // such occurrence. See {Caseless Comparisons}. If 'subString' has // zero length then a zero-length reference to the beginning of // 'string' is returned (i.e., 'bsl::string_view(string.data(), 0)'); static bsl::string_view strrstr(const bsl::string_view& string, const bsl::string_view& subString); // Return a 'bsl::string_view' object referring to the last occurrence // in (the referent data of) the specified 'string' at which (the // referent data of) the specified 'subString' is found, or // 'bsl::string_view()' if there is no such occurrence. If 'subString' // has zero length then a zero-length reference to the end of 'string' // is returned (i.e., 'bsl::string_view(string.end(), 0)'); static bsl::string_view strrstrCaseless(const bsl::string_view& string, const bsl::string_view& subString); // Return a 'bsl::string_view' object referring to the last occurrence // in (the referent data of) the specified 'string' at which (the // referent data of) the specified 'subString' is found using // case-insensitive comparisons, or 'bsl::string_view()' if there is no // such occurrence. See {Caseless Comparisons}. If 'subString' has // zero length then a zero-length reference to the end of 'string' is // returned (i.e., 'bsl::string_view(string.end(), 0)'); // Find first/last of/not of static size_type findFirstOf(const bsl::string_view& string, const bsl::string_view& characters, size_type position = 0); // Return the position of the *first* occurrence of a character // belonging to the specified 'characters', if such an occurrence can // can be found in the specified 'string' (on or *after* the optionally // specified 'position' if such a 'position' is specified), and return // 'k_NPOS' otherwise. static size_type findLastOf(const bsl::string_view& string, const bsl::string_view& characters, size_type position = k_NPOS); // Return the position of the *last* occurrence of a character // belonging to the specified 'characters', if such an occurrence can // can be found in the specified 'string' (on or *before* the // optionally specified 'position' if such a 'position' is specified), // and return 'k_NPOS' otherwise. static size_type findFirstNotOf(const bsl::string_view& string, const bsl::string_view& characters, size_type position = 0); // Return the position of the *first* occurrence of a character *not* // belonging to the specified 'characters', if such an occurrence can // be found in the specified 'string' (on or *after* the optionally // specified 'position' if such a 'position' is specified), and return // 'k_NPOS' otherwise. static size_type findLastNotOf(const bsl::string_view& string, const bsl::string_view& characters, size_type position = k_NPOS); // Return the position of the *last* occurrence of a character *not* // belonging to the specified 'characters', if such an occurrence can // be found in the specified 'string' (on or *before* the optionally // specified 'position' if such a 'position' is specified), and return // 'k_NPOS' otherwise. // Starts/ends with static bool startsWith(const bsl::string_view& string, const bsl::string_view& characters); static bool startsWith(const bsl::string_view& string, const char *characters); // Return 'true' if the specified 'string' begins with the specified // 'characters', and 'false' otherwise. static bool startsWith(const bsl::string_view& string, char character); // Return 'true' if the specified 'string' begins with the specified // 'character', and 'false' otherwise. static bool endsWith(const bsl::string_view& string, const bsl::string_view& characters); static bool endsWith(const bsl::string_view& string, const char *characters); // Return 'true' if the specified 'string' ends with the specified // 'characters', and 'false' otherwise. static bool endsWith(const bsl::string_view& string, char character); // Return 'true' if the specified 'string' ends with the specified // 'character', and 'false' otherwise. }; // ============================================================================ // INLINE DEFINITIONS // ============================================================================ // --------------------- // struct StringViewUtil // --------------------- // CLASS METHODS // Comparison inline bool StringViewUtil::areEqualCaseless(const bsl::string_view& lhs, const bsl::string_view& rhs) { if (lhs.length() != rhs.length()) { return false; // RETURN } return 0 == lowerCaseCmp(lhs, rhs); } // Trim inline bsl::string_view StringViewUtil::trim(const bsl::string_view& string) { return ltrim(rtrim(string)); } // Substring inline bsl::string_view StringViewUtil::substr(const bsl::string_view& string, size_type position, size_type numChars) { BSLS_ASSERT(position <= string.length()); return bsl::string_view(string.data() + position, bsl::min(numChars, string.length() - position)); } // Starts/ends with inline bool StringViewUtil::startsWith(const bsl::string_view& string, const bsl::string_view& characters) { if (characters.length() > string.length()) { return false; // RETURN } return characters == bsl::string_view(string.data(), characters.length()); } inline bool StringViewUtil::startsWith(const bsl::string_view& string, const char *characters) { BSLS_ASSERT_SAFE(characters); const char *nextChar = characters; bsl::string_view::const_iterator stringIt = string.begin(); while (stringIt != string.end()) { if (0 == *nextChar) { return true; // RETURN } if (*nextChar != *stringIt) { return false; // RETURN } ++stringIt; ++nextChar; } return 0 == *nextChar; } inline bool StringViewUtil::startsWith(const bsl::string_view& string, char character) { return (!string.empty() && character == string.front()); } inline bool StringViewUtil::endsWith(const bsl::string_view& string, const bsl::string_view& characters) { if (string.length() < characters.length()) { return false; // RETURN } bsl::string_view::size_type pos = string.length() - characters.length(); return 0 == bsl::string_view::traits_type::compare(string.data() + pos, characters.data(), characters.length()); } inline bool StringViewUtil::endsWith(const bsl::string_view& string, const char *characters) { BSLS_ASSERT_SAFE(characters); return endsWith(string, bsl::string_view(characters)); } inline bool StringViewUtil::endsWith(const bsl::string_view& string, char character) { return (!string.empty() && character == string.back()); } } // close package namespace } // close enterprise namespace #endif // ---------------------------------------------------------------------------- // Copyright 2020 Bloomberg Finance L.P. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy // of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations // under the License. // ----------------------------- END-OF-FILE ----------------------------------