BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlb_stringviewutil.h
Go to the documentation of this file.
1/// @file bdlb_stringviewutil.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlb_stringviewutil.h -*-C++-*-
8#ifndef INCLUDED_BDLB_STRINGVIEWUTIL
9#define INCLUDED_BDLB_STRINGVIEWUTIL
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlb_stringviewutil bdlb_stringviewutil
15/// @brief Provide utility functions on `bsl::string_view` containers.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlb
19/// @{
20/// @addtogroup bdlb_stringviewutil
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlb_stringviewutil-purpose"> Purpose</a>
25/// * <a href="#bdlb_stringviewutil-classes"> Classes </a>
26/// * <a href="#bdlb_stringviewutil-description"> Description </a>
27/// * <a href="#bdlb_stringviewutil-synopsis-of-bsl-string_view"> Synopsis of bsl::string_view </a>
28/// * <a href="#bdlb_stringviewutil-function-synopsis"> Function Synopsis </a>
29/// * <a href="#bdlb_stringviewutil-character-encoding"> Character Encoding </a>
30/// * <a href="#bdlb_stringviewutil-caseless-comparisons"> Caseless Comparisons </a>
31/// * <a href="#bdlb_stringviewutil-whitespace-character-specification"> Whitespace Character Specification </a>
32/// * <a href="#bdlb_stringviewutil-usage"> Usage </a>
33/// * <a href="#bdlb_stringviewutil-example-1-trimming-whitespace"> Example 1: Trimming Whitespace </a>
34///
35/// # Purpose {#bdlb_stringviewutil-purpose}
36/// Provide utility functions on `bsl::string_view` containers.
37///
38/// # Classes {#bdlb_stringviewutil-classes}
39///
40/// - bdlb::StringViewUtil: namespace for functions on @ref string_view containers
41///
42/// @see bslstl_stringview
43///
44/// # Description {#bdlb_stringviewutil-description}
45/// This component defines a utility `struct`,
46/// `bdlb::StringViewUtil`, that provides a suite of functions that operate on
47/// `bsl::string_view` containers.
48///
49/// ## Synopsis of bsl::string_view {#bdlb_stringviewutil-synopsis-of-bsl-string_view}
50///
51///
52/// The `bsl::string_view` class provides `bsl::string`-like access to an array
53/// of bytes that need not be null terminated and that can have non-ASCII values
54/// (i.e., `[128 .. 255]`). Although a `bsl::string_view` object can itself be
55/// changed, it cannot change its referent data (the array of bytes). The
56/// lifetime of the referent data must exceed that of all `bsl::string_view`
57/// objects referring to it. Equality comparison of `bsl::string_view` objects
58/// compares the content of the referent data (not whether or not the objects
59/// refer to the same array of bytes). See @ref bslstl_stringview for full
60/// details.
61///
62/// ## Function Synopsis {#bdlb_stringviewutil-function-synopsis}
63///
64///
65/// The table below provides an outline of the functions provided by this
66/// component.
67/// @code
68/// Function Purpose
69/// -------------------------- ------------------------------------------------
70/// areEqualCaseless(SV, SV) case-insensitive equality comparison
71/// lowerCaseCmp (SV, SV) lexical comparison of lower-case conversion
72/// upperCaseCmp (SV, SV) lexical comparison of upper-case conversion
73///
74/// ltrim(SV) exclude whitespace from left side of string
75/// rtrim(SV) exclude whitespace from right side of string
76/// trim(SV) exclude whitespace from both sides of string
77///
78/// substr(SV, pos, num) substring, `num` characters from `pos`
79///
80/// strstr (SV, SUBSV) find first substring in string
81/// strstrCaseless (SV, SUBSV) find first substring in string, case insensitive
82/// strrstr (SV, SUBSV) find last substring in string
83/// strrstrCaseless(SV, SUBSV) find last substring in string, case insensitive
84///
85/// findFirstOf (SV, ch, p) find first occurrence of any character from `ch`
86/// findLastOf (SV, ch, p) find last occurrence of any character from `ch`
87/// findFirstNotOf(SV, ch, p) find first occurrence of any char not from `ch`
88/// findLastNotOf (SV, ch, p) find last occurrence of any char not from `ch`
89///
90/// startsWith(SV, ch) find out if string starts with `ch`
91/// endsWith(SV, ch) find out if string ends with `ch`
92/// @endcode
93/// Since `bsl::string_view` objects know the length of the referent data these
94/// utility functions can make certain performance improvements over the
95/// classic, similarly named C language functions.
96///
97/// ## Character Encoding {#bdlb_stringviewutil-character-encoding}
98///
99///
100/// These utilities assume ASCII encoding for character data when doing case
101/// conversions and when determining if a character is in the whitespace
102/// character set.
103///
104/// ### Caseless Comparisons {#bdlb_stringviewutil-caseless-comparisons}
105///
106///
107/// Caseless (i.e., case-insensitive) comparisons treat characters in the
108/// sequence `[a .. z]` as equivalent to the respective characters in the
109/// sequence `[A .. Z]`. This equivalence matches that of `bsl::toupper`.
110///
111/// ### Whitespace Character Specification {#bdlb_stringviewutil-whitespace-character-specification}
112///
113///
114/// The following characters are classified as "whitespace":
115/// @code
116/// Character Description
117/// --------- ---------------
118/// ' ' blank-space
119/// '\f' form-feed
120/// '\n' newline
121/// '\r' carriage return
122/// '\t' horizontal tab
123/// '\v' vertical tab
124/// @endcode
125/// This classification matches that of `bsl::isspace`.
126///
127/// ## Usage {#bdlb_stringviewutil-usage}
128///
129///
130/// This section illustrates the intended use of this component.
131///
132/// ### Example 1: Trimming Whitespace {#bdlb_stringviewutil-example-1-trimming-whitespace}
133///
134///
135/// Many applications must normalize user input by removing leading and trailing
136/// whitespace characters to obtain the essential text that is the intended
137/// input. Naturally, one would prefer to do this as efficiently as possible.
138///
139/// Suppose the response entered by a user is captured in `rawInput` below:
140/// @code
141/// const char * const rawInput = " \t\r\n Hello, world! \r\n";
142/// //1234 5 6 789 1234 5 6
143/// // 123456789ABCD
144/// // Note lengths of whitespace and
145/// // non-whitespace substrings for later.
146/// @endcode
147/// First, for this pedagogical example, we copy the contents at `rawInput` for
148/// later reference:
149/// @code
150/// const bsl::string copyRawInput(rawInput);
151/// @endcode
152/// Then, we create a `bsl::string_view` object referring to the raw data.
153/// Given a single argument of `const char *`, the constructor assumes the data
154/// is a null-terminated string and implicitly calculates the length for the
155/// reference:
156/// @code
157/// bsl::string_view text(rawInput);
158///
159/// assert(rawInput == text.data());
160/// assert(9 + 13 + 6 == text.length());
161/// @endcode
162/// Now, we invoke the `bdlb::StringViewUtil::trim` method to find the "Hello,
163/// world!" sequence in `rawInput`.
164/// @code
165/// bsl::string_view textOfInterest = bdlb::StringViewUtil::trim(text);
166/// @endcode
167/// Finally, we observe the results:
168/// @code
169/// assert(bsl::string_view("Hello, world!") == textOfInterest);
170/// assert(13 == textOfInterest.length());
171///
172/// assert(text.data() + 9 == textOfInterest.data());
173/// assert(text.length() - 9 - 6 == textOfInterest.length());
174///
175/// assert(rawInput == copyRawInput);
176/// @endcode
177/// Notice that, as expected, the `textOfInterest` object refers to the "Hello,
178/// world!" sub-sequence within the `rawInput` byte array while the data at
179/// `rawInput` remains *unchanged*.
180/// @}
181/** @} */
182/** @} */
183
184/** @addtogroup bdl
185 * @{
186 */
187/** @addtogroup bdlb
188 * @{
189 */
190/** @addtogroup bdlb_stringviewutil
191 * @{
192 */
193
194#include <bdlscm_version.h>
195
196#include <bsls_review.h>
197
198#include <bsl_algorithm.h> // bsl::min
199#include <bsl_string_view.h>
200
201
202namespace bdlb {
203 // =====================
204 // struct StringViewUtil
205 // =====================
206
207/// This `struct` provides a namespace for a suite of functions on
208/// `bsl::string_view` containers.
210
211 // PUBLIC TYPES
212
213 /// Size type of string_view containers.
215
216 // PUBLIC CLASS DATA
217
218 /// Value used to denote "not-a-position", guaranteed to be outside the
219 /// `range[0 .. bsl::string_view::max_size()]`.
221
222 // CLASS METHODS
223 // Comparison
224
225 /// Compare (the referent data of) the specified `lhs` and `rhs`.
226 /// Return `true` if `lhs` and `rhs` are equal up to a case conversion,
227 /// and `false` otherwise. See {Caseless Comparisons}.
228 static bool areEqualCaseless(const bsl::string_view& lhs,
229 const bsl::string_view& rhs);
230
231 /// Compare (the referent data of) the specified `lhs` and `rhs`.
232 /// Return 1 if, after a conversion to lower case, `lhs` is greater than
233 /// `rhs`, 0 if `lhs` and `rhs` are equal up to a case conversion, and
234 /// -1 otherwise. See {Caseless Comparisons}.
235 static int lowerCaseCmp(const bsl::string_view& lhs,
236 const bsl::string_view& rhs);
237
238 /// Compare (the referent data of) the specified `lhs` and `rhs`.
239 /// Return 1 if, after a conversion to upper case, `lhs` is greater than
240 /// `rhs`, 0 if `lhs` and `rhs` are equal up to a case conversion, and
241 /// -1 otherwise. See {Caseless Comparisons}.
242 static int upperCaseCmp(const bsl::string_view& lhs,
243 const bsl::string_view& rhs);
244
245 // Trim
246
247 /// Return a `bsl::string_view` object referring to the substring of
248 /// (the referent data of) the specified `string` that excludes all
249 /// leading whitespace. See {Whitespace Character Specification}. If
250 /// `string` consists entirely of whitespace, return a zero-length
251 /// reference to the end of `string` (i.e.,
252 /// `bsl::string_view(string.end(), 0)`).
254
255 /// Return a `bsl::string_view` object referring to the substring of
256 /// (the referent data of) the specified `string` that excludes all
257 /// trailing whitespace. See {Whitespace Character Specification}. If
258 /// `string` consists entirely of whitespace, return a zero-length
259 /// reference to the beginning of (the referent data of) `string`
260 /// (i.e., `bsl::string_view(string.data(), 0)`).
262
263 /// Return a `bsl::string_view` object referring to the substring of
264 /// (the referent data of) the specified `string` that excludes all
265 /// leading and trailing whitespace. See {Whitespace Character
266 /// Specification}. If `string` consists entirely of whitespace, return
267 /// a zero-length reference to the beginning of (the referent data of)
268 /// `string` (i.e., `bsl::string_view(string.data(), 0)`).
269 static bsl::string_view trim(const bsl::string_view& string);
270
271 // Create `subString`
272
273 /// Return a string whose value is the substring starting at the
274 /// optionally specified `position` in the specified `string`, of length
275 /// the optionally specified `numChars` or `length() - position`,
276 /// whichever is smaller. If `position` is not specified, 0 is used
277 /// (i.e., the substring is from the beginning of this string). If
278 /// `numChars` is not specified, `k_NPOS` is used (i.e., the entire
279 /// suffix from `position` to the end of the string is returned). The
280 /// behavior is undefined unless `position` is within the string
281 /// boundaries (`0 <= position <= string.length()`).
282 static bsl::string_view substr(const bsl::string_view& string,
283 size_type position = 0,
284 size_type numChars = k_NPOS);
285
286 // Find `subString`
287
288 /// Return a `bsl::string_view` object referring to the first occurrence
289 /// in (the referent data of) the specified `string` at which (the
290 /// referent data of) the specified `subString` is found, or
291 /// `bsl::string_view()` if there is no such occurrence. If `subString`
292 /// has zero length then a zero-length reference to the beginning of
293 /// `string` is returned (i.e., `bsl::string_view(string.data(), 0)`);
295 const bsl::string_view& subString);
296
297 /// Return a `bsl::string_view` object referring to the first occurrence
298 /// in (the referent data of) the specified `string` at which (the
299 /// referent data of) the specified `subString` is found using
300 /// case-insensitive comparisons, or `bsl::string_view()` if there is no
301 /// such occurrence. See {Caseless Comparisons}. If `subString` has
302 /// zero length then a zero-length reference to the beginning of
303 /// `string` is returned (i.e., `bsl::string_view(string.data(), 0)`);
305 const bsl::string_view& subString);
306
307 /// Return a `bsl::string_view` object referring to the last occurrence
308 /// in (the referent data of) the specified `string` at which (the
309 /// referent data of) the specified `subString` is found, or
310 /// `bsl::string_view()` if there is no such occurrence. If `subString`
311 /// has zero length then a zero-length reference to the end of `string`
312 /// is returned (i.e., `bsl::string_view(string.end(), 0)`);
314 const bsl::string_view& subString);
315
316 /// Return a `bsl::string_view` object referring to the last occurrence
317 /// in (the referent data of) the specified `string` at which (the
318 /// referent data of) the specified `subString` is found using
319 /// case-insensitive comparisons, or `bsl::string_view()` if there is no
320 /// such occurrence. See {Caseless Comparisons}. If `subString` has
321 /// zero length then a zero-length reference to the end of `string` is
322 /// returned (i.e., `bsl::string_view(string.end(), 0)`);
324 const bsl::string_view& subString);
325
326 // Find first/last of/not of
327
328 /// Return the position of the *first* occurrence of a character
329 /// belonging to the specified `characters`, if such an occurrence can
330 /// can be found in the specified `string` (on or *after* the optionally
331 /// specified `position` if such a `position` is specified), and return
332 /// `k_NPOS` otherwise.
334 const bsl::string_view& characters,
335 size_type position = 0);
336
337 /// Return the position of the *last* occurrence of a character
338 /// belonging to the specified `characters`, if such an occurrence can
339 /// can be found in the specified `string` (on or *before* the
340 /// optionally specified `position` if such a `position` is specified),
341 /// and return `k_NPOS` otherwise.
343 const bsl::string_view& characters,
344 size_type position = k_NPOS);
345
346 /// Return the position of the *first* occurrence of a character *not*
347 /// belonging to the specified `characters`, if such an occurrence can
348 /// be found in the specified `string` (on or *after* the optionally
349 /// specified `position` if such a `position` is specified), and return
350 /// `k_NPOS` otherwise.
352 const bsl::string_view& characters,
353 size_type position = 0);
354
355 /// Return the position of the *last* occurrence of a character *not*
356 /// belonging to the specified `characters`, if such an occurrence can
357 /// be found in the specified `string` (on or *before* the optionally
358 /// specified `position` if such a `position` is specified), and return
359 /// `k_NPOS` otherwise.
361 const bsl::string_view& characters,
362 size_type position = k_NPOS);
363
364 // Starts/ends with
365
366 static bool startsWith(const bsl::string_view& string,
367 const bsl::string_view& characters);
368 /// Return `true` if the specified `string` begins with the specified
369 /// `characters`, and `false` otherwise.
370 static bool startsWith(const bsl::string_view& string,
371 const char *characters);
372
373 /// Return `true` if the specified `string` begins with the specified
374 /// `character`, and `false` otherwise.
375 static bool startsWith(const bsl::string_view& string, char character);
376
377 static bool endsWith(const bsl::string_view& string,
378 const bsl::string_view& characters);
379 /// Return `true` if the specified `string` ends with the specified
380 /// `characters`, and `false` otherwise.
381 static bool endsWith(const bsl::string_view& string,
382 const char *characters);
383
384 /// Return `true` if the specified `string` ends with the specified
385 /// `character`, and `false` otherwise.
386 static bool endsWith(const bsl::string_view& string, char character);
387};
388
389// ============================================================================
390// INLINE DEFINITIONS
391// ============================================================================
392
393 // ---------------------
394 // struct StringViewUtil
395 // ---------------------
396
397// CLASS METHODS
398
399 // Comparison
400
401inline
403 const bsl::string_view& rhs)
404{
405 if (lhs.length() != rhs.length()) {
406 return false; // RETURN
407 }
408
409 return 0 == lowerCaseCmp(lhs, rhs);
410}
411
412 // Trim
413
414inline
416{
417 return ltrim(rtrim(string));
418}
419
420 // Substring
421
422inline
424 size_type position,
425 size_type numChars)
426{
427 BSLS_ASSERT(position <= string.length());
428
429 return bsl::string_view(string.data() + position,
430 bsl::min(numChars, string.length() - position));
431}
432
433 // Starts/ends with
434
435inline
437 const bsl::string_view& characters)
438{
439 if (characters.length() > string.length()) {
440 return false; // RETURN
441 }
442 return characters == bsl::string_view(string.data(), characters.length());
443}
444
445inline
447 const char *characters)
448{
449 BSLS_ASSERT_SAFE(characters);
450
451 const char *nextChar = characters;
452 bsl::string_view::const_iterator stringIt = string.begin();
453 while (stringIt != string.end()) {
454 if (0 == *nextChar) {
455 return true; // RETURN
456 }
457 if (*nextChar != *stringIt) {
458 return false; // RETURN
459 }
460 ++stringIt;
461 ++nextChar;
462 }
463 return 0 == *nextChar;
464}
465
466inline
467bool StringViewUtil::startsWith(const bsl::string_view& string, char character)
468{
469 return (!string.empty() && character == string.front());
470}
471
472inline
474 const bsl::string_view& characters)
475{
476 if (string.length() < characters.length()) {
477 return false; // RETURN
478 }
479
480 bsl::string_view::size_type pos = string.length() - characters.length();
481 return 0 == bsl::string_view::traits_type::compare(string.data() + pos,
482 characters.data(),
483 characters.length());
484}
485
486inline
488 const char *characters)
489{
490 BSLS_ASSERT_SAFE(characters);
491
492 return endsWith(string, bsl::string_view(characters));
493}
494
495inline
496bool StringViewUtil::endsWith(const bsl::string_view& string, char character)
497{
498 return (!string.empty() && character == string.back());
499}
500
501} // close package namespace
502
503
504#endif
505
506// ----------------------------------------------------------------------------
507// Copyright 2020 Bloomberg Finance L.P.
508//
509// Licensed under the Apache License, Version 2.0 (the "License"); you may not
510// use this file except in compliance with the License. You may obtain a copy
511// of the License at
512//
513// http://www.apache.org/licenses/LICENSE-2.0
514//
515// Unless required by applicable law or agreed to in writing, software
516// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
517// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
518// License for the specific language governing permissions and limitations
519// under the License.
520// ----------------------------- END-OF-FILE ----------------------------------
521
522/** @} */
523/** @} */
524/** @} */
Definition bslstl_stringview.h:441
std::size_t size_type
Definition bslstl_stringview.h:457
BSLS_KEYWORD_CONSTEXPR size_type length() const BSLS_KEYWORD_NOEXCEPT
Return the length of this view.
Definition bslstl_stringview.h:1685
BSLS_KEYWORD_CONSTEXPR const_pointer data() const BSLS_KEYWORD_NOEXCEPT
Definition bslstl_stringview.h:1760
BSLS_KEYWORD_CONSTEXPR_CPP17 int compare(basic_string_view other) const BSLS_KEYWORD_NOEXCEPT
Definition bslstl_stringview.h:1818
static const size_type npos
Definition bslstl_stringview.h:465
const value_type * const_iterator
Definition bslstl_stringview.h:451
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_ASSERT_SAFE(X)
Definition bsls_assert.h:1762
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlb_algorithmworkaroundutil.h:74
basic_string_view< char > string_view
Definition bslstl_stringview.h:1053
Definition bdlb_stringviewutil.h:209
static size_type findFirstNotOf(const bsl::string_view &string, const bsl::string_view &characters, size_type position=0)
static const size_type k_NPOS
Definition bdlb_stringviewutil.h:220
static bsl::string_view rtrim(const bsl::string_view &string)
static int lowerCaseCmp(const bsl::string_view &lhs, const bsl::string_view &rhs)
static bsl::string_view ltrim(const bsl::string_view &string)
bsl::string_view::size_type size_type
Size type of string_view containers.
Definition bdlb_stringviewutil.h:214
static size_type findLastNotOf(const bsl::string_view &string, const bsl::string_view &characters, size_type position=k_NPOS)
static bsl::string_view strrstr(const bsl::string_view &string, const bsl::string_view &subString)
static bsl::string_view strrstrCaseless(const bsl::string_view &string, const bsl::string_view &subString)
static bsl::string_view substr(const bsl::string_view &string, size_type position=0, size_type numChars=k_NPOS)
Definition bdlb_stringviewutil.h:423
static size_type findLastOf(const bsl::string_view &string, const bsl::string_view &characters, size_type position=k_NPOS)
static int upperCaseCmp(const bsl::string_view &lhs, const bsl::string_view &rhs)
static bool areEqualCaseless(const bsl::string_view &lhs, const bsl::string_view &rhs)
Definition bdlb_stringviewutil.h:402
static bool startsWith(const bsl::string_view &string, const bsl::string_view &characters)
Definition bdlb_stringviewutil.h:436
static bsl::string_view trim(const bsl::string_view &string)
Definition bdlb_stringviewutil.h:415
static bool endsWith(const bsl::string_view &string, const bsl::string_view &characters)
Definition bdlb_stringviewutil.h:473
static size_type findFirstOf(const bsl::string_view &string, const bsl::string_view &characters, size_type position=0)
static bsl::string_view strstrCaseless(const bsl::string_view &string, const bsl::string_view &subString)
static bsl::string_view strstr(const bsl::string_view &string, const bsl::string_view &subString)