BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bdlb_stringrefutil.h
Go to the documentation of this file.
1/// @file bdlb_stringrefutil.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bdlb_stringrefutil.h -*-C++-*-
8#ifndef INCLUDED_BDLB_STRINGREFUTIL
9#define INCLUDED_BDLB_STRINGREFUTIL
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bdlb_stringrefutil bdlb_stringrefutil
15/// @brief Provide utility functions on strings referenced by `bslstl::StringRef`.
16/// @addtogroup bdl
17/// @{
18/// @addtogroup bdlb
19/// @{
20/// @addtogroup bdlb_stringrefutil
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bdlb_stringrefutil-purpose"> Purpose</a>
25/// * <a href="#bdlb_stringrefutil-classes"> Classes </a>
26/// * <a href="#bdlb_stringrefutil-description"> Description </a>
27/// * <a href="#bdlb_stringrefutil-synopsis-of-bslstl-stringref"> Synopsis of bslstl::StringRef </a>
28/// * <a href="#bdlb_stringrefutil-function-synopsis"> Function Synopsis </a>
29/// * <a href="#bdlb_stringrefutil-character-encoding"> Character Encoding </a>
30/// * <a href="#bdlb_stringrefutil-caseless-comparisons"> Caseless Comparisons </a>
31/// * <a href="#bdlb_stringrefutil-whitespace-character-specification"> Whitespace Character Specification </a>
32/// * <a href="#bdlb_stringrefutil-usage"> Usage </a>
33/// * <a href="#bdlb_stringrefutil-example-1-trimming-whitespace"> Example 1: Trimming Whitespace </a>
34///
35/// # Purpose {#bdlb_stringrefutil-purpose}
36/// Provide utility functions on strings referenced by `bslstl::StringRef`.
37///
38/// @deprecated Use bdlb_stringviewutil instead.
39///
40/// # Classes {#bdlb_stringrefutil-classes}
41///
42/// - bdlb::StringRefUtil: namespace for functions on `bslstl::StringRef` strings
43///
44/// @see bdlb_string, bslstl_stringref
45///
46/// # Description {#bdlb_stringrefutil-description}
47/// This component defines a utility `struct`,
48/// `bdlb::StringRefUtil`, that provides a suite of functions that operate on
49/// `bslstl::StringRef` references to string data.
50///
51/// ## Synopsis of bslstl::StringRef {#bdlb_stringrefutil-synopsis-of-bslstl-stringref}
52///
53///
54/// The `bslstl::StringRef` class provides `bsl::string`-like access to an array
55/// of bytes that need not be null terminated and that can have non-ASCII values
56/// (i.e., `[128 .. 255]`). Although a `bslstl::StringRef` object can itself be
57/// changed, it cannot change its referent data (the array of bytes). The
58/// lifetime of the referent data must exceed that of all `bslstl::StringRef`
59/// objects referring to it. Equality comparison of `bslstl::StringRef` objects
60/// compares the content of the referent data (not whether or not the objects
61/// refer to the same array of bytes). See @ref bslstl_stringref for full
62/// details.
63///
64/// ## Function Synopsis {#bdlb_stringrefutil-function-synopsis}
65///
66///
67/// The table below provides an outline of the functions provided by this
68/// component.
69/// @code
70/// Function Purpose
71/// -------------------------- --------------------------------------------
72/// areEqualCaseless(SR, SR) case-insensitive equality comparison
73/// lowerCaseCmp(SR, SR) lexical comparison of lower-case conversion
74/// upperCaseCmp(SR, SR) lexical comparison of upper-case conversion
75///
76/// ltrim(SR) exclude whitespace from left side of string
77/// rtrim(SR) exclude whitespace from right side of string
78/// trim(SR) exclude whitespace from both sides of string
79///
80/// substr(SR, pos, num) substring, `num` characters from `pos`
81///
82/// strstr (SR, SUBSR) find first substring in string
83/// strstrCaseless (SR, SUBSR) find first substring in string, case insensitive
84/// strrstr (SR, SUBSR) find last substring in string
85/// strrstrCaseless(SR, SUBSR) find last substring in string, case insensitive
86/// @endcode
87///
88/// Since `bslstl::StringRef` objects know the length of the referent data, these
89/// utility functions can make certain performance improvements over the
90/// classic, similarly named C language functions.
91///
92/// ## Character Encoding {#bdlb_stringrefutil-character-encoding}
93///
94///
95/// These utilities assume ASCII encoding for character data when doing case
96/// conversions and when determining if a character is in the whitespace
97/// character set.
98///
99/// ### Caseless Comparisons {#bdlb_stringrefutil-caseless-comparisons}
100///
101///
102/// Caseless (i.e., case-insensitive) comparisons treat characters in the
103/// sequence `[a .. z]` as equivalent to the respective characters in the
104/// sequence `[A .. Z]`. This equivalence matches that of `bsl::toupper`.
105///
106/// ### Whitespace Character Specification {#bdlb_stringrefutil-whitespace-character-specification}
107///
108///
109/// The following characters are classified as "whitespace":
110/// @code
111/// Character Description
112/// --------- ---------------
113/// ' ' blank-space
114/// '\f' form-feed
115/// '\n' newline
116/// '\r' carriage return
117/// '\t' horizontal tab
118/// '\v' vertical tab
119/// @endcode
120/// This classification matches that of `bsl::isspace`.
121///
122/// ## Usage {#bdlb_stringrefutil-usage}
123///
124///
125/// This section illustrates the intended use of this component.
126///
127/// ### Example 1: Trimming Whitespace {#bdlb_stringrefutil-example-1-trimming-whitespace}
128///
129///
130/// Many applications must normalize user input by removing leading and trailing
131/// whitespace characters to obtain the essential text that is the intended
132/// input. Naturally, one would prefer to do this as efficiently as possible.
133///
134/// Suppose the response entered by a user is captured in `rawInput` below:
135/// @code
136/// const char * const rawInput = "    \t\r\n  Hello, world!    \r\n";
137///                              //1234 5 6 789             1234 5 6
138///                              //            123456789ABCD
139///                              // Note lengths of whitespace and
140///                              // non-whitespace substrings for later.
141/// @endcode
142/// First, for this pedagogical example, we copy the contents at `rawInput` for
143/// later reference:
144/// @code
145/// const bsl::string copyRawInput(rawInput);
146/// @endcode
147/// Then, we create a `bslstl::StringRef` object referring to the raw data.
148/// Given a single argument of `const char *`, the constructor assumes the data
149/// is a null-terminated string and implicitly calculates the length for the
150/// reference:
151/// @code
152/// bslstl::StringRef text(rawInput);
153///
154/// assert(rawInput == text.data());
155/// assert(9 + 13 + 6 == text.length());
156/// @endcode
157/// Now, we invoke the `bdlb::StringRefUtil::trim` method to find the "Hello,
158/// world!" sequence in `rawInput`.
159/// @code
160/// bslstl::StringRef textOfInterest = bdlb::StringRefUtil::trim(text);
161/// @endcode
162/// Finally, we observe the results:
163/// @code
164/// assert("Hello, world!" == textOfInterest); // content comparison
165/// assert(13 == textOfInterest.length());
166///
167/// assert(text.data() + 9 == textOfInterest.data());
168/// assert(text.length() - 9 - 6 == textOfInterest.length());
169///
170/// assert(rawInput == copyRawInput); // content comparison
171/// @endcode
172/// Notice that, as expected, the `textOfInterest` object refers to the "Hello,
173/// world!" sub-sequence within the `rawInput` byte array while the data at
174/// `rawInput` remains *unchanged*.
175/// @}
176/** @} */
177/** @} */
178
179/** @addtogroup bdl
180 * @{
181 */
182/** @addtogroup bdlb
183 * @{
184 */
185/** @addtogroup bdlb_stringrefutil
186 * @{
187 */
188
189#include <bdlscm_version.h>
190
191#include <bsls_review.h>
192
193#include <bsl_string.h> // 'bslstl::StringRef'
194
195
196namespace bdlb {
197 // ====================
198 // struct StringRefUtil
199 // ====================
200
201/// This `struct` provides a namespace for a suite of functions on
202/// `bslstl::StringRef` references to strings.
204
205 // PUBLIC TYPES
206
207 /// Size type of string references.
209
210 // PUBLIC CLASS DATA
211
212 /// Value used to denote "not-a-position", guaranteed to be outside the
213 /// range `[0 .. INT_MAX]`.
214 static const size_type k_NPOS = ~size_type(0);
215
216 // CLASS METHODS
217 // Comparison
218
219 /// Compare (the referent data of) the specified `lhs` and `rhs`.
220 /// Return `true` if `lhs` and `rhs` are equal up to a case conversion,
221 /// and `false` otherwise. See {Caseless Comparisons}.
222 static bool areEqualCaseless(const bslstl::StringRef& lhs,
223 const bslstl::StringRef& rhs);
224
225 /// Compare (the referent data of) the specified `lhs` and `rhs`.
226 /// Return 1 if, after a conversion to lower case, `lhs` is greater than
227 /// `rhs`, 0 if `lhs` and `rhs` are equal up to a case conversion, and
228 /// -1 otherwise. See {Caseless Comparisons}.
229 static int lowerCaseCmp(const bslstl::StringRef& lhs,
230 const bslstl::StringRef& rhs);
231
232 /// Compare (the referent data of) the specified `lhs` and `rhs`.
233 /// Return 1 if, after a conversion to upper case, `lhs` is greater than
234 /// `rhs`, 0 if `lhs` and `rhs` are equal up to a case conversion, and
235 /// -1 otherwise. See {Caseless Comparisons}.
236 static int upperCaseCmp(const bslstl::StringRef& lhs,
237 const bslstl::StringRef& rhs);
238
239 // Trim
240
241 /// Return a `bslstl::StringRef` object referring to the substring of
242 /// (the referent data of) the specified `string` that excludes all
243 /// leading whitespace. See {Whitespace Character Specification}. If
244 /// `string` consists entirely of whitespace, return a zero-length
245 /// reference to the end of `string` (i.e.,
246 /// `bslstl::StringRef(string.end(), 0)`).
248
249 /// Return a `bslstl::StringRef` object referring to the substring of
250 /// (the referent data of) the specified `string` that excludes all
251 /// trailing whitespace. See {Whitespace Character Specification}. If
252 /// `string` consists entirely of whitespace, return a zero-length
253 /// reference to the beginning of (the referent data of) `string`
254 /// (i.e., `bslstl::StringRef(string.data(), 0)`).
256
257 /// Return a `bslstl::StringRef` object referring to the substring of
258 /// (the referent data of) the specified `string` that excludes all
259 /// leading and trailing whitespace. See {Whitespace Character
260 /// Specification}. If `string` consists entirely of whitespace, return
261 /// a zero-length reference to the beginning of (the referent data of)
262 /// `string` (i.e., `bslstl::StringRef(string.data(), 0)`).
263 static bslstl::StringRef trim(const bslstl::StringRef& string);
264
265 // Create `subString`
266
267 /// Return a `bslstl::StringRef` object referring to the substring of
268 /// the specified `string` starting at the optionally specified
269 /// `position`, of length the optionally specified `numChars` or
270 /// `string.length() - position`, whichever is smaller. If `position`
271 /// is not specified, 0 is used (i.e., the substring starts at the
272 /// beginning of `string`). If `numChars` is not specified, `k_NPOS` is
273 /// used (i.e., the entire suffix from `position` to the end of `string`
274 /// is returned). The behavior is undefined unless `position` is within
275 /// the string boundaries (`0 <= position <= string.length()`).
277 const bslstl::StringRef& string,
278 size_type position = 0,
279 size_type numChars = k_NPOS);
280
281 // Find `subString`
282
283 /// Return a `bslstl::StringRef` object referring to the first
284 /// occurrence in (the referent data of) the specified `string` at which
285 /// (the referent data of) the specified `subString` is found, or
286 /// `bslstl::StringRef()` if there is no such occurrence. If
287 /// `subString` has zero length then a zero-length reference to the
288 /// beginning of `string` is returned (i.e.,
289 /// `bslstl::StringRef(string.data(), 0)`).
291 const bslstl::StringRef& subString);
292
293 /// Return a `bslstl::StringRef` object referring to the first
294 /// occurrence in (the referent data of) the specified `string` at which
295 /// (the referent data of) the specified `subString` is found using
296 /// case-insensitive comparisons, or `bslstl::StringRef()` if there is
297 /// no such occurrence. See {Caseless Comparisons}. If `subString` has
298 /// zero length then a zero-length reference to the beginning of
299 /// `string` is returned (i.e., `bslstl::StringRef(string.data(), 0)`).
301 const bslstl::StringRef& string,
302 const bslstl::StringRef& subString);
303
304 /// Return a `bslstl::StringRef` object referring to the last occurrence
305 /// in (the referent data of) the specified `string` at which (the
306 /// referent data of) the specified `subString` is found, or
307 /// `bslstl::StringRef()` if there is no such occurrence. If
308 /// `subString` has zero length then a zero-length reference to the end
309 /// of `string` is returned (i.e.,
310 /// `bslstl::StringRef(string.end(), 0)`).
312 const bslstl::StringRef& subString);
313
314 /// Return a `bslstl::StringRef` object referring to the last occurrence
315 /// in (the referent data of) the specified `string` at which (the
316 /// referent data of) the specified `subString` is found using
317 /// case-insensitive comparisons, or `bslstl::StringRef()` if there is
318 /// no such occurrence. See {Caseless Comparisons}. If `subString` has
319 /// zero length then a zero-length reference to the end of `string` is
320 /// returned (i.e., `bslstl::StringRef(string.end(), 0)`).
322 const bslstl::StringRef& string,
323 const bslstl::StringRef& subString);
324
325 // Find first/last of/not of
326
327 /// Return the position of the *first* occurrence of a character
328 /// belonging to the specified `characters`, if such an occurrence can
329 /// be found in the specified `string` (on or *after* the
330 /// optionally specified `position` if such a `position` is specified),
331 /// and return `k_NPOS` otherwise.
332 static
334 const bslstl::StringRef& characters,
335 size_type position = 0u);
336
337 /// Return the position of the *last* occurrence of a character
338 /// belonging to the specified `characters`, if such an occurrence can
339 /// be found in the specified `string` (on or *before* the
340 /// optionally specified `position` if such a `position` is specified),
341 /// and return `k_NPOS` otherwise.
342 static
344 const bslstl::StringRef& characters,
345 size_type position = k_NPOS);
346
347 /// Return the position of the *first* occurrence of a character *not*
348 /// belonging to the specified `characters`, if such an occurrence can
349 /// be found in the specified `string` (on or *after* the optionally
350 /// specified `position` if such a `position` is specified), and return
351 /// `k_NPOS` otherwise.
352 static
354 const bslstl::StringRef& characters,
355 size_type position = 0u);
356
357 /// Return the position of the *last* occurrence of a character *not*
358 /// belonging to the specified `characters`, if such an occurrence can
359 /// be found in the specified `string` (on or *before* the optionally
360 /// specified `position` if such a `position` is specified), and return
361 /// `k_NPOS` otherwise.
362 static
364 const bslstl::StringRef& characters,
365 size_type position = k_NPOS);
366};
367
368// ============================================================================
369// INLINE DEFINITIONS
370// ============================================================================
371
372 // --------------------
373 // struct StringRefUtil
374 // --------------------
375
376// CLASS METHODS
377
378 // Comparison
379
380inline
382 const bslstl::StringRef& rhs)
383{
 // References of different lengths cannot be equal, so return early
 // without calling `lowerCaseCmp`.
384 if (lhs.length() != rhs.length()) {
385 return false; // RETURN
386 }
387
 // Lengths match: a `lowerCaseCmp` result of 0 means `lhs` and `rhs` are
 // equal up to a case conversion.
388 return 0 == lowerCaseCmp(lhs, rhs);
389}
390
391 // Trim
392
393inline
395{
 // Exclude trailing whitespace first (`rtrim`), then exclude leading
 // whitespace from that result (`ltrim`).
396 return ltrim(rtrim(string));
397}
398
399 // Substring
400
401inline
403 size_type position,
404 size_type numChars)
405{
 // Contract check: `position` must not lie beyond the end of `string`.
406 BSLS_ASSERT(position <= string.length());
407
 // Build the result with the `bslstl::StringRef` constructor that takes
 // a source reference, a starting position, and a character count.
408 return bslstl::StringRef(string, position, numChars);
409}
410
411} // close package namespace
412
413
414#endif
415
416// ----------------------------------------------------------------------------
417// Copyright 2016 Bloomberg Finance L.P.
418//
419// Licensed under the Apache License, Version 2.0 (the "License"); you may not
420// use this file except in compliance with the License. You may obtain a copy
421// of the License at
422//
423// http://www.apache.org/licenses/LICENSE-2.0
424//
425// Unless required by applicable law or agreed to in writing, software
426// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
427// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
428// License for the specific language governing permissions and limitations
429// under the License.
430// ----------------------------- END-OF-FILE ----------------------------------
431
432/** @} */
433/** @} */
434/** @} */
Definition bslstl_stringref.h:372
size_type length() const
Definition bslstl_stringref.h:958
std::size_t size_type
Standard Library general container requirements.
Definition bslstl_stringref.h:389
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bdlb_algorithmworkaroundutil.h:74
StringRefImp< char > StringRef
Definition bslstl_stringref.h:699
Definition bdlb_stringrefutil.h:203
static size_type findFirstNotOf(const bslstl::StringRef &string, const bslstl::StringRef &characters, size_type position=0u)
static bslstl::StringRef substr(const bslstl::StringRef &string, size_type position=0, size_type numChars=k_NPOS)
Definition bdlb_stringrefutil.h:402
static size_type findLastNotOf(const bslstl::StringRef &string, const bslstl::StringRef &characters, size_type position=k_NPOS)
bslstl::StringRef::size_type size_type
Size type of string references.
Definition bdlb_stringrefutil.h:208
static size_type findFirstOf(const bslstl::StringRef &string, const bslstl::StringRef &characters, size_type position=0u)
static size_type findLastOf(const bslstl::StringRef &string, const bslstl::StringRef &characters, size_type position=k_NPOS)
static int upperCaseCmp(const bslstl::StringRef &lhs, const bslstl::StringRef &rhs)
static bslstl::StringRef strstr(const bslstl::StringRef &string, const bslstl::StringRef &subString)
static bslstl::StringRef trim(const bslstl::StringRef &string)
Definition bdlb_stringrefutil.h:394
static bool areEqualCaseless(const bslstl::StringRef &lhs, const bslstl::StringRef &rhs)
Definition bdlb_stringrefutil.h:381
static bslstl::StringRef strstrCaseless(const bslstl::StringRef &string, const bslstl::StringRef &subString)
static const size_type k_NPOS
Definition bdlb_stringrefutil.h:214
static bslstl::StringRef rtrim(const bslstl::StringRef &string)
static int lowerCaseCmp(const bslstl::StringRef &lhs, const bslstl::StringRef &rhs)
static bslstl::StringRef ltrim(const bslstl::StringRef &string)
static bslstl::StringRef strrstrCaseless(const bslstl::StringRef &string, const bslstl::StringRef &subString)
static bslstl::StringRef strrstr(const bslstl::StringRef &string, const bslstl::StringRef &subString)