// bdlb_stringrefutil.h                                               -*-C++-*-
#ifndef INCLUDED_BDLB_STRINGREFUTIL
#define INCLUDED_BDLB_STRINGREFUTIL

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide utility functions on 'bslstl::StringRef'-erenced strings.
//
//@DEPRECATED: Use bdlb_stringviewutil instead.
//
//@CLASSES:
//  bdlb::StringRefUtil: namespace for functions on 'bslstl::StringRef' strings
//
//@SEE_ALSO: bdlb_String, bslstl_stringref
//
//@DESCRIPTION: This component defines a utility 'struct',
// 'bdlb::StringRefUtil', that provides a suite of functions that operate on
// 'bslstl::StringRef' references to string data.
//
///Synopsis of 'bslstl::StringRef'
///-------------------------------
// The 'bslstl::StringRef' class provides 'bsl::string'-like access to an array
// of bytes that need not be null terminated and that can have non-ASCII values
// (i.e., '[128 .. 255]').  Although a 'bslstl::StringRef' object can itself be
// changed, it cannot change its referent data (the array of bytes).  The
// lifetime of the referent data must exceed that of all 'bslstl::StringRef'
// objects referring to it.  Equality comparison of 'bslstl::StringRef' objects
// compares the content of the referent data (not whether or not the object
// refer to the same array of bytes).  See {'bslstl_stringref'} for full
// details.
//
///Function Synopsis
///-----------------
// The table below provides an outline of the functions provided by this
// component.
//..
//  Function                   Purpose
//  -------------------------- --------------------------------------------
//    areEqualCaseless(SR, SR) case-insensitive equality comparison
//        lowerCaseCmp(SR, SR) lexical comparison of lower-case conversion
//        upperCaseCmp(SR, SR) lexical comparison of upper-case conversion
//
//                   ltrim(SR) exclude whitespace from left  side  of string
//                   rtrim(SR) exclude whitespace from right side  of string
//                    trim(SR) exclude whitespace from both  sides of string
//
//        substr(SR, pos, num) substring, 'num' characters from 'pos'
//
//  strstr         (SR, SUBSR) find first substring in string
//  strstrCaseless (SR, SUBSR) find first substring in string, case insensitive
//  strrstr        (SR, SUBSR) find last  substring in string
//  strrstrCaseless(SR, SUBSR) find last  substring in string, case insensitive
//..
//
// Since 'bslstl::StringRef' objects know the length of the referent data these
// utility functions can make certain performance improvements over the
// classic, similarly named C language functions.
//
///Character Encoding
///------------------
// These utilities assume ASCII encoding for character data when doing case
// conversions and when determining if a character is in the whitespace
// character set.
//
///Caseless Comparisons
/// - - - - - - - - - -
// Caseless (i.e., case-insensitive) comparisons treat characters in the
// sequence '[a .. z]' as equivalent to the respective characters in the
// sequence '[A .. Z]'.  This equivalence matches that of 'bsl::toupper'.
//
///Whitespace Character Specification
/// - - - - - - - - - - - - - - - - -
// The following characters are classified as "whitespace":
//..
//      Character  Description
//      ---------  ---------------
//      ' '        blank-space
//      '\f'       form-feed
//      '\n'       newline
//      '\r'       carriage return
//      '\t'       horizontal tab
//      '\v'       vertical   tab
//..
// This classification matches that of 'bsl::isspace'.
//
///Usage
///-----
// This section illustrates the intended use of this component.
//
///Example 1: Trimming Whitespace
//- - - - - - - - - - - - - - - -
// Many applications must normalize user input by removing leading and trailing
// whitespace characters to obtain the essential text that is the intended
// input.  Naturally, one would prefer to do this as efficiently as possible.
//
// Suppose the response entered by a user is captured in 'rawInput' below:
//..
//  const char * const rawInput    = "    \t\r\n  Hello, world!    \r\n";
//                                  //1234 5 6 789             1234 5 6
//                                  //            123456789ABCD
//                                  // Note lengths of whitespace and
//                                  // non-whitespace substrings for later.
//..
// First, for this pedagogical example, we copy the contents at 'rawInput' for
// later reference:
//..
//  const bsl::string copyRawInput(rawInput);
//..
// Then, we create a 'bslstl::StringRef' object referring to the raw data.
// Given a single argument of 'const char *', the constructor assumes the data
// is a null-terminated string and implicitly calculates the length for the
// reference:
//..
//  bslstl::StringRef text(rawInput);
//
//  assert(rawInput   == text.data());
//  assert(9 + 13 + 6 == text.length());
//..
// Now, we invoke the 'bdlb::StringRefUtil::trim' method to find the "Hello,
// world!" sequence in 'rawInput'.
//..
//  bslstl::StringRef textOfInterest = bdlb::StringRefUtil::trim(text);
//..
// Finally, we observe the results:
//..
//  assert("Hello, world!"       == textOfInterest); // content comparison
//  assert(13                    == textOfInterest.length());
//
//  assert(text.data()   + 9     == textOfInterest.data());
//  assert(text.length() - 9 - 6 == textOfInterest.length());
//
//  assert(rawInput              == copyRawInput);   // content comparison
//..
// Notice that, as expected, the 'textOfInterest' object refers to the "Hello,
// world!" sub-sequence within the 'rawInput' byte array while the data at
// 'rawInput' remains *unchanged*.

#include <bdlscm_version.h>

#include <bsls_review.h>

#include <bsl_string.h>  // 'bslstl::StringRef'

namespace BloombergLP {
namespace bdlb {
                        // ====================
                        // struct StringRefUtil
                        // ====================

struct StringRefUtil {
    // This 'struct' provides a namespace for a suite of functions on
    // 'bslstl::StringRef' references to strings.

    // PUBLIC TYPES
    typedef bslstl::StringRef::size_type size_type;
        // Size type of string references.

    // PUBLIC CLASS DATA
    static const size_type k_NPOS = ~size_type(0);
        // Value used to denote "not-a-position", guaranteed to be outside the
        // 'range[0 .. INT_MAX]'.

    // CLASS METHODS
                        // Comparison

    static bool areEqualCaseless(const bslstl::StringRef& lhs,
                                 const bslstl::StringRef& rhs);
        // Compare (the referent data of) the specified 'lhs' and 'rhs'.
        // Return 'true' if 'lhs' and 'rhs' are equal up to a case conversion,
        // and 'false' otherwise.  See {Caseless Comparisons}.

    static int lowerCaseCmp(const bslstl::StringRef& lhs,
                            const bslstl::StringRef& rhs);
        // Compare (the referent data of) the specified 'lhs' and 'rhs'.
        // Return 1 if, after a conversion to lower case, 'lhs' is greater than
        // 'rhs', 0 if 'lhs' and 'rhs' are equal up to a case conversion, and
        // -1 otherwise.  See {Caseless Comparisons}.

    static int upperCaseCmp(const bslstl::StringRef& lhs,
                            const bslstl::StringRef& rhs);
        // Compare (the referent data of) the specified 'lhs' and 'rhs'.
        // Return 1 if, after a conversion to upper case, 'lhs' is greater than
        // 'rhs', 0 if 'lhs' and 'rhs' are equal up to a case conversion, and
        // -1 otherwise.  See {Caseless Comparisons}.

                        // Trim

    static bslstl::StringRef ltrim(const bslstl::StringRef& string);
        // Return a 'bslstl::StringRef' object referring to the substring of
        // (the referent data of) the specified 'string' that excludes all
        // leading whitespace.  See {Whitespace Character Specification}.  If
        // 'string' consists entirely of whitespace, return a zero-length
        // reference to the end of 'string' (i.e.,
        // 'bslstl::StringRef(string.end(), 0)').

    static bslstl::StringRef rtrim(const bslstl::StringRef& string);
        // Return a 'bslstl::StringRef' object referring to the substring of
        // (the referent data of) the specified 'string' that excludes all
        // trailing whitespace.  See {Whitespace Character Specification}.  If
        // 'string' consists entirely of whitespace, return a zero-length
        // reference to the beginning of (the referent data of) 'string'
        // (i.e., 'bslstl::StringRef(string.data(), 0)').

    static bslstl::StringRef trim(const bslstl::StringRef& string);
        // Return a 'bslstl::StringRef' object referring to the substring of
        // (the referent data of) the specified 'string' that excludes all
        // leading and trailing whitespace.  See {Whitespace Character
        // Specification}.  If 'string' consists entirely of whitespace, return
        // a zero-length reference to the beginning of (the referent data of)
        // 'string' (i.e., 'bslstl::StringRef(string.data(), 0)').

                        // Create 'subString'

    static bslstl::StringRef substr(
                                  const bslstl::StringRef& string,
                                  size_type                position = 0,
                                  size_type                numChars = k_NPOS);
        // Return a string whose value is the substring starting at the
        // optionally specified 'position' in the specified 'string', of length
        // the optionally specified 'numChars' or 'length() - position',
        // whichever is smaller.  If 'position' is not specified, 0 is used
        // (i.e., the substring is from the beginning of this string).  If
        // 'numChars' is not specified, 'k_NPOS' is used (i.e., the entire
        // suffix from 'position' to the end of the string is returned).  The
        // behavior is undefined unless 'position' is within the string
        // boundaries ('0 <= position <= string.length()').

                        // Find 'subString'

    static bslstl::StringRef strstr(const bslstl::StringRef& string,
                                    const bslstl::StringRef& subString);
        // Return a 'bslstl::StringRef' object referring to the first
        // occurrence in (the referent data of) the specified 'string' at which
        // (the referent data of) the specified 'subString' is found, or
        // 'bslstl::StringRef()' if there is no such occurrence.  If
        // 'subString' has zero length then a zero-length reference to the
        // beginning of 'string' is returned (i.e.,
        // 'bslstl::StringRef(string.data(), 0)');

    static bslstl::StringRef strstrCaseless(
                                           const bslstl::StringRef& string,
                                           const bslstl::StringRef& subString);
        // Return a 'bslstl::StringRef' object referring to the first
        // occurrence in (the referent data of) the specified 'string' at which
        // (the referent data of) the specified 'subString' is found using
        // case-insensitive comparisons, or 'bslstl::StringRef()' if there is
        // no such occurrence.  See {Caseless Comparisons}.  If 'subString' has
        // zero length then a zero-length reference to the beginning of
        // 'string' is returned (i.e., 'bslstl::StringRef(string.data(), 0)');

    static bslstl::StringRef strrstr(const bslstl::StringRef& string,
                                     const bslstl::StringRef& subString);
        // Return a 'bslstl::StringRef' object referring to the last occurrence
        // in (the referent data of) the specified 'string' at which (the
        // referent data of) the specified 'subString' is found, or
        // 'bslstl::StringRef()' if there is no such occurrence.  If
        // 'subString' has zero length then a zero-length reference to the end
        // of 'string' is returned (i.e.,
        // 'bslstl::StringRef(string.end(), 0)');

    static bslstl::StringRef strrstrCaseless(
                                           const bslstl::StringRef& string,
                                           const bslstl::StringRef& subString);
        // Return a 'bslstl::StringRef' object referring to the last occurrence
        // in (the referent data of) the specified 'string' at which (the
        // referent data of) the specified 'subString' is found using
        // case-insensitive comparisons, or 'bslstl::StringRef()' if there is
        // no such occurrence.  See {Caseless Comparisons}.  If 'subString' has
        // zero length then a zero-length reference to the end of 'string' is
        // returned (i.e., 'bslstl::StringRef(string.end(), 0)');

                        // Find first/last of/not of

    static
    size_type findFirstOf(const bslstl::StringRef& string,
                          const bslstl::StringRef& characters,
                          size_type                position = 0u);
        // Return the position of the *first* occurrence of a character
        // belonging to the specified 'characters', if such an occurrence can
        // can be found in the specified 'string' (on or *after* the
        // optionally specified 'position' if such a 'position' is specified),
        // and return 'k_NPOS' otherwise.

    static
    size_type findLastOf(const bslstl::StringRef& string,
                         const bslstl::StringRef& characters,
                         size_type                position = k_NPOS);
        // Return the position of the *last* occurrence of a character
        // belonging to the specified 'characters', if such an occurrence can
        // can be found in the specified 'string' (on or *before* the
        // optionally specified 'position' if such a 'position' is specified),
        // and return 'k_NPOS' otherwise.

    static
    size_type findFirstNotOf(const bslstl::StringRef& string,
                             const bslstl::StringRef& characters,
                             size_type                position = 0u);
        // Return the position of the *first* occurrence of a character *not*
        // belonging to the specified 'characters', if such an occurrence can
        // be found in the specified 'string' (on or *after* the optionally
        // specified 'position' if such a 'position' is specified), and return
        // 'k_NPOS' otherwise.

    static
    size_type findLastNotOf(const bslstl::StringRef& string,
                            const bslstl::StringRef& characters,
                            size_type                position = k_NPOS);
        // Return the position of the *last* occurrence of a character *not*
        // belonging to the specified 'characters', if such an occurrence can
        // be found in the specified 'string' (on or *before* the optionally
        // specified 'position' if such a 'position' is specified), and return
        // 'k_NPOS' otherwise.
};

// ============================================================================
//                        INLINE DEFINITIONS
// ============================================================================

                        // --------------------
                        // struct StringRefUtil
                        // --------------------

// CLASS METHODS

                        // Comparison

inline
bool StringRefUtil::areEqualCaseless(const bslstl::StringRef& lhs,
                                     const bslstl::StringRef& rhs)
{
    if (lhs.length() != rhs.length()) {
        return false;                                                 // RETURN
    }

    return 0 == lowerCaseCmp(lhs, rhs);
}

                        // Trim

inline
bslstl::StringRef StringRefUtil::trim(const bslstl::StringRef& string)
{
    return ltrim(rtrim(string));
}

                        // Substring

inline
bslstl::StringRef StringRefUtil::substr(const bslstl::StringRef& string,
                                        size_type                position,
                                        size_type                numChars)
{
    BSLS_ASSERT(position <= string.length());

    return bslstl::StringRef(string, position, numChars);
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2016 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License.  You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------- END-OF-FILE ----------------------------------