// bdls_pathutil.h                                                    -*-C++-*-

// ----------------------------------------------------------------------------
//                                   NOTICE
//
// This component is not up to date with current BDE coding standards, and
// should not be used as an example for new development.
// ----------------------------------------------------------------------------

#ifndef INCLUDED_BDLS_PATHUTIL
#define INCLUDED_BDLS_PATHUTIL

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide portable file path manipulation.
//
//@CLASSES:
//  bdls::PathUtil: Portable utility methods for manipulating paths
//
//@SEE_ALSO: bdls_filesystemutil
//
//@DESCRIPTION: This component provides utility methods for manipulating
// strings that represent paths in the filesystem.  Class methods of
// 'bdls::PathUtil' include platform-independent operations to add or remove
// filenames or relative paths at the end of a path string (by "filenames" we
// are referring to the names of any filesystem item, including regular files
// and directories).  There are also methods to parse the path to delimit the
// "root" as defined for the current platform; see {Parsing and Performance
// ('rootEnd' argument)} below.
//
// Paths that have a root are called *absolute* paths, whereas paths that do
// not have a root are *relative* paths.
//
// Note that this component does not perform filesystem operations.  In
// particular, no effort is made to verify the existence or accessibility of
// any segment of any path.
//
///Terminology
///-----------
// To introduce the terminology explored in this section, lets start with a
// Unix example:
//..
//  "/foo/bar/myfile.txt"
//..
// The elements of this path would be:
//..
//            Path: "/foo/bar/myfile.txt"
//            Root: "/"                       # the starting separator(s)
//  Leaf(Basename): "myfile.txt"
//       Extension: ".txt"
//         Dirname: "/foo/bar/"
//..
//
///Separator
///- - - - -
// A platform dependent character that separates elements of a path, such as
// directory names from each other and file names.  The separator character is
// the '/' (slash) on Unix (and the like) systems and '\' (backslash) on
// Windows systems.
//
///Path
/// - -
// An optional root, followed by optional directories, followed by an optional
// filename.
//
///Root
/// - -
// The root, if present, is at the beginning of a path and its presence
// determines if a path is absolute (the root is present) or relative (the root
// is not present).  The textual rules for what a root is are platform
// dependent.  See {Unix Root} and {Windows Root}.
//
// See also {Parsing and Performance ('rootEnd' argument)} for important notes
// about speeding up functions (especially on Windows) by not reparsing roots
// every time a function is called.
//
///Unix Root
///  -  -  -
// The Unix root consists of the separator characters at the beginning of a
// path, so the root of "/one" is "/", the root of "//two" is "//", while the
// root of "somefile" is "" (there is no root, relative path).
//
///Windows Root
///  -  -  -  -
// The Windows root is much more complicated than the Unix root, because
// Windows has three different flavors of paths: local (LFS), UNC, and Long UNC
// UNC (LUNC):
//
//: LFS: root consists of a drive letter followed by a colon (the name part)
//:      and then zero or more separators (the directory part).  E.g.,
//:      "c:\hello.txt", root is "c:\"; "c:tmp" root is "c:"
//:
//: UNC: root consists of two separators followed by a hostname and
//:      separator (the name part), and then a shared folder followed by one
//:      or more separators (the directory part).  e.g.,
//:      "\\servername\sharefolder\output\test.t" root is
//:      "\\servername\sharefolder\"
//:
//: LUNC: root starts with "\\?\".  Then follows either "UNC" followed by
//:       a UNC root, or an LFS root.  The "\\?\" is included as part of
//:       the root name.  e.g.,
//:      "\\?\UNC\servername\folder\hello" root is "\\?\UNC\servername\dir\"
//:      while "\\?\c:\windows\test" root is "\\?\\c:\"
//
///Leaf (a.k.a. Basename)
/// - - - - - - - - - - -
// The leaf is the rightmost name following the root, in other words: the last
// element of the path.  Note that several methods in this utility require a
// leaf to be present to function (such as 'getDirname').  Note that a relative
// path may contain a leaf only.  Examples:
//..
//  Path                            Leaf
//  ----                            ----
//  "/tmp/foo/bar.txt"              "bar.txt"
//  "c:\tmp\foo\bar.txt"            "bar.txt"
//  "\\server\share\tmp\foo.txt"    "foo.txt"
//  "/tmp/foo/"                     "foo"
//  "/tmp/"                         "tmp"
//  "/"                             Not Present
//..
//
///Extension
///- - - - - -
// An extension is a suffix of a leaf that begins with a dot and that does
// not contain additional dots. There are a few caveats. The special leaf
// names "." and ".." are considered to not have extensions. Furthermore,
// if a leaf's name begins with a dot, such dot is not considered when
// determining the extension. For example, the leaf ".bashrc" does not have
// an extension, but ".bbprofile.log" does, and its extension is ".log".
// We will say that a path has an extension if it has a leaf and its leaf
// has an extension. Note that for consistency reasons, our implementation
// differs from other standard implementations in the same way 'getLeaf'
// does: the path "/foo/bar.txt/" is considered to have an extension and
// its extension is ".txt". Examples:
//..
//  Path                            Extension
//  ----                            -------
//  "/tmp/foo/bar.txt"              ".txt"
//  "/tmp/foo/bar"                  Not Present
//  "/tmp/foo/bar.longextension"    ".longextension"
//  "/a/b.txt/"                     ".txt"
//  "/a/b.txt/."                    Not present
//  "/a.txt/b.txt/.."               Not present
//  "/a/.profile"                   Not present
//  "/a/.profile.backup"            ".backup"
//  "foo.txt"                       ".txt"
//..
//
///Dirname
///- - - -
// Dirname is the part of the path that contains the root but not the leaf.
// Note that the 'getDirname' utility method requires a leaf to be present to
// function.  Examples:
//..
//  Path                            Dirname
//  ----                            -------
//  "/tmp/foo/bar.txt"              "/tmp/foo/"
//  "c:\tmp\foo\bar.txt"            "c:\tmp\foo\"
//  "\\server\share\tmp\foo.txt"    "\\server\share\tmp\"
//  "/tmp/foo/"                     "/tmp"
//  "/tmp/"                         "/"
//  "/"                             no leaf -> error
//  "foo.txt"                       empty
//..
//
///Parsing and Performance ('rootEnd' argument)
///--------------------------------------------
// Most methods of this component will perform basic parsing of the beginning
// part of the path to determine what part of it is the "root" as defined for
// the current platform.  This parsing is trivial on Unix platforms but is
// slightly more involved for the Windows operating system.  To accommodate
// client code which is willing to store parsing results in order to maximize
// performance, all methods which parse the "root" of the path accept an
// optional argument delimiting the "root"; if this argument is specified,
// parsing is skipped.
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Basic Syntax
///- - - - - - - - - - - -
// We start with strings representing an absolute native path and a relative
// native path, respectively:
//..
//  #ifdef BSLS_PLATFORM_OS_WINDOWS
//  bsl::string tempPath  = "c:\\windows\\temp";
//  bsl::string otherPath = "22jan08\\log.txt";
//  #else
//  bsl::string tempPath  = "/var/tmp";
//  bsl::string otherPath = "22jan08/log.txt";
//  #endif
//..
// 'tempPath' is an absolute path, since it has a root.  It also has a leaf
// element ("temp"):
//..
//  assert(false == bdls::PathUtil::isRelative(tempPath));
//  assert(true  == bdls::PathUtil::isAbsolute(tempPath));
//  assert(true  == bdls::PathUtil::hasLeaf(tempPath));
//..
// We can add filenames to the path one at a time, or we can add another path
// if is relative.  We can also remove filenames from the end of the path one
// at a time:
//..
//  bdls::PathUtil::appendRaw(&tempPath, "myApp");
//  bdls::PathUtil::appendRaw(&tempPath, "logs");
//
//  assert(true == bdls::PathUtil::isRelative(otherPath));
//  assert(0    == bdls::PathUtil::appendIfValid(&tempPath, otherPath));
//  assert(true == bdls::PathUtil::hasLeaf(tempPath));
//
//  bdls::PathUtil::popLeaf(&tempPath);
//  bdls::PathUtil::appendRaw(&tempPath, "log2.txt");
//
//  #ifdef BSLS_PLATFORM_OS_WINDOWS
//  assert("c:\\windows\\temp\\myApp\\logs\\22jan08\\log2.txt" == tempPath);
//  #else
//  assert("/var/tmp/myApp/logs/22jan08/log2.txt"              == tempPath);
//  #endif
//..
// A relative path may be appended to any other path, even itself.  An absolute
// path may not be appended to any path, or undefined behavior will result:
//..
//  assert(0 == bdls::PathUtil::appendIfValid(&otherPath, otherPath));  // OK
//  /* bdls::PathUtil::append(&otherPath, tempPath); */ // UNDEFINED BEHAVIOR!
//..
// Note that there is no attempt to distinguish filenames that are regular
// files from filenames that are directories, or to verify the existence of
// paths in the filesystem.
//..
//  #ifdef BSLS_PLATFORM_OS_WINDOWS
//  assert("c:\\windows\\temp\\myApp\\logs\\22jan08\\log2.txt" == tempPath);
//  #else
//  assert("/var/tmp/myApp/logs/22jan08/log2.txt"              == tempPath);
//  #endif
//..
//
///Example 2: Parsing a path using 'splitFilename'
///- - - - - - - - - - - - - - - - - - - - - - - -
// Suppose we need to obtain all filenames from the path.
//
// First, we create a path for splitting and a storage for filenames:
//..
//  #ifdef BSLS_PLATFORM_OS_WINDOWS
//  const char                     *splitPath = "c:\\one\\two\\three\\four";
//  #else
//  const char                     *splitPath = "//one/two/three/four";
//  #endif
//  bsl::vector<bsl::string_view>  filenames;
//..
// Then, we run a cycle to sever filenames from the end one by one:
//..
//  bsl::string_view head;
//  bsl::string_view tail;
//  bsl::string_view path(splitPath);
//
//  do {
//      bdls::PathUtil::splitFilename(&head, &tail, path);
//      filenames.push_back(tail);
//      path = head;
//  } while (!tail.empty());
//..
// Now, verify the resulting values:
//..
//  assert(5           == filenames.size());
//
//  assert("four"      == filenames[0]);
//  assert("three"     == filenames[1]);
//  assert("two"       == filenames[2]);
//  assert("one"       == filenames[3]);
//  assert(""          == filenames[4]);
//..
// Finally, make sure that only the root remains of the original value:
//..
//  #ifdef BSLS_PLATFORM_OS_WINDOWS
//  assert("c:\\"      == head);
//  #else
//  assert("//"        == head);
//  #endif
//..

#include <bdlscm_version.h>

#include <bsls_assert.h>
#include <bsls_libraryfeatures.h>
#include <bsls_platform.h>
#include <bsls_review.h>

#include <bsl_string.h>
#include <bsl_string_view.h>

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
#include <memory_resource>  // 'std::pmr::polymorphic_allocator'
#endif

#include <string>           // 'std::string', 'std::pmr::string'

namespace BloombergLP {

namespace bdls {
                              // ===============
                              // struct PathUtil
                              // ===============

struct PathUtil {
    // This struct contains utility methods for platform-independent
    // manipulation of filesystem paths.  No method of this struct provides any
    // filesystem operations or accesses the filesystem as part of its
    // implementation.

    // CLASS METHODS
    static int appendIfValid(bsl::string             *path,
                             const bsl::string_view&  filename);
    static int appendIfValid(std::string             *path,
                             const bsl::string_view&  filename);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int appendIfValid(std::pmr::string        *path,
                             const bsl::string_view&  filename);
#endif
        // Append the specified 'filename' to the end of the specified 'path'
        // if 'filename' represents a relative path.  Return 0 on success, and
        // a non-zero value otherwise.  Note that any filesystem separator
        // characters at the end of 'filename' or 'path' will be discarded.
        // See {Terminology} for the definition of separator.

    static void appendRaw(bsl::string *path,
                          const char  *filename,
                          int          length  = -1,
                          int          rootEnd = -1);
    static void appendRaw(std::string *path,
                          const char  *filename,
                          int          length  = -1,
                          int          rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static void appendRaw(std::pmr::string *path,
                          const char       *filename,
                          int               length  = -1,
                          int               rootEnd = -1);
#endif
        // Append the specified 'filename' up to the optionally specified
        // 'length' to the end of the specified 'path'.  If 'length' is
        // negative, append the entire string.  If the optionally specified
        // 'rootEnd' offset is non-negative, it is taken as the position in
        // 'path' of the character following the root.  The behavior is
        // undefined if 'filename' represents an absolute path or if either
        // 'filename' or 'path' ends with the filesystem separator character.
        // The behavior is also undefined if 'filename' points to any part of
        // 'path' (i.e., 'filename' may not be an alias for 'path').  See
        // {Parsing and Performance ('rootEnd' argument)}.

    static int popLeaf(bsl::string *path, int rootEnd = -1);
    static int popLeaf(std::string *path, int rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int popLeaf(std::pmr::string *path, int rootEnd = -1);
#endif
        // Remove from the specified 'path' the rightmost filename following
        // the root; that is, remove the leaf element.  If the optionally
        // specified 'rootEnd' offset is non-negative, it is taken as the
        // position in 'path' of the character following the root.  Return 0 on
        // success, and a nonzero value otherwise; in particular, return a
        // nonzero value if 'path' does not have a leaf.  See {Parsing and
        // Performance ('rootEnd' argument)}.  See also {Terminology} for the
        // definition of leaf and root.

    static int getBasename(bsl::string             *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
    static int getBasename(std::string             *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int getBasename(std::pmr::string        *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
#endif
        // Load into the specified 'leaf' the value of the rightmost name in
        // the specified 'path' that follows the root; that is, the leaf
        // element.  If the optionally specified 'rootEnd' offset is
        // non-negative, it is taken as the position in 'path' of the character
        // following the root.  Return 0 on success, and a non-zero value
        // otherwise; in particular, return nonzero if 'path' does not have a
        // leaf.  Note that 'getBasename' is a synonym for 'getLeaf'.  See
        // {Parsing and Performance ('rootEnd' argument)}.  See also
        // {Terminology} for the definition of leaf and root.

    static int getDirname(bsl::string             *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
    static int getDirname(std::string             *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int getDirname(std::pmr::string        *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
#endif
        // Load into the specified 'dirname' the value of the directory part of
        // the specified 'path', that is, the root if it exists and all the
        // filenames except the last one (the leaf).  If the optionally
        // specified 'rootEnd' offset is non-negative, it is taken as the
        // position in 'path' of the character following the root.  Return 0 on
        // success, and a non-zero value otherwise; in particular, return a
        // nonzero value if 'path' does not have a leaf.  Note that in the case
        // of a relative path with a single filename, the function will succeed
        // and 'dirname' will be the empty string.  See {Parsing and
        // Performance ('rootEnd' argument)}.  See also {Terminology} for the
        // definition of directories and root.

    static int getLeaf(bsl::string             *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
    static int getLeaf(std::string             *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int getLeaf(std::pmr::string        *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#endif
        // Load into the specified 'leaf' the value of the rightmost name in
        // the specified 'path' that follows the root; that is, the leaf
        // element.  If the optionally specified 'rootEnd' offset is
        // non-negative, it is taken as the position in 'path' of the character
        // following the root.  Return 0 on success, and a non-zero value
        // otherwise; in particular, return nonzero if 'path' does not have a
        // leaf.  Note that 'getBasename' is a synonym for 'getLeaf'.  See
        // {Parsing and Performance ('rootEn'd argument)}.  See also
        // {Terminology} for the definition of leaf and root.

    static int getExtension(bsl::string             *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
    static int getExtension(std::string             *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int getExtension(std::pmr::string        *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
#endif
        // Load into the specified 'extension' the extension of 'path'.  If the
        // optionally specified 'rootEnd' offset is non-negative, it is taken
        // as the position in 'path' of the character following the root.
        // Return 0 if the path has an extension, and a non-zero value
        // otherwise. See {Parsing and Performance ('rootEnd' argument)}.  See
        // also {Terminology} for the definitions of extension and root.

    static int getRoot(bsl::string             *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
    static int getRoot(std::string             *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
    static int getRoot(std::pmr::string        *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#endif
        // Load into the specified 'root' the value of the root part of the
        // specified 'path'.  If the optionally specified 'rootEnd' offset is
        // non-negative, it is taken as the position in 'path' of the character
        // following the root.  Return 0 on success, and a non-zero value
        // otherwise; in particular, return a nonzero value if 'path' is
        // relative.  Note that the meaning of the root part is
        // platform-dependent.  See {Parsing and Performance ('rootEnd'
        // argument)}.  See also {Terminology} for the definition of root.

    static void splitFilename(bsl::string_view        *head,
                              bsl::string_view        *tail,
                              const bsl::string_view&  path,
                              int                      rootEnd = -1);
        // Load the last pathname component from the specified 'path' into the
        // specified 'tail' and everything leading up to that to the specified
        // 'head'.  If the optionally specified 'rootEnd' offset is
        // non-negative, it is taken as the position in 'path' of the character
        // following the root.  The 'tail' part never contains a slash; if
        // 'path' ends in a slash, 'tail' is empty.  If there is no slash in
        // 'path', 'head' is empty.  If 'path' is empty, both 'head' and 'tail'
        // are empty.  Trailing slashes are stripped from 'head' unless it is
        // the root.
        //..
        //  +------------------+------------+---------+
        //  |      PATH        |    HEAD    |   TAIL  |
        //  +==================+============+=========+
        //  | "one"            | ""         | "one"   |
        //  +------------------+------------+---------+
        //  | "/one/two/three" | "/one/two" | "three" |
        //  +------------------+------------+---------+
        //  | "//one/two///"   | "/one/two" | ""      |
        //  +------------------+------------+---------+
        //  | "c:\\one\\two"   | "c:\\one"  | "two"   |
        //  +------------------+------------+---------+
        //..
        // See {'Terminology'} for the definition of root.  The behavior is
        // undefined unless 'head != tail' and 'INT_MAX >= path.length()'.
        // Note that 'head' or 'tail' may point to the 'path' object when the
        // method is called.

    static bool isAbsolute(const bsl::string_view& path, int rootEnd = -1);
        // Return 'true' if the specified 'path' is absolute (has a root), and
        // 'false' otherwise.  If the optionally specified 'rootEnd' offset is
        // non-negative, it is taken as the position in 'path' of the character
        // following the root.  See {Parsing and Performance ('rootEnd'
        // argument)}.  See also {Terminology} for the definition of root.

    static bool isRelative(const bsl::string_view& path, int rootEnd = -1);
        // Return 'true' if the specified 'path' is relative (lacks a root),
        // and 'false' otherwise.  If the optionally specified 'rootEnd' offset
        // is non-negative, it is taken as the position in 'path' of the
        // character following the root.  See {Parsing and Performance
        // ('rootEnd' argument)}.  See also {Terminology} for the definition of
        // root.

    static bool hasLeaf(const bsl::string_view& path, int rootEnd = -1);
        // Return 'true' if the specified 'path' has a filename following the
        // root, and 'false' otherwise.  If the optionally specified 'rootEnd'
        // offset is non-negative, it is taken as the position in 'path' of the
        // character following the root.  See {Parsing and Performance
        // ('rootEnd' argument)}.  See also {Terminology} for the definition of
        // leaf.

    static int getRootEnd(const bsl::string_view& path);
        // Return the 0-based position in the specified 'path' of the character
        // following the root.  Note that a return value of 0 indicates a
        // relative path.  See {Parsing and Performance ('rootEnd' argument)}.
        // See also {Terminology} for the definition of root.
};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                               // --------------
                               // class PathUtil
                               // --------------

// CLASS METHODS
inline
int PathUtil::getBasename(bsl::string              *leaf,
                          const bsl::string_view&  path,
                          int                       rootEnd)
{
    BSLS_ASSERT(leaf);

    return getLeaf(leaf, path, rootEnd);
}

inline
int PathUtil::getBasename(std::string              *leaf,
                          const bsl::string_view&  path,
                          int                       rootEnd)
{
    BSLS_ASSERT(leaf);

    return getLeaf(leaf, path, rootEnd);
}

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR
inline
int PathUtil::getBasename(std::pmr::string        *leaf,
                          const bsl::string_view&  path,
                          int                      rootEnd)
{
    BSLS_ASSERT(leaf);

    return getLeaf(leaf, path, rootEnd);
}
#endif

}  // close package namespace

}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------