// bsls_bslsourcenameparserutil.h                                     -*-C++-*-
#ifndef INCLUDED_BSLS_BSLSOURCENAMEPARSERUTIL
#define INCLUDED_BSLS_BSLSOURCENAMEPARSERUTIL

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide low-level functions for parsing source file names.
//
//@CLASSES:
//  bsls::BslSourceNameParserUtil: utility namespace to parse source file names
//
//@SEE_ALSO:
//
//@DESCRIPTION: This component provides a namespace
// 'bsls::BslSourceNameParserUtil' for methods to parse source code file names
// (as may be reported by the '__FILE__' macro, or a listing), including
// BDE-style (Lakos-style) component and test driver source file names.  This
// component is not using any C++ library or 'bsl' dependencies.  In other
// words, it uses only some functionality of the C standard library, and C++
// core language features.  This is a necessary limitation because parsing file
// names may be used from the lowest levels of 'bsl' code, especially in test
// drivers.
//
///BDE-Style Source File Names
///---------------------------
// The majority of source files in a BDE-style code base are component header
// and implementation, and their test driver source files.  There are other,
// special source file names that we describe only briefly.  In this section
// we introduce the basic construct of the component source file names as well
// as the component test driver file names as implemented by this utility.  We
// do not provide complete coverage of all possible BDE-style source file
// naming conventions, for that see John Lakos: Large Scale C++ Volume 1.
//
///Public Component Names
/// - - - - - - - - - - -
// A component consists of two source files sharing the same file name with
// different extensions: '.h' for the header file and '.cpp' for the
// implementation file.  The component file name itself consists of the package
// name the component belongs to (such as 'bsls'), followed by an underscore
// ('_'), followed by the name of the component (such as 'atomicoperations'),
// all lowercase letters, e.g., 'bsls_atomicoperations.h',
// 'bsls_atomicoperations.cpp'.
//
///Subordinate or Package-Internal Component Names
///- - - - - - - - - - - - - - - - - - - - - - - -
// Subordinate components are considered package-internal and, similarly to
// component-private type names, formed by adding further underscore-delimited
// "sections" to the file name (before the extension), such as:
// 'bsls_atomicoperations_default.h', 'bsls_atomicoperations_x64_all_gcc.h',
// 'bsls_assert_macroreset.h', or 'bsls_byteorderutil_impl.h'.
//
// There are two kinds of special subordinate components that are considered
// part of the main component, and they are in separate files only for
// non-structural (technical) reasons.  One such kind is the so-called
// subordinate test component, that will be introduced in the section below.
// The other is generated so-called '_cpp03' components for (mainly) emulating
// variadic templates in C++03.  Such generated subordinate components get
// '_cpp03' appended to their file name, right before the first dot '.' of the
// extension(s).
//
///Test Driver Source File Names
///- - - - - - - - - - - - - - -
// Test driver source file names are normally very simple: the same base name
// as the component, followed by '.t.cpp'.  However some components that
// provide class and function templates with many template parameters require
// more than one test driver so the test driver files can compile, and compile
// in a reasonable amount of time.  (Most often old, C++03 compilers run out of
// resources or internal limits, but in case of optimized code even new
// compilers may try to use too much memory.)
//
// In the past, special private components were created when a test driver was
// too large to compile.  Those components are called "subordinate test
// (driver) components", and their names were created by having the last
// underscore-delimited "segment" of the name to start with '_test'.
// Optionally, after '_test' there could be a decimal number between 0-9, or
// 00-99.  (Due to legacy code that used text, we also allow alpha characters,
// like '_testconstructors'.)  So for an imaginary
// 'bslstl_unordinarymultikeymultimap.h' and '.cpp' we may have the expected
// 'bslstl_unordinarymultikeymultimap.t.cpp', but also
// 'bslstl_unordinarymultikeymultimap_test1.t.cpp'/'.h'/'.cpp', and so on.
//
// Newer complex components (and components that have been modernized) will not
// have subordinate test drivers, but so-called test driver parts.  Test driver
// part source file names are created by inserting a dot and a decimal number
// (between 0-9 or 00-99), right before the '.t.cpp'.  Our example from the
// previous paragraph would then have several test driver source files:
// 'bslstl_unordinarymultikeymultimap.0.t.cpp',
// 'bslstl_unordinarymultikeymultimap.1.t.cpp', and so on, but only one
// component 'bslstl_unordinarymultikeymultimap.h' and '.cpp'.
//
// Because the components that need many files for their test driver often use
// variadic templates (that need to be emulated in C++03) we often also have:
// 'bslstl_unordinarymultikeymultimap_test1_cpp03.t.cpp'/'.h'/'.cpp', etc., or
// 'bslstl_unordinarymultikeymultimap_cpp03.0.t.cpp' files as well.
//
// For the subordinate test components (including the ones that end with
// '_cpp03') we consider the main component name
// 'bslstl_unordinarymultikeymultimap' to be the component name part, because
// the other components exist only due to compiler technical limitations.
//
///Other Source File Names
///- - - - - - - - - - - -
// Special source files that are not components or component test drivers have
// either a special one letter prefix, a one letter "designator extension", or
// a first segment that is too long to be a package name (more than 6
// characters).  The current parsing logic in this utility does not directly
// recognize such source file names.  The parsing logic makes sure that such
// names will not be successfully parsed in a way that the reported component
// name could match a real component.
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Determining Component Name from Source File Name
///- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Suppose we need to determine the component name from one of its source file
// names.  To ease understanding we use string literals for source file names.
//
// First, we declare the result variables that the parser will fill:
//..
//  const char *componentStart  = 0;
//  size_t      componentLength = 0;
//..
// Next, we call the parser, saving its return value:
//..
//  int returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                        &componentStart,
//                                        &componentLength,
//                                        "groups/abc/abcx/abcx_name_cpp03.h");
//..
// Now, we verify that the parsing was successful:
//..
//  assert(0 == returnCode);
//..
// Finally, we verify that the expected component name is found:
//..
//  assert(9 == componentLength &&
//         0 == memcmp("abcx_name", componentStart, 9));
//..
// Notice how the "_cpp03" suffix of the generated file has been removed.
//
///Example 2: Determining the Type of a Source File
/// - - - - - - - - - - - - - - - - - - - - - - - -
// Suppose we need to determine, in addition to the component name, what kind
// of source file name we have.  To ease understanding we use string literals
// for source file names.
//
// First, we declare the result variables that the parser will fill:
//..
//  const char *componentStart  = 0;
//  size_t      componentLength = 0;
//  unsigned    sourceType      = ~0u;
//..
// Next, we call the parser with the first name, passing the address of the
// optional output variable after the source file name:
//..
//  int returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                         &componentStart,
//                                         &componentLength,
//                                         "groups/abc/abcx/abcx_name_cpp03.h",
//                                         &sourceType);
//..
// Then, we verify that the parsing was successful, and the expected component
// name is found:
//..
//  assert(0 == returnCode);
//  assert(9 == componentLength &&
//         0 == memcmp("abcx_name", componentStart, 9));
//..
// Next, we verify the determined source file type by examining the "kind",
// stored in the bits masked by 'bsls::BslSourceNameParserUtil::k_MASK_KIND',
// and the flags stored in other bits:
//..
//  typedef bsls::BslSourceNameParserUtil Util;  // For brevity
//
//  assert(Util::k_HEADER == (sourceType & Util::k_MASK_KIND));
//
//  assert(0 == (sourceType & Util::k_IS_MULTIFILE_TEST  ));
//  assert(0 == (sourceType & Util::k_IS_SUBORDINATE_TEST));
//  assert(0 != (sourceType & Util::k_IS_CPP03_GENERATED ));
//..
// Then, we can verify a subordinate test component implementation file name.
// These names, and also headers for subordinate test components are special as
// they are not supposed to contain executable code.  They are just another
// test driver for their main component.
//..
//  returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                      &componentStart,
//                                      &componentLength,
//                                      "groups/abc/abcx/abcx_name_test12.cpp",
//                                      &sourceType);
//
//  assert(0 == returnCode);
//  assert(9 == componentLength &&
//         0 == memcmp("abcx_name", componentStart, 9));
//..
// Note that the main component name is reported.
//..
//  assert(Util::k_IMPL == (sourceType & Util::k_MASK_KIND));
//
//  assert(0 == (sourceType & Util::k_IS_MULTIFILE_TEST  ));
//  assert(0 != (sourceType & Util::k_IS_SUBORDINATE_TEST));
//  assert(0 == (sourceType & Util::k_IS_CPP03_GENERATED ));
//..
// Now, we verify a traditional test driver file name of a subordinate test
// component:
//..
//  returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                    &componentStart,
//                                    &componentLength,
//                                    "groups/abc/abcx/abcx_name_test12.t.cpp",
//                                    &sourceType);
//
//  assert(0 == returnCode);
//  assert(9 == componentLength &&
//         0 == memcmp("abcx_name", componentStart, 9));
//
//  assert(Util::k_TTEST == (sourceType & Util::k_MASK_KIND));
//
//  assert(0 == (sourceType & Util::k_IS_MULTIFILE_TEST  ));
//  assert(0 != (sourceType & Util::k_IS_SUBORDINATE_TEST));
//  assert(0 == (sourceType & Util::k_IS_CPP03_GENERATED ));
//..
// Finally, we verify a multi-file test driver source:
//..
//  returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                                  &componentStart,
//                                                  &componentLength,
//                                                  "wxya_other_cpp03.0.g.cpp",
//                                                  &sourceType);
//
//  assert(0 == returnCode);
//  assert(10 == componentLength &&
//         0 == memcmp("wxya_other", componentStart, 10));
//
//  assert(Util::k_GTEST == (sourceType & Util::k_MASK_KIND));
//
//  assert(0 != (sourceType & Util::k_IS_MULTIFILE_TEST  ));
//  assert(0 == (sourceType & Util::k_IS_SUBORDINATE_TEST));
//  assert(0 != (sourceType & Util::k_IS_CPP03_GENERATED ));
//..
//
///Example 3: Reporting Parsing Errors
///- - - - - - - - - - - - - - - - - -
// Suppose we need to parse source file names from an external source, and
// therefore we may need to report the reason for parsing failures for human
// readers (of log files).  To ease understanding we use string literals for
// source file names.
//
// First, we declare the result variables that the parser will fill:
//..
//  const char *componentStart  = 0;
//  size_t      componentLength = 0;
//..
// Next, we can call the parser with a too short file name and save the return
// value:
//..
//  int returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                                            &componentStart,
//                                                            &componentLength,
//                                                            "a.h");
//..
// Then, we verify that the parsing has failed:
//..
//  assert(0 != returnCode);
//..
// Next, we output a brief error message to the user if requested:
//..
//  if (verbose) {
//      printf("Error parsing source file name \"%s\": %s\n",
//             "a.h",
//             bsls::BslSourceNameParserUtil::errorMessage(returnCode));
//      // Output will indicate the file name was too short (to be a BDE name)
//  }
//..
// Now, we demonstrate another failing-to-parse source name and its error
// message:
//..
//  returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                                          &componentStart,
//                                                          &componentLength,
//                                                          "abcxyz_name.hpp");
//  assert(0 != returnCode);
//  if (verbose) {
//      printf("Error parsing source file name \"%s\": %s\n",
//             "abcxyz_name.hpp",
//             bsls::BslSourceNameParserUtil::errorMessage(returnCode));
//      // Output will indicate an unsupported extension
//  }
//..
// Finally, we demonstrate the "missing test driver tag" error:
//..
//  returnCode =  bsls::BslSourceNameParserUtil::getComponentName(
//                                                         &componentStart,
//                                                         &componentLength,
//                                                         "abcx_name..t.cpp");
//  assert(0 != returnCode);
//  if (verbose) {
//      printf("Error parsing source file name \"%s\": %s\n",
//             "abcx_name..t.cpp",
//             bsls::BslSourceNameParserUtil::errorMessage(returnCode));
//      // Output will indicate two dots next to each other in the file name
//  }
//..

#include <stddef.h>  // 'size_t'

namespace BloombergLP {
namespace bsls {

                      //===============================
                      // struct BslSourceNameParserUtil
                      //===============================

struct BslSourceNameParserUtil {
    // This 'struct' provides a namespace for 'static' utility functions that
    // parse source file names (as may be reported by the '__FILE__' macro),
    // including Lakos-style component source and test driver names.

    // PUBLIC TYPES
    enum SourceTypes {
        // Bit masks and constants that describe the meaning of the value
        // loaded through the 'type_p' parameter of 'getComponentName' below.
        // Only some of the low-order bits are used at the moment:
        //..
        //  7 6 5 4 3 2 10
        // |r|r|r|3|S|M|KK|
        //
        // K - two bits describing the extension (kind)
        //
        // M - a bit that is set only if the file is a test driver source
        //     file, and it is a multi-file test driver that has a "segment" of
        //     (normally) decimal digits, e.g., "abcx_name.14.g.cpp".
        //
        // S - a bit that is set if the file belongs to a subordinate test
        //     driver, a file that has "_test", followed by non-underscore
        //     characters (except if it is also a generated simulation file for
        //     C++03, see below)
        //
        // 3 - a bit that is set if the file name is a generated simulation
        //     file for C++03 (simulates some C++11 features such as variadic
        //     templates up to a certain number of parameters etc)
        //
        // r - all other bits are reserved for future use
        //..
        // For example, according to the bit descriptions above,
        // "abcx_name_testq_cpp03.g.cpp" will be described by
        // 'k_GTEST | k_IS_SUBORDINATE_TEST | k_IS_CPP03_GENERATED'.
        //
        // NOTE(review): the quoted patterns in the trailing comments of the
        // flag enumerators below are informal sketches; '[^a-z0-9]' reads as a
        // negated character class, which contradicts the descriptions above
        // (digits, or legacy alpha characters) -- presumably '[a-z0-9]' was
        // meant; confirm against the implementation.

        k_MASK_KIND = 0x3,  // mask selecting the two "kind" (K) bits
        k_MASK_TEST = 0x2,  // bit of the "kind" that is set in both test
                            // driver kinds ('k_TTEST' and 'k_GTEST') and in
                            // neither 'k_HEADER' nor 'k_IMPL'

        k_HEADER    = 0x0,  // .h
        k_IMPL      = 0x1,  // .cpp
        k_TTEST     = 0x2,  // .t.cpp -- traditional test driver
        k_GTEST     = 0x3,  // .g.cpp -- Google test test driver

        k_IS_MULTIFILE_TEST   = 0x4,  // "[^a-z0-9]+.(t.cpp|g.cpp)"

        k_IS_SUBORDINATE_TEST = 0x8,  // "_test[^a-z0-9]*.(h|cpp|t.cpp|g.cpp)"

        k_IS_CPP03_GENERATED  = 0x10  // "_cpp03" at the very end, before exts
    };

    // CLASS METHODS

                               // Parsing

    static int getComponentName(const char **componentNamePtr,
                                size_t      *componentNameLength,
                                const char  *sourceName,
                                unsigned    *type_p = 0);
        // Parse the specified Lakos-style 'sourceName' source file name with
        // optional path portion to find the component name part.  Return zero
        // on success and a non-zero value if parsing failed.  In case of
        // success, fill the specified 'componentNamePtr' with a pointer to the
        // first character, and the specified 'componentNameLength' with the
        // number of characters of the component name found.  Optionally
        // specify 'type_p'.  When 'type_p' is not 0, set the bits of the
        // pointed-to 'unsigned', according to 'SourceTypes', that describe the
        // type of the source file that was parsed.
        //
        // This function does not validate its input, it assumes that it is a
        // valid Lakos-style component source or test driver file name, or one
        // of the special names defined by John Lakos: Large Scale C++ Design
        // (application, adapter, etc).  If 'sourceName' is not as such, the
        // function may return a non-zero error value, or it may report success
        // with unspecified output.
        //
        // Subordinate test component sources are special, as they should not
        // contain code, only their test drivers.  The component name reported
        // for subordinate test drivers is the main component name.
        //
        // Use the 'errorMessage' function (in this utility) to get a static,
        // brief English textual description of a negative return value.

                             // Miscellaneous

    static const char *errorMessage(int errorCode);
        // Return a static, brief English error message that describes the
        // specified negative parsing 'errorCode'.  The behavior is undefined
        // unless 'errorCode < 0' and was returned by one of the parsing
        // methods (of this utility) that states in its contract to use this
        // method to get the description of an error code.
};

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2022 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------