// bdlb_hashutil.h                                                    -*-C++-*-

// ----------------------------------------------------------------------------
//                                   NOTICE
//
// This component is not up to date with current BDE coding standards, and
// should not be used as an example for new development.
// ----------------------------------------------------------------------------

#ifndef INCLUDED_BDLB_HASHUTIL
#define INCLUDED_BDLB_HASHUTIL

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide a utility of hash functions.
//
//@CLASSES:
//  bdlb::HashUtil: utility for hash functions
//
//@SEE_ALSO: bdlc_hashtable2, bdeimp_inthash, bdeimp_strhash, bdlde_crc32
//
//@DESCRIPTION: This component defines a utility 'struct', 'bdlb::HashUtil',
// that provides two hash functions, 'hash1' and 'hash2'.  These hash functions
// have a good funneling property, which can be formalized as documented below
// (see {Definitions and Formal Statement of No-Funneling Property}).  In
// short, these hashes have the property that even if two inputs differ in only
// a few bits, they have a negligible probability of producing collisions.  In
// addition, these two hash functions produce full 32-bit results and allow
// hash table sizes to be a power of two.  They are fast and reliable.  They
// should work equally well on all types of keys.
//
// The two hash functions 'hash1' and 'hash2' have similar characteristics.
// 'hash2' is a bit faster than 'hash1', especially for longer keys; it also
// does not produce many collisions as determined experimentally for the key
// types given in the example below.  On the other hand, it does not have the
// formal no-funnel guarantees of 'hash1' which is reasonably fast and provably
// achieves low collision probability.
//
// Furthermore, this component provides a third hash function 'hash0' that
// performs a simple hash by taking the modulus of a user specified size on the
// input key.  This hash function has a poor distribution, as the function
// exhibits a clustering effect when the input keys only differ slightly.
// However, 'hash0' is about 6 to 8 times faster than 'hash1' and 'hash2'.
//
// In general, use 'hash0' when speed matters, and 'hash1' or 'hash2' when a
// more thorough hash is needed (e.g., keys exhibit a pattern that results in
// many collision when using 'bdeimp', or cost of collision is high but hashing
// is not a bottleneck).
//
///Hashing Fundamental Types
///-------------------------
// In order to facilitate hashing user-defined types (see next section), this
// component supports hashing fundamental types in a manner that is both
// seemingly random (i.e., the hash is good) but predictable (i.e., identical
// on all platforms, irrespective of endianness).  Note that the following
// expressions for some 'key' of a fundamental integral type:
//..
//  bdlb::HashUtil::hash1((const char *)&key, sizeof(key));  // not recommended
//  bdlb::HashUtil::hash2((const char *)&key, sizeof(key));  // not recommended
//..
// return values that differ on big- and little-endian platforms if the key
// size is more than one byte.  Instead, the recommended way to hash this 'key'
// is simply to:
//..
//  bdlb::HashUtil::hash1(key);
//  bdlb::HashUtil::hash2(key);
//..
// This works if 'key' is of one of the following fundamental types:
//..
//  char, signed char, unsigned char,
//  short, unsigned short,
//  int, unsigned int,
//  long, unsigned long,
//  bsls::Types::Int64, bsls::Types::Uint64,
//  float, double,
//  const void *
//..
//
///Hashing User-defined Types
///--------------------------
// This component can be used for hashing a user-defined type as well.  The
// recommended way to do this is to provide a 'hash' class method for the type,
// that takes an instance of the class and a size.  For instance, let us
// consider the following class (the meaning and implementation are irrelevant
// here) with the following private data members:
//..
//  class UserDefinedTickerType {
//      // This class is an aggregate.
//
//      enum { CUSIP_LENGTH = 10 };
//
//      // PRIVATE DATA MEMBERS
//      bool      d_isConstant;          // not recommended, because of padding
//      char      d_cusip[CUSIP_LENGTH];
//      int       d_value;
//..
// A hash class method can be provided and documented as follows:
//..
//    public:
//      // CLASS METHODS
//      static int hash(const UserDefinedTickerType& object, int size);
//          // Return a hash value calculated from the specified 'object' using
//          // the specified 'size' as the number of slots.  The hash value
//          // is guaranteed to be in the range [0, size).
//
//      // Rest of the class definition omitted...
//  };
//..
// The implementation of 'hash' can be very simple, depending on the semantics
// of the class.  Here we assume that 'd_isConstant' is an implementation
// detail and that data members 'd_cusip' and 'd_value' should participate in
// the hash and that the character string should be hashed by value.  The
// implementation follows:
//..
//  int UserDefinedTickerType::hash(const UserDefinedTickerType& object,
//                                  int                          size)
//  {
//      return (bdlb::HashUtil::hash1(object.d_cusip, CUSIP_LENGTH) +
//                               bdlb::HashUtil::hash1(object.d_value)) % size;
//  }
//..
// Note that more intricate combinations of the individual hashes in the
// aggregate can be appropriate if the individual members in the aggregate my
// exhibit a pattern.
//
// CAVEAT: When hashing user-defined classes, because of the potential padding
// in the class footprint, the simple-minded hashes
//..
//  const UserDefinedTickerType& key;
//  bdlb::HashUtil::hash1((const char *)&key, sizeof(key)); // wrong, because
//  bdlb::HashUtil::hash2((const char *)&key, sizeof(key)); // of padding
//..
// may actually *not* produce the same hash values for copies of the same
// object.
//
///Definitions and Formal Statement of No-Funneling Property
///---------------------------------------------------------
// Let us say a given bit in the input affects a given bit in the output (which
// has 'v' bits) if two keys differing only at that input bit will differ at
// the given output bit about half the time.  We define a hash to be a
// funneling hash if there is some subset of size 't' of all the input bits
// that can affect only a subset of size 'u' of bits in the output, and 't > u'
// and 'v > u', and we say that the hash function has a funnel of those 't'
// input bits into those 'u' bits.  In that case, then 'u' of those 't' bits
// can cancel out the effects of the other 't - u', and so the set of keys
// differing only in the input bits of the funnel can produce no more than half
// that number of hash values.  (Those '2^t' keys can produce no more than
// '2^u' out of '2^v' hash values.)  Differing in only a few bits is a common
// pattern in human and computer keys, so a funneling hash is seriously flawed.
//
// This component offers a hash function ('hash1') that has no funnels, except
// for a funnel of 32 bits into 31 bits, which is a negligible vulnerability.
// The 'hash2' does not have known no-funneling properties, but operates on the
// same principle and is just as good (and faster) in experiments with chaining
// as in the usage below.  In addition, these two hash functions produce full
// 32-bit results and allow hash table sizes to be a power of two.
//
// For further details and references, the interested reader is invited to
// consult 'http://burtleburtle.net/bob/hash/evahash.html'.
//
///Usage
///-----
// We illustrate the usage of this component by some experiments (code and
// results) on the number of collision expected for various usage.  For
// returning the results of an experiment, we will use the following structure:
//..
//  struct ExperimentalResult {
//
//      // DATA MEMBERS
//      int    d_max;     // maximum length of a chain
//      double d_average; // average length of a chain
//      double d_sigma;   // standard deviation
//
//      // CREATORS
//      explicit
//      ExperimentalResult(int max = 0, double avg = 0, double sigma = 0)
//          // Create an experimental result reporting the optionally specified
//          // 'max', 'avg', and 'sigma' values.
//      : d_max(max), d_average(avg), d_sigma(sigma)
//      {}
//  };
//..
// For generating the data, we use a parameterized 'GENERATOR' of which we give
// three implementations below (sequential integers, character strings, and
// multiple-field keys).  This template parameter needs to validate the
// following syntactic expressions, which we call in this usage example our
// 'GeneratorConcept'.  In those requirements, 'generator' is an instance of
// 'GENERATOR'.
//..
//  const char *data   = generator.data();   // address of storage of current
//                                           // value
//  int         length = generator.length(); // length of storage of current
//                                           // value
//
//  generator.next();                        // advance to next value
//..
//
///Example 1: Using Chaining
///- - - - - - - - - - - - -
// The following code will check the number of collisions for a chaining-based
// hash table given a certain distribution of keys:
//..
//  template <class GENERATOR>
//  ExperimentalResult computeChainingCollisions(int       numElements,
//                                               int       size,
//                                               GENERATOR input)
//      // Simulate insertion of the specified 'numElements' generated by the
//      // specified 'input' generator into a hash table of the specified
//      // 'size' using chaining as the mechanism to resolve collisions.
//      // Return the results (maximum and average length of a chain, with
//      // standard deviation).  'GENERATOR' must be a model of the
//      // 'GeneratorConcept'.  For the good functioning of this function, it
//      // is required that 'input' never be default nor repeat itself.
//  {
//      bsl::vector<int> table(size);   // all counters are init'ed to 0
//
//      for (int i = 0; i < numElements; ++i, input.next()) {
//          unsigned int hash = Util::hash1(input.data(),
//                                                   input.length());
//          ++table[hash % size];
//      }
//
//      double avgLength    = 0; // average number of collisions
//      double sigmaLength  = 0; // standard deviation from average
//      int    maxLength    = 0; // maximum length of a chain
//
//      for (int k = 0; k < size; ++k) {
//          avgLength   += table[k];
//          sigmaLength += table[k] * table[k];
//          maxLength    = bsl::max(maxLength, table[k]);
//      }
//      avgLength   /= size;
//      sigmaLength  = bsl::sqrt(sigmaLength / size - avgLength * avgLength);
//
//      return ExperimentalResult(maxLength, avgLength, sigmaLength);
//  }
//..
//
///Example 2: Using Double-Hashing
///- - - - - - - - - - - - - - - -
// The following code will check the number of collisions for a double-hashing
// based hash table (such as 'bdlc_hashtable2') given a certain distribution
// of keys:
//..
//  template <class GENERATOR>
//  ExperimentalResult computeDoubleHashingCollisions(int       numElements,
//                                                    int       size,
//                                                    GENERATOR input)
//      // Simulate insertion of the specified 'numElements' generated by the
//      // specified 'input' generator into a hash table of the specified
//      // 'size' using double hashing as the mechanism to resolve collisions.
//      // Return the results (maximum and average length of a chain, with
//      // standard deviation).  'GENERATOR' must be a model of the
//      // 'GeneratorConcept'.  For the good functioning of this function, it
//      // is required that 'input' never be default nor repeat itself.
//  {
//      typedef typename GENERATOR::ResultType  ResultType;
//      bsl::vector<ResultType> table(size); // all counters are default-ct'ed
//
//      double avgLength    = 0; // average number of collisions
//      double sigmaLength  = 0; // standard deviation from average
//      int    maxLength    = 0; // maximum length of a chain
//
//      for (int i = 0; i < numElements; ++i, input.next()) {
//          unsigned int hash1 = Util::hash1(input.data(),
//                                                    input.length());
//
//          int chainLength = 0;
//          int bucket      = hash1 % size;
//          if (ResultType() == table[bucket]) {
//              table[bucket] = input.current();
//          } else {
//              unsigned int hash2 = Util::hash2(input.data(),
//                                                        input.length());
//              hash2 = 1 + hash2 % (size - 1);
//
//              while (++chainLength < size) {
//                  bucket = (bucket + hash2) % size;
//
//                  if (ResultType() == table[bucket]) {
//                      table[bucket] = input.current();
//                      break; // while loop
//                  }
//              }
//          }
//
//          if (chainLength == size) {
//              bsl::cerr << "\tCould not insert in doubly-hashed table\n";
//              avgLength   = bsl::numeric_limits<double>::infinity();
//              sigmaLength = bsl::numeric_limits<double>::infinity();
//              maxLength   = static_cast<int>(
//                                    bsl::numeric_limits<double>::infinity());
//              return ExperimentalResult(maxLength, avgLength, sigmaLength);
//                                                                    // RETURN
//          }
//
//          avgLength   += chainLength;
//          sigmaLength += chainLength * chainLength;
//          maxLength    = bsl::max(maxLength, chainLength);
//      }
//      avgLength   /= numElements;
//      sigmaLength  = bsl::sqrt(sigmaLength / numElements
//                             -   avgLength * avgLength);
//
//      return ExperimentalResult(maxLength, avgLength, sigmaLength);
//  }
//..
//
///Example 3: Creating a Generator of Sequential Integers
/// - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Here is a simple generator that returns a sequence of integers.  This
// generator has a period of 'INT_MAX / gcd(increment, INT_MAX+1)'.
//..
//  class SequentialIntegers {
//      int d_current;
//      int d_inc;
//
//    public:
//      // TYPES
//      typedef int ResultType;
//          // Type returned by this generator.
//
//      // CREATORS
//      explicit SequentialIntegers(int first = 1, int increment = 1)
//          // Create a generator returning integers in a sequence starting at
//          // the optionally specified 'first' integer, with the optionally
//          // specified 'increment'.
//          : d_current(first), d_inc(increment) {}
//
//      // MANIPULATORS
//      void next()
//          // Advance to the next element.
//      {
//          d_current += d_inc;
//      }
//
//      // ACCESSORS
//      ResultType current() const
//          // Return current element.
//      {
//          return d_current;
//      }
//
//      const char *data() const
//          // Return data buffer of result type.
//      {
//          return reinterpret_cast<const char*>(&d_current);
//      }
//
//      int length() const
//          // Return length of result type (in bytes).
//      {
//          return sizeof(ResultType);
//      }
//  };
//..
//
///Example 4: Creating a Character String Generator
/// - - - - - - - - - - - - - - - - - - - - - - - -
// Here is a simple generator that returns a sequence of strings that are a
// permutation of an initial string.  This generator has a period of
// 'factorial(initial.length())' where 'factorial(N)' returns the number of
// permutations of 'N' distinct elements.
//..
//  class SequentialStrings {
//      int         d_length;
//      bsl::string d_current;
//
//    public:
//      // TYPES
//      typedef bsl::string ResultType;
//          // Type returned by this generator.
//
//      // CREATORS
//      explicit SequentialStrings(bsl::string const& initial)
//          // Create a generator returning strings in a sequence starting at
//          // the specified 'initial' string (sorted by characters) and
//          // looping through all permutations of 'str'.  The behavior is
//          // undefined unless all the characters of the 'initial' string are
//          // distinct.
//          : d_length(initial.length()), d_current(initial)
//      {
//          bsl::sort(d_current.begin(), d_current.end());
//          assert(bsl::unique(d_current.begin(), d_current.end()) ==
//                                                            d_current.end());
//      }
//
//      // MANIPULATORS
//      void next()
//          // Advance to the next element.
//      {
//          bsl::next_permutation(d_current.begin(), d_current.end());
//      }
//
//      // ACCESSORS
//      ResultType current() const
//          // Return current element.
//      {
//          return d_current;
//      }
//
//      const char *data() const
//          // Return data buffer of result type.
//      {
//          return d_current.data();
//      }
//      int length() const
//          // Return length of result type (in bytes).
//      {
//          return d_current.length();
//      }
//  };
//..
//
///Example 5: Creating a Multiple-Field Keys Generator
///- - - - - - - - - - - - - - - - - - - - - - - - - -
// It is not uncommon for keys to consist of concatenated keys in multiple
// combinations.  We simulate this by a character string in which each
// character loops through a specified number of values.  The period of this
// generator is the product of the length of each character range.
//..
//  struct SequentialVector {
//      bsl::vector<char> d_ranges;
//      int               d_length;
//      bsl::string       d_current;
//
//    public:
//      // TYPES
//      typedef bsl::string ResultType;
//          // Type returned by this generator.
//
//      // CREATORS
//      explicit SequentialVector(bsl::vector<char> const& ranges)
//          // Create a generator returning strings having the same length as
//          // the specified 'ranges' (i.e., 'ranges.size()') in a sequence
//          // starting at the string with all null characters and looping
//          // through all the strings with each character at position 'i' in
//          // the specified range from 0 until 'ranges[i]' (excluded).  The
//          // behavior is undefined unless 'ranges' does not contain zero
//          // entries.
//          : d_ranges(ranges)
//          , d_length(ranges.size())
//          , d_current(d_length, (char)0)
//      {
//      }
//
//      // MANIPULATORS
//      void next()
//          // Advance to the next element.
//      {
//          for (int i = 0;
//               i < d_length && ++d_current[i] == d_ranges[i]; ++i) {
//              d_current[i] = 0;
//          }
//      }
//
//      // ACCESSORS
//      ResultType current() const
//          // Return current element.
//      {
//          return d_current;
//      }
//
//      const char *data() const
//          // Return data buffer of current value.
//      {
//          return d_current.data();
//      }
//
//      int length() const
//          // Return length of current value (in bytes).
//      {
//          return d_current.length();
//      }
//  };
//..
//
///Example 6: Experimental Results
///- - - - - - - - - - - - - - - -
// Checking the above distributions is done in a straightforward manner.  We do
// this at various load factors (the load factor is the percentage of the table
// that actually holds data; it is defined as 'N / SIZE' where 'N' is the
// number of elements and 'SIZE' is the size of the hash table).  Note that
// chaining accommodates arbitrary load factors, while double hashing requires
// that the load factor be strictly less than 1.
//..
//  int usageExample(int verbose, int veryVerbose, int /* veryVeryVerbose */) {
//      const int SIZE = 10000;
//      const int INC  = SIZE / 5; // load factors for every 20% percentile
//      const int COLS = (4*SIZE)/INC;
//
//      {
//          ExperimentalResult results[3][COLS];
//          bsls::Stopwatch    timer;
//
//          if (verbose) cout << "\nUsing chaining"
//                            << "\n--------------" << endl;
//
//          if (verbose) cout << "Sequential Integers\n";
//          timer.start();
//          for (int n = INC, i = 0; n < 4*SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              results[0][i] = computeChainingCollisions(
//                                                       n,
//                                                       SIZE,
//                                                       SequentialIntegers());
//              assert(results[0][i].d_average < 1.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) cout << "Sequential Strings\n";
//          timer.start();
//          for (int n = INC, i = 0; n < 4*SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              bsl::string initial("abcdefghij"); // period = 10! = 3628800
//              results[1][i] = computeChainingCollisions(
//                                                 n,
//                                                 SIZE,
//                                                 SequentialStrings(initial));
//              assert(results[1][i].d_average < 1.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) cout << "Sequential Vector\n";
//          timer.start();
//          for (int n = INC, i = 0; n < 4*SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              bsl::vector<char> ranges(6, static_cast<char>(4));
//                                                       // period = 4^6 = 4096
//              results[2][i] = computeChainingCollisions(
//                                                   n,
//                                                   SIZE,
//                                                   SequentialVector(ranges));
//              assert(results[2][i].d_average < 1.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) {
//              cout << "\nDisplaying average (max) for chaining:";
//              cout << "\n--------------------------------------n";
//              const char *ROW_LABELS[] = { "\nIntegers:",
//                                           "\nStrings :",
//                                           "\nVector  :",
//                                           "\nLoad factor:",
//              };
//              const int   NUM_ROWS     = sizeof  ROW_LABELS
//                                       / sizeof *ROW_LABELS - 1;
//
//              cout << ROW_LABELS[NUM_ROWS] << bsl::setprecision(2);
//              for (int n = INC; n < 4*SIZE; n += INC) {
//                  cout << "\t" << static_cast<double>(n) / SIZE;
//              }
//              for (int j = 0; j < NUM_ROWS; ++j) {
//                  cout << ROW_LABELS[j];
//                  for (int i = 0; i < COLS; ++i) {
//                      cout << "\t" << results[j][i].d_average;
//                      cout << "(" << results[j][i].d_max << ")";
//                  }
//                  cout << "\n";
//              }
//          }
//      }
//..
// We repeat the same steps with double hashing, except that due to the nature
// of the collision-resolution mechanism, the tolerance for the average must be
// slightly increased to 2.5 times the load factor, when the load factor is
// high.
//..
//      {
//          // const int SIZE = 1000003;   // must be a prime number
//          const int SIZE = 10007;     // must be a prime number
//          const int INC  = SIZE / 5; // load factors for every 20% percentile
//          const int COLS = SIZE/INC;
//
//          ExperimentalResult results[3][COLS];
//          bsls::Stopwatch    timer;
//
//          if (verbose) cout << "\nUsing double hashing"
//                            << "\n--------------------" << endl;
//
//          if (verbose) cout << "Sequential Integers\n";
//          timer.start();
//          for (int n = INC/2, i = 0; n < SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              results[0][i] = computeDoubleHashingCollisions(
//                                                       n,
//                                                       SIZE,
//                                                       SequentialIntegers());
//              assert(results[0][i].d_average < 2.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) cout << "Sequential Strings\n";
//          timer.start();
//          for (int n = INC/2, i = 0; n < SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              bsl::string initial("abcdefghij"); // period = 10! = 3628800
//              results[1][i] = computeDoubleHashingCollisions(
//                                                 n,
//                                                 SIZE,
//                                                 SequentialStrings(initial));
//              assert(results[1][i].d_average < 2.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) cout << "Sequential Vector\n";
//          timer.start();
//          for (int n = INC/2, i = 0; n < SIZE; n += INC, ++i) {
//              if (veryVerbose)
//                  cout << "\tWith load factor " <<
//                                       static_cast<double>(n) / SIZE << "\n";
//
//              bsl::vector<char> ranges(7, static_cast<char>(8));
//                                                    // period = 8^7 = 2097152
//              results[2][i] = computeDoubleHashingCollisions(
//                                                   n,
//                                                   SIZE,
//                                                   SequentialVector(ranges));
//              assert(results[2][i].d_average < 2.5 * n / SIZE);
//          }
//          if (verbose) cout << "\t... in " << timer.elapsedTime() << "\n";
//          timer.reset();
//
//          if (verbose) {
//              cout << "\nDisplaying average (max) for double-hashing:";
//              cout << "\n--------------------------------------------\n";
//              const char *ROW_LABELS[] = { "\nIntegers:",
//                                           "\nStrings :",
//                                           "\nVector  :",
//                                           "\nLoad factor:",
//              };
//              const int   NUM_ROWS     = sizeof  ROW_LABELS
//                                       / sizeof *ROW_LABELS -1;
//
//              cout << ROW_LABELS[NUM_ROWS] << bsl::setprecision(2);
//              for (int n = INC/2; n < SIZE; n += INC) {
//                  cout << "\t" << static_cast<double>(n) / SIZE;
//              }
//              for (int j = 0; j < NUM_ROWS; ++j) {
//                  cout << ROW_LABELS[j];
//                  for (int i = 0; i < COLS; ++i) {
//                      cout << "\t" << results[j][i].d_average;
//                      cout << "(" << results[j][i].d_max << ")";
//                  }
//                  cout << "\n";
//              }
//          }
//      }
//
//      return 0;
//  }
//..
// The above code produces the following results.  The results for chaining are
// identical for 'hash1' (used in the code above) or 'hash2' (code identical
// except 'hash2' is used in place of 'hash1' in 'computeChainingCollisions').
//..
//  Displaying average(max) for chaining:
//  ---------------------------------------------------------------------------
//  Load factor:    0.2     0.4     0.6     0.8     1       1.2     1.4
//  Integers:       0.2(3)  0.4(5)  0.6(6)  0.8(6)  1(7)    1.2(7)  1.4(8)
//  Strings :       0.2(4)  0.4(4)  0.6(5)  0.8(7)  1(7)    1.2(7)  1.4(7)
//  Vector  :       0.2(5)  0.4(5)  0.6(10) 0.8(10) 1(15)   1.2(15) 1.4(20)
//
//  Load factor:    1.6     1.8     2       2.2     2.4     2.6     2.8
//  Integers:       1.6(9)  1.8(9)  2(10)   2.2(10) 2.4(10) 2.6(10) 2.8(11)
//  Strings :       1.6(7)  1.8(8)  2(9)    2.2(11) 2.4(11) 2.6(11) 2.8(11)
//  Vector  :       1.6(20) 1.8(24) 2(25)   2.2(29) 2.4(30) 2.6(34) 2.8(35)
//
//  Load factor:    3       3.2     3.4     3.6     3.8
//  Integers:       3(11)   3.2(11) 3.4(12) 3.6(12) 3.8(13)
//  Strings :       3(12)   3.2(12) 3.4(13) 3.6(13) 3.8(13)
//  Vector  :       3(39)   3.2(40) 3.4(42) 3.6(45) 3.8(46)
//
//  Displaying average(max) for double-hashing:
//  ---------------------------------------------------------------------------
//  Load factor:    0.1      0.3     0.5      0.7      0.9
//  Integers:       0.046(2) 0.20(4) 0.37(10) 0.71(15) 1.6(59)
//  Strings :       0.064(2) 0.20(6) 0.40(12) 0.75(18) 1.6(50)
//  Vector  :       0.05(2)  0.19(5) 0.58(9)  1.20(15) 2.4(64)
//..

#include <bdlscm_version.h>

#include <bsls_assert.h>
#include <bsls_review.h>
#include <bsls_types.h>

namespace BloombergLP {
namespace bdlb {
                              // ===============
                              // struct HashUtil
                              // ===============

struct HashUtil {
    // This 'struct' provides a namespace for hash functions.

    // CLASS METHODS
    static unsigned int hash0(char                 key, int modulus);
    static unsigned int hash0(signed char          key, int modulus);
    static unsigned int hash0(unsigned char        key, int modulus);
    static unsigned int hash0(short                key, int modulus);
    static unsigned int hash0(unsigned short       key, int modulus);
    static unsigned int hash0(int                  key, int modulus);
    static unsigned int hash0(unsigned int         key, int modulus);
    static unsigned int hash0(long                 key, int modulus);
    static unsigned int hash0(unsigned long        key, int modulus);
    static unsigned int hash0(bsls::Types::Int64   key, int modulus);
    static unsigned int hash0(bsls::Types::Uint64  key, int modulus);
    static unsigned int hash0(float                key, int modulus);
    static unsigned int hash0(double               key, int modulus);
    static unsigned int hash0(const void          *key, int modulus);
        // Return an unsigned integer hash value in the range from zero to one
        // less than the specified 'modulus' corresponding to the specified
        // 'key'.  The behavior is undefined unless '0 < modulus < 2^31'.  Note
        // that 'modulus' is expected to be a prime not close to an integral
        // power of 2.  Also note that specifying a 'modulus' of 1 will cause 0
        // to be returned for every 'value'.

    static unsigned int hash0(const char *string, int modulus);
        // Return a pseudo-random unsigned integer in the range from zero to
        // one less than the specified 'modulus' corresponding to the specified
        // null-terminated 'string'.  The behavior is undefined unless
        // '0 < modulus < 2^31'.  Note that 'modulus' is expected to be a prime
        // not close to an integral power of 2.  Also note that specifying a
        // 'modulus' of 1 will cause 0 to be returned for every 'string'.

    static unsigned int hash0(const char *string,
                              int         stringLength,
                              int         modulus);
        // Return a pseudo-random unsigned integer in the range from zero to
        // one less than the specified 'modulus' corresponding to the specified
        // 'string' having the specified 'stringLength'.  'string' need not be
        // null-terminated and may contain embedded null characters.  The
        // behavior is undefined unless '0 <= stringLength' and
        // '0 < modulus < 2^31'.  Note that 'modulus' is expected to be a prime
        // not close to an integral power of 2.  Also note that specifying a
        // 'modulus' of 1 will cause 0 to be returned for every 'string'.

    static unsigned int hash1(const char *data, int length);
    static unsigned int hash2(const char *data, int length);
        // Return an unsigned integer hash value corresponding to the specified
        // 'data' of the specified 'length' (in bytes).  The behavior is
        // undefined unless '0 <= length'.  Note that if 'data' is 0, then
        // 'length' also must be 0.  Also note that every byte in 'data' is
        // significant; that is, it is not appropriate to cast a 'struct'
        // address to a 'char *' unless the 'struct' is packed (no padding).
        //
        // Both 'hash1' and 'hash2' return a hash, but both hashes can be
        // assumed to be independent (i.e., there are no known correlations
        // between the results of the two hash functions given the same input
        // data).

    static unsigned int hash1(char                 key);
    static unsigned int hash1(signed char          key);
    static unsigned int hash1(unsigned char        key);
    static unsigned int hash1(short                key);
    static unsigned int hash1(unsigned short       key);
    static unsigned int hash1(int                  key);
    static unsigned int hash1(unsigned int         key);
    static unsigned int hash1(long                 key);
    static unsigned int hash1(unsigned long        key);
    static unsigned int hash1(bsls::Types::Int64   key);
    static unsigned int hash1(bsls::Types::Uint64  key);
    static unsigned int hash1(float                key);
    static unsigned int hash1(double               key);
    static unsigned int hash1(const void          *key);
        // Return an unsigned integer hash value corresponding to the specified
        // 'key'.  Note that the return value is seemingly random (i.e., the
        // hash is good) but identical on all platforms (irrespective of
        // endianness).  Both functions 'hash1' and 'hash2' return a hash, but
        // both hashes can be assumed to be independent (i.e., there are no
        // known correlations between the results of both hash functions given
        // the same input data).

    static unsigned int hash2(char                 key);
    static unsigned int hash2(signed char          key);
    static unsigned int hash2(unsigned char        key);
    static unsigned int hash2(short                key);
    static unsigned int hash2(unsigned short       key);
    static unsigned int hash2(int                  key);
    static unsigned int hash2(unsigned int         key);
    static unsigned int hash2(long                 key);
    static unsigned int hash2(unsigned long        key);
    static unsigned int hash2(bsls::Types::Int64   key);
    static unsigned int hash2(bsls::Types::Uint64  key);
    static unsigned int hash2(float                key);
    static unsigned int hash2(double               key);
    static unsigned int hash2(const void          *key);
        // Return an unsigned integer hash value corresponding to the specified
        // 'key'.  Note that the return value is seemingly random (i.e., the
        // hash is good) but identical on all platforms (irrespective of
        // endianness).  Both functions 'hash1' and 'hash2' return a hash, but
        // both hashes can be assumed to be independent (i.e., there are no
        // known correlations between the results of both hash functions given
        // the same input data).
};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                              // ---------------
                              // struct HashUtil
                              // ---------------

// CLASS METHODS
inline
unsigned int HashUtil::hash0(int key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    if (4 == sizeof(int)) {
        return static_cast<unsigned int>(key)
             % static_cast<unsigned int>(modulus);                    // RETURN
    }

    return (static_cast<unsigned int>(key) & 0xFFFFFFFF)
          % static_cast<unsigned int>(modulus);
}

inline
unsigned int HashUtil::hash0(bsls::Types::Int64 key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return ((static_cast<unsigned int>((key >> 32) & 0xFFFFFFFF)
           % static_cast<unsigned int>(modulus))
           ^ static_cast<unsigned int>(key         & 0xFFFFFFFF))
           % static_cast<unsigned int>(modulus);
}

inline
unsigned int HashUtil::hash0(char key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(static_cast<unsigned char>(key)),
                           modulus);
}

inline
unsigned int HashUtil::hash0(signed char key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(static_cast<unsigned char>(key)),
                           modulus);
}

inline
unsigned int HashUtil::hash0(unsigned char key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(key), modulus);
}

inline
unsigned int HashUtil::hash0(short key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(static_cast<unsigned short>(key)),
                           modulus);
}

inline
unsigned int HashUtil::hash0(unsigned short key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(key), modulus);
}

inline
unsigned int HashUtil::hash0(unsigned int key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<int>(key), modulus);
}

inline
unsigned int HashUtil::hash0(long key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    if (4 == sizeof(long)) {
        return HashUtil::hash0(
                             static_cast<int>(static_cast<unsigned long>(key)),
                             modulus);                                // RETURN
    }
    else {
        return HashUtil::hash0(
              static_cast<bsls::Types::Int64>(static_cast<unsigned long>(key)),
              modulus);                                               // RETURN
    }
}

inline
unsigned int HashUtil::hash0(unsigned long key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    if (4 == sizeof(unsigned long)) {
        return HashUtil::hash0(static_cast<int>(key), modulus);       // RETURN
    }
    else {
        return HashUtil::hash0(static_cast<bsls::Types::Int64>(key), modulus);
                                                                      // RETURN
    }
}

inline
unsigned int HashUtil::hash0(bsls::Types::Uint64 key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<bsls::Types::Int64>(key), modulus);
}

inline
unsigned int HashUtil::hash0(double key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    bsls::Types::Int64 *v = reinterpret_cast<bsls::Types::Int64 *>(&key);
    return HashUtil::hash0(*v, modulus);
}

inline
unsigned int HashUtil::hash0(float key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    return HashUtil::hash0(static_cast<double>(key), modulus);
}

inline
unsigned int HashUtil::hash0(const void *key, int modulus)
{
    BSLS_ASSERT(0 < modulus);

    if (4 == sizeof(void *)) {
        const int *v = reinterpret_cast<const int *>(&key);
        return HashUtil::hash0(*v, modulus);                          // RETURN
    }
    else {
        const bsls::Types::Int64 *v = static_cast<const bsls::Types::Int64 *>(
                                              static_cast<const void *>(&key));
        return HashUtil::hash0(*v, modulus);                          // RETURN
    }
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------