// bslh_seededhash.h                                                  -*-C++-*-
#ifndef INCLUDED_BSLH_SEEDEDHASH
#define INCLUDED_BSLH_SEEDEDHASH

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide a struct to run seeded 'bslh' hash algorithms on types.
//
//@CLASSES:
//  bslh::SeededHash: functor that runs seeded 'bslh' hash algorithms on types
//
//@SEE_ALSO: bslh_hash, bslh_seedgenerator
//
//@DESCRIPTION: This component provides a templated 'struct',
// 'bslh::SeededHash', that defines a hash-functor that can be used with
// standard containers (a drop in replacement for 'bsl::hash'), and which
// applies the supplied (template parameter) 'HASH_ALGORITHM' to the attributes
// of the (template parameter) 'TYPE' which have been identified as salient to
// hashing.  The 'bslh::SeededHash' template parameter 'HASH_ALGORITHM' must be
// a hashing algorithm that conforms to the requirements outlined below (see
// {Requirements for Seeded 'bslh' Hashing Algorithms}).  Note that there are
// several hashing algorithms defined in 'bslh', some of which do not accept
// seeds, meaning they cannot be used with 'bslh::SeededHash'.
//
// 'bslh::SeededHash' will use the (template parameter) 'SEED_GENERATOR' to
// generate the seed used to instantiate the 'HASH_ALGORITHM'.  The
// 'bslh::SeededHash' template parameter 'SEED_GENERATOR' must be a seed
// generator that conforms to the requirements outlined below (see
// {Requirements on (template parameter) Type 'SEED_GENERATOR'}).  The seed
// will be generated once upon construction of 'bslh::SeededHash' and then held
// until destruction.
//
// A call to 'bslh::Hash::operator()' for a (template parameter) 'TYPE' will
// call the 'hashAppend' free function for 'TYPE' and provide 'hashAppend' an
// instance of the 'HASH_ALGORITHM' which has been constructed using the stored
// seed.  Clients are expected to define a free-function 'hashAppend' for each
// of the types they wish to be hashable (see 'bslh_hash.h' for details on
// 'hashAppend').  More information can be found in the package level
// documentation for 'bslh' (internal users can also find information here
// {TEAM BDE:USING MODULAR HASHING<GO>})
//
///Relationship to 'bslh::Hash'
///----------------------------
// 'bslh::SeededHash' is substantially similar to 'bslh::Hash'.
// 'bslh::SeededHash' presents a similar interface to that of 'bslh::Hash',
// however, it adds a constructor that accepts a seed generator.  Because of
// the use of seeds, 'bslh::SeededHash' stores data and therefor does not allow
// the empty base optimization like 'bslh::Hash' does.
//
///Requirements for Seeded 'bslh' Hashing Algorithms
///-------------------------------------------------
// Users of this modular hashing system are free write their own hashing
// algorithms.  In order to plug into 'bslh::SeededHash', the user-implemented
// algorithms must meet the requirements for regular 'bslh' hashing algorithms
// defined in 'bslh_hash.h', with the exception that a default constructor is
// not required.  The user-implemented algorithm must also implement the
// interface shown here:
//..
//  class SomeHashAlgorithm {
//    public:
//      // CONSTANTS
//      enum { k_SEED_LENGTH = XXX };
//
//      // CREATORS
//      explicit SomeHashAlgorithm(const char *seed);
//  };
//..
// The 'k_SEED_LENGTH' enum must be in the public interface, and 'XXX' must be
// replaced with an integer literal indicating the number of bytes of seed the
// algorithm requires.  The parameterized constructor must accept a
// 'const char *'.  This pointer will point to a seed of 'XXX' bytes in size.
//
///Requirements on (template parameter) Type 'SEED_GENERATOR'
///----------------------------------------------------------
// Users are free to write their own seed generator, which can be supplied to
// bslh::SeededHash.  The seed generator must conform to the interface shown
// here:
//..
//  class SomeSeedGenerator {
//    // ACCESSORS
//    void generateSeed(char *seedLocation, size_t seedLength);
//  };
//..
// The only mandatory piece of the seed generator interface is the generateSeed
// method, which accepts a char pointer to memory to be written and a size_t
// length in bytes.  The generateSeed method must fill the size_t bytes of the
// memory pointed to by the char pointer with a seed.  The seed generator must
// be meet one of the two requirements:
//
//: A The seed generator is default constructible.
//:
//: B The seed generator is copy constructible.
//
// Option A is preferred because it allows 'bslh::SeededHash' to be default
// constructible.  Option B is allowed, but means that 'bslh::SeededHash' must
// be passed an already-instantiated 'SEED_GENERATOR' at construction.
//
///Usage
///-----
// This section illustrates intended usage of this component.
//
///Example 1: Storing User Defined Input in a Hash Table
///- - - - - - - - - - - - - - - - - - - - - - - - - - -
// Suppose we have any array of user-specified nicknames, and we want a really
// fast way to find out if values are contained in the array.  We can create a
// 'HashTable' data structure that is capable of looking up values in O(1)
// time.
//
// Because we will be storing arbitrary user input in our table, it is possible
// that an attacker with knowledge of the hashing algorithm we are using could
// specially craft input that will cause collisions in our hash table,
// degrading performance to O(n).  To avoid this we will need to use a secure
// hash algorithm with a random seed.  This algorithm will need to be in the
// form of a hash functor -- an object that will take objects stored in our
// array as input, and yield a 64-bit int value which is hard enough for an
// outside observer to predict that it appear random.  'bslh::SeededHash'
// provides a convenient functor that can wrap any seeded hashing algorithm and
// use it to produce a hash for any type them implements 'hashAppend'.
//
// We can use the result of the hash function to index into our array of
// 'buckets'.  Each 'bucket' is simply a pointer to a value in our original
// array of 'TYPE' objects.
//
// First, we define our 'HashTable' template class, with the two type
// parameters: 'TYPE' (the type being referenced) and 'HASHER' (a functor that
// produces the hash).
//..
//  template <class TYPE, class HASHER>
//  class HashTable {
//      // This class template implements a hash table providing fast lookup of
//      // an external, non-owned, array of values of (template parameter)
//      // 'TYPE'.
//      //
//      // The (template parameter) 'TYPE' shall have a transitive, symmetric
//      // 'operator==' function and it will be hashable using 'bslh::Hash'.
//      // Note that there is no requirement that it have any kind of creator
//      // defined.
//      //
//      // The 'HASHER' template parameter type must be a functor with a method
//      // having the following signature:
//      //..
//      //  size_t operator()(TYPE)  const;
//      //                   -OR-
//      //  size_t operator()(const TYPE&) const;
//      //..
//      // and 'HASHER' shall have a publicly accessible default constructor
//      // and destructor.  Here we use 'bslh::Hash' as our default template
//      // argument.  This allows us to hash any type for which 'hashAppend'
//      // has been implemented.
//      //
//      // Note that this hash table has numerous simplifications because we
//      // know the size of the array and never have to resize the table.
//
//      // DATA
//      const TYPE       *d_values;          // Array of values table is to
//                                           // hold
//      size_t            d_numValues;       // Length of 'd_values'.
//      const TYPE      **d_bucketArray;     // Contains ptrs into 'd_values'
//      size_t            d_bucketArrayMask; // Will always be '2^N - 1'.
//      HASHER            d_hasher;          // User supplied hashing algorithm
//
//    private:
//      // PRIVATE ACCESSORS
//      bool lookup(size_t      *idx,
//                  const TYPE&  value,
//                  size_t       hashValue) const;
//          // Look up the specified 'value', having the specified 'hashValue',
//          // and load its index in 'd_bucketArray' into the specified 'idx'.
//          // If not found, return the vacant entry in 'd_bucketArray' where
//          // it should be inserted.  Return 'true' if 'value' is found and
//          // 'false' otherwise.
//
//    public:
//      // CREATORS
//      HashTable(const TYPE *valuesArray,
//                size_t      numValues,
//                HASHER      hasher);
//          // Create a hash table referring to the specified 'valuesArray'
//          // having length of the specified 'numValues' and using the
//          // specified 'hasher' to generate hash values.  No value in
//          // 'valuesArray' shall have the same value as any of the other
//          // values in 'valuesArray'
//
//      ~HashTable();
//          // Free up memory used by this cross-reference.
//
//      // ACCESSORS
//      bool contains(const TYPE& value) const;
//          // Return true if the specified 'value' is found in the table and
//          // false otherwise.
//  };
//..
// Then, we will create an array of user supplied nicknames that would create
// collisions in some other hashing algorithm.
//..
//  const char names[6][11] = { "COLLISION!",
//                              "COLLISION@",
//                              "COLLISION#",
//                              "COLLISION$",
//                              "COLLISION%",
//                              "COLLISION^"};
//
//  enum { NUM_NAMES = sizeof names / sizeof *names };
//..
// Next, we create a seed generator, with a cryptographically secure random
// number generator, that can be used to generate seeds for our secure hashing
// algorithm.  We then pass that seed generator into 'bslh::SeededHash'.  We
// use the 'bslh::SipHashAlgorithm' as our secure hashing algorithm.
//..
//  typedef SeedGenerator<CryptographicallySecureRNG> SecureSeedGenerator;
//  typedef SeededHash<SecureSeedGenerator, SipHashAlgorithm> SecureHash;
//
//  SecureSeedGenerator secureSeedGenerator;
//  SecureHash          secureHash(secureSeedGenerator);
//..
// Then, we create our hash table 'hashTable'.  We pass it the 'secureHash'
// hashing functor we created.  Passing it in through the functor, rather than
// just having it default constructed from the template parameter, allows us to
// pass in an algorithm with a pre-configured state if we so desire.
//..
//  HashTable<const char [11], SecureHash> hashTable(names,
//                                                   NUM_NAMES,
//                                                   secureHash);
//..
// Now, we verify that each element in our array registers with count:
//..
//  for ( int i = 0; i < NUM_NAMES; ++i) {
//      assert(hashTable.contains(names[i]));
//  }
//..
// Finally, we verify that futures not in our original array are correctly
// identified as not being in the set:
//..
//  assert(!hashTable.contains("asdfasdfas"));
//  assert(!hashTable.contains("asdfqwerqw"));
//  assert(!hashTable.contains("asdfqwerzx"));
//..

#include <bslscm_version.h>

#include <bslh_defaultseededhashalgorithm.h>
#include <bslh_hash.h>

#include <stddef.h>  // for 'size_t'

namespace BloombergLP {

namespace bslh {

                        // ======================
                        // class bslh::SeededHash
                        // ======================

template <class SEED_GENERATOR, class HASH_ALGORITHM
                                            = bslh::DefaultSeededHashAlgorithm>
struct SeededHash {
    // This class wraps the (template parameter) 'HASH_ALGORITHM', which
    // requires a seed, in an interface that satisfies the 'hash' requirements
    // of the C++11 standard.  The (template parameter) type 'SEED_GENERATOR'
    // will be used to generate the seed required by 'HASH_ALGORITHM'.

  private:
    // DATA
    char seed[HASH_ALGORITHM::k_SEED_LENGTH];
        // Stores the seed generated by the (template parameter) type
        // 'SEED_GENERATOR'.

  public:
    // TYPES
    typedef size_t result_type;
        // The type of the hash value that will be returned by the
        // function-call operator.

    // CREATORS
    SeededHash();
        // Create a 'bslh::SeededHash' which will default construct the
        // (template parameter) type 'SEED_GENERATOR' to initialize the seed
        // that will be passed to the (template parameter) type
        // 'HASH_ALGORITHM' when it is used.  'SEED_GENERATOR' must be default
        // constructible to use this constructor.

    explicit SeededHash(SEED_GENERATOR& seedGenerator);
        // Create a 'bslh::SeededHash' which will use the specified
        // 'seedGenerator' to initialize the seed that will be passed to the
        // (template parameter) type 'HASH_ALGORITHM' when it is used.
        // 'SEED_GENERATOR' must be copy-constructible to use this constructor.

    //! SeededHash(const SeededHash& original) = default;
        // Create a 'bslh::SeededHash' object having the same internal state as
        // the specified 'original'.

    //! ~SeededHash() = default;
        // Destroy this object.

    // MANIPULATORS
    //! SeededHash& operator=(const SeededHash& rhs) = default;
        // Assign to this object the value of the specified 'rhs' object, and
        // return a reference providing modifiable access to this object.

    // ACCESSORS
    template <class TYPE>
    result_type operator()(const TYPE& type) const;
        // Returns a hash generated by the (template parameter) type
        // 'HASH_ALGORITHM' for the specified 'type'.  The value returned by
        // the 'HASH_ALGORITHM' is cast to 'size_t' before returning.

};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

// CREATORS
template <class SEED_GENERATOR, class HASH_ALGORITHM>
inline
SeededHash<SEED_GENERATOR, HASH_ALGORITHM>::SeededHash()
{
    SEED_GENERATOR seedGenerator = SEED_GENERATOR();
    seedGenerator.generateSeed(seed, HASH_ALGORITHM::k_SEED_LENGTH);
}

template <class SEED_GENERATOR, class HASH_ALGORITHM>
inline
SeededHash<SEED_GENERATOR, HASH_ALGORITHM>::SeededHash(
                                                 SEED_GENERATOR& seedGenerator)
{
    seedGenerator.generateSeed(seed, HASH_ALGORITHM::k_SEED_LENGTH);
}

// ACCESSORS
template <class SEED_GENERATOR, class HASH_ALGORITHM>
template <class TYPE>
inline
typename SeededHash<SEED_GENERATOR, HASH_ALGORITHM>::result_type
SeededHash<SEED_GENERATOR, HASH_ALGORITHM>::operator()(TYPE const& key) const
{
    HASH_ALGORITHM hashAlg(seed);
    hashAppend(hashAlg, key);
    return static_cast<result_type>(hashAlg.computeHash());
}


}  // close package namespace

}  // close enterprise namespace


#endif

// ----------------------------------------------------------------------------
// Copyright 2014 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------