Outline
Purpose
Provide a type holding the constituent parts of a hash table.
Classes
- See also
- bslstl_hashtable, bslalg_hashtableimputil
Description
This component provides a single, complex-constrained in-core (value-semantic) attribute class, bslalg::HashTableAnchor
, that is used to hold (not own) the array of buckets and the list of nodes that form the key data elements of a hash-table. This class is typically used with the utilities provided in bslstl_hashtableimputil . Note that the decision to store nodes in a linked list (i.e., resolving collisions through chaining) is intended to facilitate a hash-table implementation meeting the requirements of a C++11 standard unordered container.
Attributes
Name Type Simple Constraints
------------------ ------------------- ------------------
bucketArrayAddress HashTableBucket * none
bucketArraySize size_t none
listRootAddress BidirectionalLink * none
Complex Constraint
-------------------------------------------------------------------------
`bucketArrayAddress` must refer to a contiguous sequence of valid
`bucketArraySize` or both `bucketArrayAddress` and `bucketArraySize` must
be 0.
Definition bslalg_hashtablebucket.h:297
listRootAddress
: address of the head of the linked list of nodes holding the elements contained in a hash table
bucketArrayAddress
: address of the first element of the sequence of HashTableBucket
objects, each of which refers to the first and last node (from listRootAddress
) in that bucket
bucketArraySize
: the number of (contiguous) buckets in the array of buckets at bucketArrayAddress
Usage
This section illustrates intended use of this component.
Example 1: Basic Usage
Suppose we want to create a hash table that keeps track of pointers. Pointers can be added (insert
ed) or removed (erase
d) from the table, and the table will keep track, at any time, of whether a pointer is currently stored in the table using the count
method. It will also be able to return the total number of objects stored in the table (the size
method). Redundant insert
s have no effect – a given pointer may only be stored in the table once.
First, we create our class:
double d_maxLoadFactor;
unsigned d_numNodes;
void grow();
bool checkInvariants() const;
bool find(Node **node, Bucket **bucket, const void *ptr) const;␇
private:
PtrHashSet& operator=(const PtrHashSet&);
public:
explicit
~PtrHashSet();
bool insert(void *ptr);
bool erase(void *ptr);
std::size_t count(void *ptr) const;
std::size_t size() const;
};
void PtrHashSet::grow()
{
std::size_t newBucketArraySize = bucketArraySize() * 2 + 1;
std::size_t newBucketArraySizeInBytes =
newBucketArraySize * sizeof(Bucket);
memset(bucketArrayAddress(), 0x5a, size() * sizeof(Bucket));
d_allocator_p->deallocate(bucketArrayAddress());
setBucketArrayAddressAndSize(
(Bucket *) d_allocator_p->allocate(newBucketArraySizeInBytes),
newBucketArraySize);
memset(bucketArrayAddress(), 0, newBucketArraySizeInBytes);
Node *newListRootAddress = 0;
unsigned numNodes = 0;
for (Node *node = (Node *) listRootAddress(); node; ++numNodes) {
Node *rippedOut = node;
node = (Node *) node->nextLink();
std::size_t index =
(UintPtr) rippedOut->value() % bucketArraySize();
Bucket& bucket = bucketArrayAddress()[index];
if (bucket.first()) {
if (0 == bucket.first()->previousLink()) {
newListRootAddress = rippedOut;
}
Util::insertLinkBeforeTarget(rippedOut, bucket.first());
bucket.setFirst(rippedOut);
}
else {
Util::insertLinkBeforeTarget(rippedOut,
newListRootAddress);
newListRootAddress = rippedOut;
bucket.setFirstAndLast(rippedOut, rippedOut);
}
}
assert(
size() == numNodes);
setListRootAddress(newListRootAddress);
checkInvariants();
}
bool PtrHashSet::checkInvariants() const
{
bool ok;
unsigned numNodes = 0;
Node *prev = 0;
for (Node *node = (Node *) listRootAddress(); node;
prev = node, node = (Node *) node->nextLink()) {
ok = node->previousLink() == prev;
assert(ok && "node->previousLink() == prev");
if (!ok) return false;
++numNodes;
}
assert(ok && "size() == numNodes");
if (!ok) return false;
numNodes = 0;
for (unsigned i = 0; i < bucketArraySize(); ++i) {
Bucket& bucket = bucketArrayAddress()[i];
if (bucket.first()) {
++numNodes;
for (Node *node = (Node *) bucket.first();
bucket.last() != node;
node = (Node *) node->nextLink()) {
++numNodes;
}
}
else {
ok = !bucket.last();
assert(ok && "!bucket.last()");
if (!ok) return false;
}
}
assert(ok && "size() == numNodes");
return ok;
}
bool PtrHashSet::find(Node **node, Bucket **bucket, const void *ptr) const
{
Node *dummyNodePtr;
Bucket *dummyBucketPtr;
if (!node ) node = &dummyNodePtr;
if (!bucket) bucket = &dummyBucketPtr;
Node *& nodePtrRef = *node;
unsigned index = (UintPtr) ptr % bucketArraySize();
Bucket& bucketRef = bucketArrayAddress()[index];
*bucket = &bucketRef;
if (bucketRef.first()) {
Node *
begin = (Node *) bucketRef.first();
Node *
const end = (Node *) bucketRef.last()->nextLink();
for (Node *n = begin;
end != n; n = (Node *) n->nextLink()) {
if (n->value() == ptr) {
nodePtrRef = n;
return true;
}
}
return false;
}
nodePtrRef = (Node *) listRootAddress();
return false;
}
: HashTableAnchor(0, 0, 0)
, d_maxLoadFactor(0.4)
, d_numNodes(0)
{
enum { NUM_BUCKETS = 3 };
std::size_t bucketArraySizeInBytes = NUM_BUCKETS * sizeof(Bucket);
setBucketArrayAddressAndSize(
(Bucket *) d_allocator_p->allocate(bucketArraySizeInBytes),
NUM_BUCKETS);
memset(bucketArrayAddress(), 0, bucketArraySizeInBytes);
}
PtrHashSet::~PtrHashSet()
{
for (Node *node = (Node *) listRootAddress(); node; ) {
Node *toDelete = node;
node = (Node *) node->nextLink();
memset(toDelete, 0x5a, sizeof(*toDelete));
d_allocator_p->deallocate(toDelete);
}
d_allocator_p->deallocate(bucketArrayAddress());
}
bool PtrHashSet::erase(void *ptr)
{
Bucket *bucket;
Node *node;
if (!find(&node, &bucket, ptr)) {
return false;
}
if (bucket->first() == node) {
if (bucket->last() == node) {
bucket->reset();
}
else {
bucket->setFirst(node->nextLink());
}
}
else if (bucket->last() == node) {
bucket->setLast(node->previousLink());
}
--d_numNodes;
Util::unlink(node);
d_allocator_p->deallocate(node);
checkInvariants();
return true;
}
bool PtrHashSet::insert(void *ptr)
{
Bucket *bucket;
Node *insertionPoint;
if (find(&insertionPoint, &bucket, ptr)) {
return false;
}
if (bucketArraySize() * d_maxLoadFactor < d_numNodes + 1) {
grow();
bool found = find(&insertionPoint, &bucket, ptr);
}
++d_numNodes;
Node *node = (Node *) d_allocator_p->allocate(sizeof(Node));
Util::insertLinkBeforeTarget(node, insertionPoint);
node->value() = ptr;
if (listRootAddress() == insertionPoint) {
setListRootAddress(node);
}
if (bucket->first()) {
bucket->setFirst(node);
}
else {
bucket->setFirstAndLast(node, node);
}
assert(count(ptr));
checkInvariants();
return true;
}
std::size_t PtrHashSet::count(void *ptr) const
{
return find(0, 0, ptr);
}
std::size_t PtrHashSet::size() const
{
return d_numNodes;
}
Definition bslalg_bidirectionalnode.h:357
Definition bslalg_hashtableanchor.h:541
Definition bslma_allocator.h:457
#define BSLS_ASSERT_SAFE(X)
Definition bsls_assert.h:1762
bsl::size_t size(const TYPE &array)
Return the number of elements in the specified array.
T::iterator begin(T &container)
Definition bslstl_iterator.h:1495
T::iterator end(T &container)
Definition bslstl_iterator.h:1523
Definition bslalg_bidirectionallinklistutil.h:237
static Allocator * allocator(Allocator *basicAllocator=0)
Definition bslma_default.h:897
std::size_t UintPtr
Definition bsls_types.h:126
Then, in main
, we create a test allocator for use in this example to ensure that no memory is leaked:
Definition bslma_testallocator.h:384
Next, we declare our table using that allocator:
Then, we create an area of memory from which our pointers will come:
enum { SEGMENT_LENGTH = 1000 };
char *pc = (char *) ta.allocate(SEGMENT_LENGTH);
Next, we iterate through the length of the segment, insert those pointers for which ptr - pc == N * 7
is true. We keep a count of the number of items we insert into the table in the variable sevens
:
unsigned sevens = 0;
for (int i = 0; i < SEGMENT_LENGTH; i += 7) {
++sevens;
bool status = phs.insert(&pc[i]);
assert(status);
}
Then, we verify the number of objects we've placed in the table:
assert(phs.size() == sevens);
Next, we iterate through ALL pointers in the pc
array, using the count
method to verify that the ones we expect are in the table:
for (int i = 0; i < SEGMENT_LENGTH; ++i) {
assert(phs.count(&pc[i]) == (0 == i % 7));
}
Then, we iterate, deleting all elements from the table for which ptr - pc == 3 * N
is true. We keep a count of the number of elements that were deleted from the table in the variable killed
:
unsigned killed = 0;
for (int i = 0; i < SEGMENT_LENGTH; i += 3) {
const bool deleted = phs.erase(&pc[i]);
assert(deleted == (0 == i % 7));
killed += deleted;
}
Next, we verify the number of remaining elements in the table:
assert(killed < sevens);
assert(phs.size() == sevens - killed);
Then, in verbose mode we print our tallies:
if (verbose) {
printf("sevens = %u, killed = %u, phs.size() = %u\n", sevens,
killed, (unsigned) phs.size());
}
Now, we iterate through every element of the pc
array, verifying that the surviving elements are exactly those for which ptr - pc
was divisible by 7 and not by 3:
for (int i = 0; i < SEGMENT_LENGTH; ++i) {
const bool present = phs.count(&pc[i]);
assert(present == ((0 == i % 7) && (0 != i % 3)));
}
Finally, we clean up our pc
array: