// bsls_spinlock.h                                                    -*-C++-*-
#ifndef INCLUDED_BSLS_SPINLOCK
#define INCLUDED_BSLS_SPINLOCK

#include <bsls_ident.h>
BSLS_IDENT("$: $")

//@PURPOSE: Provide a spin lock.
//
//@CLASSES:
//  bsls::SpinLock: A mutex using "busy waiting" atomic operations
//  bsls::SpinLockGuard: Automatic locking-unlocking of SpinLock
//
//@DESCRIPTION: This component provides a "busy wait" mutual exclusion lock
// primitive ("mutex").  A 'SpinLock' is small and statically-initializable,
// but because it "spins" in a tight loop rather than using system operations
// to block the thread of execution, it is unsuited for use cases involving
// high contention or long critical regions.  Additionally, this component does
// not provide any guarantee of fairness when multiple threads are contending
// for the same lock.  Use 'SpinLockGuard' for automatic locking-unlocking in a
// scope.
//
// *WARNING*: A 'bsls::SpinLock' *must* be aggregate initialized to
// 'BSLS_SPINLOCK_UNLOCKED'.  For example:
//..
//  bsls::SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
//..
// Note that 'SpinLock' is a struct requiring aggregate initialization to allow
// lock variables to be statically initialized when using a C++03 compiler
// (i.e., without using 'constexpr').
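//
// For a 'SpinLock' that is a class data member, a different idiom is used:
// copy-initialize the member from the 'SpinLock::s_unlocked' constant
// (described below and shown in Example 2).  A minimal sketch (the 'Counter'
// type here is illustrative only):
//..
//  struct Counter {
//      int            d_value;
//      bsls::SpinLock d_lock;
//
//      Counter()
//      : d_value(0)
//      , d_lock(bsls::SpinLock::s_unlocked)  // member-initialization idiom
//      {
//      }
//  };
//..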
//
///Usage
///-----
// In this section we show intended use of this component.
//
///Example 1: Maintaining Static Count/Max Values
/// - - - - - - - - - - - - - - - - - - - - - - -
// Suppose that we want to determine the maximum number of threads executing a
// block of code concurrently.  Such a use case naturally calls for a
// statically initialized lock, and the critical region consists of only a few
// integer operations, so 'SpinLock' may be suitable.
//
// First, we define a type to manage the count within a scope:
//..
//   class MaxConcurrencyCounter {
//     // This type manages a count and high-water-mark within a scope.
//     // It decrements the count in its destructor upon leaving the scope.
//
//     // DATA
//     int            *d_count_p;
//     bsls::SpinLock *d_lock_p;
//
//   public:
//     // CREATORS
//     MaxConcurrencyCounter(int *count, int *max, bsls::SpinLock *lock);
//         // Acquire the specified 'lock' and increment the specified 'count'.
//         // If the resulting value is larger than the specified 'max',
//         // load it into 'max'. Release 'lock' and create a scoped guard to
//         // decrement 'count' on destruction.
//
//     ~MaxConcurrencyCounter();
//         // Acquire the lock specified at construction, decrement the count
//         // variable, and release the lock.
//   };
//
//   MaxConcurrencyCounter::MaxConcurrencyCounter(int            *count,
//                                                int            *max,
//                                                bsls::SpinLock *lock)
//   : d_count_p(count)
//   , d_lock_p(lock) {
//      bsls::SpinLockGuard guard(lock);
//      int result = ++(*count);
//      if (result > *max) {
//         *max = result;
//      }
//   }
//
//   MaxConcurrencyCounter::~MaxConcurrencyCounter() {
//      bsls::SpinLockGuard guard(d_lock_p);
//      --(*d_count_p);
//   }
//..
// Next, we declare static variables to track the current and maximum number
// of threads executing the block, along with a 'SpinLock' to guard them.
// 'SpinLock' may be statically initialized using the 'BSLS_SPINLOCK_UNLOCKED'
// constant:
//..
//   {
//      static int            threadCount = 0;
//      static int            maxThreads = 0;
//      static bsls::SpinLock threadLock = BSLS_SPINLOCK_UNLOCKED;
//..
// Next, by creating a 'MaxConcurrencyCounter' object, each thread entering the
// block of code uses the 'SpinLock' to synchronize manipulation of the static
// count variables:
//..
//      MaxConcurrencyCounter counter(&threadCount, &maxThreads, &threadLock);
//..
// Finally, when the block is exited, the 'counter' destructor synchronizes on
// the 'SpinLock' again to decrement the thread count; any intervening code
// runs without holding the lock.
//..
//   }
//..
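// Note that the current values can also be inspected at any point within the
// block by acquiring the same lock; a brief sketch (the 'snapshotMax'
// variable is illustrative only):
//..
//      int snapshotMax;
//      {
//         bsls::SpinLockGuard guard(&threadLock);
//         snapshotMax = maxThreads;
//      }
//..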
//
///Example 2: Fine-Grained Locking
///- - - - - - - - - - - - - - - -
// Suppose that we have a large array of objects to be manipulated concurrently
// by multiple threads, but the size of the array itself does not change.
// (This might be because it represents an inherently fixed number of objects
// or because changes to the array size are infrequent and controlled by some
// other synchronization mechanism like a "reader-writer" lock).  Thus one
// thread can manipulate a particular object in the array concurrently with a
// different thread manipulating another.  If the manipulations are short and
// contention is likely to be low, SpinLock might be suitable due to its small
// size.
//
// In particular, imagine we want a threadsafe "multi-queue".  In this case, we
// would have an array of queues, each with a SpinLock member for fine-grained
// locking.  First, we define the type to be held in the array.
//..
//  template<typename TYPE>
//  class LightweightThreadsafeQueue {
//     // This type implements a threadsafe queue with a small memory
//     // footprint and low initialization costs. It is designed for
//     // low-contention use only.
//
//     // TYPES
//     struct Node {
//          TYPE  d_item;
//          Node *d_next_p;
//
//          Node(const TYPE& item) : d_item(item), d_next_p(0) {}
//      };
//
//     // DATA
//     Node           *d_front_p; // Front of queue, or 0 if empty
//     Node           *d_back_p; // Back of queue, or 0 if empty
//     bsls::SpinLock  d_lock;
//
//   public:
//     // CREATORS
//     LightweightThreadsafeQueue();
//       // Create an empty queue.
//
//     ~LightweightThreadsafeQueue();
//       // Destroy this object.
//
//     // MANIPULATORS
//     int dequeue(TYPE* value);
//        // Remove the element at the front of the queue and load it into the
//        // specified 'value'. Return '0' on success, or a nonzero value if
//        // the queue is empty.
//
//     void enqueue(const TYPE& value);
//        // Add the specified 'value' to the back of the queue.
//   };
//..
// Next, we implement the creators.  Note that a different idiom is used to
// initialize member variables of 'SpinLock' type than is used for static
// variables:
//..
//  template<typename TYPE>
//  LightweightThreadsafeQueue<TYPE>::LightweightThreadsafeQueue()
//  : d_front_p(0)
//  , d_back_p(0)
//  , d_lock(bsls::SpinLock::s_unlocked)
//  {}
//
//  template<typename TYPE>
//  LightweightThreadsafeQueue<TYPE>::~LightweightThreadsafeQueue() {
//     for (Node *node = d_front_p; 0 != node; ) {
//         Node *next = node->d_next_p;
//         delete node;
//         node = next;
//     }
//  }
//..
// Then we implement the manipulator functions using 'SpinLockGuard' to ensure
// thread safety.  Note that we perform memory allocation and deallocation
// outside the scope of the lock, as these operations may make system calls
// and should not be done while a 'SpinLock' is held.
//..
//  template<typename TYPE>
//  int LightweightThreadsafeQueue<TYPE>::dequeue(TYPE* value) {
//     Node *front;
//     {
//        bsls::SpinLockGuard guard(&d_lock);
//        front = d_front_p;
//        if (0 == front) {
//          return 1;
//        }
//
//        *value = front->d_item;
//
//        if (d_back_p == front) {
//           d_front_p = d_back_p = 0;
//        } else {
//           d_front_p = front->d_next_p;
//        }
//     }
//     delete front;
//     return 0;
//  }
//
//  template<typename TYPE>
//  void LightweightThreadsafeQueue<TYPE>::enqueue(const TYPE& value) {
//     Node *node = new Node(value);
//     bsls::SpinLockGuard guard(&d_lock);
//     if (0 == d_front_p && 0 == d_back_p) {
//        d_front_p = d_back_p = node;
//     } else {
//        d_back_p->d_next_p = node;
//        d_back_p = node;
//     }
//  }
//..
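// As a quick single-threaded check of this interface (a sketch only, assuming
// the standard 'assert' macro from '<cassert>' is available), the queue
// returns elements in FIFO order and reports emptiness via a nonzero return
// code:
//..
//  LightweightThreadsafeQueue<int> queue;
//  int value = 0;
//  assert(0 != queue.dequeue(&value));   // dequeue from an empty queue fails
//  queue.enqueue(42);
//  queue.enqueue(43);
//  assert(0 == queue.dequeue(&value));   // oldest element is returned first
//  assert(42 == value);
//  assert(0 == queue.dequeue(&value));
//  assert(43 == value);
//  assert(0 != queue.dequeue(&value));   // the queue is empty again
//..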
// Now, to illustrate fine-grained locking with this queue, we create a thread
// function that will manipulate queues out of a large array at random.  Since
// each element in the array is locked independently, these threads will
// rarely contend for the same queue and can run largely in parallel.
//..
// const int NUM_QUEUES = 10000;
// const int NUM_ITERATIONS = 20000;
//
// struct QueueElement {
//    int d_threadId;
//    int d_value;
// };
//
// struct ThreadParam {
//    LightweightThreadsafeQueue<QueueElement> *d_queues_p;
//    int                                       d_threadId;
// };
//
// void *addToRandomQueues(void *paramAddr) {
//    ThreadParam *param = (ThreadParam*)paramAddr;
//    LightweightThreadsafeQueue<QueueElement> *queues = param->d_queues_p;
//    int threadId = param->d_threadId;
//    unsigned seed = threadId;
//    for (int i = 0; i < NUM_ITERATIONS; ++i) {
//       int queueIndex = rand_r(&seed) % NUM_QUEUES;
//       LightweightThreadsafeQueue<QueueElement> *queue = queues + queueIndex;
//       QueueElement value = { threadId, i };
//       queue->enqueue(value);
//    }
//    return 0;
// }
//..
// Finally, we create the "multi-queue" and several of these threads to
// manipulate it.  We assume the existence of a createThread() function that
// starts a new thread of execution with a parameter, and we omit details of
// "joining" these threads.
//..
// enum { NUM_THREADS = 3 };
// LightweightThreadsafeQueue<QueueElement> multiQueue[NUM_QUEUES];
// ThreadParam threadParams[NUM_THREADS];
// for (int i = 0; i < NUM_THREADS; ++i) {
//   threadParams[i].d_queues_p = multiQueue;
//   threadParams[i].d_threadId = i + 1;
//   createThread(addToRandomQueues, threadParams + i);
// }
//..
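// Once the threads have been joined (details omitted above), the queues can
// be drained from a single thread to confirm that every enqueued element is
// accounted for; a brief sketch (the 'totalDequeued' variable is illustrative
// only, and 'assert' from '<cassert>' is assumed):
//..
// int totalDequeued = 0;
// QueueElement element;
// for (int i = 0; i < NUM_QUEUES; ++i) {
//   while (0 == multiQueue[i].dequeue(&element)) {
//     ++totalDequeued;
//   }
// }
// assert(NUM_THREADS * NUM_ITERATIONS == totalDequeued);
//..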
//

#include <bsls_assert.h>
#include <bsls_atomicoperations.h>
#include <bsls_keyword.h>
#include <bsls_platform.h>
#include <bsls_performancehint.h>

#define BSLS_SPINLOCK_UNLOCKED  { {0} }
    // Use this macro as the value for initializing an object of type
    // 'SpinLock'.  For example:
    //..
    //  SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
    //..

#ifdef BSLS_PLATFORM_OS_WINDOWS
typedef unsigned long DWORD;
typedef int BOOL;

extern "C" {
    __declspec(dllimport) void __stdcall Sleep(
            DWORD dwMilliseconds);

    __declspec(dllimport) DWORD __stdcall SleepEx(
            DWORD dwMilliseconds,
            BOOL  bAlertable);
}  // extern "C"
#else
#include <pthread.h>
#include <sched.h>
#include <time.h>
#endif

namespace BloombergLP {
namespace bsls {

                             // ==============
                             // class SpinLock
                             // ==============
struct SpinLock {
    // A statically-initializable synchronization primitive that "spins" (i.e.,
    // executes user instructions in a tight loop) rather than blocking waiting
    // threads using system calls.  The following idiom is used to initialize
    // 'SpinLock' variables:
    //..
    //  SpinLock lock = BSLS_SPINLOCK_UNLOCKED;
    //..
    // A class member 'd_lock' of type 'SpinLock' may be initialized using the
    // following idiom:
    //..
    //  , d_lock(SpinLock::s_unlocked)
    //..

  private:
    // NOT IMPLEMENTED
    SpinLock& operator=(const SpinLock&) BSLS_KEYWORD_DELETED;

    // We would like to prohibit copy construction, but then this class would
    // not be a POD and could not be initialized statically:
    //
    // SpinLock(const SpinLock&);

    // PRIVATE TYPES
    enum {
        e_UNLOCKED = 0, // unlocked state value
        e_LOCKED = 1    // locked state value
    };

    // PRIVATE CLASS METHODS
    static void doBackoff(int *count);
        // Provide a backoff mechanism for 'lockWithBackoff' and 'tryLock',
        // using the specified 'count' as the backoff counter.  'count' is
        // incremented within this method.

    static void pause();
        // If available, invoke a pause operation (e.g., Intel's 'pause'
        // instruction); otherwise this method has no effect.

    static void sleepMillis(int milliseconds);
        // Sleep for the specified number of 'milliseconds'.  Note that this
        // partially reimplements 'bslmt::ThreadUtil::microSleep' locally, as
        // 'bslmt' is above 'bsls'.

    static void yield();
        // Move the current thread to the end of the scheduler's queue and
        // schedule another thread to run.  Note that this allows cooperating
        // threads of the same priority to share CPU resources equally.  Also
        // note that this reimplements 'bslmt::ThreadUtil::yield()' locally, as
        // 'bslmt' is above 'bsls'.

  public:
    // PUBLIC CLASS DATA
    static const SpinLock s_unlocked;
        // This constant 'SpinLock' is always unlocked.  It is suitable for
        // initializing class members of 'SpinLock' type.

    // PUBLIC DATA
    AtomicOperations::AtomicTypes::Int d_state;
        // Public to allow this type to be a statically-initializable POD.  Do
        // not use directly.

    // MANIPULATORS
    void lock();
        // Spin (repeat a loop continuously without using the system to pause
        // or reschedule the thread) until this object is unlocked, then
        // atomically acquire the lock.

    void lockWithBackoff();
        // Repeat a loop continuously, potentially using the system to pause or
        // reschedule the thread, until this object is unlocked, then
        // atomically acquire the lock.  The spinning has backoff logic.  Note
        // that this method is recommended when system calls are permissible,
        // significant contention is expected, and no better mutual exclusion
        // primitive is available.

    void lockWithoutBackoff();
        // Spin (repeat a loop continuously without using the system to pause
        // or reschedule the thread) until this object is unlocked, then
        // atomically acquire the lock.  The spinning does not perform a
        // backoff.

    int tryLock(int numRetries = 0);
        // Attempt to acquire the lock; optionally specify 'numRetries', the
        // number of additional attempts to make if this object is already
        // locked.  Return 0 on success, and a non-zero value if the lock was
        // not successfully acquired.  The behavior is undefined unless
        // '0 <= numRetries'.

    void unlock();
        // Release the lock.  The behavior is undefined unless the current
        // thread holds the lock.
};

                         // ===================
                         // class SpinLockGuard
                         // ===================
class SpinLockGuard {
    // This type implements a scoped guard for 'SpinLock'.

    // DATA
    SpinLock *d_lock_p; // lock proctored by this object

  private:
    // NOT IMPLEMENTED
    SpinLockGuard(const SpinLockGuard&);
    SpinLockGuard& operator=(const SpinLockGuard&);

  public:

    // CREATORS
    explicit SpinLockGuard(SpinLock *lock);
        // Create a proctor object that manages the specified 'lock'.  Invoke
        // 'lock->lock()'.

    ~SpinLockGuard();
        // Destroy this proctor object and invoke 'unlock()' on the lock
        // managed by this object.

    // MANIPULATORS
    SpinLock *release();
        // Return the lock pointer that was provided at construction and stop
        // managing it.  (Subsequent calls to 'release()' will return null and
        // the destruction of this object will not affect the lock.)  The lock
        // status is not changed by this call.
};

// ============================================================================
//                             INLINE DEFINITIONS
// ============================================================================

                             // --------------
                             // class SpinLock
                             // --------------
// PRIVATE CLASS METHODS
inline
void SpinLock::doBackoff(int *count) {
    if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(*count < 10)) {
        pause();
    } else if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(*count < 25)) {
        yield();
    } else {
        sleepMillis(10);
    }
    ++(*count);
}

inline
void SpinLock::sleepMillis(int milliseconds)
{
#ifdef BSLS_PLATFORM_OS_WINDOWS
    ::Sleep(milliseconds);
#else
    timespec naptime;
    naptime.tv_sec = 0;
    naptime.tv_nsec = 1000 * 1000 * milliseconds;
    nanosleep(&naptime, 0);
#endif
}

inline
void SpinLock::yield() {
#ifdef BSLS_PLATFORM_OS_WINDOWS
    ::SleepEx(0, 0);
#else
    sched_yield();
#endif
}

// MANIPULATORS
inline
void SpinLock::lock() {
    lockWithoutBackoff();
}

inline
void SpinLock::lockWithBackoff() {
    int count = 0;
    do {
        // Implementation note: the outer 'if' block is not logically necessary
        // but may reduce memory barrier costs when spinning.
        if (e_UNLOCKED == AtomicOperations::getIntAcquire(&d_state)) {
            if (e_UNLOCKED == AtomicOperations::swapIntAcqRel(&d_state,
                                                              e_LOCKED))
            {
                break;
            }
        }
        doBackoff(&count);
    } while (1);
}

inline
void SpinLock::lockWithoutBackoff() {
    do {
        // Implementation note: the outer 'if' block is not logically necessary
        // but may reduce memory barrier costs when spinning.
        if (e_UNLOCKED == AtomicOperations::getIntAcquire(&d_state)) {
            if (e_UNLOCKED == AtomicOperations::swapIntAcqRel(&d_state,
                                                              e_LOCKED))
            {
                break;
            }
        }
    } while (1);
}

inline
int SpinLock::tryLock(int numRetries) {
    int count = 0;
    do {
        // See 'lockWithBackoff' for the implementation note.
        if (e_UNLOCKED == AtomicOperations::getIntAcquire(&d_state)) {
            if (e_UNLOCKED == AtomicOperations::swapIntAcqRel(&d_state,
                                                              e_LOCKED))
            {
                return 0;                                             // RETURN
            }
        }
        doBackoff(&count);
    } while (numRetries--);
    return -1;
}

inline
void SpinLock::unlock() {
    BSLS_ASSERT_SAFE(e_LOCKED == AtomicOperations::getInt(&d_state));

    AtomicOperations::setIntRelease(&d_state, e_UNLOCKED);
}

                          // -------------------
                          // class SpinLockGuard
                          // -------------------
inline
SpinLockGuard::SpinLockGuard(SpinLock *lock)
: d_lock_p(lock) {
    BSLS_ASSERT_SAFE(0 != lock);
    lock->lock();
}

inline
SpinLockGuard::~SpinLockGuard()
{
    if (0 != d_lock_p) {
        d_lock_p->unlock();
    }
}

// MANIPULATORS
inline
SpinLock *SpinLockGuard::release() {
    SpinLock *lock = d_lock_p;
    d_lock_p = 0;
    return lock;
}

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2020 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------