BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bslmt_throughputbenchmark.h
Go to the documentation of this file.
1/// @file bslmt_throughputbenchmark.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bslmt_throughputbenchmark.h -*-C++-*-
8
9#ifndef INCLUDED_BSLMT_THROUGHPUTBENCHMARK
10#define INCLUDED_BSLMT_THROUGHPUTBENCHMARK
11
12#include <bsls_ident.h>
13BSLS_IDENT("$Id: $")
14
15/// @defgroup bslmt_throughputbenchmark bslmt_throughputbenchmark
16/// @brief Provide a performance test harness for multi-threaded components.
17/// @addtogroup bsl
18/// @{
19/// @addtogroup bslmt
20/// @{
21/// @addtogroup bslmt_throughputbenchmark
22/// @{
23///
24/// <h1> Outline </h1>
25/// * <a href="#bslmt_throughputbenchmark-purpose"> Purpose</a>
26/// * <a href="#bslmt_throughputbenchmark-classes"> Classes </a>
27/// * <a href="#bslmt_throughputbenchmark-description"> Description </a>
28/// * <a href="#bslmt_throughputbenchmark-structure-of-a-test"> Structure of a Test </a>
29/// * <a href="#bslmt_throughputbenchmark-usage"> Usage </a>
30/// * <a href="#bslmt_throughputbenchmark-example-1-test-performance-of-bsl-queue<int>"> Example 1: Test Performance of bsl::queue<int> </a>
31///
32/// # Purpose {#bslmt_throughputbenchmark-purpose}
33/// Provide a performance test harness for multi-threaded components.
34///
35/// # Classes {#bslmt_throughputbenchmark-classes}
36///
37/// - bslmt::ThroughputBenchmark: multi-threaded performance test harness
38///
39/// # Description {#bslmt_throughputbenchmark-description}
40/// This component defines a mechanism,
41/// `bslmt::ThroughputBenchmark`, that provides performance testing for multi-
42/// threaded components. The results are loaded into a
43/// `bslmt::ThroughputBenchmarkResult` object, which provides access to counts
44/// of the work done by each thread, thread group, and sample, divided by the
45/// number of actual seconds of execution.
46///
47/// ## Structure of a Test {#bslmt_throughputbenchmark-structure-of-a-test}
48///
49///
50/// A test is composed from one or more thread groups, each running one or more
51/// threads. Each thread in a thread group executes a thread function, with a
52/// simulated work load executing between subsequent calls to the thread
53/// function. To provide reliability, the test is executed multiple times. A
54/// single execution of a test is referred to as a "sample execution" and its
55/// result referred to as a "sample". To support fine tuning of the test, it is
56/// possible to provide initialize and cleanup functions for a sample and / or a
57/// thread.
58///
59/// ## Usage {#bslmt_throughputbenchmark-usage}
60///
61///
62/// This section illustrates intended use of this component.
63///
64/// ### Example 1: Test Performance of bsl::queue<int> {#bslmt_throughputbenchmark-example-1-test-performance-of-bsl-queue<int>}
65///
66///
67/// In the following example we test the throughput of a `bsl::queue<int>` in a
68/// multi-threaded environment, where multiple "producer" threads are pushing
69/// elements, and multiple "consumer" threads are popping these elements.
70///
71/// First, we define a global queue, a mutex to protect this queue, and a
72/// semaphore for a "pop" operation to block on:
73/// @code
74/// bsl::queue<int> myQueue;
75/// bslmt::Mutex myMutex;
76/// bslmt::Semaphore mySem;
77/// @endcode
78/// Next, we define a counter value we push in:
79/// @code
80/// int counterValue = 0;
81/// @endcode
82/// Then, we define simple push and pop functions that manipulate this queue:
83/// @code
84/// /// Push an element into `myQueue`, using the specified `threadIndex`.
85/// void myPush(int threadIndex)
86/// {
87/// bslmt::LockGuard<bslmt::Mutex> guard(&myMutex);
88/// myQueue.push(1000000 * threadIndex + counterValue++);
89/// mySem.post();
90/// }
91///
92/// /// Pop an element from `myQueue`.
93/// void myPop(int)
94/// {
95/// mySem.wait();
96/// bslmt::LockGuard<bslmt::Mutex> guard(&myMutex);
97/// myQueue.pop();
98/// }
99/// @endcode
100/// Next, we define a thread "cleanup" function for the push thread group, which
101/// pushes a couple of extra elements to make sure that the pop thread group
102/// will not hang on an empty queue:
103/// @code
104/// void myCleanup()
105/// // Cleanup function.
106/// {
107/// bslmt::LockGuard<bslmt::Mutex> guard(&myMutex);
108/// for (int i = 0; i < 10; ++i) {
109/// myQueue.push(counterValue++);
110/// mySem.post();
111/// }
112/// }
113/// @endcode
114/// Then, we create a `bslmt::ThroughputBenchmark` object and add push and pop
115/// thread groups, each with 2 threads and a work load (arithmetic operations to
116/// consume an amount of time) of 100:
117/// @code
118/// bslmt::ThroughputBenchmark myBench;
119/// myBench.addThreadGroup(
120/// myPush,
121/// 2,
122/// 100,
123/// bslmt::ThroughputBenchmark::InitializeThreadFunction(),
124/// myCleanup);
125/// const int consumerGroupIdx = myBench.addThreadGroup(myPop, 2, 100);
126/// @endcode
127/// Now, we create a `bslmt::ThroughputBenchmarkResult` object to contain the
128/// result, and call `execute` to run the benchmark for 500 milliseconds 10
129/// times:
130/// @code
131/// bslmt::ThroughputBenchmarkResult myResult;
132/// myBench.execute(&myResult, 500, 10);
133/// @endcode
134/// Finally, we print the median of the throughput of the consumer thread group.
135/// @code
136/// double median;
137/// myResult.getMedian(&median, consumerGroupIdx);
138/// bsl::cout << "Throughput:" << median << "\n";
139/// @endcode
140/// @}
141/** @} */
142/** @} */
143
144/** @addtogroup bsl
145 * @{
146 */
147/** @addtogroup bslmt
148 * @{
149 */
150/** @addtogroup bslmt_throughputbenchmark
151 * @{
152 */
153
154#include <bslscm_version.h>
155
156#include <bslmt_barrier.h>
158
159#include <bslma_allocator.h>
160
162
163#include <bsls_assert.h>
164#include <bsls_atomic.h>
165#include <bsls_timeinterval.h>
166#include <bsls_types.h>
167
168#include <bsl_functional.h>
169#include <bsl_vector.h>
170
171
172namespace bslmt {
173
174class ThroughputBenchmark_TestUtil;
175
176 // =========================
177 // class ThroughputBenchmark
178 // =========================
179
180/// This class is a mechanism that provides performance testing for multi-
181/// threaded components. It allows running different thread functions at
182/// the same time, and simulates a work load between subsequent calls to the
183/// tested thread functions. The results are loaded into a
184/// `bslmt::ThroughputBenchmarkResult` object, which provides access to
185/// counts of the work done by each thread, thread group, and sample,
186/// divided by the number of actual seconds of execution.
187///
188/// See @ref bslmt_throughputbenchmark
190
191 public:
192 // PUBLIC TYPES
193
194 /// An alias to a function meeting the following contract:
195 /// @code
196 /// void runTest(int threadIndex);
197 /// // Run the main part of the benchmark having the specified
198 /// // 'threadIndex'. The behavior is undefined unless
199 /// // 'threadIndex' is in the range '[0, numThreadsInGroup)',
200 /// // where 'numThreadsInGroup' is the number of threads in a
201 /// // thread group for the associated throughput benchmark.
202 /// @endcode
203 typedef bsl::function<void(int)> RunFunction;
204
205 /// An alias to a function meeting the following contract:
206 /// @code
207 /// void initializeSample(bool isFirst);
208 /// // Initialize the sample run. If the specified 'isFirst' is
209 /// // 'true', this is the first sample run.
210 /// @endcode
212
213 /// An alias to a function meeting the following contract:
214 /// @code
215 /// void shutdownSample(bool isLast);
216 /// // Clean up at the end of the sample run, before threads have
217 /// // been joined. If the specified 'isLast' is 'true', this is
218 /// // the last sample run.
219 /// @endcode
221
222 /// An alias to a function meeting the following contract:
223 /// @code
224 /// void cleanupSample(bool isLast);
225 /// // Clean up after the sample run. If the specified 'isLast' is
226 /// // 'true', this is the last sample run.
227 /// @endcode
229
230 /// An alias to a function meeting the following contract:
231 /// @code
232 /// void initializeThread();
233 /// // Initialize each thread in a sample run.
234 /// @endcode
236
237 /// An alias to a function meeting the following contract:
238 /// @code
239 /// void cleanupThread();
240 /// // Clean up after each thread in a sample run.
241 /// @endcode
243
244 /// Data used by a thread group
245 struct ThreadGroup {
246
247 // PUBLIC DATA
248 RunFunction d_func; // test function to run
249
250 int d_numThreads; // number of threads in the
251 // thread group
252
253 bsls::Types::Int64 d_amount; // amount of busy work to
254 // perform between calls to
255 // 'd_func'
256
257 InitializeThreadFunction d_initialize; // initialize function per
258 // thread
259
260 CleanupThreadFunction d_cleanup; // cleanup function per
261 // thread
262 };
263
264 private:
265 // CLASS DATA
266 static unsigned int s_antiOptimization; // Used by 'busyWork' to
267 // prevent optimization.
268
269 // DATA
270 bsl::vector<ThreadGroup> d_threadGroups; // Data kept for each thread
271 // group added.
272
273 bsls::AtomicInt d_state; // This is how a test thread
274 // knows it has to exit. It
275 // starts as 0, and the thread exits
276 // when it is set to 1.
277
278 // FRIENDS
281
282 // NOT IMPLEMENTED
284 ThroughputBenchmark& operator=(const ThroughputBenchmark&);
285
286 // PRIVATE ACCESSORS
287
288 /// Return `true` if the test should continue to run, and `false`
289 /// otherwise.
290 bool isRunState() const;
291
292 public:
293 // TRAITS
296
297 // CLASS METHODS
298
299 /// Return the value calculated by `busyWork`. Note that this method is
300 /// provided to prevent the compiler from optimizing the simulated
301 /// workload away.
302 static unsigned int antiOptimization();
303
304 /// Perform arithmetic operations to consume an amount of time in linear
305 /// relation to the specified `busyWorkAmount`. Note that the duration
306 /// of `busyWork` invoked with a particular `busyWorkAmount` will vary
307 /// with system load.
308 static void busyWork(bsls::Types::Int64 busyWorkAmount);
309
310 /// Return an estimate of the work amount so that `busyWork` invoked
311 /// with the returned work amount executes, approximately, for the
312 /// specified `duration`. Note that this estimate varies with system
313 /// load.
315 bsls::TimeInterval duration);
316
317 // CREATORS
318
319 /// Create an empty `ThroughputBenchmark` object. Optionally specify a
320 /// `basicAllocator` used to supply memory. If `basicAllocator` is 0,
321 /// the currently installed default allocator is used.
322 explicit ThroughputBenchmark(bslma::Allocator *basicAllocator = 0);
323
324 // MANIPULATORS
325
326 int addThreadGroup(const RunFunction& runFunction,
327 int numThreads,
328 bsls::Types::Int64 busyWorkAmount);
329 /// Create a set of threads, with cardinality the specified
330 /// `numThreads`, that will repeatedly execute the specified
331 /// `runFunction` followed by `busyWork`, with the specified
332 /// `busyWorkAmount` as its argument. Optionally specify
333 /// `initializeFunctor`, which is run at the beginning of each thread
334 /// of the sample and takes no arguments (see
335 /// `InitializeThreadFunction`). Optionally specify
336 /// `cleanupFunctor`, which is run at the end of each thread of the
337 /// sample and takes no arguments (see `CleanupThreadFunction`).
338 /// Return the index of the added thread group. The behavior is
339 /// undefined unless `0 < numThreads` and
340 /// `0 <= busyWorkAmount`.
341 int addThreadGroup(const RunFunction& runFunction,
342 int numThreads,
343 bsls::Types::Int64 busyWorkAmount,
344 const InitializeThreadFunction& initializeFunctor,
345 const CleanupThreadFunction& cleanupFunctor);
346
348 int millisecondsPerSample,
349 int numSamples);
350 /// Run the tests previously added with calls to the `addThreadGroup`
351 /// method. The tests are run the specified `numSamples` times.
352 /// Each sample is run for the specified `millisecondsPerSample`
353 /// duration. The results are stored in the specified `result` object.
354 /// Optionally specify `initializeFunctor`, which is run at the
355 /// beginning of the sample and accepts a boolean flag `isFirst`, that
356 /// is set to `true` on the first sample, and `false` otherwise.
357 /// Optionally specify `shutdownFunctor`, which is run at the end of
358 /// each sample before threads have been joined, and accepts a boolean
359 /// flag `isLast`, that is set to `true` on the last sample, and `false`
360 /// otherwise. Optionally specify `cleanupFunctor`, which is run at the
361 /// end of each sample after threads have been joined, and accepts a
362 /// boolean flag `isLast`, that is set to `true` on the last sample, and
363 /// `false` otherwise. The behavior is undefined unless
364 /// `0 < millisecondsPerSample`, `0 < numSamples`, and
365 /// `0 < numThreadGroups()`. Also see {Structure of a Test}.
367 int millisecondsPerSample,
368 int numSamples,
369 const InitializeSampleFunction& initializeFunctor,
370 const ShutdownSampleFunction& shutdownFunctor,
371 const CleanupSampleFunction& cleanupFunctor);
372
373 // ACCESSORS
374
375 /// Return the total number of threads.
376 int numThreads() const;
377
378 /// Return the number of thread groups.
379 int numThreadGroups() const;
380
381 /// Return the number of threads in the specified `threadGroupIndex`.
382 /// The behavior is undefined unless
383 /// `0 <= threadGroupIndex < numThreadGroups()`.
384 int numThreadsInGroup(int threadGroupIndex) const;
385
386 // Aspects
387
388 /// Return the allocator used by this object.
390
391};
392
393 // ===================================
394 // struct ThroughputBenchmark_WorkData
395 // ===================================
396
397/// Data transferred to ThroughputBenchmark_WorkFunction.
399
400 // PUBLIC DATA
402 // test function to run
403
405 // busy work amount
406
408 // initialize function per
409 // thread
410
412 // cleanup function per
413 // thread
414
416 // exposes the "this"
417 // pointer of the benchmark
418 // to the work thread
419
421 // thread index 0, 1, 2,
422 // ... that is provided to
423 // the thread to
424 // differentiate it if so
425 // desired
426
428 // trigger start for
429 // threads to start
430 // processing at the same
431 // time
432
434 // number of nanoseconds
435 // that the thread actually
436 // ran
437
439 // number of items
440 // processed by this thread
441};
442
443 // ======================================
444 // class ThroughputBenchmark_WorkFunction
445 // ======================================
446
447/// This class is the work function functor, being called for each work
448/// thread.
449///
450/// See @ref bslmt_throughputbenchmark
452
453 private:
454 // DATA
456
457 public:
458 // CREATORS
459
460 /// Create a `ThroughputBenchmark_WorkFunction` object with the
461 /// specified `data` argument.
464
466 // Destroy this object.
467
468 // MANIPULATORS
469
470 /// Work function being run on the thread.
472};
473
474 // ==================================
475 // class ThroughputBenchmark_TestUtil
476 // ==================================
477
478/// This class implements a test utility that gives the test driver access
479/// to the unexposed data members of `ThroughputBenchmark`.
480///
481/// See @ref bslmt_throughputbenchmark
483
484 // DATA
485 ThroughputBenchmark& d_data;
486
487 public:
488 // CREATORS
489
490 /// Create a `ThroughputBenchmark_TestUtil` object to test contents of
491 /// the specified `data`.
493
495 // Destroy this object.
496
497 // MANIPULATORS
498
499 /// Return a reference providing modifiable access to the `d_state` data
500 /// member of `ThroughputBenchmark`.
502
503 /// Return a reference providing modifiable access to the
504 /// `d_threadGroups` data member of `ThroughputBenchmark`.
506};
507
508// ============================================================================
509// INLINE DEFINITIONS
510// ============================================================================
511
512 // -------------------------
513 // class ThroughputBenchmark
514 // -------------------------
515
516// PRIVATE ACCESSORS
517inline
518bool ThroughputBenchmark::isRunState() const
519{
    // The benchmark is considered "running" exactly when an acquire-load of
    // 'd_state' yields 0; any other value makes this function return
    // 'false'.
520 return d_state.loadAcquire() == 0;
521}
522
523// ACCESSORS
524inline
526{
527 if (0 == d_threadGroups.size()) {
528 return 0; // RETURN
529 }
530
531 int numThreads = d_threadGroups[0].d_numThreads;
532 for (int i = 1; i < numThreadGroups(); ++i) {
533 numThreads += d_threadGroups[i].d_numThreads;
534 }
535 return numThreads;
536}
537
538inline
540{
541 return static_cast<int>(d_threadGroups.size());
542}
543
544inline
545int ThroughputBenchmark::numThreadsInGroup(int threadGroupIndex) const
546{
    // Precondition checks: the index must satisfy
    // '0 <= threadGroupIndex < numThreadGroups()'.
547 BSLS_ASSERT(0 <= threadGroupIndex);
548 BSLS_ASSERT(numThreadGroups() > threadGroupIndex);
549
    // Report the thread count stored in the 'ThreadGroup' entry at the
    // requested index.
550 return d_threadGroups[threadGroupIndex].d_numThreads;
551}
552
553 // Aspects
554
555inline
557{
558 return d_threadGroups.get_allocator().mechanism();
559}
560
561 // --------------------------------------
562 // class ThroughputBenchmark_WorkFunction
563 // --------------------------------------
564
565// CREATORS
566inline
572
573 // ----------------------------------
574 // class ThroughputBenchmark_TestUtil
575 // ----------------------------------
576
577// CREATORS
578inline
584
585// MANIPULATORS
586inline
588{
589 return d_data.d_state;
590}
591
592inline
595{
596 return d_data.d_threadGroups;
597}
598
599} // close package namespace
600
601
602#endif
603
604// ----------------------------------------------------------------------------
605// Copyright 2019 Bloomberg Finance L.P.
606//
607// Licensed under the Apache License, Version 2.0 (the "License");
608// you may not use this file except in compliance with the License.
609// You may obtain a copy of the License at
610//
611// http://www.apache.org/licenses/LICENSE-2.0
612//
613// Unless required by applicable law or agreed to in writing, software
614// distributed under the License is distributed on an "AS IS" BASIS,
615// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
616// See the License for the specific language governing permissions and
617// limitations under the License.
618// ----------------------------- END-OF-FILE ----------------------------------
619
620/** @} */
621/** @} */
622/** @} */
Forward declaration.
Definition bslstl_function.h:934
Definition bslstl_vector.h:1025
Definition bslma_allocator.h:457
Definition bslmt_barrier.h:353
Definition bslmt_throughputbenchmarkresult.h:140
Definition bslmt_throughputbenchmark.h:482
ThroughputBenchmark_TestUtil(ThroughputBenchmark &data)
Definition bslmt_throughputbenchmark.h:579
bsl::vector< ThroughputBenchmark::ThreadGroup > & threadGroups()
Definition bslmt_throughputbenchmark.h:594
bsls::AtomicInt & state()
Definition bslmt_throughputbenchmark.h:587
Definition bslmt_throughputbenchmark.h:451
void operator()()
Work function being run on the thread.
ThroughputBenchmark_WorkFunction(ThroughputBenchmark_WorkData &data)
Definition bslmt_throughputbenchmark.h:567
Definition bslmt_throughputbenchmark.h:189
bsl::function< void(bool)> ShutdownSampleFunction
Definition bslmt_throughputbenchmark.h:220
void execute(ThroughputBenchmarkResult *result, int millisecondsPerSample, int numSamples)
bsl::function< void()> InitializeThreadFunction
Definition bslmt_throughputbenchmark.h:235
bsl::function< void(bool)> CleanupSampleFunction
Definition bslmt_throughputbenchmark.h:228
int numThreadsInGroup(int threadGroupIndex) const
Definition bslmt_throughputbenchmark.h:545
int numThreadGroups() const
Return the number of thread groups.
Definition bslmt_throughputbenchmark.h:539
bsl::function< void()> CleanupThreadFunction
Definition bslmt_throughputbenchmark.h:242
bsl::function< void(bool)> InitializeSampleFunction
Definition bslmt_throughputbenchmark.h:211
static unsigned int antiOptimization()
ThroughputBenchmark(bslma::Allocator *basicAllocator=0)
void execute(ThroughputBenchmarkResult *result, int millisecondsPerSample, int numSamples, const InitializeSampleFunction &initializeFunctor, const ShutdownSampleFunction &shutdownFunctor, const CleanupSampleFunction &cleanupFunctor)
static bsls::Types::Int64 estimateBusyWorkAmount(bsls::TimeInterval duration)
BSLMF_NESTED_TRAIT_DECLARATION(ThroughputBenchmark, bslma::UsesBslmaAllocator)
int addThreadGroup(const RunFunction &runFunction, int numThreads, bsls::Types::Int64 busyWorkAmount, const InitializeThreadFunction &initializeFunctor, const CleanupThreadFunction &cleanupFunctor)
bslma::Allocator * allocator() const
Return the allocator used by this object.
Definition bslmt_throughputbenchmark.h:556
int addThreadGroup(const RunFunction &runFunction, int numThreads, bsls::Types::Int64 busyWorkAmount)
bsl::function< void(int)> RunFunction
Definition bslmt_throughputbenchmark.h:203
int numThreads() const
Return the total number of threads.
Definition bslmt_throughputbenchmark.h:525
static void busyWork(bsls::Types::Int64 busyWorkAmount)
Definition bsls_atomic.h:743
int loadAcquire() const
Definition bsls_atomic.h:1732
Definition bsls_timeinterval.h:301
#define BSLS_ASSERT(X)
Definition bsls_assert.h:1804
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
Definition bslmt_barrier.h:344
Definition bslma_usesbslmaallocator.h:343
Data used by a thread group.
Definition bslmt_throughputbenchmark.h:245
InitializeThreadFunction d_initialize
Definition bslmt_throughputbenchmark.h:257
CleanupThreadFunction d_cleanup
Definition bslmt_throughputbenchmark.h:260
int d_numThreads
Definition bslmt_throughputbenchmark.h:250
RunFunction d_func
Definition bslmt_throughputbenchmark.h:248
bsls::Types::Int64 d_amount
Definition bslmt_throughputbenchmark.h:253
Data transferred to ThroughputBenchmark_WorkFunction.
Definition bslmt_throughputbenchmark.h:398
ThroughputBenchmark::InitializeThreadFunction d_initialize
Definition bslmt_throughputbenchmark.h:407
bsls::Types::Int64 d_amount
Definition bslmt_throughputbenchmark.h:404
bslmt::Barrier * d_barrier_p
Definition bslmt_throughputbenchmark.h:427
bsls::Types::Int64 d_actualNanos
Definition bslmt_throughputbenchmark.h:433
int d_threadIndex
Definition bslmt_throughputbenchmark.h:420
ThroughputBenchmark::CleanupThreadFunction d_cleanup
Definition bslmt_throughputbenchmark.h:411
ThroughputBenchmark * d_bench_p
Definition bslmt_throughputbenchmark.h:415
bsls::Types::Int64 d_count
Definition bslmt_throughputbenchmark.h:438
ThroughputBenchmark::RunFunction d_func
Definition bslmt_throughputbenchmark.h:401
long long Int64
Definition bsls_types.h:132