BDE 4.14.0 Production release
Loading...
Searching...
No Matches
bsls_performancehint.h
Go to the documentation of this file.
1/// @file bsls_performancehint.h
2///
3/// The content of this file has been pre-processed for Doxygen.
4///
5
6
7// bsls_performancehint.h -*-C++-*-
8#ifndef INCLUDED_BSLS_PERFORMANCEHINT
9#define INCLUDED_BSLS_PERFORMANCEHINT
10
11#include <bsls_ident.h>
12BSLS_IDENT("$Id: $")
13
14/// @defgroup bsls_performancehint bsls_performancehint
15/// @brief Provide performance hints for code optimization.
16/// @addtogroup bsl
17/// @{
18/// @addtogroup bsls
19/// @{
20/// @addtogroup bsls_performancehint
21/// @{
22///
23/// <h1> Outline </h1>
24/// * <a href="#bsls_performancehint-purpose"> Purpose</a>
25/// * <a href="#bsls_performancehint-classes"> Classes </a>
26/// * <a href="#bsls_performancehint-macros"> Macros </a>
27/// * <a href="#bsls_performancehint-description"> Description </a>
28/// * <a href="#bsls_performancehint-branch-prediction"> Branch Prediction </a>
29/// * <a href="#bsls_performancehint-warning"> Warning </a>
30/// * <a href="#bsls_performancehint-limitations"> Limitations </a>
31/// * <a href="#bsls_performancehint-data-cache-prefetching"> Data Cache Prefetching </a>
32/// * <a href="#bsls_performancehint-optimization-fence"> Optimization Fence </a>
33/// * <a href="#bsls_performancehint-usage"> Usage </a>
34/// * <a href="#bsls_performancehint-example-1-using-the-branch-prediction-macros"> Example 1: Using the Branch Prediction Macros </a>
35/// * <a href="#bsls_performancehint-example-2-using-bsls_performancehint_predict_expect"> Example 2: Using BSLS_PERFORMANCEHINT_PREDICT_EXPECT </a>
36/// * <a href="#bsls_performancehint-example-3-cache-line-prefetching"> Example 3: Cache Line Prefetching </a>
37///
38/// # Purpose {#bsls_performancehint-purpose}
39/// Provide performance hints for code optimization.
40///
41/// # Classes {#bsls_performancehint-classes}
42///
43/// - bsls::PerformanceHint: namespace for performance optimization hints
44///
45/// # Macros {#bsls_performancehint-macros}
46///
47/// - BSLS_PERFORMANCEHINT_PREDICT_LIKELY(X): `X` probably evaluates to non-zero
48/// - BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(X): `X` probably evaluates to zero
49/// - BSLS_PERFORMANCEHINT_PREDICT_EXPECT(X, Y): `X` probably evaluates to `Y`
50/// - BSLS_PERFORMANCEHINT_UNLIKELY_HINT: annotate block unlikely to be taken
51/// - BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE: prevent compiler optimizations
52///
53/// # Description {#bsls_performancehint-description}
54/// This component provides performance hints for the compiler or
55/// hardware. There are currently two types of hints that are supported:
56/// * branch prediction
57/// * data cache prefetching
58///
59/// ## Branch Prediction {#bsls_performancehint-branch-prediction}
60///
61///
62/// The three macros provided, `BSLS_PERFORMANCEHINT_PREDICT_LIKELY`,
63/// `BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY`, and
64/// `BSLS_PERFORMANCEHINT_PREDICT_EXPECT`, can be used to optimize compiler
65/// generated code for branch prediction. The compiler, when given the hint
66/// under *optimized* mode (i.e., with `BDE_BUILD_TARGET_OPT` defined) will
67/// rearrange the assembly instructions it generates to minimize the number of
68/// jumps needed.
69///
70/// The following describes the macros provided by this component:
71/// @code
72/// Macro Name Description of Macro
73/// ---------------------------------------- -----------------------------
74/// BSLS_PERFORMANCEHINT_PREDICT_LIKELY(expr) Hint to the compiler that the
75/// specified *integral* 'expr'
76/// expression is likely to
77/// evaluate to non-zero.
78/// Returns 'true' or 'false'
79/// depending on the result of
80/// the expression.
81///
82/// BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(expr) Hint to the compiler that the
83/// specified *integral* 'expr'
84/// expression is likely to
85/// evaluate to zero. Returns
86/// 'true' or 'false' depending
87/// on the result of the
88/// expression.
89///
90/// BSLS_PERFORMANCEHINT_PREDICT_EXPECT(expr, value)
91/// Hint to the compiler that the
92/// specified *integral* 'expr'
93/// expression is likely to
94/// evaluate to the specified
95/// 'value'. Returns the result
96/// of the expression.
97///
98/// BSLS_PERFORMANCEHINT_UNLIKELY_HINT Hint to the compiler that the
99/// block which contains the hint
100/// is unlikely to be chosen. Use this
101/// in conjunction with the
102/// 'PREDICT_UNLIKELY' clause for
103/// maximum portability.
104/// @endcode
105///
106/// ### Warning {#bsls_performancehint-warning}
107///
108///
109/// Please use the macros provided in this component *with* *caution*. Always
110/// profile your code to get an idea of actual usage before attempting to
111/// optimize with these macros. Furthermore, these macros are merely *hints* to
112/// the compiler. Whether or not they will have visible effect on performance
113/// is not guaranteed. Note that one can perform similar optimization with a
114/// profile-based compilation. When compiled with the proper options, the
115/// compiler can collect usage information of the code, and such information can
116/// then be passed back to recompile the code in a more optimized form. Please
117/// refer to the compiler manual for more information.
118///
119/// ### Limitations {#bsls_performancehint-limitations}
120///
121///
122/// There is a bug in gcc 4.2, 4.3, and 4.4 such that when using the branch
123/// prediction macros with multiple conditions, the generated code might not be
124/// properly optimized. For example:
125/// @code
126/// if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(a && b)) {
127/// // ...
128/// }
129/// @endcode
130/// The work-around is simply to split the conditions:
131/// @code
132/// if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(a)
133/// && BSLS_PERFORMANCEHINT_PREDICT_LIKELY(b)) {
134/// // ...
135/// }
136/// @endcode
137/// This applies to all of the "likely", "unlikely", and "expect" macros defined
138/// in this component. Note that a bug report has been filed:
139/// @code
140/// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42233
141/// @endcode
142///
143/// ## Data Cache Prefetching {#bsls_performancehint-data-cache-prefetching}
144///
145///
146/// The two functions provided in the `bsls::PerformanceHint` `struct` are
147/// `prefetchForReading` and `prefetchForWriting`. Use of these functions will
148/// cause the compiler to generate prefetch instructions to prefetch one cache
149/// line worth of data at the specified address into the cache to minimize
150/// processor stalls.
151/// @code
152/// Function Name Description of Function
153/// ------------------------ ------------------------------------------
154/// prefetchForReading(address) Prefetches one cache line worth of data at
155/// the specified 'address' for reading.
156///
157/// prefetchForWriting(address) Prefetches one cache line worth of data at
158/// the specified 'address' for writing.
159/// @endcode
160///
161/// **Warning**
162/// > These functions must be used **with caution**. Inappropriate use of these
163/// > functions degrades performance. Note that there should be sufficient time
164/// > for the prefetch instruction to finish before the specified address is
165/// > accessed, otherwise prefetching will be pointless. A profiler should be
166/// > used to understand the program's behavior before attempting to optimize
167/// > with these functions.
168///
169/// ## Optimization Fence {#bsls_performancehint-optimization-fence}
170///
171///
172/// The macro `BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE` prevents some compiler
173/// optimizations, particularly compiler instruction reordering. This fence
174/// does *not* map to a CPU instruction and has no impact on processor
175/// instruction re-ordering, and therefore should not be used to synchronize
176/// memory between threads. The fence may be useful in unusual contexts, like
177/// performing benchmarks, or working around bugs identified in the compiler's
178/// optimizer.
179///
180/// **Warning**
181/// > This macro should be used *with* *caution*. The macro will generally
182/// > decrease the performance of code on which it is applied, and is not
183/// > implemented on all platforms.
184///
185/// ## Usage {#bsls_performancehint-usage}
186///
187///
188/// The following series of examples illustrates use of the macros and functions
189/// provided by this component.
190///
191/// ### Example 1: Using the Branch Prediction Macros {#bsls_performancehint-example-1-using-the-branch-prediction-macros}
192///
193///
194/// The following demonstrates the use of `BSLS_PERFORMANCEHINT_PREDICT_LIKELY`
195/// and `BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY` to generate more efficient
196/// assembly instructions. Note the use of `BSLS_PERFORMANCEHINT_UNLIKELY_HINT`
197/// inside the `if` branch for maximum portability.
198/// @code
199/// volatile int global;
200///
201/// void foo()
202/// {
203/// global = 1;
204/// }
205///
206/// void bar()
207/// {
208/// global = 2;
209/// }
210///
211/// int main(int argc, char **argv)
212/// {
213/// argc = std::atoi(argv[1]);
214///
215/// for (int x = 0; x < argc; ++x) {
216/// int y = std::rand() % 10;
217///
218/// // Correct usage of 'BSLS_PERFORMANCEHINT_PREDICT_LIKELY' since
219/// // there is a nine in ten chance that this branch is taken.
220///
221/// if (BSLS_PERFORMANCEHINT_PREDICT_LIKELY(8 != y)) {
222/// foo();
223/// }
224/// else {
225/// BSLS_PERFORMANCEHINT_UNLIKELY_HINT;
226/// bar();
227/// }
228/// }
229/// return 0;
230/// }
231/// @endcode
232/// An excerpt of the assembly code generated using `xlC` Version 10 on AIX from
233/// this small program is:
234/// @code
235/// b8: 2c 00 00 08 cmpwi r0,8
236/// bc: 41 82 00 38 beq- f4 <.main+0xb4>
237/// ^
238/// Note that if register r0 (y) equals 8, branch to
239/// instruction f4 (a jump). The '-' after 'beq'
240/// indicates that the branch is unlikely to be taken.
241/// The predicted code path continues the 'if'
242/// statement, which calls 'foo' below.
243///
244/// c0: 4b ff ff 41 bl 0 <.foo__Fv>
245/// ...
246/// f4: 4b ff ff 2d bl 20 <.bar__Fv>
247/// @endcode
248/// Now, if `BSLS_PERFORMANCEHINT_PREDICT_LIKELY` is changed to
249/// `BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY`, and the
250/// `BSLS_PERFORMANCEHINT_UNLIKELY_HINT` is moved to the first branch, the
251/// following assembly code will be generated:
252/// @code
253/// b8: 2c 00 00 08 cmpwi r0,8
254/// bc: 40 c2 00 38 bne- f4 <.main+0xb4>
255/// ^
256/// Note that the test became a "branch not equal"
257/// test. The predicted code path now continues to the
258/// 'else' statement, which calls 'bar' below.
259///
260/// c0: 4b ff ff 61 bl 20 <.bar__Fv>
261/// ...
262/// f4: 4b ff ff 0d bl 0 <.foo__Fv>
263/// @endcode
264/// A timing analysis shows that effective use of branch prediction can have a
265/// material effect on code efficiency:
266/// @code
267/// $time ./unlikely.out 100000000
268///
269/// real 0m2.022s
270/// user 0m2.010s
271/// sys 0m0.013s
272///
273/// $time ./likely.out 100000000
274///
275/// real 0m2.159s
276/// user 0m2.149s
277/// sys 0m0.005s
278/// @endcode
279///
280/// ### Example 2: Using BSLS_PERFORMANCEHINT_PREDICT_EXPECT {#bsls_performancehint-example-2-using-bsls_performancehint_predict_expect}
281///
282///
283/// This macro is essentially the same as the `__builtin_expect(expr, value)`
284/// macro that is provided by some compilers. This macro allows the user to
285/// define more complex hints to the compiler, such as the optimization of
286/// `switch` statements. For example, given:
287/// @code
288/// int x = std::rand() % 4;
289/// @endcode
290/// the following is incorrect usage of `BSLS_PERFORMANCEHINT_PREDICT_EXPECT`,
291/// since the probability of getting a 3 is equivalent to the other
292/// possibilities ( 0, 1, 2 ):
293/// @code
294/// switch (BSLS_PERFORMANCEHINT_PREDICT_EXPECT(x, 3)) {
295/// case 1: //..
296/// break;
297/// case 2: //..
298/// break;
299/// case 3: //..
300/// break;
301/// default: break;
302/// }
303/// @endcode
304/// However, this is sufficient to illustrate the intent of this macro.
305///
306/// ### Example 3: Cache Line Prefetching {#bsls_performancehint-example-3-cache-line-prefetching}
307///
308///
309/// The following demonstrates use of `prefetchForReading` and
310/// `prefetchForWriting` to prefetch data cache lines:
311/// @code
312/// const int SIZE = 10 * 1024 * 1024;
313///
314/// void add(int *arrayA, int *arrayB)
315/// {
316/// for (int i = 0; i < SIZE / 8; ++i){
317/// *arrayA += *arrayB; ++arrayA; ++arrayB;
318/// *arrayA += *arrayB; ++arrayA; ++arrayB;
319/// *arrayA += *arrayB; ++arrayA; ++arrayB;
320/// *arrayA += *arrayB; ++arrayA; ++arrayB;
321///
322/// *arrayA += *arrayB; ++arrayA; ++arrayB;
323/// *arrayA += *arrayB; ++arrayA; ++arrayB;
324/// *arrayA += *arrayB; ++arrayA; ++arrayB;
325/// *arrayA += *arrayB; ++arrayA; ++arrayB;
326/// }
327/// }
328///
329/// int array1[SIZE];
330/// int array2[SIZE];
331///
332/// int main()
333/// {
334/// BloombergLP::bsls::Stopwatch timer;
335/// timer.start();
336/// for (int i = 0; i < 10; ++i) {
337/// add(array1, array2);
338/// }
339/// printf("time: %f\n", timer.elapsedTime());
340/// return 0;
341/// }
342/// @endcode
343/// The above code simply adds two arrays together multiple times. Using
344/// `bsls::Stopwatch`, we recorded the running time and printed it to `stdout`:
345/// @code
346/// $./prefetch.sundev1.tsk
347/// time: 8.446806
348/// @endcode
349/// Now, we can observe that in the `add` function, `arrayA` and `arrayB` are
350/// accessed sequentially for the majority of the program. `arrayA` is used for
351/// writing and `arrayB` is used for reading. Making use of prefetch, we add
352/// calls to `prefetchForReading` and `prefetchForWriting`:
353/// @code
354/// void add2(int *arrayA, int *arrayB)
355/// {
356/// for (int i = 0; i < SIZE / 8; ++i){
357/// using namespace BloombergLP; // Generally avoid 'using' in this TD.
358/// bsls::PerformanceHint::prefetchForWriting((int *) arrayA + 16);
359/// bsls::PerformanceHint::prefetchForReading((int *) arrayB + 16);
360///
361/// *arrayA += *arrayB; ++arrayA; ++arrayB;
362/// *arrayA += *arrayB; ++arrayA; ++arrayB;
363/// *arrayA += *arrayB; ++arrayA; ++arrayB;
364/// *arrayA += *arrayB; ++arrayA; ++arrayB;
365///
366/// *arrayA += *arrayB; ++arrayA; ++arrayB;
367/// *arrayA += *arrayB; ++arrayA; ++arrayB;
368/// *arrayA += *arrayB; ++arrayA; ++arrayB;
369/// *arrayA += *arrayB; ++arrayA; ++arrayB;
370/// }
371/// }
372/// @endcode
373/// Adding the prefetch improves the program's efficiency:
374/// @code
375/// $./prefetch.sundev1.tsk
376/// time: 6.442100
377/// @endcode
378/// Note that we prefetch the address `16 * sizeof(int)` bytes away from
379/// `arrayA`. This is such that the prefetch instruction has sufficient time to
380/// finish before the data is actually accessed. To see the difference, if we
381/// changed `+ 16` to `+ 4`:
382/// @code
383/// $./prefetch.sundev1.tsk
384/// time: 6.835928
385/// @endcode
386/// And we get less of an improvement in speed. Similarly, if we prefetch too
387/// far away from the data use, the data might be removed from the cache before
388/// it is looked at and the prefetch is wasted.
389/// @}
390/** @} */
391/** @} */
392
393/** @addtogroup bsl
394 * @{
395 */
396/** @addtogroup bsls
397 * @{
398 */
399/** @addtogroup bsls_performancehint
400 * @{
401 */
402
403#include <bsls_platform.h>
404
405#if defined(BSLS_PLATFORM_CMP_IBM)
406#include <builtins.h> // for 'dcbt', '__builtin_expect'
407#endif
408
409#if defined(BSLS_PLATFORM_CMP_HP)
410#include <machine/sys/builtins.h>
411
412#include <machine/sys/inline.h>
413#endif
414
415#if defined(BSLS_PLATFORM_CMP_SUN)
416#include <sun_prefetch.h> // for 'sparc_prefetch_write|read_many'
417
418#include <mbarrier.h>
419#endif
420
421#if defined(BSLS_PLATFORM_CMP_MSVC)
422#if !defined(BSLS_PLATFORM_CPU_ARM)
423#include <xmmintrin.h> // for '_mm_prefetch', '_MM_HINT_T0'
424#endif
425
426#include <intrin.h>
427#endif
428
429
430
431 // ============================
432 // BSLS_PERFORMANCEHINT_PREDICT
433 // ============================
434
435// These macros forward to '__builtin_expect' only when 'BDE_BUILD_TARGET_OPT'
436// is defined and the compiler is Clang, GNU, or IBM; otherwise see '#else'.
437
438#if defined(BDE_BUILD_TARGET_OPT) && \
439 (defined(BSLS_PLATFORM_CMP_CLANG) || \
440 defined(BSLS_PLATFORM_CMP_GNU) || \
441 defined(BSLS_PLATFORM_CMP_IBM))
442 // '!!(expr)' collapses 'expr' to 0 or 1 so it can be matched against the hint.
443 #define BSLS_PERFORMANCEHINT_PREDICT_LIKELY(expr) \
444 __builtin_expect(!!(expr), 1)
445 #define BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(expr) \
446 __builtin_expect(!!(expr), 0)
447 #define BSLS_PERFORMANCEHINT_PREDICT_EXPECT(expr, value) \
448 __builtin_expect((expr), (value))
449#else
450 // Fallback: the hint is dropped; each macro expands to '(expr)' unchanged.
451 #define BSLS_PERFORMANCEHINT_PREDICT_LIKELY(expr) (expr)
452 #define BSLS_PERFORMANCEHINT_PREDICT_UNLIKELY(expr) (expr)
453 #define BSLS_PERFORMANCEHINT_PREDICT_EXPECT(expr, value) (expr)
454
455#endif
456
457// Define 'BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD' (always defined, possibly empty)
458// and 'BSLS_PERFORMANCEHINT_HAS_ATTRIBUTE_COLD' (defined only when non-empty).
459 // Clang is probed with '__has_attribute(cold)'; GNU defines it unconditionally.
460#if defined(BSLS_PLATFORM_CMP_CLANG)
461 #if __has_attribute(cold)
462 #define BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD __attribute__((cold))
463 #endif
464#elif defined(BSLS_PLATFORM_CMP_GNU)
465 #define BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD __attribute__((cold))
466#endif
467 // If nothing was selected above, make the attribute macro expand to nothing.
468#if !defined(BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD)
469 #define BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD
470#else
471 #define BSLS_PERFORMANCEHINT_HAS_ATTRIBUTE_COLD 1
472#endif
473
474// Define the 'BSLS_PERFORMANCEHINT_UNLIKELY_HINT' macro. In optimized builds it
475 // expands to a call to 'rarelyCalled' (Sun) or 'lowFrequency' (IBM, or when the
// 'cold' attribute is available); in every other configuration it is empty.
476#if defined(BDE_BUILD_TARGET_OPT) && defined(BSLS_PLATFORM_CMP_SUN)
477 #define BSLS_PERFORMANCEHINT_UNLIKELY_HINT \
478 BloombergLP::bsls::PerformanceHint::rarelyCalled()
479#elif defined(BDE_BUILD_TARGET_OPT) && \
480 (defined(BSLS_PLATFORM_CMP_IBM) || BSLS_PERFORMANCEHINT_HAS_ATTRIBUTE_COLD)
481 #define BSLS_PERFORMANCEHINT_UNLIKELY_HINT \
482 BloombergLP::bsls::PerformanceHint::lowFrequency()
483#else
484 #define BSLS_PERFORMANCEHINT_UNLIKELY_HINT
485#endif
486
487 // =======================================
488 // BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE
489 // =======================================
490 // Select one compiler-specific barrier expression per supported compiler
// (IBM, MSVC, HP, Sun, GNU/Clang). Unlike the hint macros above, there is no
// empty fallback: an unrecognized compiler stops the build with '#error'.
491#if defined(BSLS_PLATFORM_CMP_IBM)
492
493 #define BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE __fence()
494
495#elif defined(BSLS_PLATFORM_CMP_MSVC)
496
497 #pragma intrinsic(_ReadWriteBarrier)
498 #define BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE _ReadWriteBarrier()
499
500#elif defined(BSLS_PLATFORM_CMP_HP)
501
502 #define BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE \
503 _Asm_sched_fence(_UP_MEM_FENCE|_DOWN_MEM_FENCE)
504
505#elif defined(BSLS_PLATFORM_CMP_SUN)
506
507 #define BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE __compiler_barrier()
508
509#elif defined(BSLS_PLATFORM_CMP_GNU) \
510 || defined(BSLS_PLATFORM_CMP_CLANG)
511
512 #define BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE asm volatile("":::"memory")
513
514#else
515 #error "BSLS_PERFORMANCEHINT_OPTIMIZATION_FENCE not implemented"
516#endif
517
518namespace bsls {
519
520 // ======================
521 // struct PerformanceHint
522 // ======================
523
524/// This `struct` provides a namespace for a suite of functions that give
525/// performance hints to the compiler or hardware.
526struct PerformanceHint {
527
528 // CLASS METHODS
529
530 /// Prefetch one cache line worth of data at the specified `address` for
531 /// reading if the compiler built-in is available (see the component-level
532 /// documentation for limitations). Otherwise this method has no
533 /// effect.
534 static void prefetchForReading(const void *address);
535
536 /// Prefetch one cache line worth of data at the specified `address` for
537 /// writing if the compiler built-in is available (see the component-level
538 /// documentation for limitations). Otherwise this method has no
539 /// effect.
540 static void prefetchForWriting(void *address);
541
542 /// This is an empty function that is marked as rarely called using
543 /// pragmas. If this function is placed in a block of code inside a
544 /// branch, the compiler will optimize the assembly code generated and
545 /// mark the block as unlikely. Note that this function is
546 /// intentionally not inlined.
547 static void rarelyCalled();
548
549#if defined(BDE_BUILD_TARGET_OPT)
550#if defined(BSLS_PLATFORM_CMP_SUN)
551
552// Pragma to flag the function as rarely called.
553#pragma rarely_called(rarelyCalled)
554
555// Pragma to flag the function as no side effect. This is necessary because a
556// function that is marked as rarely called cannot be inlined without losing
557// the 'rarely_called' characteristics. When marked as no side effect, even an
558// out-of-line function will not trigger a function call.
559#pragma no_side_effect(rarelyCalled)
560
561#endif // BSLS_PLATFORM_CMP_SUN
562#endif // BDE_BUILD_TARGET_OPT
563
564 /// This is an empty function that is marked with low execution
565 /// frequency using pragmas. If this function is placed in a block of
566 /// code inside a branch, the compiler will optimize the assembly code
567 /// generated and mark the block as unlikely.
568 BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD
569 static void lowFrequency();
570};
571
572// ============================================================================
573// INLINE DEFINITIONS
574// ============================================================================
575
576 // ----------------------
577 // struct PerformanceHint
578 // ----------------------
579
580// CLASS METHODS
// Issue a read prefetch for the specified 'address' using whichever
// compiler-specific primitive the platform macros below select. When no branch
// matches, the function body is empty.
581inline
582void PerformanceHint::prefetchForReading(const void *address)
583{
584#if defined(BSLS_PLATFORM_CMP_GNU) || defined(BSLS_PLATFORM_CMP_CLANG)
585 // Second argument '0' selects a prefetch for reading.
586 __builtin_prefetch(address, 0);
587
588#elif defined(BSLS_PLATFORM_CMP_IBM)
589 // The intrinsic is passed a non-'const' pointer, hence the 'const_cast'.
590 __dcbt(const_cast<void *>(address));
591
592#elif defined(BSLS_PLATFORM_CMP_SUN)
593 // The intrinsic is passed a non-'const' pointer, hence the 'const_cast'.
594 sparc_prefetch_read_many(const_cast<void *>(address));
595
596#elif defined(BSLS_PLATFORM_CMP_MSVC)
597 // '<xmmintrin.h>' is not included for ARM, so ARM uses '__prefetch' instead.
598#if !defined(BSLS_PLATFORM_CPU_ARM)
599 _mm_prefetch(static_cast<const char*>(address), _MM_HINT_T0);
600 // '_MM_HINT_T0' fetches data to all levels of cache.
601#else
602 __prefetch(address);
603#endif
604
605#elif defined(BSLS_PLATFORM_CMP_HP)
606
607 _Asm_lfetch(_LFTYPE_NONE, _LFHINT_NTA, address);
608
609#else
610
611 // no-op: no prefetch primitive is selected for this compiler
612
613#endif
614}
615
// Issue a write prefetch for the specified 'address' using whichever
// compiler-specific primitive the platform macros below select. When no branch
// matches, the function body is empty.
616inline
617void PerformanceHint::prefetchForWriting(void *address)
618{
619#if defined(BSLS_PLATFORM_CMP_GNU) || defined(BSLS_PLATFORM_CMP_CLANG)
620 // Second argument '1' selects a prefetch for writing.
621 __builtin_prefetch(address, 1);
622
623#elif defined(BSLS_PLATFORM_CMP_IBM)
624
625 __dcbtst(address);
626
627#elif defined(BSLS_PLATFORM_CMP_SUN)
628
629 sparc_prefetch_write_many(address);
630
631#elif defined(BSLS_PLATFORM_CMP_MSVC)
632 // MSVC issues the same call as 'prefetchForReading'; no write-specific form
// is used here.
633#if !defined(BSLS_PLATFORM_CPU_ARM)
634 _mm_prefetch(static_cast<const char*>(address), _MM_HINT_T0);
635 // '_MM_HINT_T0' fetches data to all levels of cache.
636#else
637 __prefetch(address);
638#endif
639
640#elif defined(BSLS_PLATFORM_CMP_HP)
641
642 _Asm_lfetch_excl(_LFTYPE_NONE, _LFHINT_NTA, address);
643
644#else
645
646 // no-op: no prefetch primitive is selected for this compiler
647
648#endif
649}
650
651// This function must be inlined for the pragma to take effect on the branch
652// prediction in IBM xlC. On other compilers the body is empty and the only
// annotation is 'BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD', which may itself expand
// to nothing.
653
654BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD
655inline
656void PerformanceHint::lowFrequency()
657{
658#if defined(BDE_BUILD_TARGET_OPT) && defined(BSLS_PLATFORM_CMP_IBM)
659
660#pragma execution_frequency(very_low)
661
662#endif
663}
664
665} // close package namespace
666
667#ifndef BDE_OPENSOURCE_PUBLICATION // BACKWARD_COMPATIBILITY
668// ============================================================================
669// BACKWARD COMPATIBILITY
670// ============================================================================
671
672/// This alias is defined for backward compatibility.
673typedef bsls::PerformanceHint bsls_PerformanceHint;
674#endif // BDE_OPENSOURCE_PUBLICATION -- BACKWARD_COMPATIBILITY
675
676
677
678#endif
679
680// ----------------------------------------------------------------------------
681// Copyright 2013 Bloomberg Finance L.P.
682//
683// Licensed under the Apache License, Version 2.0 (the "License");
684// you may not use this file except in compliance with the License.
685// You may obtain a copy of the License at
686//
687// http://www.apache.org/licenses/LICENSE-2.0
688//
689// Unless required by applicable law or agreed to in writing, software
690// distributed under the License is distributed on an "AS IS" BASIS,
691// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
692// See the License for the specific language governing permissions and
693// limitations under the License.
694// ----------------------------- END-OF-FILE ----------------------------------
695
696/** @} */
697/** @} */
698/** @} */
#define BSLS_IDENT(str)
Definition bsls_ident.h:195
bsls::PerformanceHint bsls_PerformanceHint
This alias is defined for backward compatibility.
Definition bsls_performancehint.h:673
#define BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD
Definition bsls_performancehint.h:469
Definition bdlt_iso8601util.h:691
Definition bsls_performancehint.h:526
static void rarelyCalled()
static void prefetchForWriting(void *address)
Definition bsls_performancehint.h:617
static BSLS_PERFORMANCEHINT_ATTRIBUTE_COLD void lowFrequency()
Definition bsls_performancehint.h:656
static void prefetchForReading(const void *address)
Definition bsls_performancehint.h:582