tbb_machine.h

/*
    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_machine_H
#define __TBB_machine_H

#include "tbb_stddef.h"

namespace tbb {
namespace internal {

// Overridable helpers declarations
//
// A machine/*.h file may choose to define these templates; otherwise it must
// request the default implementations by setting the appropriate
// __TBB_USE_GENERIC_XXX macro(s).
//
template <typename T, std::size_t S>
struct machine_load_store;

template <typename T, std::size_t S>
struct machine_load_store_relaxed;

template <typename T, std::size_t S>
struct machine_load_store_seq_cst;
//
// End of overridable helpers declarations

template<size_t S> struct atomic_selector;

template<> struct atomic_selector<1> {
    typedef int8_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<2> {
    typedef int16_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<4> {
#if _MSC_VER && !_WIN64
    // Work-around that avoids spurious /Wp64 warnings
    typedef intptr_t word;
#else
    typedef int32_t word;
#endif
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<8> {
    typedef int64_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

}} // namespaces internal, tbb
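
// A hedged sketch of how a port plugs into the helpers above: a hypothetical
// machine/*.h may either specialize the machine_load_store* templates itself
// or fall back to the generic versions defined later in this header. The
// macro values below are illustrative, not taken from any real port.
#if 0
// Hypothetical excerpt from a machine/my_port.h:
#define __TBB_WORDSIZE 4
#define __TBB_BIG_ENDIAN 0
// Request the generic implementations instead of hand-written ones:
#define __TBB_USE_GENERIC_PART_WORD_CAS          1
#define __TBB_USE_GENERIC_FETCH_ADD              1
#define __TBB_USE_GENERIC_FETCH_STORE            1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE     1
#endif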

#if _WIN32||_WIN64

#ifdef _MANAGED
#pragma managed(push, off)
#endif

    #if __MINGW64__ || __MINGW32__
        extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
        #define __TBB_Yield()  SwitchToThread()
        #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
            #include "machine/gcc_generic.h"
        #elif __MINGW64__
            #include "machine/linux_intel64.h"
        #elif __MINGW32__
            #include "machine/linux_ia32.h"
        #endif
    #elif defined(_M_IX86)
        #include "machine/windows_ia32.h"
    #elif defined(_M_X64)
        #include "machine/windows_intel64.h"
    #elif _XBOX
        #include "machine/xbox360_ppc.h"
    #endif

#ifdef _MANAGED
#pragma managed(pop)
#endif

#elif __linux__ || __FreeBSD__ || __NetBSD__

    #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/gcc_generic.h"
    #elif __i386__
        #include "machine/linux_ia32.h"
    #elif __x86_64__
        #include "machine/linux_intel64.h"
    #elif __ia64__
        #include "machine/linux_ia64.h"
    #elif __powerpc__
        #include "machine/mac_ppc.h"
    #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
        #include "machine/gcc_generic.h"
    #endif
    #include "machine/linux_common.h"

#elif __APPLE__

    #if __i386__
        #include "machine/linux_ia32.h"
    #elif __x86_64__
        #include "machine/linux_intel64.h"
    #elif __POWERPC__
        #include "machine/mac_ppc.h"
    #endif
    #include "machine/macos_common.h"

#elif _AIX

    #include "machine/ibm_aix51.h"

#elif __sun || __SUNPRO_CC

    #define __asm__ asm
    #define __volatile__ volatile

    #if __i386 || __i386__
        #include "machine/linux_ia32.h"
    #elif __x86_64__
        #include "machine/linux_intel64.h"
    #elif __sparc
        #include "machine/sunos_sparc.h"
    #endif
    #include <sched.h>

    #define __TBB_Yield() sched_yield()

#endif /* OS selection */

#ifndef __TBB_64BIT_ATOMICS
    #define __TBB_64BIT_ATOMICS 1
#endif

// Special atomic functions
#if __TBB_USE_FENCED_ATOMICS
    #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
    #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
    #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
    #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence

    #if __TBB_WORDSIZE==8
        #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
        #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
        #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
        #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
        #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
    #else
        #error Define macros for 4-byte word, similarly to the above __TBB_WORDSIZE==8 branch.
    #endif /* __TBB_WORDSIZE==8 */
#else /* !__TBB_USE_FENCED_ATOMICS */
    #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
    #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
    #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
#endif /* !__TBB_USE_FENCED_ATOMICS */

#if __TBB_WORDSIZE==4
    #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
    #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
    #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
#elif  __TBB_WORDSIZE==8
    #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
        #error The __TBB_USE_GENERIC_DWORD_XXX macros should only be used on 32-bit platforms.
    #endif

    #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
    #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
    #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
#else /* __TBB_WORDSIZE != 8 */
    #error Unsupported machine word size.
#endif /* __TBB_WORDSIZE */
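
// A minimal usage sketch of the machine-word macros above (illustrative only;
// 'counter' is a hypothetical, suitably aligned variable):
#if 0
static intptr_t counter;
void example_word_ops() {
    __TBB_FetchAndAddW( &counter, 1 );  // atomic increment with full fence
    // Replace a value of 1 with 0; returns the value observed before the CAS:
    intptr_t prior = __TBB_CompareAndSwapW( &counter, 0, 1 );
    (void)prior;
}
#endif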

#ifndef __TBB_Pause
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
#endif

namespace tbb {

//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }

namespace internal {

//! Class that implements exponential backoff.
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    /** Should be approximately the number of "pause" instructions that take
        the same time as one spin-loop iteration. Must be a power of two. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause, doubling the delay each time until the yield threshold is reached.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    //! Pause while below the yield threshold; return false once it is exceeded.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
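
// A minimal usage sketch (illustrative only): spin on a hypothetical
// try_acquire() with exponential backoff, eventually yielding the CPU.
#if 0
extern bool try_acquire();
void acquire() {
    atomic_backoff backoff;
    while( !try_acquire() )
        backoff.pause();
}
#endif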

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
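
// A minimal usage sketch (illustrative only): block until another thread sets
// a hypothetical flag.
#if 0
extern volatile int ready;
void wait_for_ready() {
    spin_wait_until_eq( ready, 1 );  // spins (with backoff) while ready != 1
}
#endif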

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    // Align down to the containing 32-bit word.
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_machine_cmpswp4 is presumed to have a full fence.
        // Cast shuts up /Wp64 warning
        result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest were left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
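
// Worked example (little-endian, S==2): for a uint16_t at byte offset 2 within
// its aligned 32-bit word, bitoffset = 8*2 = 16 and
// mask = ((1<<16)-1)<<16 = 0xFFFF0000, so the loop above compares and replaces
// only the upper half of the containing word, retrying if other bits change.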

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    // Cast shuts up /Wp64 warning
    return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_machine_cmpswp8(ptr,value,comparand);
}
#endif

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}
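
// The CAS loop above generalizes to any read-modify-write operation. A hedged
// sketch of a hypothetical fetch-and-max built the same way (illustrative
// only, not part of TBB):
#if 0
template<size_t S, typename T>
inline T __TBB_FetchAndMaxGeneric (volatile void *ptr, T operand) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        T desired = result<operand ? operand : result;
        if( __TBB_CompareAndSwapGeneric<S,T>( ptr, desired, result )==result )
            break;
        b.pause();
    }
    return result;  // value observed before the update
}
#endif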

#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
    atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
        return __TBB_machine_fetchstore##S( location, value );                                          \
    }

__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)

#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */

#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
    for(;;) {
        int64_t result = *(volatile int64_t *)ptr;
        if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
    }
}

inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
    // Comparand and new value may be anything; they only need to be equal,
    // and the value should have a low probability of actually being found
    // at 'ptr'.
    const int64_t anyvalue = 2305843009213693951LL;
    return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
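
// Why the CAS-based load/store above are needed: on a 32-bit target a plain
// 64-bit access may be compiled as two 32-bit operations and observe a torn
// value under concurrency. Reading through an 8-byte CAS is atomic either
// way: if the CAS fails it returns the current value, and if it "succeeds"
// it merely rewrites the value that was already there.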

#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE

// Generic acquire load and release store, built from ordinary accesses
// combined with the acquire/release consistency helpers.
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        T to_return = location;
        __TBB_acquire_consistency_helper();
        return to_return;
    }
    static void store_with_release ( volatile T &location, T value ) {
        __TBB_release_consistency_helper();
        location = value;
    }
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
    static T load_with_acquire ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static void store_with_release ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */

template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        __TBB_full_memory_fence();
        return machine_load_store<T,S>::load_with_acquire( location );
    }
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
    static void store ( volatile T &location, T value ) {
        atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
    }
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
    static void store ( volatile T &location, T value ) {
        machine_load_store<T,S>::store_with_release( location, value );
        __TBB_full_memory_fence();
    }
#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS

// On 32-bit targets, sequentially consistent 64-bit load/store are built
// directly on the full-fenced 8-byte CAS.
template <typename T>
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        // Comparand and new value may be anything; they only need to be equal,
        // and the value should have a low probability of actually being found
        // at 'location'.
        const int64_t anyvalue = 2305843009213693951LL;
        return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
    }
    static void store ( volatile T &location, T value ) {
        int64_t result = (volatile int64_t&)location;
        while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
            result = (volatile int64_t&)location;
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */

#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed operations add a volatile qualifier to prevent the compiler from
// optimizing them out.
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        return location;
    }
    static inline void store ( volatile T& location, T value ) {
        location = value;
    }
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
    static inline T load ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static inline void store ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */

template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
}
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
}
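
// A minimal usage sketch (illustrative only): release/acquire pairing for a
// producer-consumer handoff through the hypothetical variables 'payload'
// and 'ready'.
#if 0
static int payload;
static int ready;
void producer() {
    payload = 42;                          // ordinary store
    __TBB_store_with_release( ready, 1 );  // publishes 'payload'
}
void consumer() {
    if( __TBB_load_with_acquire( ready )==1 ) {
        int v = payload;                   // guaranteed to observe 42
        (void)v;
    }
}
#endif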

template<typename T>
inline T __TBB_load_full_fence(const volatile T &location) {
    return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
}
template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
    machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
    machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
}
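
// Why the full-fence variants exist: in Dekker-style code each thread stores
// its own flag and then loads the peer's; with only release/acquire ordering
// both loads could return 0. A hedged sketch with hypothetical flags:
#if 0
static int flag0, flag1;
void thread0() {
    __TBB_store_full_fence( flag0, 1 );
    if( __TBB_load_full_fence( flag1 )==0 ) { /* peer not started: safe to proceed */ }
}
void thread1() {
    __TBB_store_full_fence( flag1, 1 );
    if( __TBB_load_full_fence( flag0 )==0 ) { /* ... */ }
}
#endif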

template<typename T>
inline T __TBB_load_relaxed (const volatile T& location) {
    return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
}
template<typename T, typename V>
inline void __TBB_store_relaxed ( volatile T& location, V value ) {
    machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
    machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
}

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with
// alignment at least as strict as that of type T. The type should have a
// trivial default constructor and destructor, so that arrays of it can be
// declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T)
// expands to a type bigger than T.
// The default definition here works on machines where integers are naturally
// aligned and the strictest alignment is 64.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __TBB_ATTRIBUTE_ALIGNED_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
} __attribute__((aligned(PowerOf2)));
#define __TBB_alignof(T) __alignof__(T)

#elif __TBB_DECLSPEC_ALIGN_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
__declspec(align(PowerOf2))                           \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
};
#define __TBB_alignof(T) __alignof(T)

#else /* A compiler with unknown syntax for data alignment */
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
#endif

/* Now declare types aligned to useful powers of two */
// TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
__TBB_DefineTypeWithAlignment(16)
__TBB_DefineTypeWithAlignment(32)
__TBB_DefineTypeWithAlignment(64)

typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;

// Primary template is a declaration of incomplete type so that it fails with unknown alignments
template<size_t N> struct type_with_alignment;

// Specializations for allowed alignments
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };
template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };

#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN

// Work around a compiler bug where __TBB_alignof fails on not-yet-instantiated
// template types: obtain the alignment through an extra template indirection.
template<size_t Size, typename T>
struct work_around_alignment_bug {
    static const size_t alignment = __TBB_alignof(T);
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
#endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */

#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
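
// A minimal usage sketch (illustrative only): raw storage whose alignment is
// at least as strict as T's, suitable for placement new; 'padded_slot' is a
// hypothetical name, not part of TBB.
#if 0
template<typename T>
union padded_slot {
    __TBB_TypeWithAlignmentAtLeastAsStrict(T) aligner;  // enforces alignment
    char bytes[sizeof(T)];                              // actual storage
};
#endif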

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the
// bit-reversed values of [0..2^8 - 1]. Those values can also be computed on the fly at a
// slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

} // namespace internal
} // namespace tbb

// Preserving access to legacy APIs
using tbb::internal::__TBB_load_with_acquire;
using tbb::internal::__TBB_store_with_release;

// Mapping historically used names to the ones expected by atomic_load_store_traits
#define __TBB_load_acquire  __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
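
// Worked example: __TBB_Log2(40) returns 5, since 2^5 = 32 <= 40 < 64 = 2^6.
// In other words, the function computes floor(log2(x)), the index of the most
// significant set bit, with -1 reserved for x==0.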

#ifndef __TBB_AtomicOR
// Atomically OR the machine word at 'operand' with 'value'.
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t value ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|value, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
// Atomically AND the machine word at 'operand' with 'value'.
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t value ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&value, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
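
// A minimal usage sketch (illustrative only): atomically setting and clearing
// bit 2 in a hypothetical word-sized bitmask.
#if 0
static uintptr_t flags;
void example_bit_ops() {
    __TBB_AtomicOR ( &flags,  uintptr_t(1)<<2 );    // set bit 2
    __TBB_AtomicAND( &flags, ~(uintptr_t(1)<<2) );  // clear bit 2
}
#endif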

#ifndef __TBB_Flag
typedef unsigned char __TBB_Flag;
#endif
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;

#ifndef __TBB_TryLockByte
// Attempt to acquire the byte lock: succeeds iff the flag was 0 and is now 1.
inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
    return __TBB_machine_cmpswp1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
// Acquire the byte lock, spinning with exponential backoff until it is free.
inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif

#define __TBB_UnlockByte __TBB_store_with_release
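
// A minimal usage sketch (illustrative only): the byte lock as a tiny spin
// lock around a critical section; 'my_lock' is a hypothetical,
// zero-initialized flag.
#if 0
static __TBB_atomic_flag my_lock;
void with_lock() {
    __TBB_LockByte( my_lock );       // spin until acquired
    /* ... critical section ... */
    __TBB_UnlockByte( my_lock, 0 );  // release store of 0 unlocks
}
#endif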

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

// Reverse the bits of an arbitrary integral type: bit-reverse each byte via
// the lookup table while swapping the byte order.
template<typename T>
T __TBB_ReverseBits(T src) {
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
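
// Worked example: __TBB_ReverseByte(0x01) == 0x80, and
// __TBB_ReverseBits<uint16_t>(0x0001) == 0x8000. The result is the same on
// little- and big-endian targets, because the byte swap and the per-byte
// reversal compose to a full bit reversal either way.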

#endif /* __TBB_machine_H */
