00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_partitioner_H
00022 #define __TBB_partitioner_H
00023
00024 #ifndef __TBB_INITIAL_CHUNKS
00025 #define __TBB_INITIAL_CHUNKS 2
00026 #endif
00027 #ifndef __TBB_RANGE_POOL_CAPACITY
00028 #define __TBB_RANGE_POOL_CAPACITY 8
00029 #endif
00030 #ifndef __TBB_INIT_DEPTH
00031 #define __TBB_INIT_DEPTH 5
00032 #endif
00033
00034 #include "task.h"
00035 #include "aligned_space.h"
00036 #include "atomic.h"
00037
00038 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00039
00040 #pragma warning (push)
00041 #pragma warning (disable: 4244)
00042 #endif
00043
00044 namespace tbb {
00045
00046 class auto_partitioner;
00047 class simple_partitioner;
00048 class affinity_partitioner;
00049 namespace interface6 {
00050 namespace internal {
00051 class affinity_partition_type;
00052 }
00053 }
00054
00055 namespace internal {
00056 size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
00057
00059 class affinity_partitioner_base_v3: no_copy {
00060 friend class tbb::affinity_partitioner;
00061 friend class tbb::interface6::internal::affinity_partition_type;
00063
00064 affinity_id* my_array;
00066 size_t my_size;
00068 affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
00070 ~affinity_partitioner_base_v3() {resize(0);}
00072
00073 void __TBB_EXPORTED_METHOD resize( unsigned factor );
00074 };
00075
00077 class partition_type_base {
00078 public:
00079 void set_affinity( task & ) {}
00080 void note_affinity( task::affinity_id ) {}
00081 task* continue_after_execute_range() {return NULL;}
00082 bool decide_whether_to_delay() {return false;}
00083 void spawn_or_delay( bool, task& b ) {
00084 task::spawn(b);
00085 }
00086 };
00087
00088 template<typename Range, typename Body, typename Partitioner> class start_scan;
00089
00090 }
00092
00093 namespace serial {
00094 namespace interface6 {
00095 template<typename Range, typename Body, typename Partitioner> class start_for;
00096 }
00097 }
00098
00099 namespace interface6 {
00101 namespace internal {
00102 using namespace tbb::internal;
00103 template<typename Range, typename Body, typename Partitioner> class start_for;
00104 template<typename Range, typename Body, typename Partitioner> class start_reduce;
00105
00107 class flag_task: public task {
00108 public:
00109 tbb::atomic<bool> child_stolen;
00110 flag_task() { child_stolen = false; }
00111 task* execute() { return NULL; }
00112 };
00113
00115 class signal_task: public task {
00116 public:
00117 task* execute() {
00118 if( is_stolen_task() ) {
00119 static_cast<flag_task*>(parent())->child_stolen = true;
00120 }
00121 return NULL;
00122 }
00123 };
00124
00128 typedef unsigned char depth_t;
00129
00131 template <typename T, depth_t MaxCapacity>
00132 class range_vector {
00133 depth_t my_head;
00134 depth_t my_tail;
00135 depth_t my_size;
00136 depth_t my_depth[MaxCapacity];
00137 tbb::aligned_space<T, MaxCapacity> my_pool;
00138
00139 public:
00141 range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
00142 my_depth[0] = 0;
00143 new( my_pool.begin() ) T(elem);
00144 }
00145 ~range_vector() {
00146 while( !empty() ) pop_back();
00147 }
00148 bool empty() const { return my_size == 0; }
00149 depth_t size() const { return my_size; }
00152 void split_to_fill(depth_t max_depth) {
00153 while( my_size < MaxCapacity && my_depth[my_head] < max_depth
00154 && my_pool.begin()[my_head].is_divisible() ) {
00155 depth_t prev = my_head;
00156 my_head = (my_head + 1) % MaxCapacity;
00157 new(my_pool.begin()+my_head) T(my_pool.begin()[prev]);
00158 my_pool.begin()[prev].~T();
00159 new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split());
00160 my_depth[my_head] = ++my_depth[prev];
00161 my_size++;
00162 }
00163 }
00164 void pop_back() {
00165 __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
00166 my_pool.begin()[my_head].~T();
00167 my_size--;
00168 my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
00169 }
00170 void pop_front() {
00171 __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
00172 my_pool.begin()[my_tail].~T();
00173 my_size--;
00174 my_tail = (my_tail + 1) % MaxCapacity;
00175 }
00176 T& back() {
00177 __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
00178 return my_pool.begin()[my_head];
00179 }
00180 T& front() {
00181 __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
00182 return my_pool.begin()[my_tail];
00183 }
00185 depth_t front_depth() {
00186 __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
00187 return my_depth[my_tail];
00188 }
00189 };
00190
00192 template <typename Partition>
00193 struct partition_type_base {
00194
00195 void set_affinity( task & ) {}
00196 void note_affinity( task::affinity_id ) {}
00197 bool check_being_stolen(task &) { return false; }
00198 bool check_for_demand(task &) { return false; }
00199 bool divisions_left() { return true; }
00200 bool should_create_trap() { return false; }
00201 depth_t max_depth() { return 0; }
00202 void align_depth(depth_t) { }
00203
00204 Partition& derived() { return *static_cast<Partition*>(this); }
00205 template<typename StartType>
00206 flag_task* split_work(StartType &start) {
00207 flag_task* parent_ptr = start.create_continuation();
00208 start.set_parent(parent_ptr);
00209 parent_ptr->set_ref_count(2);
00210 StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
00211 start.spawn(right_work);
00212 return parent_ptr;
00213 }
00214 template<typename StartType, typename Range>
00215 void execute(StartType &start, Range &range) {
00216
00217
00218
00219
00220
00221
00222
00223 task* parent_ptr = start.parent();
00224 if( range.is_divisible() ) {
00225 if( derived().divisions_left() )
00226 do parent_ptr = split_work(start);
00227 while( range.is_divisible() && derived().divisions_left() );
00228 if( derived().should_create_trap() ) {
00229 if( parent_ptr->ref_count() > 1 ) {
00230 parent_ptr = start.create_continuation();
00231 start.set_parent(parent_ptr);
00232 } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
00233 parent_ptr->set_ref_count(2);
00234 signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
00235 start.spawn(right_signal);
00236 }
00237 }
00238 if( !range.is_divisible() || !derived().max_depth() )
00239 start.run_body( range );
00240 else {
00241 internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
00242 do {
00243 range_pool.split_to_fill(derived().max_depth());
00244 if( derived().check_for_demand( start ) ) {
00245 if( range_pool.size() > 1 ) {
00246 parent_ptr = start.create_continuation();
00247 start.set_parent(parent_ptr);
00248 parent_ptr->set_ref_count(2);
00249 StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
00250 start.spawn(right_work);
00251 range_pool.pop_front();
00252 continue;
00253 }
00254 if( range_pool.back().is_divisible() )
00255 continue;
00256 }
00257 start.run_body( range_pool.back() );
00258 range_pool.pop_back();
00259 } while( !range_pool.empty() && !start.is_cancelled() );
00260 }
00261 }
00262 };
00263
00265 template <typename Partition>
00266 struct auto_partition_type_base : partition_type_base<Partition> {
00267 size_t my_divisor;
00268 depth_t my_max_depth;
00269 auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
00270 my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
00271 __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
00272 }
00273 auto_partition_type_base(auto_partition_type_base &src, split) {
00274 my_max_depth = src.my_max_depth;
00275 #if __TBB_INITIAL_TASK_IMBALANCE
00276 if( src.my_divisor <= 1 ) my_divisor = 0;
00277 else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
00278 #else
00279 my_divisor = src.my_divisor / 2u;
00280 src.my_divisor = src.my_divisor - my_divisor;
00281 if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor/my_divisor));
00282 #endif
00283 }
00284 bool check_being_stolen( task &t) {
00285 if( !my_divisor ) {
00286 my_divisor = 1;
00287 if( t.is_stolen_task() ) {
00288 #if TBB_USE_EXCEPTIONS
00289
00290 __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
00291
00292
00293
00294 #endif
00295 static_cast<flag_task*>(t.parent())->child_stolen = true;
00296 my_max_depth++;
00297 return true;
00298 }
00299 }
00300 return false;
00301 }
00302 bool divisions_left() {
00303 if( my_divisor > 1 ) return true;
00304 if( my_divisor && my_max_depth > 1 ) {
00305
00306 my_max_depth--;
00307 my_divisor = 0;
00308 return true;
00309 } else return false;
00310 }
00311 bool should_create_trap() {
00312 return my_divisor > 0;
00313 }
00314 bool check_for_demand(task &t) {
00315 if( static_cast<flag_task*>(t.parent())->child_stolen ) {
00316 my_max_depth++;
00317 return true;
00318 } else return false;
00319 }
00320 void align_depth(depth_t base) {
00321 __TBB_ASSERT(base <= my_max_depth, 0);
00322 my_max_depth -= base;
00323 }
00324 depth_t max_depth() { return my_max_depth; }
00325 };
00326
00328 class affinity_partition_type : public auto_partition_type_base<affinity_partition_type> {
00329 static const unsigned factor_power = 4;
00330 static const unsigned factor = 1<<factor_power;
00331 bool my_delay;
00332 unsigned map_begin, map_end, map_mid;
00333 tbb::internal::affinity_id* my_array;
00334 void set_mid() {
00335 unsigned d = (map_end - map_begin)/2;
00336 if( d > factor )
00337 d &= 0u-factor;
00338 map_mid = map_end - d;
00339 }
00340 public:
00341 affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
00342 __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
00343 ap.resize(factor);
00344 my_array = ap.my_array;
00345 map_begin = 0;
00346 map_end = unsigned(ap.my_size);
00347 set_mid();
00348 my_delay = true;
00349 my_divisor /= __TBB_INITIAL_CHUNKS;
00350 my_max_depth = factor_power+1;
00351 __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
00352 }
00353 affinity_partition_type(affinity_partition_type& p, split)
00354 : auto_partition_type_base<affinity_partition_type>(p, split()), my_array(p.my_array) {
00355 __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
00356 map_end = p.map_end;
00357 map_begin = p.map_end = p.map_mid;
00358 set_mid(); p.set_mid();
00359 my_delay = p.my_delay;
00360 }
00361 void set_affinity( task &t ) {
00362 if( map_begin<map_end )
00363 t.set_affinity( my_array[map_begin] );
00364 }
00365 void note_affinity( task::affinity_id id ) {
00366 if( map_begin<map_end )
00367 my_array[map_begin] = id;
00368 }
00369 bool check_for_demand( task &t ) {
00370 if( !my_delay ) {
00371 if( map_mid<map_end ) {
00372 __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
00373 return true;
00374 }
00375 if( static_cast<flag_task*>(t.parent())->child_stolen ) {
00376 my_max_depth++;
00377 return true;
00378 }
00379 } else my_delay = false;
00380 return false;
00381 }
00382 bool divisions_left() {
00383 return my_divisor > 1;
00384 }
00385 bool should_create_trap() {
00386 return true;
00387 }
00388 static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
00389 };
00390
00391 class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
00392 public:
00393 auto_partition_type( const auto_partitioner& ) {}
00394 auto_partition_type( auto_partition_type& src, split)
00395 : auto_partition_type_base<auto_partition_type>(src, split()) {}
00396 static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
00397 };
00398
00399 class simple_partition_type: public partition_type_base<simple_partition_type> {
00400 public:
00401 simple_partition_type( const simple_partitioner& ) {}
00402 simple_partition_type( const simple_partition_type&, split ) {}
00404 template<typename StartType, typename Range>
00405 void execute(StartType &start, Range &range) {
00406 while( range.is_divisible() )
00407 split_work( start );
00408 start.run_body( range );
00409 }
00410
00411 };
00412
00414 class old_auto_partition_type: public tbb::internal::partition_type_base {
00415 size_t num_chunks;
00416 static const size_t VICTIM_CHUNKS = 4;
00417 public:
00418 bool should_execute_range(const task &t) {
00419 if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
00420 num_chunks = VICTIM_CHUNKS;
00421 return num_chunks==1;
00422 }
00423 old_auto_partition_type( const auto_partitioner& )
00424 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00425 old_auto_partition_type( const affinity_partitioner& )
00426 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00427 old_auto_partition_type( old_auto_partition_type& pt, split ) {
00428 num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
00429 }
00430 };
00431
00432 }
00434 }
00435
00437
00439 class simple_partitioner {
00440 public:
00441 simple_partitioner() {}
00442 private:
00443 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00444 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00445 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00446 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00447
00448 class partition_type: public internal::partition_type_base {
00449 public:
00450 bool should_execute_range(const task& ) {return false;}
00451 partition_type( const simple_partitioner& ) {}
00452 partition_type( const partition_type&, split ) {}
00453 };
00454
00455 typedef interface6::internal::simple_partition_type task_partition_type;
00456 };
00457
00459
00462 class auto_partitioner {
00463 public:
00464 auto_partitioner() {}
00465
00466 private:
00467 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00468 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00469 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00470 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00471
00472 typedef interface6::internal::old_auto_partition_type partition_type;
00473
00474 typedef interface6::internal::auto_partition_type task_partition_type;
00475 };
00476
00478 class affinity_partitioner: internal::affinity_partitioner_base_v3 {
00479 public:
00480 affinity_partitioner() {}
00481
00482 private:
00483 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00484 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00485 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00486 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00487
00488 typedef interface6::internal::old_auto_partition_type partition_type;
00489
00490 typedef interface6::internal::affinity_partition_type task_partition_type;
00491 };
00492
00493 }
00494
00495 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00496 #pragma warning (pop)
00497 #endif // warning 4244 is back
00498 #undef __TBB_INITIAL_CHUNKS
00499 #undef __TBB_RANGE_POOL_CAPACITY
00500 #undef __TBB_INIT_DEPTH
00501 #endif