parallel_reduce.h

00001 /*
00002     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_reduce_H
00022 #define __TBB_parallel_reduce_H
00023 
00024 #include <new>
00025 #include "task.h"
00026 #include "aligned_space.h"
00027 #include "partitioner.h"
00028 #include "tbb_profiling.h"
00029 
00030 namespace tbb {
00031 
00032 namespace interface6 {
00034 namespace internal {
00035 
00036     using namespace tbb::internal;
00037 
00039 
00040     typedef char reduction_context;
00041 
00043 
00044     template<typename Body>
00045     class finish_reduce: public flag_task {
00047         bool has_right_zombie;
00048         const reduction_context my_context;
00049         Body* my_body;
00050         aligned_space<Body,1> zombie_space;
00051         finish_reduce( reduction_context context_ ) : 
00052             has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
00053             my_context(context_),
00054             my_body(NULL)
00055         {
00056         }
00057         task* execute() {
00058             if( has_right_zombie ) {
00059                 // Right child was stolen.
00060                 Body* s = zombie_space.begin();
00061                 my_body->join( *s );
00062                 s->~Body();
00063             }
00064             if( my_context==1 )  // left child
00065                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
00066             return NULL;
00067         }
00068         template<typename Range,typename Body_, typename Partitioner>
00069         friend class start_reduce;
00070     };
00071 
00073 
00074     template<typename Range, typename Body, typename Partitioner>
00075     class start_reduce: public task {
00076         typedef finish_reduce<Body> finish_type;
00077         Body* my_body;
00078         Range my_range;
00079         typename Partitioner::task_partition_type my_partition;
00080         reduction_context my_context; // TODO: factor out into start_reduce_base
00081         /*override*/ task* execute();
00082         template<typename Body_>
00083         friend class finish_reduce;
00084     
00085 public:
00087         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
00088             my_body(body),
00089             my_range(range),
00090             my_partition(partitioner),
00091             my_context(0)
00092         {
00093         }
00095 
00096         start_reduce( start_reduce& parent_, split ) :
00097             my_body(parent_.my_body),
00098             my_range(parent_.my_range,split()),
00099             my_partition(parent_.my_partition,split()),
00100             my_context(2)
00101         {
00102             my_partition.set_affinity(*this);
00103             parent_.my_context = 1;
00104         }
00106 
00107         start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
00108             my_body(parent_.my_body),
00109             my_range(r),
00110             my_partition(parent_.my_partition,split()),
00111             my_context(2) // right leaf mark
00112         {
00113             my_partition.set_affinity(*this);
00114             my_partition.align_depth( d );
00115             parent_.my_context = 1; // left leaf mark
00116         }
00118         /*override*/ void note_affinity( affinity_id id ) {
00119             my_partition.note_affinity( id );
00120         }
00121         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
00122             if( !range.empty() ) {
00123 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00124                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
00125 #else
00126                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00127                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00128                 task_group_context context;
00129                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00130 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00131             }
00132         }
00133 #if __TBB_TASK_GROUP_CONTEXT
00134         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
00135             if( !range.empty() ) 
00136                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00137         }
00138 #endif /* __TBB_TASK_GROUP_CONTEXT */
00140         finish_type *create_continuation() {
00141             return new( allocate_continuation() ) finish_type(my_context);
00142         }
00144         void run_body( Range &r ) { (*my_body)( r ); }
00145     };
00146     template<typename Range, typename Body, typename Partitioner>
00147     task* start_reduce<Range,Body,Partitioner>::execute() {
00148         my_partition.check_being_stolen( *this );
00149         if( my_context==2 ) { // right child
00150             finish_type* parent_ptr = static_cast<finish_type*>(parent());
00151             if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
00152                 my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
00153                 parent_ptr->has_right_zombie = true;
00154             }
00155         } else __TBB_ASSERT(my_context==0,0);// because left leaf spawns right leafs without recycling
00156         my_partition.execute(*this, my_range);
00157         if( my_context==1 ) {
00158             finish_type* parent_ptr = static_cast<finish_type*>(parent());
00159             __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),0);
00160             itt_store_word_with_release(parent_ptr->my_body, my_body );
00161         }
00162         return NULL;
00163     }
00164 
00165 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00167 
00168     template<typename Body>
00169     class finish_deterministic_reduce: public task {
00170         Body &my_left_body;
00171         Body my_right_body;
00172 
00173         finish_deterministic_reduce( Body &body ) :
00174             my_left_body( body ),
00175             my_right_body( body, split() )
00176         {
00177         }
00178         task* execute() {
00179             my_left_body.join( my_right_body );
00180             return NULL;
00181         }
00182         template<typename Range,typename Body_>
00183         friend class start_deterministic_reduce;
00184     };
00185 
00187 
00188     template<typename Range, typename Body>
00189     class start_deterministic_reduce: public task {
00190         typedef finish_deterministic_reduce<Body> finish_type;
00191         Body &my_body;
00192         Range my_range;
00193         /*override*/ task* execute();
00194 
00196         start_deterministic_reduce( const Range& range, Body& body ) :
00197             my_body( body ),
00198             my_range( range )
00199         {
00200         }
00202 
00203         start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
00204             my_body( c.my_right_body ),
00205             my_range( parent_.my_range, split() )
00206         {
00207         }
00208 
00209 public:
00210         static void run( const Range& range, Body& body ) {
00211             if( !range.empty() ) {
00212 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00213                 task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
00214 #else
00215                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00216                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00217                 task_group_context context;
00218                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00219 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00220             }
00221         }
00222 #if __TBB_TASK_GROUP_CONTEXT
00223         static void run( const Range& range, Body& body, task_group_context& context ) {
00224             if( !range.empty() ) 
00225                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00226         }
00227 #endif /* __TBB_TASK_GROUP_CONTEXT */
00228     };
00229 
00230     template<typename Range, typename Body>
00231     task* start_deterministic_reduce<Range,Body>::execute() {
00232         if( !my_range.is_divisible() ) {
00233             my_body( my_range );
00234             return NULL;
00235         } else {
00236             finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
00237             recycle_as_child_of(c);
00238             c.set_ref_count(2);
00239             start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
00240             task::spawn(b);
00241             return this;
00242         }
00243     }
00244 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00245 } // namespace internal
00247 } //namespace interfaceX
00248 
00250 namespace internal {
00251     using interface6::internal::start_reduce;
00252 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00253     using interface6::internal::start_deterministic_reduce;
00254 #endif
00256 
00260     template<typename Range, typename Value, typename RealBody, typename Reduction>
00261     class lambda_reduce_body {
00262 
00263 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
00264 //       (might require some performance measurements)
00265 
00266         const Value&     identity_element;
00267         const RealBody&  my_real_body;
00268         const Reduction& my_reduction;
00269         Value            my_value;
00270         lambda_reduce_body& operator= ( const lambda_reduce_body& other );
00271     public:
00272         lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
00273             : identity_element(identity)
00274             , my_real_body(body)
00275             , my_reduction(reduction)
00276             , my_value(identity)
00277         { }
00278         lambda_reduce_body( const lambda_reduce_body& other )
00279             : identity_element(other.identity_element)
00280             , my_real_body(other.my_real_body)
00281             , my_reduction(other.my_reduction)
00282             , my_value(other.my_value)
00283         { }
00284         lambda_reduce_body( lambda_reduce_body& other, tbb::split )
00285             : identity_element(other.identity_element)
00286             , my_real_body(other.my_real_body)
00287             , my_reduction(other.my_reduction)
00288             , my_value(other.identity_element)
00289         { }
00290         void operator()(Range& range) {
00291             my_value = my_real_body(range, const_cast<const Value&>(my_value));
00292         }
00293         void join( lambda_reduce_body& rhs ) {
00294             my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
00295         }
00296         Value result() const {
00297             return my_value;
00298         }
00299     };
00300 
00301 } // namespace internal
00303 
00304 // Requirements on Range concept are documented in blocked_range.h
00305 
00324 
00326 
00327 template<typename Range, typename Body>
00328 void parallel_reduce( const Range& range, Body& body ) {
00329     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
00330 }
00331 
00333 
00334 template<typename Range, typename Body>
00335 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00336     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
00337 }
00338 
00340 
00341 template<typename Range, typename Body>
00342 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00343     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
00344 }
00345 
00347 
00348 template<typename Range, typename Body>
00349 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
00350     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
00351 }
00352 
00353 #if __TBB_TASK_GROUP_CONTEXT
00355 
00356 template<typename Range, typename Body>
00357 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
00358     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
00359 }
00360 
00362 
00363 template<typename Range, typename Body>
00364 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
00365     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
00366 }
00367 
00369 
00370 template<typename Range, typename Body>
00371 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
00372     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
00373 }
00374 #endif /* __TBB_TASK_GROUP_CONTEXT */
00375 
00379 
00380 
00381 template<typename Range, typename Value, typename RealBody, typename Reduction>
00382 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00383     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00384     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
00385                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
00386     return body.result();
00387 }
00388 
00390 
00391 template<typename Range, typename Value, typename RealBody, typename Reduction>
00392 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00393                        const simple_partitioner& partitioner ) {
00394     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00395     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00396                           ::run(range, body, partitioner );
00397     return body.result();
00398 }
00399 
00401 
00402 template<typename Range, typename Value, typename RealBody, typename Reduction>
00403 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00404                        const auto_partitioner& partitioner ) {
00405     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00406     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00407                           ::run( range, body, partitioner );
00408     return body.result();
00409 }
00410 
00412 
00413 template<typename Range, typename Value, typename RealBody, typename Reduction>
00414 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00415                        affinity_partitioner& partitioner ) {
00416     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00417     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00418                                         ::run( range, body, partitioner );
00419     return body.result();
00420 }
00421 
00422 #if __TBB_TASK_GROUP_CONTEXT
00424 
00425 template<typename Range, typename Value, typename RealBody, typename Reduction>
00426 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00427                        const simple_partitioner& partitioner, task_group_context& context ) {
00428     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00429     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00430                           ::run( range, body, partitioner, context );
00431     return body.result();
00432 }
00433 
00435 
00436 template<typename Range, typename Value, typename RealBody, typename Reduction>
00437 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00438                        const auto_partitioner& partitioner, task_group_context& context ) {
00439     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00440     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00441                           ::run( range, body, partitioner, context );
00442     return body.result();
00443 }
00444 
00446 
00447 template<typename Range, typename Value, typename RealBody, typename Reduction>
00448 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00449                        affinity_partitioner& partitioner, task_group_context& context ) {
00450     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00451     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00452                                         ::run( range, body, partitioner, context );
00453     return body.result();
00454 }
00455 #endif /* __TBB_TASK_GROUP_CONTEXT */
00456 
00457 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00459 
00460 template<typename Range, typename Body>
00461 void parallel_deterministic_reduce( const Range& range, Body& body ) {
00462     internal::start_deterministic_reduce<Range,Body>::run( range, body );
00463 }
00464 
00465 #if __TBB_TASK_GROUP_CONTEXT
00467 
00468 template<typename Range, typename Body>
00469 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
00470     internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
00471 }
00472 #endif /* __TBB_TASK_GROUP_CONTEXT */
00473 
00477 
00478 
00479 template<typename Range, typename Value, typename RealBody, typename Reduction>
00480 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00481     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00482     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00483                           ::run(range, body);
00484     return body.result();
00485 }
00486 
00487 #if __TBB_TASK_GROUP_CONTEXT
00489 
00490 template<typename Range, typename Value, typename RealBody, typename Reduction>
00491 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00492                        task_group_context& context ) {
00493     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00494     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00495                           ::run( range, body, context );
00496     return body.result();
00497 }
00498 #endif /* __TBB_TASK_GROUP_CONTEXT */
00499 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00500 
00501 
00502 } // namespace tbb
00503 
00504 #endif /* __TBB_parallel_reduce_H */
00505 

Copyright © 2005-2011 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.