Logo Search packages:      
Sourcecode: tbb version File versions  Download package

parallel_scan.h

/*
    Copyright 2005-2007 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#ifndef __TBB_parallel_scan_H
#define __TBB_parallel_scan_H

#include "task.h"
#include "aligned_space.h"
#include <new>
#include "partitioner.h"

namespace tbb {

//! Used to indicate that the initial scan is being performed.
/** @ingroup algorithms */
00041 struct pre_scan_tag {
    static bool is_final_scan() {return false;}
};

//! Used to indicate that the final scan is being performed.
/** @ingroup algorithms */
00047 struct final_scan_tag {
    static bool is_final_scan() {return true;}
};

//! @cond INTERNAL
namespace internal {

    //! Performs final scan for a leaf 
    /** @ingroup algorithms */
    template<typename Range, typename Body>
    class final_sum: public task {
    public:
        Body body;
    private:
        aligned_space<Range,1> range;
        //! Where to put result of last subrange, or NULL if not last subrange.
        Body* stuff_last;
    public:
        final_sum( Body& body_ ) :
            body(body_,split())
        {
            poison_pointer(stuff_last);
        }
        ~final_sum() {
            range.begin()->~Range();
        }     
        void finish_construction( const Range& range_, Body* stuff_last_ ) {
            new( range.begin() ) Range(range_);
            stuff_last = stuff_last_;
        }
    private:
        /*override*/ task* execute() {
            body( *range.begin(), final_scan_tag() );
            if( stuff_last )
                stuff_last->assign(body);
            return NULL;
        }
    };       

    //! Split work to be done in the scan.
    /** @ingroup algorithms */
    template<typename Range, typename Body>
    class sum_node: public task {
        typedef final_sum<Range,Body> final_sum_type;
    public:
        final_sum_type *incoming; 
        final_sum_type *body;
        Body *stuff_last;
    private:
        final_sum_type *left_sum;
        sum_node *left;
        sum_node *right;     
        bool left_is_final;
        Range range;
        sum_node( const Range range_, bool left_is_final_ ) : 
            left_sum(NULL), 
            left(NULL), 
            right(NULL), 
            left_is_final(left_is_final_), 
            range(range_)
        {
            // Poison fields that will be set by second pass.
            poison_pointer(body);
            poison_pointer(incoming);
        }
        task* create_child( const Range& range, final_sum_type& f, sum_node* n, final_sum_type* incoming, Body* stuff_last ) {
            if( !n ) {
                f.recycle_as_child_of( *this );
                f.finish_construction( range, stuff_last );
                return &f;
            } else {
                n->body = &f;
                n->incoming = incoming;
                n->stuff_last = stuff_last;
                return n;
            }
        }
        /*override*/ task* execute() {
            if( body ) {
                if( incoming )
                    left_sum->body.reverse_join( incoming->body );
                recycle_as_continuation();
                sum_node& c = *this;
                task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
                task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
                set_ref_count( (a!=NULL)+(b!=NULL) );
                body = NULL; 
                if( a ) spawn(*b);
                else a = b;
                return a;
            } else {
                return NULL;
            }
        }
        template<typename Range_,typename Body_,typename Partitioner_>
        friend class start_scan;

        template<typename Range_,typename Body_>
        friend class finish_scan;
    };

    //! Combine partial results
    /** @ingroup algorithms */
    template<typename Range, typename Body>
    class finish_scan: public task {
        typedef sum_node<Range,Body> sum_node_type;
        typedef final_sum<Range,Body> final_sum_type;
        final_sum_type** const sum;
        sum_node_type*& return_slot;
    public:
        final_sum_type* right_zombie;
        sum_node_type& result;

        /*override*/ task* execute() {
            __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
            if( result.left )
                result.left_is_final = false;
            if( right_zombie && sum ) 
                ((*sum)->body).reverse_join(result.left_sum->body);
            __TBB_ASSERT( !return_slot, NULL );
            if( right_zombie || result.right ) {
                return_slot = &result;
            } else {
                destroy( result );
            }
            if( right_zombie && !sum && !result.right ) destroy(*right_zombie);
            return NULL;
        }

        finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
            sum(sum_),
            return_slot(return_slot_), 
            right_zombie(NULL),
            result(result_)
        {
            __TBB_ASSERT( !return_slot, NULL );
        }
    };

    //! Initial task to split the work
    /** @ingroup algorithms */
    template<typename Range, typename Body, typename Partitioner=simple_partitioner>
    class start_scan: public task {
        typedef sum_node<Range,Body> sum_node_type;
        typedef final_sum<Range,Body> final_sum_type;
        final_sum_type* body;
        /** Non-null if caller is requesting total. */
        final_sum_type** sum; 
        sum_node_type** return_slot;
        /** Null if computing root. */
        sum_node_type* parent_sum;
        bool is_final;
        bool is_right_child;
        Range range;
        Partitioner partitioner;
        /*override*/ task* execute();
    public:
        start_scan( 
            sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_,
            final_sum_type**sum_, bool is_final_, sum_node_type* parent_sum_ 
        ) :
            body(&body_),
            sum(sum_),
            return_slot(&return_slot_),
            parent_sum(parent_sum_),
            is_final(is_final_),
            is_right_child(false),
            range(range_),
            partitioner(partitioner_)
        {
            __TBB_ASSERT( !*return_slot, NULL );
        }
    };

    template<typename Range, typename Body, typename Partitioner>
    task* start_scan<Range,Body,Partitioner>::execute() {
        typedef internal::finish_scan<Range,Body> finish_pass1_type;
        finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
        // Inspecting p->result.left_sum would ordinarily be a race condition.
        // But we inspect it only if we are not a stolen task, in which case we
        // know that task assigning to p->result.left_sum has completed.
        bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
        if( treat_as_stolen ) {
            // Invocation is for right child that has been really stolen or needs to be virtually stolen
            p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
            is_final = false;
        }
        task* next_task = NULL;
        if( is_right_child && !treat_as_stolen || partitioner.should_execute_range(range, *this) ) {
            if( is_final )
                (body->body)( range, final_scan_tag() );
            else if( sum )
                (body->body)( range, pre_scan_tag() );
            if( sum ) 
                *sum = body;
            __TBB_ASSERT( !*return_slot, NULL );
        } else {
            sum_node_type* result;
            if( parent_sum ) 
                result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
            else
                result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
            finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
            // Split off right child
            start_scan& b = *new( c.allocate_child() ) start_scan(
                /*return_slot=*/result->right,
                /*range=*/Range(range,split()),
                /*body=*/ *body,
                /*partitioner=*/ Partitioner(partitioner,split()),
                sum,
                is_final,
                result);
            b.is_right_child = true;    
            // Left child is recycling of *this.  Must recycle this before spawning b, 
            // otherwise b might complete and decrement c.ref_count() to zero, which
            // would cause c.execute() to run prematurely.
            recycle_as_child_of(c);
            c.set_ref_count(2);
            c.spawn(b);
            sum = &result->left_sum;
            return_slot = &result->left;
            is_right_child = false;
            next_task = this;
            parent_sum = result; 
            __TBB_ASSERT( !*return_slot, NULL );
        }
        return next_task;
    } 
} // namespace internal
//! @endcond

//! Parallel prefix
/**
  * Type Body must have the following signatures: \n
  *     void operator( const Range& r, pre_scan_tag ); \n
  *     void operator( const Range& r, final_scan_tag ); \n
  *     Body( const Body& b, split );  (be aware that b may have concurrent accesses) \n
  *     void reverse_join( const Body& ); \n
  *     void assign( const Body& ); \n
  *     ~Body \n
  * The partitioner p must define: \n
  *     p.should_execute_range(r,t)   True if r should be executed to completion without further splits. \n
  *     P p2(p,split())               Split the partitioner into p2 and p.      \n
  * @ingroup algorithms
  */
template<typename Range, typename Body, typename Partitioner>
00293 void parallel_scan( const Range& range, Body& body, const Partitioner& partitioner ) {
    if( !range.empty() ) {
        typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
        internal::sum_node<Range,Body>* root = NULL;
        typedef internal::final_sum<Range,Body> final_sum_type;
        final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
        start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
            /*return_slot=*/root,
            range,
            *temp_body,
            partitioner,
            /*sum=*/NULL,
            /*is_final=*/true,
            /*parent_sum=*/NULL);
        task::spawn_root_and_wait( pass1 );
        if( root ) {
            root->body = temp_body;
            root->incoming = NULL;
            root->stuff_last = &body;
            task::spawn_root_and_wait( *root );
        } else {
            body.assign(temp_body->body);
            temp_body->finish_construction( range, NULL );
            temp_body->destroy(*temp_body);
        }
    }
}

//! Parallel prefix with default partitioner
/** 
  * Type Body must have the following signatures: \n
  *     void operator( const Range& r, pre_scan_tag ); \n
  *     void operator( const Range& r, final_scan_tag ); \n
  *     Body( const Body& b, split );  (be aware that b may have concurrent accesses) \n
  *     void reverse_join( const Body& ); \n
  *     void assign( const Body& ); \n
  *     ~Body
  * @ingroup algorithms
  */
template<typename Range, typename Body>
00333 inline void parallel_scan( const Range& range, Body& body ) {
    parallel_scan( range, body, simple_partitioner() );
}

} // namespace tbb

#endif /* __TBB_parallel_scan_H */


Generated by  Doxygen 1.6.0   Back to index