Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb/parallel_for.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_for_H
18 #define __TBB_parallel_for_H
19 
20 #include <new>
21 #include "task.h"
22 #include "partitioner.h"
23 #include "blocked_range.h"
24 #include "tbb_exception.h"
26 
27 namespace tbb {
28 
29 namespace interface9 {
31 namespace internal {
32 
//! Allocate right task with new parent.
// Forward declaration only: start_for::offer_work below calls it before the inline definition is seen.
34  void* allocate_sibling(task* start_for_task, size_t bytes);
35 
37 
//! Task type used in parallel_for.
// A task that stores the range still to be processed, a const copy of the body,
// and the partitioner's per-task state (Partitioner::task_partition_type).
38  template<typename Range, typename Body, typename Partitioner>
39  class start_for: public task {
40  Range my_range;
41  const Body my_body;
42  typename Partitioner::task_partition_type my_partition;
44 
// NOTE(review): listing lines 43, 45 and 46 are absent from this extract. The two lines below are
// the tail of a member that forwards an affinity id to the partition -- presumably
// note_affinity(affinity_id id), "Update affinity info, if any."; confirm against the original header.
47  my_partition.note_affinity( id );
48  }
49 
50  public:
//! Constructor for root task.
// Copies range and body, builds the partition state from the partitioner, and reports the new
// task to fgt_algorithm with a NULL parent.
52  start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
53  my_range(range),
54  my_body(body),
55  my_partition(partitioner)
56  {
57  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
58  }
60 
//! Splitting constructor used to generate children.
// Range and partition state are split off parent_ via split_obj; the body is copied.
// parent_ is reported to fgt_algorithm as the parent of the new task.
61  start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
62  my_range(parent_.my_range, split_obj),
63  my_body(parent_.my_body),
64  my_partition(parent_.my_partition, split_obj)
65  {
66  my_partition.set_affinity(*this);
67  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
68  }
70 
//! Construct right child from the given range as response to the demand.
// Unlike the splitting constructor, the range r is taken as given; only the partition state is
// split from parent_ (with a default split()), and its depth is then aligned to d.
71  start_for( start_for& parent_, const Range& r, depth_t d ) :
72  my_range(r),
73  my_body(parent_.my_body),
74  my_partition(parent_.my_partition, split())
75  {
76  my_partition.set_affinity(*this);
77  my_partition.align_depth( d );
78  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
79  }
// Entry point without a user-supplied context: does nothing for an empty range; otherwise
// allocates the root start_for task and brackets the run with fgt_begin_algorithm /
// fgt_end_algorithm.
// NOTE(review): listing line 92 is absent from this extract -- presumably the call that spawns
// root task `a` and waits for it (task::spawn_root_and_wait); confirm against the original header.
80  static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
81  if( !range.empty() ) {
82 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
83  start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
84 #else
85  // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
86  // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
87  task_group_context context(PARALLEL_FOR);
88  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
89 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
90  // REGION BEGIN
91  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
93  fgt_end_algorithm( (void*)&context );
94  // REGION END
95  }
96  }
97 #if __TBB_TASK_GROUP_CONTEXT
// Entry point with a caller-provided context: same flow as the overload above, except that the
// root task is allocated in `context` instead of a locally created one.
// NOTE(review): listing line 103 is absent from this extract -- presumably the
// spawn-and-wait call for root task `a`; confirm against the original header.
98  static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
99  if( !range.empty() ) {
100  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
101  // REGION BEGIN
102  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
104  fgt_end_algorithm( (void*)&context );
105  // REGION END
106  }
107  }
108 #endif /* __TBB_TASK_GROUP_CONTEXT */
//! Run body for range, serves as callback for partitioner.
// The const_casts only turn the address of the const body into a void* for the
// fgt_alg_begin_body / fgt_alg_end_body calls; the body itself is invoked on r as stored.
109  void run_body( Range &r ) {
111  fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
112  my_body( r );
113  fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
114  }
115 
//! Spawn right task, serves as callback for partitioner.
// The new task is built with the splitting constructor in storage obtained from allocate_sibling.
117  void offer_work(typename Partitioner::split_type& split_obj) {
118  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
119  }
//! Spawn right task, serves as callback for partitioner.
// Variant that hands over an explicit range r together with a depth d (default 0).
121  void offer_work(const Range& r, depth_t d = 0) {
122  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
123  }
124  };
125 
//! Allocate right task with new parent.
// Creates a flag_task as the continuation of start_for_task, makes it start_for_task's parent,
// and returns raw storage of `bytes` bytes for a new child of that parent.
127  // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
128  inline void* allocate_sibling(task* start_for_task, size_t bytes) {
129  task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
130  start_for_task->set_parent(parent_ptr);
// presumably 2 = start_for_task itself plus the sibling allocated on the next line -- TODO confirm
131  parent_ptr->set_ref_count(2);
132  return &parent_ptr->allocate_child().allocate(bytes);
133  }
134 
//! Execute task for parallel_for.
// Lets the partition check whether this task is being stolen, then hands the task and its range
// to the partition's execute(); always returns NULL.
// NOTE(review): listing line 137 (the function signature) is absent from this extract; the
// cross-reference index on this page lists the member as `task * execute() __TBB_override`.
136  template<typename Range, typename Body, typename Partitioner>
138  my_partition.check_being_stolen( *this );
139  my_partition.execute(*this, my_range);
140  return NULL;
141  }
142 } // namespace internal
144 } // namespace interfaceX
145 
147 namespace internal {
149 
//! Calls the function with values from range [begin, end) with a step provided.
// Body adaptor: maps each index i of a blocked_range<Index> to the value my_begin + i*my_step
// and calls my_func with that value.
151  template<typename Function, typename Index>
152  class parallel_for_body : internal::no_assign {
// The functor is held by reference, not copied; begin and step are stored by value.
153  const Function &my_func;
154  const Index my_begin;
155  const Index my_step;
156  public:
// Binds a reference to _func and copies the values of _begin and _step.
157  parallel_for_body( const Function& _func, Index& _begin, Index& _step )
158  : my_func(_func), my_begin(_begin), my_step(_step) {}
159 
// Calls my_func once per index in [r.begin(), r.end()), in increasing index order.
// k starts at my_begin + r.begin()*my_step and is advanced by my_step each iteration
// instead of being recomputed from i.
160  void operator()( const tbb::blocked_range<Index>& r ) const {
161  // A set of local variables to help the compiler with vectorization of the following loop.
162  Index b = r.begin();
163  Index e = r.end();
164  Index ms = my_step;
165  Index k = my_begin + b*ms;
166 
167 #if __INTEL_COMPILER
168 #pragma ivdep
169 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
170 #pragma vector always assert
171 #endif
172 #endif
173  for ( Index i = b; i < e; ++i, k += ms ) {
174  my_func( k );
175  }
176  }
177  };
178 } // namespace internal
180 
181 // Requirements on Range concept are documented in blocked_range.h
182 
193 
195 
//! Parallel iteration over range with default partitioner.
// NOTE(review): the function body (listing line 198) is missing from this extract.
196 template<typename Range, typename Body>
197 void parallel_for( const Range& range, const Body& body ) {
199 }
200 
202 
//! Parallel iteration over range with simple partitioner.
// NOTE(review): the function body (listing line 205) is missing from this extract.
203 template<typename Range, typename Body>
204 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
206 }
207 
209 
//! Parallel iteration over range with auto partitioner.
// NOTE(review): the function body (listing line 212) is missing from this extract.
210 template<typename Range, typename Body>
211 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
213 }
214 
216 
//! Parallel iteration over range with static partitioner.
// NOTE(review): the function body (listing line 219) is missing from this extract.
217 template<typename Range, typename Body>
218 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
220 }
221 
223 
//! Parallel iteration over range with affinity partitioner.
// The partitioner is taken by non-const reference, unlike the simple/auto/static overloads.
// NOTE(review): the function body (listing line 226) is missing from this extract.
224 template<typename Range, typename Body>
225 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
227 }
228 
229 #if __TBB_TASK_GROUP_CONTEXT
230 
//! Parallel iteration over range with a user-supplied context and no explicit partitioner.
// NOTE(review): the function body (listing line 234) is missing from this extract.
232 template<typename Range, typename Body>
233 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
235 }
236 
238 
//! Parallel iteration over range with simple partitioner and user-supplied context.
// Forwards all four arguments to start_for<Range,Body,const simple_partitioner>::run.
239 template<typename Range, typename Body>
240 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
241  internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
242 }
243 
245 
//! Parallel iteration over range with auto partitioner and user-supplied context.
// Forwards all four arguments to start_for<Range,Body,const auto_partitioner>::run.
246 template<typename Range, typename Body>
247 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
248  internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
249 }
250 
252 
//! Parallel iteration over range with static partitioner and user-supplied context.
// Forwards all four arguments to start_for<Range,Body,const static_partitioner>::run.
253 template<typename Range, typename Body>
254 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
255  internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
256 }
257 
259 
//! Parallel iteration over range with affinity partitioner and user-supplied context.
// Forwards all four arguments to start_for<Range,Body,affinity_partitioner>::run; the
// partitioner is passed as a non-const reference.
260 template<typename Range, typename Body>
261 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
262  internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
263 }
264 #endif /* __TBB_TASK_GROUP_CONTEXT */
265 
266 
267 namespace strict_ppl {
268 
//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
// step <= 0 : reports eid_nonpositive_step through internal::throw_exception.
// last <= first : nothing is executed.
// otherwise : runs tbb::parallel_for over the index range [0, end), where end is the number of
//             values first, first+step, ... that are below last; parallel_for_body maps each
//             index back to its value and calls f with it.
270 template <typename Index, typename Function, typename Partitioner>
272 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
273  if (step <= 0 )
274  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
275  else if (last > first) {
276  // Above "else" avoids "potential divide by zero" warning on some platforms
277  Index end = (last - first - Index(1)) / step + Index(1);
278  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
279  internal::parallel_for_body<Function, Index> body(f, first, step);
280  tbb::parallel_for(range, body, partitioner);
281  }
282 }
283 
//! Parallel iteration over a range of integers with a step provided and default partitioner.
// Forwards to parallel_for_impl with a temporary auto_partitioner.
285 template <typename Index, typename Function>
286 void parallel_for(Index first, Index last, Index step, const Function& f) {
287  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
288 }
//! Parallel iteration over a range of integers with a step provided and simple partitioner.
// Forwards to parallel_for_impl with the caller's partitioner.
290 template <typename Index, typename Function>
291 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
292  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
293 }
//! Parallel iteration over a range of integers with a step provided and auto partitioner.
// Forwards to parallel_for_impl with the caller's partitioner.
295 template <typename Index, typename Function>
296 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
297  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
298 }
//! Parallel iteration over a range of integers with a step provided and static partitioner.
// Forwards to parallel_for_impl with the caller's partitioner.
300 template <typename Index, typename Function>
301 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
302  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
303 }
//! Parallel iteration over a range of integers with a step provided and affinity partitioner.
// Forwards to parallel_for_impl; template arguments are deduced, so the partitioner stays non-const.
305 template <typename Index, typename Function>
306 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
307  parallel_for_impl(first, last, step, f, partitioner);
308 }
309 
//! Parallel iteration over a range of integers with a step of 1 and default partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1) and a temporary auto_partitioner.
311 template <typename Index, typename Function>
312 void parallel_for(Index first, Index last, const Function& f) {
313  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
314 }
//! Parallel iteration over a range of integers with a step of 1 and simple partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
316 template <typename Index, typename Function>
317 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
318  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
319 }
//! Parallel iteration over a range of integers with a step of 1 and auto partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
321 template <typename Index, typename Function>
322 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
323  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
324 }
//! Parallel iteration over a range of integers with a step of 1 and static partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
326 template <typename Index, typename Function>
327 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
328  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
329 }
//! Parallel iteration over a range of integers with a step of 1 and affinity partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1); template arguments are deduced.
331 template <typename Index, typename Function>
332 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
333  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
334 }
335 
336 #if __TBB_TASK_GROUP_CONTEXT
//! Implementation of parallel iteration over stepped range of integers with explicit step, partitioner and task group context.
// step <= 0 : reports eid_nonpositive_step through internal::throw_exception.
// last <= first : nothing is executed.
// otherwise : runs tbb::parallel_for in `context` over the index range [0, end), where end is
//             the number of values first, first+step, ... that are below last.
337 template <typename Index, typename Function, typename Partitioner>
339 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
340  if (step <= 0 )
341  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
342  else if (last > first) {
343  // Above "else" avoids "potential divide by zero" warning on some platforms
344  Index end = (last - first - Index(1)) / step + Index(1);
345  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
346  internal::parallel_for_body<Function, Index> body(f, first, step);
347  tbb::parallel_for(range, body, partitioner, context);
348  }
349 }
350 
//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner.
// Forwards to parallel_for_impl with a temporary auto_partitioner.
352 template <typename Index, typename Function>
353 void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
354  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
355 }
//! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner.
// Forwards to parallel_for_impl with the caller's partitioner and context.
357  template <typename Index, typename Function>
358 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
359  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
360 }
//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner.
// Forwards to parallel_for_impl with the caller's partitioner and context.
362  template <typename Index, typename Function>
363 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
364  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
365 }
//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner.
// Forwards to parallel_for_impl with the caller's partitioner and context.
367 template <typename Index, typename Function>
368 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
369  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
370 }
//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner.
// Forwards to parallel_for_impl; template arguments are deduced, so the partitioner stays non-const.
372  template <typename Index, typename Function>
373 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
374  parallel_for_impl(first, last, step, f, partitioner, context);
375 }
376 
377 
//! Parallel iteration over a range of integers with a step of 1, explicit task group context, and default partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1) and a temporary auto_partitioner.
379 template <typename Index, typename Function>
380 void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
381  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
382 }
//! Parallel iteration over a range of integers with a step of 1, explicit task group context, and simple partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
384  template <typename Index, typename Function>
385 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
386  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
387 }
//! Parallel iteration over a range of integers with a step of 1, explicit task group context, and auto partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
389  template <typename Index, typename Function>
390 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
391  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
392 }
//! Parallel iteration over a range of integers with a step of 1, explicit task group context, and static partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1).
394 template <typename Index, typename Function>
395 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
396  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
397 }
//! Parallel iteration over a range of integers with a step of 1, explicit task group context, and affinity partitioner.
// Forwards to parallel_for_impl with step static_cast<Index>(1); template arguments are deduced.
399  template <typename Index, typename Function>
400 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
401  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
402 }
403 
404 #endif /* __TBB_TASK_GROUP_CONTEXT */
405 
406 
407 } // namespace strict_ppl
408 
410 
411 } // namespace tbb
412 
413 #if TBB_PREVIEW_SERIAL_SUBSET
414 #define __TBB_NORMAL_EXECUTION
415 #include "../serial/tbb/parallel_for.h"
416 #undef __TBB_NORMAL_EXECUTION
417 #endif
418 
419 #endif /* __TBB_parallel_for_H */
void * allocate_sibling(task *start_for_task, size_t bytes)
Allocate right task with new parent.
void parallel_for(const Range &range, const Body &body)
Parallel iteration over range with default partitioner.
#define __TBB_override
Definition: tbb_stddef.h:240
auto last(Container &c) -> decltype(begin(c))
const_iterator end() const
One past last value in range.
Definition: blocked_range.h:72
A static partitioner.
Definition: partitioner.h:629
A simple partitioner.
Definition: partitioner.h:583
task_group_context * context()
This method is deprecated and will be removed in the future.
Definition: task.h:848
Calls the function with values from range [begin, end) with a step provided.
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:778
parallel_for_body(const Function &_func, Index &_begin, Index &_step)
const_iterator begin() const
Beginning of range.
Definition: blocked_range.h:69
auto first(Container &c) -> decltype(begin(c))
static void fgt_alg_end_body(void *)
The graph class.
void parallel_for_impl(Index first, Index last, Index step, const Function &f, Partitioner &partitioner)
Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
void run_body(Range &r)
Run body for range, serves as callback for partitioner.
Used to form groups of tasks.
Definition: task.h:332
void note_affinity(affinity_id id) __TBB_override
Update affinity info, if any.
static void fgt_algorithm(string_index, void *, void *)
internal::allocate_child_proxy & allocate_child()
Returns proxy for overloaded new that allocates a child task of *this.
Definition: task.h:651
An auto partitioner.
Definition: partitioner.h:610
static void run(const Range &range, const Body &body, Partitioner &partitioner)
static void run(const Range &range, const Body &body, Partitioner &partitioner, task_group_context &context)
Task type used in parallel_for.
static void fgt_begin_algorithm(string_index, void *)
An affinity partitioner.
Definition: partitioner.h:648
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
start_for(const Range &range, const Body &body, Partitioner &partitioner)
Constructor for root task.
task * execute() __TBB_override
Execute task for parallel_for.
Partitioner::task_partition_type my_partition
void set_ref_count(int count)
Set reference count.
Definition: task.h:731
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:595
void offer_work(typename Partitioner::split_type &split_obj)
Spawn right task; serves as callback for partitioner.
internal::affinity_id affinity_id
An id as used for specifying affinity.
Definition: task.h:884
Base class for user-defined tasks.
Definition: task.h:589
void offer_work(const Range &r, depth_t d=0)
Spawn right task; serves as callback for partitioner.
internal::allocate_continuation_proxy & allocate_continuation()
Returns proxy for overloaded new that allocates a continuation task of *this.
Definition: task.h:646
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:633
static void fgt_alg_begin_body(string_index, void *, void *)
virtual task * execute()=0
Should be overridden by derived classes.
A range over which to iterate.
Definition: blocked_range.h:45
void parallel_for(Index first, Index last, Index step, const Function &f)
Parallel iteration over a range of integers with a step provided and default partitioner.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task * task
void set_parent(task *p)
Sets parent task pointer to specified value.
Definition: task.h:838
Join task node that contains shared flag for stealing feedback.
Definition: partitioner.h:125
static void fgt_end_algorithm(void *)
void operator()(const tbb::blocked_range< Index > &r) const
start_for(start_for &parent_, typename Partitioner::split_type &split_obj)
Splitting constructor used to generate children.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
start_for(start_for &parent_, const Range &r, depth_t d)
Construct right child from the given range as response to the demand.
void throw_exception(exception_id eid)
Versionless convenience wrapper for throw_exception_v4()
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:395

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.