73split_range(TIterator it_begin, TIterator it_end, std::size_t idx, std::size_t count)
75 auto range_size = std::distance(it_begin, it_end);
76 auto begin_shift = (range_size*idx) / count;
77 auto end_shift = (range_size*(idx+1)) / count;
78 return { it_begin + begin_shift, it_begin + end_shift };
/**
 * Convenience overload: returns the idx-th of `count` slices of an iterable.
 *
 * Forwards `iterable.begin()` and `iterable.end()` to the iterator-based
 * split_range overload and returns whatever that overload returns.
 *
 * @param iterable  Object exposing begin() and end().
 * @param idx       Index of the requested slice.
 * @param count     Total number of slices.
 */
template <typename TIterable>
auto
split_range(TIterable & iterable, std::size_t idx, std::size_t count)
    -> decltype(split_range(iterable.begin(), iterable.end(), idx, count))
{
  auto first = iterable.begin();
  auto last  = iterable.end();
  return split_range(first, last, idx, count);
}
97 #pragma omp parallel reduction(+:sum)
99 std::size_t thread_idx = omp_get_thread_num();
100 std::size_t thread_cnt = omp_get_max_threads();
101 auto const range =
split_range(image, thread_idx, thread_cnt);
102 sum = std::accumulate(range.begin(), range.end(), sum);
137 std::size_t thread_idx = omp_get_thread_num();
138 std::size_t thread_cnt = omp_get_num_threads();
140 for (
auto const& pt :
split_range(image.domain(), thread_idx, thread_cnt))
141 image.setValue(pt, fn(pt, image(pt)));
153 std::size_t thread_idx = omp_get_thread_num();
154 std::size_t thread_cnt = omp_get_num_threads();
156 auto domain_it =
split_range(image.domain(), thread_idx, thread_cnt).begin();
157 for (
auto & v :
split_range(image, thread_idx, thread_cnt))
159 v = fn(*domain_it, v);
// Program entry point. Judging from the log messages below, it times three
// parallel workloads (scanning a domain, initializing an image through its
// getter/setter, initializing an image through iterators), each for every
// thread count from 1 up to omp_get_max_threads(), and prints a report line
// per run.
// NOTE(review): the timed calls themselves and several control-flow lines
// are not visible in this listing.
166int main(
int argc,
char* argv[])
// Scalar type alias; presumably the image value type — confirm.
171 using Value = double;
// Usage message written to std::cerr: one <domain_size> argument is expected.
176 std::cerr <<
"Usage: " << argv[0] <<
" <domain_size>" << std::endl;
180 trace.
info() <<
"Initialization..." << std::endl;
// argv[1] is parsed as a signed long long and stored in an unsigned size_t.
// NOTE(review): a zero or negative argument would make `domain_size-1` below
// wrap around — confirm whether the input is validated elsewhere.
181 std::size_t domain_size = std::stoll(argv[1]);
// Domain whose bounds are the points diagonal(0) and diagonal(domain_size-1).
182 Domain domain(Point::diagonal(0), Point::diagonal(domain_size-1));
// Baseline duration used as the numerator of the reported speedup.
185 double ref_duration = 0;
// Upper bound of the thread-count loops below.
186 std::size_t max_threads = omp_get_max_threads();
// Print floating-point values in fixed notation with 6 decimals.
187 trace.
info() << std::fixed << std::setprecision(6);
201 trace.
info() <<
"Scanning a domain in parallel..." << std::endl;
// First benchmark: one run per thread count.
202 for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
// Number of threads requested for the parallel regions that follow.
204 omp_set_num_threads(thread_cnt);
// Elapsed time as returned by toc(); reported below with an "s" suffix.
208 const double duration =
toc();
// Keep this run's duration as the speedup baseline.
// NOTE(review): presumably guarded so that only the first (single-thread)
// run sets it — the guard is not visible here.
211 ref_duration = duration;
// Report line: thread count, duration, throughput in millions of domain
// points per second, speedup relative to ref_duration, and the checksum.
213 trace.
info() <<
"\tthreads: " << thread_cnt
214 <<
"\tduration: " << duration <<
"s"
215 <<
"\tspeed: " << 1e-6 *
domain.
size() / duration <<
"Mpt/s"
216 <<
"\tspeedup: " << ref_duration/duration
217 <<
"\tchecksum: " << sum
226 trace.
info() <<
"Initializing an image in parallel using getter and setter..." << std::endl;
// Second benchmark: one run per thread count.
227 for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
// Number of threads requested for the parallel regions that follow.
229 omp_set_num_threads(thread_cnt);
// Elapsed time as returned by toc().
233 const double duration =
toc();
// Keep this run's duration as the speedup baseline (guard not visible; see
// the note in the first benchmark).
236 ref_duration = duration;
// Report line: thread count, duration, throughput in Mpt/s and speedup.
238 trace.
info() <<
"\tthreads: " << thread_cnt
239 <<
"\tduration: " << duration <<
"s"
240 <<
"\tspeed: " << 1e-6 *
domain.
size() / duration <<
"Mpt/s"
241 <<
"\tspeedup: " << ref_duration/duration
251 trace.
info() <<
"Initializing an image in parallel using iterators..." << std::endl;
// Third benchmark: one run per thread count.
252 for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
// Number of threads requested for the parallel regions that follow.
254 omp_set_num_threads(thread_cnt);
// Elapsed time as returned by toc().
258 const double duration =
toc();
// Keep this run's duration as the speedup baseline (guard not visible; see
// the note in the first benchmark).
261 ref_duration = duration;
// Report line: thread count, duration, throughput in Mpt/s and speedup.
263 trace.
info() <<
"\tthreads: " << thread_cnt
264 <<
"\tduration: " << duration <<
"s"
265 <<
"\tspeed: " << 1e-6 *
domain.
size() / duration <<
"Mpt/s"
266 <<
"\tspeedup: " << ref_duration/duration
const Point & lowerBound() const
const Point & upperBound() const
Image::Value calc_image_checksum(Image const &image)
[split_range]
SimpleConstRange< TIterator > split_range(TIterator it_begin, TIterator it_end, std::size_t idx, std::size_t count)
[split_range]
void init_image_iter(Image &image, Function const &fn)
void init_image_getset(Image &image, Function const &fn)
auto sum_fn_on_domain(Domain const &domain, Function const &fn) -> decltype(fn(domain.lowerBound()))