Bugzilla – Attachment 694 Details for
Bug 1159
Fine tuning of the evaluation on a per expression basis
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This Bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
wrapper for parallelization
parallel_for_each.h (text/x-c), 5.17 KB, created by
Dmitry Zhdanov
on 2016-04-12 22:43:42 UTC
(
hide
)
Description:
wrapper for parallelization
Filename:
MIME Type:
Creator:
Dmitry Zhdanov
Created:
2016-04-12 22:43:42 UTC
Size:
5.17 KB
patch
obsolete
// Parallel for-each over [_start_index, _end_index) with optional work
// re-splitting ("stealing") between worker threads.
//
// The range is cut into one contiguous chunk per worker; each worker runs
//     _sit(chunk_begin, chunk_end, locked, threadNo)
// on its chunk.  With resplitting == true, a worker that finishes its own
// chunk scans the others, picks the one with the most remaining work, takes
// the upper half of that chunk for itself, and repeats until no chunk is
// worth splitting.  In that mode _sit must consume work by advancing
// `chunk_begin` toward `chunk_end` and must back off while `locked` is true,
// because a stealing thread may shrink the chunk concurrently.
//
// Template parameters:
//   resplitting        - enable work stealing (default true).
//   _IteratorType      - random-access iterator / integer index type
//                        (supports +, -, <; difference convertible to
//                        ptrdiff_t).
//   _IterarorAlgorithm - callable taking (volatile _IteratorType& begin,
//                        volatile _IteratorType& end, volatile bool& locked,
//                        size_t threadNo).
//
// Parameters:
//   _start_index, _end_index - range to process.
//   iterator_increment       - step size; chunk boundaries are rounded up to
//                              multiples of this value.
//   _sit                     - the per-chunk worker (see above).
//   concurrentThreadsNo      - worker count; 0 = autodetect.
//
// NOTE(review): the stealing protocol synchronizes through `volatile`
// variables, which is not a C++ synchronization primitive — formally this is
// a data race.  A correct fix would use std::atomic throughout; the original
// protocol is preserved here unchanged, and only memory-management and
// portability bugs are fixed.
template<bool resplitting = true, class _IteratorType, class _IterarorAlgorithm>
void parallel_for_each(_IteratorType _start_index, _IteratorType _end_index,
                       ptrdiff_t iterator_increment, _IterarorAlgorithm _sit,
                       int concurrentThreadsNo = 0)
{
    typedef size_t thread_index_t;

    if (concurrentThreadsNo == 0)
    {
#ifdef UTIL_USE_OPENMP
        // FIX: was omp_get_num_threads(), which returns 1 outside a parallel
        // region, so autodetection always produced a serial run.
        concurrentThreadsNo = omp_get_max_threads();
#else
        concurrentThreadsNo = (int)std::thread::hardware_concurrency();
#endif
    }
    // FIX: hardware_concurrency() may legitimately return 0 ("unknown");
    // previously that caused a division by zero in the chunk-size math below.
    if (concurrentThreadsNo <= 0)
        concurrentThreadsNo = 1;

    // Per-worker chunk bounds, "being stolen from" flags and steal counters.
    // (The original also allocated a locked_flagsX array that was referenced
    // only from commented-out code and was never freed; removed.)
    volatile _IteratorType* pi_starts = new _IteratorType[concurrentThreadsNo];
    volatile _IteratorType* pi_ends = new _IteratorType[concurrentThreadsNo];
    volatile bool* locked_flags = new bool[concurrentThreadsNo];
    std::atomic<int>* split_counters = new std::atomic<int>[concurrentThreadsNo];

    // Chunk size = ceil(range / workers), rounded up to a multiple of
    // iterator_increment so chunk boundaries stay on step boundaries.
    size_t memory_chunk_size = ((_end_index - _start_index) / concurrentThreadsNo)
        + (((_end_index - _start_index) % concurrentThreadsNo == 0) ? 0 : 1);
    if (memory_chunk_size % iterator_increment != 0)
        memory_chunk_size = memory_chunk_size + iterator_increment
                          - memory_chunk_size % iterator_increment;

    // Lay out the initial per-worker [start, end) chunks.
    volatile _IteratorType* pi_s = pi_starts;
    volatile _IteratorType* pi_e = pi_ends;
    volatile bool* locked = locked_flags;       // flag managed by stealing threads
    std::atomic<int>* split_c = split_counters;

    *locked_flags = false;
    *pi_starts = _start_index;
    *pi_ends = *pi_starts + memory_chunk_size;
    *split_c = 0;
    while (*pi_e < _end_index)
    {
        *(++split_c) = 0;
        *(++locked) = false;
        *(++pi_s) = *pi_e;
        *(++pi_e) = *pi_s + memory_chunk_size;
    }
    *pi_e = _end_index;                         // clamp last chunk to range end
    thread_index_t thread_count = pi_e - pi_ends + 1;

    auto parallel_algorithm = [&](thread_index_t threadNo)
    {
        if (!resplitting)
        {
            // Static partitioning: run the worker once on its own chunk.
            _sit(pi_starts[threadNo], pi_ends[threadNo], locked_flags[threadNo], threadNo);
        }
        else
        {
            _IteratorType volatile& pi = pi_starts[threadNo];
            _IteratorType volatile& pi_end = pi_ends[threadNo];
            bool volatile& my_locked = locked_flags[threadNo];
            do
            {
                // Process whatever is currently left of this worker's chunk.
                _sit(pi, pi_end, my_locked, threadNo);

                // Wait until nobody is mid-steal on our chunk before going
                // stealing ourselves.
                while (split_counters[threadNo] > 0)
                    std::this_thread::yield();  // FIX: was x86-only _mm_pause()
                bool need_retry = true;
                do {
                    // Pick the victim with the most remaining work.
                    thread_index_t new_threadNo = 0;
                    ptrdiff_t delta = 0, delta_test;
                    for (thread_index_t candidate_threadNo = 0; candidate_threadNo < thread_count; candidate_threadNo++)
                    {
                        if (candidate_threadNo != threadNo)
                        {
                            delta_test = (pi_ends[candidate_threadNo] - pi_starts[candidate_threadNo]);
                            if (delta_test > delta)
                            {
                                delta = delta_test;
                                new_threadNo = candidate_threadNo;
                            }
                        }
                    }
                    if (delta > 2 * iterator_increment)
                    {
                        // Announce the steal; proceed only if we are the sole
                        // stealer and the victim is not already locked.
                        split_counters[new_threadNo]++;
                        if (split_counters[new_threadNo] == 1 && !locked_flags[new_threadNo])
                        {
                            locked_flags[new_threadNo] = true;
                            // Take the upper half of the victim's chunk,
                            // rounded down to a multiple of iterator_increment.
                            delta = iterator_increment * (ptrdiff_t)((pi_ends[new_threadNo] - pi_starts[new_threadNo]) / 2 / iterator_increment);
                            if (delta > iterator_increment)
                            {
                                if (pi_end > pi_ends[new_threadNo])
                                {
                                    pi_end = pi_ends[new_threadNo];
                                    pi = pi_end - delta;
                                }
                                else
                                {
                                    pi = pi_ends[new_threadNo] - delta;
                                    pi_end = pi_ends[new_threadNo];
                                }
                                pi_ends[new_threadNo] = pi;
                                locked_flags[new_threadNo] = false;
                                need_retry = false;
                            }
                            else
                            {
                                // Too little left after rounding — undo lock.
                                locked_flags[new_threadNo] = false;
                            }
                        }
                        split_counters[new_threadNo]--;
                    }
                    else
                    {
                        // Nothing anywhere worth splitting: worker is done.
                        return;
                    }
                } while (need_retry);
            } while (true);
        }
    };

#ifdef UTIL_USE_OPENMP
    if (thread_count > 1)
    {
#pragma omp parallel for num_threads((int)thread_count)
        for (ptrdiff_t i = 0; i < (ptrdiff_t)thread_count; i++) {
            parallel_algorithm(i);
        }
    }
    else
        parallel_algorithm(0);
#else
    std::vector<std::thread> workers;
    workers.reserve(thread_count - 1);
    for (thread_index_t i = 1; i < thread_count; i++)
        workers.emplace_back(parallel_algorithm, i);
    parallel_algorithm(0);                      // calling thread is worker 0
    for (std::thread& t : workers)
        t.join();
#endif

    // FIX: these arrays were allocated with new[] but released with scalar
    // delete (undefined behavior); locked_flagsX additionally leaked.
    delete[] pi_starts;
    delete[] pi_ends;
    delete[] locked_flags;
    delete[] split_counters;
}
// NOTE(review): the original text ended with an extra unmatched "};" here —
// presumably the closer of a scope opened before the excerpt shown; no
// matching opener is visible, so the stray brace has been dropped.
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 1159
: 694