Nested parallelism in OpenMP
I want to map tasks to 3 threads as follows:
1. Each of taska, taskb, and taskc must be executed in separate threads.
2. taska has subtasks task(1), task(2), and task(3).
3. taskb has subtasks task(11), task(12), and task(13).
4. taskc has subtasks task(21), task(22), and task(23).
5. If one of taska, taskb, and taskc finishes and there is at least one unstarted subtask of another task, the thread associated with the finished task should steal the unstarted subtask.
I am not able to achieve this setting. All I was able to produce is the following MWE. In this MWE, the threads do not obey rules 2, 3, and 4.
Here is the MWE:
#include <omp.h>
#include <stdio.h>

/*
 * Leaf subtask: reports which thread executes it, then spins in a loop whose
 * length is proportional to taskid. The returned value is only a by-product
 * of the busy loop.
 */
double task(int taskid)
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%d/%d: taskid=%d\n", tid, nthreads, taskid);

    int i;
    double t = 1.1;
    for (i = 0; i < 10000000 * taskid; i++) {
        t *= t / i;
    }
    return t;
}

/*
 * Top-level task A: reports its thread, spawns subtasks 1, 2 and 3 as untied
 * OpenMP tasks and returns the sum of their results.
 */
double taska()
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%s %d/%d\n", __func__, tid, nthreads);

    double a, b, c;
    //#pragma omp parallel
    //#pragma omp single
    {
        #pragma omp task untied shared(a)
        a = task(1);
        #pragma omp task untied shared(b)
        b = task(2);
        #pragma omp task untied shared(c)
        c = task(3);
    }
    /* The children write to a, b and c, which live in this stack frame, so
     * they must have finished before the sum is read and the frame is left. */
    #pragma omp taskwait
    return a + b + c;
}

/*
 * Top-level task B: reports its thread, spawns subtasks 11, 12 and 13 as
 * untied OpenMP tasks and returns the sum of their results.
 */
double taskb()
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%s %d/%d\n", __func__, tid, nthreads);

    double a, b, c;
    //#pragma omp parallel
    //#pragma omp single
    {
        #pragma omp task untied shared(a)
        a = task(11);
        #pragma omp task untied shared(b)
        b = task(12);
        #pragma omp task untied shared(c)
        c = task(13);
    }
    /* Wait for the children before reading their results (see taska). */
    #pragma omp taskwait
    return a + b + c;
}

/*
 * Top-level task C: reports its thread, spawns subtasks 21, 22 and 23 as
 * untied OpenMP tasks and returns the sum of their results.
 */
double taskc()
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%s %d/%d\n", __func__, tid, nthreads);

    double a, b, c;
    //#pragma omp parallel
    //#pragma omp single
    {
        #pragma omp task untied shared(a)
        a = task(21);
        #pragma omp task untied shared(b)
        b = task(22);
        #pragma omp task untied shared(c)
        c = task(23);
    }
    /* Wait for the children before reading their results (see taska). */
    #pragma omp taskwait
    return a + b + c;
}

/*
 * Requests 3 threads, lets a single thread create the three top-level tasks
 * inside one parallel region, then prints the three results.
 */
int main()
{
    omp_set_num_threads(3);

    double a, b, c;
    #pragma omp parallel
    #pragma omp single
    {
        #pragma omp task untied
        a = taska();
        #pragma omp task untied
        b = taskb();
        #pragma omp task untied
        c = taskc();
    }
    #pragma omp taskwait
    printf("%g %g %g\n", a, b, c);
    return 0;
}
/* Compiled as: */
icpc -Wall -fopenmp -O2 -o nestedomp nestedomp.c
Output:
taskc 1/3
1/3: taskid=21
taska 2/3
taskb 0/3
0/3: taskid=23
2/3: taskid=22
1/3: taskid=1
1/3: taskid=2
2/3: taskid=3
0/3: taskid=11
1/3: taskid=12
2/3: taskid=13
Here, thread 0 starts processing task 23, but it must start processing task 1 or 11.
You can use the thread ID to structure the work distribution:
#pragma omp parallel num_threads(3)
{
    int tid = omp_get_thread_num();
    if (tid == 0) {
        // task 0
    } else if (tid == 1) {
        // task 1
    } else {
        // task 2
    }
}
You can set the number of threads according to your needs and introduce nesting at the task level.
Comments
Post a Comment