Nested parallelism in OpenMP
I want to map the tasks to 3 threads as follows:
1. Each of `taska`, `taskb`, and `taskc` must be executed by a separate thread.
2. `taska` has the subtasks `task(1)`, `task(2)`, and `task(3)`.
3. `taskb` has the subtasks `task(11)`, `task(12)`, and `task(13)`.
4. `taskc` has the subtasks `task(21)`, `task(22)`, and `task(23)`.
5. If one of `taska`, `taskb`, and `taskc` finishes and there is at least one unstarted subtask of another task, the thread associated with the finished task should steal the unstarted subtask.
I was not able to achieve this setting. All I was able to come up with is the following MWE. In this MWE, the threads do not obey rules 2, 3, and 4.
Here is the MWE:
/*
 * Minimal working example: three top-level tasks (taska, taskb, taskc), each
 * spawning three OpenMP subtasks, executed by a team of three threads.
 * Requires <stdio.h> and <omp.h>; build with OpenMP enabled (e.g. -fopenmp).
 */

/*
 * Dummy workload whose iteration count grows with taskid.
 * Prints "<thread>/<team size>: taskid=<id>" for the executing thread and
 * returns the final value of the dummy accumulator.
 */
double task(int taskid)
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%d/%d: taskid=%d\n", tid, nthreads, taskid);

    double t = 1.1;
    /* Start at 1 so the first iteration does not divide by zero. */
    for (int i = 1; i < 10000000 * taskid; i++) {
        t *= t / i;
    }
    return t;
}

/*
 * Shared body of taska/taskb/taskc: print "<name> <thread>/<team size>",
 * spawn the subtasks first_id, first_id + 1 and first_id + 2 as untied
 * OpenMP tasks, and return the sum of their results.
 */
static double run_subtasks(const char *name, int first_id)
{
    int tid = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    printf("%s %d/%d\n", name, tid, nthreads);

    double part[3] = {0.0, 0.0, 0.0};
    for (int k = 0; k < 3; k++) {
        #pragma omp task untied shared(part) firstprivate(k, first_id)
        part[k] = task(first_id + k);
    }

    /*
     * The child tasks write into this function's stack frame, so they must
     * finish before the results are read and before the frame is released.
     */
    #pragma omp taskwait
    return part[0] + part[1] + part[2];
}

/* Top-level task A: runs subtasks 1, 2, 3 and returns their sum. */
double taska()
{
    return run_subtasks(__func__, 1);
}

/* Top-level task B: runs subtasks 11, 12, 13 and returns their sum. */
double taskb()
{
    return run_subtasks(__func__, 11);
}

/* Top-level task C: runs subtasks 21, 22, 23 and returns their sum. */
double taskc()
{
    return run_subtasks(__func__, 21);
}

/*
 * Creates a team of three threads; one thread generates the three top-level
 * tasks and the whole team executes them together with their subtasks.
 */
int main(void)
{
    omp_set_num_threads(3);
    double a = 0.0, b = 0.0, c = 0.0;

    #pragma omp parallel
    #pragma omp single
    {
        #pragma omp task untied shared(a)
        a = taska();
        #pragma omp task untied shared(b)
        b = taskb();
        #pragma omp task untied shared(c)
        c = taskc();
    }
    /*
     * No taskwait is needed here: the implicit barrier that ends the
     * parallel region does not release until every generated task is done.
     */
    printf("%g %g %g\n", a, b, c);
    return 0;
}
Compiled as:
icpc -Wall -fopenmp -O2 -o nestedomp nestedomp.c
output:
taskc 1/3
1/3: taskid=21
taska 2/3
taskb 0/3
0/3: taskid=23
2/3: taskid=22
1/3: taskid=1
1/3: taskid=2
2/3: taskid=3
0/3: taskid=11
1/3: taskid=12
2/3: taskid=13
Here, thread 0 starts processing task 23, but it must start processing task 1 or 11.
You could use the thread ID to structure the work distribution:
#pragma omp parallel num_threads(3) { int tid = omp_get_thread_num(); if (tid == 0) // task 0 } else if (tid == 1) { // task 1 } else // task 2 }
You can set the number of threads according to your needs and introduce nesting at the task level.
Comments
Post a Comment