e460634820
gcc/ 2015-11-14 Jakub Jelinek <jakub@redhat.com> * omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_* internal calls - 0 if ordered simd and 1 for ordered threads simd. * tree-vectorizer.c (adjust_simduid_builtins): If GOMP_SIMD_ORDERED_* argument is 1, replace it with GOMP_ordered_* call instead of removing it. gcc/c/ 2015-11-14 Jakub Jelinek <jakub@redhat.com> * c-typeck.c (c_finish_omp_clauses): Don't mark GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable. gcc/cp/ 2015-11-14 Jakub Jelinek <jakub@redhat.com> * semantics.c (finish_omp_clauses): Don't mark GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable. libgomp/ 2015-11-14 Jakub Jelinek <jakub@redhat.com> Aldy Hernandez <aldyh@redhat.com> Ilya Verbin <ilya.verbin@intel.com> * ordered.c (gomp_doacross_init, GOMP_doacross_post, GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post, GOMP_doacross_ull_wait): For GFS_GUIDED don't divide number of iterators or IV by chunk size. * parallel.c (gomp_resolve_num_threads): Don't assume that if thr->ts.team is non-NULL, then pool must be non-NULL. * libgomp-plugin.h (GOMP_PLUGIN_target_task_completion): Declare. * libgomp.map (GOMP_PLUGIN_1.1): New symbol version, export GOMP_PLUGIN_target_task_completion. * Makefile.am (libgomp_la_SOURCES): Add priority_queue.c. * Makefile.in: Regenerate. * libgomp.h: Shuffle prototypes and forward definitions around so priority queues can be defined. (enum gomp_task_kind): Add GOMP_TASK_ASYNC_RUNNING. (enum gomp_target_task_state): New enum. (struct gomp_target_task): Add state, tgt, task and team fields. (gomp_create_target_task): Change return type to bool, add state argument. (gomp_target_task_fn): Change return type to bool. (struct gomp_device_descr): Add async_run_func. (struct gomp_task): Remove children, next_child, prev_child, next_queue, prev_queue, next_taskgroup, prev_taskgroup. Add pnode field. (struct gomp_taskgroup): Remove children. Add taskgroup_queue. 
(struct gomp_team): Change task_queue type to a priority queue. (splay_compare): Define inline. (priority_queue_offset): New. (priority_node_to_task): New. (task_to_priority_node): New. * oacc-mem.c: Do not include splay-tree.h. * priority_queue.c: New file. * priority_queue.h: New file. * splay-tree.c: Do not include splay-tree.h. (splay_tree_foreach_internal): New. (splay_tree_foreach): New. * splay-tree.h: Become re-entrant if splay_tree_prefix is defined. (splay_tree_callback): Define typedef. * target.c (splay_compare): Move to libgomp.h. (GOMP_target): Don't adjust *thr in any way around running offloaded task. (GOMP_target_ext): Likewise. Handle target nowait. (GOMP_target_update_ext, GOMP_target_enter_exit_data): Check return value from gomp_create_target_task, if false, fallthrough as if no dependencies exist. (gomp_target_task_fn): Change return type to bool, return true if the task should have another part scheduled later. Handle target nowait. (gomp_load_plugin_for_device): Initialize async_run. * task.c (gomp_init_task): Initialize children_queue. (gomp_clear_parent_in_list): New. (gomp_clear_parent_in_tree): New. (gomp_clear_parent): Handle priorities. (GOMP_task): Likewise. (priority_queue_move_task_first, gomp_target_task_completion, GOMP_PLUGIN_target_task_completion): New functions. (gomp_create_target_task): Use priority queues. Change return type to bool, add state argument, return false if for async {{enter,exit} data,update} constructs no dependencies need to be waited for, handle target nowait. Set task->fn to NULL instead of gomp_target_task_fn. (verify_children_queue): Remove. (priority_list_upgrade_task): New. (priority_queue_upgrade_task): New. (verify_task_queue): Remove. (priority_list_downgrade_task): New. (priority_queue_downgrade_task): New. (gomp_task_run_pre): Use priority queues. Abstract code out to priority_queue_downgrade_task. (gomp_task_run_post_handle_dependers): Use priority queues. 
(gomp_task_run_post_remove_parent): Likewise. (gomp_task_run_post_remove_taskgroup): Likewise. (gomp_barrier_handle_tasks): Likewise. Handle target nowait target tasks specially. (GOMP_taskwait): Likewise. (gomp_task_maybe_wait_for_dependencies): Likewise. Abstract code to priority-queue_upgrade_task. (GOMP_taskgroup_start): Use priority queues. (GOMP_taskgroup_end): Likewise. Handle target nowait target tasks specially. If taskgroup is NULL, and thr->ts.level is 0, act as a barrier. * taskloop.c (GOMP_taskloop): Handle priorities. * team.c (gomp_new_team): Call priority_queue_init. (free_team): Call priority_queue_free. (gomp_free_thread): Call gomp_team_end if thr->ts.team is artificial team created for target nowait in implicit parallel region. (gomp_team_start): For nested check, test thr->ts.level instead of thr->ts.team != NULL. * testsuite/libgomp.c/doacross-3.c: New test. * testsuite/libgomp.c/ordered-5.c: New test. * testsuite/libgomp.c/priority.c: New test. * testsuite/libgomp.c/target-31.c: New test. * testsuite/libgomp.c/target-32.c: New test. * testsuite/libgomp.c/target-33.c: New test. * testsuite/libgomp.c/target-34.c: New test. liboffloadmic/ 2015-11-14 Ilya Verbin <ilya.verbin@intel.com> * runtime/offload_host.cpp (task_completion_callback): New variable. (offload_proxy_task_completed_ooo): Call task_completion_callback. (__offload_register_task_callback): New function. * runtime/offload_host.h (__offload_register_task_callback): New declaration. * plugin/libgomp-plugin-intelmic.cpp (offload): Add async_data argument, handle async offloading. (register_main_image): Call register_main_image. (GOMP_OFFLOAD_init_device, get_target_table, GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_free, GOMP_OFFLOAD_host2dev, GOMP_OFFLOAD_dev2host, GOMP_OFFLOAD_dev2dev) Adjust offload callers. (GOMP_OFFLOAD_async_run): New function. (GOMP_OFFLOAD_run): Implement using GOMP_OFFLOAD_async_run. From-SVN: r230381
164 lines
4.4 KiB
C
164 lines
4.4 KiB
C
#include <omp.h>
|
|
#include <stdlib.h>
|
|
|
|
/* File-scope scalars and two-element arrays whose initial values the
   checks in main compare against.  */
int a = 1;
int b = 2;
int c = 3;
int d = 4;
int e[2] = { 5, 6 };
int f[2] = { 7, 8 };
int g[2] = { 9, 10 };
int h[2] = { 11, 12 };
|
|
|
|
/* Hand each of the eight pointer arguments to an empty asm statement that
   takes the pointer as a register input and clobbers "memory".  The
   function emits no instructions of its own through the asm and does not
   dereference or modify anything.  */
__attribute__((noinline, noclone)) void
use (int *k, int *l, int *m, int *n, int *o, int *p, int *q, int *r)
{
  int *const args[] = { k, l, m, n, o, p, q, r };
  unsigned int idx;

  /* Same order as the parameter list: k first, r last.  */
  for (idx = 0; idx < sizeof args / sizeof args[0]; idx++)
    {
      __asm__ __volatile__ ("" : : "r" (args[idx]) : "memory");
    }
}
|
|
|
|
/* Mark use as a declare-target function; main calls it from inside its
   target region.  */
#pragma omp declare target to (use)
|
|
|
|
int
|
|
main ()
|
|
{
|
|
int err = 0, r = -1, t[4];
|
|
long s[4] = { -1, -2, -3, -4 };
|
|
int j = 13, k = 14, l[2] = { 15, 16 }, m[2] = { 17, 18 };
|
|
#pragma omp target private (a, b, e, f) firstprivate (c, d, g, h) map(from: r, s, t) \
|
|
map(tofrom: err, j, l) map(to: k, m)
|
|
#pragma omp teams num_teams (4) thread_limit (8) private (b, f) firstprivate (d, h, k, m)
|
|
{
|
|
int u1 = k, u2[2] = { m[0], m[1] };
|
|
int u3[64];
|
|
int i;
|
|
for (i = 0; i < 64; i++)
|
|
u3[i] = k + i;
|
|
#pragma omp parallel num_threads (1)
|
|
{
|
|
if (c != 3 || d != 4 || g[0] != 9 || g[1] != 10 || h[0] != 11 || h[1] != 12 || k != 14 || m[0] != 17 || m[1] != 18)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
b = omp_get_team_num ();
|
|
if (b >= 4)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
if (b == 0)
|
|
{
|
|
a = omp_get_num_teams ();
|
|
e[0] = 2 * a;
|
|
e[1] = 3 * a;
|
|
}
|
|
f[0] = 2 * b;
|
|
f[1] = 3 * b;
|
|
#pragma omp atomic update
|
|
c++;
|
|
#pragma omp atomic update
|
|
g[0] += 2;
|
|
#pragma omp atomic update
|
|
g[1] += 3;
|
|
d++;
|
|
h[0] += 2;
|
|
h[1] += 3;
|
|
k += b;
|
|
m[0] += 2 * b;
|
|
m[1] += 3 * b;
|
|
}
|
|
use (&a, &b, &c, &d, e, f, g, h);
|
|
#pragma omp parallel firstprivate (u1, u2)
|
|
{
|
|
int w = omp_get_thread_num ();
|
|
int x = 19;
|
|
int y[2] = { 20, 21 };
|
|
int v = 24;
|
|
int ll[64];
|
|
if (u1 != 14 || u2[0] != 17 || u2[1] != 18)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
u1 += w;
|
|
u2[0] += 2 * w;
|
|
u2[1] += 3 * w;
|
|
use (&u1, u2, &t[b], l, &k, m, &j, h);
|
|
#pragma omp master
|
|
t[b] = omp_get_num_threads ();
|
|
#pragma omp atomic update
|
|
j++;
|
|
#pragma omp atomic update
|
|
l[0] += 2;
|
|
#pragma omp atomic update
|
|
l[1] += 3;
|
|
#pragma omp atomic update
|
|
k += 4;
|
|
#pragma omp atomic update
|
|
m[0] += 5;
|
|
#pragma omp atomic update
|
|
m[1] += 6;
|
|
x += w;
|
|
y[0] += 2 * w;
|
|
y[1] += 3 * w;
|
|
#pragma omp simd safelen(32) private (v)
|
|
for (i = 0; i < 64; i++)
|
|
{
|
|
v = 3 * i;
|
|
ll[i] = u1 + v * u2[0] + u2[1] + x + y[0] + y[1] + v + h[0] + u3[i];
|
|
}
|
|
#pragma omp barrier
|
|
use (&u1, u2, &t[b], l, &k, m, &x, y);
|
|
if (w < 0 || w > 8 || w != omp_get_thread_num () || u1 != 14 + w
|
|
|| u2[0] != 17 + 2 * w || u2[1] != 18 + 3 * w
|
|
|| x != 19 + w || y[0] != 20 + 2 * w || y[1] != 21 + 3 * w
|
|
|| v != 24)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
for (i = 0; i < 64; i++)
|
|
if (ll[i] != u1 + 3 * i * u2[0] + u2[1] + x + y[0] + y[1] + 3 * i + 13 + 14 + i)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
}
|
|
#pragma omp parallel num_threads (1)
|
|
{
|
|
if (b == 0)
|
|
{
|
|
r = a;
|
|
if (a != omp_get_num_teams ()
|
|
|| e[0] != 2 * a
|
|
|| e[1] != 3 * a)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
}
|
|
int v1, v2, v3;
|
|
#pragma omp atomic read
|
|
v1 = c;
|
|
#pragma omp atomic read
|
|
v2 = g[0];
|
|
#pragma omp atomic read
|
|
v3 = g[1];
|
|
s[b] = v1 * 65536L + v2 * 256L + v3;
|
|
if (d != 5 || h[0] != 13 || h[1] != 15
|
|
|| k != 14 + b + 4 * t[b]
|
|
|| m[0] != 17 + 2 * b + 5 * t[b]
|
|
|| m[1] != 18 + 3 * b + 6 * t[b]
|
|
|| b != omp_get_team_num ()
|
|
|| f[0] != 2 * b || f[1] != 3 * b)
|
|
#pragma omp atomic write
|
|
err = 1;
|
|
}
|
|
}
|
|
if (err != 0) abort ();
|
|
if (r < 1 || r > 4) abort ();
|
|
if (a != 1 || b != 2 || c != 3 || d != 4) abort ();
|
|
if (e[0] != 5 || e[1] != 6 || f[0] != 7 || f[1] != 8) abort ();
|
|
if (g[0] != 9 || g[1] != 10 || h[0] != 11 || h[1] != 12) abort ();
|
|
int i, cnt = 0;
|
|
for (i = 0; i < r; i++)
|
|
if ((s[i] >> 16) < 3 + 1 || (s[i] >> 16) > 3 + 4
|
|
|| ((s[i] >> 8) & 0xff) < 9 + 2 * 1 || ((s[i] >> 8) & 0xff) > 9 + 2 * 4
|
|
|| (s[i] & 0xff) < 10 + 3 * 1 || (s[i] & 0xff) > 10 + 3 * 4
|
|
|| t[i] < 1 || t[i] > 8)
|
|
abort ();
|
|
else
|
|
cnt += t[i];
|
|
if (j != 13 + cnt || l[0] != 15 + 2 * cnt || l[1] != 16 + 3 * cnt) abort ();
|
|
return 0;
|
|
}
|