gcc/libgomp/testsuite/libgomp.c/target-31.c
Jakub Jelinek e460634820 omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_* internal calls - 0 if...
gcc/
2015-11-14  Jakub Jelinek  <jakub@redhat.com>

	* omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_*
	internal calls - 0 if ordered simd and 1 for ordered threads simd.
	* tree-vectorizer.c (adjust_simduid_builtins): If GOMP_SIMD_ORDERED_*
	argument is 1, replace it with GOMP_ordered_* call instead of removing
	it.
gcc/c/
2015-11-14  Jakub Jelinek  <jakub@redhat.com>

	* c-typeck.c (c_finish_omp_clauses): Don't mark
	GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable.
gcc/cp/
2015-11-14  Jakub Jelinek  <jakub@redhat.com>

	* semantics.c (finish_omp_clauses): Don't mark
	GOMP_MAP_FIRSTPRIVATE_POINTER decls addressable.
libgomp/
2015-11-14  Jakub Jelinek  <jakub@redhat.com>
	    Aldy Hernandez  <aldyh@redhat.com>
	    Ilya Verbin  <ilya.verbin@intel.com>

	* ordered.c (gomp_doacross_init, GOMP_doacross_post,
	GOMP_doacross_wait, gomp_doacross_ull_init, GOMP_doacross_ull_post,
	GOMP_doacross_ull_wait): For GFS_GUIDED don't divide number of
	iterators or IV by chunk size.
	* parallel.c (gomp_resolve_num_threads): Don't assume that
	if thr->ts.team is non-NULL, then pool must be non-NULL.
	* libgomp-plugin.h (GOMP_PLUGIN_target_task_completion): Declare.
	* libgomp.map (GOMP_PLUGIN_1.1): New symbol version, export
	GOMP_PLUGIN_target_task_completion.
	* Makefile.am (libgomp_la_SOURCES): Add priority_queue.c.
	* Makefile.in: Regenerate.
	* libgomp.h: Shuffle prototypes and forward definitions around so
	priority queues can be defined.
	(enum gomp_task_kind): Add GOMP_TASK_ASYNC_RUNNING.
	(enum gomp_target_task_state): New enum.
	(struct gomp_target_task): Add state, tgt, task and team fields.
	(gomp_create_target_task): Change return type to bool, add
	state argument.
	(gomp_target_task_fn): Change return type to bool.
	(struct gomp_device_descr): Add async_run_func.
	(struct gomp_task): Remove children, next_child, prev_child,
	next_queue, prev_queue, next_taskgroup, prev_taskgroup.
	Add pnode field.
	(struct gomp_taskgroup): Remove children.
	Add taskgroup_queue.
	(struct gomp_team): Change task_queue type to a priority queue.
	(splay_compare): Define inline.
	(priority_queue_offset): New.
	(priority_node_to_task): New.
	(task_to_priority_node): New.
	* oacc-mem.c: Do not include splay-tree.h.
	* priority_queue.c: New file.
	* priority_queue.h: New file.
	* splay-tree.c: Do not include splay-tree.h.
	(splay_tree_foreach_internal): New.
	(splay_tree_foreach): New.
	* splay-tree.h: Become re-entrant if splay_tree_prefix is defined.
	(splay_tree_callback): Define typedef.
	* target.c (splay_compare): Move to libgomp.h.
	(GOMP_target): Don't adjust *thr in any way around running offloaded
	task.
	(GOMP_target_ext): Likewise.  Handle target nowait.
	(GOMP_target_update_ext, GOMP_target_enter_exit_data): Check
	return value from gomp_create_target_task, if false, fallthrough
	as if no dependencies exist.
	(gomp_target_task_fn): Change return type to bool, return true
	if the task should have another part scheduled later.  Handle
	target nowait.
	(gomp_load_plugin_for_device): Initialize async_run.
	* task.c (gomp_init_task): Initialize children_queue.
	(gomp_clear_parent_in_list): New.
	(gomp_clear_parent_in_tree): New.
	(gomp_clear_parent): Handle priorities.
	(GOMP_task): Likewise.
	(priority_queue_move_task_first,
	gomp_target_task_completion, GOMP_PLUGIN_target_task_completion):
	New functions.
	(gomp_create_target_task): Use priority queues.  Change return type
	to bool, add state argument, return false if for async
	{{enter,exit} data,update} constructs no dependencies need to be
	waited for, handle target nowait.  Set task->fn to NULL instead of
	gomp_target_task_fn.
	(verify_children_queue): Remove.
	(priority_list_upgrade_task): New.
	(priority_queue_upgrade_task): New.
	(verify_task_queue): Remove.
	(priority_list_downgrade_task): New.
	(priority_queue_downgrade_task): New.
	(gomp_task_run_pre): Use priority queues.
	Abstract code out to priority_queue_downgrade_task.
	(gomp_task_run_post_handle_dependers): Use priority queues.
	(gomp_task_run_post_remove_parent): Likewise.
	(gomp_task_run_post_remove_taskgroup): Likewise.
	(gomp_barrier_handle_tasks): Likewise.  Handle target nowait target
	tasks specially.
	(GOMP_taskwait): Likewise.
	(gomp_task_maybe_wait_for_dependencies): Likewise.  Abstract code to
	priority_queue_upgrade_task.
	(GOMP_taskgroup_start): Use priority queues.
	(GOMP_taskgroup_end): Likewise.  Handle target nowait target tasks
	specially.  If taskgroup is NULL, and thr->ts.level is 0, act as a
	barrier.
	* taskloop.c (GOMP_taskloop): Handle priorities.
	* team.c (gomp_new_team): Call priority_queue_init.
	(free_team): Call priority_queue_free.
	(gomp_free_thread): Call gomp_team_end if thr->ts.team is artificial
	team created for target nowait in implicit parallel region.
	(gomp_team_start): For nested check, test thr->ts.level instead of
	thr->ts.team != NULL.
	* testsuite/libgomp.c/doacross-3.c: New test.
	* testsuite/libgomp.c/ordered-5.c: New test.
	* testsuite/libgomp.c/priority.c: New test.
	* testsuite/libgomp.c/target-31.c: New test.
	* testsuite/libgomp.c/target-32.c: New test.
	* testsuite/libgomp.c/target-33.c: New test.
	* testsuite/libgomp.c/target-34.c: New test.
liboffloadmic/
2015-11-14  Ilya Verbin  <ilya.verbin@intel.com>

	* runtime/offload_host.cpp (task_completion_callback): New
	variable.
	(offload_proxy_task_completed_ooo): Call task_completion_callback.
	(__offload_register_task_callback): New function.
	* runtime/offload_host.h (__offload_register_task_callback): New
	declaration.
	* plugin/libgomp-plugin-intelmic.cpp (offload): Add async_data
	argument, handle async offloading.
	(register_main_image): Call register_main_image.
	(GOMP_OFFLOAD_init_device, get_target_table, GOMP_OFFLOAD_alloc,
	GOMP_OFFLOAD_free, GOMP_OFFLOAD_host2dev, GOMP_OFFLOAD_dev2host,
	GOMP_OFFLOAD_dev2dev): Adjust offload callers.
	(GOMP_OFFLOAD_async_run): New function.
	(GOMP_OFFLOAD_run): Implement using GOMP_OFFLOAD_async_run.

From-SVN: r230381
2015-11-14 19:42:13 +01:00

164 lines
4.4 KiB
C

#include <omp.h>
#include <stdlib.h>
/* File-scope scalars and two-element arrays named in the private and
   firstprivate clauses of the target/teams constructs in main.  After the
   target region main verifies that every one of them still holds the
   initial value given here.  */
int a = 1;
int b = 2;
int c = 3;
int d = 4;
int e[2] = { 5, 6 };
int f[2] = { 7, 8 };
int g[2] = { 9, 10 };
int h[2] = { 11, 12 };
/* Opaque sink for eight pointers.

   Each argument is fed as a register input to an empty asm statement that
   carries a "memory" clobber, so the compiler has to assume the pointed-to
   objects may be read or written here.  The function itself never
   dereferences any of the pointers and has no other effect.  It is kept
   out of line (noinline, noclone) so callers cannot see through it.

   The alternate keywords __asm__ / __volatile__ are used instead of plain
   asm so that the file also builds in strict ISO modes (-std=c11, -ansi),
   where the asm keyword is not recognized.  */
__attribute__((noinline, noclone)) void
use (int *k, int *l, int *m, int *n, int *o, int *p, int *q, int *r)
{
  __asm__ __volatile__ ("" : : "r" (k) : "memory");
  __asm__ __volatile__ ("" : : "r" (l) : "memory");
  __asm__ __volatile__ ("" : : "r" (m) : "memory");
  __asm__ __volatile__ ("" : : "r" (n) : "memory");
  __asm__ __volatile__ ("" : : "r" (o) : "memory");
  __asm__ __volatile__ ("" : : "r" (p) : "memory");
  __asm__ __volatile__ ("" : : "r" (q) : "memory");
  __asm__ __volatile__ ("" : : "r" (r) : "memory");
}

/* Make use callable from inside target regions.  */
#pragma omp declare target to (use)
/* Exercise data-sharing clauses on a target region with a nested teams
   construct: private, firstprivate and map on "target", private and
   firstprivate on "teams", and firstprivate on an inner "parallel".

   Inside the region every detected inconsistency stores 1 into err (mapped
   tofrom).  After the region the host calls abort () if err is set, if the
   file-scope variables a..h no longer hold their initial values, or if the
   values mapped back in r, s, t, j and l are out of the expected range.
   Returns 0 on success.  */
int
main (void)
{
  int err = 0, r = -1, t[4];
  long s[4] = { -1, -2, -3, -4 };
  int j = 13, k = 14, l[2] = { 15, 16 }, m[2] = { 17, 18 };
  #pragma omp target private (a, b, e, f) firstprivate (c, d, g, h) map(from: r, s, t) \
                     map(tofrom: err, j, l) map(to: k, m)
  #pragma omp teams num_teams (4) thread_limit (8) private (b, f) firstprivate (d, h, k, m)
  {
    /* Capture k and m before the code below modifies them.  */
    int u1 = k, u2[2] = { m[0], m[1] };
    int u3[64];
    int i;
    for (i = 0; i < 64; i++)
      u3[i] = k + i;

    /* Phase 1: check the firstprivate copies still have their initial
       values, then modify every privatized variable.  */
    #pragma omp parallel num_threads (1)
    {
      if (c != 3 || d != 4 || g[0] != 9 || g[1] != 10 || h[0] != 11 || h[1] != 12 || k != 14 || m[0] != 17 || m[1] != 18)
        #pragma omp atomic write
          err = 1;
      b = omp_get_team_num ();
      /* num_teams (4) was requested, so valid team numbers are 0..3.  */
      if (b >= 4)
        #pragma omp atomic write
          err = 1;
      /* Only team 0 writes a and e (private on target, not on teams).  */
      if (b == 0)
        {
          a = omp_get_num_teams ();
          e[0] = 2 * a;
          e[1] = 3 * a;
        }
      f[0] = 2 * b;
      f[1] = 3 * b;
      /* c and g are firstprivate on target only, hence the atomic
         updates; d, h, k and m are also firstprivate on teams.  */
      #pragma omp atomic update
        c++;
      #pragma omp atomic update
        g[0] += 2;
      #pragma omp atomic update
        g[1] += 3;
      d++;
      h[0] += 2;
      h[1] += 3;
      k += b;
      m[0] += 2 * b;
      m[1] += 3 * b;
    }
    use (&a, &b, &c, &d, e, f, g, h);

    /* Phase 2: multi-threaded parallel.  i is declared in the enclosing
       teams region, so without private (i) it would be shared by all
       threads here, and both the simd loop and the plain verification loop
       below would race on it.  */
    #pragma omp parallel firstprivate (u1, u2) private (i)
    {
      int w = omp_get_thread_num ();
      int x = 19;
      int y[2] = { 20, 21 };
      int v = 24;
      int ll[64];
      if (u1 != 14 || u2[0] != 17 || u2[1] != 18)
        #pragma omp atomic write
          err = 1;
      u1 += w;
      u2[0] += 2 * w;
      u2[1] += 3 * w;
      use (&u1, u2, &t[b], l, &k, m, &j, h);
      /* Record this team's thread count; the host sums these in cnt.  */
      #pragma omp master
        t[b] = omp_get_num_threads ();
      /* Every thread bumps the shared counters once.  */
      #pragma omp atomic update
        j++;
      #pragma omp atomic update
        l[0] += 2;
      #pragma omp atomic update
        l[1] += 3;
      #pragma omp atomic update
        k += 4;
      #pragma omp atomic update
        m[0] += 5;
      #pragma omp atomic update
        m[1] += 6;
      x += w;
      y[0] += 2 * w;
      y[1] += 3 * w;
      /* v is private on the simd construct; the outer v is checked to be
         still 24 afterwards.  */
      #pragma omp simd safelen(32) private (v)
      for (i = 0; i < 64; i++)
        {
          v = 3 * i;
          ll[i] = u1 + v * u2[0] + u2[1] + x + y[0] + y[1] + v + h[0] + u3[i];
        }
      #pragma omp barrier
      use (&u1, u2, &t[b], l, &k, m, &x, y);
      if (w < 0 || w > 8 || w != omp_get_thread_num () || u1 != 14 + w
          || u2[0] != 17 + 2 * w || u2[1] != 18 + 3 * w
          || x != 19 + w || y[0] != 20 + 2 * w || y[1] != 21 + 3 * w
          || v != 24)
        #pragma omp atomic write
          err = 1;
      /* 13 is h[0] after phase 1 (11 + 2); 14 + i is u3[i].  */
      for (i = 0; i < 64; i++)
        if (ll[i] != u1 + 3 * i * u2[0] + u2[1] + x + y[0] + y[1] + 3 * i + 13 + 14 + i)
          #pragma omp atomic write
            err = 1;
    }

    /* Phase 3: verify the state left by phases 1 and 2 and export the
       results through r, s and t.  */
    #pragma omp parallel num_threads (1)
    {
      if (b == 0)
        {
          r = a;
          if (a != omp_get_num_teams ()
              || e[0] != 2 * a
              || e[1] != 3 * a)
            #pragma omp atomic write
              err = 1;
        }
      int v1, v2, v3;
      #pragma omp atomic read
        v1 = c;
      #pragma omp atomic read
        v2 = g[0];
      #pragma omp atomic read
        v3 = g[1];
      /* Pack c, g[0] and g[1] into one long; the host unpacks it with
         shifts and masks.  */
      s[b] = v1 * 65536L + v2 * 256L + v3;
      if (d != 5 || h[0] != 13 || h[1] != 15
          || k != 14 + b + 4 * t[b]
          || m[0] != 17 + 2 * b + 5 * t[b]
          || m[1] != 18 + 3 * b + 6 * t[b]
          || b != omp_get_team_num ()
          || f[0] != 2 * b || f[1] != 3 * b)
        #pragma omp atomic write
          err = 1;
    }
  }
  if (err != 0) abort ();
  /* r is the number of teams reported by team 0.  */
  if (r < 1 || r > 4) abort ();
  /* The file-scope originals must be untouched by the target region.  */
  if (a != 1 || b != 2 || c != 3 || d != 4) abort ();
  if (e[0] != 5 || e[1] != 6 || f[0] != 7 || f[1] != 8) abort ();
  if (g[0] != 9 || g[1] != 10 || h[0] != 11 || h[1] != 12) abort ();
  int i, cnt = 0;
  /* For each team, the packed c/g[0]/g[1] values must reflect between 1
     and 4 increments, and the thread count must be within 1..8.  */
  for (i = 0; i < r; i++)
    if ((s[i] >> 16) < 3 + 1 || (s[i] >> 16) > 3 + 4
        || ((s[i] >> 8) & 0xff) < 9 + 2 * 1 || ((s[i] >> 8) & 0xff) > 9 + 2 * 4
        || (s[i] & 0xff) < 10 + 3 * 1 || (s[i] & 0xff) > 10 + 3 * 4
        || t[i] < 1 || t[i] > 8)
      abort ();
    else
      cnt += t[i];
  /* j and l were bumped once per thread across all teams.  */
  if (j != 13 + cnt || l[0] != 15 + 2 * cnt || l[1] != 16 + 3 * cnt) abort ();
  return 0;
}