Implement circular print buffer.
2019-03-18  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn-run.c (struct output): Make next_output unsigned.
	Extend queue to 1024 entries.
	Add "consumed" field.
	(gomp_print_output): Remove print_index parameter.
	Add final parameter.
	Change limit to unsigned.
	Use consumed field to implement circular buffer.
	Detect interrupted print in final pass.
	Flush output at the end.
	(run): Update gomp_print_output usage.
	(main): Initialize kernargs->output_data.consumed.

From-SVN: r269764
This commit is contained in:
parent
c57546fe94
commit
f9cacebbde
@ -1,3 +1,17 @@
|
|||||||
|
2019-03-18 Andrew Stubbs <ams@codesourcery.com>
|
||||||
|
|
||||||
|
* config/gcn/gcn-run.c (struct output): Make next_output unsigned.
|
||||||
|
Extend queue to 1024 entries.
|
||||||
|
Add "consumed" field.
|
||||||
|
(gomp_print_output): Remove print_index parameter.
|
||||||
|
Add final parameter.
|
||||||
|
Change limit to unsigned.
|
||||||
|
Use consumed field to implement circular buffer.
|
||||||
|
Detect interrupted print in final pass.
|
||||||
|
Flush output at the end.
|
||||||
|
(run): Update gomp_print_output usage.
|
||||||
|
(main): Initialize kernargs->output_data.consumed.
|
||||||
|
|
||||||
2019-03-18 Richard Sandiford <richard.sandiford@arm.com>
|
2019-03-18 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
|
|
||||||
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Fix the
|
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Fix the
|
||||||
|
@ -601,7 +601,7 @@ struct kernargs
|
|||||||
struct output
|
struct output
|
||||||
{
|
{
|
||||||
int return_value;
|
int return_value;
|
||||||
int next_output;
|
unsigned int next_output;
|
||||||
struct printf_data
|
struct printf_data
|
||||||
{
|
{
|
||||||
int written;
|
int written;
|
||||||
@ -613,7 +613,8 @@ struct kernargs
|
|||||||
double dvalue;
|
double dvalue;
|
||||||
char text[128];
|
char text[128];
|
||||||
};
|
};
|
||||||
} queue[1000];
|
} queue[1024];
|
||||||
|
unsigned int consumed;
|
||||||
} output_data;
|
} output_data;
|
||||||
|
|
||||||
struct heap
|
struct heap
|
||||||
@ -624,21 +625,34 @@ struct kernargs
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Print any console output from the kernel.
|
/* Print any console output from the kernel.
|
||||||
We print all entries from print_index to the next entry without a "written"
|
We print all entries from "consumed" to the next entry without a "written"
|
||||||
flag. Subsequent calls should use the returned print_index value to resume
|
flag, or until "next_output" is reached. The buffer is circular, but the
|
||||||
from the same point. */
|
indices are absolute. It is assumed the kernel will stop writing data
|
||||||
|
if "next_output" wraps (becomes smaller than "consumed"). */
|
||||||
void
|
void
|
||||||
gomp_print_output (struct kernargs *kernargs, int *print_index)
|
gomp_print_output (struct kernargs *kernargs, bool final)
|
||||||
{
|
{
|
||||||
int limit = (sizeof (kernargs->output_data.queue)
|
unsigned int limit = (sizeof (kernargs->output_data.queue)
|
||||||
/ sizeof (kernargs->output_data.queue[0]));
|
/ sizeof (kernargs->output_data.queue[0]));
|
||||||
|
|
||||||
int i;
|
unsigned int from = __atomic_load_n (&kernargs->output_data.consumed,
|
||||||
for (i = *print_index; i < limit; i++)
|
__ATOMIC_ACQUIRE);
|
||||||
|
unsigned int to = kernargs->output_data.next_output;
|
||||||
|
|
||||||
|
if (from > to)
|
||||||
{
|
{
|
||||||
struct printf_data *data = &kernargs->output_data.queue[i];
|
/* Overflow. */
|
||||||
|
if (final)
|
||||||
|
printf ("GCN print buffer overflowed.\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!data->written)
|
unsigned int i;
|
||||||
|
for (i = from; i < to; i++)
|
||||||
|
{
|
||||||
|
struct printf_data *data = &kernargs->output_data.queue[i%limit];
|
||||||
|
|
||||||
|
if (!data->written && !final)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
switch (data->type)
|
switch (data->type)
|
||||||
@ -655,16 +669,16 @@ gomp_print_output (struct kernargs *kernargs, int *print_index)
|
|||||||
case 3:
|
case 3:
|
||||||
printf ("%.128s%.128s", data->msg, data->text);
|
printf ("%.128s%.128s", data->msg, data->text);
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
printf ("GCN print buffer error!\n");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
data->written = 0;
|
data->written = 0;
|
||||||
|
__atomic_store_n (&kernargs->output_data.consumed, i+1,
|
||||||
|
__ATOMIC_RELEASE);
|
||||||
}
|
}
|
||||||
|
fflush (stdout);
|
||||||
if (*print_index < limit && i == limit
|
|
||||||
&& kernargs->output_data.next_output > limit)
|
|
||||||
printf ("WARNING: GCN print buffer exhausted.\n");
|
|
||||||
|
|
||||||
*print_index = i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Execute an already-loaded kernel on the device. */
|
/* Execute an already-loaded kernel on the device. */
|
||||||
@ -711,16 +725,15 @@ run (void *kernargs)
|
|||||||
hsa_fns.hsa_queue_store_write_index_relaxed_fn (queue, index + 1);
|
hsa_fns.hsa_queue_store_write_index_relaxed_fn (queue, index + 1);
|
||||||
hsa_fns.hsa_signal_store_relaxed_fn (queue->doorbell_signal, index);
|
hsa_fns.hsa_signal_store_relaxed_fn (queue->doorbell_signal, index);
|
||||||
/* Kernel running ...... */
|
/* Kernel running ...... */
|
||||||
int print_index = 0;
|
|
||||||
while (hsa_fns.hsa_signal_wait_relaxed_fn (signal, HSA_SIGNAL_CONDITION_LT,
|
while (hsa_fns.hsa_signal_wait_relaxed_fn (signal, HSA_SIGNAL_CONDITION_LT,
|
||||||
1, 1000000,
|
1, 1000000,
|
||||||
HSA_WAIT_STATE_ACTIVE) != 0)
|
HSA_WAIT_STATE_ACTIVE) != 0)
|
||||||
{
|
{
|
||||||
usleep (10000);
|
usleep (10000);
|
||||||
gomp_print_output (kernargs, &print_index);
|
gomp_print_output (kernargs, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
gomp_print_output (kernargs, &print_index);
|
gomp_print_output (kernargs, true);
|
||||||
|
|
||||||
if (debug)
|
if (debug)
|
||||||
fprintf (stderr, "Kernel exited\n");
|
fprintf (stderr, "Kernel exited\n");
|
||||||
@ -797,6 +810,7 @@ main (int argc, char *argv[])
|
|||||||
for (unsigned i = 0; i < (sizeof (kernargs->output_data.queue)
|
for (unsigned i = 0; i < (sizeof (kernargs->output_data.queue)
|
||||||
/ sizeof (kernargs->output_data.queue[0])); i++)
|
/ sizeof (kernargs->output_data.queue[0])); i++)
|
||||||
kernargs->output_data.queue[i].written = 0;
|
kernargs->output_data.queue[i].written = 0;
|
||||||
|
kernargs->output_data.consumed = 0;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for (int i = 0; i < kernel_argc; i++)
|
for (int i = 0; i < kernel_argc; i++)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user