amdgcn: Handle early debug info in mkoffload
Forward the early debug information from the input LTO file to the output HSACO file, in the same way lto-wrapper does. This is a little more complicated, however, because the ELF file containing the debug needs to be converted from x86_64 to amdgcn, and because the offloaded code will have less content than the host program the debug info describes. gcc/ChangeLog: * config/gcn/mkoffload.c: Include simple-object.h and elf.h. (EM_AMDGPU): New macro. (ELFOSABI_AMDGPU_HSA): New macro. (ELFABIVERSION_AMDGPU_HSA): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX803): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX900): New macro. (EF_AMDGPU_MACH_AMDGCN_GFX906): New macro. (R_AMDGPU_NONE): New macro. (R_AMDGPU_ABS32_LO): New macro. (R_AMDGPU_ABS32_HI): New macro. (R_AMDGPU_ABS64): New macro. (R_AMDGPU_REL32): New macro. (R_AMDGPU_REL64): New macro. (R_AMDGPU_ABS32): New macro. (R_AMDGPU_GOTPCREL): New macro. (R_AMDGPU_GOTPCREL32_LO): New macro. (R_AMDGPU_GOTPCREL32_HI): New macro. (R_AMDGPU_REL32_LO): New macro. (R_AMDGPU_REL32_HI): New macro. (reserved): New macro. (R_AMDGPU_RELATIVE64): New macro. (gcn_s1_name): Delete global variable. (gcn_s2_name): Delete global variable. (gcn_o_name): Delete global variable. (gcn_cfile_name): Delete global variable. (files_to_cleanup): New global variable. (offload_abi): New global variable. (tool_cleanup): Use files_to_cleanup, not explicit list. (copy_early_debug_info): New function. (main): New local variables gcn_s1_name, gcn_s2_name, gcn_o_name, gcn_cfile_name. Create files_to_cleanup obstack. Recognize -march options. Copy early debug info from input .o files.
This commit is contained in:
parent
866c5bfd9c
commit
a4f49061b6
@ -33,31 +33,53 @@
|
||||
#include <libgen.h>
|
||||
#include "collect-utils.h"
|
||||
#include "gomp-constants.h"
|
||||
#include "simple-object.h"
|
||||
#include "elf.h"
|
||||
|
||||
/* These probably won't be in elf.h for a while.  Each constant is guarded
   on its own, so that a system elf.h which provides only some of them does
   not suppress the definitions of the others.  */

/* ELF header: machine number, OS ABI and ABI version.  The values must be
   plain expressions (no trailing semicolon) so that they can be used in
   comparisons and initializers as well as in assignments.  */
#ifndef EM_AMDGPU
#define EM_AMDGPU 0xe0
#endif

#ifndef ELFOSABI_AMDGPU_HSA
#define ELFOSABI_AMDGPU_HSA 64
#endif
#ifndef ELFABIVERSION_AMDGPU_HSA
#define ELFABIVERSION_AMDGPU_HSA 1
#endif

/* ELF header e_flags values selecting the GPU architecture.  */
#ifndef EF_AMDGPU_MACH_AMDGCN_GFX803
#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
#endif
#ifndef EF_AMDGPU_MACH_AMDGCN_GFX900
#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
#endif
#ifndef EF_AMDGPU_MACH_AMDGCN_GFX906
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#endif

/* Relocation types.  */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#endif
#ifndef R_AMDGPU_ABS32_LO
#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
#endif
#ifndef R_AMDGPU_ABS32_HI
#define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */
#endif
#ifndef R_AMDGPU_ABS64
#define R_AMDGPU_ABS64 3 /* S + A */
#endif
#ifndef R_AMDGPU_REL32
#define R_AMDGPU_REL32 4 /* S + A - P */
#endif
#ifndef R_AMDGPU_REL64
#define R_AMDGPU_REL64 5 /* S + A - P */
#endif
#ifndef R_AMDGPU_ABS32
#define R_AMDGPU_ABS32 6 /* S + A */
#endif
#ifndef R_AMDGPU_GOTPCREL
#define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */
#endif
#ifndef R_AMDGPU_GOTPCREL32_LO
#define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */
#endif
#ifndef R_AMDGPU_GOTPCREL32_HI
#define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */
#endif
#ifndef R_AMDGPU_REL32_LO
#define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */
#endif
#ifndef R_AMDGPU_REL32_HI
#define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */
#endif
/* NOTE(review): this defines a plain lowercase identifier as a macro, which
   will silently rewrite any later variable or field called "reserved".  It
   is kept under its existing name for compatibility; consider renaming.  */
#ifndef reserved
#define reserved 12
#endif
#ifndef R_AMDGPU_RELATIVE64
#define R_AMDGPU_RELATIVE64 13 /* B + A */
#endif
|
||||
|
||||
const char tool_name[] = "gcn mkoffload";
|
||||
|
||||
/* Files to unlink. */
|
||||
static const char *gcn_s1_name;
|
||||
static const char *gcn_s2_name;
|
||||
static const char *gcn_o_name;
|
||||
static const char *gcn_cfile_name;
|
||||
static const char *gcn_dumpbase;
|
||||
static struct obstack files_to_cleanup;
|
||||
|
||||
enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
|
||||
uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture.
|
||||
|
||||
/* Delete tempfiles. */
|
||||
|
||||
void
|
||||
tool_cleanup (bool from_signal ATTRIBUTE_UNUSED)
|
||||
{
|
||||
if (gcn_cfile_name)
|
||||
maybe_unlink (gcn_cfile_name);
|
||||
if (gcn_s1_name)
|
||||
maybe_unlink (gcn_s1_name);
|
||||
if (gcn_s2_name)
|
||||
maybe_unlink (gcn_s2_name);
|
||||
if (gcn_o_name)
|
||||
maybe_unlink (gcn_o_name);
|
||||
obstack_ptr_grow (&files_to_cleanup, NULL);
|
||||
const char **files = XOBFINISH (&files_to_cleanup, const char **);
|
||||
for (int i = 0; files[i]; i++)
|
||||
maybe_unlink (files[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -204,6 +226,180 @@ access_check (const char *name, int mode)
|
||||
return access (name, mode);
|
||||
}
|
||||
|
||||
/* Copy the early-debug-info from the incoming LTO object to a new object
|
||||
that will be linked into the output HSACO file. The host relocations
|
||||
must be translated into GCN relocations, and any global undefined symbols
|
||||
must be weakened (so as not to have the debug info try to pull in host
|
||||
junk).
|
||||
|
||||
Returns true if the file was created, false otherwise. */
|
||||
|
||||
static bool
|
||||
copy_early_debug_info (const char *infile, const char *outfile)
|
||||
{
|
||||
const char *errmsg;
|
||||
int err;
|
||||
|
||||
/* The simple_object code can handle extracting the debug sections.
|
||||
This code is based on that in lto-wrapper.c. */
|
||||
int infd = open (infile, O_RDONLY | O_BINARY);
|
||||
if (infd == -1)
|
||||
return false;
|
||||
simple_object_read *inobj = simple_object_start_read (infd, 0,
|
||||
"__GNU_LTO",
|
||||
&errmsg, &err);
|
||||
if (!inobj)
|
||||
return false;
|
||||
|
||||
off_t off, len;
|
||||
if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info",
|
||||
&off, &len, &errmsg, &err) != 1)
|
||||
{
|
||||
simple_object_release_read (inobj);
|
||||
close (infd);
|
||||
return false;
|
||||
}
|
||||
|
||||
errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true);
|
||||
if (errmsg)
|
||||
{
|
||||
unlink_if_ordinary (outfile);
|
||||
return false;
|
||||
}
|
||||
|
||||
simple_object_release_read (inobj);
|
||||
close (infd);
|
||||
|
||||
/* Open the file we just created for some adjustments.
|
||||
The simple_object code can't do this, so we do it manually. */
|
||||
FILE *outfd = fopen (outfile, "r+b");
|
||||
if (!outfd)
|
||||
return false;
|
||||
|
||||
Elf64_Ehdr ehdr;
|
||||
if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1)
|
||||
{
|
||||
fclose (outfd);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We only support host relocations of x86_64, for now. */
|
||||
gcc_assert (ehdr.e_machine == EM_X86_64);
|
||||
|
||||
/* Patch the correct elf architecture flag into the file. */
|
||||
ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
|
||||
ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
|
||||
ehdr.e_type = ET_REL;
|
||||
ehdr.e_machine = EM_AMDGPU;
|
||||
ehdr.e_flags = elf_arch;
|
||||
|
||||
/* Load the section headers so we can walk them later. */
|
||||
Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
|
||||
* ehdr.e_shnum);
|
||||
if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1
|
||||
|| fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum,
|
||||
outfd) != ehdr.e_shnum)
|
||||
{
|
||||
free (sections);
|
||||
fclose (outfd);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Convert the host relocations to target relocations. */
|
||||
for (int i = 0; i < ehdr.e_shnum; i++)
|
||||
{
|
||||
if (sections[i].sh_type != SHT_RELA)
|
||||
continue;
|
||||
|
||||
char *data = (char *)xmalloc (sections[i].sh_size);
|
||||
if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
|
||||
|| fread (data, sections[i].sh_size, 1, outfd) != 1)
|
||||
{
|
||||
free (data);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t offset = 0;
|
||||
offset < sections[i].sh_size;
|
||||
offset += sections[i].sh_entsize)
|
||||
{
|
||||
Elf64_Rela *reloc = (Elf64_Rela *) (data + offset);
|
||||
|
||||
/* Map the host relocations to GCN relocations.
|
||||
Only relocations that can appear in DWARF need be handled. */
|
||||
switch (ELF64_R_TYPE (reloc->r_info))
|
||||
{
|
||||
case R_X86_64_32:
|
||||
case R_X86_64_32S:
|
||||
reloc->r_info = R_AMDGPU_ABS32;
|
||||
break;
|
||||
case R_X86_64_PC32:
|
||||
reloc->r_info = R_AMDGPU_REL32;
|
||||
break;
|
||||
case R_X86_64_PC64:
|
||||
reloc->r_info = R_AMDGPU_REL64;
|
||||
break;
|
||||
case R_X86_64_64:
|
||||
reloc->r_info = R_AMDGPU_ABS64;
|
||||
break;
|
||||
case R_X86_64_RELATIVE:
|
||||
reloc->r_info = R_AMDGPU_RELATIVE64;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Write back our relocation changes. */
|
||||
if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
|
||||
fwrite (data, sections[i].sh_size, 1, outfd);
|
||||
|
||||
free (data);
|
||||
}
|
||||
|
||||
/* Weaken any global undefined symbols that would pull in unwanted
|
||||
objects. */
|
||||
for (int i = 0; i < ehdr.e_shnum; i++)
|
||||
{
|
||||
if (sections[i].sh_type != SHT_SYMTAB)
|
||||
continue;
|
||||
|
||||
char *data = (char *)xmalloc (sections[i].sh_size);
|
||||
if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
|
||||
|| fread (data, sections[i].sh_size, 1, outfd) != 1)
|
||||
{
|
||||
free (data);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t offset = 0;
|
||||
offset < sections[i].sh_size;
|
||||
offset += sections[i].sh_entsize)
|
||||
{
|
||||
Elf64_Sym *sym = (Elf64_Sym *) (data + offset);
|
||||
int type = ELF64_ST_TYPE (sym->st_info);
|
||||
int bind = ELF64_ST_BIND (sym->st_info);
|
||||
|
||||
if (bind == STB_GLOBAL && sym->st_shndx == 0)
|
||||
sym->st_info = ELF64_ST_INFO (STB_WEAK, type);
|
||||
}
|
||||
|
||||
/* Write back our symbol changes. */
|
||||
if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
|
||||
fwrite (data, sections[i].sh_size, 1, outfd);
|
||||
|
||||
free (data);
|
||||
}
|
||||
free (sections);
|
||||
|
||||
/* Write back our header changes. */
|
||||
rewind (outfd);
|
||||
fwrite (&ehdr, sizeof (ehdr), 1, outfd);
|
||||
|
||||
fclose (outfd);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Parse an input assembler file, extract the offload tables etc.,
|
||||
and output (1) the assembler code, minus the tables (which can contain
|
||||
problematic relocations), and (2) a C file with the offload tables
|
||||
@ -538,9 +734,15 @@ main (int argc, char **argv)
|
||||
FILE *cfile = stdout;
|
||||
const char *outname = 0;
|
||||
|
||||
const char *gcn_s1_name;
|
||||
const char *gcn_s2_name;
|
||||
const char *gcn_o_name;
|
||||
const char *gcn_cfile_name;
|
||||
|
||||
progname = "mkoffload";
|
||||
diagnostic_initialize (global_dc, 0);
|
||||
|
||||
obstack_init (&files_to_cleanup);
|
||||
if (atexit (mkoffload_cleanup) != 0)
|
||||
fatal_error (input_location, "atexit failed");
|
||||
|
||||
@ -632,7 +834,14 @@ main (int argc, char **argv)
|
||||
else if (strcmp (argv[i], "-dumpbase") == 0
|
||||
&& i + 1 < argc)
|
||||
dumppfx = argv[++i];
|
||||
else if (strcmp (argv[i], "-march=fiji") == 0)
|
||||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
|
||||
else if (strcmp (argv[i], "-march=gfx900") == 0)
|
||||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
|
||||
else if (strcmp (argv[i], "-march=gfx906") == 0)
|
||||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
|
||||
}
|
||||
|
||||
if (!(fopenacc ^ fopenmp))
|
||||
fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
|
||||
|
||||
@ -693,6 +902,10 @@ main (int argc, char **argv)
|
||||
gcn_o_name = make_temp_file (".mkoffload.hsaco");
|
||||
gcn_cfile_name = make_temp_file (".c");
|
||||
}
|
||||
obstack_ptr_grow (&files_to_cleanup, gcn_s1_name);
|
||||
obstack_ptr_grow (&files_to_cleanup, gcn_s2_name);
|
||||
obstack_ptr_grow (&files_to_cleanup, gcn_o_name);
|
||||
obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name);
|
||||
|
||||
obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
|
||||
obstack_ptr_grow (&cc_argv_obstack, "");
|
||||
@ -710,6 +923,39 @@ main (int argc, char **argv)
|
||||
struct obstack ld_argv_obstack;
|
||||
obstack_init (&ld_argv_obstack);
|
||||
obstack_ptr_grow (&ld_argv_obstack, driver);
|
||||
|
||||
/* Extract early-debug information from the input objects.
|
||||
This loop finds all the inputs that end ".o" and aren't the output. */
|
||||
int dbgcount = 0;
|
||||
for (int ix = 1; ix != argc; ix++)
|
||||
{
|
||||
if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
|
||||
++ix;
|
||||
else
|
||||
{
|
||||
if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0)
|
||||
{
|
||||
char *dbgobj;
|
||||
if (save_temps)
|
||||
{
|
||||
char buf[10];
|
||||
sprintf (buf, "%d", dbgcount++);
|
||||
dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL);
|
||||
}
|
||||
else
|
||||
dbgobj = make_temp_file (".mkoffload.dbg.o");
|
||||
|
||||
/* If the copy fails then just ignore it. */
|
||||
if (copy_early_debug_info (argv[ix], dbgobj))
|
||||
{
|
||||
obstack_ptr_grow (&ld_argv_obstack, dbgobj);
|
||||
obstack_ptr_grow (&files_to_cleanup, dbgobj);
|
||||
}
|
||||
else
|
||||
free (dbgobj);
|
||||
}
|
||||
}
|
||||
}
|
||||
obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
|
||||
obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user