libctf: add CU-mapping machinery

Once the deduplicator is capable of actually detecting conflicting types
with the same name (i.e., not yet) we will place such conflicting types,
and types that depend on them, into CTF dictionaries that are children
of the main dictionary we usually emit: currently, this will lead to the
.ctf section becoming a CTF archive rather than a single dictionary,
with the default-named archive member (_CTF_SECTION, or NULL) being the
main shared dictionary with most of the types in it.

By default, the sections are named after the compilation unit they come
from (complete path and all), with the cuname field in the CTF header
providing further evidence of the name without requiring the caller to
engage in tiresome parsing.  But some callers may not wish the mapping
from input CU to output sub-dictionary to be purely CU-based.

The machinery here allows this to be freely changed, in two ways:

 - callers can call ctf_link_add_cu_mapping to specify that a single
   input compilation unit should have its types placed in some other CU
   if they conflict: the CU will always be created, even if empty, so
   the consuming program can depend on its existence.  You can map
   multiple input CUs to one output CU to force all their types to be
   merged together: if some of *those* types conflict, the behaviour is
   currently unspecified (the new deduplicator will specify it).

 - callers can call ctf_link_set_memb_name_changer to provide a function
   which is passed every CTF sub-dictionary name in turn (including
   _CTF_SECTION) and can return a new name, or NULL if no change is
   desired.  The mapping from input to output names should not map two
   input names to the same output name: if this happens, the two are not
   merged but will result in an archive with two members with the same
   name (technically valid, but it's hard to access the second
   same-named member: you have to do an iteration over archive members).

This is used by the kernel's ctfarchive machinery (not yet upstream) to
encode CTF under member names like {module name}.ctf rather than
.ctf.CU, but it is anticipated that other large projects may wish to
have their own storage for CTF outside of .ctf sections and may wish to
have new naming schemes that suit their special-purpose consumers.

New in v3.
v4: check for strdup failure.
v5: fix tabdamage.

include/
	* ctf-api.h (ctf_link_add_cu_mapping): New.
	(ctf_link_memb_name_changer_f): New.
	(ctf_link_set_memb_name_changer): New.

libctf/
	* ctf-impl.h (ctf_file_t) <ctf_link_cu_mapping>: New.
	<ctf_link_memb_name_changer>: Likewise.
	<ctf_link_memb_name_changer_arg>: Likewise.
	* ctf-create.c (ctf_update): Update accordingly.
	* ctf-open.c (ctf_file_close): Likewise.
	* ctf-link.c (ctf_create_per_cu): Apply the cu mapping.
	(ctf_link_add_cu_mapping): New.
	(ctf_link_set_memb_name_changer): Likewise.
	(ctf_change_parent_name): New.
	(ctf_name_list_accum_cb_arg_t) <dynames>: New, storage for names
	allocated by the caller's ctf_link_memb_name_changer.
	<ndynames>: Likewise.
	(ctf_accumulate_archive_names): Call the ctf_link_memb_name_changer.
	(ctf_link_write): Likewise (for _CTF_SECTION only): also call
	ctf_change_parent_name.  Free any resulting names.
This commit is contained in:
Nick Alcock 2019-07-20 14:44:44 +01:00
parent eabb7154df
commit 49ea9b450b
7 changed files with 210 additions and 4 deletions

View File

@ -1,3 +1,9 @@
2019-07-30 Nick Alcock <nick.alcock@oracle.com>
* ctf-api.h (ctf_link_add_cu_mapping): New.
(ctf_link_memb_name_changer_f): New.
(ctf_link_set_memb_name_changer): New.
2019-07-13 Nick Alcock <nick.alcock@oracle.com>
* ctf-api.h (ECTF_INTERNAL): New.

View File

@ -421,6 +421,16 @@ extern int ctf_link_shuffle_syms (ctf_file_t *, ctf_link_iter_symbol_f *,
extern unsigned char *ctf_link_write (ctf_file_t *, size_t *size,
size_t threshold);
/* Specialist linker functions. These functions are not used by ld, but can be
used by other programs making use of the linker machinery for other purposes
to customize its output. */
extern int ctf_link_add_cu_mapping (ctf_file_t *, const char *from,
const char *to);
typedef char *ctf_link_memb_name_changer_f (ctf_file_t *,
const char *, void *);
extern void ctf_link_set_memb_name_changer
(ctf_file_t *, ctf_link_memb_name_changer_f *, void *);
extern void ctf_setdebug (int debug);
extern int ctf_getdebug (void);

View File

@ -1,3 +1,21 @@
2019-07-30 Nick Alcock <nick.alcock@oracle.com>
* ctf-impl.h (ctf_file_t) <ctf_link_cu_mapping>: New.
<ctf_link_memb_name_changer>: Likewise.
<ctf_link_memb_name_changer_arg>: Likewise.
* ctf-create.c (ctf_update): Update accordingly.
* ctf-open.c (ctf_file_close): Likewise.
* ctf-link.c (ctf_create_per_cu): Apply the cu mapping.
(ctf_link_add_cu_mapping): New.
(ctf_link_set_memb_name_changer): Likewise.
(ctf_change_parent_name): New.
(ctf_name_list_accum_cb_arg_t) <dynames>: New, storage for names
allocated by the caller's ctf_link_memb_name_changer.
<ndynames>: Likewise.
(ctf_accumulate_archive_names): Call the ctf_link_memb_name_changer.
(ctf_link_write): Likewise (for _CTF_SECTION only): also call
ctf_change_parent_name. Free any resulting names.
2019-07-13 Nick Alcock <nick.alcock@oracle.com>
* ctf-link.c (ctf_create_per_cu): New, refactored out of...

View File

@ -473,7 +473,10 @@ ctf_update (ctf_file_t *fp)
nfp->ctf_link_inputs = fp->ctf_link_inputs;
nfp->ctf_link_outputs = fp->ctf_link_outputs;
nfp->ctf_syn_ext_strtab = fp->ctf_syn_ext_strtab;
nfp->ctf_link_cu_mapping = fp->ctf_link_cu_mapping;
nfp->ctf_link_type_mapping = fp->ctf_link_type_mapping;
nfp->ctf_link_memb_name_changer = fp->ctf_link_memb_name_changer;
nfp->ctf_link_memb_name_changer_arg = fp->ctf_link_memb_name_changer_arg;
nfp->ctf_snapshot_lu = fp->ctf_snapshots;
@ -486,6 +489,7 @@ ctf_update (ctf_file_t *fp)
fp->ctf_link_inputs = NULL;
fp->ctf_link_outputs = NULL;
fp->ctf_syn_ext_strtab = NULL;
fp->ctf_link_cu_mapping = NULL;
fp->ctf_link_type_mapping = NULL;
fp->ctf_dvhash = NULL;

View File

@ -281,6 +281,10 @@ struct ctf_file
ctf_dynhash_t *ctf_link_inputs; /* Inputs to this link. */
ctf_dynhash_t *ctf_link_outputs; /* Additional outputs from this link. */
ctf_dynhash_t *ctf_link_type_mapping; /* Map input types to output types. */
ctf_dynhash_t *ctf_link_cu_mapping; /* Map CU names to CTF dict names. */
/* Allow the caller to change the name of link archive members. */
ctf_link_memb_name_changer_f *ctf_link_memb_name_changer;
void *ctf_link_memb_name_changer_arg; /* Argument for it. */
char *ctf_tmp_typeslice; /* Storage for slicing up type names. */
size_t ctf_tmp_typeslicelen; /* Size of the typeslice. */
void *ctf_specific; /* Data for ctf_get/setspecific(). */

View File

@ -182,9 +182,26 @@ static ctf_file_t *
ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
{
ctf_file_t *cu_fp;
const char *ctf_name = NULL;
char *dynname = NULL;
if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, filename)) == NULL)
/* First, check the mapping table and translate the per-CU name we use
accordingly. We check both the input filename and the CU name. Only if
neither are set do we fall back to the input filename as the per-CU
dictionary name. We prefer the filename because this is easier for likely
callers to determine. */
if (fp->ctf_link_cu_mapping)
{
if (((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, filename)) == NULL) &&
((ctf_name = ctf_dynhash_lookup (fp->ctf_link_cu_mapping, cuname)) == NULL))
ctf_name = filename;
}
if (ctf_name == NULL)
ctf_name = filename;
if ((cu_fp = ctf_dynhash_lookup (fp->ctf_link_outputs, ctf_name)) == NULL)
{
int err;
@ -197,7 +214,7 @@ ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
return NULL;
}
if ((dynname = strdup (filename)) == NULL)
if ((dynname = strdup (ctf_name)) == NULL)
goto oom;
if (ctf_dynhash_insert (fp->ctf_link_outputs, dynname, cu_fp) < 0)
goto oom;
@ -215,6 +232,79 @@ ctf_create_per_cu (ctf_file_t *fp, const char *filename, const char *cuname)
return NULL;
}
/* Add a mapping directing that the CU named FROM should have its
   conflicting/non-duplicate types (depending on link mode) go into a container
   named TO.  Many FROMs can share a TO: in this case, the effect on conflicting
   types is not yet defined (but in time an auto-renaming algorithm will be
   added: ugly, but there is really no right thing one can do in this
   situation).

   We forcibly add a container named TO in every case, even though it may well
   wind up empty, because clients that use this facility usually expect to find
   every TO container present, even if empty, and malfunction otherwise.

   FROM and TO are copied, so the caller keeps ownership of its own strings.

   Returns 0 on success.  On failure the error is recorded on FP with
   ctf_set_errno and -1 is returned (the two early hash-creation failures
   return ctf_set_errno's own result instead).  */

int
ctf_link_add_cu_mapping (ctf_file_t *fp, const char *from, const char *to)
{
  int err;
  char *f, *t;

  /* Both hashes are created lazily, the first time they are needed.  */
  if (fp->ctf_link_cu_mapping == NULL)
    fp->ctf_link_cu_mapping = ctf_dynhash_create (ctf_hash_string,
                                                  ctf_hash_eq_string, free,
                                                  free);
  if (fp->ctf_link_cu_mapping == NULL)
    return ctf_set_errno (fp, ENOMEM);

  if (fp->ctf_link_outputs == NULL)
    fp->ctf_link_outputs = ctf_dynhash_create (ctf_hash_string,
                                               ctf_hash_eq_string, free,
                                               ctf_file_close_thunk);
  if (fp->ctf_link_outputs == NULL)
    return ctf_set_errno (fp, ENOMEM);

  /* Duplicate both strings up front so that a single cleanup path can free
     whichever of them was successfully allocated.  */
  f = strdup (from);
  t = strdup (to);
  if (!f || !t)
    goto oom;

  /* Make sure the TO container exists, even if nothing ends up in it.  */
  if (ctf_create_per_cu (fp, t, t) == NULL)
    goto oom_noerrno;				/* Errno is set for us.  */

  /* NOTE(review): on success the mapping hash presumably owns F and T (it was
     created with free as both its key and value destructor), which is why
     they are not freed on the success path.  */
  err = ctf_dynhash_insert (fp->ctf_link_cu_mapping, f, t);
  if (err)
    {
      ctf_set_errno (fp, err);
      goto oom_noerrno;
    }

  return 0;

 oom:
  /* Report ENOMEM explicitly rather than trusting errno: if the first strdup
     failed and the second succeeded, the successful call may have changed
     errno, and not every platform's strdup sets errno on failure.  */
  ctf_set_errno (fp, ENOMEM);
 oom_noerrno:
  free (f);
  free (t);
  return -1;
}
/* Register CHANGER, with its opaque argument ARG, as the hook used to rename
   archive members.

   Where ctf_link_add_cu_mapping redirects individual names in arbitrary ways,
   this hook is meant for applying one regular transformation across many
   names.  It runs at ctf_link_write time, so unlike ctf_link_add_cu_mapping
   it cannot merge several CUs into one.

   CHANGER is handed a name and should return a new dynamically-allocated
   name, or NULL to leave that name as it is.  */

void
ctf_link_set_memb_name_changer (ctf_file_t *fp,
                                ctf_link_memb_name_changer_f *changer,
                                void *arg)
{
  /* The two fields are independent; both are simply recorded on FP.  */
  fp->ctf_link_memb_name_changer_arg = arg;
  fp->ctf_link_memb_name_changer = changer;
}
typedef struct ctf_link_in_member_cb_arg
{
ctf_file_t *out_fp;
@ -266,7 +356,7 @@ ctf_link_one_type (ctf_id_t type, int isroot _libctf_unused_, void *arg_)
ctf_set_errno (arg->out_fp, 0);
}
if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname,
if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name,
arg->cu_name)) == NULL)
return -1; /* Errno is set for us. */
@ -347,7 +437,7 @@ ctf_link_one_variable (const char *name, ctf_id_t type, void *arg_)
type only present in the child. Try adding to the child, creating if need
be. */
if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->arcname,
if ((per_cu_out_fp = ctf_create_per_cu (arg->out_fp, arg->file_name,
arg->cu_name)) == NULL)
return -1; /* Errno is set for us. */
@ -589,6 +679,8 @@ typedef struct ctf_name_list_accum_cb_arg
ctf_file_t *fp;
ctf_file_t **files;
size_t i;
char **dynames;
size_t ndynames;
} ctf_name_list_accum_cb_arg_t;
/* Accumulate the names and a count of the names in the link output hash,
@ -622,12 +714,51 @@ ctf_accumulate_archive_names (void *key, void *value, void *arg_)
ctf_set_errno (arg->fp, ENOMEM);
return;
}
/* Allow the caller to get in and modify the name at the last minute. If the
caller *does* modify the name, we have to stash away the new name the
caller returned so we can free it later on. (The original name is the key
of the ctf_link_outputs hash and is freed by the dynhash machinery.) */
if (fp->ctf_link_memb_name_changer)
{
char **dynames;
char *dyname;
void *nc_arg = fp->ctf_link_memb_name_changer_arg;
dyname = fp->ctf_link_memb_name_changer (fp, name, nc_arg);
if (dyname != NULL)
{
if ((dynames = realloc (arg->dynames,
sizeof (char *) * ++(arg->ndynames))) == NULL)
{
(arg->ndynames)--;
ctf_set_errno (arg->fp, ENOMEM);
return;
}
arg->dynames = dynames;
name = (const char *) dyname;
}
}
arg->names = names;
arg->names[(arg->i) - 1] = (char *) name;
arg->files = files;
arg->files[(arg->i) - 1] = fp;
}
/* Hash-iteration callback: set the parent name of the CTF file stored as
   VALUE to the string passed in ARG.  Used once the name transformer has
   renamed the parent CTF section.  KEY is ignored.  */

static void
ctf_change_parent_name (void *key _libctf_unused_, void *value, void *arg)
{
  ctf_parent_name_set ((ctf_file_t *) value, (const char *) arg);
}
/* Write out a CTF archive (if there are per-CU CTF files) or a CTF file
(otherwise) into a new dynamically-allocated string, and return it.
Members with sizes above THRESHOLD are compressed. */
@ -636,6 +767,7 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
{
ctf_name_list_accum_cb_arg_t arg;
char **names;
char *transformed_name = NULL;
ctf_file_t **files;
FILE *f = NULL;
int err;
@ -675,7 +807,22 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
}
arg.names = names;
memmove (&(arg.names[1]), arg.names, sizeof (char *) * (arg.i));
arg.names[0] = (char *) _CTF_SECTION;
if (fp->ctf_link_memb_name_changer)
{
void *nc_arg = fp->ctf_link_memb_name_changer_arg;
transformed_name = fp->ctf_link_memb_name_changer (fp, _CTF_SECTION,
nc_arg);
if (transformed_name != NULL)
{
arg.names[0] = transformed_name;
ctf_dynhash_iter (fp->ctf_link_outputs, ctf_change_parent_name,
transformed_name);
}
}
if ((files = realloc (arg.files,
sizeof (struct ctf_file *) * (arg.i + 1))) == NULL)
@ -736,6 +883,14 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
*size = fsize;
free (arg.names);
free (arg.files);
free (transformed_name);
if (arg.ndynames)
{
size_t i;
for (i = 0; i < arg.ndynames; i++)
free (arg.dynames[i]);
free (arg.dynames);
}
return buf;
err_no:
@ -746,6 +901,14 @@ ctf_link_write (ctf_file_t *fp, size_t *size, size_t threshold)
fclose (f);
free (arg.names);
free (arg.files);
free (transformed_name);
if (arg.ndynames)
{
size_t i;
for (i = 0; i < arg.ndynames; i++)
free (arg.dynames[i]);
free (arg.dynames);
}
ctf_dprintf ("Cannot write archive in link: %s failure: %s\n", errloc,
ctf_errmsg (ctf_errno (fp)));
return NULL;

View File

@ -1628,6 +1628,7 @@ ctf_file_close (ctf_file_t *fp)
ctf_dynhash_destroy (fp->ctf_link_inputs);
ctf_dynhash_destroy (fp->ctf_link_outputs);
ctf_dynhash_destroy (fp->ctf_link_type_mapping);
ctf_dynhash_destroy (fp->ctf_link_cu_mapping);
ctf_free (fp->ctf_sxlate);
ctf_free (fp->ctf_txlate);