/* Plugin for offload execution on Intel MIC devices. Copyright (C) 2014-2015 Free Software Foundation, Inc. Contributed by Ilya Verbin . This file is part of the GNU Offloading and Multi Processing Library (libgomp). Libgomp is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Host side part of a libgomp plugin. */ #include #include #include #include #include #include #include #include "libgomp-plugin.h" #include "compiler_if_host.h" #include "main_target_image.h" #include "gomp-constants.h" #define LD_LIBRARY_PATH_ENV "LD_LIBRARY_PATH" #define MIC_LD_LIBRARY_PATH_ENV "MIC_LD_LIBRARY_PATH" #define OFFLOAD_ACTIVE_WAIT_ENV "OFFLOAD_ACTIVE_WAIT" #ifdef DEBUG #define TRACE(...) \ { \ fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__); \ fprintf (stderr, __VA_ARGS__); \ fprintf (stderr, "\n"); \ } #else #define TRACE { } #endif /* Start/end addresses of functions and global variables on a device. */ typedef std::vector AddrVect; /* Addresses for one image and all devices. */ typedef std::vector DevAddrVect; /* Addresses for all images and all devices. */ typedef std::map ImgDevAddrMap; /* Image descriptor needed by __offload_[un]register_image. 
*/ struct TargetImageDesc { int64_t size; /* 10 characters is enough for max int value. */ char name[sizeof ("lib0000000000.so")]; char data[]; }; /* Image descriptors, indexed by a pointer obtained from libgomp. */ typedef std::map ImgDescMap; /* Total number of available devices. */ static int num_devices; /* Total number of shared libraries with offloading to Intel MIC. */ static int num_images; /* Two dimensional array: one key is a pointer to image, second key is number of device. Contains a vector of pointer pairs. */ static ImgDevAddrMap *address_table; /* Descriptors of all images, registered in liboffloadmic. */ static ImgDescMap *image_descriptors; /* Thread-safe registration of the main image. */ static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT; static VarDesc vd_host2tgt = { { 1, 1 }, /* dst, src */ { 1, 0 }, /* in, out */ 1, /* alloc_if */ 1, /* free_if */ 4, /* align */ 0, /* mic_offset */ { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, is_stack_buf, sink_addr, alloc_disp, is_noncont_src, is_noncont_dst */ 0, /* offset */ 0, /* size */ 1, /* count */ 0, /* alloc */ 0, /* into */ 0 /* ptr */ }; static VarDesc vd_tgt2host = { { 1, 1 }, /* dst, src */ { 0, 1 }, /* in, out */ 1, /* alloc_if */ 1, /* free_if */ 4, /* align */ 0, /* mic_offset */ { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, is_stack_buf, sink_addr, alloc_disp, is_noncont_src, is_noncont_dst */ 0, /* offset */ 0, /* size */ 1, /* count */ 0, /* alloc */ 0, /* into */ 0 /* ptr */ }; __attribute__((constructor)) static void init (void) { const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV); const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV); const char *active_wait = getenv (OFFLOAD_ACTIVE_WAIT_ENV); /* Disable active wait by default to avoid useless CPU usage. 
*/ if (!active_wait) setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0); if (!ld_lib_path) goto out; /* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is required by liboffloadmic. */ if (!mic_lib_path) setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1); else { size_t len = strlen (mic_lib_path) + strlen (ld_lib_path) + 2; bool use_alloca = len <= 2048; char *mic_lib_path_new = (char *) (use_alloca ? alloca (len) : malloc (len)); if (!mic_lib_path_new) { fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); exit (1); } sprintf (mic_lib_path_new, "%s:%s", mic_lib_path, ld_lib_path); setenv (MIC_LD_LIBRARY_PATH_ENV, mic_lib_path_new, 1); if (!use_alloca) free (mic_lib_path_new); } out: address_table = new ImgDevAddrMap; image_descriptors = new ImgDescMap; num_devices = _Offload_number_of_devices (); } extern "C" const char * GOMP_OFFLOAD_get_name (void) { const char *res = "intelmic"; TRACE ("(): return %s", res); return res; } extern "C" unsigned int GOMP_OFFLOAD_get_caps (void) { unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400; TRACE ("(): return %x", res); return res; } extern "C" enum offload_target_type GOMP_OFFLOAD_get_type (void) { enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC; TRACE ("(): return %d", res); return res; } extern "C" int GOMP_OFFLOAD_get_num_devices (void) { TRACE ("(): return %d", num_devices); return num_devices; } static void offload (const char *file, uint64_t line, int device, const char *name, int num_vars, VarDesc *vars, const void **async_data) { OFFLOAD ofld = __offload_target_acquire1 (&device, file, line); if (ofld) { if (async_data == NULL) __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0, NULL, NULL); else { OffloadFlags flags; flags.flags = 0; flags.bits.omp_async = 1; __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0, NULL, async_data, 0, NULL, flags, NULL); } } else { fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line); exit (1); } } static void 
unregister_main_image () { __offload_unregister_image (&main_target_image); } static void register_main_image () { /* Do not check the return value, because old versions of liboffloadmic did not have return values. */ __offload_register_image (&main_target_image); /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when asynchronous task on target is completed. */ __offload_register_task_callback (GOMP_PLUGIN_target_task_completion); if (atexit (unregister_main_image) != 0) { fprintf (stderr, "%s: atexit failed\n", __FILE__); exit (1); } } /* liboffloadmic loads and runs offload_target_main on all available devices during a first call to offload (). */ extern "C" void GOMP_OFFLOAD_init_device (int device) { TRACE ("(device = %d)", device); pthread_once (&main_image_is_registered, register_main_image); offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0, NULL, NULL); } extern "C" void GOMP_OFFLOAD_fini_device (int device) { TRACE ("(device = %d)", device); /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400. */ abort (); } static void get_target_table (int device, int &num_funcs, int &num_vars, void **&table) { VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host }; vd1[0].ptr = &num_funcs; vd1[0].size = sizeof (num_funcs); vd1[1].ptr = &num_vars; vd1[1].size = sizeof (num_vars); offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2, vd1, NULL); int table_size = num_funcs + 2 * num_vars; if (table_size > 0) { table = new void * [table_size]; VarDesc vd2; vd2 = vd_tgt2host; vd2.ptr = table; vd2.size = table_size * sizeof (void *); offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1, &vd2, NULL); } } /* Offload TARGET_IMAGE to all available devices and fill address_table with corresponding target addresses. 
*/
/* TARGET_IMAGE is read as an array of two pointers: the start and the end of
   the image.  The image bytes are copied into a freshly malloc'ed
   TargetImageDesc that gets a unique "lib<number>.so" name, and that
   descriptor is registered with liboffloadmic.  The descriptor is stored in
   image_descriptors and the per-device address vectors in address_table,
   both keyed by TARGET_IMAGE.  Exits the process if memory cannot be
   allocated.  */
static void
offload_image (const void *target_image)
{
  void *image_start = ((void **) target_image)[0];
  void *image_end   = ((void **) target_image)[1];

  TRACE ("(target_image = %p { %p, %p })",
	 target_image, image_start, image_end);

  int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
  TargetImageDesc *image
    = (TargetImageDesc *) malloc (offsetof (TargetImageDesc, data)
				  + image_size);
  if (!image)
    {
      fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
      exit (1);
    }

  image->size = image_size;
  /* Bounded write: the name field is sized for exactly ten digits.  */
  snprintf (image->name, sizeof (image->name), "lib%010d.so", num_images++);
  memcpy (image->data, image_start, image->size);

  /* The size is 64 bits wide; cast so that it matches the conversion.  */
  TRACE ("() __offload_register_image %s { %p, %lld }",
	 image->name, image_start, (long long) image->size);
  /* Do not check the return value, because old versions of liboffloadmic did
     not have return values.  */
  __offload_register_image (image);

  /* Receive tables for target_image from all devices.  */
  DevAddrVect dev_table;
  for (int dev = 0; dev < num_devices; dev++)
    {
      int num_funcs = 0;
      int num_vars = 0;
      void **table = NULL;

      get_target_table (dev, num_funcs, num_vars, table);

      AddrVect curr_dev_table;

      /* Functions occupy one slot each; the end address is start + 1.  */
      for (int i = 0; i < num_funcs; i++)
	{
	  addr_pair tgt_addr;
	  tgt_addr.start = (uintptr_t) table[i];
	  tgt_addr.end = tgt_addr.start + 1;
	  TRACE ("() func %d:\t0x%llx..0x%llx", i,
		 (unsigned long long) tgt_addr.start,
		 (unsigned long long) tgt_addr.end);
	  curr_dev_table.push_back (tgt_addr);
	}

      /* Variables follow the functions and occupy two slots each: the
	 address and the size.  */
      for (int i = 0; i < num_vars; i++)
	{
	  void **var_entry = table + num_funcs + i * 2;
	  addr_pair tgt_addr;
	  tgt_addr.start = (uintptr_t) var_entry[0];
	  tgt_addr.end = tgt_addr.start + (uintptr_t) var_entry[1];
	  TRACE ("() var %d:\t0x%llx..0x%llx", i,
		 (unsigned long long) tgt_addr.start,
		 (unsigned long long) tgt_addr.end);
	  curr_dev_table.push_back (tgt_addr);
	}

      dev_table.push_back (curr_dev_table);
      /* TABLE stays NULL when the device reported nothing; delete[] of a
	 null pointer is a no-op.  */
      delete [] table;
    }

  address_table->insert (std::make_pair (target_image, dev_table));
  image_descriptors->insert (std::make_pair (target_image, image));
}

/* Return the libgomp version number we're compatible with.  There is no
   requirement for cross-version compatibility.
*/ extern "C" unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; } extern "C" int GOMP_OFFLOAD_load_image (int device, const unsigned version, void *target_image, addr_pair **result) { TRACE ("(device = %d, target_image = %p)", device, target_image); if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC) GOMP_PLUGIN_fatal ("Offload data incompatible with intelmic plugin" " (expected %u, received %u)", GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version)); /* If target_image is already present in address_table, then there is no need to offload it. */ if (address_table->count (target_image) == 0) offload_image (target_image); AddrVect *curr_dev_table = &(*address_table)[target_image][device]; int table_size = curr_dev_table->size (); addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair)); if (table == NULL) { fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); exit (1); } std::copy (curr_dev_table->begin (), curr_dev_table->end (), table); *result = table; return table_size; } extern "C" void GOMP_OFFLOAD_unload_image (int device, unsigned version, const void *target_image) { if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC) return; TRACE ("(device = %d, target_image = %p)", device, target_image); /* liboffloadmic unloads the image from all available devices. 
*/ if (image_descriptors->count (target_image) > 0) { TargetImageDesc *image_desc = (*image_descriptors)[target_image]; __offload_unregister_image (image_desc); free (image_desc); address_table->erase (target_image); image_descriptors->erase (target_image); } } extern "C" void * GOMP_OFFLOAD_alloc (int device, size_t size) { TRACE ("(device = %d, size = %d)", device, size); void *tgt_ptr; VarDesc vd[2] = { vd_host2tgt, vd_tgt2host }; vd[0].ptr = &size; vd[0].size = sizeof (size); vd[1].ptr = &tgt_ptr; vd[1].size = sizeof (void *); offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd, NULL); return tgt_ptr; } extern "C" void GOMP_OFFLOAD_free (int device, void *tgt_ptr) { TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr); VarDesc vd = vd_host2tgt; vd.ptr = &tgt_ptr; vd.size = sizeof (void *); offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd, NULL); } extern "C" void * GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr, size_t size) { TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)", device, tgt_ptr, host_ptr, size); if (!size) return tgt_ptr; VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; vd1[0].ptr = &tgt_ptr; vd1[0].size = sizeof (void *); vd1[1].ptr = &size; vd1[1].size = sizeof (size); offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2, vd1, NULL); VarDesc vd2 = vd_host2tgt; vd2.ptr = (void *) host_ptr; vd2.size = size; offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1, &vd2, NULL); return tgt_ptr; } extern "C" void * GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr, size_t size) { TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)", device, host_ptr, tgt_ptr, size); if (!size) return host_ptr; VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; vd1[0].ptr = &tgt_ptr; vd1[0].size = sizeof (void *); vd1[1].ptr = &size; vd1[1].size = sizeof (size); offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2, 
vd1, NULL); VarDesc vd2 = vd_tgt2host; vd2.ptr = (void *) host_ptr; vd2.size = size; offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1, &vd2, NULL); return host_ptr; } extern "C" void * GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr, size_t size) { TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)", device, dst_ptr, src_ptr, size); if (!size) return dst_ptr; VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt }; vd[0].ptr = &dst_ptr; vd[0].size = sizeof (void *); vd[1].ptr = &src_ptr; vd[1].size = sizeof (void *); vd[2].ptr = &size; vd[2].size = sizeof (size); offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd, NULL); return dst_ptr; } extern "C" void GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, void *async_data) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device, tgt_fn, tgt_vars, async_data); VarDesc vd[2] = { vd_host2tgt, vd_host2tgt }; vd[0].ptr = &tgt_fn; vd[0].size = sizeof (void *); vd[1].ptr = &tgt_vars; vd[1].size = sizeof (void *); offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd, (const void **) async_data); } extern "C" void GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars); GOMP_OFFLOAD_async_run (device, tgt_fn, tgt_vars, NULL); }