Auto merge of #57937 - denzp:nvptx, r=nagisa

NVPTX target specification

This change adds a built-in `nvptx64-nvidia-cuda` GPGPU no-std target specification and basic PTX assembly smoke tests.

The approach taken here and the target spec are based on `ptx-linker`, a project started about 1.5 years ago. Key feature: bitcode object files are linked with LTO into the final module on the linker's side.

Prior to this change, the linker used the `ld` linker flavor, but I think having a dedicated CLI convention is more reliable.

Questions about further progress on reliable CUDA workflow with Rust:
1. Is it possible to create a test suite `codegen-asm` to verify end-to-end integration with LLVM backend?
1. How would it be better to organise no-std `compile-fail` tests: add `#![no_std]` where possible and mark others as `ignore-nvptx` directive, or alternatively, introduce `compile-fail-no-std` test suite?
1. Can we have the `ptx-linker` eventually integrated the same way as `rls` or `clippy`? Hopefully, this should allow statically linking against the LLVM used in Rust and getting rid of the [current hacky solution](https://github.com/denzp/rustc-llvm-proxy).
1. Am I missing some methods from `rustc_codegen_ssa::back::linker::Linker` that can be useful for bitcode-only linking?

Currently, there are no major public CUDA projects written in Rust that I'm aware of, but I expect that having a built-in target will create a solid foundation for further experiments and awesome crates.

Related to #38789
Fixes #38787
Fixes #38786
This commit is contained in:
bors 2019-02-01 23:43:34 +00:00
commit 2efa31b2d9
19 changed files with 489 additions and 67 deletions

View File

@ -168,7 +168,7 @@ matrix:
if: branch = auto
- env: IMAGE=i686-gnu-nopt
if: branch = auto
- env: IMAGE=wasm32-unknown
- env: IMAGE=test-various
if: branch = auto
- env: IMAGE=x86_64-gnu
if: branch = auto

View File

@ -831,6 +831,7 @@ impl Build {
!target.contains("msvc") &&
!target.contains("emscripten") &&
!target.contains("wasm32") &&
!target.contains("nvptx") &&
!target.contains("fuchsia") {
Some(self.cc(target))
} else {

View File

@ -156,7 +156,7 @@ pub fn check(build: &mut Build) {
panic!("the iOS target is only supported on macOS");
}
if target.contains("-none-") {
if target.contains("-none-") || target.contains("nvptx") {
if build.no_std(*target).is_none() {
let target = build.config.target_config.entry(target.clone())
.or_default();
@ -165,7 +165,7 @@ pub fn check(build: &mut Build) {
}
if build.no_std(*target) == Some(false) {
panic!("All the *-none-* targets are no-std targets")
panic!("All the *-none-* and nvptx* targets are no-std targets")
}
}

View File

@ -70,6 +70,7 @@ ENV TARGETS=$TARGETS,x86_64-sun-solaris
ENV TARGETS=$TARGETS,x86_64-unknown-linux-gnux32
ENV TARGETS=$TARGETS,x86_64-unknown-cloudabi
ENV TARGETS=$TARGETS,x86_64-fortanix-unknown-sgx
ENV TARGETS=$TARGETS,nvptx64-nvidia-cuda
ENV X86_FORTANIX_SGX_LIBS="/x86_64-fortanix-unknown-sgx/lib/"

View File

@ -13,14 +13,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
gdb \
xz-utils
# FIXME: build the `ptx-linker` instead.
RUN curl -sL https://github.com/denzp/rust-ptx-linker/releases/download/v0.9.0-alpha.2/rust-ptx-linker.linux64.tar.gz | \
tar -xzvC /usr/bin
RUN curl -sL https://nodejs.org/dist/v9.2.0/node-v9.2.0-linux-x64.tar.xz | \
tar -xJ
tar -xJ
COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh
ENV TARGETS=wasm32-unknown-unknown
ENV RUST_CONFIGURE_ARGS \
--set build.nodejs=/node-v9.2.0-linux-x64/bin/node \
--set rust.lld
@ -31,11 +33,18 @@ ENV RUST_CONFIGURE_ARGS \
# other contexts as well
ENV NO_DEBUG_ASSERTIONS=1
ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \
ENV WASM_TARGETS=wasm32-unknown-unknown
ENV WASM_SCRIPT python2.7 /checkout/x.py test --target $WASM_TARGETS \
src/test/run-make \
src/test/ui \
src/test/run-pass \
src/test/compile-fail \
src/test/mir-opt \
src/test/codegen-units \
src/libcore \
src/libcore
ENV NVPTX_TARGETS=nvptx64-nvidia-cuda
ENV NVPTX_SCRIPT python2.7 /checkout/x.py test --target $NVPTX_TARGETS \
src/test/run-make
ENV SCRIPT $WASM_SCRIPT && $NVPTX_SCRIPT

View File

@ -1675,6 +1675,12 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
}
false
}
/// Reports whether identifiers emitted into assembly for the current target
/// must follow strict naming rules.
///
/// At present this is true only when the target's architecture name contains
/// `nvptx`.
pub fn has_strict_asm_symbol_naming(&self) -> bool {
    let arch = &self.gcx.sess.target.target.arch;
    arch.contains("nvptx")
}
}
impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> {

View File

@ -149,6 +149,7 @@ pub fn linker_and_flavor(sess: &Session) -> (PathBuf, LinkerFlavor) {
LinkerFlavor::Ld => "ld",
LinkerFlavor::Msvc => "link.exe",
LinkerFlavor::Lld(_) => "lld",
LinkerFlavor::PtxLinker => "rust-ptx-linker",
}), flavor)),
(Some(linker), None) => {
let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") {

View File

@ -13,7 +13,7 @@ use rustc::hir::def_id::{LOCAL_CRATE, CrateNum};
use rustc::middle::dependency_format::Linkage;
use rustc::session::Session;
use rustc::session::config::{self, CrateType, OptLevel, DebugInfo,
CrossLangLto};
CrossLangLto, Lto};
use rustc::ty::TyCtxt;
use rustc_target::spec::{LinkerFlavor, LldFlavor};
use serialize::{json, Encoder};
@ -83,6 +83,10 @@ impl LinkerInfo {
LinkerFlavor::Lld(LldFlavor::Wasm) => {
Box::new(WasmLd::new(cmd, sess, self)) as Box<dyn Linker>
}
LinkerFlavor::PtxLinker => {
Box::new(PtxLinker { cmd, sess }) as Box<dyn Linker>
}
}
}
}
@ -1080,3 +1084,129 @@ fn exported_symbols(tcx: TyCtxt, crate_type: CrateType) -> Vec<String> {
symbols
}
/// Much simplified and explicit CLI for the NVPTX linker. The linker operates
/// with bitcode and uses LLVM backend to generate a PTX assembly.
pub struct PtxLinker<'a> {
    /// Linker command line being assembled; every `Linker` method that emits
    /// something appends its arguments here.
    cmd: Command,
    /// Compilation session, consulted for the LTO mode and for the
    /// `target-cpu` used as the fallback arch.
    sess: &'a Session,
}
/// Maps the generic `Linker` requests onto the PTX linker's command line.
///
/// Only the requests that make sense for bitcode-only linking produce
/// arguments; dylib, staticlib and framework requests panic, and the
/// remaining ones are accepted and ignored.
impl<'a> Linker for PtxLinker<'a> {
    // ---- Inputs -----------------------------------------------------------

    fn add_object(&mut self, path: &Path) {
        self.cmd.arg("--bitcode");
        self.cmd.arg(path);
    }

    fn link_rlib(&mut self, path: &Path) {
        self.cmd.arg("--rlib");
        self.cmd.arg(path);
    }

    // Whole-archive linking is passed exactly like a regular rlib.
    fn link_whole_rlib(&mut self, path: &Path) {
        self.cmd.arg("--rlib");
        self.cmd.arg(path);
    }

    fn include_path(&mut self, path: &Path) {
        self.cmd.arg("-L");
        self.cmd.arg(path);
    }

    // ---- Output and modes ---------------------------------------------------

    fn output_filename(&mut self, path: &Path) {
        self.cmd.arg("-o");
        self.cmd.arg(path);
    }

    fn debuginfo(&mut self) {
        self.cmd.arg("--debug");
    }

    fn args(&mut self, args: &[String]) {
        self.cmd.args(args);
    }

    fn optimize(&mut self) {
        // Every LTO mode except `No` asks the linker for LTO optimisation.
        let lto_requested = match self.sess.lto() {
            Lto::No => false,
            Lto::Thin | Lto::Fat | Lto::ThinLocal => true,
        };
        if lto_requested {
            self.cmd.arg("-Olto");
        }
    }

    fn finalize(&mut self) -> Command {
        // Provide the linker with fallback to internal `target-cpu`: an
        // explicit `-C target-cpu` wins, otherwise the target's default CPU.
        let sess = self.sess;
        let fallback_arch = sess.opts.cg.target_cpu
            .as_ref()
            .unwrap_or(&sess.target.target.options.cpu);
        self.cmd.arg("--fallback-arch");
        self.cmd.arg(fallback_arch);

        // Hand the finished command to the caller, leaving an empty one behind.
        ::std::mem::replace(&mut self.cmd, Command::new(""))
    }

    // ---- Unsupported requests -----------------------------------------------

    fn link_dylib(&mut self, _lib: &str) {
        panic!("external dylibs not supported")
    }

    fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) {
        panic!("external dylibs not supported")
    }

    fn link_staticlib(&mut self, _lib: &str) {
        panic!("staticlibs not supported")
    }

    fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) {
        panic!("staticlibs not supported")
    }

    fn framework_path(&mut self, _path: &Path) {
        panic!("frameworks not supported")
    }

    fn link_framework(&mut self, _framework: &str) {
        panic!("frameworks not supported")
    }

    // ---- Requests that are accepted and ignored -------------------------------

    fn position_independent_executable(&mut self) {}

    fn no_position_independent_executable(&mut self) {}

    fn full_relro(&mut self) {}

    fn partial_relro(&mut self) {}

    fn no_relro(&mut self) {}

    fn build_static_executable(&mut self) {}

    fn build_dylib(&mut self, _out_filename: &Path) {}

    fn gc_sections(&mut self, _keep_metadata: bool) {}

    fn pgo_gen(&mut self) {}

    fn no_default_libraries(&mut self) {}

    fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) {}

    fn subsystem(&mut self, _subsystem: &str) {}

    fn group_start(&mut self) {}

    fn group_end(&mut self) {}

    fn cross_lang_lto(&mut self) {}
}

View File

@ -12,6 +12,7 @@
#![feature(nll)]
#![allow(unused_attributes)]
#![feature(rustc_diagnostic_macros)]
#![feature(in_band_lifetimes)]
#![recursion_limit="256"]

View File

@ -221,7 +221,7 @@ fn get_symbol_hash<'a, 'tcx>(
}
fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName {
let mut buffer = SymbolPathBuffer::new();
let mut buffer = SymbolPathBuffer::new(tcx);
item_path::with_forced_absolute_paths(|| {
tcx.push_item_path(&mut buffer, def_id, false);
});
@ -317,7 +317,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id));
let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx);
if instance.is_vtable_shim() {
buf.push("{{vtable-shim}}");
@ -343,22 +343,25 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
struct SymbolPathBuffer {
    // Mangled symbol text accumulated so far.
    result: String,
    // Scratch buffer holding the sanitized form of the path component that is
    // currently being appended; cleared before each component.
    temp_buf: String,
    // Cached `tcx.has_strict_asm_symbol_naming()`: when set, '.', '-' and ':'
    // are emitted as '$' instead of '.'.
    strict_naming: bool,
}
impl SymbolPathBuffer {
fn new() -> Self {
fn new(tcx: TyCtxt<'_, '_, '_>) -> Self {
let mut result = SymbolPathBuffer {
result: String::with_capacity(64),
temp_buf: String::with_capacity(16),
strict_naming: tcx.has_strict_asm_symbol_naming(),
};
result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested
result
}
fn from_interned(symbol: ty::SymbolName) -> Self {
fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'_, '_, '_>) -> Self {
let mut result = SymbolPathBuffer {
result: String::with_capacity(64),
temp_buf: String::with_capacity(16),
strict_naming: tcx.has_strict_asm_symbol_naming(),
};
result.result.push_str(&symbol.as_str());
result
@ -375,6 +378,79 @@ impl SymbolPathBuffer {
let _ = write!(self.result, "17h{:016x}E", hash);
self.result
}
// Name sanitation. LLVM will happily accept identifiers with weird names, but
// gas doesn't!
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
// NVPTX assembly has more strict naming rules than gas, so additionally, dots
// are replaced with '$' there.
fn sanitize_and_append(&mut self, s: &str) {
self.temp_buf.clear();
for c in s.chars() {
match c {
// Escape these with $ sequences
'@' => self.temp_buf.push_str("$SP$"),
'*' => self.temp_buf.push_str("$BP$"),
'&' => self.temp_buf.push_str("$RF$"),
'<' => self.temp_buf.push_str("$LT$"),
'>' => self.temp_buf.push_str("$GT$"),
'(' => self.temp_buf.push_str("$LP$"),
')' => self.temp_buf.push_str("$RP$"),
',' => self.temp_buf.push_str("$C$"),
'-' | ':' => if self.strict_naming {
// NVPTX doesn't support these characters in symbol names.
self.temp_buf.push('$')
}
else {
// '.' doesn't occur in types and functions, so reuse it
// for ':' and '-'
self.temp_buf.push('.')
},
'.' => if self.strict_naming {
self.temp_buf.push('$')
}
else {
self.temp_buf.push('.')
},
// These are legal symbols
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => self.temp_buf.push(c),
_ => {
self.temp_buf.push('$');
for c in c.escape_unicode().skip(1) {
match c {
'{' => {}
'}' => self.temp_buf.push('$'),
c => self.temp_buf.push(c),
}
}
}
}
}
let need_underscore = {
// Underscore-qualify anything that didn't start as an ident.
!self.temp_buf.is_empty()
&& self.temp_buf.as_bytes()[0] != '_' as u8
&& !(self.temp_buf.as_bytes()[0] as char).is_xid_start()
};
let _ = write!(
self.result,
"{}",
self.temp_buf.len() + (need_underscore as usize)
);
if need_underscore {
self.result.push('_');
}
self.result.push_str(&self.temp_buf);
}
}
impl ItemPathBuffer for SymbolPathBuffer {
@ -384,59 +460,6 @@ impl ItemPathBuffer for SymbolPathBuffer {
}
fn push(&mut self, text: &str) {
self.temp_buf.clear();
let need_underscore = sanitize(&mut self.temp_buf, text);
let _ = write!(
self.result,
"{}",
self.temp_buf.len() + (need_underscore as usize)
);
if need_underscore {
self.result.push('_');
}
self.result.push_str(&self.temp_buf);
self.sanitize_and_append(text);
}
}
// Name sanitation. LLVM will happily accept identifiers with weird names, but
// gas doesn't!
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
//
// Appends the sanitized form of `s` to `result` and returns true if an
// underscore must be added at the start.
pub fn sanitize(result: &mut String, s: &str) -> bool {
    for ch in s.chars() {
        // Characters with a dedicated `$..$` escape sequence.
        let named_escape = match ch {
            '@' => Some("$SP$"),
            '*' => Some("$BP$"),
            '&' => Some("$RF$"),
            '<' => Some("$LT$"),
            '>' => Some("$GT$"),
            '(' => Some("$LP$"),
            ')' => Some("$RP$"),
            ',' => Some("$C$"),
            _ => None,
        };
        if let Some(sequence) = named_escape {
            result.push_str(sequence);
            continue;
        }

        match ch {
            // '.' doesn't occur in types and functions, so reuse it
            // for ':' and '-'
            '-' | ':' => result.push('.'),

            // These are legal symbols
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(ch),

            // Anything else becomes `$u<hex>$`: the unicode escape without
            // its leading backslash and braces.
            _ => {
                result.push('$');
                let escaped = ch.escape_unicode().skip(1).filter_map(|e| match e {
                    '{' => None,
                    '}' => Some('$'),
                    other => Some(other),
                });
                result.extend(escaped);
            }
        }
    }

    // Underscore-qualify anything that didn't start as an ident.
    match result.as_bytes().first() {
        Some(&first) => first != b'_' && !(first as char).is_xid_start(),
        None => false,
    }
}

View File

@ -75,6 +75,7 @@ pub enum LinkerFlavor {
Ld,
Msvc,
Lld(LldFlavor),
PtxLinker,
}
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash,
@ -143,6 +144,7 @@ flavor_mappings! {
((LinkerFlavor::Gcc), "gcc"),
((LinkerFlavor::Ld), "ld"),
((LinkerFlavor::Msvc), "msvc"),
((LinkerFlavor::PtxLinker), "ptx-linker"),
((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"),
((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"),
((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"),
@ -455,6 +457,8 @@ supported_targets! {
("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx),
("x86_64-unknown-uefi", x86_64_unknown_uefi),
("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda),
}
/// Everything `rustc` knows about how to compile for a specific target.

View File

@ -0,0 +1,73 @@
use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions};
use spec::abi::Abi;
/// Builds the built-in `nvptx64-nvidia-cuda` target specification.
pub fn target() -> TargetResult {
    let options = TargetOptions {
        // The linker can be installed from `crates.io`.
        linker: Some("rust-ptx-linker".to_string()),

        // Default arch. With the `ptx-linker` approach, it can be overridden
        // later via link flags.
        cpu: "sm_30".to_string(),

        // FIXME: create tests for the atomics.
        max_atomic_width: Some(64),

        // Unwinding on CUDA is neither feasible nor useful.
        panic_strategy: PanicStrategy::Abort,

        // Needed to use `dylib` and `bin` crate types and the linker.
        dynamic_linking: true,
        executables: true,

        // Avoid using dylib because it contains metadata not supported
        // by the LLVM NVPTX backend.
        only_cdylib: true,

        // Let the `ptx-linker` handle LLVM lowering into MC / assembly.
        obj_is_bitcode: true,

        // Convenient and predictable naming scheme.
        dll_prefix: String::new(),
        dll_suffix: ".ptx".to_string(),
        exe_suffix: ".ptx".to_string(),

        // Disable the MergeFunctions LLVM optimisation pass because it can
        // produce kernel functions that call other kernel functions.
        // This behavior is not supported by the PTX ISA.
        merge_functions: MergeFunctions::Disabled,

        // FIXME: enable compilation tests for the target and
        // create the tests for this.
        abi_blacklist: vec![
            Abi::Cdecl,
            Abi::Stdcall,
            Abi::Fastcall,
            Abi::Vectorcall,
            Abi::Thiscall,
            Abi::Aapcs,
            Abi::Win64,
            Abi::SysV64,
            Abi::Msp430Interrupt,
            Abi::X86Interrupt,
            Abi::AmdGpuKernel,
        ],

        .. Default::default()
    };

    Ok(Target {
        arch: "nvptx64".to_string(),
        data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(),
        llvm_target: "nvptx64-nvidia-cuda".to_string(),

        target_os: "cuda".to_string(),
        target_vendor: "nvidia".to_string(),
        target_env: String::new(),

        linker_flavor: LinkerFlavor::PtxLinker,

        target_endian: "little".to_string(),
        target_pointer_width: "64".to_string(),
        target_c_int_width: "32".to_string(),

        options,
    })
}

View File

@ -0,0 +1,12 @@
-include ../../run-make-fulldeps/tools.mk
# Only meaningful for the NVPTX target; for any other target `all` is a no-op.
# main.rs is built twice as a `bin` crate with the arch set to sm_60, once
# through a raw linker argument (`-C link-arg=--arch=sm_60`) and once through
# `-C target-cpu=sm_60`. Both resulting PTX files are then verified against the
# expectations embedded in main.rs.
ifeq ($(TARGET),nvptx64-nvidia-cuda)
all:
$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C link-arg=--arch=sm_60 -o $(TMPDIR)/main.link_arg.ptx
$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C target-cpu=sm_60 -o $(TMPDIR)/main.target_cpu.ptx
FileCheck main.rs --input-file $(TMPDIR)/main.link_arg.ptx
FileCheck main.rs --input-file $(TMPDIR)/main.target_cpu.ptx
else
all:
endif

View File

@ -0,0 +1,28 @@
#![no_std]
#![no_main]
#![deny(warnings)]
#![feature(abi_ptx, core_intrinsics)]
// Check the overridden CUDA arch.
// CHECK: .target sm_60
// CHECK: .address_size 64
// Verify that no extra function declarations are present.
// CHECK-NOT: .func
// CHECK-LABEL: .visible .entry top_kernel(
// Kernel entry point: reads the value behind `a`, adds 5 and stores the
// result through `b`. Exported unmangled so the expectations can match the
// entry by name.
#[no_mangle]
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
    // CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5;
    *b = *a + 5;
}
// Verify that no extra function definitions are there.
// CHECK-NOT: .func
// CHECK-NOT: .entry
// Panic handler for this `no_std` crate: executes the breakpoint intrinsic
// and then tells the compiler that control never continues past it.
#[panic_handler]
unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
    core::intrinsics::breakpoint();
    core::hint::unreachable_unchecked();
}

View File

@ -0,0 +1,10 @@
-include ../../run-make-fulldeps/tools.mk
# Only meaningful for the NVPTX target; for any other target `all` is a no-op.
# dep.rs is built as an rlib (without -O), then kernel.rs is built as an
# optimised cdylib, and the resulting kernel.ptx is verified against the
# expectations embedded in kernel.rs.
ifeq ($(TARGET),nvptx64-nvidia-cuda)
all:
$(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET)
$(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET)
FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx
else
all:
endif

View File

@ -0,0 +1,14 @@
#![no_std]
#![deny(warnings)]
// Squares `a` with wrapping arithmetic, so no overflow check is involved.
// Marked `#[inline(never)]` and `#[no_mangle]`; the kernel's expectations
// look for a call to this exact symbol name.
#[inline(never)]
#[no_mangle]
pub fn wrapping_external_fn(a: u32) -> u32 {
    a.wrapping_mul(a)
}
// Squares `a` with the plain `*` operator. The kernel's expectations look for
// a branch to a panic call inside this function, i.e. the multiplication is
// expected to be overflow-checked in the way this crate is built.
#[inline(never)]
#[no_mangle]
pub fn panicking_external_fn(a: u32) -> u32 {
    a * a
}

View File

@ -0,0 +1,59 @@
#![no_std]
#![deny(warnings)]
#![feature(abi_ptx, core_intrinsics)]
extern crate dep;
// Verify the default CUDA arch.
// CHECK: .target sm_30
// CHECK: .address_size 64
// Make sure declarations are there.
// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn
// CHECK: .func (.param .b32 func_retval0) panicking_external_fn
// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]]
// CHECK-LABEL: .visible .entry top_kernel(
// Kernel entry point: passes the value behind `a` to both functions from
// `dep` and stores the sum of their results through `b`.
#[no_mangle]
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
    // CHECK: call.uni (retval0),
    // CHECK-NEXT: wrapping_external_fn
    // CHECK: ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0];
    let lhs = dep::wrapping_external_fn(*a);
    // CHECK: call.uni (retval0),
    // CHECK-NEXT: panicking_external_fn
    // CHECK: ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0];
    let rhs = dep::panicking_external_fn(*a);
    // CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]];
    // CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]];
    *b = lhs + rhs;
}
// Verify that external function bodies are available.
// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn
// CHECK: {
// CHECK: st.param.b32 [func_retval0+0], %{{r[0-9]+}};
// CHECK: }
// Also verify panic behavior.
// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn
// CHECK: {
// CHECK: %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]];
// CHECK: [[PANIC_LABEL]]:
// CHECK: call.uni
// CHECK: [[PANIC_HANDLER]]
// CHECK: }
// Verify whether our dummy panic formatter has a correct body.
// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]]()
// CHECK: {
// CHECK: trap;
// CHECK: }
// Panic handler for this `no_std` crate: executes the breakpoint intrinsic
// and then tells the compiler that control never continues past it. The
// expectations above require the emitted panic formatter body to contain
// a `trap` instruction.
#[panic_handler]
unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
    core::intrinsics::breakpoint();
    core::hint::unreachable_unchecked();
}

View File

@ -0,0 +1,9 @@
-include ../../run-make-fulldeps/tools.mk
# Only meaningful for the NVPTX target; for any other target `all` is a no-op.
# kernel.rs is built as an optimised rlib with assembly and LLVM IR output,
# and the emitted kernel.s is verified against the expectations embedded in
# kernel.rs.
ifeq ($(TARGET),nvptx64-nvidia-cuda)
all:
$(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET)
FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s
else
all:
endif

View File

@ -0,0 +1,41 @@
#![no_std]
#![deny(warnings)]
#![feature(abi_ptx)]
// Verify the default CUDA arch.
// CHECK: .target sm_30
// CHECK: .address_size 64
// Verify function name doesn't contain unacceptable characters.
// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]]
// CHECK-LABEL: .visible .entry top_kernel(
// Kernel entry point: wraps the value behind `a` in `MyStruct` and stores its
// square through `b`. The expectations require this to be a call to the
// mangled `square` symbol captured earlier in the file.
#[no_mangle]
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
    // CHECK: call.uni (retval0),
    // CHECK-NEXT: [[IMPL_FN]]
    *b = deep::private::MyStruct::new(*a).square();
}
// Nested modules around a generic struct, giving `square` a multi-component
// item path. The expectations at the top of the file require its mangled
// name to consist only of `[a-zA-Z0-9$_]` characters.
pub mod deep {
    pub mod private {
        // Tuple struct wrapping a single value of type `T`.
        pub struct MyStruct<T>(T);

        impl MyStruct<u32> {
            // Wraps `a`.
            pub fn new(a: u32) -> Self {
                MyStruct(a)
            }

            // Returns the wrapped value multiplied by itself using wrapping
            // arithmetic. Marked `#[inline(never)]`; the kernel's
            // expectations look for a call to this function's symbol.
            #[inline(never)]
            pub fn square(&self) -> u32 {
                self.0.wrapping_mul(self.0)
            }
        }
    }
}
// Verify that external function bodies are available.
// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]]
// CHECK: {
// CHECK: mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}}
// CHECK: }