2014-11-10 17:12:42 +01:00
|
|
|
|
/* Target code for NVPTX.
|
2017-01-01 13:07:43 +01:00
|
|
|
|
Copyright (C) 2014-2017 Free Software Foundation, Inc.
|
2014-11-10 17:12:42 +01:00
|
|
|
|
Contributed by Bernd Schmidt <bernds@codesourcery.com>
|
|
|
|
|
|
|
|
|
|
This file is part of GCC.
|
|
|
|
|
|
|
|
|
|
GCC is free software; you can redistribute it and/or modify it
|
|
|
|
|
under the terms of the GNU General Public License as published
|
|
|
|
|
by the Free Software Foundation; either version 3, or (at your
|
|
|
|
|
option) any later version.
|
|
|
|
|
|
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
|
|
|
License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with GCC; see the file COPYING3. If not see
|
|
|
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
|
|
#include "config.h"
|
2014-11-18 13:13:26 +01:00
|
|
|
|
#include <sstream>
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#include "system.h"
|
|
|
|
|
#include "coretypes.h"
|
2015-07-08 02:53:03 +02:00
|
|
|
|
#include "backend.h"
|
2015-10-16 21:47:09 +02:00
|
|
|
|
#include "target.h"
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#include "rtl.h"
|
2015-10-16 21:47:09 +02:00
|
|
|
|
#include "tree.h"
|
|
|
|
|
#include "cfghooks.h"
|
2015-07-08 02:53:03 +02:00
|
|
|
|
#include "df.h"
|
Move MEMMODEL_* from coretypes.h to memmodel.h
2016-10-13 Thomas Preud'homme <thomas.preudhomme@arm.com>
gcc/
* coretypes.h: Move MEMMODEL_* macros and enum memmodel definition
into ...
* memmodel.h: This file.
* alias.c, asan.c, auto-inc-dec.c, bb-reorder.c, bt-load.c,
caller-save.c, calls.c, ccmp.c, cfgbuild.c, cfgcleanup.c,
cfgexpand.c, cfgloopanal.c, cfgrtl.c, cilk-common.c, combine.c,
combine-stack-adj.c, common/config/aarch64/aarch64-common.c,
common/config/arm/arm-common.c, common/config/bfin/bfin-common.c,
common/config/c6x/c6x-common.c, common/config/i386/i386-common.c,
common/config/ia64/ia64-common.c, common/config/nvptx/nvptx-common.c,
compare-elim.c, config/aarch64/aarch64-builtins.c,
config/aarch64/aarch64-c.c, config/aarch64/cortex-a57-fma-steering.c,
config/arc/arc.c, config/arc/arc-c.c, config/arm/arm-builtins.c,
config/arm/arm-c.c, config/avr/avr.c, config/avr/avr-c.c,
config/avr/avr-log.c, config/bfin/bfin.c, config/c6x/c6x.c,
config/cr16/cr16.c, config/cris/cris.c, config/darwin-c.c,
config/darwin.c, config/epiphany/epiphany.c,
config/epiphany/mode-switch-use.c,
config/epiphany/resolve-sw-modes.c, config/fr30/fr30.c,
config/frv/frv.c, config/ft32/ft32.c, config/h8300/h8300.c,
config/i386/i386-c.c, config/i386/winnt.c, config/iq2000/iq2000.c,
config/lm32/lm32.c, config/m32c/m32c.c, config/m32r/m32r.c,
config/m68k/m68k.c, config/mcore/mcore.c,
config/microblaze/microblaze.c, config/mmix/mmix.c,
config/mn10300/mn10300.c, config/moxie/moxie.c,
config/msp430/msp430.c, config/nds32/nds32-cost.c,
config/nds32/nds32-intrinsic.c, config/nds32/nds32-md-auxiliary.c,
config/nds32/nds32-memory-manipulation.c,
config/nds32/nds32-predicates.c, config/nds32/nds32.c,
config/nios2/nios2.c, config/nvptx/nvptx.c, config/pa/pa.c,
config/pdp11/pdp11.c, config/rl78/rl78.c, config/rs6000/rs6000-c.c,
config/rx/rx.c, config/s390/s390-c.c, config/s390/s390.c,
config/sh/sh.c, config/sh/sh-c.c, config/sh/sh-mem.cc,
config/sh/sh_treg_combine.cc, config/sol2.c, config/spu/spu.c,
config/stormy16/stormy16.c, config/tilegx/tilegx.c,
config/tilepro/tilepro.c, config/v850/v850.c, config/vax/vax.c,
config/visium/visium.c, config/vms/vms-c.c, config/xtensa/xtensa.c,
coverage.c, cppbuiltin.c, cprop.c, cse.c, cselib.c, dbxout.c, dce.c,
df-core.c, df-problems.c, df-scan.c, dojump.c, dse.c, dwarf2asm.c,
dwarf2cfi.c, dwarf2out.c, emit-rtl.c, except.c, explow.c, expmed.c,
expr.c, final.c, fold-const.c, function.c, fwprop.c, gcse.c,
ggc-page.c, haifa-sched.c, hsa-brig.c, hsa-gen.c, hw-doloop.c,
ifcvt.c, init-regs.c, internal-fn.c, ira-build.c, ira-color.c,
ira-conflicts.c, ira-costs.c, ira-emit.c, ira-lives.c, ira.c, jump.c,
loop-doloop.c, loop-invariant.c, loop-iv.c, loop-unroll.c,
lower-subreg.c, lra.c, lra-assigns.c, lra-coalesce.c,
lra-constraints.c, lra-eliminations.c, lra-lives.c, lra-remat.c,
lra-spills.c, mode-switching.c, modulo-sched.c, omp-low.c, passes.c,
postreload-gcse.c, postreload.c, predict.c, print-rtl-function.c,
recog.c, ree.c, reg-stack.c, regcprop.c, reginfo.c, regrename.c,
reload.c, reload1.c, reorg.c, resource.c, rtl-chkp.c, rtl-tests.c,
rtlanal.c, rtlhooks.c, sched-deps.c, sched-rgn.c, sdbout.c,
sel-sched-ir.c, sel-sched.c, shrink-wrap.c, simplify-rtx.c,
stack-ptr-mod.c, stmt.c, stor-layout.c, target-globals.c,
targhooks.c, toplev.c, tree-nested.c, tree-outof-ssa.c,
tree-profile.c, tree-ssa-coalesce.c, tree-ssa-ifcombine.c,
tree-ssa-loop-ivopts.c, tree-ssa-loop.c, tree-ssa-reassoc.c,
tree-ssa-sccvn.c, tree-vect-data-refs.c, ubsan.c, valtrack.c,
var-tracking.c, varasm.c: Include memmodel.h.
* genattrtab.c (write_header): Include memmodel.h in generated file.
* genautomata.c (main): Likewise.
* gengtype.c (open_base_files): Likewise.
* genopinit.c (main): Likewise.
* genconditions.c (write_header): Include memmodel.h earlier in
generated file.
* genemit.c (main): Likewise.
* genoutput.c (output_prologue): Likewise.
* genpeep.c (main): Likewise.
* genpreds.c (write_insn_preds_c): Likewise.
* genrecog.c (write_header): Likewise.
* Makefile.in (PLUGIN_HEADERS): Include memmodel.h
gcc/ada/
* gcc-interface/utils2.c: Include memmodel.h.
gcc/c-family/
* c-cppbuiltin.c: Include memmodel.h.
* c-opts.c: Likewise.
* c-pragma.c: Likewise.
* c-warn.c: Likewise.
gcc/c/
* c-typeck.c: Include memmodel.h.
gcc/cp/
* decl2.c: Include memmodel.h.
* rtti.c: Likewise.
gcc/fortran/
* trans-intrinsic.c: Include memmodel.h.
gcc/go/
* go-backend.c: Include memmodel.h.
libgcc/
* libgcov-profiler.c: Replace MEMMODEL_* macros by their __ATOMIC_*
equivalent.
* config/tilepro/atomic.c: Likewise and stop casting model to
enum memmodel.
From-SVN: r241121
2016-10-13 16:17:52 +02:00
|
|
|
|
#include "memmodel.h"
|
2015-10-16 21:47:09 +02:00
|
|
|
|
#include "tm_p.h"
|
|
|
|
|
#include "expmed.h"
|
|
|
|
|
#include "optabs.h"
|
|
|
|
|
#include "regs.h"
|
|
|
|
|
#include "emit-rtl.h"
|
|
|
|
|
#include "recog.h"
|
|
|
|
|
#include "diagnostic.h"
|
genattrtab.c (write_header): Include hash-set.h...
2015-01-09 Michael Collison <michael.collison@linaro.org>
* genattrtab.c (write_header): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-attrtab.c.
* genautomata.c (main): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-automata.c.
* genemit.c (main): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-emit.c.
* gengtype.c (open_base_files): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
gtype-desc.c.
* genopinit.c (main): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-opinit.c.
* genoutput.c (output_prologue): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-output.c.
* genpeep.c (main): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-peep.c.
* genpreds.c (write_insn_preds_c): Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
insn-preds.c.
* optc-save-gen.awk: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h when generating
options-save.c.
* opth-gen.awk: Change include guard from GCC_C_COMMON_H to GCC_C_COMMON_C
when generating options.h.
* ada/gcc-interface/cuintp.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h,
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* ada/gcc-interface/decl.c: ditto.
* ada/gcc-interface/misc.c: ditto.
* ada/gcc-interface/targtyps.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h,
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* ada/gcc-interface/trans.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, real.h,
fold-const.h, wide-int.h, inchash.h due to
flattening of tree.h.
* ada/gcc-interface/utils.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h,
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* ada/gcc-interface/utils2.c: ditto.
* alias.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* asan.c: ditto.
* attribs.c: ditto.
* auto-inc-dec.c: ditto.
* auto-profile.c: ditto.
* bb-reorder.c: ditto.
* bt-load.c: Include symtab.h due to flattening of tree.h.
* builtins.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* c/c-array-notation.c: ditto.
* c/c-aux-info.c: ditto.
* c/c-convert.c: ditto.
* c/c-decl.c: ditto.
* c/c-errors.c: ditto.
* c/c-lang.c: ditto.
* c/c-objc-common.c: ditto.
* c/c-parser.c: ditto.
* c/c-typeck.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, inchash.h, real.h and
fixed-value.h due to flattening of tree.h.
* calls.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* ccmp.c: ditto.
* c-family/array-notation-common.c: ditto.
* c-family/c-ada-spec.c: ditto.
* c-family/c-cilkplus.c: ditto.
* c-family/c-common.c: Include input.h due to flattening of tree.h.
Define macro GCC_C_COMMON_C.
* c-family/c-common.h: Flatten tree.h header files into c-common.h.
Remove include of tree-core.h.
* c-family/c-cppbuiltin.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* c-family/c-dump.c: ditto.
* c-family/c-format.c: Flatten tree.h header files into c-common.h.
* c-family/c-cppbuiltin.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* c-family/c-dump.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* c-family/c-format.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, inchash.h and real.h due to
flattening of tree.h.
* c-family/c-gimplify.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* c-family/cilk.c: ditto.
* c-family/c-lex.c: ditto.
* c-family/c-omp.c: ditto.
* c-family/c-opts.c: ditto.
* c-family/c-pch.c: ditto.
* c-family/c-ppoutput.c: ditto.
* c-family/c-pragma.c: ditto.
* c-family/c-pretty-print.c: ditto.
* c-family/c-semantics.c: ditto.
* c-family/c-ubsan.c: ditto.
* c-family/stub-objc.c: ditto.
* cfgbuild.c: ditto.
* cfg.c: ditto.
* cfgcleanup.c: ditto.
* cfgexpand.c: ditto.
* cfghooks.c: ditto.
* cfgloop.c: Include symtab.h, fold-const.h, and
inchash.h due to flattening of tree.h.
* cfgloopmanip.c: ditto.
* cfgrtl.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* cgraphbuild.c: ditto.
* cgraph.c: ditto.
* cgraphclones.c: ditto.
* cgraphunit.c: ditto.
* cilk-common.c: ditto.
* combine.c: ditto.
* combine-stack-adj.c: Include symbol.h due to flattening of tree.h.
* config/aarch64/aarch64-builtins.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/aarch64/aarch64.c: ditto.
* config/alpha/alpha.c: ditto.
* config/arc/arc.c: ditto.
* config/arm/aarch-common.c: ditto.
* config/arm/arm-builtins.c: ditto.
* config/arm/arm.c: ditto.
* config/arm/arm-c.c: ditto.
* config/avr/avr.c: ditto.
* config/avr/avr-c.c: ditto.
* config/avr/avr-log.c: ditto.
* config/bfin/bfin.c: ditto.
* config/c6x/c6x.c: ditto.
* config/cr16/cr16.c: ditto.
* config/cris/cris.c: ditto.
* config/darwin.c: ditto.
* config/darwin-c.c: ditto.
* config/default-c.c: ditto.
* config/epiphany/epiphany.c: ditto.
* config/fr30/fr30.c: ditto.
* config/frv/frv.c: ditto.
* config/glibc-c.c: ditto.
* config/h8300/h8300.c: ditto.
* config/i386/i386.c: ditto.
* config/i386/i386-c.c: ditto.
* config/i386/msformat.c: ditto.
* config/i386/winnt.c: ditto.
* config/i386/winnt-cxx.c: ditto.
* config/i386/winnt-stubs.c: ditto.
* config/ia64/ia64.c: ditto.
* config/ia64/ia64-c.c: ditto.
* config/iq2000/iq2000.c: ditto.
* config/lm32/lm32.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* config/m32c/m32c.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/m32c/m32c-pragma.c: ditto.
* config/m32c/m32cr.c: ditto.
* config/m68k/m68k.c: ditto.
* config/mcore/mcore.c: ditto.
* config/mep/mep.c: ditto.
* config/mep/mep-pragma.c: ditto.
* config/microblaze/microblaze.c: ditto.
* config/microblaze/microblaze-c.c: ditto.
* config/mips/mips.c: ditto.
* config/mmix/mmix.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* config/mn10300/mn10300.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/moxie/moxie.c: ditto.
* config/msp430/msp430.c: ditto.
* config/msp430/msp430-c.c: ditto.
* config/nds32/nds32.c: ditto.
* config/nds32/nds32-cost.c: ditto.
* config/nds32/nds32-fp-as-gp.c: ditto.
* config/nds32/nds32-intrinsic.c: ditto.
* config/nds32/nds32-isr.c: ditto.
* config/nds32/nds32-md-auxiliary.c: ditto.
* config/nds32/nds32-memory-manipulation.c: ditto.
* config/nds32/nds32-pipelines-auxiliary.c: ditto.
* config/nds32/nds32-predicates.c: ditto.
* config/nios2/nios2.c: ditto.
* config/nvptx/nvptx.c: ditto.
* config/pa/pa.c: ditto.
* config/pdp11/pdp11.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* config/rl78/rl78.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/rl78/rl78-c.c: ditto.
* config/rs6000/rs6000.c: ditto.
* config/rs6000/rs6000-c.c: ditto.
* config/rx/rx.c: ditto.
* config/s390/s390.c: ditto.
* config/sh/sh.c: ditto.
* config/sh/sh-c.c: ditto.
* config/sh/sh-mem.cc: ditto.
* config/sh/sh_treg_combine.cc: Include symtab.h, inchash.h and tree.h
due to flattening of tree.h.
Remove include of tree-core.h.
* config/sol2.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/sol2-c.c: ditto.
* config/sol2-cxx.c: ditto.
* config/sol2-stubs.c: ditto.
* config/sparc/sparc.c: ditto.
* config/sparc/sparc-c.c: ditto.
* config/spu/spu.c: ditto.
* config/spu/spu-c.c: ditto.
* config/stormy16/stormy16.c: ditto.
* config/tilegx/tilegx.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* config/tilepro/gen-mul-tables.cc: Include symtab.h in generated file.
* config/tilegx/tilegx-c.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/tilepro/tilepro.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* config/tilepro/tilepro-c.c: Include hash-set.h, machmode.h,
vec.h, double-int.h, input.h, alias.h, symtab.h, options.h
fold-const.h, wide-int.h, and inchash.h due to
flattening of tree.h.
* config/v850/v850.c: ditto.
* config/v850/v850-c.c: ditto.
* config/vax/vax.c: ditto.
* config/vms/vms.c: ditto.
* config/vms/vms-c.c: ditto.
* config/vxworks.c: ditto.
* config/winnt-c.c: ditto.
* config/xtensa/xtensa.c: Include symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* convert.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* coverage.c: ditto.
* cp/call.c: ditto.
* cp/class.c: ditto.
* cp/constexpr.c: ditto.
* cp/cp-array-notation.c: ditto.
* cp/cp-gimplify.c: ditto.
* cp/cp-lang.c: ditto.
* cp/cp-objcp-common.c: ditto.
* cp/cvt.c: ditto.
* cp/decl2.c: ditto.
* cp/decl.c: ditto.
* cp/dump.c: ditto.
* cp/error.c: ditto.
* cp/except.c: ditto.
* cp/expr.c: ditto.
* cp/friend.c: ditto.
* cp/init.c: ditto.
* cp/lambda.c: ditto.
* cp/lex.c: ditto.
* cp/mangle.c: ditto.
* cp/name-lookup.c: ditto.
* cp/optimize.c: ditto.
* cp/parser.c: ditto.
* cp/pt.c: ditto.
* cp/ptree.c: ditto.
* cp/repo.c: ditto.
* cp/rtti.c: ditto.
* cp/search.c: ditto.
* cp/semantics.c: ditto.
* cp/tree.c: ditto.
* cp/typeck2.c: ditto.
* cp/typeck.c: ditto.
* cppbuiltin.c: ditto.
* cprop.c: ditto.
* cse.c: Add include of symtab.h due to flattening of tree.h.
* cselib.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* data-streamer.c: ditto.
* data-streamer-in.c: ditto.
* data-streamer-out.c: ditto.
* dbxout.c: ditto.
* dce.c: ditto.
* ddg.c: Add include of symtab.h due to flattening of tree.h.
* debug.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* dfp.c: ditto.
* df-scan.c: ditto.
* dojump.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, inchash.h and real.h due to flattening of tree.h.
* double-int.c: ditto.
* dse.c: ditto.
* dumpfile.c: ditto.
* dwarf2asm.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, inchash.h and real.h due to flattening of tree.h.
* dwarf2cfi.c: ditto.
* dwarf2out.c: ditto.
* emit-rtl.c: ditto.
* except.c: ditto.
* explow.c: ditto.
* expmed.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* expr.c: ditto.
* final.c: ditto.
* fixed-value.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, inchash.h and fixed-value.h due to flattening of tree.h.
* fold-const.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
Relocate inline function convert_to_ptrofftype_loc from tree.h.
Relocate inline function fold_build_pointer_plus_loc from tree.h.
Relocate inline function fold_build_pointer_plus_hwi_loc from tree.h.
* fold-const.h: Relocate macro convert_to_ptrofftype from tree.h.
Relocate macro fold_build_pointer_plus from tree.h.
Relocate macro fold_build_pointer_plus_hwi from tree.h.
Add prototype for convert_to_ptrofftype_loc relocated from tree.h.
Add prototype for fold_build_pointer_plus_loc relocated from tree.h.
Add prototype for fold_build_pointer_plus_hwi_loc relocated from tree.h.
* fortran/convert.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/cpp.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/decl.c: ditto.
* fortran/f95.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/iresolve.c: ditto.
* fortran/match.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/module.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/options.c: ditto.
* fortran/target-memory.c: Include hash-set.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans-array.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans-common.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans-const.c: ditto.
* fortran/trans-decl.c: ditto.
* fortran/trans-expr.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans-intrinsic.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, inchash.h and real.h due to flattening of tree.h.
* fortran/trans-io.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* fortran/trans-openmp.c: ditto.
* fortran/trans-stmt.c: ditto.
* fortran/trans-types.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, inchash.h and real.h due to flattening of tree.h.
* function.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* gcc-plugin.h: Include statistics.h, double-int.h, real.h, fixed-value.h,
alias.h, flags.h, and symtab.h due to flattening of tree.h.
* gcse.c: ditto.
* generic-match-head.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ggc-page.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* gimple-builder.c: ditto.
* gimple.c: ditto.
* gimple-expr.c: ditto.
* gimple-fold.c: ditto.
* gimple-iterator.c: ditto.
* gimple-low.c: ditto.
* gimple-match-head.c: ditto.
* gimple-pretty-print.c: ditto.
* gimple-ssa-isolate-paths.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* gimple-ssa-strength-reduction.c: ditto.
* gimple-streamer-in.c: ditto.
* gimple-streamer-out.c: ditto.
* gimple-walk.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* gimplify.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* gimplify-me.c: ditto.
* go/go-gcc.cc: ditto.
* go/go-lang.c: ditto.
* go/gdump.c: ditto.
* graphite-blocking.c: ditto.
* graphite.c: ditto.
* graphite-dependencies.c: ditto.
* graphite-interchange.c: ditto.
* graphite-isl-ast-to-gimple.c: ditto.
* graphite-optimize-isl.c: ditto.
* graphite-poly.c: ditto.
* graphite-scop-detection.c: ditto.
* graphite-sese-to-poly.c: ditto.
* hw-doloop.c: Include symtab.h due to flattening of tree.h.
* ifcvt.c: ditto.
* init-regs.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* internal-fn.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa.c: ditto.
* ipa-chkp.c: ditto.
* ipa-comdats.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-cp.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-devirt.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-icf.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-icf-gimple.c: ditto.
* ipa-inline-analysis.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-inline.c: ditto.
* ipa-inline-transform.c: ditto.
* ipa-polymorphic-call.c: ditto.
* ipa-profile.c: ditto.
* ipa-prop.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-pure-const.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-ref.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ipa-reference.c: ditto.
* ipa-split.c: ditto.
* ipa-utils.c: ditto.
* ipa-visibility.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* ira.c: ditto.
* ira-color.c: Include hash-set.h due to flattening of tree.h.
* ira-costs.c: ditto.
* ira-emit.c: ditto.
* java/boehm.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/builtins.c: ditto.
* java/class.c: ditto.
* java/constants.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/decl.c: ditto.
* java/except.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/expr.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, inchash.h and real.h due to flattening of tree.h.
* java/gimplify.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/jcf-dump.c: ditto.
* java/jcf-io.c: ditto.
* java/jcf-parse.c: ditto.
* java/jvgenmain.c: ditto.
* java/lang.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/mangle.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* java/mangle_name.c: ditto.
* java/resource.c: ditto.
* java/typeck.c: ditto.
* java/verify-glue.c: ditto.
* java/verify-impl.c: ditto.
* jump.c: Include symtab.h due to flattening of tree.h.
* langhooks.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* loop-doloop.c: Include symtab.h due to flattening of tree.h.
* loop-init.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* loop-invariant.c: Include symtab.h due to flattening of tree.h.
* loop-iv.c: ditto.
* loop-unroll.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lower-subreg.c: ditto.
* lra-assigns.c: Include symtab.h due to flattening of tree.h.
* lra.c: Include symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* lra-coalesce.c: Include symtab.h due to flattening of tree.h.
* lra-constraints.c: ditto.
* lra-eliminations.c: ditto.
* lra-lives.c: ditto.
* lra-remat.c: ditto.
* lra-spills.c: ditto.
* lto/lto.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto/lto-lang.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto/lto-object.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto/lto-partition.c: ditto.
* lto/lto-symtab.c: ditto.
* lto-cgraph.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto-compress.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto-opts.c: ditto.
* lto-section-in.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* lto-section-out.c: ditto.
* lto-streamer.c: ditto.
* lto-streamer-in.c: ditto.
* lto-streamer-out.c: ditto.
* modulo-sched.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* objc/objc-act.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, and inchash.h due to flattening of tree.h.
* objc/objc-encoding.c: ditto.
* objc/objc-gnu-runtime-abi-01.c: ditto.
* objc/objc-lang.c: ditto.
* objc/objc-map.c: ditto.
* objc/objc-next-runtime-abi-01.c: ditto.
* objc/objc-next-runtime-abi-02.c: ditto.
* objc/objc-runtime-shared-support.c: ditto.
* objcp/objcp-decl.c: ditto.
* objcp/objcp-lang.c: ditto.
* omega.c: ditto.
* omp-low.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* optabs.c: ditto.
* opts-global.c: ditto.
* passes.c: ditto.
* plugin.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* postreload.c: Include symtab.h due to flattening of tree.h.
* postreload-gcse.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* predict.c: ditto.
* print-rtl.c: ditto.
* print-tree.c: ditto.
* profile.c: Include symtab.h, fold-const.h
and inchash.h due to flattening of tree.h.
* real.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* realmpfr.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* recog.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* ree.c: ditto.
* reginfo.c: ditto.
* reg-stack.c: ditto.
* reload1.c: Include symtab.h, fold-const.h, wide-int.h
and inchash.h due to flattening of tree.h.
* reload.c: Include symtab.h due to flattening of tree.h.
* reorg.c: ditto.
* rtlanal.c: Include symtab.h, fold-const.h, wide-int.h
and inchash.h due to flattening of tree.h.
* rtl-chkp.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* rtlhooks.c: Include symtab.h due to flattening of tree.h.
* sanopt.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* sched-deps.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* sched-vis.c: ditto.
* sdbout.c: ditto.
* sel-sched.c: Include symtab.h, fold-const.h, wide-int.h
and inchash.h due to flattening of tree.h.
* sel-sched-ir.c: ditto.
* sese.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* shrink-wrap.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* simplify-rtx.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* stack-ptr-mod.c: ditto.
* stmt.c: ditto.
* store-motion.c: ditto.
* stor-layout.c: ditto.
* stringpool.c: ditto.
* symtab.c: ditto.
* target-globals.c: ditto.
* targhooks.c: ditto.
* toplev.c: ditto.
* tracer.c: ditto.
* trans-mem.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-affine.c: ditto.
* tree-browser.c: ditto.
* tree.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-call-cdce.c: Include symtab.h, alias.h, double-int.h,
fold-const.h, wide-int.h, inchash.h and real.h due to
flattening of tree.h.
* tree-cfg.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-cfgcleanup.c: ditto.
* tree-chkp.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-chkp-opt.c: ditto.
* tree-chrec.c: ditto.
* tree-chkp-opt.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h and
real.h due to flattening of tree.h.
* tree-core.h: Flatten header file by removing all #include statements.
* tree-data-ref.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-dfa.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h and
real.h due to flattening of tree.h.
* tree-diagnostic.c: ditto.
* tree-dump.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h, real.h and
fixed-value.h due to flattening of tree.h.
* tree-dfa.c: ditto.
* tree-eh.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h and
real.h due to flattening of tree.h.
* tree-emutls.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree.h: Flatten header files by removing all includes except tree-core.h.
Remove inline function convert_to_ptrofftype_loc to relocate to fold-const.c.
Remove macro convert_to_ptrofftype to relocate to fold-const.h.
Remove inline function fold_build_pointer_plus_loc to relocate to fold-const.c.
Remove macro fold_build_pointer_plus to relocate to fold-const.h.
Remove inline function fold_build_pointer_plus_hwi_loc to relocate to fold-const.c.
Remove macro fold_build_pointer_plus_hwi to relocate to fold-const.h.
* tree-if-conv.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h, real.h and
fixed-value.h due to flattening of tree.h.
* tree-inline.c: ditto.
* tree-into-ssa.c: ditto.
* tree-iterator.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-loop-distribution.c: ditto.
* tree-nested.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-nrv.c: ditto.
* tree-object-size.c: ditto.
* tree-outof-ssa.c: ditto.
* tree-parloops.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-phinodes.c: ditto.
* tree-predcom.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-pretty-print.c: ditto.
* tree-profile.c: Include double-int.h, input.h, alias.h, symtab.h,
fold-const.h, wide-int.h and inchash.h due to flattening of tree.h.
* tree-scalar-evolution.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h and inchash.h due to flattening of tree.h.
* tree-sra.c: Include vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, and
inchash.h due to flattening of tree.h.
* tree-ssa-alias.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-ssa.c: ditto.
* tree-ssa-ccp.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h
and real.h due to flattening of tree.h.
* tree-ssa-coalesce.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-ssa-copy.c: ditto.
* tree-ssa-copyrename.c: ditto.
* tree-ssa-dce.c: ditto.
* tree-ssa-dom.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h, inchash.h
and real.h due to flattening of tree.h.
* tree-ssa-dse.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-ssa-forwprop.c: ditto.
* tree-ssa-ifcombine.c: ditto.
* tree-ssa-live.c: ditto.
* tree-ssa-loop.c: ditto.
* tree-ssa-loop-ch.c: ditto.
* tree-ssa-loop-im.c: ditto.
* tree-ssa-loop-ivcanon.c: ditto.
* tree-ssa-loop-ivopts.c: ditto.
* tree-ssa-loop-manip.c: ditto.
* tree-ssa-loop-niter.c: ditto.
* tree-ssa-loop-prefetch.c: ditto.
* tree-ssa-loop-unswitch.c: ditto.
* tree-ssa-math-opts.c: ditto.
* tree-ssanames.c: ditto.
* tree-ssa-operands.c: ditto.
* tree-ssa-phiopt.c: ditto.
* tree-ssa-phiprop.c: ditto.
* tree-ssa-pre.c: ditto.
* tree-ssa-propagate.c: ditto.
* tree-ssa-reassoc.c: ditto.
* tree-ssa-sccvn.c: ditto.
* tree-ssa-sink.c: ditto.
* tree-ssa-strlen.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h and inchash.h due to flattening of tree.h.
* tree-ssa-structalias.c: Include double-int.h, input.h, alias.h, symtab.h,
fold-const.h, wide-int.h and inchash.h due to flattening of tree.h.
* tree-ssa-tail-merge.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, fold-const.h,
wide-int.h and inchash.h due to flattening of tree.h.
* tree-ssa-ter.c: ditto.
* tree-ssa-threadedge.c: ditto.
* tree-ssa-threadupdate.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h and inchash.h due to flattening of tree.h.
* tree-ssa-uncprop.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, fold-const.h,
wide-int.h and inchash.h due to flattening of tree.h.
* tree-ssa-uninit.c: ditto.
* tree-stdarg.c: Include vec.h, double-int.h, input.h, alias.h,
symtab.h, fold-const.h, wide-int.h and inchash.h due to flattening
of tree.h.
* tree-streamer.c: Include vec.h, double-int.h, input.h, alias.h,
symtab.h, options.h, fold-const.h, wide-int.h and
inchash.h due to flattening of tree.h.
* tree-streamer-in.c: Include hash-set.h, machmode.h, vec.h,
double-int.h, input.h, alias.h, symtab.h, options.h, fold-const.h,
wide-int.h, inchash.h, real.h and fixed-value.h due to flattening
of tree.h.
* tree-streamer-out.c: ditto.
* tree-switch-conversion.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-tailcall.c: ditto.
* tree-vect-data-refs.c: ditto.
* tree-vect-generic.c: Include hash-set.h, machmode.h, vec.h, double-int.h, input.h,
alias.h, symtab.h, options.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-vect-loop.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* tree-vect-loop-manip.c: ditto.
* tree-vectorizer.c: ditto.
* tree-vect-patterns.c: ditto.
* tree-vect-slp.c: ditto.
* tree-vect-stmts.c: ditto.
* tree-vrp.c: ditto.
* tsan.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* ubsan.c: ditto.
* value-prof.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* varasm.c: ditto.
* varpool.c: ditto.
* var-tracking.c: ditto.
* vmsdbgout.c: ditto.
* vtable-verify.c: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* wide-int.cc: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
* xcoffout.c: ditto.
* libcc1/plugin.cc: Include hash-set.h, machmode.h, vec.h, double-int.h,
input.h, alias.h, symtab.h, options.h, fold-const.h, wide-int.h and inchash.h
due to flattening of tree.h.
From-SVN: r219402
2015-01-09 21:18:42 +01:00
|
|
|
|
#include "alias.h"
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#include "insn-flags.h"
|
|
|
|
|
#include "output.h"
|
|
|
|
|
#include "insn-attr.h"
|
dojump.h: New header file.
2015-10-15 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
* dojump.h: New header file.
* explow.h: Likewise.
* expr.h: Remove includes.
Move expmed.c prototypes to expmed.h.
Move dojump.c prototypes to dojump.h.
Move alias.c prototypes to alias.h.
Move explow.c prototypes to explow.h.
Move calls.c prototypes to calls.h.
Move emit-rtl.c prototypes to emit-rtl.h.
Move varasm.c prototypes to varasm.h.
Move stmt.c prototypes to stmt.h.
(saved_pending_stack_adjust): Move to dojump.h.
(adjust_address): Move to explow.h.
(adjust_address_nv): Move to emit-rtl.h.
(adjust_bitfield_address): Likewise.
(adjust_bitfield_address_size): Likewise.
(adjust_bitfield_address_nv): Likewise.
(adjust_automodify_address_nv): Likewise.
* explow.c (expr_size): Move to expr.c.
(int_expr_size): Likewise.
(tree_expr_size): Likewise.
Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h statistics.h stmt.h varasm.h.
* genemit.c (main): Generate includes statistics.h, real.h, fixed-value.h,
insn-config.h, expmed.h, dojump.h, explow.h, emit-rtl.h, stmt.h.
* genopinit.c (main): Generate includes hashtab.h, hard-reg-set.h, function.h,
statistics.h, real.h, fixed-value.h, expmed.h, dojump.h, explow.h, emit-rtl.h,
stmt.h.
* genoutput.c (main): Generate includes hashtab.h, statistics.h, real.h,
fixed-value.h, expmed.h, dojump.h, explow.h, emit-rtl.h, stmt.h.
* gengtype.c (open_base_files): Generate includes flags.h, statistics.h, real.h,
fixed-value.h, tree.h, expmed.h, dojump.h, explow.h, calls.h, emit-rtl.h, varasm.h,
stmt.h.
* config/tilepro/gen-mul-tables.cc: Generate includes hashtab.h, hash-set.h, vec.h,
machmode.h, tm.h, hard-reg-set.h, input.h, function.h, rtl.h, flags.h, statistics.h,
double-int.h, real.h, fixed-value.h, alias.h, wide-int.h, inchash.h, tree.h,
insn-config.h, expmed.h, dojump.h, explow.h, calls.h, emit-rtl.h, varasm.h, stmt.h.
* config/tilegx/mul-tables.c: Include alias.h calls.h dojump.h
double-int.h emit-rtl.h explow.h expmed.h fixed-value.h flags.h
function.h hard-reg-set.h hash-set.h hashtab.h inchash.h input.h
insn-config.h machmode.h real.h rtl.h statistics.h stmt.h symtab.h
tm.h tree.h varasm.h vec.h wide-int.h.
* rtlhooks.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h flags.h function.h hard-reg-set.h
hash-set.h hashtab.h inchash.h input.h insn-config.h machmode.h
real.h statistics.h stmt.h tree.h varasm.h vec.h wide-int.h.
* cfgloopanal.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h flags.h inchash.h insn-config.h
real.h statistics.h stmt.h tree.h varasm.h wide-int.h.
* loop-iv.c: Likewise.
* lra-assigns.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h flags.h inchash.h real.h
statistics.h stmt.h tree.h varasm.h wide-int.h.
* lra-constraints.c: Likewise.
* lra-eliminations.c: Likewise.
* lra-lives.c: Likewise.
* lra-remat.c: Likewise.
* bt-load.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h inchash.h insn-config.h real.h
statistics.h stmt.h tree.h varasm.h wide-int.h.
* hw-doloop.c: Likewise.
* ira-color.c: Likewise.
* ira-emit.c: Likewise.
* loop-doloop.c: Likewise.
* loop-invariant.c: Likewise.
* reload.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h inchash.h real.h rtl.h
statistics.h stmt.h tree.h varasm.h wide-int.h.
* caller-save.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h inchash.h real.h statistics.h
stmt.h tree.h varasm.h wide-int.h.
* combine-stack-adj.c: Likewise.
* cse.c: Likewise.
* ddg.c: Likewise.
* ifcvt.c: Likewise.
* ira-costs.c: Likewise.
* jump.c: Likewise.
* lra-coalesce.c: Likewise.
* lra-spills.c: Likewise.
* profile.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h insn-config.h real.h statistics.h
stmt.h varasm.h wide-int.h.
* lra.c: Include alias.h calls.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h real.h statistics.h stmt.h
varasm.h.
* config/sh/sh_treg_combine.cc: Include alias.h calls.h dojump.h
double-int.h explow.h expmed.h fixed-value.h flags.h real.h
statistics.h stmt.h varasm.h wide-int.h.
* reorg.c: Include alias.h calls.h dojump.h double-int.h explow.h
expmed.h fixed-value.h inchash.h real.h statistics.h stmt.h tree.h
varasm.h wide-int.h.
* reload1.c: Include alias.h calls.h dojump.h double-int.h explow.h
expmed.h fixed-value.h real.h rtl.h statistics.h stmt.h varasm.h.
* config/tilegx/tilegx.c: Include alias.h dojump.h double-int.h
emit-rtl.h explow.h expmed.h fixed-value.h flags.h real.h
statistics.h stmt.h.
* config/tilepro/tilepro.c: Likewise.
* config/mmix/mmix.c: Include alias.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h real.h statistics.h stmt.h.
* config/pdp11/pdp11.c: Likewise.
* config/xtensa/xtensa.c: Likewise.
* config/lm32/lm32.c: Include alias.h dojump.h double-int.h emit-rtl.h
explow.h expmed.h fixed-value.h real.h statistics.h stmt.h
varasm.h.
* tree-chkp.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h function.h hard-reg-set.h hashtab.h
insn-config.h real.h rtl.h statistics.h stmt.h tm.h.
* cilk-common.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h function.h hard-reg-set.h hashtab.h
insn-config.h real.h rtl.h statistics.h stmt.h tm.h varasm.h.
* rtl-chkp.c: Likewise.
* tree-chkp-opt.c: Likewise.
* config/arm/arm-builtins.c: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h flags.h function.h hard-reg-set.h hashtab.h
insn-config.h real.h statistics.h stmt.h varasm.h.
* ipa-icf.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h hashtab.h insn-config.h real.h rtl.h
statistics.h stmt.h.
* tree-vect-data-refs.c: Likewise.
* graphite-sese-to-poly.c: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h flags.h hashtab.h insn-config.h real.h
rtl.h statistics.h stmt.h varasm.h.
* internal-fn.c: Likewise.
* ipa-icf-gimple.c: Likewise.
* lto-section-out.c: Likewise.
* tree-data-ref.c: Likewise.
* tree-nested.c: Likewise.
* tree-outof-ssa.c: Likewise.
* tree-predcom.c: Likewise.
* tree-pretty-print.c: Likewise.
* tree-scalar-evolution.c: Likewise.
* tree-ssa-strlen.c: Likewise.
* tree-vect-loop.c: Likewise.
* tree-vect-patterns.c: Likewise.
* tree-vect-slp.c: Likewise.
* tree-vect-stmts.c: Likewise.
* tsan.c: Likewise.
* targhooks.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h hashtab.h insn-config.h real.h statistics.h
stmt.h.
* config/sh/sh-mem.cc: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h flags.h hashtab.h insn-config.h real.h
statistics.h stmt.h varasm.h.
* loop-unroll.c: Likewise.
* ubsan.c: Likewise.
* tree-ssa-loop-prefetch.c: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h flags.h hashtab.h real.h rtl.h statistics.h
stmt.h varasm.h.
* dse.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h function.h hashtab.h statistics.h stmt.h varasm.h.
* tree-switch-conversion.c: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h hashtab.h insn-config.h real.h rtl.h
statistics.h stmt.h.
* generic-match-head.c: Include calls.h dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h hashtab.h insn-config.h real.h rtl.h
statistics.h stmt.h varasm.h.
* gimple-match-head.c: Likewise.
* lto-cgraph.c: Likewise.
* lto-section-in.c: Likewise.
* lto-streamer-in.c: Likewise.
* lto-streamer-out.c: Likewise.
* tree-affine.c: Likewise.
* tree-cfg.c: Likewise.
* tree-cfgcleanup.c: Likewise.
* tree-if-conv.c: Likewise.
* tree-into-ssa.c: Likewise.
* tree-ssa-alias.c: Likewise.
* tree-ssa-copyrename.c: Likewise.
* tree-ssa-dse.c: Likewise.
* tree-ssa-forwprop.c: Likewise.
* tree-ssa-live.c: Likewise.
* tree-ssa-math-opts.c: Likewise.
* tree-ssa-pre.c: Likewise.
* tree-ssa-sccvn.c: Likewise.
* tree-tailcall.c: Likewise.
* tree-vect-generic.c: Likewise.
* tree-sra.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h insn-config.h real.h rtl.h stmt.h varasm.h.
* stor-layout.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h insn-config.h real.h statistics.h stmt.h.
* varasm.c: Likewise.
* coverage.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h insn-config.h real.h statistics.h stmt.h
varasm.h.
* init-regs.c: Likewise.
* ira.c: Likewise.
* omp-low.c: Likewise.
* stack-ptr-mod.c: Likewise.
* tree-ssa-reassoc.c: Likewise.
* tree-complex.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h insn-config.h rtl.h statistics.h stmt.h
varasm.h.
* dwarf2cfi.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h insn-config.h statistics.h stmt.h varasm.h.
* shrink-wrap.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h rtl.h statistics.h stmt.h.
* recog.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h rtl.h statistics.h stmt.h varasm.h.
* tree-ssa-phiopt.c: Likewise.
* config/darwin.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h statistics.h stmt.h.
* config/fr30/fr30.c: Likewise.
* config/frv/frv.c: Likewise.
* expr.c: Likewise.
* final.c: Likewise.
* optabs.c: Likewise.
* passes.c: Likewise.
* simplify-rtx.c: Likewise.
* stmt.c: Likewise.
* toplev.c: Likewise.
* var-tracking.c: Likewise.
* gcse.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h statistics.h stmt.h varasm.h.
* lower-subreg.c: Likewise.
* postreload-gcse.c: Likewise.
* ree.c: Likewise.
* reginfo.c: Likewise.
* store-motion.c: Likewise.
* combine.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h stmt.h varasm.h.
* emit-rtl.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h statistics.h stmt.h.
* dojump.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h statistics.h stmt.h varasm.h.
* except.c: Likewise.
* explow.c: Likewise.
* tree-dfa.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h rtl.h statistics.h stmt.h
varasm.h.
* gimple-fold.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h rtl.h statistics.h varasm.h.
* tree-ssa-structalias.c: Likewise.
* cfgexpand.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h statistics.h.
* calls.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h statistics.h stmt.h.
* bb-reorder.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h statistics.h stmt.h varasm.h.
* cfgbuild.c: Likewise.
* function.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h real.h rtl.h statistics.h stmt.h.
* cfgrtl.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h real.h rtl.h statistics.h stmt.h varasm.h.
* dbxout.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h real.h statistics.h stmt.h.
* auto-inc-dec.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h real.h statistics.h stmt.h varasm.h.
* cprop.c: Likewise.
* modulo-sched.c: Likewise.
* postreload.c: Likewise.
* ccmp.c: Include calls.h dojump.h emit-rtl.h explow.h fixed-value.h
flags.h function.h hard-reg-set.h hashtab.h insn-config.h real.h
statistics.h stmt.h varasm.h.
* gimple-ssa-strength-reduction.c: Include calls.h dojump.h emit-rtl.h
explow.h fixed-value.h flags.h hashtab.h insn-config.h real.h
rtl.h statistics.h stmt.h varasm.h.
* tree-ssa-loop-ivopts.c: Include calls.h dojump.h emit-rtl.h explow.h
fixed-value.h flags.h hashtab.h real.h rtl.h statistics.h stmt.h
varasm.h.
* expmed.c: Include calls.h dojump.h emit-rtl.h explow.h fixed-value.h
function.h hard-reg-set.h hashtab.h real.h statistics.h stmt.h
varasm.h.
* target-globals.c: Include calls.h dojump.h emit-rtl.h explow.h
fixed-value.h function.h hashtab.h real.h statistics.h stmt.h
varasm.h.
* tree-ssa-address.c: Include calls.h dojump.h emit-rtl.h explow.h
fixed-value.h hashtab.h real.h statistics.h stmt.h varasm.h.
* cfgcleanup.c: Include calls.h dojump.h explow.h expmed.h fixed-value.h
function.h real.h statistics.h stmt.h varasm.h.
* alias.c: Include calls.h dojump.h explow.h expmed.h fixed-value.h
insn-config.h real.h statistics.h stmt.h.
* dwarf2out.c: Include calls.h dojump.h explow.h expmed.h fixed-value.h
statistics.h stmt.h.
* config/nvptx/nvptx.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h hard-reg-set.h insn-config.h real.h
statistics.h stmt.h varasm.h.
* gimplify.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
flags.h hashtab.h insn-config.h real.h rtl.h statistics.h.
* asan.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
flags.h hashtab.h insn-config.h real.h rtl.h statistics.h stmt.h.
* ipa-devirt.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h hashtab.h insn-config.h real.h rtl.h
statistics.h stmt.h varasm.h.
* ipa-polymorphic-call.c: Likewise.
* config/aarch64/aarch64.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h flags.h hashtab.h insn-config.h real.h statistics.h
stmt.h.
* config/c6x/c6x.c: Likewise.
* config/aarch64/aarch64-builtins.c: Include dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h flags.h hashtab.h insn-config.h real.h
statistics.h stmt.h varasm.h.
* ipa-prop.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
hashtab.h insn-config.h real.h rtl.h statistics.h stmt.h varasm.h.
* ipa-split.c: Likewise.
* tree-eh.c: Likewise.
* tree-ssa-dce.c: Likewise.
* tree-ssa-loop-niter.c: Likewise.
* tree-vrp.c: Likewise.
* config/nds32/nds32-cost.c: Include dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h hashtab.h insn-config.h real.h statistics.h
stmt.h.
* config/nds32/nds32-fp-as-gp.c: Likewise.
* config/nds32/nds32-intrinsic.c: Likewise.
* config/nds32/nds32-isr.c: Likewise.
* config/nds32/nds32-md-auxiliary.c: Likewise.
* config/nds32/nds32-memory-manipulation.c: Likewise.
* config/nds32/nds32-pipelines-auxiliary.c: Likewise.
* config/nds32/nds32-predicates.c: Likewise.
* config/nds32/nds32.c: Likewise.
* config/cris/cris.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h statistics.h.
* config/alpha/alpha.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h statistics.h stmt.h.
* config/arm/arm.c: Likewise.
* config/avr/avr.c: Likewise.
* config/bfin/bfin.c: Likewise.
* config/h8300/h8300.c: Likewise.
* config/i386/i386.c: Likewise.
* config/ia64/ia64.c: Likewise.
* config/iq2000/iq2000.c: Likewise.
* config/m32c/m32c.c: Likewise.
* config/m32r/m32r.c: Likewise.
* config/m68k/m68k.c: Likewise.
* config/mcore/mcore.c: Likewise.
* config/mep/mep.c: Likewise.
* config/mips/mips.c: Likewise.
* config/mn10300/mn10300.c: Likewise.
* config/moxie/moxie.c: Likewise.
* config/pa/pa.c: Likewise.
* config/rl78/rl78.c: Likewise.
* config/rx/rx.c: Likewise.
* config/s390/s390.c: Likewise.
* config/sh/sh.c: Likewise.
* config/sparc/sparc.c: Likewise.
* config/spu/spu.c: Likewise.
* config/stormy16/stormy16.c: Likewise.
* config/v850/v850.c: Likewise.
* config/vax/vax.c: Likewise.
* config/cr16/cr16.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h hashtab.h real.h statistics.h stmt.h varasm.h.
* config/msp430/msp430.c: Likewise.
* predict.c: Likewise.
* value-prof.c: Likewise.
* config/epiphany/epiphany.c: Include dojump.h emit-rtl.h explow.h
expmed.h fixed-value.h hashtab.h statistics.h stmt.h.
* config/microblaze/microblaze.c: Likewise.
* config/nios2/nios2.c: Likewise.
* config/rs6000/rs6000.c: Likewise.
* tree.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
insn-config.h real.h rtl.h statistics.h stmt.h.
* cgraph.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
insn-config.h real.h statistics.h stmt.h.
* fold-const.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h insn-config.h real.h statistics.h stmt.h varasm.h.
* tree-inline.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h real.h rtl.h statistics.h stmt.h varasm.h.
* builtins.c: Include dojump.h emit-rtl.h explow.h expmed.h fixed-value.h
real.h statistics.h stmt.h.
* config/arc/arc.c: Include dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h statistics.h stmt.h.
* config/visium/visium.c: Include dojump.h emit-rtl.h explow.h expmed.h
stmt.h.
java/
* builtins.c: Include calls.h dojump.h emit-rtl.h explow.h expmed.h
fixed-value.h function.h hard-reg-set.h hashtab.h insn-config.h
real.h statistics.h stmt.h varasm.h.
From-SVN: r219655
2015-01-15 14:28:42 +01:00
|
|
|
|
#include "flags.h"
|
|
|
|
|
#include "dojump.h"
|
|
|
|
|
#include "explow.h"
|
|
|
|
|
#include "calls.h"
|
|
|
|
|
#include "varasm.h"
|
|
|
|
|
#include "stmt.h"
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#include "expr.h"
|
|
|
|
|
#include "tm-preds.h"
|
|
|
|
|
#include "tm-constrs.h"
|
|
|
|
|
#include "langhooks.h"
|
|
|
|
|
#include "dbxout.h"
|
|
|
|
|
#include "cfgrtl.h"
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
#include "gimple.h"
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#include "stor-layout.h"
|
|
|
|
|
#include "builtins.h"
|
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz>
* omp-general.h: New file.
* omp-general.c: New file.
* omp-expand.h: Likewise.
* omp-expand.c: Likewise.
* omp-offload.h: Likewise.
* omp-offload.c: Likewise.
* omp-grid.h: Likewise.
* omp-grid.c: Likewise.
* omp-low.h: Include omp-general.h and omp-grid.h. Removed includes
of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h,
ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h,
varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h,
tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h,
alloc-pool.h, cfghooks.h, rtl.h and memmodel.h.
(omp_find_combined_for): Declare.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.h.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.h.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.h.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.h.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.h.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.h.
(oacc_fn_attrib_kernels_p): Moved to omp-general.h.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(omp_expand_local): Moved to omp-expand.h.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.h.
(omp_finish_file): Moved to omp-offload.h.
(default_goacc_validate_dims): Renamed to
oacc_default_goacc_validate_dims and moved to omp-offload.h.
(offload_funcs, offload_vars): Moved to omp-offload.h.
* omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h.
(omp_region): Moved to omp-expand.c.
(omp_for_data_loop): Moved to omp-general.h.
(omp_for_data): Likewise.
(oacc_loop): Moved to omp-offload.c.
(oacc_loop_flags): Moved to omp-general.h.
(offload_funcs, offload_vars): Moved to omp-offload.c.
(root_omp_region): Moved to omp-expand.c.
(omp_any_child_fn_dumped): Likewise.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.c.
(is_combined_parallel): Moved to omp-expand.c.
(is_reference): Renamed to omp_is_reference and moved to
omp-general.c.
(adjust_for_condition): Renamed to omp_adjust_for_condition and moved
to omp-general.c.
(get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr
and moved to omp-general.c.
(extract_omp_for_data): Renamed to omp_extract_for_data and moved to
omp-general.c.
(workshare_safe_to_combine_p): Moved to omp-expand.c.
(omp_adjust_chunk_size): Likewise.
(get_ws_args_for): Likewise.
(get_base_type): Removed.
(dump_omp_region): Moved to omp-expand.c.
(debug_omp_region): Likewise.
(debug_all_omp_regions): Likewise.
(new_omp_region): Likewise.
(free_omp_region_1): Likewise.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.c.
(find_combined_for): Renamed to omp_find_combined_for, made global.
(build_omp_barrier): Renamed to omp_build_barrier and moved to
omp-general.c.
(omp_max_vf): Moved to omp-general.c.
(omp_max_simt_vf): Likewise.
(gimple_build_cond_empty): Moved to omp-expand.c.
(parallel_needs_hsa_kernel_p): Likewise.
(expand_omp_build_assign): Moved declaration to omp-expand.c.
(expand_parallel_call): Moved to omp-expand.c.
(expand_cilk_for_call): Likewise.
(expand_task_call): Likewise.
(vec2chain): Likewise.
(remove_exit_barrier): Likewise.
(remove_exit_barriers): Likewise.
(optimize_omp_library_calls): Likewise.
(expand_omp_regimplify_p): Likewise.
(expand_omp_build_assign): Likewise.
(expand_omp_taskreg): Likewise.
(oacc_collapse): Likewise.
(expand_oacc_collapse_init): Likewise.
(expand_oacc_collapse_vars): Likewise.
(expand_omp_for_init_counts): Likewise.
(expand_omp_for_init_vars): Likewise.
(extract_omp_for_update_vars): Likewise.
(expand_omp_ordered_source): Likewise.
(expand_omp_ordered_sink): Likewise.
(expand_omp_ordered_source_sink): Likewise.
(expand_omp_for_ordered_loops): Likewise.
(expand_omp_for_generic): Likewise.
(expand_omp_for_static_nochunk): Likewise.
(find_phi_with_arg_on_edge): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_cilk_for): Likewise.
(expand_omp_simd): Likewise.
(expand_omp_taskloop_for_outer): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
(expand_omp_for): Likewise.
(expand_omp_sections): Likewise.
(expand_omp_single): Likewise.
(expand_omp_synch): Likewise.
(expand_omp_atomic_load): Likewise.
(expand_omp_atomic_store): Likewise.
(expand_omp_atomic_fetch_op): Likewise.
(expand_omp_atomic_pipeline): Likewise.
(expand_omp_atomic_mutex): Likewise.
(expand_omp_atomic): Likewise.
(oacc_launch_pack): Moved to omp-general.c, made public.
(OACC_FN_ATTRIB): Likewise.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.c.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.c.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.c.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.c.
(oacc_fn_attrib_kernels_p): Moved to omp-general.c.
(oacc_fn_attrib_level): Moved to omp-offload.c.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to
omp-general.c.
(mark_loops_in_oacc_kernels_region): Moved to omp-expand.c.
(grid_launch_attributes_trees): Likewise.
(grid_attr_trees): Likewise.
(grid_create_kernel_launch_attr_types): Likewise.
(grid_insert_store_range_dim): Likewise.
(grid_get_kernel_launch_attributes): Likewise.
(get_target_argument_identifier_1): Likewise.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(push_target_argument_according_to_value): Likewise.
(get_target_arguments): Likewise.
(expand_omp_target): Likewise.
(grid_expand_omp_for_loop): Moved to omp-grid.c.
(grid_arg_decl_map): Likewise.
(grid_remap_kernel_arg_accesses): Likewise.
(grid_expand_target_grid_body): Likewise.
(expand_omp): Renamed to omp_expand and moved to omp-expand.c.
(build_omp_regions_1): Moved to omp-expand.c.
(build_omp_regions_root): Likewise.
(omp_expand_local): Likewise.
(build_omp_regions): Likewise.
(execute_expand_omp): Likewise.
(pass_data_expand_omp): Likewise.
(pass_expand_omp): Likewise.
(make_pass_expand_omp): Likewise.
(pass_data_expand_omp_ssa): Likewise.
(pass_expand_omp_ssa): Likewise.
(make_pass_expand_omp_ssa): Likewise.
(grid_lastprivate_predicate): Renamed to
omp_grid_lastprivate_predicate and moved to omp-grid.c, made public.
(grid_prop): Moved to omp-grid.c.
(GRID_MISSED_MSG_PREFIX): Likewise.
(grid_safe_assignment_p): Likewise.
(grid_seq_only_contains_local_assignments): Likewise.
(grid_find_single_omp_among_assignments_1): Likewise.
(grid_find_single_omp_among_assignments): Likewise.
(grid_find_ungridifiable_statement): Likewise.
(grid_parallel_clauses_gridifiable): Likewise.
(grid_inner_loop_gridifiable_p): Likewise.
(grid_dist_follows_simple_pattern): Likewise.
(grid_gfor_follows_tiling_pattern): Likewise.
(grid_call_permissible_in_distribute_p): Likewise.
(grid_handle_call_in_distribute): Likewise.
(grid_dist_follows_tiling_pattern): Likewise.
(grid_target_follows_gridifiable_pattern): Likewise.
(grid_remap_prebody_decls): Likewise.
(grid_var_segment): Likewise.
(grid_mark_variable_segment): Likewise.
(grid_copy_leading_local_assignments): Likewise.
(grid_process_grid_body): Likewise.
(grid_eliminate_combined_simd_part): Likewise.
(grid_mark_tiling_loops): Likewise.
(grid_mark_tiling_parallels_and_loops): Likewise.
(grid_process_kernel_body_copy): Likewise.
(grid_attempt_target_gridification): Likewise.
(grid_gridify_all_targets_stmt): Likewise.
(grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets
and moved to omp-grid.c, made public.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.c.
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c.
(omp_finish_file): Likewise.
(oacc_thread_numbers): Likewise.
(oacc_xform_loop): Likewise.
(oacc_default_dims, oacc_min_dims): Likewise.
(oacc_parse_default_dims): Likewise.
(oacc_validate_dims): Likewise.
(new_oacc_loop_raw): Likewise.
(new_oacc_loop_outer): Likewise.
(new_oacc_loop): Likewise.
(new_oacc_loop_routine): Likewise.
(finish_oacc_loop): Likewise.
(free_oacc_loop): Likewise.
(dump_oacc_loop_part): Likewise.
(dump_oacc_loop): Likewise.
(debug_oacc_loop): Likewise.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_sibling_nreverse): Likewise.
(oacc_loop_discovery): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(oacc_loop_xform_loop): Likewise.
(oacc_loop_process): Likewise.
(oacc_loop_fixed_partitions): Likewise.
(oacc_loop_auto_partitions): Likewise.
(oacc_loop_partition): Likewise.
(default_goacc_fork_join): Likewise.
(default_goacc_reduction): Likewise.
(execute_oacc_device_lower): Likewise.
(default_goacc_validate_dims): Likewise.
(default_goacc_dim_limit): Likewise.
(pass_data_oacc_device_lower): Likewise.
(pass_oacc_device_lower): Likewise.
(make_pass_oacc_device_lower): Likewise.
(execute_omp_device_lower): Likewise.
(pass_data_omp_device_lower): Likewise.
(pass_omp_device_lower): Likewise.
(make_pass_omp_device_lower): Likewise.
(pass_data_omp_target_link): Likewise.
(pass_omp_target_link): Likewise.
(find_link_var_op): Likewise.
(pass_omp_target_link::execute): Likewise.
(make_pass_omp_target_link): Likewise.
* Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o
and omp-grid.o.
(GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed
omp-low.h.
* gimple-fold.c: Include omp-general.h instead of omp-low.h.
(fold_internal_goacc_dim): Adjusted calls to
get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names.
* gimplify.c: Include omp-low.h.
(omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use
its new name.
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new
name.
(gimplify_omp_for): Likewise.
* lto-cgraph.c: Include omp-offload.h instead of omp-low.h.
* toplev.c: Include omp-offload.h instead of omp-low.h.
* tree-cfg.c: Include omp-general.h instead of omp-low.h. Also
include omp-expand.h.
(make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its
new name.
(make_edges): Adjust the call to free_omp_regions to use its new name.
* tree-parloops.c: Include omp-general.h.
(create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use
its new name.
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-ssa-loop.c: Include omp-general.h instead of omp-low.h.
(gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-vrp.c: Include omp-general.h instead of omp-low.h.
(extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and
get_oacc_fn_dim_size to use their new names.
* varpool.c: Include omp-offload.h instead of omp-low.h.
* gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in
ifiles.
* config/nvptx/nvptx.c: Include omp-general.h.
(nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use
its new name.
(nvptx_reorg): Likewise.
(nvptx_record_offload_symbol): Likewise.
gcc/c-family:
* c-omp.c: Include omp-general.h instead of omp-low.h.
(c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new
name.
gcc/c/
* c-parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(c_finish_oacc_routine): Adjusted call to
get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib
to use their new names.
(c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to
use its new name.
(c_parser_oacc_update): Likewise.
(c_parser_omp_simd): Likewise.
(c_parser_omp_target_update): Likewise.
* c-typeck.c: Include omp-general.h instead of omp-low.h.
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new
name.
(c_finish_omp_cancellation_point): Likewise.
* gimple-parser.c: Do not include omp-low.h
gcc/cp/
* parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new
name.
(cp_parser_omp_target_update): Likewise.
(cp_parser_oacc_declare): Likewise.
(cp_parser_oacc_enter_exit_data): Likewise.
(cp_parser_oacc_update): Likewise.
(cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib,
build_oacc_routine_dims and replace_oacc_fn_attrib to use their new
names.
* semantics.c: Include omp-general.h instead of omp-low.h.
(finish_omp_for): Adjusted calls to find_omp_clause to use its new
name.
(finish_omp_cancel): Likewise.
(finish_omp_cancellation_point): Likewise.
fortran/
* trans-openmp.c: Include omp-general.h.
From-SVN: r243673
2016-12-14 23:30:41 +01:00
|
|
|
|
#include "omp-general.h"
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
include/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(struct targ_fn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
#include "omp-low.h"
|
|
|
|
|
#include "gomp-constants.h"
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
#include "dumpfile.h"
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
#include "internal-fn.h"
|
|
|
|
|
#include "gimple-iterator.h"
|
|
|
|
|
#include "stringpool.h"
|
2016-08-29 14:49:10 +02:00
|
|
|
|
#include "tree-vrp.h"
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
#include "tree-ssa-operands.h"
|
|
|
|
|
#include "tree-ssanames.h"
|
|
|
|
|
#include "gimplify.h"
|
|
|
|
|
#include "tree-phinodes.h"
|
|
|
|
|
#include "cfgloop.h"
|
|
|
|
|
#include "fold-const.h"
|
2017-03-01 11:25:49 +01:00
|
|
|
|
#include "intl.h"
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
aarch64.c, [...]: Add comment above target-def.h include.
gcc/
* config/aarch64/aarch64.c, config/alpha/alpha.c,
config/arm/arm.c, config/avr/avr.c, config/bfin/bfin.c,
config/c6x/c6x.c, config/cr16/cr16.c, config/cris/cris.c,
config/fr30/fr30.c, config/frv/frv.c, config/h8300/h8300.c,
config/i386/i386.c, config/ia64/ia64.c, config/iq2000/iq2000.c,
config/lm32/lm32.c, config/m32c/m32c.c, config/m32r/m32r.c,
config/m68k/m68k.c, config/mcore/mcore.c, config/mep/mep.c,
config/microblaze/microblaze.c, config/mips/mips.c,
config/mmix/mmix.c, config/mn10300/mn10300.c,
config/moxie/moxie.c, config/msp430/msp430.c,
config/nds32/nds32.c, config/nios2/nios2.c, config/nvptx/nvptx.c,
config/pa/pa.c, config/pdp11/pdp11.c, config/rl78/rl78.c,
config/rs6000/rs6000.c, config/rx/rx.c, config/s390/s390.c,
config/sh/sh.c, config/sparc/sparc.c, config/spu/spu.c,
config/stormy16/stormy16.c, config/tilegx/tilegx.c,
config/tilepro/tilepro.c, config/v850/v850.c, config/vax/vax.c,
config/visium/visium.c, config/xtensa/xtensa.c: Add comment above
target-def.h include.
* config/ft32/ft32.c: Likewise. Fix misapplied hunk.
From-SVN: r224980
2015-06-25 19:50:01 +02:00
|
|
|
|
/* This file should be included last. */
|
aarch64.c, [...]: Move target-def.h includes to end.
gcc/
* config/aarch64/aarch64.c, config/alpha/alpha.c, config/arm/arm.c,
config/avr/avr.c, config/bfin/bfin.c, config/c6x/c6x.c,
config/cr16/cr16.c, config/cris/cris.c, config/fr30/fr30.c,
config/frv/frv.c, config/ft32/ft32.c, config/h8300/h8300.c,
config/i386/i386.c, config/ia64/ia64.c, config/iq2000/iq2000.c,
config/lm32/lm32.c, config/m32c/m32c.c, config/m32r/m32r.c,
config/m68k/m68k.c, config/mcore/mcore.c, config/mep/mep.c,
config/microblaze/microblaze.c, config/mips/mips.c, config/mmix/mmix.c,
config/mn10300/mn10300.c, config/moxie/moxie.c, config/msp430/msp430.c,
config/nds32/nds32.c, config/nios2/nios2.c, config/nvptx/nvptx.c,
config/pa/pa.c, config/pdp11/pdp11.c, config/rl78/rl78.c,
config/rs6000/rs6000.c, config/rx/rx.c, config/s390/s390.c,
config/sh/sh.c, config/sparc/sparc.c, config/spu/spu.c,
config/stormy16/stormy16.c, config/tilegx/tilegx.c,
config/tilepro/tilepro.c, config/v850/v850.c, config/vax/vax.c,
config/visium/visium.c, config/xtensa/xtensa.c: Move target-def.h
includes to end.
From-SVN: r224978
2015-06-25 19:18:00 +02:00
|
|
|
|
#include "target-def.h"
|
|
|
|
|
|
nvptx-protos.h (nvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (nvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data areas for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags.
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
/* The PTX memory areas in which an object may reside.  The enumerators
   are numbered consecutively starting at zero.  */

enum nvptx_data_area
{
  DATA_AREA_GENERIC = 0,
  DATA_AREA_GLOBAL,
  DATA_AREA_SHARED,
  DATA_AREA_LOCAL,
  DATA_AREA_CONST,
  DATA_AREA_PARAM,
  DATA_AREA_MAX		/* One past the last area, i.e. the area count.  */
};
|
|
|
|
|
|
|
|
|
|
/* The data area of a symbol is recorded in the machine-dependent part
   of its SYMBOL_REF_FLAGS.  Three bits are read back (mask 7), which is
   enough for every nvptx_data_area value up to DATA_AREA_MAX.  */

/* Extract the data area recorded for SYM.  The whole expansion is
   parenthesized so that the cast cannot be separated from its operand
   by the surrounding expression (for example under sizeof).  */
#define SYMBOL_DATA_AREA(SYM) \
  ((nvptx_data_area)((SYMBOL_REF_FLAGS (SYM) >> SYMBOL_FLAG_MACH_DEP_SHIFT) \
		     & 7))

/* Record AREA as the data area of SYM.  The bits are OR-ed into the
   existing flags without clearing the field first, so this is only
   meaningful for a symbol whose data area field is still zero.  */
#define SET_SYMBOL_DATA_AREA(SYM,AREA) \
  (SYMBOL_REF_FLAGS (SYM) |= (AREA) << SYMBOL_FLAG_MACH_DEP_SHIFT)
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Record the function decls we've written, and the libfuncs and function
|
|
|
|
|
decls corresponding to them. */
|
|
|
|
|
static std::stringstream func_decls;
|
2014-12-09 03:30:06 +01:00
|
|
|
|
|
2015-06-25 19:06:44 +02:00
|
|
|
|
struct declared_libfunc_hasher : ggc_cache_ptr_hash<rtx_def>
|
2014-12-09 03:30:06 +01:00
|
|
|
|
{
|
|
|
|
|
static hashval_t hash (rtx x) { return htab_hash_pointer (x); }
|
|
|
|
|
static bool equal (rtx a, rtx b) { return a == b; }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static GTY((cache))
|
|
|
|
|
hash_table<declared_libfunc_hasher> *declared_libfuncs_htab;
|
|
|
|
|
|
2015-06-25 19:06:44 +02:00
|
|
|
|
struct tree_hasher : ggc_cache_ptr_hash<tree_node>
|
2014-12-09 03:30:06 +01:00
|
|
|
|
{
|
|
|
|
|
static hashval_t hash (tree t) { return htab_hash_pointer (t); }
|
|
|
|
|
static bool equal (tree a, tree b) { return a == b; }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static GTY((cache)) hash_table<tree_hasher> *declared_fndecls_htab;
|
|
|
|
|
static GTY((cache)) hash_table<tree_hasher> *needed_fndecls_htab;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
/* Buffer needed to broadcast across workers. This is used for both
|
|
|
|
|
worker-neutering and worker broadcasting. It is shared by all
|
|
|
|
|
functions emitted. The buffer is placed in shared memory. It'd be
|
|
|
|
|
nice if PTX supported common blocks, because then this could be
|
|
|
|
|
shared across TUs (taking the largest size). */
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
static unsigned worker_bcast_size;
|
|
|
|
|
static unsigned worker_bcast_align;
|
|
|
|
|
static GTY(()) rtx worker_bcast_sym;
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
/* Buffer needed for worker reductions. This has to be distinct from
|
|
|
|
|
the worker broadcast array, as both may be live concurrently. */
|
|
|
|
|
static unsigned worker_red_size;
|
|
|
|
|
static unsigned worker_red_align;
|
|
|
|
|
static GTY(()) rtx worker_red_sym;
|
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
/* Global lock variable, needed for 128bit worker & gang reductions. */
|
|
|
|
|
static GTY(()) tree global_lock_var;
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* True if any function references __nvptx_stacks. */
|
|
|
|
|
static bool need_softstack_decl;
|
|
|
|
|
|
|
|
|
|
/* True if any function references __nvptx_uni. */
|
|
|
|
|
static bool need_unisimt_decl;
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Allocate a new, cleared machine_function structure. */
|
|
|
|
|
|
|
|
|
|
static struct machine_function *
|
|
|
|
|
nvptx_init_machine_status (void)
|
|
|
|
|
{
|
|
|
|
|
struct machine_function *p = ggc_cleared_alloc<machine_function> ();
|
2015-12-16 14:51:36 +01:00
|
|
|
|
p->return_mode = VOIDmode;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Issue a diagnostic when option OPTNAME is enabled (as indicated by OPTVAL)
|
|
|
|
|
and -fopenacc is also enabled. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
diagnose_openacc_conflict (bool optval, const char *optname)
|
|
|
|
|
{
|
|
|
|
|
if (flag_openacc && optval)
|
|
|
|
|
error ("option %s is not supported together with -fopenacc", optname);
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Implement TARGET_OPTION_OVERRIDE. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_option_override (void)
|
|
|
|
|
{
|
|
|
|
|
init_machine_status = nvptx_init_machine_status;
|
2016-05-20 21:52:50 +02:00
|
|
|
|
|
2016-07-25 14:37:29 +02:00
|
|
|
|
/* Set toplevel_reorder, unless explicitly disabled. We need
|
|
|
|
|
reordering so that we emit necessary assembler decls of
|
|
|
|
|
undeclared variables. */
|
|
|
|
|
if (!global_options_set.x_flag_toplevel_reorder)
|
|
|
|
|
flag_toplevel_reorder = 1;
|
|
|
|
|
|
2016-05-20 21:52:50 +02:00
|
|
|
|
/* Set flag_no_common, unless explicitly disabled. We fake common
|
|
|
|
|
using .weak, and that's not entirely accurate, so avoid it
|
|
|
|
|
unless forced. */
|
|
|
|
|
if (!global_options_set.x_flag_no_common)
|
|
|
|
|
flag_no_common = 1;
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Assumes that it will see only hard registers. */
|
|
|
|
|
flag_var_tracking = 0;
|
2015-12-18 18:43:11 +01:00
|
|
|
|
|
2015-11-10 23:29:20 +01:00
|
|
|
|
if (nvptx_optimize < 0)
|
|
|
|
|
nvptx_optimize = optimize > 0;
|
|
|
|
|
|
2014-12-09 03:30:06 +01:00
|
|
|
|
declared_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
|
|
|
|
|
needed_fndecls_htab = hash_table<tree_hasher>::create_ggc (17);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
declared_libfuncs_htab
|
2014-12-09 03:30:06 +01:00
|
|
|
|
= hash_table<declared_libfunc_hasher>::create_ggc (17);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
2015-12-18 15:39:52 +01:00
|
|
|
|
worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_bcast");
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
SET_SYMBOL_DATA_AREA (worker_bcast_sym, DATA_AREA_SHARED);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-12-18 15:39:52 +01:00
|
|
|
|
worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_red");
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
|
2016-11-16 18:17:00 +01:00
|
|
|
|
|
|
|
|
|
diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
|
|
|
|
|
diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
|
|
|
|
|
diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
|
|
|
|
|
|
|
|
|
|
if (TARGET_GOMP)
|
|
|
|
|
target_flags |= MASK_SOFT_STACK | MASK_UNIFORM_SIMT;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
|
|
|
|
|
deal with ptx ideosyncracies. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_ptx_type_from_mode (machine_mode mode, bool promote)
|
|
|
|
|
{
|
|
|
|
|
switch (mode)
|
|
|
|
|
{
|
|
|
|
|
case BLKmode:
|
|
|
|
|
return ".b8";
|
|
|
|
|
case BImode:
|
|
|
|
|
return ".pred";
|
|
|
|
|
case QImode:
|
|
|
|
|
if (promote)
|
|
|
|
|
return ".u32";
|
|
|
|
|
else
|
|
|
|
|
return ".u8";
|
|
|
|
|
case HImode:
|
|
|
|
|
return ".u16";
|
|
|
|
|
case SImode:
|
|
|
|
|
return ".u32";
|
|
|
|
|
case DImode:
|
|
|
|
|
return ".u64";
|
|
|
|
|
|
|
|
|
|
case SFmode:
|
|
|
|
|
return ".f32";
|
|
|
|
|
case DFmode:
|
|
|
|
|
return ".f64";
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data areas for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
/* Encode the PTX data area that DECL (which might not actually be a
|
|
|
|
|
_DECL) should reside in. */
|
2015-11-27 15:22:26 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
static void
|
|
|
|
|
nvptx_encode_section_info (tree decl, rtx rtl, int first)
|
2015-11-27 15:22:26 +01:00
|
|
|
|
{
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
default_encode_section_info (decl, rtl, first);
|
|
|
|
|
if (first && MEM_P (rtl))
|
|
|
|
|
{
|
|
|
|
|
nvptx_data_area area = DATA_AREA_GENERIC;
|
2015-11-27 15:22:26 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
if (TREE_CONSTANT (decl))
|
|
|
|
|
area = DATA_AREA_CONST;
|
|
|
|
|
else if (TREE_CODE (decl) == VAR_DECL)
|
2016-11-16 18:17:00 +01:00
|
|
|
|
{
|
|
|
|
|
if (lookup_attribute ("shared", DECL_ATTRIBUTES (decl)))
|
|
|
|
|
{
|
|
|
|
|
area = DATA_AREA_SHARED;
|
|
|
|
|
if (DECL_INITIAL (decl))
|
|
|
|
|
error ("static initialization of variable %q+D in %<.shared%>"
|
|
|
|
|
" memory is not supported", decl);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
area = TREE_READONLY (decl) ? DATA_AREA_CONST : DATA_AREA_GLOBAL;
|
|
|
|
|
}
|
2015-11-27 15:22:26 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
SET_SYMBOL_DATA_AREA (XEXP (rtl, 0), area);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return the PTX name of the data area in which SYM should be
|
|
|
|
|
placed. The symbol must have already been processed by
|
|
|
|
|
nvptx_encode_seciton_info, or equivalent. */
|
|
|
|
|
|
|
|
|
|
static const char *
|
|
|
|
|
section_for_sym (rtx sym)
|
|
|
|
|
{
|
|
|
|
|
nvptx_data_area area = SYMBOL_DATA_AREA (sym);
|
|
|
|
|
/* Same order as nvptx_data_area enum. */
|
|
|
|
|
static char const *const areas[] =
|
|
|
|
|
{"", ".global", ".shared", ".local", ".const", ".param"};
|
|
|
|
|
|
|
|
|
|
return areas[area];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Similarly for a decl. */
|
|
|
|
|
|
|
|
|
|
static const char *
|
|
|
|
|
section_for_decl (const_tree decl)
|
|
|
|
|
{
|
|
|
|
|
return section_for_sym (XEXP (DECL_RTL (CONST_CAST (tree, decl)), 0));
|
2015-11-27 15:22:26 +01:00
|
|
|
|
}
|
|
|
|
|
|
2016-05-16 15:16:28 +02:00
|
|
|
|
/* Check NAME for special function names and redirect them by returning a
   replacement.  This applies to malloc, free and realloc, for which we
   want to use libgcc wrappers, and call, which triggers a bug in
   ptxas.  We can't use TARGET_MANGLE_DECL_ASSEMBLER_NAME, as that's
   not active in an offload compiler -- the names are all set by the
   host-side compiler.

   Matching is exact and case-sensitive.  Any other NAME is returned
   unchanged (the same pointer).  */

static const char *
nvptx_name_replacement (const char *name)
{
  if (strcmp (name, "call") == 0)
    return "__nvptx_call";
  if (strcmp (name, "malloc") == 0)
    return "__nvptx_malloc";
  if (strcmp (name, "free") == 0)
    return "__nvptx_free";
  if (strcmp (name, "realloc") == 0)
    return "__nvptx_realloc";
  return name;
}
|
|
|
|
|
|
2015-11-24 21:15:44 +01:00
|
|
|
|
/* If MODE should be treated as two registers of an inner mode, return
|
|
|
|
|
that inner mode. Otherwise return VOIDmode. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-24 21:15:44 +01:00
|
|
|
|
static machine_mode
|
|
|
|
|
maybe_split_mode (machine_mode mode)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
|
|
|
|
if (COMPLEX_MODE_P (mode))
|
2015-11-24 21:15:44 +01:00
|
|
|
|
return GET_MODE_INNER (mode);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
if (mode == TImode)
|
2015-11-24 21:15:44 +01:00
|
|
|
|
return DImode;
|
|
|
|
|
|
|
|
|
|
return VOIDmode;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-02 15:57:54 +01:00
|
|
|
|
/* Output a register, subreg, or register pair (with optional
|
|
|
|
|
enclosing braces). */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
output_reg (FILE *file, unsigned regno, machine_mode inner_mode,
|
|
|
|
|
int subreg_offset = -1)
|
|
|
|
|
{
|
|
|
|
|
if (inner_mode == VOIDmode)
|
|
|
|
|
{
|
|
|
|
|
if (HARD_REGISTER_NUM_P (regno))
|
|
|
|
|
fprintf (file, "%s", reg_names[regno]);
|
|
|
|
|
else
|
|
|
|
|
fprintf (file, "%%r%d", regno);
|
|
|
|
|
}
|
|
|
|
|
else if (subreg_offset >= 0)
|
|
|
|
|
{
|
|
|
|
|
output_reg (file, regno, VOIDmode);
|
|
|
|
|
fprintf (file, "$%d", subreg_offset);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (subreg_offset == -1)
|
|
|
|
|
fprintf (file, "{");
|
|
|
|
|
output_reg (file, regno, inner_mode, GET_MODE_SIZE (inner_mode));
|
|
|
|
|
fprintf (file, ",");
|
|
|
|
|
output_reg (file, regno, inner_mode, 0);
|
|
|
|
|
if (subreg_offset == -1)
|
|
|
|
|
fprintf (file, "}");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Emit forking instructions for MASK. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_emit_forking (unsigned mask, bool is_call)
|
|
|
|
|
{
|
|
|
|
|
mask &= (GOMP_DIM_MASK (GOMP_DIM_WORKER)
|
|
|
|
|
| GOMP_DIM_MASK (GOMP_DIM_VECTOR));
|
|
|
|
|
if (mask)
|
|
|
|
|
{
|
|
|
|
|
rtx op = GEN_INT (mask | (is_call << GOMP_DIM_MAX));
|
|
|
|
|
|
|
|
|
|
/* Emit fork at all levels. This helps form SESE regions, as
|
|
|
|
|
it creates a block with a single successor before entering a
|
|
|
|
|
partitooned region. That is a good candidate for the end of
|
|
|
|
|
an SESE region. */
|
|
|
|
|
if (!is_call)
|
|
|
|
|
emit_insn (gen_nvptx_fork (op));
|
|
|
|
|
emit_insn (gen_nvptx_forked (op));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit joining instructions for MASK. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_emit_joining (unsigned mask, bool is_call)
|
|
|
|
|
{
|
|
|
|
|
mask &= (GOMP_DIM_MASK (GOMP_DIM_WORKER)
|
|
|
|
|
| GOMP_DIM_MASK (GOMP_DIM_VECTOR));
|
|
|
|
|
if (mask)
|
|
|
|
|
{
|
|
|
|
|
rtx op = GEN_INT (mask | (is_call << GOMP_DIM_MAX));
|
|
|
|
|
|
|
|
|
|
/* Emit joining for all non-call pars to ensure there's a single
|
|
|
|
|
predecessor for the block the join insn ends up in. This is
|
|
|
|
|
needed for skipping entire loops. */
|
|
|
|
|
if (!is_call)
|
|
|
|
|
emit_insn (gen_nvptx_joining (op));
|
|
|
|
|
emit_insn (gen_nvptx_join (op));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
/* Determine whether MODE and TYPE (possibly NULL) should be passed or
|
|
|
|
|
returned in memory. Integer and floating types supported by the
|
|
|
|
|
machine are passed in registers, everything else is passed in
|
|
|
|
|
memory. Complex types are split. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
pass_in_memory (machine_mode mode, const_tree type, bool for_return)
|
|
|
|
|
{
|
|
|
|
|
if (type)
|
|
|
|
|
{
|
|
|
|
|
if (AGGREGATE_TYPE_P (type))
|
|
|
|
|
return true;
|
|
|
|
|
if (TREE_CODE (type) == VECTOR_TYPE)
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!for_return && COMPLEX_MODE_P (mode))
|
|
|
|
|
/* Complex types are passed as two underlying args. */
|
|
|
|
|
mode = GET_MODE_INNER (mode);
|
|
|
|
|
|
|
|
|
|
if (GET_MODE_CLASS (mode) != MODE_INT
|
|
|
|
|
&& GET_MODE_CLASS (mode) != MODE_FLOAT)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* A non-memory argument of mode MODE is being passed, determine the mode it
|
|
|
|
|
should be promoted to. This is also used for determining return
|
|
|
|
|
type promotion. */
|
|
|
|
|
|
|
|
|
|
static machine_mode
|
|
|
|
|
promote_arg (machine_mode mode, bool prototyped)
|
|
|
|
|
{
|
|
|
|
|
if (!prototyped && mode == SFmode)
|
|
|
|
|
/* K&R float promotion for unprototyped functions. */
|
|
|
|
|
mode = DFmode;
|
|
|
|
|
else if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
|
|
|
|
|
mode = SImode;
|
|
|
|
|
|
|
|
|
|
return mode;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* A non-memory return type of MODE is being returned. Determine the
|
|
|
|
|
mode it should be promoted to. */
|
|
|
|
|
|
|
|
|
|
static machine_mode
|
|
|
|
|
promote_return (machine_mode mode)
|
|
|
|
|
{
|
|
|
|
|
return promote_arg (mode, true);
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
/* Implement TARGET_FUNCTION_ARG. */
|
2015-12-04 15:02:27 +01:00
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
static rtx
|
2015-12-11 19:06:37 +01:00
|
|
|
|
nvptx_function_arg (cumulative_args_t ARG_UNUSED (cum_v), machine_mode mode,
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
const_tree, bool named)
|
|
|
|
|
{
|
2015-12-11 19:06:37 +01:00
|
|
|
|
if (mode == VOIDmode || !named)
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
return NULL_RTX;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-11 19:06:37 +01:00
|
|
|
|
return gen_reg_rtx (mode);
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_FUNCTION_INCOMING_ARG. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_function_incoming_arg (cumulative_args_t cum_v, machine_mode mode,
|
|
|
|
|
const_tree, bool named)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
|
2015-12-11 19:06:37 +01:00
|
|
|
|
if (mode == VOIDmode || !named)
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
return NULL_RTX;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
/* No need to deal with split modes here, the only case that can
|
|
|
|
|
happen is complex modes and those are dealt with by
|
|
|
|
|
TARGET_SPLIT_COMPLEX_ARG. */
|
|
|
|
|
return gen_rtx_UNSPEC (mode,
|
|
|
|
|
gen_rtvec (1, GEN_INT (cum->count)),
|
|
|
|
|
UNSPEC_ARG_REG);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_FUNCTION_ARG_ADVANCE. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_function_arg_advance (cumulative_args_t cum_v,
|
|
|
|
|
machine_mode ARG_UNUSED (mode),
|
|
|
|
|
const_tree ARG_UNUSED (type),
|
|
|
|
|
bool ARG_UNUSED (named))
|
|
|
|
|
{
|
|
|
|
|
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
|
2015-12-11 19:06:37 +01:00
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
cum->count++;
|
|
|
|
|
}
|
|
|
|
|
|
2016-05-17 23:16:49 +02:00
|
|
|
|
/* Implement TARGET_FUNCTION_ARG_BOUNDARY.
|
|
|
|
|
|
|
|
|
|
For nvptx This is only used for varadic args. The type has already
|
|
|
|
|
been promoted and/or converted to invisible reference. */
|
|
|
|
|
|
|
|
|
|
static unsigned
|
|
|
|
|
nvptx_function_arg_boundary (machine_mode mode, const_tree ARG_UNUSED (type))
|
|
|
|
|
{
|
|
|
|
|
return GET_MODE_ALIGNMENT (mode);
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
|
|
|
|
|
|
|
|
|
|
For nvptx, we know how to handle functions declared as stdarg: by
|
|
|
|
|
passing an extra pointer to the unnamed arguments. However, the
|
|
|
|
|
Fortran frontend can produce a different situation, where a
|
|
|
|
|
function pointer is declared with no arguments, but the actual
|
|
|
|
|
function and calls to it take more arguments. In that case, we
|
|
|
|
|
want to ensure the call matches the definition of the function. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_strict_argument_naming (cumulative_args_t cum_v)
|
|
|
|
|
{
|
|
|
|
|
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
|
2015-12-11 19:06:37 +01:00
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_LIBCALL_VALUE. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_libcall_value (machine_mode mode, const_rtx)
|
|
|
|
|
{
|
2016-05-02 13:25:17 +02:00
|
|
|
|
if (!cfun || !cfun->machine->doing_call)
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
/* Pretend to return in a hard reg for early uses before pseudos can be
|
|
|
|
|
generated. */
|
|
|
|
|
return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
|
2015-12-11 19:06:37 +01:00
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
return gen_reg_rtx (mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
|
|
|
|
|
where function FUNC returns or receives a value of data type TYPE. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
2015-12-14 20:26:49 +01:00
|
|
|
|
nvptx_function_value (const_tree type, const_tree ARG_UNUSED (func),
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
bool outgoing)
|
|
|
|
|
{
|
2015-12-14 20:26:49 +01:00
|
|
|
|
machine_mode mode = promote_return (TYPE_MODE (type));
|
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
if (outgoing)
|
2015-12-15 14:32:48 +01:00
|
|
|
|
{
|
2016-05-02 13:25:17 +02:00
|
|
|
|
gcc_assert (cfun);
|
2015-12-16 14:51:36 +01:00
|
|
|
|
cfun->machine->return_mode = mode;
|
2015-12-15 14:32:48 +01:00
|
|
|
|
return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM);
|
|
|
|
|
}
|
2015-12-11 19:06:37 +01:00
|
|
|
|
|
|
|
|
|
return nvptx_libcall_value (mode, NULL_RTX);
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_function_value_regno_p (const unsigned int regno)
|
|
|
|
|
{
|
|
|
|
|
return regno == NVPTX_RETURN_REGNUM;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Types with a mode other than those supported by the machine are passed by
|
|
|
|
|
reference in memory. */
|
|
|
|
|
|
|
|
|
|
static bool
|
2015-12-16 21:02:02 +01:00
|
|
|
|
nvptx_pass_by_reference (cumulative_args_t ARG_UNUSED (cum),
|
|
|
|
|
machine_mode mode, const_tree type,
|
|
|
|
|
bool ARG_UNUSED (named))
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
{
|
2015-12-14 20:26:49 +01:00
|
|
|
|
return pass_in_memory (mode, type, false);
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_RETURN_IN_MEMORY. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_return_in_memory (const_tree type, const_tree)
|
|
|
|
|
{
|
2015-12-14 20:26:49 +01:00
|
|
|
|
return pass_in_memory (TYPE_MODE (type), type, true);
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_PROMOTE_FUNCTION_MODE. */
|
|
|
|
|
|
|
|
|
|
static machine_mode
|
|
|
|
|
nvptx_promote_function_mode (const_tree type, machine_mode mode,
|
2015-12-14 20:26:49 +01:00
|
|
|
|
int *ARG_UNUSED (punsignedp),
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
const_tree funtype, int for_return)
|
|
|
|
|
{
|
2015-12-14 20:26:49 +01:00
|
|
|
|
return promote_arg (mode, for_return || !type || TYPE_ARG_TYPES (funtype));
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Helper for write_arg. Emit a single PTX argument of MODE, either
|
|
|
|
|
in a prototype, or as copy in a function prologue. ARGNO is the
|
|
|
|
|
index of this argument in the PTX function. FOR_REG is negative,
|
|
|
|
|
if we're emitting the PTX prototype. It is zero if we're copying
|
|
|
|
|
to an argument register and it is greater than zero if we're
|
|
|
|
|
copying to a specific hard register. */
|
|
|
|
|
|
|
|
|
|
static int
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_arg_mode (std::stringstream &s, int for_reg, int argno,
|
|
|
|
|
machine_mode mode)
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
{
|
|
|
|
|
const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
|
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
if (for_reg < 0)
|
|
|
|
|
{
|
|
|
|
|
/* Writing PTX prototype. */
|
|
|
|
|
s << (argno ? ", " : " (");
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
s << ".param" << ptx_type << " %in_ar" << argno;
|
2015-12-04 15:02:27 +01:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
s << "\t.reg" << ptx_type << " ";
|
2015-12-04 15:02:27 +01:00
|
|
|
|
if (for_reg)
|
|
|
|
|
s << reg_names[for_reg];
|
|
|
|
|
else
|
|
|
|
|
s << "%ar" << argno;
|
|
|
|
|
s << ";\n";
|
2015-12-16 21:02:02 +01:00
|
|
|
|
if (argno >= 0)
|
|
|
|
|
{
|
|
|
|
|
s << "\tld.param" << ptx_type << " ";
|
|
|
|
|
if (for_reg)
|
|
|
|
|
s << reg_names[for_reg];
|
|
|
|
|
else
|
|
|
|
|
s << "%ar" << argno;
|
|
|
|
|
s << ", [%in_ar" << argno << "];\n";
|
|
|
|
|
}
|
2015-12-04 15:02:27 +01:00
|
|
|
|
}
|
|
|
|
|
return argno + 1;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
/* Process function parameter TYPE to emit one or more PTX
|
2015-12-15 21:46:37 +01:00
|
|
|
|
arguments. S, FOR_REG and ARGNO as for write_arg_mode. PROTOTYPED
|
2015-12-14 20:26:49 +01:00
|
|
|
|
is true, if this is a prototyped function, rather than an old-style
|
|
|
|
|
C declaration. Returns the next argument number to use.
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
|
2016-02-12 00:53:54 +01:00
|
|
|
|
The promotion behavior here must match the regular GCC function
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
parameter marshalling machinery. */
|
|
|
|
|
|
|
|
|
|
static int
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_arg_type (std::stringstream &s, int for_reg, int argno,
|
|
|
|
|
tree type, bool prototyped)
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
{
|
|
|
|
|
machine_mode mode = TYPE_MODE (type);
|
|
|
|
|
|
|
|
|
|
if (mode == VOIDmode)
|
|
|
|
|
return argno;
|
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
if (pass_in_memory (mode, type, false))
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
mode = Pmode;
|
2015-12-14 20:26:49 +01:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
bool split = TREE_CODE (type) == COMPLEX_TYPE;
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
if (split)
|
|
|
|
|
{
|
|
|
|
|
/* Complex types are sent as two separate args. */
|
|
|
|
|
type = TREE_TYPE (type);
|
2015-12-16 21:02:02 +01:00
|
|
|
|
mode = TYPE_MODE (type);
|
2015-12-14 20:26:49 +01:00
|
|
|
|
prototyped = true;
|
|
|
|
|
}
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
mode = promote_arg (mode, prototyped);
|
|
|
|
|
if (split)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_mode (s, for_reg, argno, mode);
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
return write_arg_mode (s, for_reg, argno, mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit a PTX return as a prototype or function prologue declaration
|
|
|
|
|
for MODE. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
write_return_mode (std::stringstream &s, bool for_proto, machine_mode mode)
|
|
|
|
|
{
|
|
|
|
|
const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
|
|
|
|
|
const char *pfx = "\t.reg";
|
|
|
|
|
const char *sfx = ";\n";
|
|
|
|
|
|
|
|
|
|
if (for_proto)
|
|
|
|
|
pfx = "(.param", sfx = "_out) ";
|
|
|
|
|
|
|
|
|
|
s << pfx << ptx_type << " " << reg_names[NVPTX_RETURN_REGNUM] << sfx;
|
nvptx.c (nvptx_function_arg, [...]): Move earlier.
* config/nvptx/nvptx.c (nvptx_function_arg,
nvptx_function_incoming_arg, nvptx_function_arg_advance,
nvptx_strict_argument_naming, nvptx_function_arg_boundary,
nvptx_libcall_value, nvptx_function_value,
nvptx_function_value_regno_p, nvptx_pass_by_reference,
nvptx_return_in_memory, nvptx_promote_function_mode,
nvptx_static_chain): Move earlier.
(write_one_arg): Break out as helper fn for ...
(write_arg): ... this new function. Adjust all callers.
From-SVN: r231543
2015-12-10 21:13:14 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
/* Process a function return TYPE to emit a PTX return as a prototype
|
2015-12-15 21:46:37 +01:00
|
|
|
|
or function prologue declaration. Returns true if return is via an
|
2016-02-12 00:53:54 +01:00
|
|
|
|
additional pointer parameter. The promotion behavior here must
|
2015-12-15 21:46:37 +01:00
|
|
|
|
match the regular GCC function return mashalling. */
|
2015-12-14 20:26:49 +01:00
|
|
|
|
|
2015-12-07 18:53:09 +01:00
|
|
|
|
static bool
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_return_type (std::stringstream &s, bool for_proto, tree type)
|
2015-12-07 18:53:09 +01:00
|
|
|
|
{
|
|
|
|
|
machine_mode mode = TYPE_MODE (type);
|
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
if (mode == VOIDmode)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
bool return_in_mem = pass_in_memory (mode, type, true);
|
|
|
|
|
|
|
|
|
|
if (return_in_mem)
|
2015-12-07 18:53:09 +01:00
|
|
|
|
{
|
2015-12-14 20:26:49 +01:00
|
|
|
|
if (for_proto)
|
|
|
|
|
return return_in_mem;
|
|
|
|
|
|
|
|
|
|
/* Named return values can cause us to return a pointer as well
|
|
|
|
|
as expect an argument for the return location. This is
|
|
|
|
|
optimization-level specific, so no caller can make use of
|
|
|
|
|
this data, but more importantly for us, we must ensure it
|
|
|
|
|
doesn't change the PTX prototype. */
|
2015-12-16 14:51:36 +01:00
|
|
|
|
mode = (machine_mode) cfun->machine->return_mode;
|
2015-12-15 14:32:48 +01:00
|
|
|
|
|
2015-12-14 20:26:49 +01:00
|
|
|
|
if (mode == VOIDmode)
|
|
|
|
|
return return_in_mem;
|
|
|
|
|
|
2015-12-16 14:51:36 +01:00
|
|
|
|
/* Clear return_mode to inhibit copy of retval to non-existent
|
2015-12-14 20:26:49 +01:00
|
|
|
|
retval parameter. */
|
2015-12-16 14:51:36 +01:00
|
|
|
|
cfun->machine->return_mode = VOIDmode;
|
2015-12-07 18:53:09 +01:00
|
|
|
|
}
|
|
|
|
|
else
|
2015-12-14 20:26:49 +01:00
|
|
|
|
mode = promote_return (mode);
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_return_mode (s, for_proto, mode);
|
2015-12-07 18:53:09 +01:00
|
|
|
|
|
|
|
|
|
return return_in_mem;
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Look for attributes in ATTRS that would indicate we must write a function
|
|
|
|
|
as a .entry kernel rather than a .func. Return true if one is found. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
write_as_kernel (tree attrs)
|
|
|
|
|
{
|
|
|
|
|
return (lookup_attribute ("kernel", attrs) != NULL_TREE
|
2016-11-16 18:17:00 +01:00
|
|
|
|
|| (lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE
|
|
|
|
|
&& lookup_attribute ("oacc function", attrs) != NULL_TREE));
|
|
|
|
|
/* For OpenMP target regions, the corresponding kernel entry is emitted from
|
|
|
|
|
write_omp_entry as a separate function. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-01 21:13:02 +01:00
|
|
|
|
/* Emit to S a linker marker comment for a function decl or defn
   called NAME.  The marker has the form
     "\n// BEGIN[ GLOBAL] FUNCTION DEF: NAME\n"   when IS_DEFN, or
     "\n// BEGIN[ GLOBAL] FUNCTION DECL: NAME\n"  otherwise,
   where " GLOBAL" is present only if GLOBALIZE is set.  */

static void
write_fn_marker (std::stringstream &s, bool is_defn, bool globalize,
                 const char *name)
{
  const char *scope = globalize ? " GLOBAL" : "";
  const char *kind = is_defn ? "DEF: " : "DECL: ";

  s << "\n// BEGIN" << scope << " FUNCTION " << kind << name << "\n";
}
|
|
|
|
|
|
|
|
|
|
/* Emit a linker marker for a variable decl or defn. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
write_var_marker (FILE *file, bool is_defn, bool globalize, const char *name)
|
|
|
|
|
{
|
|
|
|
|
fprintf (file, "\n// BEGIN%s VAR %s: ",
|
|
|
|
|
globalize ? " GLOBAL" : "",
|
|
|
|
|
is_defn ? "DEF" : "DECL");
|
|
|
|
|
assemble_name_raw (file, name);
|
|
|
|
|
fputs ("\n", file);
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
/* Write a .func or .kernel declaration or definition along with
|
|
|
|
|
a helper comment for use by ld. S is the stream to write to, DECL
|
|
|
|
|
the decl for the function with name NAME. For definitions, emit
|
|
|
|
|
a declaration too. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
static const char *
|
|
|
|
|
write_fn_proto (std::stringstream &s, bool is_defn,
|
|
|
|
|
const char *name, const_tree decl)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-11-30 15:44:48 +01:00
|
|
|
|
if (is_defn)
|
|
|
|
|
/* Emit a declaration. The PTX assembler gets upset without it. */
|
|
|
|
|
name = write_fn_proto (s, false, name, decl);
|
2016-05-16 15:16:28 +02:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/* Avoid repeating the name replacement. */
|
|
|
|
|
name = nvptx_name_replacement (name);
|
|
|
|
|
if (name[0] == '*')
|
|
|
|
|
name++;
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-01 21:13:02 +01:00
|
|
|
|
write_fn_marker (s, is_defn, TREE_PUBLIC (decl), name);
|
2015-11-30 15:44:48 +01:00
|
|
|
|
|
|
|
|
|
/* PTX declaration. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (DECL_EXTERNAL (decl))
|
|
|
|
|
s << ".extern ";
|
|
|
|
|
else if (TREE_PUBLIC (decl))
|
2015-11-19 23:05:03 +01:00
|
|
|
|
s << (DECL_WEAK (decl) ? ".weak " : ".visible ");
|
2015-11-30 15:44:48 +01:00
|
|
|
|
s << (write_as_kernel (DECL_ATTRIBUTES (decl)) ? ".entry " : ".func ");
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
tree fntype = TREE_TYPE (decl);
|
|
|
|
|
tree result_type = TREE_TYPE (fntype);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2016-05-13 14:57:50 +02:00
|
|
|
|
/* atomic_compare_exchange_$n builtins have an exceptional calling
|
|
|
|
|
convention. */
|
|
|
|
|
int not_atomic_weak_arg = -1;
|
|
|
|
|
if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
|
|
|
|
|
switch (DECL_FUNCTION_CODE (decl))
|
|
|
|
|
{
|
|
|
|
|
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
|
|
|
|
|
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
|
|
|
|
|
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
|
|
|
|
|
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
|
|
|
|
|
case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
|
|
|
|
|
/* These atomics skip the 'weak' parm in an actual library
|
|
|
|
|
call. We must skip it in the prototype too. */
|
|
|
|
|
not_atomic_weak_arg = 3;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Declare the result. */
|
2015-12-15 21:46:37 +01:00
|
|
|
|
bool return_in_mem = write_return_type (s, true, result_type);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
s << name;
|
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
int argno = 0;
|
2015-11-30 15:44:48 +01:00
|
|
|
|
|
|
|
|
|
/* Emit argument list. */
|
|
|
|
|
if (return_in_mem)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, -1, argno, ptr_type_node, true);
|
2015-11-25 18:33:30 +01:00
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
/* We get:
|
|
|
|
|
NULL in TYPE_ARG_TYPES, for old-style functions
|
|
|
|
|
NULL in DECL_ARGUMENTS, for builtin functions without another
|
|
|
|
|
declaration.
|
|
|
|
|
So we have to pick the best one we have. */
|
|
|
|
|
tree args = TYPE_ARG_TYPES (fntype);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
bool prototyped = true;
|
|
|
|
|
if (!args)
|
|
|
|
|
{
|
|
|
|
|
args = DECL_ARGUMENTS (decl);
|
|
|
|
|
prototyped = false;
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2016-05-13 14:57:50 +02:00
|
|
|
|
for (; args; args = TREE_CHAIN (args), not_atomic_weak_arg--)
|
2015-11-30 15:44:48 +01:00
|
|
|
|
{
|
2015-12-04 15:02:27 +01:00
|
|
|
|
tree type = prototyped ? TREE_VALUE (args) : TREE_TYPE (args);
|
2016-05-13 14:57:50 +02:00
|
|
|
|
|
|
|
|
|
if (not_atomic_weak_arg)
|
|
|
|
|
argno = write_arg_type (s, -1, argno, type, prototyped);
|
|
|
|
|
else
|
|
|
|
|
gcc_assert (type == boolean_type_node);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
if (stdarg_p (fntype))
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, -1, argno, ptr_type_node, true);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-30 15:44:48 +01:00
|
|
|
|
if (DECL_STATIC_CHAIN (decl))
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, -1, argno, ptr_type_node, true);
|
2015-11-30 15:44:48 +01:00
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
if (!argno && strcmp (name, "main") == 0)
|
2015-11-30 15:44:48 +01:00
|
|
|
|
{
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, -1, argno, integer_type_node, true);
|
|
|
|
|
argno = write_arg_type (s, -1, argno, ptr_type_node, true);
|
2015-11-30 15:44:48 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
if (argno)
|
2015-11-30 15:44:48 +01:00
|
|
|
|
s << ")";
|
|
|
|
|
|
|
|
|
|
s << (is_defn ? "\n" : ";\n");
|
|
|
|
|
|
|
|
|
|
return name;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-27 14:57:09 +01:00
|
|
|
|
/* Construct a function declaration from a call insn. This can be
|
|
|
|
|
necessary for two reasons - either we have an indirect call which
|
|
|
|
|
requires a .callprototype declaration, or we have a libcall
|
|
|
|
|
generated by emit_library_call for which no decl exists. */
|
|
|
|
|
|
|
|
|
|
static void
|
2015-11-30 15:44:48 +01:00
|
|
|
|
write_fn_proto_from_insn (std::stringstream &s, const char *name,
|
|
|
|
|
rtx result, rtx pat)
|
2015-11-27 14:57:09 +01:00
|
|
|
|
{
|
|
|
|
|
if (!name)
|
|
|
|
|
{
|
|
|
|
|
s << "\t.callprototype ";
|
|
|
|
|
name = "_";
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-05-16 15:16:28 +02:00
|
|
|
|
name = nvptx_name_replacement (name);
|
2015-12-01 21:13:02 +01:00
|
|
|
|
write_fn_marker (s, false, true, name);
|
2015-11-27 14:57:09 +01:00
|
|
|
|
s << "\t.extern .func ";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (result != NULL_RTX)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_return_mode (s, true, GET_MODE (result));
|
2015-11-27 14:57:09 +01:00
|
|
|
|
|
|
|
|
|
s << name;
|
|
|
|
|
|
|
|
|
|
int arg_end = XVECLEN (pat, 0);
|
|
|
|
|
for (int i = 1; i < arg_end; i++)
|
|
|
|
|
{
|
2015-12-15 21:46:37 +01:00
|
|
|
|
/* We don't have to deal with mode splitting & promotion here,
|
|
|
|
|
as that was already done when generating the call
|
|
|
|
|
sequence. */
|
2015-11-27 14:57:09 +01:00
|
|
|
|
machine_mode mode = GET_MODE (XEXP (XVECEXP (pat, 0, i), 0));
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
write_arg_mode (s, -1, i - 1, mode);
|
2015-11-27 14:57:09 +01:00
|
|
|
|
}
|
|
|
|
|
if (arg_end != 1)
|
|
|
|
|
s << ")";
|
|
|
|
|
s << ";\n";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* DECL is an external FUNCTION_DECL, make sure its in the fndecl hash
|
|
|
|
|
table and and write a ptx prototype. These are emitted at end of
|
|
|
|
|
compilation. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-27 14:57:09 +01:00
|
|
|
|
static void
|
|
|
|
|
nvptx_record_fndecl (tree decl)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2014-12-09 03:30:06 +01:00
|
|
|
|
tree *slot = declared_fndecls_htab->find_slot (decl, INSERT);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (*slot == NULL)
|
|
|
|
|
{
|
|
|
|
|
*slot = decl;
|
|
|
|
|
const char *name = get_fnname_from_decl (decl);
|
2015-11-30 15:44:48 +01:00
|
|
|
|
write_fn_proto (func_decls, false, name, decl);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-27 14:57:09 +01:00
|
|
|
|
/* Record a libcall or unprototyped external function. CALLEE is the
|
|
|
|
|
SYMBOL_REF. Insert into the libfunc hash table and emit a ptx
|
|
|
|
|
declaration for it. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_record_libfunc (rtx callee, rtx retval, rtx pat)
|
|
|
|
|
{
|
|
|
|
|
rtx *slot = declared_libfuncs_htab->find_slot (callee, INSERT);
|
|
|
|
|
if (*slot == NULL)
|
|
|
|
|
{
|
|
|
|
|
*slot = callee;
|
|
|
|
|
|
|
|
|
|
const char *name = XSTR (callee, 0);
|
2015-11-30 15:44:48 +01:00
|
|
|
|
write_fn_proto_from_insn (func_decls, name, retval, pat);
|
2015-11-27 14:57:09 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* DECL is an external FUNCTION_DECL, that we're referencing. If it
|
|
|
|
|
is prototyped, record it now. Otherwise record it as needed at end
|
|
|
|
|
of compilation, when we might have more information about it. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_record_needed_fndecl (tree decl)
|
|
|
|
|
{
|
2015-11-27 14:57:09 +01:00
|
|
|
|
if (TYPE_ARG_TYPES (TREE_TYPE (decl)) == NULL_TREE)
|
|
|
|
|
{
|
|
|
|
|
tree *slot = needed_fndecls_htab->find_slot (decl, INSERT);
|
|
|
|
|
if (*slot == NULL)
|
|
|
|
|
*slot = decl;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
nvptx_record_fndecl (decl);
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-27 14:57:09 +01:00
|
|
|
|
/* SYM is a SYMBOL_REF. If it refers to an external function, record
|
|
|
|
|
it as needed. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_maybe_record_fnsym (rtx sym)
|
|
|
|
|
{
|
|
|
|
|
tree decl = SYMBOL_REF_DECL (sym);
|
|
|
|
|
|
|
|
|
|
if (decl && TREE_CODE (decl) == FUNCTION_DECL && DECL_EXTERNAL (decl))
|
|
|
|
|
nvptx_record_needed_fndecl (decl);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
/* Emit a local array to hold some part of a conventional stack frame
|
2015-12-16 21:02:02 +01:00
|
|
|
|
and initialize REGNO to point to it. If the size is zero, it'll
|
|
|
|
|
never be valid to dereference, so we can simply initialize to
|
|
|
|
|
zero. */
|
2015-12-15 21:46:37 +01:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
init_frame (FILE *file, int regno, unsigned align, unsigned size)
|
|
|
|
|
{
|
2015-12-16 21:02:02 +01:00
|
|
|
|
if (size)
|
|
|
|
|
fprintf (file, "\t.local .align %d .b8 %s_ar[%u];\n",
|
|
|
|
|
align, reg_names[regno], size);
|
|
|
|
|
fprintf (file, "\t.reg.u%d %s;\n",
|
|
|
|
|
POINTER_SIZE, reg_names[regno]);
|
|
|
|
|
fprintf (file, (size ? "\tcvta.local.u%d %s, %s_ar;\n"
|
|
|
|
|
: "\tmov.u%d %s, 0;\n"),
|
2015-12-15 21:46:37 +01:00
|
|
|
|
POINTER_SIZE, reg_names[regno], reg_names[regno]);
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Emit soft stack frame setup sequence. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
|
|
|
|
|
{
|
|
|
|
|
/* Maintain 64-bit stack alignment. */
|
|
|
|
|
unsigned keep_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
|
|
|
|
|
size = ROUND_UP (size, keep_align);
|
|
|
|
|
int bits = POINTER_SIZE;
|
|
|
|
|
const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
|
|
|
|
|
const char *reg_frame = reg_names[FRAME_POINTER_REGNUM];
|
|
|
|
|
const char *reg_sspslot = reg_names[SOFTSTACK_SLOT_REGNUM];
|
|
|
|
|
const char *reg_sspprev = reg_names[SOFTSTACK_PREV_REGNUM];
|
|
|
|
|
fprintf (file, "\t.reg.u%d %s;\n", bits, reg_stack);
|
|
|
|
|
fprintf (file, "\t.reg.u%d %s;\n", bits, reg_frame);
|
|
|
|
|
fprintf (file, "\t.reg.u%d %s;\n", bits, reg_sspslot);
|
|
|
|
|
fprintf (file, "\t.reg.u%d %s;\n", bits, reg_sspprev);
|
|
|
|
|
fprintf (file, "\t{\n");
|
|
|
|
|
fprintf (file, "\t\t.reg.u32 %%fstmp0;\n");
|
|
|
|
|
fprintf (file, "\t\t.reg.u%d %%fstmp1;\n", bits);
|
|
|
|
|
fprintf (file, "\t\t.reg.u%d %%fstmp2;\n", bits);
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%fstmp0, %%tid.y;\n");
|
|
|
|
|
fprintf (file, "\t\tmul%s.u32 %%fstmp1, %%fstmp0, %d;\n",
|
|
|
|
|
bits == 64 ? ".wide" : ".lo", bits / 8);
|
|
|
|
|
fprintf (file, "\t\tmov.u%d %%fstmp2, __nvptx_stacks;\n", bits);
|
|
|
|
|
|
|
|
|
|
/* Initialize %sspslot = &__nvptx_stacks[tid.y]. */
|
|
|
|
|
fprintf (file, "\t\tadd.u%d %s, %%fstmp2, %%fstmp1;\n", bits, reg_sspslot);
|
|
|
|
|
|
|
|
|
|
/* Initialize %sspprev = __nvptx_stacks[tid.y]. */
|
|
|
|
|
fprintf (file, "\t\tld.shared.u%d %s, [%s];\n",
|
|
|
|
|
bits, reg_sspprev, reg_sspslot);
|
|
|
|
|
|
|
|
|
|
/* Initialize %frame = %sspprev - size. */
|
|
|
|
|
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
|
|
|
|
|
bits, reg_frame, reg_sspprev, size);
|
|
|
|
|
|
|
|
|
|
/* Apply alignment, if larger than 64. */
|
|
|
|
|
if (alignment > keep_align)
|
|
|
|
|
fprintf (file, "\t\tand.b%d %s, %s, %d;\n",
|
|
|
|
|
bits, reg_frame, reg_frame, -alignment);
|
|
|
|
|
|
|
|
|
|
size = crtl->outgoing_args_size;
|
|
|
|
|
gcc_assert (size % keep_align == 0);
|
|
|
|
|
|
|
|
|
|
/* Initialize %stack. */
|
|
|
|
|
fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
|
|
|
|
|
bits, reg_stack, reg_frame, size);
|
|
|
|
|
|
|
|
|
|
if (!crtl->is_leaf)
|
|
|
|
|
fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
|
|
|
|
|
bits, reg_sspslot, reg_stack);
|
|
|
|
|
fprintf (file, "\t}\n");
|
|
|
|
|
cfun->machine->has_softstack = true;
|
|
|
|
|
need_softstack_decl = true;
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Emit to FILE code that initializes the predicate register %rREGNO
   to indicate whether we are not lane zero on the NAME axis.  NAME
   ("x" or "y" in the callers here) names both the %tid component read
   and the scratch register used, which is declared inside its own
   brace scope.  */

static void
nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
{
  /* One call; the pieces of the format are the individual lines of
     the emitted sequence.  */
  fprintf (file,
           "\t{\n"
           "\t\t.reg.u32\t%%%s;\n"
           "\t\tmov.u32\t%%%s, %%tid.%s;\n"
           "\t\tsetp.ne.u32\t%%r%d, %%%s, 0;\n"
           "\t}\n",
           name,
           name, name,
           regno, name);
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Emit code to initialize predicate and master lane index registers for
|
|
|
|
|
-muniform-simt code generation variant. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_init_unisimt_predicate (FILE *file)
|
|
|
|
|
{
|
2017-03-28 19:24:57 +02:00
|
|
|
|
cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
|
|
|
|
|
int loc = REGNO (cfun->machine->unisimt_location);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
int bits = POINTER_SIZE;
|
2017-03-28 19:24:57 +02:00
|
|
|
|
fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
fprintf (file, "\t{\n");
|
|
|
|
|
fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
|
|
|
|
|
fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
|
|
|
|
|
fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
|
|
|
|
|
bits == 64 ? ".wide" : ".lo");
|
2017-03-28 19:24:57 +02:00
|
|
|
|
fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
|
|
|
|
|
fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
|
|
|
|
|
if (cfun->machine->unisimt_predicate)
|
|
|
|
|
{
|
|
|
|
|
int master = REGNO (cfun->machine->unisimt_master);
|
|
|
|
|
int pred = REGNO (cfun->machine->unisimt_predicate);
|
|
|
|
|
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
|
|
|
|
|
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
|
|
|
|
|
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
|
|
|
|
|
/* Compute predicate as 'tid.x == master'. */
|
|
|
|
|
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
|
|
|
|
|
}
|
2016-11-16 18:17:00 +01:00
|
|
|
|
fprintf (file, "\t}\n");
|
|
|
|
|
need_unisimt_decl = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit kernel NAME for function ORIG outlined for an OpenMP 'target' region:
|
|
|
|
|
|
|
|
|
|
extern void gomp_nvptx_main (void (*fn)(void*), void *fnarg);
|
|
|
|
|
void __attribute__((kernel)) NAME (void *arg, char *stack, size_t stacksize)
|
|
|
|
|
{
|
|
|
|
|
__nvptx_stacks[tid.y] = stack + stacksize * (ctaid.x * ntid.y + tid.y + 1);
|
|
|
|
|
__nvptx_uni[tid.y] = 0;
|
|
|
|
|
gomp_nvptx_main (ORIG, arg);
|
|
|
|
|
}
|
|
|
|
|
ORIG itself should not be emitted as a PTX .entry function. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
write_omp_entry (FILE *file, const char *name, const char *orig)
|
|
|
|
|
{
|
|
|
|
|
static bool gomp_nvptx_main_declared;
|
|
|
|
|
if (!gomp_nvptx_main_declared)
|
|
|
|
|
{
|
|
|
|
|
gomp_nvptx_main_declared = true;
|
|
|
|
|
write_fn_marker (func_decls, false, true, "gomp_nvptx_main");
|
|
|
|
|
func_decls << ".extern .func gomp_nvptx_main (.param.u" << POINTER_SIZE
|
|
|
|
|
<< " %in_ar1, .param.u" << POINTER_SIZE << " %in_ar2);\n";
|
|
|
|
|
}
|
2017-02-21 11:42:07 +01:00
|
|
|
|
/* PR79332. Single out this string; it confuses gcc.pot generation. */
|
|
|
|
|
#define NTID_Y "%ntid.y"
|
2016-11-16 18:17:00 +01:00
|
|
|
|
#define ENTRY_TEMPLATE(PS, PS_BYTES, MAD_PS_32) "\
|
|
|
|
|
(.param.u" PS " %arg, .param.u" PS " %stack, .param.u" PS " %sz)\n\
|
|
|
|
|
{\n\
|
|
|
|
|
.reg.u32 %r<3>;\n\
|
|
|
|
|
.reg.u" PS " %R<4>;\n\
|
|
|
|
|
mov.u32 %r0, %tid.y;\n\
|
2017-02-21 11:42:07 +01:00
|
|
|
|
mov.u32 %r1, " NTID_Y ";\n\
|
2016-11-16 18:17:00 +01:00
|
|
|
|
mov.u32 %r2, %ctaid.x;\n\
|
|
|
|
|
cvt.u" PS ".u32 %R1, %r0;\n\
|
|
|
|
|
" MAD_PS_32 " %R1, %r1, %r2, %R1;\n\
|
|
|
|
|
mov.u" PS " %R0, __nvptx_stacks;\n\
|
|
|
|
|
" MAD_PS_32 " %R0, %r0, " PS_BYTES ", %R0;\n\
|
|
|
|
|
ld.param.u" PS " %R2, [%stack];\n\
|
|
|
|
|
ld.param.u" PS " %R3, [%sz];\n\
|
|
|
|
|
add.u" PS " %R2, %R2, %R3;\n\
|
|
|
|
|
mad.lo.u" PS " %R2, %R1, %R3, %R2;\n\
|
|
|
|
|
st.shared.u" PS " [%R0], %R2;\n\
|
|
|
|
|
mov.u" PS " %R0, __nvptx_uni;\n\
|
|
|
|
|
" MAD_PS_32 " %R0, %r0, 4, %R0;\n\
|
|
|
|
|
mov.u32 %r0, 0;\n\
|
|
|
|
|
st.shared.u32 [%R0], %r0;\n\
|
|
|
|
|
mov.u" PS " %R0, \0;\n\
|
|
|
|
|
ld.param.u" PS " %R1, [%arg];\n\
|
|
|
|
|
{\n\
|
|
|
|
|
.param.u" PS " %P<2>;\n\
|
|
|
|
|
st.param.u" PS " [%P0], %R0;\n\
|
|
|
|
|
st.param.u" PS " [%P1], %R1;\n\
|
|
|
|
|
call.uni gomp_nvptx_main, (%P0, %P1);\n\
|
|
|
|
|
}\n\
|
|
|
|
|
ret.uni;\n\
|
|
|
|
|
}\n"
|
|
|
|
|
static const char entry64[] = ENTRY_TEMPLATE ("64", "8", "mad.wide.u32");
|
|
|
|
|
static const char entry32[] = ENTRY_TEMPLATE ("32", "4", "mad.lo.u32 ");
|
|
|
|
|
#undef ENTRY_TEMPLATE
|
2017-02-21 11:42:07 +01:00
|
|
|
|
#undef NTID_Y
|
2016-11-16 18:17:00 +01:00
|
|
|
|
const char *entry_1 = TARGET_ABI64 ? entry64 : entry32;
|
|
|
|
|
/* Position ENTRY_2 after the embedded nul using strlen of the prefix. */
|
|
|
|
|
const char *entry_2 = entry_1 + strlen (entry64) + 1;
|
|
|
|
|
fprintf (file, ".visible .entry %s%s%s%s", name, entry_1, orig, entry_2);
|
|
|
|
|
need_softstack_decl = need_unisimt_decl = true;
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
|
|
|
|
|
function, including local var decls and copies from the arguments to
|
|
|
|
|
local regs. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
|
|
|
|
|
{
|
|
|
|
|
tree fntype = TREE_TYPE (decl);
|
|
|
|
|
tree result_type = TREE_TYPE (fntype);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
int argno = 0;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
if (lookup_attribute ("omp target entrypoint", DECL_ATTRIBUTES (decl))
|
|
|
|
|
&& !lookup_attribute ("oacc function", DECL_ATTRIBUTES (decl)))
|
|
|
|
|
{
|
|
|
|
|
char *buf = (char *) alloca (strlen (name) + sizeof ("$impl"));
|
|
|
|
|
sprintf (buf, "%s$impl", name);
|
|
|
|
|
write_omp_entry (file, name, buf);
|
|
|
|
|
name = buf;
|
|
|
|
|
}
|
2015-12-04 15:02:27 +01:00
|
|
|
|
/* We construct the initial part of the function into a string
|
|
|
|
|
stream, in order to share the prototype writing code. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
std::stringstream s;
|
2015-11-30 15:44:48 +01:00
|
|
|
|
write_fn_proto (s, true, name, decl);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
s << "{\n";
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
bool return_in_mem = write_return_type (s, false, result_type);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (return_in_mem)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, 0, argno, ptr_type_node, true);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
|
2015-11-25 15:03:44 +01:00
|
|
|
|
/* Declare and initialize incoming arguments. */
|
2015-12-04 15:02:27 +01:00
|
|
|
|
tree args = TYPE_ARG_TYPES (fntype);
|
|
|
|
|
bool prototyped = true;
|
|
|
|
|
if (!args)
|
2015-11-25 15:03:44 +01:00
|
|
|
|
{
|
2015-12-04 15:02:27 +01:00
|
|
|
|
args = DECL_ARGUMENTS (decl);
|
|
|
|
|
prototyped = false;
|
2015-11-25 15:03:44 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (; args != NULL_TREE; args = TREE_CHAIN (args))
|
|
|
|
|
{
|
|
|
|
|
tree type = prototyped ? TREE_VALUE (args) : TREE_TYPE (args);
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
argno = write_arg_type (s, 0, argno, type, prototyped);
|
2015-12-04 15:02:27 +01:00
|
|
|
|
}
|
2015-11-25 15:03:44 +01:00
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
if (stdarg_p (fntype))
|
2015-12-16 21:02:02 +01:00
|
|
|
|
argno = write_arg_type (s, ARG_POINTER_REGNUM, argno, ptr_type_node,
|
2015-12-15 21:46:37 +01:00
|
|
|
|
true);
|
2015-11-25 15:03:44 +01:00
|
|
|
|
|
2015-12-16 21:02:02 +01:00
|
|
|
|
if (DECL_STATIC_CHAIN (decl) || cfun->machine->has_chain)
|
|
|
|
|
write_arg_type (s, STATIC_CHAIN_REGNUM,
|
|
|
|
|
DECL_STATIC_CHAIN (decl) ? argno : -1, ptr_type_node,
|
|
|
|
|
true);
|
|
|
|
|
|
2015-12-04 15:02:27 +01:00
|
|
|
|
fprintf (file, "%s", s.str().c_str());
|
2015-10-08 19:31:36 +02:00
|
|
|
|
|
2017-03-28 19:24:57 +02:00
|
|
|
|
/* Usually 'crtl->is_leaf' is computed during register allocator
|
|
|
|
|
initialization (which is not done on NVPTX) or for pressure-sensitive
|
|
|
|
|
optimizations. Initialize it here, except if already set. */
|
|
|
|
|
if (!crtl->is_leaf)
|
|
|
|
|
crtl->is_leaf = leaf_function_p ();
|
|
|
|
|
|
2015-12-16 14:51:36 +01:00
|
|
|
|
HOST_WIDE_INT sz = get_frame_size ();
|
2016-11-16 18:17:00 +01:00
|
|
|
|
bool need_frameptr = sz || cfun->machine->has_chain;
|
|
|
|
|
int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
|
|
|
|
if (!TARGET_SOFT_STACK)
|
|
|
|
|
{
|
|
|
|
|
/* Declare a local var for outgoing varargs. */
|
|
|
|
|
if (cfun->machine->has_varadic)
|
|
|
|
|
init_frame (file, STACK_POINTER_REGNUM,
|
|
|
|
|
UNITS_PER_WORD, crtl->outgoing_args_size);
|
|
|
|
|
|
|
|
|
|
/* Declare a local variable for the frame. Force its size to be
|
|
|
|
|
DImode-compatible. */
|
|
|
|
|
if (need_frameptr)
|
|
|
|
|
init_frame (file, FRAME_POINTER_REGNUM, alignment,
|
|
|
|
|
ROUND_UP (sz, GET_MODE_SIZE (DImode)));
|
|
|
|
|
}
|
2017-03-28 19:24:57 +02:00
|
|
|
|
else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca
|
|
|
|
|
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
2016-11-16 18:17:00 +01:00
|
|
|
|
init_softstack_frame (file, alignment, sz);
|
2015-12-16 14:51:36 +01:00
|
|
|
|
|
2017-03-28 19:24:57 +02:00
|
|
|
|
if (cfun->machine->has_simtreg)
|
|
|
|
|
{
|
|
|
|
|
unsigned HOST_WIDE_INT &simtsz = cfun->machine->simt_stack_size;
|
|
|
|
|
unsigned HOST_WIDE_INT &align = cfun->machine->simt_stack_align;
|
|
|
|
|
align = MAX (align, GET_MODE_SIZE (DImode));
|
|
|
|
|
if (!crtl->is_leaf || cfun->calls_alloca)
|
|
|
|
|
simtsz = HOST_WIDE_INT_M1U;
|
|
|
|
|
if (simtsz == HOST_WIDE_INT_M1U)
|
|
|
|
|
simtsz = nvptx_softstack_size;
|
|
|
|
|
if (cfun->machine->has_softstack)
|
|
|
|
|
simtsz += POINTER_SIZE / 8;
|
|
|
|
|
simtsz = ROUND_UP (simtsz, GET_MODE_SIZE (DImode));
|
|
|
|
|
if (align > GET_MODE_SIZE (DImode))
|
|
|
|
|
simtsz += align - GET_MODE_SIZE (DImode);
|
|
|
|
|
if (simtsz)
|
|
|
|
|
fprintf (file, "\t.local.align 8 .b8 %%simtstack_ar["
|
|
|
|
|
HOST_WIDE_INT_PRINT_DEC "];\n", simtsz);
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Declare the pseudos we have as ptx registers. */
|
|
|
|
|
int maxregs = max_reg_num ();
|
|
|
|
|
for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
|
|
|
|
|
{
|
|
|
|
|
if (regno_reg_rtx[i] != const0_rtx)
|
|
|
|
|
{
|
|
|
|
|
machine_mode mode = PSEUDO_REGNO_MODE (i);
|
2015-11-24 21:15:44 +01:00
|
|
|
|
machine_mode split = maybe_split_mode (mode);
|
2015-12-02 15:57:54 +01:00
|
|
|
|
|
2015-11-24 21:15:44 +01:00
|
|
|
|
if (split != VOIDmode)
|
2015-12-02 15:57:54 +01:00
|
|
|
|
mode = split;
|
|
|
|
|
fprintf (file, "\t.reg%s ", nvptx_ptx_type_from_mode (mode, true));
|
|
|
|
|
output_reg (file, i, split, -2);
|
|
|
|
|
fprintf (file, ";\n");
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Emit axis predicates. */
|
|
|
|
|
if (cfun->machine->axis_predicate[0])
|
|
|
|
|
nvptx_init_axis_predicate (file,
|
|
|
|
|
REGNO (cfun->machine->axis_predicate[0]), "y");
|
|
|
|
|
if (cfun->machine->axis_predicate[1])
|
|
|
|
|
nvptx_init_axis_predicate (file,
|
|
|
|
|
REGNO (cfun->machine->axis_predicate[1]), "x");
|
2017-03-28 19:24:57 +02:00
|
|
|
|
if (cfun->machine->unisimt_predicate
|
|
|
|
|
|| (cfun->machine->has_simtreg && !crtl->is_leaf))
|
2016-11-16 18:17:00 +01:00
|
|
|
|
nvptx_init_unisimt_predicate (file);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2017-03-28 19:24:57 +02:00
|
|
|
|
/* Output code for switching uniform-simt state. ENTERING indicates whether
|
|
|
|
|
we are entering or leaving non-uniform execution region. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_output_unisimt_switch (FILE *file, bool entering)
|
|
|
|
|
{
|
|
|
|
|
if (crtl->is_leaf && !cfun->machine->unisimt_predicate)
|
|
|
|
|
return;
|
|
|
|
|
fprintf (file, "\t{\n");
|
|
|
|
|
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
|
|
|
|
|
if (!crtl->is_leaf)
|
|
|
|
|
{
|
|
|
|
|
int loc = REGNO (cfun->machine->unisimt_location);
|
|
|
|
|
fprintf (file, "\t\tst.shared.u32 [%%r%d], %%ustmp2;\n", loc);
|
|
|
|
|
}
|
|
|
|
|
if (cfun->machine->unisimt_predicate)
|
|
|
|
|
{
|
|
|
|
|
int master = REGNO (cfun->machine->unisimt_master);
|
|
|
|
|
int pred = REGNO (cfun->machine->unisimt_predicate);
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%ustmp2, %%laneid;\n");
|
|
|
|
|
fprintf (file, "\t\tmov.u32 %%r%d, %s;\n",
|
|
|
|
|
master, entering ? "%ustmp2" : "0");
|
|
|
|
|
fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp2;\n", pred, master);
|
|
|
|
|
}
|
|
|
|
|
fprintf (file, "\t}\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Output code for allocating per-lane storage and switching soft-stack pointer.
|
|
|
|
|
ENTERING indicates whether we are entering or leaving non-uniform execution.
|
|
|
|
|
PTR is the register pointing to allocated storage, it is assigned to on
|
|
|
|
|
entering and used to restore state on leaving. SIZE and ALIGN are used only
|
|
|
|
|
on entering. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_output_softstack_switch (FILE *file, bool entering,
|
|
|
|
|
rtx ptr, rtx size, rtx align)
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (REG_P (ptr) && !HARD_REGISTER_P (ptr));
|
|
|
|
|
if (crtl->is_leaf && !cfun->machine->simt_stack_size)
|
|
|
|
|
return;
|
|
|
|
|
int bits = POINTER_SIZE, regno = REGNO (ptr);
|
|
|
|
|
fprintf (file, "\t{\n");
|
|
|
|
|
if (entering)
|
|
|
|
|
{
|
|
|
|
|
fprintf (file, "\t\tcvta.local.u%d %%r%d, %%simtstack_ar + "
|
|
|
|
|
HOST_WIDE_INT_PRINT_DEC ";\n", bits, regno,
|
|
|
|
|
cfun->machine->simt_stack_size);
|
|
|
|
|
fprintf (file, "\t\tsub.u%d %%r%d, %%r%d, ", bits, regno, regno);
|
|
|
|
|
if (CONST_INT_P (size))
|
|
|
|
|
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
|
|
|
|
|
ROUND_UP (UINTVAL (size), GET_MODE_SIZE (DImode)));
|
|
|
|
|
else
|
|
|
|
|
output_reg (file, REGNO (size), VOIDmode);
|
|
|
|
|
fputs (";\n", file);
|
|
|
|
|
if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
|
|
|
|
|
fprintf (file, "\t\tand.u%d %%r%d, %%r%d, -%d;\n",
|
|
|
|
|
bits, regno, regno, UINTVAL (align));
|
|
|
|
|
}
|
|
|
|
|
if (cfun->machine->has_softstack)
|
|
|
|
|
{
|
|
|
|
|
const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
|
|
|
|
|
if (entering)
|
|
|
|
|
{
|
|
|
|
|
fprintf (file, "\t\tst.u%d [%%r%d + -%d], %s;\n",
|
|
|
|
|
bits, regno, bits / 8, reg_stack);
|
|
|
|
|
fprintf (file, "\t\tsub.u%d %s, %%r%d, %d;\n",
|
|
|
|
|
bits, reg_stack, regno, bits / 8);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
fprintf (file, "\t\tld.u%d %s, [%%r%d + -%d];\n",
|
|
|
|
|
bits, reg_stack, regno, bits / 8);
|
|
|
|
|
}
|
|
|
|
|
nvptx_output_set_softstack (REGNO (stack_pointer_rtx));
|
|
|
|
|
}
|
|
|
|
|
fprintf (file, "\t}\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Output code to enter non-uniform execution region. DEST is a register
|
|
|
|
|
to hold a per-lane allocation given by SIZE and ALIGN. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_simt_enter (rtx dest, rtx size, rtx align)
|
|
|
|
|
{
|
|
|
|
|
nvptx_output_unisimt_switch (asm_out_file, true);
|
|
|
|
|
nvptx_output_softstack_switch (asm_out_file, true, dest, size, align);
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Output code to leave non-uniform execution region. SRC is the register
|
|
|
|
|
holding per-lane storage previously allocated by omp_simt_enter insn. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_simt_exit (rtx src)
|
|
|
|
|
{
|
|
|
|
|
nvptx_output_unisimt_switch (asm_out_file, false);
|
|
|
|
|
nvptx_output_softstack_switch (asm_out_file, false, src, NULL_RTX, NULL_RTX);
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Output instruction that sets soft stack pointer in shared memory to the
|
|
|
|
|
value in register given by SRC_REGNO. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_set_softstack (unsigned src_regno)
|
|
|
|
|
{
|
|
|
|
|
if (cfun->machine->has_softstack && !crtl->is_leaf)
|
|
|
|
|
{
|
|
|
|
|
fprintf (asm_out_file, "\tst.shared.u%d\t[%s], ",
|
|
|
|
|
POINTER_SIZE, reg_names[SOFTSTACK_SLOT_REGNUM]);
|
|
|
|
|
output_reg (asm_out_file, src_regno, VOIDmode);
|
|
|
|
|
fprintf (asm_out_file, ";\n");
|
|
|
|
|
}
|
|
|
|
|
return "";
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Output a return instruction. Also copy the return value to its outgoing
|
|
|
|
|
location. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_return (void)
|
|
|
|
|
{
|
2015-12-16 14:51:36 +01:00
|
|
|
|
machine_mode mode = (machine_mode)cfun->machine->return_mode;
|
2015-10-08 19:31:36 +02:00
|
|
|
|
|
|
|
|
|
if (mode != VOIDmode)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
fprintf (asm_out_file, "\tst.param%s\t[%s_out], %s;\n",
|
|
|
|
|
nvptx_ptx_type_from_mode (mode, false),
|
|
|
|
|
reg_names[NVPTX_RETURN_REGNUM],
|
|
|
|
|
reg_names[NVPTX_RETURN_REGNUM]);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
return "ret;";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Finish the function being output by writing its closing brace, followed
   by a newline, to FILE.  */

void
nvptx_function_end (FILE *file)
{
  fputs ("}\n", file);
}
|
|
|
|
|
|
|
|
|
|
/* Decide whether we can make a sibling call to a function. For ptx, we
|
|
|
|
|
can't. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_function_ok_for_sibcall (tree, tree)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-21 21:34:34 +02:00
|
|
|
|
/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_get_drap_rtx (void)
|
|
|
|
|
{
|
2016-11-16 18:17:00 +01:00
|
|
|
|
if (TARGET_SOFT_STACK && stack_realign_drap)
|
|
|
|
|
return arg_pointer_rtx;
|
2015-08-21 21:34:34 +02:00
|
|
|
|
return NULL_RTX;
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Implement the TARGET_CALL_ARGS hook. Record information about one
|
|
|
|
|
argument to the next call. */
|
|
|
|
|
|
|
|
|
|
static void
|
2015-12-16 14:51:36 +01:00
|
|
|
|
nvptx_call_args (rtx arg, tree fntype)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-16 14:51:36 +01:00
|
|
|
|
if (!cfun->machine->doing_call)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-16 14:51:36 +01:00
|
|
|
|
cfun->machine->doing_call = true;
|
|
|
|
|
cfun->machine->is_varadic = false;
|
|
|
|
|
cfun->machine->num_args = 0;
|
|
|
|
|
|
|
|
|
|
if (fntype && stdarg_p (fntype))
|
|
|
|
|
{
|
|
|
|
|
cfun->machine->is_varadic = true;
|
|
|
|
|
cfun->machine->has_varadic = true;
|
|
|
|
|
cfun->machine->num_args++;
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-16 14:51:36 +01:00
|
|
|
|
if (REG_P (arg) && arg != pc_rtx)
|
|
|
|
|
{
|
|
|
|
|
cfun->machine->num_args++;
|
|
|
|
|
cfun->machine->call_args = alloc_EXPR_LIST (VOIDmode, arg,
|
|
|
|
|
cfun->machine->call_args);
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement the corresponding END_CALL_ARGS hook. Clear and free the
|
|
|
|
|
information we recorded. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_end_call_args (void)
|
|
|
|
|
{
|
2015-12-16 14:51:36 +01:00
|
|
|
|
cfun->machine->doing_call = false;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
free_EXPR_LIST_list (&cfun->machine->call_args);
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-22 17:30:14 +02:00
|
|
|
|
/* Emit the sequence for a call to ADDRESS, setting RETVAL. Keep
|
|
|
|
|
track of whether calls involving static chains or varargs were seen
|
|
|
|
|
in the current function.
|
|
|
|
|
For libcalls, maintain a hash table of decls we have seen, and
|
|
|
|
|
record a function decl for later when encountering a new one. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_expand_call (rtx retval, rtx address)
|
|
|
|
|
{
|
|
|
|
|
rtx callee = XEXP (address, 0);
|
2015-09-09 19:18:15 +02:00
|
|
|
|
rtx varargs = NULL_RTX;
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
unsigned parallel = 0;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
if (!call_insn_operand (callee, Pmode))
|
|
|
|
|
{
|
|
|
|
|
callee = force_reg (Pmode, callee);
|
|
|
|
|
address = change_address (address, QImode, callee);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (GET_CODE (callee) == SYMBOL_REF)
|
|
|
|
|
{
|
|
|
|
|
tree decl = SYMBOL_REF_DECL (callee);
|
|
|
|
|
if (decl != NULL_TREE)
|
|
|
|
|
{
|
|
|
|
|
if (DECL_STATIC_CHAIN (decl))
|
2015-12-16 14:51:36 +01:00
|
|
|
|
cfun->machine->has_chain = true;
|
2015-11-27 14:57:09 +01:00
|
|
|
|
|
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz>
* omp-general.h: New file.
* omp-general.c: New file.
* omp-expand.h: Likewise.
* omp-expand.c: Likewise.
* omp-offload.h: Likewise.
* omp-offload.c: Likewise.
* omp-grid.c: Likewise.
* omp-grid.c: Likewise.
* omp-low.h: Include omp-general.h and omp-grid.h. Removed includes
of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h,
ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h,
varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h,
tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h,
alloc-pool.h, cfghooks.h, rtl.h and memmodel.h.
(omp_find_combined_for): Declare.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.h.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.h.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.h.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.h.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.h.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.h.
(oacc_fn_attrib_kernels_p): Moved to omp-general.h.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(omp_expand_local): Moved to omp-expand.h.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.h.
(omp_finish_file): Moved to omp-offload.h.
(default_goacc_validate_dims): Renamed to
oacc_default_goacc_validate_dims and moved to omp-offload.h.
(offload_funcs, offload_vars): Moved to omp-offload.h.
* omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h.
(omp_region): Moved to omp-expand.c.
(omp_for_data_loop): Moved to omp-general.h.
(omp_for_data): Likewise.
(oacc_loop): Moved to omp-offload.c.
(oacc_loop_flags): Moved to omp-general.h.
(offload_funcs, offload_vars): Moved to omp-offload.c.
(root_omp_region): Moved to omp-expand.c.
(omp_any_child_fn_dumped): Likewise.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.c.
(is_combined_parallel): Moved to omp-expand.c.
(is_reference): Renamed to omp_is_reference and and moved to
omp-general.c.
(adjust_for_condition): Renamed to omp_adjust_for_condition and moved
to omp-general.c.
(get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr
and moved to omp-general.c.
(extract_omp_for_data): Renamed to omp_extract_for_data and moved to
omp-general.c.
(workshare_safe_to_combine_p): Moved to omp-expand.c.
(omp_adjust_chunk_size): Likewise.
(get_ws_args_for): Likewise.
(get_base_type): Removed.
(dump_omp_region): Moved to omp-expand.c.
(debug_omp_region): Likewise.
(debug_all_omp_regions): Likewise.
(new_omp_region): Likewise.
(free_omp_region_1): Likewise.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.c.
(find_combined_for): Renamed to omp_find_combined_for, made global.
(build_omp_barrier): Renamed to omp_build_barrier and moved to
omp-general.c.
(omp_max_vf): Moved to omp-general.c.
(omp_max_simt_vf): Likewise.
(gimple_build_cond_empty): Moved to omp-expand.c.
(parallel_needs_hsa_kernel_p): Likewise.
(expand_omp_build_assign): Moved declaration to omp-expand.c.
(expand_parallel_call): Moved to omp-expand.c.
(expand_cilk_for_call): Likewise.
(expand_task_call): Likewise.
(vec2chain): Likewise.
(remove_exit_barrier): Likewise.
(remove_exit_barriers): Likewise.
(optimize_omp_library_calls): Likewise.
(expand_omp_regimplify_p): Likewise.
(expand_omp_build_assign): Likewise.
(expand_omp_taskreg): Likewise.
(oacc_collapse): Likewise.
(expand_oacc_collapse_init): Likewise.
(expand_oacc_collapse_vars): Likewise.
(expand_omp_for_init_counts): Likewise.
(expand_omp_for_init_vars): Likewise.
(extract_omp_for_update_vars): Likewise.
(expand_omp_ordered_source): Likewise.
(expand_omp_ordered_sink): Likewise.
(expand_omp_ordered_source_sink): Likewise.
(expand_omp_for_ordered_loops): Likewise.
(expand_omp_for_generic): Likewise.
(expand_omp_for_static_nochunk): Likewise.
(find_phi_with_arg_on_edge): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_cilk_for): Likewise.
(expand_omp_simd): Likewise.
(expand_omp_taskloop_for_outer): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
(expand_omp_for): Likewise.
(expand_omp_sections): Likewise.
(expand_omp_single): Likewise.
(expand_omp_synch): Likewise.
(expand_omp_atomic_load): Likewise.
(expand_omp_atomic_store): Likewise.
(expand_omp_atomic_fetch_op): Likewise.
(expand_omp_atomic_pipeline): Likewise.
(expand_omp_atomic_mutex): Likewise.
(expand_omp_atomic): Likewise.
(oacc_launch_pack): and moved to omp-general.c, made public.
(OACC_FN_ATTRIB): Likewise.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.c.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.c.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.c.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.c.
(oacc_fn_attrib_kernels_p): Moved to omp-general.c.
(oacc_fn_attrib_level): Moved to omp-offload.c.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to
omp-general.c.
(mark_loops_in_oacc_kernels_region): Moved to omp-expand.c.
(grid_launch_attributes_trees): Likewise.
(grid_attr_trees): Likewise.
(grid_create_kernel_launch_attr_types): Likewise.
(grid_insert_store_range_dim): Likewise.
(grid_get_kernel_launch_attributes): Likewise.
(get_target_argument_identifier_1): Likewise.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(push_target_argument_according_to_value): Likewise.
(get_target_arguments): Likewise.
(expand_omp_target): Likewise.
(grid_expand_omp_for_loop): Moved to omp-grid.c.
(grid_arg_decl_map): Likewise.
(grid_remap_kernel_arg_accesses): Likewise.
(grid_expand_target_grid_body): Likewise.
(expand_omp): Renamed to omp_expand and moved to omp-expand.c.
(build_omp_regions_1): Moved to omp-expand.c.
(build_omp_regions_root): Likewise.
(omp_expand_local): Likewise.
(build_omp_regions): Likewise.
(execute_expand_omp): Likewise.
(pass_data_expand_omp): Likewise.
(pass_expand_omp): Likewise.
(make_pass_expand_omp): Likewise.
(pass_data_expand_omp_ssa): Likewise.
(pass_expand_omp_ssa): Likewise.
(make_pass_expand_omp_ssa): Likewise.
(grid_lastprivate_predicate): Renamed to
omp_grid_lastprivate_predicate and moved to omp-grid.c, made public.
(grid_prop): Moved to omp-grid.c.
(GRID_MISSED_MSG_PREFIX): Likewise.
(grid_safe_assignment_p): Likewise.
(grid_seq_only_contains_local_assignments): Likewise.
(grid_find_single_omp_among_assignments_1): Likewise.
(grid_find_single_omp_among_assignments): Likewise.
(grid_find_ungridifiable_statement): Likewise.
(grid_parallel_clauses_gridifiable): Likewise.
(grid_inner_loop_gridifiable_p): Likewise.
(grid_dist_follows_simple_pattern): Likewise.
(grid_gfor_follows_tiling_pattern): Likewise.
(grid_call_permissible_in_distribute_p): Likewise.
(grid_handle_call_in_distribute): Likewise.
(grid_dist_follows_tiling_pattern): Likewise.
(grid_target_follows_gridifiable_pattern): Likewise.
(grid_remap_prebody_decls): Likewise.
(grid_var_segment): Likewise.
(grid_mark_variable_segment): Likewise.
(grid_copy_leading_local_assignments): Likewise.
(grid_process_grid_body): Likewise.
(grid_eliminate_combined_simd_part): Likewise.
(grid_mark_tiling_loops): Likewise.
(grid_mark_tiling_parallels_and_loops): Likewise.
(grid_process_kernel_body_copy): Likewise.
(grid_attempt_target_gridification): Likewise.
(grid_gridify_all_targets_stmt): Likewise.
(grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets
and moved to omp-grid.c, made public.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.c.
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c.
(omp_finish_file): Likewise.
(oacc_thread_numbers): Likewise.
(oacc_xform_loop): Likewise.
(oacc_default_dims, oacc_min_dims): Likewise.
(oacc_parse_default_dims): Likewise.
(oacc_validate_dims): Likewise.
(new_oacc_loop_raw): Likewise.
(new_oacc_loop_outer): Likewise.
(new_oacc_loop): Likewise.
(new_oacc_loop_routine): Likewise.
(finish_oacc_loop): Likewise.
(free_oacc_loop): Likewise.
(dump_oacc_loop_part): Likewise.
(dump_oacc_loop): Likewise.
(debug_oacc_loop): Likewise.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_sibling_nreverse): Likewise.
(oacc_loop_discovery): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(oacc_loop_xform_loop): Likewise.
(oacc_loop_process): Likewise.
(oacc_loop_fixed_partitions): Likewise.
(oacc_loop_auto_partitions): Likewise.
(oacc_loop_partition): Likewise.
(default_goacc_fork_join): Likewise.
(default_goacc_reduction): Likewise.
(execute_oacc_device_lower): Likewise.
(default_goacc_validate_dims): Likewise.
(default_goacc_dim_limit): Likewise.
(pass_data_oacc_device_lower): Likewise.
(pass_oacc_device_lower): Likewise.
(make_pass_oacc_device_lower): Likewise.
(execute_omp_device_lower): Likewise.
(pass_data_omp_device_lower): Likewise.
(pass_omp_device_lower): Likewise.
(make_pass_omp_device_lower): Likewise.
(pass_data_omp_target_link): Likewise.
(pass_omp_target_link): Likewise.
(find_link_var_op): Likewise.
(pass_omp_target_link::execute): Likewise.
(make_pass_omp_target_link): Likewise.
* Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o
and omp-grid.o.
(GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed
omp-low.h.
* gimple-fold.c: Include omp-general.h instead of omp-low.h.
(fold_internal_goacc_dim): Adjusted calls to
get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names.
* gimplify.c: Include omp-low.h.
(omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use
its new name.
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new
name.
(gimplify_omp_for): Likewise.
* lto-cgraph.c: Include omp-offload.h instead of omp-low.h.
* toplev.c: Include omp-offload.h instead of omp-low.h.
* tree-cfg.c: Include omp-general.h instead of omp-low.h. Also
include omp-expand.h.
(make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its
new name.
(make_edges): Adjust the call to free_omp_regions to use its new name.
* tree-parloops.c: Include omp-general.h.
(create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use
its new name.
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-ssa-loop.c: Include omp-general.h instead of omp-low.h.
(gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-vrp.c: Include omp-general.h instead of omp-low.h.
(extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and
get_oacc_fn_dim_size to use their new names.
* varpool.c: Include omp-offload.h instead of omp-low.h.
* gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in
ifiles.
* config/nvptx/nvptx.c: Include omp-general.c.
(nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use
its new name.
(nvptx_reorg): Likewise.
(nvptx_record_offload_symbol): Likewise.
gcc/c-family:
* c-omp.c: Include omp-general.h instead of omp-low.h.
(c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new
name.
gcc/c/
* c-parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(c_finish_oacc_routine): Adjusted call to
get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib
to use their new names.
(c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to
use its new name.
(c_parser_oacc_update): Likewise.
(c_parser_omp_simd): Likewise.
(c_parser_omp_target_update): Likewise.
* c-typeck.c: Include omp-general.h instead of omp-low.h.
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new
name.
(c_finish_omp_cancellation_point): Likewise.
* gimple-parser.c: Do not include omp-low.h
gcc/cp/
* parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new
name.
(cp_parser_omp_target_update): Likewise.
(cp_parser_oacc_declare): Likewise.
(cp_parser_oacc_enter_exit_data): Likewise.
(cp_parser_oacc_update): Likewise.
(cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib,
build_oacc_routine_dims and replace_oacc_fn_attrib to use their new
names.
* semantics.c: Include omp-general insteda of omp-low.h.
(finish_omp_for): Adjusted calls to find_omp_clause to use its new
name.
(finish_omp_cancel): Likewise.
(finish_omp_cancellation_point): Likewise.
fortran/
* trans-openmp.c: Include omp-general.h.
From-SVN: r243673
2016-12-14 23:30:41 +01:00
|
|
|
|
tree attr = oacc_get_fn_attrib (decl);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
if (attr)
|
|
|
|
|
{
|
|
|
|
|
tree dims = TREE_VALUE (attr);
|
|
|
|
|
|
|
|
|
|
parallel = GOMP_DIM_MASK (GOMP_DIM_MAX) - 1;
|
|
|
|
|
for (int ix = 0; ix != GOMP_DIM_MAX; ix++)
|
|
|
|
|
{
|
|
|
|
|
if (TREE_PURPOSE (dims)
|
|
|
|
|
&& !integer_zerop (TREE_PURPOSE (dims)))
|
|
|
|
|
break;
|
|
|
|
|
/* Not on this axis. */
|
|
|
|
|
parallel ^= GOMP_DIM_MASK (ix);
|
|
|
|
|
dims = TREE_CHAIN (dims);
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2015-09-11 00:53:37 +02:00
|
|
|
|
|
2015-12-16 14:51:36 +01:00
|
|
|
|
unsigned nargs = cfun->machine->num_args;
|
|
|
|
|
if (cfun->machine->is_varadic)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-09-09 19:18:15 +02:00
|
|
|
|
varargs = gen_reg_rtx (Pmode);
|
2015-11-26 15:13:28 +01:00
|
|
|
|
emit_move_insn (varargs, stack_pointer_rtx);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-16 14:51:36 +01:00
|
|
|
|
rtvec vec = rtvec_alloc (nargs + 1);
|
|
|
|
|
rtx pat = gen_rtx_PARALLEL (VOIDmode, vec);
|
2015-09-09 19:18:15 +02:00
|
|
|
|
int vec_pos = 0;
|
2015-12-16 14:51:36 +01:00
|
|
|
|
|
|
|
|
|
rtx call = gen_rtx_CALL (VOIDmode, address, const0_rtx);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
rtx tmp_retval = retval;
|
2015-12-16 14:51:36 +01:00
|
|
|
|
if (retval)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
|
|
|
|
if (!nvptx_register_operand (retval, GET_MODE (retval)))
|
|
|
|
|
tmp_retval = gen_reg_rtx (GET_MODE (retval));
|
2015-12-16 14:51:36 +01:00
|
|
|
|
call = gen_rtx_SET (tmp_retval, call);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
2015-12-16 14:51:36 +01:00
|
|
|
|
XVECEXP (pat, 0, vec_pos++) = call;
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
|
|
|
|
/* Construct the call insn, including a USE for each argument pseudo
|
|
|
|
|
register. These will be used when printing the insn. */
|
|
|
|
|
for (rtx arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1))
|
2015-12-16 14:51:36 +01:00
|
|
|
|
XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, XEXP (arg, 0));
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
|
|
|
|
if (varargs)
|
2015-11-21 14:57:09 +01:00
|
|
|
|
XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
|
|
|
|
gcc_assert (vec_pos = XVECLEN (pat, 0));
|
2015-07-22 17:30:14 +02:00
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
nvptx_emit_forking (parallel, true);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
emit_call_insn (pat);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
nvptx_emit_joining (parallel, true);
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (tmp_retval != retval)
|
|
|
|
|
emit_move_insn (retval, tmp_retval);
|
|
|
|
|
}
|
2015-12-14 20:26:49 +01:00
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Emit a comparison COMPARE, and return the new test to be used in the
|
|
|
|
|
jump. */
|
|
|
|
|
|
|
|
|
|
rtx
|
|
|
|
|
nvptx_expand_compare (rtx compare)
|
|
|
|
|
{
|
|
|
|
|
rtx pred = gen_reg_rtx (BImode);
|
|
|
|
|
rtx cmp = gen_rtx_fmt_ee (GET_CODE (compare), BImode,
|
|
|
|
|
XEXP (compare, 0), XEXP (compare, 1));
|
rtl.h (always_void_p): New function.
gcc/
* rtl.h (always_void_p): New function.
* gengenrtl.c (always_void_p): Likewise.
(genmacro): Don't add a mode parameter to gen_rtx_foo if rtxes
with code foo are always VOIDmode.
* genemit.c (gen_exp): Update gen_rtx_foo calls accordingly.
* builtins.c, caller-save.c, calls.c, cfgexpand.c, combine.c,
compare-elim.c, config/aarch64/aarch64.c,
config/aarch64/aarch64.md, config/alpha/alpha.c,
config/alpha/alpha.md, config/arc/arc.c, config/arc/arc.md,
config/arm/arm-fixed.md, config/arm/arm.c, config/arm/arm.md,
config/arm/ldrdstrd.md, config/arm/thumb2.md, config/arm/vfp.md,
config/avr/avr.c, config/bfin/bfin.c, config/c6x/c6x.c,
config/c6x/c6x.md, config/cr16/cr16.c, config/cris/cris.c,
config/cris/cris.md, config/darwin.c, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c, config/frv/frv.c,
config/frv/frv.md, config/h8300/h8300.c, config/i386/i386.c,
config/i386/i386.md, config/i386/sse.md, config/ia64/ia64.c,
config/ia64/vect.md, config/iq2000/iq2000.c,
config/iq2000/iq2000.md, config/lm32/lm32.c, config/lm32/lm32.md,
config/m32c/m32c.c, config/m32r/m32r.c, config/m68k/m68k.c,
config/m68k/m68k.md, config/mcore/mcore.c, config/mcore/mcore.md,
config/mep/mep.c, config/microblaze/microblaze.c,
config/mips/mips.c, config/mips/mips.md, config/mmix/mmix.c,
config/mn10300/mn10300.c, config/msp430/msp430.c,
config/nds32/nds32-memory-manipulation.c, config/nds32/nds32.c,
config/nds32/nds32.md, config/nios2/nios2.c, config/nvptx/nvptx.c,
config/pa/pa.c, config/pa/pa.md, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000.c,
config/rs6000/rs6000.md, config/rs6000/vector.md,
config/rs6000/vsx.md, config/rx/rx.c, config/rx/rx.md,
config/s390/s390.c, config/s390/s390.md, config/sh/sh.c,
config/sh/sh.md, config/sh/sh_treg_combine.cc,
config/sparc/sparc.c, config/sparc/sparc.md, config/spu/spu.c,
config/spu/spu.md, config/stormy16/stormy16.c,
config/tilegx/tilegx.c, config/tilegx/tilegx.md,
config/tilepro/tilepro.c, config/tilepro/tilepro.md,
config/v850/v850.c, config/v850/v850.md, config/vax/vax.c,
config/visium/visium.c, config/xtensa/xtensa.c, cprop.c, dse.c,
expr.c, gcse.c, ifcvt.c, ira.c, jump.c, lower-subreg.c,
lra-constraints.c, lra-eliminations.c, lra.c, postreload.c, ree.c,
reg-stack.c, reload.c, reload1.c, reorg.c, sel-sched.c,
var-tracking.c: Update calls accordingly.
From-SVN: r222883
2015-05-07 18:58:46 +02:00
|
|
|
|
emit_insn (gen_rtx_SET (pred, cmp));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
return gen_rtx_NE (BImode, pred, const0_rtx);
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Expand the oacc fork & join primitive into ptx-required unspecs. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_expand_oacc_fork (unsigned mode)
|
|
|
|
|
{
|
|
|
|
|
nvptx_emit_forking (GOMP_DIM_MASK (mode), false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_expand_oacc_join (unsigned mode)
|
|
|
|
|
{
|
|
|
|
|
nvptx_emit_joining (GOMP_DIM_MASK (mode), false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate instruction(s) to unpack a 64 bit object into 2 32 bit
|
|
|
|
|
objects. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_gen_unpack (rtx dst0, rtx dst1, rtx src)
|
|
|
|
|
{
|
|
|
|
|
rtx res;
|
|
|
|
|
|
|
|
|
|
switch (GET_MODE (src))
|
|
|
|
|
{
|
|
|
|
|
case DImode:
|
|
|
|
|
res = gen_unpackdisi2 (dst0, dst1, src);
|
|
|
|
|
break;
|
|
|
|
|
case DFmode:
|
|
|
|
|
res = gen_unpackdfsi2 (dst0, dst1, src);
|
|
|
|
|
break;
|
|
|
|
|
default: gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate instruction(s) to pack 2 32 bit objects into a 64 bit
|
|
|
|
|
object. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_gen_pack (rtx dst, rtx src0, rtx src1)
|
|
|
|
|
{
|
|
|
|
|
rtx res;
|
|
|
|
|
|
|
|
|
|
switch (GET_MODE (dst))
|
|
|
|
|
{
|
|
|
|
|
case DImode:
|
|
|
|
|
res = gen_packsidi2 (dst, src0, src1);
|
|
|
|
|
break;
|
|
|
|
|
case DFmode:
|
|
|
|
|
res = gen_packsidf2 (dst, src0, src1);
|
|
|
|
|
break;
|
|
|
|
|
default: gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate an instruction or sequence to broadcast register REG
|
|
|
|
|
across the vectors of a single warp. */
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
rtx
|
2015-12-02 18:28:32 +01:00
|
|
|
|
nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
{
|
|
|
|
|
rtx res;
|
|
|
|
|
|
|
|
|
|
switch (GET_MODE (dst))
|
|
|
|
|
{
|
|
|
|
|
case SImode:
|
|
|
|
|
res = gen_nvptx_shufflesi (dst, src, idx, GEN_INT (kind));
|
|
|
|
|
break;
|
|
|
|
|
case SFmode:
|
|
|
|
|
res = gen_nvptx_shufflesf (dst, src, idx, GEN_INT (kind));
|
|
|
|
|
break;
|
|
|
|
|
case DImode:
|
|
|
|
|
case DFmode:
|
|
|
|
|
{
|
|
|
|
|
rtx tmp0 = gen_reg_rtx (SImode);
|
|
|
|
|
rtx tmp1 = gen_reg_rtx (SImode);
|
|
|
|
|
|
|
|
|
|
start_sequence ();
|
|
|
|
|
emit_insn (nvptx_gen_unpack (tmp0, tmp1, src));
|
|
|
|
|
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
|
|
|
|
|
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
|
|
|
|
|
emit_insn (nvptx_gen_pack (dst, tmp0, tmp1));
|
|
|
|
|
res = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case BImode:
|
|
|
|
|
{
|
|
|
|
|
rtx tmp = gen_reg_rtx (SImode);
|
|
|
|
|
|
|
|
|
|
start_sequence ();
|
|
|
|
|
emit_insn (gen_sel_truesi (tmp, src, GEN_INT (1), const0_rtx));
|
|
|
|
|
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
|
|
|
|
|
emit_insn (gen_rtx_SET (dst, gen_rtx_NE (BImode, tmp, const0_rtx)));
|
|
|
|
|
res = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
}
|
|
|
|
|
break;
|
2016-02-22 17:28:25 +01:00
|
|
|
|
case QImode:
|
|
|
|
|
case HImode:
|
|
|
|
|
{
|
|
|
|
|
rtx tmp = gen_reg_rtx (SImode);
|
|
|
|
|
|
|
|
|
|
start_sequence ();
|
|
|
|
|
emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_e (ZERO_EXTEND, SImode, src)));
|
|
|
|
|
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
|
|
|
|
|
emit_insn (gen_rtx_SET (dst, gen_rtx_fmt_e (TRUNCATE, GET_MODE (dst),
|
|
|
|
|
tmp)));
|
|
|
|
|
res = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
}
|
|
|
|
|
break;
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Generate an instruction or sequence to broadcast register REG
|
|
|
|
|
across the vectors of a single warp. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_gen_vcast (rtx reg)
|
|
|
|
|
{
|
|
|
|
|
return nvptx_gen_shuffle (reg, reg, const0_rtx, SHUFFLE_IDX);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Structure used when generating a worker-level spill or fill. */
|
|
|
|
|
|
|
|
|
|
struct wcast_data_t
|
|
|
|
|
{
|
|
|
|
|
rtx base; /* Register holding base addr of buffer. */
|
|
|
|
|
rtx ptr; /* Iteration var, if needed. */
|
|
|
|
|
unsigned offset; /* Offset into worker buffer. */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Bit flags giving the direction of a spill/fill and marking the
   setup/teardown of a loop.  */

enum propagate_mask
  {
    PM_read = 0x1,
    PM_write = 0x2,
    PM_loop_begin = 0x4,
    PM_loop_end = 0x8,

    /* Both directions at once.  */
    PM_read_write = PM_read | PM_write
  };
|
|
|
|
|
|
|
|
|
|
/* Generate instruction(s) to spill or fill register REG to/from the
|
|
|
|
|
worker broadcast array. PM indicates what is to be done, REP
|
|
|
|
|
how many loop iterations will be executed (0 for not a loop). */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned rep, wcast_data_t *data)
|
|
|
|
|
{
|
|
|
|
|
rtx res;
|
|
|
|
|
machine_mode mode = GET_MODE (reg);
|
|
|
|
|
|
|
|
|
|
switch (mode)
|
|
|
|
|
{
|
|
|
|
|
case BImode:
|
|
|
|
|
{
|
|
|
|
|
rtx tmp = gen_reg_rtx (SImode);
|
|
|
|
|
|
|
|
|
|
start_sequence ();
|
|
|
|
|
if (pm & PM_read)
|
|
|
|
|
emit_insn (gen_sel_truesi (tmp, reg, GEN_INT (1), const0_rtx));
|
|
|
|
|
emit_insn (nvptx_gen_wcast (tmp, pm, rep, data));
|
|
|
|
|
if (pm & PM_write)
|
|
|
|
|
emit_insn (gen_rtx_SET (reg, gen_rtx_NE (BImode, tmp, const0_rtx)));
|
|
|
|
|
res = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
{
|
|
|
|
|
rtx addr = data->ptr;
|
|
|
|
|
|
|
|
|
|
if (!addr)
|
|
|
|
|
{
|
|
|
|
|
unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
|
|
|
|
|
|
|
|
|
|
if (align > worker_bcast_align)
|
|
|
|
|
worker_bcast_align = align;
|
|
|
|
|
data->offset = (data->offset + align - 1) & ~(align - 1);
|
|
|
|
|
addr = data->base;
|
|
|
|
|
if (data->offset)
|
|
|
|
|
addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (data->offset));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
addr = gen_rtx_MEM (mode, addr);
|
|
|
|
|
if (pm == PM_read)
|
|
|
|
|
res = gen_rtx_SET (addr, reg);
|
|
|
|
|
else if (pm == PM_write)
|
|
|
|
|
res = gen_rtx_SET (reg, addr);
|
|
|
|
|
else
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
|
|
|
|
|
if (data->ptr)
|
|
|
|
|
{
|
|
|
|
|
/* We're using a ptr, increment it. */
|
|
|
|
|
start_sequence ();
|
|
|
|
|
|
|
|
|
|
emit_insn (res);
|
|
|
|
|
emit_insn (gen_adddi3 (data->ptr, data->ptr,
|
|
|
|
|
GEN_INT (GET_MODE_SIZE (GET_MODE (reg)))));
|
|
|
|
|
res = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
rep = 1;
|
|
|
|
|
data->offset += rep * GET_MODE_SIZE (GET_MODE (reg));
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return res;
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
/* Returns true if X is a valid address for use in a memory reference. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_legitimate_address_p (machine_mode, rtx x, bool)
|
|
|
|
|
{
|
|
|
|
|
enum rtx_code code = GET_CODE (x);
|
|
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
|
{
|
|
|
|
|
case REG:
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
case PLUS:
|
|
|
|
|
if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
|
|
|
|
|
return true;
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
case CONST:
|
|
|
|
|
case SYMBOL_REF:
|
|
|
|
|
case LABEL_REF:
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
/* Machinery to output constant initializers. When beginning an
|
|
|
|
|
initializer, we decide on a fragment size (which is visible in ptx
|
|
|
|
|
in the type used), and then all initializer data is buffered until
|
|
|
|
|
a fragment is filled and ready to be written out. */
|
|
|
|
|
|
|
|
|
|
static struct
|
|
|
|
|
{
|
|
|
|
|
unsigned HOST_WIDE_INT mask; /* Mask for storing fragment. */
|
|
|
|
|
unsigned HOST_WIDE_INT val; /* Current fragment value. */
|
|
|
|
|
unsigned HOST_WIDE_INT remaining; /* Remaining bytes to be written
|
|
|
|
|
out. */
|
|
|
|
|
unsigned size; /* Fragment size to accumulate. */
|
|
|
|
|
unsigned offset; /* Offset within current fragment. */
|
|
|
|
|
bool started; /* Whether we've output any initializer. */
|
|
|
|
|
} init_frag;
|
|
|
|
|
|
|
|
|
|
/* The current fragment is full, write it out. SYM may provide a
|
|
|
|
|
symbolic reference we should output, in which case the fragment
|
|
|
|
|
value is the addend. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
static void
|
2015-12-08 22:06:42 +01:00
|
|
|
|
output_init_frag (rtx sym)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
fprintf (asm_out_file, init_frag.started ? ", " : " = { ");
|
|
|
|
|
unsigned HOST_WIDE_INT val = init_frag.val;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
init_frag.started = true;
|
|
|
|
|
init_frag.val = 0;
|
|
|
|
|
init_frag.offset = 0;
|
|
|
|
|
init_frag.remaining--;
|
|
|
|
|
|
|
|
|
|
if (sym)
|
|
|
|
|
{
|
|
|
|
|
fprintf (asm_out_file, "generic(");
|
|
|
|
|
output_address (VOIDmode, sym);
|
|
|
|
|
fprintf (asm_out_file, val ? ") + " : ")");
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
if (!sym || val)
|
|
|
|
|
fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, val);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
/* Add value VAL of size SIZE to the data we're emitting, and keep
|
|
|
|
|
writing out chunks as they fill up. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
static void
|
2015-12-08 22:06:42 +01:00
|
|
|
|
nvptx_assemble_value (unsigned HOST_WIDE_INT val, unsigned size)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
val &= ((unsigned HOST_WIDE_INT)2 << (size * BITS_PER_UNIT - 1)) - 1;
|
|
|
|
|
|
|
|
|
|
for (unsigned part = 0; size; size -= part)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
val >>= part * BITS_PER_UNIT;
|
|
|
|
|
part = init_frag.size - init_frag.offset;
|
|
|
|
|
if (part > size)
|
|
|
|
|
part = size;
|
|
|
|
|
|
|
|
|
|
unsigned HOST_WIDE_INT partial
|
|
|
|
|
= val << (init_frag.offset * BITS_PER_UNIT);
|
|
|
|
|
init_frag.val |= partial & init_frag.mask;
|
|
|
|
|
init_frag.offset += part;
|
|
|
|
|
|
|
|
|
|
if (init_frag.offset == init_frag.size)
|
|
|
|
|
output_init_frag (NULL);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Target hook for assembling integer object X of size SIZE. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p))
|
|
|
|
|
{
|
2015-11-27 14:57:09 +01:00
|
|
|
|
HOST_WIDE_INT val = 0;
|
|
|
|
|
|
|
|
|
|
switch (GET_CODE (x))
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-11-27 14:57:09 +01:00
|
|
|
|
default:
|
2015-12-11 14:52:05 +01:00
|
|
|
|
/* Let the generic machinery figure it out, usually for a
|
|
|
|
|
CONST_WIDE_INT. */
|
|
|
|
|
return false;
|
2015-11-27 14:57:09 +01:00
|
|
|
|
|
|
|
|
|
case CONST_INT:
|
2015-12-08 22:06:42 +01:00
|
|
|
|
nvptx_assemble_value (INTVAL (x), size);
|
2015-11-27 14:57:09 +01:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CONST:
|
|
|
|
|
x = XEXP (x, 0);
|
|
|
|
|
gcc_assert (GET_CODE (x) == PLUS);
|
|
|
|
|
val = INTVAL (XEXP (x, 1));
|
|
|
|
|
x = XEXP (x, 0);
|
|
|
|
|
gcc_assert (GET_CODE (x) == SYMBOL_REF);
|
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
|
|
|
|
|
case SYMBOL_REF:
|
2015-12-08 22:06:42 +01:00
|
|
|
|
gcc_assert (size == init_frag.size);
|
|
|
|
|
if (init_frag.offset)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
sorry ("cannot emit unaligned pointers in ptx assembly");
|
|
|
|
|
|
2015-11-27 14:57:09 +01:00
|
|
|
|
nvptx_maybe_record_fnsym (x);
|
2015-12-08 22:06:42 +01:00
|
|
|
|
init_frag.val = val;
|
|
|
|
|
output_init_frag (x);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Output SIZE zero bytes. We ignore the FILE argument since the
|
|
|
|
|
functions we're calling to perform the output just use
|
|
|
|
|
asm_out_file. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size)
|
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
/* Finish the current fragment, if it's started. */
|
|
|
|
|
if (init_frag.offset)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
unsigned part = init_frag.size - init_frag.offset;
|
|
|
|
|
if (part > size)
|
|
|
|
|
part = (unsigned) size;
|
|
|
|
|
size -= part;
|
|
|
|
|
nvptx_assemble_value (0, part);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
/* If this skip doesn't terminate the initializer, write as many
|
|
|
|
|
remaining pieces as possible directly. */
|
|
|
|
|
if (size < init_frag.remaining * init_frag.size)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
while (size >= init_frag.size)
|
|
|
|
|
{
|
|
|
|
|
size -= init_frag.size;
|
|
|
|
|
output_init_frag (NULL_RTX);
|
|
|
|
|
}
|
|
|
|
|
if (size)
|
|
|
|
|
nvptx_assemble_value (0, size);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Output a string STR with length SIZE. As in nvptx_output_skip we
|
|
|
|
|
ignore the FILE arg. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size)
|
|
|
|
|
{
|
|
|
|
|
for (unsigned HOST_WIDE_INT i = 0; i < size; i++)
|
|
|
|
|
nvptx_assemble_value (str[i], 1);
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-04 19:41:46 +01:00
|
|
|
|
/* Emit a PTX variable decl and prepare for emission of its
|
|
|
|
|
initializer. NAME is the symbol name and SETION the PTX data
|
|
|
|
|
area. The type is TYPE, object size SIZE and alignment is ALIGN.
|
|
|
|
|
The caller has already emitted any indentation and linkage
|
|
|
|
|
specifier. It is responsible for any initializer, terminating ;
|
|
|
|
|
and newline. SIZE is in bytes, ALIGN is in bits -- confusingly
|
|
|
|
|
this is the opposite way round that PTX wants them! */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_assemble_decl_begin (FILE *file, const char *name, const char *section,
|
|
|
|
|
const_tree type, HOST_WIDE_INT size, unsigned align)
|
|
|
|
|
{
|
|
|
|
|
while (TREE_CODE (type) == ARRAY_TYPE)
|
|
|
|
|
type = TREE_TYPE (type);
|
|
|
|
|
|
2015-12-07 14:46:07 +01:00
|
|
|
|
if (TREE_CODE (type) == VECTOR_TYPE
|
|
|
|
|
|| TREE_CODE (type) == COMPLEX_TYPE)
|
|
|
|
|
/* Neither vector nor complex types can contain the other. */
|
|
|
|
|
type = TREE_TYPE (type);
|
|
|
|
|
|
2015-12-04 19:41:46 +01:00
|
|
|
|
unsigned elt_size = int_size_in_bytes (type);
|
2015-12-07 14:46:07 +01:00
|
|
|
|
|
|
|
|
|
/* Largest mode we're prepared to accept. For BLKmode types we
|
|
|
|
|
don't know if it'll contain pointer constants, so have to choose
|
|
|
|
|
pointer size, otherwise we can choose DImode. */
|
|
|
|
|
machine_mode elt_mode = TYPE_MODE (type) == BLKmode ? Pmode : DImode;
|
|
|
|
|
|
|
|
|
|
elt_size |= GET_MODE_SIZE (elt_mode);
|
|
|
|
|
elt_size &= -elt_size; /* Extract LSB set. */
|
2015-12-04 19:41:46 +01:00
|
|
|
|
|
2015-12-08 22:06:42 +01:00
|
|
|
|
init_frag.size = elt_size;
|
2016-02-12 00:53:54 +01:00
|
|
|
|
/* Avoid undefined shift behavior by using '2'. */
|
2015-12-08 22:06:42 +01:00
|
|
|
|
init_frag.mask = ((unsigned HOST_WIDE_INT)2
|
|
|
|
|
<< (elt_size * BITS_PER_UNIT - 1)) - 1;
|
|
|
|
|
init_frag.val = 0;
|
|
|
|
|
init_frag.offset = 0;
|
|
|
|
|
init_frag.started = false;
|
|
|
|
|
/* Size might not be a multiple of elt size, if there's an
|
|
|
|
|
initialized trailing struct array with smaller type than
|
|
|
|
|
elt_size. */
|
|
|
|
|
init_frag.remaining = (size + elt_size - 1) / elt_size;
|
2015-12-04 19:41:46 +01:00
|
|
|
|
|
|
|
|
|
fprintf (file, "%s .align %d .u%d ",
|
|
|
|
|
section, align / BITS_PER_UNIT,
|
|
|
|
|
elt_size * BITS_PER_UNIT);
|
|
|
|
|
assemble_name (file, name);
|
|
|
|
|
|
|
|
|
|
if (size)
|
|
|
|
|
/* We make everything an array, to simplify any initialization
|
|
|
|
|
emission. */
|
2015-12-08 22:06:42 +01:00
|
|
|
|
fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]", init_frag.remaining);
|
2015-12-04 19:41:46 +01:00
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Called when the initializer for a decl has been completely output through
|
|
|
|
|
combinations of the three functions above. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_assemble_decl_end (void)
|
|
|
|
|
{
|
2015-12-08 22:06:42 +01:00
|
|
|
|
if (init_frag.offset)
|
|
|
|
|
/* This can happen with a packed struct with trailing array member. */
|
|
|
|
|
nvptx_assemble_value (0, init_frag.size - init_frag.offset);
|
|
|
|
|
fprintf (asm_out_file, init_frag.started ? " };\n" : ";\n");
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-01 21:13:02 +01:00
|
|
|
|
/* Output an uninitialized common or file-scope variable. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_output_aligned_decl (FILE *file, const char *name,
|
|
|
|
|
const_tree decl, HOST_WIDE_INT size, unsigned align)
|
|
|
|
|
{
|
|
|
|
|
write_var_marker (file, true, TREE_PUBLIC (decl), name);
|
|
|
|
|
|
|
|
|
|
/* If this is public, it is common. The nearest thing we have to
|
|
|
|
|
common is weak. */
|
2015-12-04 19:41:46 +01:00
|
|
|
|
fprintf (file, "\t%s", TREE_PUBLIC (decl) ? ".weak " : "");
|
|
|
|
|
|
|
|
|
|
nvptx_assemble_decl_begin (file, name, section_for_decl (decl),
|
|
|
|
|
TREE_TYPE (decl), size, align);
|
2015-12-08 22:06:42 +01:00
|
|
|
|
nvptx_assemble_decl_end ();
|
2015-12-01 21:13:02 +01:00
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
|
|
|
|
|
writing a constant variable EXP with NAME and SIZE and its
|
|
|
|
|
initializer to FILE. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_asm_declare_constant_name (FILE *file, const char *name,
|
2015-12-04 19:41:46 +01:00
|
|
|
|
const_tree exp, HOST_WIDE_INT obj_size)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-12-04 19:41:46 +01:00
|
|
|
|
write_var_marker (file, true, false, name);
|
|
|
|
|
|
|
|
|
|
fprintf (file, "\t");
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
tree type = TREE_TYPE (exp);
|
2015-12-04 19:41:46 +01:00
|
|
|
|
nvptx_assemble_decl_begin (file, name, ".const", type, obj_size,
|
|
|
|
|
TYPE_ALIGN (type));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
|
|
|
|
|
a variable DECL with NAME to FILE. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
nvptx_declare_object_name (FILE *file, const char *name, const_tree decl)
|
|
|
|
|
{
|
2015-12-04 19:41:46 +01:00
|
|
|
|
write_var_marker (file, true, TREE_PUBLIC (decl), name);
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
|
2015-12-04 19:41:46 +01:00
|
|
|
|
fprintf (file, "\t%s", (!TREE_PUBLIC (decl) ? ""
|
|
|
|
|
: DECL_WEAK (decl) ? ".weak " : ".visible "));
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
|
2015-12-04 19:41:46 +01:00
|
|
|
|
tree type = TREE_TYPE (decl);
|
|
|
|
|
HOST_WIDE_INT obj_size = tree_to_shwi (DECL_SIZE_UNIT (decl));
|
|
|
|
|
nvptx_assemble_decl_begin (file, name, section_for_decl (decl),
|
|
|
|
|
type, obj_size, DECL_ALIGN (decl));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_globalize_label (FILE *, const char *)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
|
|
|
|
|
declaration only for variable DECL with NAME to FILE. */
|
2015-12-02 15:57:54 +01:00
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
static void
|
|
|
|
|
nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl)
|
|
|
|
|
{
|
2015-12-30 16:52:59 +01:00
|
|
|
|
/* The middle end can place constant pool decls into the varpool as
|
|
|
|
|
undefined. Until that is fixed, catch the problem here. */
|
|
|
|
|
if (DECL_IN_CONSTANT_POOL (decl))
|
|
|
|
|
return;
|
|
|
|
|
|
2016-06-01 15:41:19 +02:00
|
|
|
|
/* We support weak defintions, and hence have the right
|
|
|
|
|
ASM_WEAKEN_DECL definition. Diagnose the problem here. */
|
|
|
|
|
if (DECL_WEAK (decl))
|
|
|
|
|
error_at (DECL_SOURCE_LOCATION (decl),
|
|
|
|
|
"PTX does not support weak declarations"
|
|
|
|
|
" (only weak definitions)");
|
2015-12-01 21:13:02 +01:00
|
|
|
|
write_var_marker (file, false, TREE_PUBLIC (decl), name);
|
|
|
|
|
|
2015-12-04 19:41:46 +01:00
|
|
|
|
fprintf (file, "\t.extern ");
|
|
|
|
|
tree size = DECL_SIZE_UNIT (decl);
|
|
|
|
|
nvptx_assemble_decl_begin (file, name, section_for_decl (decl),
|
|
|
|
|
TREE_TYPE (decl), size ? tree_to_shwi (size) : 0,
|
|
|
|
|
DECL_ALIGN (decl));
|
2015-12-18 18:43:11 +01:00
|
|
|
|
nvptx_assemble_decl_end ();
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-12-02 15:57:54 +01:00
|
|
|
|
/* Output a pattern for a move instruction. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_mov_insn (rtx dst, rtx src)
|
|
|
|
|
{
|
|
|
|
|
machine_mode dst_mode = GET_MODE (dst);
|
|
|
|
|
machine_mode dst_inner = (GET_CODE (dst) == SUBREG
|
|
|
|
|
? GET_MODE (XEXP (dst, 0)) : dst_mode);
|
|
|
|
|
machine_mode src_inner = (GET_CODE (src) == SUBREG
|
|
|
|
|
? GET_MODE (XEXP (src, 0)) : dst_mode);
|
|
|
|
|
|
2015-12-18 21:18:42 +01:00
|
|
|
|
rtx sym = src;
|
|
|
|
|
if (GET_CODE (sym) == CONST)
|
|
|
|
|
sym = XEXP (XEXP (sym, 0), 0);
|
2015-12-23 17:55:31 +01:00
|
|
|
|
if (SYMBOL_REF_P (sym))
|
|
|
|
|
{
|
|
|
|
|
if (SYMBOL_DATA_AREA (sym) != DATA_AREA_GENERIC)
|
|
|
|
|
return "%.\tcvta%D1%t0\t%0, %1;";
|
|
|
|
|
nvptx_maybe_record_fnsym (sym);
|
|
|
|
|
}
|
2015-12-18 21:18:42 +01:00
|
|
|
|
|
2015-12-02 15:57:54 +01:00
|
|
|
|
if (src_inner == dst_inner)
|
|
|
|
|
return "%.\tmov%t0\t%0, %1;";
|
|
|
|
|
|
|
|
|
|
if (CONSTANT_P (src))
|
|
|
|
|
return (GET_MODE_CLASS (dst_inner) == MODE_INT
|
|
|
|
|
&& GET_MODE_CLASS (src_inner) != MODE_FLOAT
|
|
|
|
|
? "%.\tmov%t0\t%0, %1;" : "%.\tmov.b%T0\t%0, %1;");
|
|
|
|
|
|
|
|
|
|
if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner))
|
|
|
|
|
return "%.\tmov.b%T0\t%0, %1;";
|
|
|
|
|
|
|
|
|
|
return "%.\tcvt%t0%t1\t%0, %1;";
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Forward declaration, needed by the call in nvptx_output_call_insn.  */
static void nvptx_print_operand (FILE *, rtx, int);
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
|
2015-07-22 17:30:14 +02:00
|
|
|
|
involves writing .param declarations and in/out copies into them. For
|
|
|
|
|
indirect calls, also write the .callprototype. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
|
|
|
|
|
{
|
2015-11-26 15:13:28 +01:00
|
|
|
|
char buf[16];
|
2014-11-10 17:12:42 +01:00
|
|
|
|
static int labelno;
|
|
|
|
|
bool needs_tgt = register_operand (callee, Pmode);
|
|
|
|
|
rtx pat = PATTERN (insn);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
if (GET_CODE (pat) == COND_EXEC)
|
|
|
|
|
pat = COND_EXEC_CODE (pat);
|
2015-09-09 19:18:15 +02:00
|
|
|
|
int arg_end = XVECLEN (pat, 0);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
tree decl = NULL_TREE;
|
|
|
|
|
|
|
|
|
|
fprintf (asm_out_file, "\t{\n");
|
|
|
|
|
if (result != NULL)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
fprintf (asm_out_file, "\t\t.param%s %s_in;\n",
|
|
|
|
|
nvptx_ptx_type_from_mode (GET_MODE (result), false),
|
|
|
|
|
reg_names[NVPTX_RETURN_REGNUM]);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-07-22 17:30:14 +02:00
|
|
|
|
/* Ensure we have a ptx declaration in the output if necessary. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (GET_CODE (callee) == SYMBOL_REF)
|
|
|
|
|
{
|
|
|
|
|
decl = SYMBOL_REF_DECL (callee);
|
2015-11-27 14:57:09 +01:00
|
|
|
|
if (!decl
|
|
|
|
|
|| (DECL_EXTERNAL (decl) && !TYPE_ARG_TYPES (TREE_TYPE (decl))))
|
|
|
|
|
nvptx_record_libfunc (callee, result, pat);
|
|
|
|
|
else if (DECL_EXTERNAL (decl))
|
2014-11-10 17:12:42 +01:00
|
|
|
|
nvptx_record_fndecl (decl);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (needs_tgt)
|
|
|
|
|
{
|
|
|
|
|
ASM_GENERATE_INTERNAL_LABEL (buf, "LCT", labelno);
|
|
|
|
|
labelno++;
|
|
|
|
|
ASM_OUTPUT_LABEL (asm_out_file, buf);
|
|
|
|
|
std::stringstream s;
|
2015-11-30 15:44:48 +01:00
|
|
|
|
write_fn_proto_from_insn (s, NULL, result, pat);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (s.str().c_str(), asm_out_file);
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-26 15:13:28 +01:00
|
|
|
|
for (int argno = 1; argno < arg_end; argno++)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-11-26 15:13:28 +01:00
|
|
|
|
rtx t = XEXP (XVECEXP (pat, 0, argno), 0);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
machine_mode mode = GET_MODE (t);
|
2015-12-28 14:38:10 +01:00
|
|
|
|
const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-26 15:13:28 +01:00
|
|
|
|
/* Mode splitting has already been done. */
|
2015-12-28 14:38:10 +01:00
|
|
|
|
fprintf (asm_out_file, "\t\t.param%s %%out_arg%d;\n"
|
|
|
|
|
"\t\tst.param%s [%%out_arg%d], ",
|
|
|
|
|
ptx_type, argno, ptx_type, argno);
|
|
|
|
|
output_reg (asm_out_file, REGNO (t), VOIDmode);
|
|
|
|
|
fprintf (asm_out_file, ";\n");
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* The '.' stands for the call's predicate, if any. */
|
|
|
|
|
nvptx_print_operand (asm_out_file, NULL_RTX, '.');
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fprintf (asm_out_file, "\t\tcall ");
|
|
|
|
|
if (result != NULL_RTX)
|
2015-12-15 21:46:37 +01:00
|
|
|
|
fprintf (asm_out_file, "(%s_in), ", reg_names[NVPTX_RETURN_REGNUM]);
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (decl)
|
|
|
|
|
{
|
|
|
|
|
const char *name = get_fnname_from_decl (decl);
|
2016-05-16 15:16:28 +02:00
|
|
|
|
name = nvptx_name_replacement (name);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
assemble_name (asm_out_file, name);
|
|
|
|
|
}
|
|
|
|
|
else
|
2015-11-09 13:16:55 +01:00
|
|
|
|
output_address (VOIDmode, callee);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-11-26 15:13:28 +01:00
|
|
|
|
const char *open = "(";
|
|
|
|
|
for (int argno = 1; argno < arg_end; argno++)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-11-26 15:13:28 +01:00
|
|
|
|
fprintf (asm_out_file, ", %s%%out_arg%d", open, argno);
|
|
|
|
|
open = "";
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
2015-11-26 15:13:28 +01:00
|
|
|
|
if (decl && DECL_STATIC_CHAIN (decl))
|
|
|
|
|
{
|
2015-12-16 21:02:02 +01:00
|
|
|
|
fprintf (asm_out_file, ", %s%s", open, reg_names [STATIC_CHAIN_REGNUM]);
|
2015-11-26 15:13:28 +01:00
|
|
|
|
open = "";
|
|
|
|
|
}
|
|
|
|
|
if (!open[0])
|
|
|
|
|
fprintf (asm_out_file, ")");
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (needs_tgt)
|
|
|
|
|
{
|
|
|
|
|
fprintf (asm_out_file, ", ");
|
|
|
|
|
assemble_name (asm_out_file, buf);
|
|
|
|
|
}
|
|
|
|
|
fprintf (asm_out_file, ";\n");
|
|
|
|
|
|
2015-12-07 17:08:42 +01:00
|
|
|
|
if (find_reg_note (insn, REG_NORETURN, NULL))
|
|
|
|
|
/* No return functions confuse the PTX JIT, as it doesn't realize
|
|
|
|
|
the flow control barrier they imply. It can seg fault if it
|
|
|
|
|
encounters what looks like an unexitable loop. Emit a trailing
|
|
|
|
|
trap, which it does grok. */
|
|
|
|
|
fprintf (asm_out_file, "\t\ttrap; // (noreturn)\n");
|
|
|
|
|
|
2015-12-15 21:46:37 +01:00
|
|
|
|
if (result)
|
|
|
|
|
{
|
|
|
|
|
static char rval[sizeof ("\tld.param%%t0\t%%0, [%%%s_in];\n\t}") + 8];
|
|
|
|
|
|
|
|
|
|
if (!rval[0])
|
|
|
|
|
/* We must escape the '%' that starts RETURN_REGNUM. */
|
|
|
|
|
sprintf (rval, "\tld.param%%t0\t%%0, [%%%s_in];\n\t}",
|
|
|
|
|
reg_names[NVPTX_RETURN_REGNUM]);
|
|
|
|
|
return rval;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "}";
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true iff C is
   one of the punctuation characters accepted as an operand code,
   namely '.' and '#'.  */

static bool
nvptx_print_operand_punct_valid_p (unsigned char c)
{
  switch (c)
    {
    case '.':
    case '#':
      return true;

    default:
      return false;
    }
}
|
|
|
|
|
|
|
|
|
|
/* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_print_address_operand (FILE *file, rtx x, machine_mode)
|
|
|
|
|
{
|
|
|
|
|
rtx off;
|
|
|
|
|
if (GET_CODE (x) == CONST)
|
|
|
|
|
x = XEXP (x, 0);
|
|
|
|
|
switch (GET_CODE (x))
|
|
|
|
|
{
|
|
|
|
|
case PLUS:
|
|
|
|
|
off = XEXP (x, 1);
|
2015-11-09 13:16:55 +01:00
|
|
|
|
output_address (VOIDmode, XEXP (x, 0));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fprintf (file, "+");
|
2015-11-09 13:16:55 +01:00
|
|
|
|
output_address (VOIDmode, off);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case SYMBOL_REF:
|
|
|
|
|
case LABEL_REF:
|
|
|
|
|
output_addr_const (file, x);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gcc_assert (GET_CODE (x) != MEM);
|
|
|
|
|
nvptx_print_operand (file, x, 0);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Write assembly language output for the address ADDR to FILE. */
|
|
|
|
|
|
|
|
|
|
static void
|
2015-11-09 13:16:55 +01:00
|
|
|
|
nvptx_print_operand_address (FILE *file, machine_mode mode, rtx addr)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
2015-11-09 13:16:55 +01:00
|
|
|
|
nvptx_print_address_operand (file, addr, mode);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Print an operand, X, to FILE, with an optional modifier in CODE.
|
|
|
|
|
|
|
|
|
|
Meaning of CODE:
|
|
|
|
|
. -- print the predicate for the instruction or an emptry string for an
|
|
|
|
|
unconditional one.
|
|
|
|
|
# -- print a rounding mode for the instruction
|
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
A -- print a data area for a MEM
|
2014-11-10 17:12:42 +01:00
|
|
|
|
c -- print an opcode suffix for a comparison operator, including a type code
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
D -- print a data area for a MEM operand
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
S -- print a shuffle kind specified by CONST_INT
|
2014-11-10 17:12:42 +01:00
|
|
|
|
t -- print a type opcode suffix, promoting QImode to 32 bits
|
|
|
|
|
T -- print a type size in bits
|
|
|
|
|
u -- print a type opcode suffix without promotions. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_print_operand (FILE *file, rtx x, int code)
|
|
|
|
|
{
|
|
|
|
|
if (code == '.')
|
|
|
|
|
{
|
|
|
|
|
x = current_insn_predicate;
|
|
|
|
|
if (x)
|
|
|
|
|
{
|
2016-11-16 18:17:00 +01:00
|
|
|
|
fputs ("@", file);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (GET_CODE (x) == EQ)
|
|
|
|
|
fputs ("!", file);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
output_reg (file, REGNO (XEXP (x, 0)), VOIDmode);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
else if (code == '#')
|
|
|
|
|
{
|
|
|
|
|
fputs (".rn", file);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
enum rtx_code x_code = GET_CODE (x);
|
2015-12-02 15:57:54 +01:00
|
|
|
|
machine_mode mode = GET_MODE (x);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
|
{
|
|
|
|
|
case 'A':
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
x = XEXP (x, 0);
|
|
|
|
|
/* FALLTHROUGH. */
|
2015-11-27 15:22:26 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
case 'D':
|
|
|
|
|
if (GET_CODE (x) == CONST)
|
|
|
|
|
x = XEXP (x, 0);
|
|
|
|
|
if (GET_CODE (x) == PLUS)
|
|
|
|
|
x = XEXP (x, 0);
|
2015-11-27 15:22:26 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
if (GET_CODE (x) == SYMBOL_REF)
|
|
|
|
|
fputs (section_for_sym (x), file);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 't':
|
|
|
|
|
case 'u':
|
2015-12-02 15:57:54 +01:00
|
|
|
|
if (x_code == SUBREG)
|
|
|
|
|
{
|
|
|
|
|
mode = GET_MODE (SUBREG_REG (x));
|
|
|
|
|
if (mode == TImode)
|
|
|
|
|
mode = DImode;
|
|
|
|
|
else if (COMPLEX_MODE_P (mode))
|
|
|
|
|
mode = GET_MODE_INNER (mode);
|
|
|
|
|
}
|
|
|
|
|
fprintf (file, "%s", nvptx_ptx_type_from_mode (mode, code == 't'));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
case 'S':
|
|
|
|
|
{
|
2015-12-02 18:28:32 +01:00
|
|
|
|
nvptx_shuffle_kind kind = (nvptx_shuffle_kind) UINTVAL (x);
|
|
|
|
|
/* Same order as nvptx_shuffle_kind. */
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
static const char *const kinds[] =
|
2015-12-02 18:28:32 +01:00
|
|
|
|
{".up", ".down", ".bfly", ".idx"};
|
|
|
|
|
fputs (kinds[kind], file);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
case 'T':
|
2015-12-02 15:57:54 +01:00
|
|
|
|
fprintf (file, "%d", GET_MODE_BITSIZE (mode));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'j':
|
|
|
|
|
fprintf (file, "@");
|
|
|
|
|
goto common;
|
|
|
|
|
|
|
|
|
|
case 'J':
|
|
|
|
|
fprintf (file, "@!");
|
|
|
|
|
goto common;
|
|
|
|
|
|
|
|
|
|
case 'c':
|
2015-12-02 15:57:54 +01:00
|
|
|
|
mode = GET_MODE (XEXP (x, 0));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
switch (x_code)
|
|
|
|
|
{
|
|
|
|
|
case EQ:
|
|
|
|
|
fputs (".eq", file);
|
|
|
|
|
break;
|
|
|
|
|
case NE:
|
2015-12-02 15:57:54 +01:00
|
|
|
|
if (FLOAT_MODE_P (mode))
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (".neu", file);
|
|
|
|
|
else
|
|
|
|
|
fputs (".ne", file);
|
|
|
|
|
break;
|
|
|
|
|
case LE:
|
2016-02-02 19:24:25 +01:00
|
|
|
|
case LEU:
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (".le", file);
|
|
|
|
|
break;
|
|
|
|
|
case GE:
|
2016-02-02 19:24:25 +01:00
|
|
|
|
case GEU:
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (".ge", file);
|
|
|
|
|
break;
|
|
|
|
|
case LT:
|
2016-02-02 19:24:25 +01:00
|
|
|
|
case LTU:
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (".lt", file);
|
|
|
|
|
break;
|
|
|
|
|
case GT:
|
|
|
|
|
case GTU:
|
2016-02-02 19:24:25 +01:00
|
|
|
|
fputs (".gt", file);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
case LTGT:
|
|
|
|
|
fputs (".ne", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNEQ:
|
|
|
|
|
fputs (".equ", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNLE:
|
|
|
|
|
fputs (".leu", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNGE:
|
|
|
|
|
fputs (".geu", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNLT:
|
|
|
|
|
fputs (".ltu", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNGT:
|
|
|
|
|
fputs (".gtu", file);
|
|
|
|
|
break;
|
|
|
|
|
case UNORDERED:
|
|
|
|
|
fputs (".nan", file);
|
|
|
|
|
break;
|
|
|
|
|
case ORDERED:
|
|
|
|
|
fputs (".num", file);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
2015-12-02 15:57:54 +01:00
|
|
|
|
if (FLOAT_MODE_P (mode)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|| x_code == EQ || x_code == NE
|
|
|
|
|
|| x_code == GEU || x_code == GTU
|
|
|
|
|
|| x_code == LEU || x_code == LTU)
|
2015-12-02 15:57:54 +01:00
|
|
|
|
fputs (nvptx_ptx_type_from_mode (mode, true), file);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
else
|
2015-12-02 15:57:54 +01:00
|
|
|
|
fprintf (file, ".s%d", GET_MODE_BITSIZE (mode));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
common:
|
|
|
|
|
switch (x_code)
|
|
|
|
|
{
|
|
|
|
|
case SUBREG:
|
2015-12-02 15:57:54 +01:00
|
|
|
|
{
|
|
|
|
|
rtx inner_x = SUBREG_REG (x);
|
|
|
|
|
machine_mode inner_mode = GET_MODE (inner_x);
|
|
|
|
|
machine_mode split = maybe_split_mode (inner_mode);
|
|
|
|
|
|
|
|
|
|
if (split != VOIDmode
|
|
|
|
|
&& (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode)))
|
|
|
|
|
output_reg (file, REGNO (inner_x), split);
|
|
|
|
|
else
|
|
|
|
|
output_reg (file, REGNO (inner_x), split, SUBREG_BYTE (x));
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
case REG:
|
2015-12-02 15:57:54 +01:00
|
|
|
|
output_reg (file, REGNO (x), maybe_split_mode (mode));
|
2014-11-10 17:12:42 +01:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case MEM:
|
|
|
|
|
fputc ('[', file);
|
2015-12-02 15:57:54 +01:00
|
|
|
|
nvptx_print_address_operand (file, XEXP (x, 0), mode);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputc (']', file);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CONST_INT:
|
|
|
|
|
output_addr_const (file, x);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CONST:
|
|
|
|
|
case SYMBOL_REF:
|
|
|
|
|
case LABEL_REF:
|
|
|
|
|
/* We could use output_addr_const, but that can print things like
|
|
|
|
|
"x-8", which breaks ptxas. Need to ensure it is output as
|
|
|
|
|
"x+-8". */
|
|
|
|
|
nvptx_print_address_operand (file, x, VOIDmode);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CONST_DOUBLE:
|
|
|
|
|
long vals[2];
|
2015-12-02 15:57:54 +01:00
|
|
|
|
real_to_target (vals, CONST_DOUBLE_REAL_VALUE (x), mode);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
vals[0] &= 0xffffffff;
|
|
|
|
|
vals[1] &= 0xffffffff;
|
2015-12-02 15:57:54 +01:00
|
|
|
|
if (mode == SFmode)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fprintf (file, "0f%08lx", vals[0]);
|
|
|
|
|
else
|
|
|
|
|
fprintf (file, "0d%08lx%08lx", vals[1], vals[0]);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
output_addr_const (file, x);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Record replacement regs used to deal with subreg operands. */
|
|
|
|
|
struct reg_replace
|
|
|
|
|
{
|
|
|
|
|
rtx replacement[MAX_RECOG_OPERANDS];
|
|
|
|
|
machine_mode mode;
|
|
|
|
|
int n_allocated;
|
|
|
|
|
int n_in_use;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Allocate or reuse a replacement in R and return the rtx. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
get_replacement (struct reg_replace *r)
|
|
|
|
|
{
|
|
|
|
|
if (r->n_allocated == r->n_in_use)
|
|
|
|
|
r->replacement[r->n_allocated++] = gen_reg_rtx (r->mode);
|
|
|
|
|
return r->replacement[r->n_in_use++];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Clean up subreg operands. In ptx assembly, everything is typed, and
|
|
|
|
|
the presence of subregs would break the rules for most instructions.
|
|
|
|
|
Replace them with a suitable new register of the right size, plus
|
|
|
|
|
conversion copyin/copyout instructions. */
|
|
|
|
|
|
|
|
|
|
static void
|
2015-06-30 01:23:10 +02:00
|
|
|
|
nvptx_reorg_subreg (void)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
|
|
|
|
struct reg_replace qiregs, hiregs, siregs, diregs;
|
|
|
|
|
rtx_insn *insn, *next;
|
|
|
|
|
|
|
|
|
|
qiregs.n_allocated = 0;
|
|
|
|
|
hiregs.n_allocated = 0;
|
|
|
|
|
siregs.n_allocated = 0;
|
|
|
|
|
diregs.n_allocated = 0;
|
|
|
|
|
qiregs.mode = QImode;
|
|
|
|
|
hiregs.mode = HImode;
|
|
|
|
|
siregs.mode = SImode;
|
|
|
|
|
diregs.mode = DImode;
|
|
|
|
|
|
|
|
|
|
for (insn = get_insns (); insn; insn = next)
|
|
|
|
|
{
|
|
|
|
|
next = NEXT_INSN (insn);
|
|
|
|
|
if (!NONDEBUG_INSN_P (insn)
|
2015-08-25 16:06:39 +02:00
|
|
|
|
|| asm_noperands (PATTERN (insn)) >= 0
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|| GET_CODE (PATTERN (insn)) == USE
|
|
|
|
|
|| GET_CODE (PATTERN (insn)) == CLOBBER)
|
|
|
|
|
continue;
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
qiregs.n_in_use = 0;
|
|
|
|
|
hiregs.n_in_use = 0;
|
|
|
|
|
siregs.n_in_use = 0;
|
|
|
|
|
diregs.n_in_use = 0;
|
|
|
|
|
extract_insn (insn);
|
|
|
|
|
enum attr_subregs_ok s_ok = get_attr_subregs_ok (insn);
|
2015-09-09 19:18:15 +02:00
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
for (int i = 0; i < recog_data.n_operands; i++)
|
|
|
|
|
{
|
|
|
|
|
rtx op = recog_data.operand[i];
|
|
|
|
|
if (GET_CODE (op) != SUBREG)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
rtx inner = SUBREG_REG (op);
|
|
|
|
|
|
|
|
|
|
machine_mode outer_mode = GET_MODE (op);
|
|
|
|
|
machine_mode inner_mode = GET_MODE (inner);
|
|
|
|
|
gcc_assert (s_ok);
|
|
|
|
|
if (s_ok
|
|
|
|
|
&& (GET_MODE_PRECISION (inner_mode)
|
|
|
|
|
>= GET_MODE_PRECISION (outer_mode)))
|
|
|
|
|
continue;
|
|
|
|
|
gcc_assert (SCALAR_INT_MODE_P (outer_mode));
|
|
|
|
|
struct reg_replace *r = (outer_mode == QImode ? &qiregs
|
|
|
|
|
: outer_mode == HImode ? &hiregs
|
|
|
|
|
: outer_mode == SImode ? &siregs
|
|
|
|
|
: &diregs);
|
|
|
|
|
rtx new_reg = get_replacement (r);
|
|
|
|
|
|
|
|
|
|
if (recog_data.operand_type[i] != OP_OUT)
|
|
|
|
|
{
|
|
|
|
|
enum rtx_code code;
|
|
|
|
|
if (GET_MODE_PRECISION (inner_mode)
|
|
|
|
|
< GET_MODE_PRECISION (outer_mode))
|
|
|
|
|
code = ZERO_EXTEND;
|
|
|
|
|
else
|
|
|
|
|
code = TRUNCATE;
|
|
|
|
|
|
rtl.h (always_void_p): New function.
gcc/
* rtl.h (always_void_p): New function.
* gengenrtl.c (always_void_p): Likewise.
(genmacro): Don't add a mode parameter to gen_rtx_foo if rtxes
with code foo are always VOIDmode.
* genemit.c (gen_exp): Update gen_rtx_foo calls accordingly.
* builtins.c, caller-save.c, calls.c, cfgexpand.c, combine.c,
compare-elim.c, config/aarch64/aarch64.c,
config/aarch64/aarch64.md, config/alpha/alpha.c,
config/alpha/alpha.md, config/arc/arc.c, config/arc/arc.md,
config/arm/arm-fixed.md, config/arm/arm.c, config/arm/arm.md,
config/arm/ldrdstrd.md, config/arm/thumb2.md, config/arm/vfp.md,
config/avr/avr.c, config/bfin/bfin.c, config/c6x/c6x.c,
config/c6x/c6x.md, config/cr16/cr16.c, config/cris/cris.c,
config/cris/cris.md, config/darwin.c, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c, config/frv/frv.c,
config/frv/frv.md, config/h8300/h8300.c, config/i386/i386.c,
config/i386/i386.md, config/i386/sse.md, config/ia64/ia64.c,
config/ia64/vect.md, config/iq2000/iq2000.c,
config/iq2000/iq2000.md, config/lm32/lm32.c, config/lm32/lm32.md,
config/m32c/m32c.c, config/m32r/m32r.c, config/m68k/m68k.c,
config/m68k/m68k.md, config/mcore/mcore.c, config/mcore/mcore.md,
config/mep/mep.c, config/microblaze/microblaze.c,
config/mips/mips.c, config/mips/mips.md, config/mmix/mmix.c,
config/mn10300/mn10300.c, config/msp430/msp430.c,
config/nds32/nds32-memory-manipulation.c, config/nds32/nds32.c,
config/nds32/nds32.md, config/nios2/nios2.c, config/nvptx/nvptx.c,
config/pa/pa.c, config/pa/pa.md, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000.c,
config/rs6000/rs6000.md, config/rs6000/vector.md,
config/rs6000/vsx.md, config/rx/rx.c, config/rx/rx.md,
config/s390/s390.c, config/s390/s390.md, config/sh/sh.c,
config/sh/sh.md, config/sh/sh_treg_combine.cc,
config/sparc/sparc.c, config/sparc/sparc.md, config/spu/spu.c,
config/spu/spu.md, config/stormy16/stormy16.c,
config/tilegx/tilegx.c, config/tilegx/tilegx.md,
config/tilepro/tilepro.c, config/tilepro/tilepro.md,
config/v850/v850.c, config/v850/v850.md, config/vax/vax.c,
config/visium/visium.c, config/xtensa/xtensa.c, cprop.c, dse.c,
expr.c, gcse.c, ifcvt.c, ira.c, jump.c, lower-subreg.c,
lra-constraints.c, lra-eliminations.c, lra.c, postreload.c, ree.c,
reg-stack.c, reload.c, reload1.c, reorg.c, sel-sched.c,
var-tracking.c: Update calls accordingly.
From-SVN: r222883
2015-05-07 18:58:46 +02:00
|
|
|
|
rtx pat = gen_rtx_SET (new_reg,
|
2014-11-10 17:12:42 +01:00
|
|
|
|
gen_rtx_fmt_e (code, outer_mode, inner));
|
|
|
|
|
emit_insn_before (pat, insn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (recog_data.operand_type[i] != OP_IN)
|
|
|
|
|
{
|
|
|
|
|
enum rtx_code code;
|
|
|
|
|
if (GET_MODE_PRECISION (inner_mode)
|
|
|
|
|
< GET_MODE_PRECISION (outer_mode))
|
|
|
|
|
code = TRUNCATE;
|
|
|
|
|
else
|
|
|
|
|
code = ZERO_EXTEND;
|
|
|
|
|
|
rtl.h (always_void_p): New function.
gcc/
* rtl.h (always_void_p): New function.
* gengenrtl.c (always_void_p): Likewise.
(genmacro): Don't add a mode parameter to gen_rtx_foo if rtxes
with code foo are always VOIDmode.
* genemit.c (gen_exp): Update gen_rtx_foo calls accordingly.
* builtins.c, caller-save.c, calls.c, cfgexpand.c, combine.c,
compare-elim.c, config/aarch64/aarch64.c,
config/aarch64/aarch64.md, config/alpha/alpha.c,
config/alpha/alpha.md, config/arc/arc.c, config/arc/arc.md,
config/arm/arm-fixed.md, config/arm/arm.c, config/arm/arm.md,
config/arm/ldrdstrd.md, config/arm/thumb2.md, config/arm/vfp.md,
config/avr/avr.c, config/bfin/bfin.c, config/c6x/c6x.c,
config/c6x/c6x.md, config/cr16/cr16.c, config/cris/cris.c,
config/cris/cris.md, config/darwin.c, config/epiphany/epiphany.c,
config/epiphany/epiphany.md, config/fr30/fr30.c, config/frv/frv.c,
config/frv/frv.md, config/h8300/h8300.c, config/i386/i386.c,
config/i386/i386.md, config/i386/sse.md, config/ia64/ia64.c,
config/ia64/vect.md, config/iq2000/iq2000.c,
config/iq2000/iq2000.md, config/lm32/lm32.c, config/lm32/lm32.md,
config/m32c/m32c.c, config/m32r/m32r.c, config/m68k/m68k.c,
config/m68k/m68k.md, config/mcore/mcore.c, config/mcore/mcore.md,
config/mep/mep.c, config/microblaze/microblaze.c,
config/mips/mips.c, config/mips/mips.md, config/mmix/mmix.c,
config/mn10300/mn10300.c, config/msp430/msp430.c,
config/nds32/nds32-memory-manipulation.c, config/nds32/nds32.c,
config/nds32/nds32.md, config/nios2/nios2.c, config/nvptx/nvptx.c,
config/pa/pa.c, config/pa/pa.md, config/rl78/rl78.c,
config/rs6000/altivec.md, config/rs6000/rs6000.c,
config/rs6000/rs6000.md, config/rs6000/vector.md,
config/rs6000/vsx.md, config/rx/rx.c, config/rx/rx.md,
config/s390/s390.c, config/s390/s390.md, config/sh/sh.c,
config/sh/sh.md, config/sh/sh_treg_combine.cc,
config/sparc/sparc.c, config/sparc/sparc.md, config/spu/spu.c,
config/spu/spu.md, config/stormy16/stormy16.c,
config/tilegx/tilegx.c, config/tilegx/tilegx.md,
config/tilepro/tilepro.c, config/tilepro/tilepro.md,
config/v850/v850.c, config/v850/v850.md, config/vax/vax.c,
config/visium/visium.c, config/xtensa/xtensa.c, cprop.c, dse.c,
expr.c, gcse.c, ifcvt.c, ira.c, jump.c, lower-subreg.c,
lra-constraints.c, lra-eliminations.c, lra.c, postreload.c, ree.c,
reg-stack.c, reload.c, reload1.c, reorg.c, sel-sched.c,
var-tracking.c: Update calls accordingly.
From-SVN: r222883
2015-05-07 18:58:46 +02:00
|
|
|
|
rtx pat = gen_rtx_SET (inner,
|
2014-11-10 17:12:42 +01:00
|
|
|
|
gen_rtx_fmt_e (code, inner_mode, new_reg));
|
|
|
|
|
emit_insn_after (pat, insn);
|
|
|
|
|
}
|
|
|
|
|
validate_change (insn, recog_data.operand_loc[i], new_reg, false);
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-06-30 01:23:10 +02:00
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Return a SImode "master lane index" register for uniform-simt, allocating on
|
|
|
|
|
first use. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_get_unisimt_master ()
|
|
|
|
|
{
|
|
|
|
|
rtx &master = cfun->machine->unisimt_master;
|
|
|
|
|
return master ? master : master = gen_reg_rtx (SImode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return a BImode "predicate" register for uniform-simt, similar to above. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_get_unisimt_predicate ()
|
|
|
|
|
{
|
|
|
|
|
rtx &pred = cfun->machine->unisimt_predicate;
|
|
|
|
|
return pred ? pred : pred = gen_reg_rtx (BImode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return true if given call insn references one of the functions provided by
|
|
|
|
|
the CUDA runtime: malloc, free, vprintf. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_call_insn_is_syscall_p (rtx_insn *insn)
|
|
|
|
|
{
|
|
|
|
|
rtx pat = PATTERN (insn);
|
|
|
|
|
gcc_checking_assert (GET_CODE (pat) == PARALLEL);
|
|
|
|
|
pat = XVECEXP (pat, 0, 0);
|
|
|
|
|
if (GET_CODE (pat) == SET)
|
|
|
|
|
pat = SET_SRC (pat);
|
|
|
|
|
gcc_checking_assert (GET_CODE (pat) == CALL
|
|
|
|
|
&& GET_CODE (XEXP (pat, 0)) == MEM);
|
|
|
|
|
rtx addr = XEXP (XEXP (pat, 0), 0);
|
|
|
|
|
if (GET_CODE (addr) != SYMBOL_REF)
|
|
|
|
|
return false;
|
|
|
|
|
const char *name = XSTR (addr, 0);
|
|
|
|
|
/* Ordinary malloc/free are redirected to __nvptx_{malloc,free), so only the
|
|
|
|
|
references with forced assembler name refer to PTX syscalls. For vprintf,
|
|
|
|
|
accept both normal and forced-assembler-name references. */
|
|
|
|
|
return (!strcmp (name, "vprintf") || !strcmp (name, "*vprintf")
|
|
|
|
|
|| !strcmp (name, "*malloc")
|
|
|
|
|
|| !strcmp (name, "*free"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If SET subexpression of INSN sets a register, emit a shuffle instruction to
|
|
|
|
|
propagate its value from lane MASTER to current lane. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master)
|
|
|
|
|
{
|
|
|
|
|
rtx reg;
|
|
|
|
|
if (GET_CODE (set) == SET && REG_P (reg = SET_DEST (set)))
|
|
|
|
|
emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX), insn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Adjust code for uniform-simt code generation variant by making atomics and
|
|
|
|
|
"syscalls" conditionally executed, and inserting shuffle-based propagation
|
|
|
|
|
for registers being set. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_reorg_uniform_simt ()
|
|
|
|
|
{
|
|
|
|
|
rtx_insn *insn, *next;
|
|
|
|
|
|
|
|
|
|
for (insn = get_insns (); insn; insn = next)
|
|
|
|
|
{
|
|
|
|
|
next = NEXT_INSN (insn);
|
|
|
|
|
if (!(CALL_P (insn) && nvptx_call_insn_is_syscall_p (insn))
|
|
|
|
|
&& !(NONJUMP_INSN_P (insn)
|
|
|
|
|
&& GET_CODE (PATTERN (insn)) == PARALLEL
|
|
|
|
|
&& get_attr_atomic (insn)))
|
|
|
|
|
continue;
|
|
|
|
|
rtx pat = PATTERN (insn);
|
|
|
|
|
rtx master = nvptx_get_unisimt_master ();
|
|
|
|
|
for (int i = 0; i < XVECLEN (pat, 0); i++)
|
|
|
|
|
nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master);
|
|
|
|
|
rtx pred = nvptx_get_unisimt_predicate ();
|
|
|
|
|
pred = gen_rtx_NE (BImode, pred, const0_rtx);
|
|
|
|
|
pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
|
|
|
|
|
validate_change (insn, &PATTERN (insn), pat, false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-28 19:32:07 +01:00
|
|
|
|
/* Loop structure of the function. The entire function is described as
|
|
|
|
|
a NULL loop. */
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
|
|
|
|
struct parallel
|
|
|
|
|
{
|
|
|
|
|
/* Parent parallel. */
|
|
|
|
|
parallel *parent;
|
|
|
|
|
|
|
|
|
|
/* Next sibling parallel. */
|
|
|
|
|
parallel *next;
|
|
|
|
|
|
|
|
|
|
/* First child parallel. */
|
|
|
|
|
parallel *inner;
|
|
|
|
|
|
|
|
|
|
/* Partitioning mask of the parallel. */
|
|
|
|
|
unsigned mask;
|
|
|
|
|
|
|
|
|
|
/* Partitioning used within inner parallels. */
|
|
|
|
|
unsigned inner_mask;
|
|
|
|
|
|
|
|
|
|
/* Location of parallel forked and join. The forked is the first
|
|
|
|
|
block in the parallel and the join is the first block after of
|
|
|
|
|
the partition. */
|
|
|
|
|
basic_block forked_block;
|
|
|
|
|
basic_block join_block;
|
|
|
|
|
|
|
|
|
|
rtx_insn *forked_insn;
|
|
|
|
|
rtx_insn *join_insn;
|
|
|
|
|
|
|
|
|
|
rtx_insn *fork_insn;
|
|
|
|
|
rtx_insn *joining_insn;
|
|
|
|
|
|
|
|
|
|
/* Basic blocks in this parallel, but not in child parallels. The
|
|
|
|
|
FORKED and JOINING blocks are in the partition. The FORK and JOIN
|
|
|
|
|
blocks are not. */
|
|
|
|
|
auto_vec<basic_block> blocks;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
parallel (parallel *parent, unsigned mode);
|
|
|
|
|
~parallel ();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Constructor links the new parallel into it's parent's chain of
|
|
|
|
|
children. */
|
|
|
|
|
|
|
|
|
|
parallel::parallel (parallel *parent_, unsigned mask_)
|
|
|
|
|
:parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
|
|
|
|
|
{
|
|
|
|
|
forked_block = join_block = 0;
|
|
|
|
|
forked_insn = join_insn = 0;
|
|
|
|
|
fork_insn = joining_insn = 0;
|
|
|
|
|
|
|
|
|
|
if (parent)
|
|
|
|
|
{
|
|
|
|
|
next = parent->inner;
|
|
|
|
|
parent->inner = this;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Destructor.  Recursively releases the whole subtree hanging off this
   parallel (via INNER) and every sibling that follows it (via NEXT).
   Either pointer may be null, in which case delete does nothing.  */

parallel::~parallel ()
{
  delete inner;
  delete next;
}
|
|
|
|
|
|
|
|
|
|
/* Map of basic blocks to insns */
|
|
|
|
|
typedef hash_map<basic_block, rtx_insn *> bb_insn_map_t;
|
|
|
|
|
|
|
|
|
|
/* A tuple of an insn of interest and the BB in which it resides. */
|
|
|
|
|
typedef std::pair<rtx_insn *, basic_block> insn_bb_t;
|
|
|
|
|
typedef auto_vec<insn_bb_t> insn_bb_vec_t;
|
|
|
|
|
|
|
|
|
|
/* Split basic blocks such that each forked and join unspecs are at
|
|
|
|
|
the start of their basic blocks. Thus afterwards each block will
|
|
|
|
|
have a single partitioning mode. We also do the same for return
|
|
|
|
|
insns, as they are executed by every thread. Return the
|
|
|
|
|
partitioning mode of the function as a whole. Populate MAP with
|
|
|
|
|
head and tail blocks. We also clear the BB visited flag, which is
|
|
|
|
|
used when finding partitions. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_split_blocks (bb_insn_map_t *map)
|
|
|
|
|
{
|
|
|
|
|
insn_bb_vec_t worklist;
|
|
|
|
|
basic_block block;
|
|
|
|
|
rtx_insn *insn;
|
|
|
|
|
|
|
|
|
|
/* Locate all the reorg instructions of interest. */
|
|
|
|
|
FOR_ALL_BB_FN (block, cfun)
|
|
|
|
|
{
|
|
|
|
|
bool seen_insn = false;
|
|
|
|
|
|
|
|
|
|
/* Clear visited flag, for use by parallel locator */
|
|
|
|
|
block->flags &= ~BB_VISITED;
|
|
|
|
|
|
|
|
|
|
FOR_BB_INSNS (block, insn)
|
|
|
|
|
{
|
|
|
|
|
if (!INSN_P (insn))
|
|
|
|
|
continue;
|
|
|
|
|
switch (recog_memoized (insn))
|
|
|
|
|
{
|
|
|
|
|
default:
|
|
|
|
|
seen_insn = true;
|
|
|
|
|
continue;
|
|
|
|
|
case CODE_FOR_nvptx_forked:
|
|
|
|
|
case CODE_FOR_nvptx_join:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CODE_FOR_return:
|
|
|
|
|
/* We also need to split just before return insns, as
|
|
|
|
|
that insn needs executing by all threads, but the
|
|
|
|
|
block it is in probably does not. */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (seen_insn)
|
|
|
|
|
/* We've found an instruction that must be at the start of
|
|
|
|
|
a block, but isn't. Add it to the worklist. */
|
|
|
|
|
worklist.safe_push (insn_bb_t (insn, block));
|
|
|
|
|
else
|
|
|
|
|
/* It was already the first instruction. Just add it to
|
|
|
|
|
the map. */
|
|
|
|
|
map->get_or_insert (block) = insn;
|
|
|
|
|
seen_insn = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Split blocks on the worklist. */
|
|
|
|
|
unsigned ix;
|
|
|
|
|
insn_bb_t *elt;
|
|
|
|
|
basic_block remap = 0;
|
|
|
|
|
for (ix = 0; worklist.iterate (ix, &elt); ix++)
|
|
|
|
|
{
|
|
|
|
|
if (remap != elt->second)
|
|
|
|
|
{
|
|
|
|
|
block = elt->second;
|
|
|
|
|
remap = block;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Split block before insn. The insn is in the new block */
|
|
|
|
|
edge e = split_block (block, PREV_INSN (elt->first));
|
|
|
|
|
|
|
|
|
|
block = e->dest;
|
|
|
|
|
map->get_or_insert (block) = elt->first;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* BLOCK is a basic block containing a head or tail instruction.
|
|
|
|
|
Locate the associated prehead or pretail instruction, which must be
|
|
|
|
|
in the single predecessor block. */
|
|
|
|
|
|
|
|
|
|
static rtx_insn *
|
|
|
|
|
nvptx_discover_pre (basic_block block, int expected)
|
|
|
|
|
{
|
|
|
|
|
gcc_assert (block->preds->length () == 1);
|
|
|
|
|
basic_block pre_block = (*block->preds)[0]->src;
|
|
|
|
|
rtx_insn *pre_insn;
|
|
|
|
|
|
|
|
|
|
for (pre_insn = BB_END (pre_block); !INSN_P (pre_insn);
|
|
|
|
|
pre_insn = PREV_INSN (pre_insn))
|
|
|
|
|
gcc_assert (pre_insn != BB_HEAD (pre_block));
|
|
|
|
|
|
|
|
|
|
gcc_assert (recog_memoized (pre_insn) == expected);
|
|
|
|
|
return pre_insn;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dump this parallel and all its inner parallels. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_dump_pars (parallel *par, unsigned depth)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "%u: mask %d head=%d, tail=%d\n",
|
|
|
|
|
depth, par->mask,
|
|
|
|
|
par->forked_block ? par->forked_block->index : -1,
|
|
|
|
|
par->join_block ? par->join_block->index : -1);
|
|
|
|
|
|
|
|
|
|
fprintf (dump_file, " blocks:");
|
|
|
|
|
|
|
|
|
|
basic_block block;
|
|
|
|
|
for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
|
|
|
|
|
fprintf (dump_file, " %d", block->index);
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
if (par->inner)
|
|
|
|
|
nvptx_dump_pars (par->inner, depth + 1);
|
|
|
|
|
|
|
|
|
|
if (par->next)
|
|
|
|
|
nvptx_dump_pars (par->next, depth);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If BLOCK contains a fork/join marker, process it to create or
|
|
|
|
|
terminate a loop structure. Add this block to the current loop,
|
|
|
|
|
and then walk successor blocks. */
|
|
|
|
|
|
|
|
|
|
static parallel *
|
|
|
|
|
nvptx_find_par (bb_insn_map_t *map, parallel *par, basic_block block)
|
|
|
|
|
{
|
|
|
|
|
if (block->flags & BB_VISITED)
|
|
|
|
|
return par;
|
|
|
|
|
block->flags |= BB_VISITED;
|
|
|
|
|
|
|
|
|
|
if (rtx_insn **endp = map->get (block))
|
|
|
|
|
{
|
|
|
|
|
rtx_insn *end = *endp;
|
|
|
|
|
|
|
|
|
|
/* This is a block head or tail, or return instruction. */
|
|
|
|
|
switch (recog_memoized (end))
|
|
|
|
|
{
|
|
|
|
|
case CODE_FOR_return:
|
|
|
|
|
/* Return instructions are in their own block, and we
|
|
|
|
|
don't need to do anything more. */
|
|
|
|
|
return par;
|
|
|
|
|
|
|
|
|
|
case CODE_FOR_nvptx_forked:
|
|
|
|
|
/* Loop head, create a new inner loop and add it into
|
|
|
|
|
our parent's child list. */
|
|
|
|
|
{
|
|
|
|
|
unsigned mask = UINTVAL (XVECEXP (PATTERN (end), 0, 0));
|
|
|
|
|
|
|
|
|
|
gcc_assert (mask);
|
|
|
|
|
par = new parallel (par, mask);
|
|
|
|
|
par->forked_block = block;
|
|
|
|
|
par->forked_insn = end;
|
|
|
|
|
if (!(mask & GOMP_DIM_MASK (GOMP_DIM_MAX))
|
|
|
|
|
&& (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
|
|
|
|
|
par->fork_insn
|
|
|
|
|
= nvptx_discover_pre (block, CODE_FOR_nvptx_fork);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case CODE_FOR_nvptx_join:
|
|
|
|
|
/* A loop tail. Finish the current loop and return to
|
|
|
|
|
parent. */
|
|
|
|
|
{
|
|
|
|
|
unsigned mask = UINTVAL (XVECEXP (PATTERN (end), 0, 0));
|
|
|
|
|
|
|
|
|
|
gcc_assert (par->mask == mask);
|
|
|
|
|
par->join_block = block;
|
|
|
|
|
par->join_insn = end;
|
|
|
|
|
if (!(mask & GOMP_DIM_MASK (GOMP_DIM_MAX))
|
|
|
|
|
&& (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
|
|
|
|
|
par->joining_insn
|
|
|
|
|
= nvptx_discover_pre (block, CODE_FOR_nvptx_joining);
|
|
|
|
|
par = par->parent;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (par)
|
|
|
|
|
/* Add this block onto the current loop's list of blocks. */
|
|
|
|
|
par->blocks.safe_push (block);
|
|
|
|
|
else
|
|
|
|
|
/* This must be the entry block. Create a NULL parallel. */
|
|
|
|
|
par = new parallel (0, 0);
|
|
|
|
|
|
|
|
|
|
/* Walk successor blocks. */
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, block->succs)
|
|
|
|
|
nvptx_find_par (map, par, e->dest);
|
|
|
|
|
|
|
|
|
|
return par;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* DFS walk the CFG looking for fork & join markers. Construct
|
|
|
|
|
loop structures as we go. MAP is a mapping of basic blocks
|
|
|
|
|
to head & tail markers, discovered when splitting blocks. This
|
|
|
|
|
speeds up the discovery. We rely on the BB visited flag having
|
|
|
|
|
been cleared when splitting blocks. */
|
|
|
|
|
|
|
|
|
|
static parallel *
|
|
|
|
|
nvptx_discover_pars (bb_insn_map_t *map)
|
|
|
|
|
{
|
|
|
|
|
basic_block block;
|
|
|
|
|
|
|
|
|
|
/* Mark exit blocks as visited. */
|
|
|
|
|
block = EXIT_BLOCK_PTR_FOR_FN (cfun);
|
|
|
|
|
block->flags |= BB_VISITED;
|
|
|
|
|
|
|
|
|
|
/* And entry block as not. */
|
|
|
|
|
block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
|
|
|
block->flags &= ~BB_VISITED;
|
|
|
|
|
|
|
|
|
|
parallel *par = nvptx_find_par (map, 0, block);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "\nLoops\n");
|
|
|
|
|
nvptx_dump_pars (par, 0);
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return par;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-18 19:33:38 +01:00
|
|
|
|
/* Analyse a group of BBs within a partitioned region and create N
|
|
|
|
|
Single-Entry-Single-Exit regions. Some of those regions will be
|
|
|
|
|
trivial ones consisting of a single BB. The blocks of a
|
|
|
|
|
partitioned region might form a set of disjoint graphs -- because
|
|
|
|
|
the region encloses a differently partitioned sub region.
|
|
|
|
|
|
|
|
|
|
We use the linear time algorithm described in 'Finding Regions Fast:
|
|
|
|
|
Single Entry Single Exit and control Regions in Linear Time'
|
|
|
|
|
Johnson, Pearson & Pingali. That algorithm deals with complete
|
|
|
|
|
CFGs, where a back edge is inserted from END to START, and thus the
|
|
|
|
|
problem becomes one of finding equivalent loops.
|
|
|
|
|
|
|
|
|
|
In this case we have a partial CFG. We complete it by redirecting
|
|
|
|
|
any incoming edge to the graph to be from an arbitrary external BB,
|
|
|
|
|
and similarly redirecting any outgoing edge to be to that BB.
|
|
|
|
|
Thus we end up with a closed graph.
|
|
|
|
|
|
|
|
|
|
The algorithm works by building a spanning tree of an undirected
|
|
|
|
|
graph and keeping track of back edges from nodes further from the
|
|
|
|
|
root in the tree to nodes nearer to the root in the tree. In the
|
|
|
|
|
description below, the root is up and the tree grows downwards.
|
|
|
|
|
|
|
|
|
|
We avoid having to deal with degenerate back-edges to the same
|
|
|
|
|
block, by splitting each BB into 3 -- one for input edges, one for
|
|
|
|
|
the node itself and one for the output edges. Such back edges are
|
|
|
|
|
referred to as 'Brackets'. Cycle equivalent nodes will have the
|
|
|
|
|
same set of brackets.
|
|
|
|
|
|
|
|
|
|
Determining bracket equivalency is done by maintaining a list of
|
|
|
|
|
brackets in such a manner that the list length and final bracket
|
|
|
|
|
uniquely identify the set.
|
|
|
|
|
|
|
|
|
|
We use coloring to mark all BBs with cycle equivalency with the
|
|
|
|
|
same color. This is the output of the 'Finding Regions Fast'
|
|
|
|
|
algorithm. Notice it doesn't actually find the set of nodes within
|
|
|
|
|
a particular region, just unordered sets of nodes that are the
|
|
|
|
|
entries and exits of SESE regions.
|
|
|
|
|
|
|
|
|
|
After determining cycle equivalency, we need to find the minimal
|
|
|
|
|
set of SESE regions. Do this with a DFS coloring walk of the
|
|
|
|
|
complete graph. We're either 'looking' or 'coloring'. When
|
|
|
|
|
looking, and we're in the subgraph, we start coloring the color of
|
|
|
|
|
the current node, and remember that node as the start of the
|
|
|
|
|
current color's SESE region. Every time we go to a new node, we
|
|
|
|
|
decrement the count of nodes with that color. If it reaches zero,
|
|
|
|
|
we remember that node as the end of the current color's SESE region
|
|
|
|
|
and return to 'looking'. Otherwise we color the node the current
|
|
|
|
|
color.
|
|
|
|
|
|
|
|
|
|
This way we end up with coloring the inside of non-trivial SESE
|
|
|
|
|
regions with the color of that region. */
|
|
|
|
|
|
|
|
|
|
/* A pair of BBs. We use this to represent SESE regions. */
|
|
|
|
|
typedef std::pair<basic_block, basic_block> bb_pair_t;
|
|
|
|
|
typedef auto_vec<bb_pair_t> bb_pair_vec_t;
|
|
|
|
|
|
|
|
|
|
/* A node in the undirected CFG. The discriminator SECOND indicates just
|
|
|
|
|
above or just below the BB indicated by FIRST. */
|
|
|
|
|
typedef std::pair<basic_block, int> pseudo_node_t;
|
|
|
|
|
|
|
|
|
|
/* A bracket indicates an edge towards the root of the spanning tree of the
|
|
|
|
|
undirected graph. Each bracket has a color, determined
|
|
|
|
|
from the currrent set of brackets. */
|
|
|
|
|
struct bracket
|
|
|
|
|
{
|
|
|
|
|
pseudo_node_t back; /* Back target */
|
|
|
|
|
|
|
|
|
|
/* Current color and size of set. */
|
|
|
|
|
unsigned color;
|
|
|
|
|
unsigned size;
|
|
|
|
|
|
|
|
|
|
bracket (pseudo_node_t back_)
|
|
|
|
|
: back (back_), color (~0u), size (~0u)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned get_color (auto_vec<unsigned> &color_counts, unsigned length)
|
|
|
|
|
{
|
|
|
|
|
if (length != size)
|
|
|
|
|
{
|
|
|
|
|
size = length;
|
|
|
|
|
color = color_counts.length ();
|
|
|
|
|
color_counts.quick_push (0);
|
|
|
|
|
}
|
|
|
|
|
color_counts[color]++;
|
|
|
|
|
return color;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
typedef auto_vec<bracket> bracket_vec_t;
|
|
|
|
|
|
|
|
|
|
/* Basic block info for finding SESE regions. */
|
|
|
|
|
|
|
|
|
|
struct bb_sese
|
|
|
|
|
{
|
|
|
|
|
int node; /* Node number in spanning tree. */
|
|
|
|
|
int parent; /* Parent node number. */
|
|
|
|
|
|
|
|
|
|
/* The algorithm splits each node A into Ai, A', Ao. The incoming
|
|
|
|
|
edges arrive at pseudo-node Ai and the outgoing edges leave at
|
|
|
|
|
pseudo-node Ao. We have to remember which way we arrived at a
|
|
|
|
|
particular node when generating the spanning tree. dir > 0 means
|
|
|
|
|
we arrived at Ai, dir < 0 means we arrived at Ao. */
|
|
|
|
|
int dir;
|
|
|
|
|
|
|
|
|
|
/* Lowest numbered pseudo-node reached via a backedge from thsis
|
|
|
|
|
node, or any descendant. */
|
|
|
|
|
pseudo_node_t high;
|
|
|
|
|
|
|
|
|
|
int color; /* Cycle-equivalence color */
|
|
|
|
|
|
|
|
|
|
/* Stack of brackets for this node. */
|
|
|
|
|
bracket_vec_t brackets;
|
|
|
|
|
|
|
|
|
|
bb_sese (unsigned node_, unsigned p, int dir_)
|
|
|
|
|
:node (node_), parent (p), dir (dir_)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
~bb_sese ();
|
|
|
|
|
|
|
|
|
|
/* Push a bracket ending at BACK. */
|
|
|
|
|
void push (const pseudo_node_t &back)
|
|
|
|
|
{
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Pushing backedge %d:%+d\n",
|
|
|
|
|
back.first ? back.first->index : 0, back.second);
|
|
|
|
|
brackets.safe_push (bracket (back));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void append (bb_sese *child);
|
|
|
|
|
void remove (const pseudo_node_t &);
|
|
|
|
|
|
|
|
|
|
/* Set node's color. */
|
|
|
|
|
void set_color (auto_vec<unsigned> &color_counts)
|
|
|
|
|
{
|
|
|
|
|
color = brackets.last ().get_color (color_counts, brackets.length ());
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Out-of-line destructor.  There is nothing to do explicitly; the
   members clean up after themselves.  */

bb_sese::~bb_sese ()
{
}
|
|
|
|
|
|
|
|
|
|
/* Destructively append CHILD's brackets. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
bb_sese::append (bb_sese *child)
|
|
|
|
|
{
|
|
|
|
|
if (int len = child->brackets.length ())
|
|
|
|
|
{
|
|
|
|
|
int ix;
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
for (ix = 0; ix < len; ix++)
|
|
|
|
|
{
|
|
|
|
|
const pseudo_node_t &pseudo = child->brackets[ix].back;
|
|
|
|
|
fprintf (dump_file, "Appending (%d)'s backedge %d:%+d\n",
|
|
|
|
|
child->node, pseudo.first ? pseudo.first->index : 0,
|
|
|
|
|
pseudo.second);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!brackets.length ())
|
|
|
|
|
std::swap (brackets, child->brackets);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
brackets.reserve (len);
|
|
|
|
|
for (ix = 0; ix < len; ix++)
|
|
|
|
|
brackets.quick_push (child->brackets[ix]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Remove brackets that terminate at PSEUDO. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
bb_sese::remove (const pseudo_node_t &pseudo)
|
|
|
|
|
{
|
|
|
|
|
unsigned removed = 0;
|
|
|
|
|
int len = brackets.length ();
|
|
|
|
|
|
|
|
|
|
for (int ix = 0; ix < len; ix++)
|
|
|
|
|
{
|
|
|
|
|
if (brackets[ix].back == pseudo)
|
|
|
|
|
{
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Removing backedge %d:%+d\n",
|
|
|
|
|
pseudo.first ? pseudo.first->index : 0, pseudo.second);
|
|
|
|
|
removed++;
|
|
|
|
|
}
|
|
|
|
|
else if (removed)
|
|
|
|
|
brackets[ix-removed] = brackets[ix];
|
|
|
|
|
}
|
|
|
|
|
while (removed--)
|
|
|
|
|
brackets.pop ();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Accessors for BB's aux pointer. */
|
|
|
|
|
#define BB_SET_SESE(B, S) ((B)->aux = (S))
|
|
|
|
|
#define BB_GET_SESE(B) ((bb_sese *)(B)->aux)
|
|
|
|
|
|
|
|
|
|
/* DFS walk creating SESE data structures. Only cover nodes with
|
|
|
|
|
BB_VISITED set. Append discovered blocks to LIST. We number in
|
|
|
|
|
increments of 3 so that the above and below pseudo nodes can be
|
|
|
|
|
implicitly numbered too. */
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
nvptx_sese_number (int n, int p, int dir, basic_block b,
|
|
|
|
|
auto_vec<basic_block> *list)
|
|
|
|
|
{
|
|
|
|
|
if (BB_GET_SESE (b))
|
|
|
|
|
return n;
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Block %d(%d), parent (%d), orientation %+d\n",
|
|
|
|
|
b->index, n, p, dir);
|
|
|
|
|
|
|
|
|
|
BB_SET_SESE (b, new bb_sese (n, p, dir));
|
|
|
|
|
p = n;
|
|
|
|
|
|
|
|
|
|
n += 3;
|
|
|
|
|
list->quick_push (b);
|
|
|
|
|
|
|
|
|
|
/* First walk the nodes on the 'other side' of this node, then walk
|
|
|
|
|
the nodes on the same side. */
|
|
|
|
|
for (unsigned ix = 2; ix; ix--)
|
|
|
|
|
{
|
|
|
|
|
vec<edge, va_gc> *edges = dir > 0 ? b->succs : b->preds;
|
|
|
|
|
size_t offset = (dir > 0 ? offsetof (edge_def, dest)
|
|
|
|
|
: offsetof (edge_def, src));
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator (ei);
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, edges)
|
|
|
|
|
{
|
|
|
|
|
basic_block target = *(basic_block *)((char *)e + offset);
|
|
|
|
|
|
|
|
|
|
if (target->flags & BB_VISITED)
|
|
|
|
|
n = nvptx_sese_number (n, p, dir, target, list);
|
|
|
|
|
}
|
|
|
|
|
dir = -dir;
|
|
|
|
|
}
|
|
|
|
|
return n;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Process pseudo node above (DIR < 0) or below (DIR > 0) ME.
|
|
|
|
|
EDGES are the outgoing edges and OFFSET is the offset to the src
|
|
|
|
|
or dst block on the edges. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_sese_pseudo (basic_block me, bb_sese *sese, int depth, int dir,
|
|
|
|
|
vec<edge, va_gc> *edges, size_t offset)
|
|
|
|
|
{
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator (ei);
|
|
|
|
|
int hi_back = depth;
|
|
|
|
|
pseudo_node_t node_back (0, depth);
|
|
|
|
|
int hi_child = depth;
|
|
|
|
|
pseudo_node_t node_child (0, depth);
|
|
|
|
|
basic_block child = NULL;
|
|
|
|
|
unsigned num_children = 0;
|
|
|
|
|
int usd = -dir * sese->dir;
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "\nProcessing %d(%d) %+d\n",
|
|
|
|
|
me->index, sese->node, dir);
|
|
|
|
|
|
|
|
|
|
if (dir < 0)
|
|
|
|
|
{
|
|
|
|
|
/* This is the above pseudo-child. It has the BB itself as an
|
|
|
|
|
additional child node. */
|
|
|
|
|
node_child = sese->high;
|
|
|
|
|
hi_child = node_child.second;
|
|
|
|
|
if (node_child.first)
|
|
|
|
|
hi_child += BB_GET_SESE (node_child.first)->node;
|
|
|
|
|
num_children++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Examine each edge.
|
|
|
|
|
- if it is a child (a) append its bracket list and (b) record
|
|
|
|
|
whether it is the child with the highest reaching bracket.
|
|
|
|
|
- if it is an edge to ancestor, record whether it's the highest
|
|
|
|
|
reaching backlink. */
|
|
|
|
|
FOR_EACH_EDGE (e, ei, edges)
|
|
|
|
|
{
|
|
|
|
|
basic_block target = *(basic_block *)((char *)e + offset);
|
|
|
|
|
|
|
|
|
|
if (bb_sese *t_sese = BB_GET_SESE (target))
|
|
|
|
|
{
|
|
|
|
|
if (t_sese->parent == sese->node && !(t_sese->dir + usd))
|
|
|
|
|
{
|
|
|
|
|
/* Child node. Append its bracket list. */
|
|
|
|
|
num_children++;
|
|
|
|
|
sese->append (t_sese);
|
|
|
|
|
|
|
|
|
|
/* Compare it's hi value. */
|
|
|
|
|
int t_hi = t_sese->high.second;
|
|
|
|
|
|
|
|
|
|
if (basic_block child_hi_block = t_sese->high.first)
|
|
|
|
|
t_hi += BB_GET_SESE (child_hi_block)->node;
|
|
|
|
|
|
|
|
|
|
if (hi_child > t_hi)
|
|
|
|
|
{
|
|
|
|
|
hi_child = t_hi;
|
|
|
|
|
node_child = t_sese->high;
|
|
|
|
|
child = target;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (t_sese->node < sese->node + dir
|
|
|
|
|
&& !(dir < 0 && sese->parent == t_sese->node))
|
|
|
|
|
{
|
|
|
|
|
/* Non-parental ancestor node -- a backlink. */
|
|
|
|
|
int d = usd * t_sese->dir;
|
|
|
|
|
int back = t_sese->node + d;
|
|
|
|
|
|
|
|
|
|
if (hi_back > back)
|
|
|
|
|
{
|
|
|
|
|
hi_back = back;
|
|
|
|
|
node_back = pseudo_node_t (target, d);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{ /* Fallen off graph, backlink to entry node. */
|
|
|
|
|
hi_back = 0;
|
|
|
|
|
node_back = pseudo_node_t (0, 0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Remove any brackets that terminate at this pseudo node. */
|
|
|
|
|
sese->remove (pseudo_node_t (me, dir));
|
|
|
|
|
|
|
|
|
|
/* Now push any backlinks from this pseudo node. */
|
|
|
|
|
FOR_EACH_EDGE (e, ei, edges)
|
|
|
|
|
{
|
|
|
|
|
basic_block target = *(basic_block *)((char *)e + offset);
|
|
|
|
|
if (bb_sese *t_sese = BB_GET_SESE (target))
|
|
|
|
|
{
|
|
|
|
|
if (t_sese->node < sese->node + dir
|
|
|
|
|
&& !(dir < 0 && sese->parent == t_sese->node))
|
|
|
|
|
/* Non-parental ancestor node - backedge from me. */
|
|
|
|
|
sese->push (pseudo_node_t (target, usd * t_sese->dir));
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/* back edge to entry node */
|
|
|
|
|
sese->push (pseudo_node_t (0, 0));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If this node leads directly or indirectly to a no-return region of
|
|
|
|
|
the graph, then fake a backedge to entry node. */
|
|
|
|
|
if (!sese->brackets.length () || !edges || !edges->length ())
|
|
|
|
|
{
|
|
|
|
|
hi_back = 0;
|
|
|
|
|
node_back = pseudo_node_t (0, 0);
|
|
|
|
|
sese->push (node_back);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Record the highest reaching backedge from us or a descendant. */
|
|
|
|
|
sese->high = hi_back < hi_child ? node_back : node_child;
|
|
|
|
|
|
|
|
|
|
if (num_children > 1)
|
|
|
|
|
{
|
|
|
|
|
/* There is more than one child -- this is a Y shaped piece of
|
|
|
|
|
spanning tree. We have to insert a fake backedge from this
|
|
|
|
|
node to the highest ancestor reached by not-the-highest
|
|
|
|
|
reaching child. Note that there may be multiple children
|
|
|
|
|
with backedges to the same highest node. That's ok and we
|
|
|
|
|
insert the edge to that highest node. */
|
|
|
|
|
hi_child = depth;
|
|
|
|
|
if (dir < 0 && child)
|
|
|
|
|
{
|
|
|
|
|
node_child = sese->high;
|
|
|
|
|
hi_child = node_child.second;
|
|
|
|
|
if (node_child.first)
|
|
|
|
|
hi_child += BB_GET_SESE (node_child.first)->node;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, edges)
|
|
|
|
|
{
|
|
|
|
|
basic_block target = *(basic_block *)((char *)e + offset);
|
|
|
|
|
|
|
|
|
|
if (target == child)
|
|
|
|
|
/* Ignore the highest child. */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
bb_sese *t_sese = BB_GET_SESE (target);
|
|
|
|
|
if (!t_sese)
|
|
|
|
|
continue;
|
|
|
|
|
if (t_sese->parent != sese->node)
|
|
|
|
|
/* Not a child. */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* Compare its hi value. */
|
|
|
|
|
int t_hi = t_sese->high.second;
|
|
|
|
|
|
|
|
|
|
if (basic_block child_hi_block = t_sese->high.first)
|
|
|
|
|
t_hi += BB_GET_SESE (child_hi_block)->node;
|
|
|
|
|
|
|
|
|
|
if (hi_child > t_hi)
|
|
|
|
|
{
|
|
|
|
|
hi_child = t_hi;
|
|
|
|
|
node_child = t_sese->high;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sese->push (node_child);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* DFS walk of BB graph. Color node BLOCK according to COLORING then
|
|
|
|
|
proceed to successors. Set SESE entry and exit nodes of
|
|
|
|
|
REGIONS. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_sese_color (auto_vec<unsigned> &color_counts, bb_pair_vec_t ®ions,
|
|
|
|
|
basic_block block, int coloring)
|
|
|
|
|
{
|
|
|
|
|
bb_sese *sese = BB_GET_SESE (block);
|
|
|
|
|
|
|
|
|
|
if (block->flags & BB_VISITED)
|
|
|
|
|
{
|
|
|
|
|
/* If we've already encountered this block, either we must not
|
|
|
|
|
be coloring, or it must have been colored the current color. */
|
|
|
|
|
gcc_assert (coloring < 0 || (sese && coloring == sese->color));
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
block->flags |= BB_VISITED;
|
|
|
|
|
|
|
|
|
|
if (sese)
|
|
|
|
|
{
|
|
|
|
|
if (coloring < 0)
|
|
|
|
|
{
|
|
|
|
|
/* Start coloring a region. */
|
|
|
|
|
regions[sese->color].first = block;
|
|
|
|
|
coloring = sese->color;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!--color_counts[sese->color] && sese->color == coloring)
|
|
|
|
|
{
|
|
|
|
|
/* Found final block of SESE region. */
|
|
|
|
|
regions[sese->color].second = block;
|
|
|
|
|
coloring = -1;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
/* Color the node, so we can assert on revisiting the node
|
|
|
|
|
that the graph is indeed SESE. */
|
|
|
|
|
sese->color = coloring;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
/* Fallen off the subgraph, we cannot be coloring. */
|
|
|
|
|
gcc_assert (coloring < 0);
|
|
|
|
|
|
|
|
|
|
/* Walk each successor block. */
|
|
|
|
|
if (block->succs && block->succs->length ())
|
|
|
|
|
{
|
|
|
|
|
edge e;
|
|
|
|
|
edge_iterator ei;
|
|
|
|
|
|
|
|
|
|
FOR_EACH_EDGE (e, ei, block->succs)
|
|
|
|
|
nvptx_sese_color (color_counts, regions, e->dest, coloring);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
gcc_assert (coloring < 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Find minimal set of SESE regions covering BLOCKS. REGIONS might
|
|
|
|
|
end up with NULL entries in it. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t ®ions)
|
|
|
|
|
{
|
|
|
|
|
basic_block block;
|
|
|
|
|
int ix;
|
|
|
|
|
|
|
|
|
|
/* First clear each BB of the whole function. */
|
2016-10-18 21:36:45 +02:00
|
|
|
|
FOR_ALL_BB_FN (block, cfun)
|
2015-11-18 19:33:38 +01:00
|
|
|
|
{
|
|
|
|
|
block->flags &= ~BB_VISITED;
|
|
|
|
|
BB_SET_SESE (block, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Mark blocks in the function that are in this graph. */
|
|
|
|
|
for (ix = 0; blocks.iterate (ix, &block); ix++)
|
|
|
|
|
block->flags |= BB_VISITED;
|
|
|
|
|
|
|
|
|
|
/* Counts of nodes assigned to each color. There cannot be more
|
|
|
|
|
colors than blocks (and hopefully there will be fewer). */
|
|
|
|
|
auto_vec<unsigned> color_counts;
|
|
|
|
|
color_counts.reserve (blocks.length ());
|
|
|
|
|
|
|
|
|
|
/* Worklist of nodes in the spanning tree. Again, there cannot be
|
|
|
|
|
more nodes in the tree than blocks (there will be fewer if the
|
|
|
|
|
CFG of blocks is disjoint). */
|
|
|
|
|
auto_vec<basic_block> spanlist;
|
|
|
|
|
spanlist.reserve (blocks.length ());
|
|
|
|
|
|
|
|
|
|
/* Make sure every block has its cycle class determined. */
|
|
|
|
|
for (ix = 0; blocks.iterate (ix, &block); ix++)
|
|
|
|
|
{
|
|
|
|
|
if (BB_GET_SESE (block))
|
|
|
|
|
/* We already met this block in an earlier graph solve. */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Searching graph starting at %d\n", block->index);
|
|
|
|
|
|
|
|
|
|
/* Number the nodes reachable from block initial DFS order. */
|
|
|
|
|
int depth = nvptx_sese_number (2, 0, +1, block, &spanlist);
|
|
|
|
|
|
|
|
|
|
/* Now walk in reverse DFS order to find cycle equivalents. */
|
|
|
|
|
while (spanlist.length ())
|
|
|
|
|
{
|
|
|
|
|
block = spanlist.pop ();
|
|
|
|
|
bb_sese *sese = BB_GET_SESE (block);
|
|
|
|
|
|
|
|
|
|
/* Do the pseudo node below. */
|
|
|
|
|
nvptx_sese_pseudo (block, sese, depth, +1,
|
|
|
|
|
sese->dir > 0 ? block->succs : block->preds,
|
|
|
|
|
(sese->dir > 0 ? offsetof (edge_def, dest)
|
|
|
|
|
: offsetof (edge_def, src)));
|
|
|
|
|
sese->set_color (color_counts);
|
|
|
|
|
/* Do the pseudo node above. */
|
|
|
|
|
nvptx_sese_pseudo (block, sese, depth, -1,
|
|
|
|
|
sese->dir < 0 ? block->succs : block->preds,
|
|
|
|
|
(sese->dir < 0 ? offsetof (edge_def, dest)
|
|
|
|
|
: offsetof (edge_def, src)));
|
|
|
|
|
}
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
unsigned count;
|
|
|
|
|
const char *comma = "";
|
|
|
|
|
|
|
|
|
|
fprintf (dump_file, "Found %d cycle equivalents\n",
|
|
|
|
|
color_counts.length ());
|
|
|
|
|
for (ix = 0; color_counts.iterate (ix, &count); ix++)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "%s%d[%d]={", comma, ix, count);
|
|
|
|
|
|
|
|
|
|
comma = "";
|
|
|
|
|
for (unsigned jx = 0; blocks.iterate (jx, &block); jx++)
|
|
|
|
|
if (BB_GET_SESE (block)->color == ix)
|
|
|
|
|
{
|
|
|
|
|
block->flags |= BB_VISITED;
|
|
|
|
|
fprintf (dump_file, "%s%d", comma, block->index);
|
|
|
|
|
comma=",";
|
|
|
|
|
}
|
|
|
|
|
fprintf (dump_file, "}");
|
|
|
|
|
comma = ", ";
|
|
|
|
|
}
|
|
|
|
|
fprintf (dump_file, "\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Now we've colored every block in the subgraph. We now need to
|
|
|
|
|
determine the minimal set of SESE regions that cover that
|
|
|
|
|
subgraph. Do this with a DFS walk of the complete function.
|
|
|
|
|
During the walk we're either 'looking' or 'coloring'. When we
|
|
|
|
|
reach the last node of a particular color, we stop coloring and
|
|
|
|
|
return to looking. */
|
|
|
|
|
|
|
|
|
|
/* There cannot be more SESE regions than colors. */
|
|
|
|
|
regions.reserve (color_counts.length ());
|
|
|
|
|
for (ix = color_counts.length (); ix--;)
|
|
|
|
|
regions.quick_push (bb_pair_t (0, 0));
|
|
|
|
|
|
|
|
|
|
for (ix = 0; blocks.iterate (ix, &block); ix++)
|
|
|
|
|
block->flags &= ~BB_VISITED;
|
|
|
|
|
|
|
|
|
|
nvptx_sese_color (color_counts, regions, ENTRY_BLOCK_PTR_FOR_FN (cfun), -1);
|
|
|
|
|
|
|
|
|
|
if (dump_file)
|
|
|
|
|
{
|
|
|
|
|
const char *comma = "";
|
|
|
|
|
int len = regions.length ();
|
|
|
|
|
|
|
|
|
|
fprintf (dump_file, "SESE regions:");
|
|
|
|
|
for (ix = 0; ix != len; ix++)
|
|
|
|
|
{
|
|
|
|
|
basic_block from = regions[ix].first;
|
|
|
|
|
basic_block to = regions[ix].second;
|
|
|
|
|
|
|
|
|
|
if (from)
|
|
|
|
|
{
|
|
|
|
|
fprintf (dump_file, "%s %d{%d", comma, ix, from->index);
|
|
|
|
|
if (to != from)
|
|
|
|
|
fprintf (dump_file, "->%d", to->index);
|
|
|
|
|
|
|
|
|
|
int color = BB_GET_SESE (from)->color;
|
|
|
|
|
|
|
|
|
|
/* Print the blocks within the region (excluding ends). */
|
|
|
|
|
FOR_EACH_BB_FN (block, cfun)
|
|
|
|
|
{
|
|
|
|
|
bb_sese *sese = BB_GET_SESE (block);
|
|
|
|
|
|
|
|
|
|
if (sese && sese->color == color
|
|
|
|
|
&& block != from && block != to)
|
|
|
|
|
fprintf (dump_file, ".%d", block->index);
|
|
|
|
|
}
|
|
|
|
|
fprintf (dump_file, "}");
|
|
|
|
|
}
|
|
|
|
|
comma = ",";
|
|
|
|
|
}
|
|
|
|
|
fprintf (dump_file, "\n\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (ix = 0; blocks.iterate (ix, &block); ix++)
|
|
|
|
|
delete BB_GET_SESE (block);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#undef BB_SET_SESE
|
|
|
|
|
#undef BB_GET_SESE
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Propagate live state at the start of a partitioned region. BLOCK
|
|
|
|
|
provides the live register information, and might not contain
|
|
|
|
|
INSN. Propagation is inserted just after INSN. RW indicates whether
|
|
|
|
|
we are reading and/or writing state. This
|
|
|
|
|
separation is needed for worker-level proppagation where we
|
|
|
|
|
essentially do a spill & fill. FN is the underlying worker
|
|
|
|
|
function to generate the propagation instructions for single
|
|
|
|
|
register. DATA is user data.
|
|
|
|
|
|
|
|
|
|
We propagate the live register set and the entire frame. We could
|
|
|
|
|
do better by (a) propagating just the live set that is used within
|
|
|
|
|
the partitioned regions and (b) only propagating stack entries that
|
|
|
|
|
are used. The latter might be quite hard to determine. */
|
|
|
|
|
|
|
|
|
|
typedef rtx (*propagator_fn) (rtx, propagate_mask, unsigned, void *);
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_propagate (basic_block block, rtx_insn *insn, propagate_mask rw,
|
|
|
|
|
propagator_fn fn, void *data)
|
|
|
|
|
{
|
|
|
|
|
bitmap live = DF_LIVE_IN (block);
|
|
|
|
|
bitmap_iterator iterator;
|
|
|
|
|
unsigned ix;
|
|
|
|
|
|
|
|
|
|
/* Copy the frame array. */
|
|
|
|
|
HOST_WIDE_INT fs = get_frame_size ();
|
|
|
|
|
if (fs)
|
|
|
|
|
{
|
|
|
|
|
rtx tmp = gen_reg_rtx (DImode);
|
|
|
|
|
rtx idx = NULL_RTX;
|
|
|
|
|
rtx ptr = gen_reg_rtx (Pmode);
|
|
|
|
|
rtx pred = NULL_RTX;
|
|
|
|
|
rtx_code_label *label = NULL;
|
|
|
|
|
|
2016-08-03 19:26:51 +02:00
|
|
|
|
/* The frame size might not be DImode compatible, but the frame
|
|
|
|
|
array's declaration will be. So it's ok to round up here. */
|
|
|
|
|
fs = (fs + GET_MODE_SIZE (DImode) - 1) / GET_MODE_SIZE (DImode);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Detect single iteration loop. */
|
|
|
|
|
if (fs == 1)
|
|
|
|
|
fs = 0;
|
|
|
|
|
|
|
|
|
|
start_sequence ();
|
|
|
|
|
emit_insn (gen_rtx_SET (ptr, frame_pointer_rtx));
|
|
|
|
|
if (fs)
|
|
|
|
|
{
|
|
|
|
|
idx = gen_reg_rtx (SImode);
|
|
|
|
|
pred = gen_reg_rtx (BImode);
|
|
|
|
|
label = gen_label_rtx ();
|
|
|
|
|
|
|
|
|
|
emit_insn (gen_rtx_SET (idx, GEN_INT (fs)));
|
|
|
|
|
/* Allow worker function to initialize anything needed. */
|
|
|
|
|
rtx init = fn (tmp, PM_loop_begin, fs, data);
|
|
|
|
|
if (init)
|
|
|
|
|
emit_insn (init);
|
|
|
|
|
emit_label (label);
|
|
|
|
|
LABEL_NUSES (label)++;
|
|
|
|
|
emit_insn (gen_addsi3 (idx, idx, GEN_INT (-1)));
|
|
|
|
|
}
|
|
|
|
|
if (rw & PM_read)
|
|
|
|
|
emit_insn (gen_rtx_SET (tmp, gen_rtx_MEM (DImode, ptr)));
|
|
|
|
|
emit_insn (fn (tmp, rw, fs, data));
|
|
|
|
|
if (rw & PM_write)
|
|
|
|
|
emit_insn (gen_rtx_SET (gen_rtx_MEM (DImode, ptr), tmp));
|
|
|
|
|
if (fs)
|
|
|
|
|
{
|
|
|
|
|
emit_insn (gen_rtx_SET (pred, gen_rtx_NE (BImode, idx, const0_rtx)));
|
|
|
|
|
emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (GET_MODE_SIZE (DImode))));
|
|
|
|
|
emit_insn (gen_br_true_uni (pred, label));
|
|
|
|
|
rtx fini = fn (tmp, PM_loop_end, fs, data);
|
|
|
|
|
if (fini)
|
|
|
|
|
emit_insn (fini);
|
|
|
|
|
emit_insn (gen_rtx_CLOBBER (GET_MODE (idx), idx));
|
|
|
|
|
}
|
|
|
|
|
emit_insn (gen_rtx_CLOBBER (GET_MODE (tmp), tmp));
|
|
|
|
|
emit_insn (gen_rtx_CLOBBER (GET_MODE (ptr), ptr));
|
|
|
|
|
rtx cpy = get_insns ();
|
|
|
|
|
end_sequence ();
|
|
|
|
|
insn = emit_insn_after (cpy, insn);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Copy live registers. */
|
|
|
|
|
EXECUTE_IF_SET_IN_BITMAP (live, 0, ix, iterator)
|
|
|
|
|
{
|
|
|
|
|
rtx reg = regno_reg_rtx[ix];
|
|
|
|
|
|
|
|
|
|
if (REGNO (reg) >= FIRST_PSEUDO_REGISTER)
|
|
|
|
|
{
|
|
|
|
|
rtx bcast = fn (reg, rw, 0, data);
|
|
|
|
|
|
|
|
|
|
insn = emit_insn_after (bcast, insn);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Worker for nvptx_vpropagate. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
vprop_gen (rtx reg, propagate_mask pm,
|
|
|
|
|
unsigned ARG_UNUSED (count), void *ARG_UNUSED (data))
|
|
|
|
|
{
|
|
|
|
|
if (!(pm & PM_read_write))
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
return nvptx_gen_vcast (reg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Propagate state that is live at start of BLOCK across the vectors
|
|
|
|
|
of a single warp. Propagation is inserted just after INSN. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_vpropagate (basic_block block, rtx_insn *insn)
|
|
|
|
|
{
|
|
|
|
|
nvptx_propagate (block, insn, PM_read_write, vprop_gen, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Worker for nvptx_wpropagate. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
wprop_gen (rtx reg, propagate_mask pm, unsigned rep, void *data_)
|
|
|
|
|
{
|
|
|
|
|
wcast_data_t *data = (wcast_data_t *)data_;
|
|
|
|
|
|
|
|
|
|
if (pm & PM_loop_begin)
|
|
|
|
|
{
|
|
|
|
|
/* Starting a loop, initialize pointer. */
|
|
|
|
|
unsigned align = GET_MODE_ALIGNMENT (GET_MODE (reg)) / BITS_PER_UNIT;
|
|
|
|
|
|
|
|
|
|
if (align > worker_bcast_align)
|
|
|
|
|
worker_bcast_align = align;
|
|
|
|
|
data->offset = (data->offset + align - 1) & ~(align - 1);
|
|
|
|
|
|
|
|
|
|
data->ptr = gen_reg_rtx (Pmode);
|
|
|
|
|
|
|
|
|
|
return gen_adddi3 (data->ptr, data->base, GEN_INT (data->offset));
|
|
|
|
|
}
|
|
|
|
|
else if (pm & PM_loop_end)
|
|
|
|
|
{
|
|
|
|
|
rtx clobber = gen_rtx_CLOBBER (GET_MODE (data->ptr), data->ptr);
|
|
|
|
|
data->ptr = NULL_RTX;
|
|
|
|
|
return clobber;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return nvptx_gen_wcast (reg, pm, rep, data);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Spill or fill live state that is live at start of BLOCK. PRE_P
|
|
|
|
|
indicates if this is just before partitioned mode (do spill), or
|
|
|
|
|
just after it starts (do fill). Sequence is inserted just after
|
|
|
|
|
INSN. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_wpropagate (bool pre_p, basic_block block, rtx_insn *insn)
|
|
|
|
|
{
|
|
|
|
|
wcast_data_t data;
|
|
|
|
|
|
|
|
|
|
data.base = gen_reg_rtx (Pmode);
|
|
|
|
|
data.offset = 0;
|
|
|
|
|
data.ptr = NULL_RTX;
|
|
|
|
|
|
|
|
|
|
nvptx_propagate (block, insn, pre_p ? PM_read : PM_write, wprop_gen, &data);
|
|
|
|
|
if (data.offset)
|
|
|
|
|
{
|
|
|
|
|
/* Stuff was emitted, initialize the base pointer now. */
|
2015-12-23 17:55:31 +01:00
|
|
|
|
rtx init = gen_rtx_SET (data.base, worker_bcast_sym);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
emit_insn_after (init, insn);
|
2015-12-18 15:39:52 +01:00
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
if (worker_bcast_size < data.offset)
|
|
|
|
|
worker_bcast_size = data.offset;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit a worker-level synchronization barrier. We use different
|
|
|
|
|
markers for before and after synchronizations. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_wsync (bool after)
|
|
|
|
|
{
|
|
|
|
|
return gen_nvptx_barsync (GEN_INT (after));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Single neutering according to MASK. FROM is the incoming block and
|
|
|
|
|
TO is the outgoing block. These may be the same block. Insert at
|
|
|
|
|
start of FROM:
|
|
|
|
|
|
|
|
|
|
if (tid.<axis>) goto end.
|
|
|
|
|
|
|
|
|
|
and insert before ending branch of TO (if there is such an insn):
|
|
|
|
|
|
|
|
|
|
end:
|
|
|
|
|
<possibly-broadcast-cond>
|
|
|
|
|
<branch>
|
|
|
|
|
|
|
|
|
|
We currently only use different FROM and TO when skipping an entire
|
|
|
|
|
loop. We could do more if we detected superblocks. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_single (unsigned mask, basic_block from, basic_block to)
|
|
|
|
|
{
|
|
|
|
|
rtx_insn *head = BB_HEAD (from);
|
|
|
|
|
rtx_insn *tail = BB_END (to);
|
|
|
|
|
unsigned skip_mask = mask;
|
|
|
|
|
|
|
|
|
|
/* Find first insn of from block */
|
|
|
|
|
while (head != BB_END (from) && !INSN_P (head))
|
|
|
|
|
head = NEXT_INSN (head);
|
|
|
|
|
|
|
|
|
|
/* Find last insn of to block */
|
|
|
|
|
rtx_insn *limit = from == to ? head : BB_HEAD (to);
|
|
|
|
|
while (tail != limit && !INSN_P (tail) && !LABEL_P (tail))
|
|
|
|
|
tail = PREV_INSN (tail);
|
|
|
|
|
|
|
|
|
|
/* Detect if tail is a branch. */
|
|
|
|
|
rtx tail_branch = NULL_RTX;
|
|
|
|
|
rtx cond_branch = NULL_RTX;
|
|
|
|
|
if (tail && INSN_P (tail))
|
|
|
|
|
{
|
|
|
|
|
tail_branch = PATTERN (tail);
|
|
|
|
|
if (GET_CODE (tail_branch) != SET || SET_DEST (tail_branch) != pc_rtx)
|
|
|
|
|
tail_branch = NULL_RTX;
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
cond_branch = SET_SRC (tail_branch);
|
|
|
|
|
if (GET_CODE (cond_branch) != IF_THEN_ELSE)
|
|
|
|
|
cond_branch = NULL_RTX;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tail == head)
|
|
|
|
|
{
|
|
|
|
|
/* If this is empty, do nothing. */
|
|
|
|
|
if (!head || !INSN_P (head))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* If this is a dummy insn, do nothing. */
|
|
|
|
|
switch (recog_memoized (head))
|
|
|
|
|
{
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
case CODE_FOR_nvptx_fork:
|
|
|
|
|
case CODE_FOR_nvptx_forked:
|
|
|
|
|
case CODE_FOR_nvptx_joining:
|
|
|
|
|
case CODE_FOR_nvptx_join:
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (cond_branch)
|
|
|
|
|
{
|
|
|
|
|
/* If we're only doing vector single, there's no need to
|
|
|
|
|
emit skip code because we'll not insert anything. */
|
|
|
|
|
if (!(mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)))
|
|
|
|
|
skip_mask = 0;
|
|
|
|
|
}
|
|
|
|
|
else if (tail_branch)
|
|
|
|
|
/* Block with only unconditional branch. Nothing to do. */
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Insert the vector test inside the worker test. */
|
|
|
|
|
unsigned mode;
|
|
|
|
|
rtx_insn *before = tail;
|
|
|
|
|
for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
|
|
|
|
|
if (GOMP_DIM_MASK (mode) & skip_mask)
|
|
|
|
|
{
|
|
|
|
|
rtx_code_label *label = gen_label_rtx ();
|
|
|
|
|
rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
|
|
|
|
|
|
|
|
|
|
if (!pred)
|
|
|
|
|
{
|
|
|
|
|
pred = gen_reg_rtx (BImode);
|
|
|
|
|
cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER] = pred;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rtx br;
|
|
|
|
|
if (mode == GOMP_DIM_VECTOR)
|
|
|
|
|
br = gen_br_true (pred, label);
|
|
|
|
|
else
|
|
|
|
|
br = gen_br_true_uni (pred, label);
|
|
|
|
|
emit_insn_before (br, head);
|
|
|
|
|
|
|
|
|
|
LABEL_NUSES (label)++;
|
|
|
|
|
if (tail_branch)
|
|
|
|
|
before = emit_label_before (label, before);
|
|
|
|
|
else
|
|
|
|
|
emit_label_after (label, tail);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Now deal with propagating the branch condition. */
|
|
|
|
|
if (cond_branch)
|
|
|
|
|
{
|
|
|
|
|
rtx pvar = XEXP (XEXP (cond_branch, 0), 0);
|
|
|
|
|
|
|
|
|
|
if (GOMP_DIM_MASK (GOMP_DIM_VECTOR) == mask)
|
|
|
|
|
{
|
|
|
|
|
/* Vector mode only, do a shuffle. */
|
|
|
|
|
emit_insn_before (nvptx_gen_vcast (pvar), tail);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/* Includes worker mode, do spill & fill. By construction
|
|
|
|
|
we should never have worker mode only. */
|
|
|
|
|
wcast_data_t data;
|
|
|
|
|
|
|
|
|
|
data.base = worker_bcast_sym;
|
|
|
|
|
data.ptr = 0;
|
|
|
|
|
|
|
|
|
|
if (worker_bcast_size < GET_MODE_SIZE (SImode))
|
|
|
|
|
worker_bcast_size = GET_MODE_SIZE (SImode);
|
|
|
|
|
|
|
|
|
|
data.offset = 0;
|
|
|
|
|
emit_insn_before (nvptx_gen_wcast (pvar, PM_read, 0, &data),
|
|
|
|
|
before);
|
|
|
|
|
/* Barrier so other workers can see the write. */
|
|
|
|
|
emit_insn_before (nvptx_wsync (false), tail);
|
|
|
|
|
data.offset = 0;
|
|
|
|
|
emit_insn_before (nvptx_gen_wcast (pvar, PM_write, 0, &data), tail);
|
|
|
|
|
/* This barrier is needed to avoid worker zero clobbering
|
|
|
|
|
the broadcast buffer before all the other workers have
|
|
|
|
|
had a chance to read this instance of it. */
|
|
|
|
|
emit_insn_before (nvptx_wsync (true), tail);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extract_insn (tail);
|
|
|
|
|
rtx unsp = gen_rtx_UNSPEC (BImode, gen_rtvec (1, pvar),
|
|
|
|
|
UNSPEC_BR_UNIFIED);
|
|
|
|
|
validate_change (tail, recog_data.operand_loc[0], unsp, false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* PAR is a parallel that is being skipped in its entirety according to
|
|
|
|
|
MASK. Treat this as skipping a superblock starting at forked
|
|
|
|
|
and ending at joining. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_skip_par (unsigned mask, parallel *par)
|
|
|
|
|
{
|
|
|
|
|
basic_block tail = par->join_block;
|
|
|
|
|
gcc_assert (tail->preds->length () == 1);
|
|
|
|
|
|
|
|
|
|
basic_block pre_tail = (*tail->preds)[0]->src;
|
|
|
|
|
gcc_assert (pre_tail->succs->length () == 1);
|
|
|
|
|
|
|
|
|
|
nvptx_single (mask, par->forked_block, pre_tail);
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-10 23:29:20 +01:00
|
|
|
|
/* If PAR has a single inner parallel and PAR itself only contains
|
|
|
|
|
empty entry and exit blocks, swallow the inner PAR. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_optimize_inner (parallel *par)
|
|
|
|
|
{
|
|
|
|
|
parallel *inner = par->inner;
|
|
|
|
|
|
|
|
|
|
/* We mustn't be the outer dummy par. */
|
|
|
|
|
if (!par->mask)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We must have a single inner par. */
|
|
|
|
|
if (!inner || inner->next)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We must only contain 2 blocks ourselves -- the head and tail of
|
|
|
|
|
the inner par. */
|
|
|
|
|
if (par->blocks.length () != 2)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* We must be disjoint partitioning. As we only have vector and
|
|
|
|
|
worker partitioning, this is sufficient to guarantee the pars
|
|
|
|
|
have adjacent partitioning. */
|
|
|
|
|
if ((par->mask & inner->mask) & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1))
|
|
|
|
|
/* This indicates malformed code generation. */
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* The outer forked insn should be immediately followed by the inner
|
|
|
|
|
fork insn. */
|
|
|
|
|
rtx_insn *forked = par->forked_insn;
|
|
|
|
|
rtx_insn *fork = BB_END (par->forked_block);
|
|
|
|
|
|
|
|
|
|
if (NEXT_INSN (forked) != fork)
|
|
|
|
|
return;
|
|
|
|
|
gcc_checking_assert (recog_memoized (fork) == CODE_FOR_nvptx_fork);
|
|
|
|
|
|
|
|
|
|
/* The outer joining insn must immediately follow the inner join
|
|
|
|
|
insn. */
|
|
|
|
|
rtx_insn *joining = par->joining_insn;
|
|
|
|
|
rtx_insn *join = inner->join_insn;
|
|
|
|
|
if (NEXT_INSN (join) != joining)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/* Preconditions met. Swallow the inner par. */
|
|
|
|
|
if (dump_file)
|
|
|
|
|
fprintf (dump_file, "Merging loop %x [%d,%d] into %x [%d,%d]\n",
|
|
|
|
|
inner->mask, inner->forked_block->index,
|
|
|
|
|
inner->join_block->index,
|
|
|
|
|
par->mask, par->forked_block->index, par->join_block->index);
|
|
|
|
|
|
|
|
|
|
par->mask |= inner->mask & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1);
|
|
|
|
|
|
|
|
|
|
par->blocks.reserve (inner->blocks.length ());
|
|
|
|
|
while (inner->blocks.length ())
|
|
|
|
|
par->blocks.quick_push (inner->blocks.pop ());
|
|
|
|
|
|
|
|
|
|
par->inner = inner->inner;
|
|
|
|
|
inner->inner = NULL;
|
|
|
|
|
|
|
|
|
|
delete inner;
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Process the parallel PAR and all its contained
|
|
|
|
|
parallels. We do everything but the neutering. Return mask of
|
|
|
|
|
partitioned modes used within this parallel. */
|
|
|
|
|
|
|
|
|
|
static unsigned
|
|
|
|
|
nvptx_process_pars (parallel *par)
|
|
|
|
|
{
|
2015-11-10 23:29:20 +01:00
|
|
|
|
if (nvptx_optimize)
|
|
|
|
|
nvptx_optimize_inner (par);
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
unsigned inner_mask = par->mask;
|
|
|
|
|
|
|
|
|
|
/* Do the inner parallels first. */
|
|
|
|
|
if (par->inner)
|
|
|
|
|
{
|
|
|
|
|
par->inner_mask = nvptx_process_pars (par->inner);
|
|
|
|
|
inner_mask |= par->inner_mask;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (par->mask & GOMP_DIM_MASK (GOMP_DIM_MAX))
|
|
|
|
|
/* No propagation needed for a call. */;
|
2015-11-18 22:33:57 +01:00
|
|
|
|
else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
{
|
|
|
|
|
nvptx_wpropagate (false, par->forked_block, par->forked_insn);
|
|
|
|
|
nvptx_wpropagate (true, par->forked_block, par->fork_insn);
|
|
|
|
|
/* Insert begin and end synchronizations. */
|
|
|
|
|
emit_insn_after (nvptx_wsync (false), par->forked_insn);
|
|
|
|
|
emit_insn_before (nvptx_wsync (true), par->joining_insn);
|
|
|
|
|
}
|
|
|
|
|
else if (par->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
|
|
|
|
|
nvptx_vpropagate (par->forked_block, par->forked_insn);
|
|
|
|
|
|
|
|
|
|
/* Now do siblings. */
|
|
|
|
|
if (par->next)
|
|
|
|
|
inner_mask |= nvptx_process_pars (par->next);
|
|
|
|
|
return inner_mask;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Neuter the parallel described by PAR. We recurse in depth-first
|
|
|
|
|
order. MODES are the partitioning of the execution and OUTER is
|
|
|
|
|
the partitioning of the parallels we are contained in. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
|
|
|
|
|
{
|
|
|
|
|
unsigned me = (par->mask
|
|
|
|
|
& (GOMP_DIM_MASK (GOMP_DIM_WORKER)
|
|
|
|
|
| GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
|
|
|
|
|
unsigned skip_mask = 0, neuter_mask = 0;
|
|
|
|
|
|
|
|
|
|
if (par->inner)
|
|
|
|
|
nvptx_neuter_pars (par->inner, modes, outer | me);
|
|
|
|
|
|
|
|
|
|
for (unsigned mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
|
|
|
|
|
{
|
|
|
|
|
if ((outer | me) & GOMP_DIM_MASK (mode))
|
|
|
|
|
{} /* Mode is partitioned: no neutering. */
|
|
|
|
|
else if (!(modes & GOMP_DIM_MASK (mode)))
|
2015-11-18 22:33:57 +01:00
|
|
|
|
{} /* Mode is not used: nothing to do. */
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
else if (par->inner_mask & GOMP_DIM_MASK (mode)
|
|
|
|
|
|| !par->forked_insn)
|
|
|
|
|
/* Partitioned in inner parallels, or we're not a partitioned
|
|
|
|
|
at all: neuter individual blocks. */
|
|
|
|
|
neuter_mask |= GOMP_DIM_MASK (mode);
|
|
|
|
|
else if (!par->parent || !par->parent->forked_insn
|
|
|
|
|
|| par->parent->inner_mask & GOMP_DIM_MASK (mode))
|
|
|
|
|
/* Parent isn't a parallel or contains this paralleling: skip
|
|
|
|
|
parallel at this level. */
|
|
|
|
|
skip_mask |= GOMP_DIM_MASK (mode);
|
|
|
|
|
else
|
|
|
|
|
{} /* Parent will skip this parallel itself. */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (neuter_mask)
|
|
|
|
|
{
|
2015-11-18 19:33:38 +01:00
|
|
|
|
int ix, len;
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
2015-11-18 19:33:38 +01:00
|
|
|
|
if (nvptx_optimize)
|
|
|
|
|
{
|
|
|
|
|
/* Neuter whole SESE regions. */
|
|
|
|
|
bb_pair_vec_t regions;
|
|
|
|
|
|
|
|
|
|
nvptx_find_sese (par->blocks, regions);
|
|
|
|
|
len = regions.length ();
|
|
|
|
|
for (ix = 0; ix != len; ix++)
|
|
|
|
|
{
|
|
|
|
|
basic_block from = regions[ix].first;
|
|
|
|
|
basic_block to = regions[ix].second;
|
|
|
|
|
|
|
|
|
|
if (from)
|
|
|
|
|
nvptx_single (neuter_mask, from, to);
|
|
|
|
|
else
|
|
|
|
|
gcc_assert (!to);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
{
|
2015-11-18 19:33:38 +01:00
|
|
|
|
/* Neuter each BB individually. */
|
|
|
|
|
len = par->blocks.length ();
|
|
|
|
|
for (ix = 0; ix != len; ix++)
|
|
|
|
|
{
|
|
|
|
|
basic_block block = par->blocks[ix];
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
2015-11-18 19:33:38 +01:00
|
|
|
|
nvptx_single (neuter_mask, block, block);
|
|
|
|
|
}
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (skip_mask)
|
|
|
|
|
nvptx_skip_par (skip_mask, par);
|
|
|
|
|
|
|
|
|
|
if (par->next)
|
|
|
|
|
nvptx_neuter_pars (par->next, modes, outer);
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-30 01:23:10 +02:00
|
|
|
|
/* PTX-specific reorganization
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
- Split blocks at fork and join instructions
|
2015-09-11 00:53:37 +02:00
|
|
|
|
- Compute live registers
|
|
|
|
|
- Mark now-unused registers, so function begin doesn't declare
|
2015-06-30 01:23:10 +02:00
|
|
|
|
unused registers.
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
- Insert state propagation when entering partitioned mode
|
|
|
|
|
- Insert neutering instructions when in single mode
|
2015-09-11 00:53:37 +02:00
|
|
|
|
- Replace subregs with suitable sequences.
|
2015-06-30 01:23:10 +02:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_reorg (void)
|
|
|
|
|
{
|
|
|
|
|
/* We are freeing block_for_insn in the toplev to keep compatibility
|
|
|
|
|
with old MDEP_REORGS that are not CFG based. Recompute it now. */
|
|
|
|
|
compute_bb_for_insn ();
|
|
|
|
|
|
|
|
|
|
thread_prologue_and_epilogue_insns ();
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Split blocks and record interesting unspecs. */
|
|
|
|
|
bb_insn_map_t bb_insn_map;
|
|
|
|
|
|
|
|
|
|
nvptx_split_blocks (&bb_insn_map);
|
|
|
|
|
|
2015-09-11 00:53:37 +02:00
|
|
|
|
/* Compute live regs */
|
2015-06-30 01:23:10 +02:00
|
|
|
|
df_clear_flags (DF_LR_RUN_DCE);
|
|
|
|
|
df_set_flags (DF_NO_INSN_RESCAN | DF_NO_HARD_REGS);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
df_live_add_problem ();
|
|
|
|
|
df_live_set_all_dirty ();
|
2015-06-30 01:23:10 +02:00
|
|
|
|
df_analyze ();
|
2014-11-10 17:12:42 +01:00
|
|
|
|
regstat_init_n_sets_and_refs ();
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
if (dump_file)
|
|
|
|
|
df_dump (dump_file);
|
|
|
|
|
|
2015-06-30 01:23:10 +02:00
|
|
|
|
/* Mark unused regs as unused. */
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
int max_regs = max_reg_num ();
|
2015-12-16 14:51:36 +01:00
|
|
|
|
for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
|
2014-11-10 17:12:42 +01:00
|
|
|
|
if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0)
|
|
|
|
|
regno_reg_rtx[i] = const0_rtx;
|
2015-06-30 01:23:10 +02:00
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Determine launch dimensions of the function. If it is not an
|
|
|
|
|
offloaded function (i.e. this is a regular compiler), the
|
|
|
|
|
function has no neutering. */
|
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz>
* omp-general.h: New file.
* omp-general.c: New file.
* omp-expand.h: Likewise.
* omp-expand.c: Likewise.
* omp-offload.h: Likewise.
* omp-offload.c: Likewise.
* omp-grid.c: Likewise.
* omp-grid.c: Likewise.
* omp-low.h: Include omp-general.h and omp-grid.h. Removed includes
of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h,
ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h,
varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h,
tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h,
alloc-pool.h, cfghooks.h, rtl.h and memmodel.h.
(omp_find_combined_for): Declare.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.h.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.h.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.h.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.h.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.h.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.h.
(oacc_fn_attrib_kernels_p): Moved to omp-general.h.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(omp_expand_local): Moved to omp-expand.h.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.h.
(omp_finish_file): Moved to omp-offload.h.
(default_goacc_validate_dims): Renamed to
oacc_default_goacc_validate_dims and moved to omp-offload.h.
(offload_funcs, offload_vars): Moved to omp-offload.h.
* omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h.
(omp_region): Moved to omp-expand.c.
(omp_for_data_loop): Moved to omp-general.h.
(omp_for_data): Likewise.
(oacc_loop): Moved to omp-offload.c.
(oacc_loop_flags): Moved to omp-general.h.
(offload_funcs, offload_vars): Moved to omp-offload.c.
(root_omp_region): Moved to omp-expand.c.
(omp_any_child_fn_dumped): Likewise.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.c.
(is_combined_parallel): Moved to omp-expand.c.
(is_reference): Renamed to omp_is_reference and and moved to
omp-general.c.
(adjust_for_condition): Renamed to omp_adjust_for_condition and moved
to omp-general.c.
(get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr
and moved to omp-general.c.
(extract_omp_for_data): Renamed to omp_extract_for_data and moved to
omp-general.c.
(workshare_safe_to_combine_p): Moved to omp-expand.c.
(omp_adjust_chunk_size): Likewise.
(get_ws_args_for): Likewise.
(get_base_type): Removed.
(dump_omp_region): Moved to omp-expand.c.
(debug_omp_region): Likewise.
(debug_all_omp_regions): Likewise.
(new_omp_region): Likewise.
(free_omp_region_1): Likewise.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.c.
(find_combined_for): Renamed to omp_find_combined_for, made global.
(build_omp_barrier): Renamed to omp_build_barrier and moved to
omp-general.c.
(omp_max_vf): Moved to omp-general.c.
(omp_max_simt_vf): Likewise.
(gimple_build_cond_empty): Moved to omp-expand.c.
(parallel_needs_hsa_kernel_p): Likewise.
(expand_omp_build_assign): Moved declaration to omp-expand.c.
(expand_parallel_call): Moved to omp-expand.c.
(expand_cilk_for_call): Likewise.
(expand_task_call): Likewise.
(vec2chain): Likewise.
(remove_exit_barrier): Likewise.
(remove_exit_barriers): Likewise.
(optimize_omp_library_calls): Likewise.
(expand_omp_regimplify_p): Likewise.
(expand_omp_build_assign): Likewise.
(expand_omp_taskreg): Likewise.
(oacc_collapse): Likewise.
(expand_oacc_collapse_init): Likewise.
(expand_oacc_collapse_vars): Likewise.
(expand_omp_for_init_counts): Likewise.
(expand_omp_for_init_vars): Likewise.
(extract_omp_for_update_vars): Likewise.
(expand_omp_ordered_source): Likewise.
(expand_omp_ordered_sink): Likewise.
(expand_omp_ordered_source_sink): Likewise.
(expand_omp_for_ordered_loops): Likewise.
(expand_omp_for_generic): Likewise.
(expand_omp_for_static_nochunk): Likewise.
(find_phi_with_arg_on_edge): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_cilk_for): Likewise.
(expand_omp_simd): Likewise.
(expand_omp_taskloop_for_outer): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
(expand_omp_for): Likewise.
(expand_omp_sections): Likewise.
(expand_omp_single): Likewise.
(expand_omp_synch): Likewise.
(expand_omp_atomic_load): Likewise.
(expand_omp_atomic_store): Likewise.
(expand_omp_atomic_fetch_op): Likewise.
(expand_omp_atomic_pipeline): Likewise.
(expand_omp_atomic_mutex): Likewise.
(expand_omp_atomic): Likewise.
(oacc_launch_pack): and moved to omp-general.c, made public.
(OACC_FN_ATTRIB): Likewise.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.c.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.c.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.c.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.c.
(oacc_fn_attrib_kernels_p): Moved to omp-general.c.
(oacc_fn_attrib_level): Moved to omp-offload.c.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to
omp-general.c.
(mark_loops_in_oacc_kernels_region): Moved to omp-expand.c.
(grid_launch_attributes_trees): Likewise.
(grid_attr_trees): Likewise.
(grid_create_kernel_launch_attr_types): Likewise.
(grid_insert_store_range_dim): Likewise.
(grid_get_kernel_launch_attributes): Likewise.
(get_target_argument_identifier_1): Likewise.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(push_target_argument_according_to_value): Likewise.
(get_target_arguments): Likewise.
(expand_omp_target): Likewise.
(grid_expand_omp_for_loop): Moved to omp-grid.c.
(grid_arg_decl_map): Likewise.
(grid_remap_kernel_arg_accesses): Likewise.
(grid_expand_target_grid_body): Likewise.
(expand_omp): Renamed to omp_expand and moved to omp-expand.c.
(build_omp_regions_1): Moved to omp-expand.c.
(build_omp_regions_root): Likewise.
(omp_expand_local): Likewise.
(build_omp_regions): Likewise.
(execute_expand_omp): Likewise.
(pass_data_expand_omp): Likewise.
(pass_expand_omp): Likewise.
(make_pass_expand_omp): Likewise.
(pass_data_expand_omp_ssa): Likewise.
(pass_expand_omp_ssa): Likewise.
(make_pass_expand_omp_ssa): Likewise.
(grid_lastprivate_predicate): Renamed to
omp_grid_lastprivate_predicate and moved to omp-grid.c, made public.
(grid_prop): Moved to omp-grid.c.
(GRID_MISSED_MSG_PREFIX): Likewise.
(grid_safe_assignment_p): Likewise.
(grid_seq_only_contains_local_assignments): Likewise.
(grid_find_single_omp_among_assignments_1): Likewise.
(grid_find_single_omp_among_assignments): Likewise.
(grid_find_ungridifiable_statement): Likewise.
(grid_parallel_clauses_gridifiable): Likewise.
(grid_inner_loop_gridifiable_p): Likewise.
(grid_dist_follows_simple_pattern): Likewise.
(grid_gfor_follows_tiling_pattern): Likewise.
(grid_call_permissible_in_distribute_p): Likewise.
(grid_handle_call_in_distribute): Likewise.
(grid_dist_follows_tiling_pattern): Likewise.
(grid_target_follows_gridifiable_pattern): Likewise.
(grid_remap_prebody_decls): Likewise.
(grid_var_segment): Likewise.
(grid_mark_variable_segment): Likewise.
(grid_copy_leading_local_assignments): Likewise.
(grid_process_grid_body): Likewise.
(grid_eliminate_combined_simd_part): Likewise.
(grid_mark_tiling_loops): Likewise.
(grid_mark_tiling_parallels_and_loops): Likewise.
(grid_process_kernel_body_copy): Likewise.
(grid_attempt_target_gridification): Likewise.
(grid_gridify_all_targets_stmt): Likewise.
(grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets
and moved to omp-grid.c, made public.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.c.
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c.
(omp_finish_file): Likewise.
(oacc_thread_numbers): Likewise.
(oacc_xform_loop): Likewise.
(oacc_default_dims, oacc_min_dims): Likewise.
(oacc_parse_default_dims): Likewise.
(oacc_validate_dims): Likewise.
(new_oacc_loop_raw): Likewise.
(new_oacc_loop_outer): Likewise.
(new_oacc_loop): Likewise.
(new_oacc_loop_routine): Likewise.
(finish_oacc_loop): Likewise.
(free_oacc_loop): Likewise.
(dump_oacc_loop_part): Likewise.
(dump_oacc_loop): Likewise.
(debug_oacc_loop): Likewise.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_sibling_nreverse): Likewise.
(oacc_loop_discovery): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(oacc_loop_xform_loop): Likewise.
(oacc_loop_process): Likewise.
(oacc_loop_fixed_partitions): Likewise.
(oacc_loop_auto_partitions): Likewise.
(oacc_loop_partition): Likewise.
(default_goacc_fork_join): Likewise.
(default_goacc_reduction): Likewise.
(execute_oacc_device_lower): Likewise.
(default_goacc_validate_dims): Likewise.
(default_goacc_dim_limit): Likewise.
(pass_data_oacc_device_lower): Likewise.
(pass_oacc_device_lower): Likewise.
(make_pass_oacc_device_lower): Likewise.
(execute_omp_device_lower): Likewise.
(pass_data_omp_device_lower): Likewise.
(pass_omp_device_lower): Likewise.
(make_pass_omp_device_lower): Likewise.
(pass_data_omp_target_link): Likewise.
(pass_omp_target_link): Likewise.
(find_link_var_op): Likewise.
(pass_omp_target_link::execute): Likewise.
(make_pass_omp_target_link): Likewise.
* Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o
and omp-grid.o.
(GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed
omp-low.h.
* gimple-fold.c: Include omp-general.h instead of omp-low.h.
(fold_internal_goacc_dim): Adjusted calls to
get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names.
* gimplify.c: Include omp-low.h.
(omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use
its new name.
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new
name.
(gimplify_omp_for): Likewise.
* lto-cgraph.c: Include omp-offload.h instead of omp-low.h.
* toplev.c: Include omp-offload.h instead of omp-low.h.
* tree-cfg.c: Include omp-general.h instead of omp-low.h. Also
include omp-expand.h.
(make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its
new name.
(make_edges): Adjust the call to free_omp_regions to use its new name.
* tree-parloops.c: Include omp-general.h.
(create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use
its new name.
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-ssa-loop.c: Include omp-general.h instead of omp-low.h.
(gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-vrp.c: Include omp-general.h instead of omp-low.h.
(extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and
get_oacc_fn_dim_size to use their new names.
* varpool.c: Include omp-offload.h instead of omp-low.h.
* gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in
ifiles.
* config/nvptx/nvptx.c: Include omp-general.c.
(nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use
its new name.
(nvptx_reorg): Likewise.
(nvptx_record_offload_symbol): Likewise.
gcc/c-family:
* c-omp.c: Include omp-general.h instead of omp-low.h.
(c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new
name.
gcc/c/
* c-parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(c_finish_oacc_routine): Adjusted call to
get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib
to use their new names.
(c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to
use its new name.
(c_parser_oacc_update): Likewise.
(c_parser_omp_simd): Likewise.
(c_parser_omp_target_update): Likewise.
* c-typeck.c: Include omp-general.h instead of omp-low.h.
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new
name.
(c_finish_omp_cancellation_point): Likewise.
* gimple-parser.c: Do not include omp-low.h
gcc/cp/
* parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new
name.
(cp_parser_omp_target_update): Likewise.
(cp_parser_oacc_declare): Likewise.
(cp_parser_oacc_enter_exit_data): Likewise.
(cp_parser_oacc_update): Likewise.
(cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib,
build_oacc_routine_dims and replace_oacc_fn_attrib to use their new
names.
* semantics.c: Include omp-general insteda of omp-low.h.
(finish_omp_for): Adjusted calls to find_omp_clause to use its new
name.
(finish_omp_cancel): Likewise.
(finish_omp_cancellation_point): Likewise.
fortran/
* trans-openmp.c: Include omp-general.h.
From-SVN: r243673
2016-12-14 23:30:41 +01:00
|
|
|
|
tree attr = oacc_get_fn_attrib (current_function_decl);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
if (attr)
|
|
|
|
|
{
|
|
|
|
|
/* If we determined this mask before RTL expansion, we could
|
|
|
|
|
elide emission of some levels of forks and joins. */
|
|
|
|
|
unsigned mask = 0;
|
|
|
|
|
tree dims = TREE_VALUE (attr);
|
|
|
|
|
unsigned ix;
|
|
|
|
|
|
|
|
|
|
for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
|
|
|
|
|
{
|
|
|
|
|
int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
|
|
|
|
|
tree allowed = TREE_PURPOSE (dims);
|
|
|
|
|
|
|
|
|
|
if (size != 1 && !(allowed && integer_zerop (allowed)))
|
|
|
|
|
mask |= GOMP_DIM_MASK (ix);
|
|
|
|
|
}
|
|
|
|
|
/* If there is worker neutering, there must be vector
|
|
|
|
|
neutering. Otherwise the hardware will fail. */
|
|
|
|
|
gcc_assert (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
|
|
|
|
|
|| (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
|
|
|
|
|
|
|
|
|
|
/* Discover & process partitioned regions. */
|
|
|
|
|
parallel *pars = nvptx_discover_pars (&bb_insn_map);
|
|
|
|
|
nvptx_process_pars (pars);
|
|
|
|
|
nvptx_neuter_pars (pars, mask, 0);
|
|
|
|
|
delete pars;
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-30 01:23:10 +02:00
|
|
|
|
/* Replace subregs. */
|
2015-07-06 22:53:34 +02:00
|
|
|
|
nvptx_reorg_subreg ();
|
2015-06-30 01:23:10 +02:00
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
if (TARGET_UNIFORM_SIMT)
|
|
|
|
|
nvptx_reorg_uniform_simt ();
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
regstat_free_n_sets_and_refs ();
|
2015-06-30 01:23:10 +02:00
|
|
|
|
|
|
|
|
|
df_finish_pass (true);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Handle a "kernel" attribute; arguments as in
|
|
|
|
|
struct attribute_spec.handler. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_handle_kernel_attribute (tree *node, tree name, tree ARG_UNUSED (args),
|
|
|
|
|
int ARG_UNUSED (flags), bool *no_add_attrs)
|
|
|
|
|
{
|
|
|
|
|
tree decl = *node;
|
|
|
|
|
|
|
|
|
|
if (TREE_CODE (decl) != FUNCTION_DECL)
|
|
|
|
|
{
|
|
|
|
|
error ("%qE attribute only applies to functions", name);
|
|
|
|
|
*no_add_attrs = true;
|
|
|
|
|
}
|
2015-12-11 19:06:37 +01:00
|
|
|
|
else if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (decl))))
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{
|
|
|
|
|
error ("%qE attribute requires a void return type", name);
|
|
|
|
|
*no_add_attrs = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL_TREE;
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Handle a "shared" attribute; arguments as in
|
|
|
|
|
struct attribute_spec.handler. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_handle_shared_attribute (tree *node, tree name, tree ARG_UNUSED (args),
|
|
|
|
|
int ARG_UNUSED (flags), bool *no_add_attrs)
|
|
|
|
|
{
|
|
|
|
|
tree decl = *node;
|
|
|
|
|
|
|
|
|
|
if (TREE_CODE (decl) != VAR_DECL)
|
|
|
|
|
{
|
|
|
|
|
error ("%qE attribute only applies to variables", name);
|
|
|
|
|
*no_add_attrs = true;
|
|
|
|
|
}
|
|
|
|
|
else if (!(TREE_PUBLIC (decl) || TREE_STATIC (decl)))
|
|
|
|
|
{
|
|
|
|
|
error ("%qE attribute not allowed with auto storage class", name);
|
|
|
|
|
*no_add_attrs = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL_TREE;
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Table of valid machine attributes. */
|
|
|
|
|
static const struct attribute_spec nvptx_attribute_table[] =
|
|
|
|
|
{
|
|
|
|
|
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
|
|
|
|
|
affects_type_identity } */
|
|
|
|
|
{ "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute, false },
|
2016-11-16 18:17:00 +01:00
|
|
|
|
{ "shared", 0, 0, true, false, false, nvptx_handle_shared_attribute, false },
|
2014-11-10 17:12:42 +01:00
|
|
|
|
{ NULL, 0, 0, false, false, false, NULL, false }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Limit vector alignments to BIGGEST_ALIGNMENT. */
|
|
|
|
|
|
|
|
|
|
static HOST_WIDE_INT
|
|
|
|
|
nvptx_vector_alignment (const_tree type)
|
|
|
|
|
{
|
|
|
|
|
HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
|
|
|
|
|
|
|
|
|
|
return MIN (align, BIGGEST_ALIGNMENT);
|
|
|
|
|
}
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
|
|
|
|
/* Indicate that INSN cannot be duplicated. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_cannot_copy_insn_p (rtx_insn *insn)
|
|
|
|
|
{
|
|
|
|
|
switch (recog_memoized (insn))
|
|
|
|
|
{
|
|
|
|
|
case CODE_FOR_nvptx_shufflesi:
|
|
|
|
|
case CODE_FOR_nvptx_shufflesf:
|
|
|
|
|
case CODE_FOR_nvptx_barsync:
|
|
|
|
|
case CODE_FOR_nvptx_fork:
|
|
|
|
|
case CODE_FOR_nvptx_forked:
|
|
|
|
|
case CODE_FOR_nvptx_joining:
|
|
|
|
|
case CODE_FOR_nvptx_join:
|
|
|
|
|
return true;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-11-20 15:23:18 +01:00
|
|
|
|
|
|
|
|
|
/* Section anchors do not work. Initialization for flag_section_anchor
|
|
|
|
|
probes the existence of the anchoring target hooks and prevents
|
|
|
|
|
anchoring if they don't exist. However, we may be being used with
|
|
|
|
|
a host-side compiler that does support anchoring, and hence see
|
|
|
|
|
the anchor flag set (as it's not recalculated). So provide an
|
|
|
|
|
implementation denying anchoring. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_use_anchors_for_symbol_p (const_rtx ARG_UNUSED (a))
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
2015-01-28 18:03:44 +01:00
|
|
|
|
/* Record a symbol for mkoffload to enter into the mapping table. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_record_offload_symbol (tree decl)
|
|
|
|
|
{
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
switch (TREE_CODE (decl))
|
|
|
|
|
{
|
|
|
|
|
case VAR_DECL:
|
|
|
|
|
fprintf (asm_out_file, "//:VAR_MAP \"%s\"\n",
|
|
|
|
|
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case FUNCTION_DECL:
|
|
|
|
|
{
|
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz>
* omp-general.h: New file.
* omp-general.c: New file.
* omp-expand.h: Likewise.
* omp-expand.c: Likewise.
* omp-offload.h: Likewise.
* omp-offload.c: Likewise.
* omp-grid.c: Likewise.
* omp-grid.h: Likewise.
* omp-low.h: Include omp-general.h and omp-grid.h. Removed includes
of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h,
ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h,
varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h,
tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h,
alloc-pool.h, cfghooks.h, rtl.h and memmodel.h.
(omp_find_combined_for): Declare.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.h.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.h.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.h.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.h.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.h.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.h.
(oacc_fn_attrib_kernels_p): Moved to omp-general.h.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(omp_expand_local): Moved to omp-expand.h.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.h.
(omp_finish_file): Moved to omp-offload.h.
(default_goacc_validate_dims): Renamed to
oacc_default_goacc_validate_dims and moved to omp-offload.h.
(offload_funcs, offload_vars): Moved to omp-offload.h.
* omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h.
(omp_region): Moved to omp-expand.c.
(omp_for_data_loop): Moved to omp-general.h.
(omp_for_data): Likewise.
(oacc_loop): Moved to omp-offload.c.
(oacc_loop_flags): Moved to omp-general.h.
(offload_funcs, offload_vars): Moved to omp-offload.c.
(root_omp_region): Moved to omp-expand.c.
(omp_any_child_fn_dumped): Likewise.
(find_omp_clause): Renamed to omp_find_clause and moved to
omp-general.c.
(is_combined_parallel): Moved to omp-expand.c.
(is_reference): Renamed to omp_is_reference and moved to
omp-general.c.
(adjust_for_condition): Renamed to omp_adjust_for_condition and moved
to omp-general.c.
(get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr
and moved to omp-general.c.
(extract_omp_for_data): Renamed to omp_extract_for_data and moved to
omp-general.c.
(workshare_safe_to_combine_p): Moved to omp-expand.c.
(omp_adjust_chunk_size): Likewise.
(get_ws_args_for): Likewise.
(get_base_type): Removed.
(dump_omp_region): Moved to omp-expand.c.
(debug_omp_region): Likewise.
(debug_all_omp_regions): Likewise.
(new_omp_region): Likewise.
(free_omp_region_1): Likewise.
(free_omp_regions): Renamed to omp_free_regions and moved to
omp-expand.c.
(find_combined_for): Renamed to omp_find_combined_for, made global.
(build_omp_barrier): Renamed to omp_build_barrier and moved to
omp-general.c.
(omp_max_vf): Moved to omp-general.c.
(omp_max_simt_vf): Likewise.
(gimple_build_cond_empty): Moved to omp-expand.c.
(parallel_needs_hsa_kernel_p): Likewise.
(expand_omp_build_assign): Moved declaration to omp-expand.c.
(expand_parallel_call): Moved to omp-expand.c.
(expand_cilk_for_call): Likewise.
(expand_task_call): Likewise.
(vec2chain): Likewise.
(remove_exit_barrier): Likewise.
(remove_exit_barriers): Likewise.
(optimize_omp_library_calls): Likewise.
(expand_omp_regimplify_p): Likewise.
(expand_omp_build_assign): Likewise.
(expand_omp_taskreg): Likewise.
(oacc_collapse): Likewise.
(expand_oacc_collapse_init): Likewise.
(expand_oacc_collapse_vars): Likewise.
(expand_omp_for_init_counts): Likewise.
(expand_omp_for_init_vars): Likewise.
(extract_omp_for_update_vars): Likewise.
(expand_omp_ordered_source): Likewise.
(expand_omp_ordered_sink): Likewise.
(expand_omp_ordered_source_sink): Likewise.
(expand_omp_for_ordered_loops): Likewise.
(expand_omp_for_generic): Likewise.
(expand_omp_for_static_nochunk): Likewise.
(find_phi_with_arg_on_edge): Likewise.
(expand_omp_for_static_chunk): Likewise.
(expand_cilk_for): Likewise.
(expand_omp_simd): Likewise.
(expand_omp_taskloop_for_outer): Likewise.
(expand_omp_taskloop_for_inner): Likewise.
(expand_oacc_for): Likewise.
(expand_omp_for): Likewise.
(expand_omp_sections): Likewise.
(expand_omp_single): Likewise.
(expand_omp_synch): Likewise.
(expand_omp_atomic_load): Likewise.
(expand_omp_atomic_store): Likewise.
(expand_omp_atomic_fetch_op): Likewise.
(expand_omp_atomic_pipeline): Likewise.
(expand_omp_atomic_mutex): Likewise.
(expand_omp_atomic): Likewise.
(oacc_launch_pack): Moved to omp-general.c, made public.
(OACC_FN_ATTRIB): Likewise.
(replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved
to omp-general.c.
(set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to
omp-general.c.
(build_oacc_routine_dims): Renamed to oacc_build_routine_dims and
moved to omp-general.c.
(get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to
omp-general.c.
(oacc_fn_attrib_kernels_p): Moved to omp-general.c.
(oacc_fn_attrib_level): Moved to omp-offload.c.
(get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to
omp-general.c.
(get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to
omp-general.c.
(mark_loops_in_oacc_kernels_region): Moved to omp-expand.c.
(grid_launch_attributes_trees): Likewise.
(grid_attr_trees): Likewise.
(grid_create_kernel_launch_attr_types): Likewise.
(grid_insert_store_range_dim): Likewise.
(grid_get_kernel_launch_attributes): Likewise.
(get_target_argument_identifier_1): Likewise.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(push_target_argument_according_to_value): Likewise.
(get_target_arguments): Likewise.
(expand_omp_target): Likewise.
(grid_expand_omp_for_loop): Moved to omp-grid.c.
(grid_arg_decl_map): Likewise.
(grid_remap_kernel_arg_accesses): Likewise.
(grid_expand_target_grid_body): Likewise.
(expand_omp): Renamed to omp_expand and moved to omp-expand.c.
(build_omp_regions_1): Moved to omp-expand.c.
(build_omp_regions_root): Likewise.
(omp_expand_local): Likewise.
(build_omp_regions): Likewise.
(execute_expand_omp): Likewise.
(pass_data_expand_omp): Likewise.
(pass_expand_omp): Likewise.
(make_pass_expand_omp): Likewise.
(pass_data_expand_omp_ssa): Likewise.
(pass_expand_omp_ssa): Likewise.
(make_pass_expand_omp_ssa): Likewise.
(grid_lastprivate_predicate): Renamed to
omp_grid_lastprivate_predicate and moved to omp-grid.c, made public.
(grid_prop): Moved to omp-grid.c.
(GRID_MISSED_MSG_PREFIX): Likewise.
(grid_safe_assignment_p): Likewise.
(grid_seq_only_contains_local_assignments): Likewise.
(grid_find_single_omp_among_assignments_1): Likewise.
(grid_find_single_omp_among_assignments): Likewise.
(grid_find_ungridifiable_statement): Likewise.
(grid_parallel_clauses_gridifiable): Likewise.
(grid_inner_loop_gridifiable_p): Likewise.
(grid_dist_follows_simple_pattern): Likewise.
(grid_gfor_follows_tiling_pattern): Likewise.
(grid_call_permissible_in_distribute_p): Likewise.
(grid_handle_call_in_distribute): Likewise.
(grid_dist_follows_tiling_pattern): Likewise.
(grid_target_follows_gridifiable_pattern): Likewise.
(grid_remap_prebody_decls): Likewise.
(grid_var_segment): Likewise.
(grid_mark_variable_segment): Likewise.
(grid_copy_leading_local_assignments): Likewise.
(grid_process_grid_body): Likewise.
(grid_eliminate_combined_simd_part): Likewise.
(grid_mark_tiling_loops): Likewise.
(grid_mark_tiling_parallels_and_loops): Likewise.
(grid_process_kernel_body_copy): Likewise.
(grid_attempt_target_gridification): Likewise.
(grid_gridify_all_targets_stmt): Likewise.
(grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets
and moved to omp-grid.c, made public.
(make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to
omp-expand.c.
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c.
(omp_finish_file): Likewise.
(oacc_thread_numbers): Likewise.
(oacc_xform_loop): Likewise.
(oacc_default_dims, oacc_min_dims): Likewise.
(oacc_parse_default_dims): Likewise.
(oacc_validate_dims): Likewise.
(new_oacc_loop_raw): Likewise.
(new_oacc_loop_outer): Likewise.
(new_oacc_loop): Likewise.
(new_oacc_loop_routine): Likewise.
(finish_oacc_loop): Likewise.
(free_oacc_loop): Likewise.
(dump_oacc_loop_part): Likewise.
(dump_oacc_loop): Likewise.
(debug_oacc_loop): Likewise.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_sibling_nreverse): Likewise.
(oacc_loop_discovery): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(oacc_loop_xform_loop): Likewise.
(oacc_loop_process): Likewise.
(oacc_loop_fixed_partitions): Likewise.
(oacc_loop_auto_partitions): Likewise.
(oacc_loop_partition): Likewise.
(default_goacc_fork_join): Likewise.
(default_goacc_reduction): Likewise.
(execute_oacc_device_lower): Likewise.
(default_goacc_validate_dims): Likewise.
(default_goacc_dim_limit): Likewise.
(pass_data_oacc_device_lower): Likewise.
(pass_oacc_device_lower): Likewise.
(make_pass_oacc_device_lower): Likewise.
(execute_omp_device_lower): Likewise.
(pass_data_omp_device_lower): Likewise.
(pass_omp_device_lower): Likewise.
(make_pass_omp_device_lower): Likewise.
(pass_data_omp_target_link): Likewise.
(pass_omp_target_link): Likewise.
(find_link_var_op): Likewise.
(pass_omp_target_link::execute): Likewise.
(make_pass_omp_target_link): Likewise.
* Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o
and omp-grid.o.
(GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed
omp-low.h.
* gimple-fold.c: Include omp-general.h instead of omp-low.h.
(fold_internal_goacc_dim): Adjusted calls to
get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names.
* gimplify.c: Include omp-low.h.
(omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use
its new name.
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new
name.
(gimplify_omp_for): Likewise.
* lto-cgraph.c: Include omp-offload.h instead of omp-low.h.
* toplev.c: Include omp-offload.h instead of omp-low.h.
* tree-cfg.c: Include omp-general.h instead of omp-low.h. Also
include omp-expand.h.
(make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its
new name.
(make_edges): Adjust the call to free_omp_regions to use its new name.
* tree-parloops.c: Include omp-general.h.
(create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use
its new name.
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-ssa-loop.c: Include omp-general.h instead of omp-low.h.
(gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use
its new name.
* tree-vrp.c: Include omp-general.h instead of omp-low.h.
(extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and
get_oacc_fn_dim_size to use their new names.
* varpool.c: Include omp-offload.h instead of omp-low.h.
* gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in
ifiles.
* config/nvptx/nvptx.c: Include omp-general.h.
(nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use
its new name.
(nvptx_reorg): Likewise.
(nvptx_record_offload_symbol): Likewise.
gcc/c-family:
* c-omp.c: Include omp-general.h instead of omp-low.h.
(c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new
name.
gcc/c/
* c-parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(c_finish_oacc_routine): Adjusted call to
get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib
to use their new names.
(c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to
use its new name.
(c_parser_oacc_update): Likewise.
(c_parser_omp_simd): Likewise.
(c_parser_omp_target_update): Likewise.
* c-typeck.c: Include omp-general.h instead of omp-low.h.
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new
name.
(c_finish_omp_cancellation_point): Likewise.
* gimple-parser.c: Do not include omp-low.h
gcc/cp/
* parser.c: Include omp-general.h and omp-offload.h instead of
omp-low.h.
(cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new
name.
(cp_parser_omp_target_update): Likewise.
(cp_parser_oacc_declare): Likewise.
(cp_parser_oacc_enter_exit_data): Likewise.
(cp_parser_oacc_update): Likewise.
(cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib,
build_oacc_routine_dims and replace_oacc_fn_attrib to use their new
names.
* semantics.c: Include omp-general.h instead of omp-low.h.
(finish_omp_for): Adjusted calls to find_omp_clause to use its new
name.
(finish_omp_cancel): Likewise.
(finish_omp_cancellation_point): Likewise.
fortran/
* trans-openmp.c: Include omp-general.h.
From-SVN: r243673
2016-12-14 23:30:41 +01:00
|
|
|
|
tree attr = oacc_get_fn_attrib (decl);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* OpenMP offloading does not set this attribute. */
|
|
|
|
|
tree dims = attr ? TREE_VALUE (attr) : NULL_TREE;
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
|
|
|
|
|
fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
|
|
|
|
|
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
for (; dims; dims = TREE_CHAIN (dims))
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
{
|
2015-11-18 22:33:57 +01:00
|
|
|
|
int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
|
2015-11-18 22:33:57 +01:00
|
|
|
|
gcc_assert (!TREE_PURPOSE (dims));
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
fprintf (asm_out_file, ", %#x", size);
|
|
|
|
|
}
|
2015-10-28 19:32:07 +01:00
|
|
|
|
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
fprintf (asm_out_file, "\n");
|
|
|
|
|
}
|
|
|
|
|
break;
|
2015-10-28 19:32:07 +01:00
|
|
|
|
|
gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
inlude/
* gomp-constants.h (GOMP_VERSION_NVIDIA_PTX): Increment.
(GOMP_DIM_GANG, GOMP_DIM_WORKER, GOMP_DIM_VECTOR, GOMP_DIM_MAX,
GOMP_DIM_MASK): New.
(GOMP_LAUNCH_DIM, GOMP_LAUNCH_ASYNC, GOMP_LAUNCH_WAIT): New.
(GOMP_LAUNCH_CODE_SHIFT, GOMP_LAUNCH_DEVICE_SHIFT,
GOMP_LAUNCH_OP_SHIFT): New.
(GOMP_LAUNCH_PACK, GOMP_LAUNCH_CODE, GOMP_LAUNCH_DEVICE,
GOMP_LAUNCH_OP): New.
(GOMP_LAUNCH_OP_MAX): New.
libgomp/
* libgomp.h (acc_dispatch_t): Replace separate geometry args with
array.
* libgomp.map (GOACC_parallel_keyed): New.
* oacc-parallel.c (goacc_wait): Take pointer to va_list. Adjust
all callers.
(GOACC_parallel_keyed): New interface. Lose geometry arguments
and take keyed varargs list. Adjust call to exec_func.
(GOACC_parallel): Force host fallback.
* libgomp_g.h (GOACC_parallel): Remove.
(GOACC_parallel_keyed): Declare.
* plugin/plugin-nvptx.c (struct targ_fn_launch): New struct.
(stuct targ_gn_descriptor): Replace name field with launch field.
(nvptx_exec): Lose separate geometry args, take array. Process
dynamic dimensions and adjust.
(struct nvptx_tdata): Replace fn_names field with fn_descs.
(GOMP_OFFLOAD_load_image): Adjust for change in function table
data.
(GOMP_OFFLOAD_openacc_parallel): Adjust for change in dimension
passing.
* oacc-host.c (host_openacc_exec): Adjust for change in dimension
passing.
gcc/
* config/nvptx/nvptx.c: Include omp-low.h and gomp-constants.h.
(nvptx_record_offload_symbol): Record function execution geometry.
* config/nvptx/mkoffload.c (process): Include launch geometry in
function data.
* omp-low.c (oacc_launch_pack): New.
(replace_oacc_fn_attrib): New.
(set_oacc_fn_attrib): New.
(get_oacc_fn_attrib): New.
(expand_omp_target): Create keyed varargs for GOACC_parallel call
generation.
* omp-low.h (get_oacc_fn_attrib): Declare.
* builtin-types.def (DEF_FUNCTION_TyPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* tree.h (OMP_CLAUSE_EXPR): New.
* omp-builtins.def (BUILT_IN_GOACC_PARALLEL): Change target fn name.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/c-family/
* c-common.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/fortran/
* f95-lang.c (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
* types.def (DEF_FUNCTION_TYPE_VAR_6): New.
(DEF_FUNCTION_TYPE_VAR_11): Delete.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_6): Define
From-SVN: r228220
2015-09-28 21:37:33 +02:00
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
2015-01-28 18:03:44 +01:00
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
/* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
|
|
|
|
|
at the start of a file. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_file_start (void)
|
|
|
|
|
{
|
|
|
|
|
fputs ("// BEGIN PREAMBLE\n", asm_out_file);
|
|
|
|
|
fputs ("\t.version\t3.1\n", asm_out_file);
|
|
|
|
|
fputs ("\t.target\tsm_30\n", asm_out_file);
|
|
|
|
|
fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
|
|
|
|
|
fputs ("// END PREAMBLE\n", asm_out_file);
|
|
|
|
|
}
|
|
|
|
|
|
2015-12-18 15:39:52 +01:00
|
|
|
|
/* Emit a declaration for a worker-level buffer in .shared memory. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
write_worker_buffer (FILE *file, rtx sym, unsigned align, unsigned size)
|
|
|
|
|
{
|
|
|
|
|
const char *name = XSTR (sym, 0);
|
|
|
|
|
|
|
|
|
|
write_var_marker (file, true, false, name);
|
|
|
|
|
fprintf (file, ".shared .align %d .u8 %s[%d];\n",
|
|
|
|
|
align, name, size);
|
|
|
|
|
}
|
|
|
|
|
|
2015-07-22 17:30:14 +02:00
|
|
|
|
/* Write out the function declarations we've collected and declare storage
|
|
|
|
|
for the broadcast buffer. */
|
2014-11-10 17:12:42 +01:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_file_end (void)
|
|
|
|
|
{
|
2014-12-09 03:30:06 +01:00
|
|
|
|
hash_table<tree_hasher>::iterator iter;
|
|
|
|
|
tree decl;
|
|
|
|
|
FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab, decl, tree, iter)
|
2015-11-27 14:57:09 +01:00
|
|
|
|
nvptx_record_fndecl (decl);
|
2014-11-10 17:12:42 +01:00
|
|
|
|
fputs (func_decls.str().c_str(), asm_out_file);
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
|
|
|
|
if (worker_bcast_size)
|
2015-12-18 15:39:52 +01:00
|
|
|
|
write_worker_buffer (asm_out_file, worker_bcast_sym,
|
|
|
|
|
worker_bcast_align, worker_bcast_size);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
|
|
|
|
if (worker_red_size)
|
2015-12-18 15:39:52 +01:00
|
|
|
|
write_worker_buffer (asm_out_file, worker_red_sym,
|
|
|
|
|
worker_red_align, worker_red_size);
|
2016-11-16 18:17:00 +01:00
|
|
|
|
|
|
|
|
|
if (need_softstack_decl)
|
|
|
|
|
{
|
|
|
|
|
write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
|
|
|
|
|
/* 32 is the maximum number of warps in a block. Even though it's an
|
|
|
|
|
external declaration, emit the array size explicitly; otherwise, it
|
|
|
|
|
may fail at PTX JIT time if the definition is later in link order. */
|
|
|
|
|
fprintf (asm_out_file, ".extern .shared .u%d __nvptx_stacks[32];\n",
|
|
|
|
|
POINTER_SIZE);
|
|
|
|
|
}
|
|
|
|
|
if (need_unisimt_decl)
|
|
|
|
|
{
|
|
|
|
|
write_var_marker (asm_out_file, false, true, "__nvptx_uni");
|
|
|
|
|
fprintf (asm_out_file, ".extern .shared .u32 __nvptx_uni[32];\n");
|
|
|
|
|
}
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Expander for the shuffle builtins. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
|
|
|
|
|
{
|
|
|
|
|
if (ignore)
|
|
|
|
|
return target;
|
|
|
|
|
|
|
|
|
|
rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
|
|
|
|
|
NULL_RTX, mode, EXPAND_NORMAL);
|
|
|
|
|
if (!REG_P (src))
|
|
|
|
|
src = copy_to_mode_reg (mode, src);
|
|
|
|
|
|
|
|
|
|
rtx idx = expand_expr (CALL_EXPR_ARG (exp, 1),
|
|
|
|
|
NULL_RTX, SImode, EXPAND_NORMAL);
|
|
|
|
|
rtx op = expand_expr (CALL_EXPR_ARG (exp, 2),
|
|
|
|
|
NULL_RTX, SImode, EXPAND_NORMAL);
|
|
|
|
|
|
|
|
|
|
if (!REG_P (idx) && GET_CODE (idx) != CONST_INT)
|
|
|
|
|
idx = copy_to_mode_reg (SImode, idx);
|
|
|
|
|
|
2015-12-02 18:28:32 +01:00
|
|
|
|
rtx pat = nvptx_gen_shuffle (target, src, idx,
|
|
|
|
|
(nvptx_shuffle_kind) INTVAL (op));
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
if (pat)
|
|
|
|
|
emit_insn (pat);
|
|
|
|
|
|
|
|
|
|
return target;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Worker reduction address expander. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_expand_worker_addr (tree exp, rtx target,
|
|
|
|
|
machine_mode ARG_UNUSED (mode), int ignore)
|
|
|
|
|
{
|
|
|
|
|
if (ignore)
|
|
|
|
|
return target;
|
|
|
|
|
|
|
|
|
|
unsigned align = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 2));
|
|
|
|
|
if (align > worker_red_align)
|
|
|
|
|
worker_red_align = align;
|
|
|
|
|
|
|
|
|
|
unsigned offset = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 0));
|
|
|
|
|
unsigned size = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1));
|
|
|
|
|
if (size + offset > worker_red_size)
|
|
|
|
|
worker_red_size = size + offset;
|
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
rtx addr = worker_red_sym;
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
if (offset)
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
{
|
|
|
|
|
addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (offset));
|
|
|
|
|
addr = gen_rtx_CONST (Pmode, addr);
|
|
|
|
|
}
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
emit_move_insn (target, addr);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
|
|
|
|
return target;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Expand the CMP_SWAP PTX builtins. We have our own versions that do
|
|
|
|
|
not require taking the address of any object, other than the memory
|
|
|
|
|
cell being operated on. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_expand_cmp_swap (tree exp, rtx target,
|
|
|
|
|
machine_mode ARG_UNUSED (m), int ARG_UNUSED (ignore))
|
|
|
|
|
{
|
|
|
|
|
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
|
|
|
|
|
|
|
|
|
|
if (!target)
|
|
|
|
|
target = gen_reg_rtx (mode);
|
|
|
|
|
|
|
|
|
|
rtx mem = expand_expr (CALL_EXPR_ARG (exp, 0),
|
|
|
|
|
NULL_RTX, Pmode, EXPAND_NORMAL);
|
|
|
|
|
rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
|
|
|
|
|
NULL_RTX, mode, EXPAND_NORMAL);
|
|
|
|
|
rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
|
|
|
|
|
NULL_RTX, mode, EXPAND_NORMAL);
|
|
|
|
|
rtx pat;
|
|
|
|
|
|
|
|
|
|
mem = gen_rtx_MEM (mode, mem);
|
|
|
|
|
if (!REG_P (cmp))
|
|
|
|
|
cmp = copy_to_mode_reg (mode, cmp);
|
|
|
|
|
if (!REG_P (src))
|
|
|
|
|
src = copy_to_mode_reg (mode, src);
|
|
|
|
|
|
|
|
|
|
if (mode == SImode)
|
|
|
|
|
pat = gen_atomic_compare_and_swapsi_1 (target, mem, cmp, src, const0_rtx);
|
|
|
|
|
else
|
|
|
|
|
pat = gen_atomic_compare_and_swapdi_1 (target, mem, cmp, src, const0_rtx);
|
|
|
|
|
|
|
|
|
|
emit_insn (pat);
|
|
|
|
|
|
|
|
|
|
return target;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Codes for all the NVPTX builtins. */
|
|
|
|
|
enum nvptx_builtins
|
|
|
|
|
{
|
|
|
|
|
NVPTX_BUILTIN_SHUFFLE,
|
|
|
|
|
NVPTX_BUILTIN_SHUFFLELL,
|
|
|
|
|
NVPTX_BUILTIN_WORKER_ADDR,
|
|
|
|
|
NVPTX_BUILTIN_CMP_SWAP,
|
|
|
|
|
NVPTX_BUILTIN_CMP_SWAPLL,
|
|
|
|
|
NVPTX_BUILTIN_MAX
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
|
|
|
|
|
|
|
|
|
|
/* Return the NVPTX builtin for CODE. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
|
|
|
|
|
{
|
|
|
|
|
if (code >= NVPTX_BUILTIN_MAX)
|
|
|
|
|
return error_mark_node;
|
|
|
|
|
|
|
|
|
|
return nvptx_builtin_decls[code];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set up all builtin functions for this target. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_init_builtins (void)
|
|
|
|
|
{
|
|
|
|
|
#define DEF(ID, NAME, T) \
|
|
|
|
|
(nvptx_builtin_decls[NVPTX_BUILTIN_ ## ID] \
|
|
|
|
|
= add_builtin_function ("__builtin_nvptx_" NAME, \
|
|
|
|
|
build_function_type_list T, \
|
|
|
|
|
NVPTX_BUILTIN_ ## ID, BUILT_IN_MD, NULL, NULL))
|
|
|
|
|
#define ST sizetype
|
|
|
|
|
#define UINT unsigned_type_node
|
|
|
|
|
#define LLUINT long_long_unsigned_type_node
|
|
|
|
|
#define PTRVOID ptr_type_node
|
|
|
|
|
|
|
|
|
|
DEF (SHUFFLE, "shuffle", (UINT, UINT, UINT, UINT, NULL_TREE));
|
|
|
|
|
DEF (SHUFFLELL, "shufflell", (LLUINT, LLUINT, UINT, UINT, NULL_TREE));
|
|
|
|
|
DEF (WORKER_ADDR, "worker_addr",
|
|
|
|
|
(PTRVOID, ST, UINT, UINT, NULL_TREE));
|
|
|
|
|
DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE));
|
|
|
|
|
DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE));
|
|
|
|
|
|
|
|
|
|
#undef DEF
|
|
|
|
|
#undef ST
|
|
|
|
|
#undef UINT
|
|
|
|
|
#undef LLUINT
|
|
|
|
|
#undef PTRVOID
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Expand an expression EXP that calls a built-in function,
|
|
|
|
|
with result going to TARGET if that's convenient
|
|
|
|
|
(and in mode MODE if that's convenient).
|
|
|
|
|
SUBTARGET may be used as the target for computing one of EXP's operands.
|
|
|
|
|
IGNORE is nonzero if the value is to be ignored. */
|
|
|
|
|
|
|
|
|
|
static rtx
|
|
|
|
|
nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
|
|
|
|
|
machine_mode mode, int ignore)
|
|
|
|
|
{
|
|
|
|
|
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
|
|
|
|
|
switch (DECL_FUNCTION_CODE (fndecl))
|
|
|
|
|
{
|
|
|
|
|
case NVPTX_BUILTIN_SHUFFLE:
|
|
|
|
|
case NVPTX_BUILTIN_SHUFFLELL:
|
|
|
|
|
return nvptx_expand_shuffle (exp, target, mode, ignore);
|
|
|
|
|
|
|
|
|
|
case NVPTX_BUILTIN_WORKER_ADDR:
|
|
|
|
|
return nvptx_expand_worker_addr (exp, target, mode, ignore);
|
|
|
|
|
|
|
|
|
|
case NVPTX_BUILTIN_CMP_SWAP:
|
|
|
|
|
case NVPTX_BUILTIN_CMP_SWAPLL:
|
|
|
|
|
return nvptx_expand_cmp_swap (exp, target, mode, ignore);
|
|
|
|
|
|
|
|
|
|
default: gcc_unreachable ();
|
|
|
|
|
}
|
2014-11-10 17:12:42 +01:00
|
|
|
|
}
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
/* Define dimension sizes for known hardware. */
|
|
|
|
|
#define PTX_VECTOR_LENGTH 32
|
|
|
|
|
#define PTX_WORKER_LENGTH 32
|
2016-11-02 23:10:02 +01:00
|
|
|
|
#define PTX_GANG_DEFAULT 0 /* Defer to runtime. */
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
/* Implement TARGET_SIMT_VF target hook: number of threads in a warp. */
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
nvptx_simt_vf ()
|
|
|
|
|
{
|
|
|
|
|
return PTX_VECTOR_LENGTH;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-30 21:16:29 +02:00
|
|
|
|
/* Validate compute dimensions of an OpenACC offload or routine, fill
|
|
|
|
|
in non-unity defaults. FN_LEVEL indicates the level at which a
|
2016-02-01 17:20:13 +01:00
|
|
|
|
routine might spawn a loop. It is negative for non-routines. If
|
|
|
|
|
DECL is null, we are validating the default dimensions. */
|
2015-09-30 21:16:29 +02:00
|
|
|
|
|
|
|
|
|
static bool
|
2015-11-18 22:33:57 +01:00
|
|
|
|
nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
|
2015-09-30 21:16:29 +02:00
|
|
|
|
{
|
|
|
|
|
bool changed = false;
|
|
|
|
|
|
2015-11-04 21:48:05 +01:00
|
|
|
|
/* The vector size must be 32, unless this is a SEQ routine. */
|
2016-02-01 17:20:13 +01:00
|
|
|
|
if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
|
|
|
|
|
&& dims[GOMP_DIM_VECTOR] >= 0
|
2015-11-04 21:48:05 +01:00
|
|
|
|
&& dims[GOMP_DIM_VECTOR] != PTX_VECTOR_LENGTH)
|
|
|
|
|
{
|
2016-02-01 17:20:13 +01:00
|
|
|
|
if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0)
|
|
|
|
|
warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0,
|
2015-11-04 21:48:05 +01:00
|
|
|
|
dims[GOMP_DIM_VECTOR]
|
2017-02-28 16:25:19 +01:00
|
|
|
|
? G_("using vector_length (%d), ignoring %d")
|
|
|
|
|
: G_("using vector_length (%d), ignoring runtime setting"),
|
2015-11-04 21:48:05 +01:00
|
|
|
|
PTX_VECTOR_LENGTH, dims[GOMP_DIM_VECTOR]);
|
|
|
|
|
dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH;
|
|
|
|
|
changed = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Check the num workers is not too large. */
|
|
|
|
|
if (dims[GOMP_DIM_WORKER] > PTX_WORKER_LENGTH)
|
|
|
|
|
{
|
2016-02-01 17:20:13 +01:00
|
|
|
|
warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0,
|
2015-11-04 21:48:05 +01:00
|
|
|
|
"using num_workers (%d), ignoring %d",
|
|
|
|
|
PTX_WORKER_LENGTH, dims[GOMP_DIM_WORKER]);
|
|
|
|
|
dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH;
|
|
|
|
|
changed = true;
|
|
|
|
|
}
|
2015-09-30 21:16:29 +02:00
|
|
|
|
|
2016-02-01 17:20:13 +01:00
|
|
|
|
if (!decl)
|
|
|
|
|
{
|
|
|
|
|
dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH;
|
|
|
|
|
if (dims[GOMP_DIM_WORKER] < 0)
|
|
|
|
|
dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH;
|
|
|
|
|
if (dims[GOMP_DIM_GANG] < 0)
|
|
|
|
|
dims[GOMP_DIM_GANG] = PTX_GANG_DEFAULT;
|
|
|
|
|
changed = true;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-30 21:16:29 +02:00
|
|
|
|
return changed;
|
|
|
|
|
}
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
|
2015-11-05 14:50:13 +01:00
|
|
|
|
/* Return maximum dimension size, or zero for unbounded. */
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
nvptx_dim_limit (int axis)
|
|
|
|
|
{
|
|
|
|
|
switch (axis)
|
|
|
|
|
{
|
|
|
|
|
case GOMP_DIM_WORKER:
|
|
|
|
|
return PTX_WORKER_LENGTH;
|
|
|
|
|
|
|
|
|
|
case GOMP_DIM_VECTOR:
|
|
|
|
|
return PTX_VECTOR_LENGTH;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(nvptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
/* Determine whether fork & joins are needed. */
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
nvptx_goacc_fork_join (gcall *call, const int dims[],
|
|
|
|
|
bool ARG_UNUSED (is_fork))
|
|
|
|
|
{
|
|
|
|
|
tree arg = gimple_call_arg (call, 2);
|
|
|
|
|
unsigned axis = TREE_INT_CST_LOW (arg);
|
|
|
|
|
|
|
|
|
|
/* We only care about worker and vector partitioning. */
|
|
|
|
|
if (axis < GOMP_DIM_WORKER)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* If the size is 1, there's no partitioning. */
|
|
|
|
|
if (dims[axis] == 1)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
/* Generate a PTX builtin function call that returns the address in
|
|
|
|
|
the worker reduction buffer at OFFSET. TYPE is the type of the
|
|
|
|
|
data at that location. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_get_worker_red_addr (tree type, tree offset)
|
|
|
|
|
{
|
|
|
|
|
machine_mode mode = TYPE_MODE (type);
|
|
|
|
|
tree fndecl = nvptx_builtin_decl (NVPTX_BUILTIN_WORKER_ADDR, true);
|
|
|
|
|
tree size = build_int_cst (unsigned_type_node, GET_MODE_SIZE (mode));
|
|
|
|
|
tree align = build_int_cst (unsigned_type_node,
|
|
|
|
|
GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
|
|
|
|
|
tree call = build_call_expr (fndecl, 3, offset, size, align);
|
|
|
|
|
|
|
|
|
|
return fold_convert (build_pointer_type (type), call);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit a SHFL.DOWN using index SHFL of VAR into DEST_VAR. This function
|
|
|
|
|
will cast the variable if necessary. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_generate_vector_shuffle (location_t loc,
|
|
|
|
|
tree dest_var, tree var, unsigned shift,
|
|
|
|
|
gimple_seq *seq)
|
|
|
|
|
{
|
|
|
|
|
unsigned fn = NVPTX_BUILTIN_SHUFFLE;
|
|
|
|
|
tree_code code = NOP_EXPR;
|
2015-11-13 16:08:11 +01:00
|
|
|
|
tree arg_type = unsigned_type_node;
|
|
|
|
|
tree var_type = TREE_TYPE (var);
|
|
|
|
|
tree dest_type = var_type;
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-11-13 16:08:11 +01:00
|
|
|
|
if (TREE_CODE (var_type) == COMPLEX_TYPE)
|
|
|
|
|
var_type = TREE_TYPE (var_type);
|
|
|
|
|
|
|
|
|
|
if (TREE_CODE (var_type) == REAL_TYPE)
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
code = VIEW_CONVERT_EXPR;
|
2015-11-13 16:08:11 +01:00
|
|
|
|
|
|
|
|
|
if (TYPE_SIZE (var_type)
|
|
|
|
|
== TYPE_SIZE (long_long_unsigned_type_node))
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
{
|
|
|
|
|
fn = NVPTX_BUILTIN_SHUFFLELL;
|
2015-11-13 16:08:11 +01:00
|
|
|
|
arg_type = long_long_unsigned_type_node;
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
}
|
2015-11-13 16:08:11 +01:00
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
tree call = nvptx_builtin_decl (fn, true);
|
2015-11-13 16:08:11 +01:00
|
|
|
|
tree bits = build_int_cst (unsigned_type_node, shift);
|
|
|
|
|
tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN);
|
|
|
|
|
tree expr;
|
|
|
|
|
|
|
|
|
|
if (var_type != dest_type)
|
|
|
|
|
{
|
|
|
|
|
/* Do real and imaginary parts separately. */
|
|
|
|
|
tree real = fold_build1 (REALPART_EXPR, var_type, var);
|
|
|
|
|
real = fold_build1 (code, arg_type, real);
|
|
|
|
|
real = build_call_expr_loc (loc, call, 3, real, bits, kind);
|
|
|
|
|
real = fold_build1 (code, var_type, real);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-11-13 16:08:11 +01:00
|
|
|
|
tree imag = fold_build1 (IMAGPART_EXPR, var_type, var);
|
|
|
|
|
imag = fold_build1 (code, arg_type, imag);
|
|
|
|
|
imag = build_call_expr_loc (loc, call, 3, imag, bits, kind);
|
|
|
|
|
imag = fold_build1 (code, var_type, imag);
|
|
|
|
|
|
|
|
|
|
expr = fold_build2 (COMPLEX_EXPR, dest_type, real, imag);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
expr = fold_build1 (code, arg_type, var);
|
|
|
|
|
expr = build_call_expr_loc (loc, call, 3, expr, bits, kind);
|
|
|
|
|
expr = fold_build1 (code, dest_type, expr);
|
|
|
|
|
}
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-11-13 16:08:11 +01:00
|
|
|
|
gimplify_assign (dest_var, expr, seq);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
/* Lazily generate the global lock var decl and return its address. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_global_lock_addr ()
|
|
|
|
|
{
|
|
|
|
|
tree v = global_lock_var;
|
|
|
|
|
|
|
|
|
|
if (!v)
|
|
|
|
|
{
|
|
|
|
|
tree name = get_identifier ("__reduction_lock");
|
|
|
|
|
tree type = build_qualified_type (unsigned_type_node,
|
|
|
|
|
TYPE_QUAL_VOLATILE);
|
|
|
|
|
v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
|
|
|
|
|
global_lock_var = v;
|
|
|
|
|
DECL_ARTIFICIAL (v) = 1;
|
|
|
|
|
DECL_EXTERNAL (v) = 1;
|
|
|
|
|
TREE_STATIC (v) = 1;
|
|
|
|
|
TREE_PUBLIC (v) = 1;
|
|
|
|
|
TREE_USED (v) = 1;
|
|
|
|
|
mark_addressable (v);
|
|
|
|
|
mark_decl_referenced (v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return build_fold_addr_expr (v);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Insert code to locklessly update *PTR with *PTR OP VAR just before
|
|
|
|
|
GSI. We use a lockless scheme for nearly all case, which looks
|
|
|
|
|
like:
|
|
|
|
|
actual = initval(OP);
|
|
|
|
|
do {
|
|
|
|
|
guess = actual;
|
|
|
|
|
write = guess OP myval;
|
|
|
|
|
actual = cmp&swap (ptr, guess, write)
|
|
|
|
|
} while (actual bit-different-to guess);
|
|
|
|
|
return write;
|
|
|
|
|
|
|
|
|
|
This relies on a cmp&swap instruction, which is available for 32-
|
|
|
|
|
and 64-bit types. Larger types must use a locking scheme. */
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
|
|
|
tree ptr, tree var, tree_code op)
|
|
|
|
|
{
|
|
|
|
|
unsigned fn = NVPTX_BUILTIN_CMP_SWAP;
|
|
|
|
|
tree_code code = NOP_EXPR;
|
2015-11-18 14:49:17 +01:00
|
|
|
|
tree arg_type = unsigned_type_node;
|
|
|
|
|
tree var_type = TREE_TYPE (var);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
if (TREE_CODE (var_type) == COMPLEX_TYPE
|
|
|
|
|
|| TREE_CODE (var_type) == REAL_TYPE)
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
code = VIEW_CONVERT_EXPR;
|
2015-11-18 14:49:17 +01:00
|
|
|
|
|
|
|
|
|
if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
{
|
2015-11-18 14:49:17 +01:00
|
|
|
|
arg_type = long_long_unsigned_type_node;
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
fn = NVPTX_BUILTIN_CMP_SWAPLL;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
tree swap_fn = nvptx_builtin_decl (fn, true);
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
gimple_seq init_seq = NULL;
|
2015-11-18 14:49:17 +01:00
|
|
|
|
tree init_var = make_ssa_name (arg_type);
|
|
|
|
|
tree init_expr = omp_reduction_init_op (loc, op, var_type);
|
|
|
|
|
init_expr = fold_build1 (code, arg_type, init_expr);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
gimplify_assign (init_var, init_expr, &init_seq);
|
|
|
|
|
gimple *init_end = gimple_seq_last (init_seq);
|
|
|
|
|
|
|
|
|
|
gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
/* Split the block just after the init stmts. */
|
|
|
|
|
basic_block pre_bb = gsi_bb (*gsi);
|
|
|
|
|
edge pre_edge = split_block (pre_bb, init_end);
|
|
|
|
|
basic_block loop_bb = pre_edge->dest;
|
|
|
|
|
pre_bb = pre_edge->src;
|
|
|
|
|
/* Reset the iterator. */
|
|
|
|
|
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
tree expect_var = make_ssa_name (arg_type);
|
|
|
|
|
tree actual_var = make_ssa_name (arg_type);
|
|
|
|
|
tree write_var = make_ssa_name (arg_type);
|
|
|
|
|
|
|
|
|
|
/* Build and insert the reduction calculation. */
|
|
|
|
|
gimple_seq red_seq = NULL;
|
|
|
|
|
tree write_expr = fold_build1 (code, var_type, expect_var);
|
|
|
|
|
write_expr = fold_build2 (op, var_type, write_expr, var);
|
|
|
|
|
write_expr = fold_build1 (code, arg_type, write_expr);
|
|
|
|
|
gimplify_assign (write_var, write_expr, &red_seq);
|
|
|
|
|
|
|
|
|
|
gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
/* Build & insert the cmp&swap sequence. */
|
|
|
|
|
gimple_seq latch_seq = NULL;
|
|
|
|
|
tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
|
|
|
|
|
ptr, expect_var, write_var);
|
|
|
|
|
gimplify_assign (actual_var, swap_expr, &latch_seq);
|
|
|
|
|
|
|
|
|
|
gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
|
|
|
|
|
NULL_TREE, NULL_TREE);
|
|
|
|
|
gimple_seq_add_stmt (&latch_seq, cond);
|
|
|
|
|
|
|
|
|
|
gimple *latch_end = gimple_seq_last (latch_seq);
|
|
|
|
|
gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
/* Split the block just after the latch stmts. */
|
|
|
|
|
edge post_edge = split_block (loop_bb, latch_end);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
basic_block post_bb = post_edge->dest;
|
|
|
|
|
loop_bb = post_edge->src;
|
|
|
|
|
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
|
|
|
|
|
|
|
|
post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
|
|
|
|
|
edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
|
|
|
|
|
set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
|
|
|
|
|
set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
|
|
|
|
|
|
|
|
|
|
gphi *phi = create_phi_node (expect_var, loop_bb);
|
|
|
|
|
add_phi_arg (phi, init_var, pre_edge, loc);
|
|
|
|
|
add_phi_arg (phi, actual_var, loop_edge, loc);
|
|
|
|
|
|
|
|
|
|
loop *loop = alloc_loop ();
|
|
|
|
|
loop->header = loop_bb;
|
|
|
|
|
loop->latch = loop_bb;
|
|
|
|
|
add_loop (loop, loop_bb->loop_father);
|
|
|
|
|
|
2015-11-18 14:49:17 +01:00
|
|
|
|
return fold_build1 (code, var_type, write_var);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Insert code to lockfully update *PTR with *PTR OP VAR just before
|
|
|
|
|
GSI. This is necessary for types larger than 64 bits, where there
|
|
|
|
|
is no cmp&swap instruction to implement a lockless scheme. We use
|
|
|
|
|
a lock variable in global memory.
|
|
|
|
|
|
|
|
|
|
while (cmp&swap (&lock_var, 0, 1))
|
|
|
|
|
continue;
|
|
|
|
|
T accum = *ptr;
|
|
|
|
|
accum = accum OP var;
|
|
|
|
|
*ptr = accum;
|
|
|
|
|
cmp&swap (&lock_var, 1, 0);
|
|
|
|
|
return accum;
|
|
|
|
|
|
|
|
|
|
A lock in global memory is necessary to force execution engine
|
|
|
|
|
descheduling and avoid resource starvation that can occur if the
|
|
|
|
|
lock is in .shared memory. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
|
|
|
tree ptr, tree var, tree_code op)
|
|
|
|
|
{
|
|
|
|
|
tree var_type = TREE_TYPE (var);
|
|
|
|
|
tree swap_fn = nvptx_builtin_decl (NVPTX_BUILTIN_CMP_SWAP, true);
|
|
|
|
|
tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
|
|
|
|
|
tree uns_locked = build_int_cst (unsigned_type_node, 1);
|
|
|
|
|
|
|
|
|
|
/* Split the block just before the gsi. Insert a gimple nop to make
|
|
|
|
|
this easier. */
|
|
|
|
|
gimple *nop = gimple_build_nop ();
|
|
|
|
|
gsi_insert_before (gsi, nop, GSI_SAME_STMT);
|
|
|
|
|
basic_block entry_bb = gsi_bb (*gsi);
|
|
|
|
|
edge entry_edge = split_block (entry_bb, nop);
|
|
|
|
|
basic_block lock_bb = entry_edge->dest;
|
|
|
|
|
/* Reset the iterator. */
|
|
|
|
|
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
|
|
|
|
|
|
|
|
/* Build and insert the locking sequence. */
|
|
|
|
|
gimple_seq lock_seq = NULL;
|
|
|
|
|
tree lock_var = make_ssa_name (unsigned_type_node);
|
|
|
|
|
tree lock_expr = nvptx_global_lock_addr ();
|
|
|
|
|
lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
|
|
|
|
|
uns_unlocked, uns_locked);
|
|
|
|
|
gimplify_assign (lock_var, lock_expr, &lock_seq);
|
|
|
|
|
gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
|
|
|
|
|
NULL_TREE, NULL_TREE);
|
|
|
|
|
gimple_seq_add_stmt (&lock_seq, cond);
|
|
|
|
|
gimple *lock_end = gimple_seq_last (lock_seq);
|
|
|
|
|
gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
/* Split the block just after the lock sequence. */
|
|
|
|
|
edge locked_edge = split_block (lock_bb, lock_end);
|
|
|
|
|
basic_block update_bb = locked_edge->dest;
|
|
|
|
|
lock_bb = locked_edge->src;
|
|
|
|
|
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
|
|
|
|
|
|
|
|
/* Create the lock loop ... */
|
|
|
|
|
locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
|
|
|
|
|
make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
|
|
|
|
|
set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
|
|
|
|
|
set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
|
|
|
|
|
|
|
|
|
|
/* ... and the loop structure. */
|
|
|
|
|
loop *lock_loop = alloc_loop ();
|
|
|
|
|
lock_loop->header = lock_bb;
|
|
|
|
|
lock_loop->latch = lock_bb;
|
|
|
|
|
lock_loop->nb_iterations_estimate = 1;
|
|
|
|
|
lock_loop->any_estimate = true;
|
|
|
|
|
add_loop (lock_loop, entry_bb->loop_father);
|
|
|
|
|
|
|
|
|
|
/* Build and insert the reduction calculation. */
|
|
|
|
|
gimple_seq red_seq = NULL;
|
|
|
|
|
tree acc_in = make_ssa_name (var_type);
|
|
|
|
|
tree ref_in = build_simple_mem_ref (ptr);
|
|
|
|
|
TREE_THIS_VOLATILE (ref_in) = 1;
|
|
|
|
|
gimplify_assign (acc_in, ref_in, &red_seq);
|
|
|
|
|
|
|
|
|
|
tree acc_out = make_ssa_name (var_type);
|
|
|
|
|
tree update_expr = fold_build2 (op, var_type, ref_in, var);
|
|
|
|
|
gimplify_assign (acc_out, update_expr, &red_seq);
|
|
|
|
|
|
|
|
|
|
tree ref_out = build_simple_mem_ref (ptr);
|
|
|
|
|
TREE_THIS_VOLATILE (ref_out) = 1;
|
|
|
|
|
gimplify_assign (ref_out, acc_out, &red_seq);
|
|
|
|
|
|
|
|
|
|
gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
/* Build & insert the unlock sequence. */
|
|
|
|
|
gimple_seq unlock_seq = NULL;
|
|
|
|
|
tree unlock_expr = nvptx_global_lock_addr ();
|
|
|
|
|
unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
|
|
|
|
|
uns_locked, uns_unlocked);
|
|
|
|
|
gimplify_and_add (unlock_expr, &unlock_seq);
|
|
|
|
|
gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
return acc_out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit a sequence to update a reduction accumlator at *PTR with the
|
|
|
|
|
value held in VAR using operator OP. Return the updated value.
|
|
|
|
|
|
|
|
|
|
TODO: optimize for atomic ops and indepedent complex ops. */
|
|
|
|
|
|
|
|
|
|
static tree
|
|
|
|
|
nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
|
|
|
tree ptr, tree var, tree_code op)
|
|
|
|
|
{
|
|
|
|
|
tree type = TREE_TYPE (var);
|
|
|
|
|
tree size = TYPE_SIZE (type);
|
|
|
|
|
|
|
|
|
|
if (size == TYPE_SIZE (unsigned_type_node)
|
|
|
|
|
|| size == TYPE_SIZE (long_long_unsigned_type_node))
|
|
|
|
|
return nvptx_lockless_update (loc, gsi, ptr, var, op);
|
|
|
|
|
else
|
|
|
|
|
return nvptx_lockfull_update (loc, gsi, ptr, var, op);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NVPTX implementation of GOACC_REDUCTION_SETUP. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_goacc_reduction_setup (gcall *call)
|
|
|
|
|
{
|
|
|
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
|
|
|
tree lhs = gimple_call_lhs (call);
|
|
|
|
|
tree var = gimple_call_arg (call, 2);
|
|
|
|
|
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
|
|
|
gimple_seq seq = NULL;
|
|
|
|
|
|
|
|
|
|
push_gimplify_context (true);
|
|
|
|
|
|
|
|
|
|
if (level != GOMP_DIM_GANG)
|
|
|
|
|
{
|
|
|
|
|
/* Copy the receiver object. */
|
|
|
|
|
tree ref_to_res = gimple_call_arg (call, 1);
|
|
|
|
|
|
|
|
|
|
if (!integer_zerop (ref_to_res))
|
|
|
|
|
var = build_simple_mem_ref (ref_to_res);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (level == GOMP_DIM_WORKER)
|
|
|
|
|
{
|
|
|
|
|
/* Store incoming value to worker reduction buffer. */
|
|
|
|
|
tree offset = gimple_call_arg (call, 5);
|
|
|
|
|
tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
|
|
|
|
|
tree ptr = make_ssa_name (TREE_TYPE (call));
|
|
|
|
|
|
|
|
|
|
gimplify_assign (ptr, call, &seq);
|
|
|
|
|
tree ref = build_simple_mem_ref (ptr);
|
|
|
|
|
TREE_THIS_VOLATILE (ref) = 1;
|
|
|
|
|
gimplify_assign (ref, var, &seq);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lhs)
|
|
|
|
|
gimplify_assign (lhs, var, &seq);
|
|
|
|
|
|
|
|
|
|
pop_gimplify_context (NULL);
|
|
|
|
|
gsi_replace_with_seq (&gsi, seq, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NVPTX implementation of GOACC_REDUCTION_INIT. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_goacc_reduction_init (gcall *call)
|
|
|
|
|
{
|
|
|
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
|
|
|
tree lhs = gimple_call_lhs (call);
|
|
|
|
|
tree var = gimple_call_arg (call, 2);
|
|
|
|
|
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
|
|
|
enum tree_code rcode
|
|
|
|
|
= (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
|
|
|
|
|
tree init = omp_reduction_init_op (gimple_location (call), rcode,
|
|
|
|
|
TREE_TYPE (var));
|
|
|
|
|
gimple_seq seq = NULL;
|
|
|
|
|
|
|
|
|
|
push_gimplify_context (true);
|
|
|
|
|
|
|
|
|
|
if (level == GOMP_DIM_VECTOR)
|
|
|
|
|
{
|
|
|
|
|
/* Initialize vector-non-zeroes to INIT_VAL (OP). */
|
|
|
|
|
tree tid = make_ssa_name (integer_type_node);
|
|
|
|
|
tree dim_vector = gimple_call_arg (call, 3);
|
|
|
|
|
gimple *tid_call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1,
|
|
|
|
|
dim_vector);
|
|
|
|
|
gimple *cond_stmt = gimple_build_cond (NE_EXPR, tid, integer_zero_node,
|
|
|
|
|
NULL_TREE, NULL_TREE);
|
|
|
|
|
|
|
|
|
|
gimple_call_set_lhs (tid_call, tid);
|
|
|
|
|
gimple_seq_add_stmt (&seq, tid_call);
|
|
|
|
|
gimple_seq_add_stmt (&seq, cond_stmt);
|
|
|
|
|
|
|
|
|
|
/* Split the block just after the call. */
|
|
|
|
|
edge init_edge = split_block (gsi_bb (gsi), call);
|
|
|
|
|
basic_block init_bb = init_edge->dest;
|
|
|
|
|
basic_block call_bb = init_edge->src;
|
|
|
|
|
|
|
|
|
|
/* Fixup flags from call_bb to init_bb. */
|
|
|
|
|
init_edge->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
|
|
|
|
|
|
|
|
|
|
/* Set the initialization stmts. */
|
|
|
|
|
gimple_seq init_seq = NULL;
|
|
|
|
|
tree init_var = make_ssa_name (TREE_TYPE (var));
|
|
|
|
|
gimplify_assign (init_var, init, &init_seq);
|
|
|
|
|
gsi = gsi_start_bb (init_bb);
|
|
|
|
|
gsi_insert_seq_before (&gsi, init_seq, GSI_SAME_STMT);
|
|
|
|
|
|
|
|
|
|
/* Split block just after the init stmt. */
|
|
|
|
|
gsi_prev (&gsi);
|
|
|
|
|
edge inited_edge = split_block (gsi_bb (gsi), gsi_stmt (gsi));
|
|
|
|
|
basic_block dst_bb = inited_edge->dest;
|
|
|
|
|
|
|
|
|
|
/* Create false edge from call_bb to dst_bb. */
|
|
|
|
|
edge nop_edge = make_edge (call_bb, dst_bb, EDGE_FALSE_VALUE);
|
|
|
|
|
|
|
|
|
|
/* Create phi node in dst block. */
|
|
|
|
|
gphi *phi = create_phi_node (lhs, dst_bb);
|
|
|
|
|
add_phi_arg (phi, init_var, inited_edge, gimple_location (call));
|
|
|
|
|
add_phi_arg (phi, var, nop_edge, gimple_location (call));
|
|
|
|
|
|
|
|
|
|
/* Reset dominator of dst bb. */
|
|
|
|
|
set_immediate_dominator (CDI_DOMINATORS, dst_bb, call_bb);
|
|
|
|
|
|
|
|
|
|
/* Reset the gsi. */
|
|
|
|
|
gsi = gsi_for_stmt (call);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (level == GOMP_DIM_GANG)
|
|
|
|
|
{
|
|
|
|
|
/* If there's no receiver object, propagate the incoming VAR. */
|
|
|
|
|
tree ref_to_res = gimple_call_arg (call, 1);
|
|
|
|
|
if (integer_zerop (ref_to_res))
|
|
|
|
|
init = var;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gimplify_assign (lhs, init, &seq);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pop_gimplify_context (NULL);
|
|
|
|
|
gsi_replace_with_seq (&gsi, seq, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NVPTX implementation of GOACC_REDUCTION_FINI. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_goacc_reduction_fini (gcall *call)
|
|
|
|
|
{
|
|
|
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
|
|
|
tree lhs = gimple_call_lhs (call);
|
|
|
|
|
tree ref_to_res = gimple_call_arg (call, 1);
|
|
|
|
|
tree var = gimple_call_arg (call, 2);
|
|
|
|
|
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
|
|
|
enum tree_code op
|
|
|
|
|
= (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
|
|
|
|
|
gimple_seq seq = NULL;
|
|
|
|
|
tree r = NULL_TREE;;
|
|
|
|
|
|
|
|
|
|
push_gimplify_context (true);
|
|
|
|
|
|
|
|
|
|
if (level == GOMP_DIM_VECTOR)
|
|
|
|
|
{
|
|
|
|
|
/* Emit binary shuffle tree. TODO. Emit this as an actual loop,
|
|
|
|
|
but that requires a method of emitting a unified jump at the
|
|
|
|
|
gimple level. */
|
|
|
|
|
for (int shfl = PTX_VECTOR_LENGTH / 2; shfl > 0; shfl = shfl >> 1)
|
|
|
|
|
{
|
|
|
|
|
tree other_var = make_ssa_name (TREE_TYPE (var));
|
|
|
|
|
nvptx_generate_vector_shuffle (gimple_location (call),
|
|
|
|
|
other_var, var, shfl, &seq);
|
|
|
|
|
|
|
|
|
|
r = make_ssa_name (TREE_TYPE (var));
|
|
|
|
|
gimplify_assign (r, fold_build2 (op, TREE_TYPE (var),
|
|
|
|
|
var, other_var), &seq);
|
|
|
|
|
var = r;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
tree accum = NULL_TREE;
|
|
|
|
|
|
|
|
|
|
if (level == GOMP_DIM_WORKER)
|
|
|
|
|
{
|
|
|
|
|
/* Get reduction buffer address. */
|
|
|
|
|
tree offset = gimple_call_arg (call, 5);
|
|
|
|
|
tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
|
|
|
|
|
tree ptr = make_ssa_name (TREE_TYPE (call));
|
|
|
|
|
|
|
|
|
|
gimplify_assign (ptr, call, &seq);
|
|
|
|
|
accum = ptr;
|
|
|
|
|
}
|
|
|
|
|
else if (integer_zerop (ref_to_res))
|
|
|
|
|
r = var;
|
|
|
|
|
else
|
|
|
|
|
accum = ref_to_res;
|
|
|
|
|
|
|
|
|
|
if (accum)
|
|
|
|
|
{
|
2015-11-18 14:49:17 +01:00
|
|
|
|
/* UPDATE the accumulator. */
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
|
|
|
seq = NULL;
|
2015-11-18 14:49:17 +01:00
|
|
|
|
r = nvptx_reduction_update (gimple_location (call), &gsi,
|
|
|
|
|
accum, var, op);
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lhs)
|
|
|
|
|
gimplify_assign (lhs, r, &seq);
|
|
|
|
|
pop_gimplify_context (NULL);
|
|
|
|
|
|
|
|
|
|
gsi_replace_with_seq (&gsi, seq, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NVPTX implementation of GOACC_REDUCTION_TEARDOWN. */
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
nvptx_goacc_reduction_teardown (gcall *call)
|
|
|
|
|
{
|
|
|
|
|
gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
|
|
|
tree lhs = gimple_call_lhs (call);
|
|
|
|
|
tree var = gimple_call_arg (call, 2);
|
|
|
|
|
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
|
|
|
gimple_seq seq = NULL;
|
|
|
|
|
|
|
|
|
|
push_gimplify_context (true);
|
|
|
|
|
if (level == GOMP_DIM_WORKER)
|
|
|
|
|
{
|
|
|
|
|
/* Read the worker reduction buffer. */
|
|
|
|
|
tree offset = gimple_call_arg (call, 5);
|
|
|
|
|
tree call = nvptx_get_worker_red_addr(TREE_TYPE (var), offset);
|
|
|
|
|
tree ptr = make_ssa_name (TREE_TYPE (call));
|
|
|
|
|
|
|
|
|
|
gimplify_assign (ptr, call, &seq);
|
|
|
|
|
var = build_simple_mem_ref (ptr);
|
|
|
|
|
TREE_THIS_VOLATILE (var) = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (level != GOMP_DIM_GANG)
|
|
|
|
|
{
|
|
|
|
|
/* Write to the receiver object. */
|
|
|
|
|
tree ref_to_res = gimple_call_arg (call, 1);
|
|
|
|
|
|
|
|
|
|
if (!integer_zerop (ref_to_res))
|
|
|
|
|
gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lhs)
|
|
|
|
|
gimplify_assign (lhs, var, &seq);
|
|
|
|
|
|
|
|
|
|
pop_gimplify_context (NULL);
|
|
|
|
|
|
|
|
|
|
gsi_replace_with_seq (&gsi, seq, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* NVPTX reduction expander. */
|
|
|
|
|
|
2015-12-16 21:02:02 +01:00
|
|
|
|
static void
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
nvptx_goacc_reduction (gcall *call)
|
|
|
|
|
{
|
|
|
|
|
unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
|
|
|
|
|
|
|
|
|
|
switch (code)
|
|
|
|
|
{
|
|
|
|
|
case IFN_GOACC_REDUCTION_SETUP:
|
|
|
|
|
nvptx_goacc_reduction_setup (call);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case IFN_GOACC_REDUCTION_INIT:
|
|
|
|
|
nvptx_goacc_reduction_init (call);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case IFN_GOACC_REDUCTION_FINI:
|
|
|
|
|
nvptx_goacc_reduction_fini (call);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case IFN_GOACC_REDUCTION_TEARDOWN:
|
|
|
|
|
nvptx_goacc_reduction_teardown (call);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gcc_unreachable ();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#undef TARGET_OPTION_OVERRIDE
|
|
|
|
|
#define TARGET_OPTION_OVERRIDE nvptx_option_override
|
|
|
|
|
|
|
|
|
|
#undef TARGET_ATTRIBUTE_TABLE
|
|
|
|
|
#define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
|
|
|
|
|
|
2016-09-14 12:46:12 +02:00
|
|
|
|
#undef TARGET_LRA_P
|
|
|
|
|
#define TARGET_LRA_P hook_bool_void_false
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#undef TARGET_LEGITIMATE_ADDRESS_P
|
|
|
|
|
#define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
|
|
|
|
|
|
|
|
|
|
#undef TARGET_PROMOTE_FUNCTION_MODE
|
|
|
|
|
#define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode
|
|
|
|
|
|
|
|
|
|
#undef TARGET_FUNCTION_ARG
|
|
|
|
|
#define TARGET_FUNCTION_ARG nvptx_function_arg
|
|
|
|
|
#undef TARGET_FUNCTION_INCOMING_ARG
|
|
|
|
|
#define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
|
|
|
|
|
#undef TARGET_FUNCTION_ARG_ADVANCE
|
|
|
|
|
#define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
|
2016-05-17 23:16:49 +02:00
|
|
|
|
#undef TARGET_FUNCTION_ARG_BOUNDARY
|
|
|
|
|
#define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#undef TARGET_PASS_BY_REFERENCE
|
|
|
|
|
#define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
|
|
|
|
|
#undef TARGET_FUNCTION_VALUE_REGNO_P
|
|
|
|
|
#define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
|
|
|
|
|
#undef TARGET_FUNCTION_VALUE
|
|
|
|
|
#define TARGET_FUNCTION_VALUE nvptx_function_value
|
|
|
|
|
#undef TARGET_LIBCALL_VALUE
|
|
|
|
|
#define TARGET_LIBCALL_VALUE nvptx_libcall_value
|
|
|
|
|
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
|
|
|
|
#define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
|
2015-08-21 21:34:34 +02:00
|
|
|
|
#undef TARGET_GET_DRAP_RTX
|
|
|
|
|
#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#undef TARGET_SPLIT_COMPLEX_ARG
|
|
|
|
|
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
|
|
|
|
|
#undef TARGET_RETURN_IN_MEMORY
|
|
|
|
|
#define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
|
|
|
|
|
#undef TARGET_OMIT_STRUCT_RETURN_REG
|
|
|
|
|
#define TARGET_OMIT_STRUCT_RETURN_REG true
|
|
|
|
|
#undef TARGET_STRICT_ARGUMENT_NAMING
|
|
|
|
|
#define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
|
|
|
|
|
#undef TARGET_CALL_ARGS
|
|
|
|
|
#define TARGET_CALL_ARGS nvptx_call_args
|
|
|
|
|
#undef TARGET_END_CALL_ARGS
|
|
|
|
|
#define TARGET_END_CALL_ARGS nvptx_end_call_args
|
|
|
|
|
|
|
|
|
|
#undef TARGET_ASM_FILE_START
|
|
|
|
|
#define TARGET_ASM_FILE_START nvptx_file_start
|
|
|
|
|
#undef TARGET_ASM_FILE_END
|
|
|
|
|
#define TARGET_ASM_FILE_END nvptx_file_end
|
|
|
|
|
#undef TARGET_ASM_GLOBALIZE_LABEL
|
|
|
|
|
#define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
|
|
|
|
|
#undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
|
|
|
|
|
#define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
|
|
|
|
|
#undef TARGET_PRINT_OPERAND
|
|
|
|
|
#define TARGET_PRINT_OPERAND nvptx_print_operand
|
|
|
|
|
#undef TARGET_PRINT_OPERAND_ADDRESS
|
|
|
|
|
#define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
|
|
|
|
|
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
|
|
|
|
|
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
|
|
|
|
|
#undef TARGET_ASM_INTEGER
|
|
|
|
|
#define TARGET_ASM_INTEGER nvptx_assemble_integer
|
|
|
|
|
#undef TARGET_ASM_DECL_END
|
|
|
|
|
#define TARGET_ASM_DECL_END nvptx_assemble_decl_end
|
|
|
|
|
#undef TARGET_ASM_DECLARE_CONSTANT_NAME
|
|
|
|
|
#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
|
|
|
|
|
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
|
|
|
|
|
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
|
|
|
|
|
#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
|
|
|
|
|
#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
|
|
|
|
|
|
|
|
|
|
#undef TARGET_MACHINE_DEPENDENT_REORG
|
|
|
|
|
#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
|
|
|
|
|
#undef TARGET_NO_REGISTER_ALLOCATION
|
|
|
|
|
#define TARGET_NO_REGISTER_ALLOCATION true
|
|
|
|
|
|
nvptx-protos.h (npvptx_section_from_addr_space): Delete.
gcc/
* config/nvptx/nvptx-protos.h (npvptx_section_from_addr_space): Delete.
* config/nvptx/nvptx.c (enum nvptx_data_area): New.
(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
(nvptx_option_override): Set data ares for worker vars.
(nvptx_addr_space_from_sym): Delete.
(nvptx_encode_section_info): New.
(section_for_sym, section_for_decl): New.
(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
flags,
(nvptx_section_from_addr_space): Delete.
(nvptx_section_for_decl): Delete.
(nvptx_output_aligned, nvptx_declare_object_name,
nvptx_assemble_undefined_decl): Use section_for_decl, remove
unnecessary checks.
(nvptx_print_operand): Add 'D', adjust 'A'.
(nvptx_expand_worker_addr): Adjust unspec generation.
(TARGET_ENCODE_SECTION_INFO): Override.
* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
(UNSPEC_TO_GENERIC): New.
(nvptx_register_or_symbolic_operand): Delete.
(cvt_code, cvt_name, cvt_str): Delete.
(convaddr_<cvt_name><mode> [P]): Delete.
(convaddr_<mode> [P]): New.
gcc/testsuite/
* gcc.target/nvptx/decl.c: New.
* gcc.target/nvptx/uninit-decl.c: Robustify regexps.
From-SVN: r231227
2015-12-03 14:59:20 +01:00
|
|
|
|
#undef TARGET_ENCODE_SECTION_INFO
|
|
|
|
|
#define TARGET_ENCODE_SECTION_INFO nvptx_encode_section_info
|
2015-01-28 18:03:44 +01:00
|
|
|
|
#undef TARGET_RECORD_OFFLOAD_SYMBOL
|
|
|
|
|
#define TARGET_RECORD_OFFLOAD_SYMBOL nvptx_record_offload_symbol
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
#undef TARGET_VECTOR_ALIGNMENT
|
|
|
|
|
#define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
#undef TARGET_CANNOT_COPY_INSN_P
|
|
|
|
|
#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
|
|
|
|
|
|
2015-11-20 15:23:18 +01:00
|
|
|
|
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
|
|
|
|
|
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P nvptx_use_anchors_for_symbol_p
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
#undef TARGET_INIT_BUILTINS
|
|
|
|
|
#define TARGET_INIT_BUILTINS nvptx_init_builtins
|
|
|
|
|
#undef TARGET_EXPAND_BUILTIN
|
|
|
|
|
#define TARGET_EXPAND_BUILTIN nvptx_expand_builtin
|
|
|
|
|
#undef TARGET_BUILTIN_DECL
|
|
|
|
|
#define TARGET_BUILTIN_DECL nvptx_builtin_decl
|
|
|
|
|
|
2016-11-16 18:17:00 +01:00
|
|
|
|
#undef TARGET_SIMT_VF
|
|
|
|
|
#define TARGET_SIMT_VF nvptx_simt_vf
|
|
|
|
|
|
2015-09-30 21:16:29 +02:00
|
|
|
|
#undef TARGET_GOACC_VALIDATE_DIMS
|
|
|
|
|
#define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
|
|
|
|
|
|
2015-11-05 14:50:13 +01:00
|
|
|
|
#undef TARGET_GOACC_DIM_LIMIT
|
|
|
|
|
#define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
|
|
|
|
|
|
nvptx.h (struct machine_function): Add axis_predicate.
* config/nvptx/nvptx.h (struct machine_function): Add
axis_predicate.
* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_fork,
nvptx_expand_oacc_join): Declare.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID): Delete.
(UNSPEC_DIM_SIZE, UNSPEC_SHARED_DATA, UNSPEC_BIT_CONV,
UNSPEC_SHUFFLE, UNSPEC_BR_UNIFIED): New.
(UNSPECV_BARSYNC, UNSPECV_DIM_POS, UNSPECV_FORK, UNSPECV_FORKED,
UNSPECV_JOINING, UNSPECV_JOIN): New.
(BITS, BITD): New mode iterators.
(br_true_uni, br_false_uni): New.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid): Delete.
(oacc_dim_size, oacc_dim_pos): New.
(nvptx_fork, nvptx_forked, nvptx_joining, nvptx_join): New.
(oacc_fork, oacc_join): New.
(nvptx_shuffle<mode>, unpack<mode>si2, packsi<mode>2): New.
(worker_load<mode>, worker_store<mode>): New.
(nvptx_barsync): New.
* config/nvptx/nvptx.c: Include gimple.h & dumpfile.h.
(SHUFFLE_UP, SHUFFLE_DOWN, SHUFFLE_BFLY, SHUFFLE_IDX): Define.
(worker_bcast_hwm, worker_bcast_align, worker_bcast_name,
worker_bcast_sym): New.
(nvptx_option_override): Initialize worker broadcast buffer.
(nvptx_emit_forking, nvptx_emit_joining): New.
(nvptx_init_axis_predicate): New.
(nvptx_declare_function_name): Init axis predicates.
(nvptx_expand_call): Add fork/join markers around routine call.
(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): New.
(nvptx_gen_unpack, nvptx_gen_pack, nvptx_gen_shuffle): New.
(nvptx_gen_vcast): New.
(struct wcast_data_t): New.
(enum propagate_mask): New.
(nvptx_gen_wcast): New.
(nvptx_print_operand): Add 'S' case.
(struct parallel): New.
(parallel::parallel, parallel::~parallel): New.
(bb_insn_map_t, insn_bb_t, insn_bb_vec_t): New typedefs.
(nvptx_split_blocks, nvptx_discover_pre, nvptx_dump_pars,
nvptx_find_par, nvptx_discover_pars): New.
(nvptx_propagate): New.
(vprop_gen, nvptx_vpropagate): New.
(wprop_gen, nvptx_wpropagate): New.
(nvptx_wsync): New.
(nvptx_single, nvptx_skip_par): New.
(nvptx_process_pars, nvptx_neuter_pars): New.
(ntptx_reorg): Split blocks, generate parallel structure, apply
neutering.
(nvptx_cannot_copy_insn_p): New.
(nvptx_file_end): Emit worker broadcast decl.
(nvptx_goacc_fork_join): New.
(TARGET_CANNOT_COPY_INSN_P): Override.
(TARGET_GOACC_FORK_JOIN): Override.
From-SVN: r229486
2015-10-28 15:24:39 +01:00
|
|
|
|
#undef TARGET_GOACC_FORK_JOIN
|
|
|
|
|
#define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
|
|
|
|
|
|
nvptx.c: Include gimple headers.
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teaddown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r229768
2015-11-04 17:58:52 +01:00
|
|
|
|
#undef TARGET_GOACC_REDUCTION
|
|
|
|
|
#define TARGET_GOACC_REDUCTION nvptx_goacc_reduction
|
|
|
|
|
|
2014-11-10 17:12:42 +01:00
|
|
|
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
|
|
|
|
|
|
|
|
|
#include "gt-nvptx.h"
|