RISC-V: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
The Zbb support has introduced ctz and clz to the backend, but some transformations in GCC need to know what the value of c[lt]z at zero is. This affects how the optab is generated and may suppress use of CLZ/CTZ in tree passes. Among other things, this is needed for the transformation of table-based ctz-implementations, such as in deepsjeng, to work (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90838). Prior to this change, the test case from PR90838 would compile to on RISC-V targets with Zbb: myctz: lui a4,%hi(.LC0) ld a4,%lo(.LC0)(a4) neg a5,a0 and a5,a5,a0 mul a5,a5,a4 lui a4,%hi(.LANCHOR0) addi a4,a4,%lo(.LANCHOR0) srli a5,a5,58 sh2add a5,a5,a4 lw a0,0(a5) ret After this change, we get: myctz: ctz a0,a0 andi a0,a0,63 ret Testing this with deepsjeng_r (from SPEC 2017) against QEMU, this shows a clear reduction in dynamic instruction count: - before 1961888067076 - after 1907928279874 (2.75% reduction) This also merges the various target-specific test-cases (for x86-64, aarch64 and riscv) within gcc.dg/pr90838.c. This extends the macros (i.e., effective-target keywords) used in testing (lib/target-supports.exp) to reliably distinguish between RV32 and RV64 via __riscv_xlen (i.e., the integer register bitwidth) : testing for ILP32 could be misleading (as ILP32 is a valid memory model for 64bit systems). gcc/ChangeLog: * config/riscv/riscv.h (CLZ_DEFINED_VALUE_AT_ZERO): Implement. (CTZ_DEFINED_VALUE_AT_ZERO): Same. * doc/sourcebuild.texi: add documentation for RISC-V specific test target keywords gcc/testsuite/ChangeLog: * gcc.dg/pr90838.c: Add additional flags (dg-additional-options) when compiling for riscv64 and subsume gcc.target/aarch64/pr90838.c and gcc.target/i386/pr95863-2.c. * gcc.target/aarch64/pr90838.c: Removed. * gcc.target/i386/pr95863-2.c: Removed. * lib/target-supports.exp: Recognize RV32 or RV64 via XLEN Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> Signed-off-by: Manolis Tsamis <manolis.tsamis@vrull.eu> Co-authored-by: Manolis Tsamis <manolis.tsamis@vrull.eu>
This commit is contained in:
parent
7f40ac326a
commit
16f7fcadac
|
@ -1004,4 +1004,9 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
|
|||
|
||||
#define HARD_REGNO_RENAME_OK(FROM, TO) riscv_hard_regno_rename_ok (FROM, TO)
|
||||
|
||||
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||
((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||
((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||
|
||||
#endif /* ! GCC_RISCV_H */
|
||||
|
|
|
@ -2420,6 +2420,18 @@ PowerPC target pre-defines macro _ARCH_PWR9 which means the @code{-mcpu}
|
|||
setting is Power9 or later.
|
||||
@end table
|
||||
|
||||
@subsection RISC-V specific attributes
|
||||
|
||||
@table @code
|
||||
|
||||
@item rv32
|
||||
Test system has an integer register width of 32 bits.
|
||||
|
||||
@item rv64
|
||||
Test system has an integer register width of 64 bits.
|
||||
|
||||
@end table
|
||||
|
||||
@subsubsection Other hardware attributes
|
||||
|
||||
@c Please keep this table sorted alphabetically.
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-forwprop2-details" } */
|
||||
/* { dg-additional-options "-mbmi" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } */
|
||||
/* { dg-additional-options "-march=rv64gc_zbb" { target { rv64 } } } */
|
||||
/* { dg-additional-options "-march=rv32gc_zbb" { target { rv32 } } } */
|
||||
|
||||
int ctz1 (unsigned x)
|
||||
{
|
||||
|
@ -56,4 +59,26 @@ int ctz4 (unsigned long x)
|
|||
return table[(lsb * magic) >> 58];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-times "tzcntq\t" 1 { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-times "tzcntl\t" 3 { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-times "andl\t" 2 { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-not "negq" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-not "imulq" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
/* { dg-final { scan-assembler-not "shrq" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target aarch64*-*-* } } } */
|
||||
/* { dg-final { scan-assembler-times "clz\t" 4 { target aarch64*-*-* } } } */
|
||||
/* { dg-final { scan-assembler-times "and\t" 2 { target aarch64*-*-* } } } */
|
||||
/* { dg-final { scan-assembler-not "cmp\t.*0" { target aarch64*-*-* } } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { rv64 } } } } */
|
||||
/* { dg-final { scan-assembler-times "ctz\t" 1 { target { rv64 } } } } */
|
||||
/* { dg-final { scan-assembler-times "ctzw\t" 3 { target { rv64 } } } } */
|
||||
/* { dg-final { scan-assembler-times "andi\t" 2 { target { rv64 } } } } */
|
||||
/* { dg-final { scan-assembler-not "mul" { target { rv64 } } } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times {= \.CTZ} 3 "forwprop2" { target { rv32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
int ctz1 (unsigned x)
|
||||
{
|
||||
static const char table[32] =
|
||||
{
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
};
|
||||
|
||||
return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
|
||||
}
|
||||
|
||||
int ctz2 (unsigned x)
|
||||
{
|
||||
#define u 0
|
||||
static short table[64] =
|
||||
{
|
||||
32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
|
||||
10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
|
||||
31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
|
||||
30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
|
||||
};
|
||||
|
||||
x = (x & -x) * 0x0450FBAF;
|
||||
return table[x >> 26];
|
||||
}
|
||||
|
||||
int ctz3 (unsigned x)
|
||||
{
|
||||
static int table[32] =
|
||||
{
|
||||
0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
|
||||
31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
|
||||
};
|
||||
|
||||
if (x == 0) return 32;
|
||||
x = (x & -x) * 0x04D7651F;
|
||||
return table[x >> 27];
|
||||
}
|
||||
|
||||
static const unsigned long long magic = 0x03f08c5392f756cdULL;
|
||||
|
||||
static const char table[64] = {
|
||||
0, 1, 12, 2, 13, 22, 17, 3,
|
||||
14, 33, 23, 36, 18, 58, 28, 4,
|
||||
62, 15, 34, 26, 24, 48, 50, 37,
|
||||
19, 55, 59, 52, 29, 44, 39, 5,
|
||||
63, 11, 21, 16, 32, 35, 57, 27,
|
||||
61, 25, 47, 49, 54, 51, 43, 38,
|
||||
10, 20, 31, 56, 60, 46, 53, 42,
|
||||
9, 30, 45, 41, 8, 40, 7, 6,
|
||||
};
|
||||
|
||||
int ctz4 (unsigned long x)
|
||||
{
|
||||
unsigned long lsb = x & -x;
|
||||
return table[(lsb * magic) >> 58];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "clz\t" 4 } } */
|
||||
/* { dg-final { scan-assembler-times "and\t" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "cmp\t.*0" } } */
|
|
@ -1,27 +0,0 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O -mbmi" } */
|
||||
|
||||
static const unsigned long long magic = 0x03f08c5392f756cdULL;
|
||||
|
||||
static const char table[64] = {
|
||||
0, 1, 12, 2, 13, 22, 17, 3,
|
||||
14, 33, 23, 36, 18, 58, 28, 4,
|
||||
62, 15, 34, 26, 24, 48, 50, 37,
|
||||
19, 55, 59, 52, 29, 44, 39, 5,
|
||||
63, 11, 21, 16, 32, 35, 57, 27,
|
||||
61, 25, 47, 49, 54, 51, 43, 38,
|
||||
10, 20, 31, 56, 60, 46, 53, 42,
|
||||
9, 30, 45, 41, 8, 40, 7, 6,
|
||||
};
|
||||
|
||||
int ctz4 (unsigned long long x)
|
||||
{
|
||||
unsigned long long lsb = x & -x;
|
||||
return table[(lsb * magic) >> 58];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "andl\t" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "negq" } } */
|
||||
/* { dg-final { scan-assembler-not "imulq" } } */
|
||||
/* { dg-final { scan-assembler-not "shrq" } } */
|
|
@ -1689,6 +1689,36 @@ proc check_linker_plugin_available { } {
|
|||
} "-flto -fuse-linker-plugin"]
|
||||
}
|
||||
|
||||
# Return 1 if the target is RV32, 0 otherwise. Cache the result.
|
||||
|
||||
proc check_effective_target_rv32 { } {
|
||||
# Check that we are compiling for RV32 by checking the xlen size.
|
||||
return [check_no_compiler_messages riscv_rv32 assembly {
|
||||
#if !defined(__riscv_xlen)
|
||||
#error "__riscv_xlen not defined!"
|
||||
#else
|
||||
#if __riscv_xlen != 32
|
||||
#error "Not RV32"
|
||||
#endif
|
||||
#endif
|
||||
}]
|
||||
}
|
||||
|
||||
# Return 1 if the target is RV64, 0 otherwise. Cache the result.
|
||||
|
||||
proc check_effective_target_rv64 { } {
|
||||
# Check that we are compiling for RV64 by checking the xlen size.
|
||||
return [check_no_compiler_messages riscv_rv64 assembly {
|
||||
#if !defined(__riscv_xlen)
|
||||
#error "__riscv_xlen not defined!"
|
||||
#else
|
||||
#if __riscv_xlen != 64
|
||||
#error "Not RV64"
|
||||
#endif
|
||||
#endif
|
||||
}]
|
||||
}
|
||||
|
||||
# Return 1 if the target OS supports running SSE executables, 0
|
||||
# otherwise. Cache the result.
|
||||
|
||||
|
|
Loading…
Reference in New Issue