From 8953574fa9a0398611d76a9ff2b8a3bc981f9c68 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 18 Apr 2018 19:14:28 +0000 Subject: [PATCH] i386: Insert ENDBR after __morestack call Since __morestack will jump back to its callee via indirect call, we need to insert ENDBR after calling __morestack. gcc/ PR target/85388 * config/i386/i386.c (ix86_expand_split_stack_prologue): Insert ENDBR after calling __morestack. gcc/testsuite/ PR target/85388 * gcc.dg/pr85388-1.c: New test. * gcc.dg/pr85388-2.c: Likewise. * gcc.dg/pr85388-3.c: Likewise. * gcc.dg/pr85388-4.c: Likewise. * gcc.dg/pr85388-5.c: Likewise. * gcc.dg/pr85388-6.c: Likewise. From-SVN: r259475 --- gcc/ChangeLog | 6 +++ gcc/config/i386/i386.c | 11 ++++- gcc/testsuite/ChangeLog | 10 +++++ gcc/testsuite/gcc.dg/pr85388-1.c | 50 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr85388-2.c | 56 ++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr85388-3.c | 65 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr85388-4.c | 69 ++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr85388-5.c | 54 +++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr85388-6.c | 56 ++++++++++++++++++++++++++ 9 files changed, 376 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr85388-1.c create mode 100644 gcc/testsuite/gcc.dg/pr85388-2.c create mode 100644 gcc/testsuite/gcc.dg/pr85388-3.c create mode 100644 gcc/testsuite/gcc.dg/pr85388-4.c create mode 100644 gcc/testsuite/gcc.dg/pr85388-5.c create mode 100644 gcc/testsuite/gcc.dg/pr85388-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a74304871af..43bba1bc970 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-04-18 H.J. Lu + + PR target/85388 + * config/i386/i386.c (ix86_expand_split_stack_prologue): Insert + ENDBR after calling __morestack. + 2018-04-18 David Malcolm PR jit/85384 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9074526b8a1..d24c81b0dfe 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15265,7 +15265,16 @@ ix86_expand_split_stack_prologue (void) instruction--we need control flow to continue at the subsequent label. Therefore, we use an unspec. */ gcc_assert (crtl->args.pops_args < 65536); - emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); + rtx_insn *ret_insn + = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); + + if ((flag_cf_protection & CF_BRANCH)) + { + /* Insert ENDBR since __morestack will jump back here via indirect + call. */ + rtx cet_eb = gen_nop_endbr (); + emit_insn_after (cet_eb, ret_insn); + } /* If we are in 64-bit mode and this function uses a static chain, we saved %r10 in %rax before calling _morestack. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 179b4422ecf..9ac9731abb4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2018-04-18 H.J. Lu + + PR target/85388 + * gcc.dg/pr85388-1.c: New test. + * gcc.dg/pr85388-2.c: Likewise. + * gcc.dg/pr85388-3.c: Likewise. + * gcc.dg/pr85388-4.c: Likewise. + * gcc.dg/pr85388-5.c: Likewise. + * gcc.dg/pr85388-6.c: Likewise. + 2018-04-18 Jan Hubicka Martin Liska diff --git a/gcc/testsuite/gcc.dg/pr85388-1.c b/gcc/testsuite/gcc.dg/pr85388-1.c new file mode 100644 index 00000000000..86d4737e32b --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-1.c @@ -0,0 +1,50 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */ + +#include +#include +#include + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i) +{ + char buf[10000]; + + if (i > 0) + { + use_buffer (buf); + down (i - 1); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down (1000); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr85388-2.c b/gcc/testsuite/gcc.dg/pr85388-2.c new file mode 100644 index 00000000000..fd13d984c50 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-2.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-require-effective-target pthread_h } */ +/* { dg-options "-pthread -fsplit-stack -fcf-protection -mcet" } */ + +#include +#include + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i) +{ + char buf[10000]; + + if (i > 0) + { + use_buffer (buf); + down (i - 1); + } +} + +static void * +thread_routine (void *arg __attribute__ ((unused))) +{ + down (1000); + return NULL; +} + +int +main (void) +{ + int i; + pthread_t tid; + void *dummy; + + i = pthread_create (&tid, NULL, thread_routine, NULL); + if (i != 0) + abort (); + i = pthread_join (tid, &dummy); + if (i != 0) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr85388-3.c b/gcc/testsuite/gcc.dg/pr85388-3.c new file mode 100644 index 00000000000..730d2be9c22 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-3.c @@ -0,0 +1,65 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */ + +#include +#include +#include +#include + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* Each recursive call uses 10,000 bytes. We call it 1000 times, + using a total of 10,000,000 bytes. If -fsplit-stack is not + working, that will overflow our stack limit. */ + +static void +down (int i, ...) +{ + char buf[10000]; + va_list ap; + + va_start (ap, i); + if (va_arg (ap, int) != 1 + || va_arg (ap, int) != 2 + || va_arg (ap, int) != 3 + || va_arg (ap, int) != 4 + || va_arg (ap, int) != 5 + || va_arg (ap, int) != 6 + || va_arg (ap, int) != 7 + || va_arg (ap, int) != 8 + || va_arg (ap, int) != 9 + || va_arg (ap, int) != 10) + abort (); + + if (i > 0) + { + use_buffer (buf); + down (i - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down (1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr85388-4.c b/gcc/testsuite/gcc.dg/pr85388-4.c new file mode 100644 index 00000000000..03937d0e735 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-4.c @@ -0,0 +1,69 @@ +/* This test needs to use setrlimit to set the stack size, so it can + only run on Unix. */ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */ + +#include +#include +#include +#include + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf, size_t) __attribute__ ((noinline)); +static void +use_buffer (char *buf, size_t c) +{ + size_t i; + + for (i = 0; i < c; ++i) + buf[i] = (char) i; +} + +/* Each recursive call uses 10 * i bytes. We call it 1000 times, + using a total of 5,000,000 bytes. If -fsplit-stack is not working, + that will overflow our stack limit. */ + +static void +down1 (int i) +{ + char buf[10 * i]; + + if (i > 0) + { + use_buffer (buf, 10 * i); + down1 (i - 1); + } +} + +/* Same thing, using alloca. */ + +static void +down2 (int i) +{ + char *buf = alloca (10 * i); + + if (i > 0) + { + use_buffer (buf, 10 * i); + down2 (i - 1); + } +} + +int +main (void) +{ + struct rlimit r; + + /* We set a stack limit because we are usually invoked via make, and + make sets the stack limit to be as large as possible. */ + r.rlim_cur = 8192 * 1024; + r.rlim_max = 8192 * 1024; + if (setrlimit (RLIMIT_STACK, &r) != 0) + abort (); + down1 (1000); + down2 (1000); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr85388-5.c b/gcc/testsuite/gcc.dg/pr85388-5.c new file mode 100644 index 00000000000..7462a40892c --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-5.c @@ -0,0 +1,54 @@ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -fcf-protection -mcet" } */ + +/* This test is like split-3.c, but tests with a smaller stack frame, + since that uses a different prologue. */ + +#include +#include + +/* Use a noinline function to ensure that the buffer is not removed + from the stack. */ +static void use_buffer (char *buf) __attribute__ ((noinline)); +static void +use_buffer (char *buf) +{ + buf[0] = '\0'; +} + +/* When using gold, the call to abort will force a stack split. */ + +static void +down (int i, ...) +{ + char buf[1]; + va_list ap; + + va_start (ap, i); + if (va_arg (ap, int) != 1 + || va_arg (ap, int) != 2 + || va_arg (ap, int) != 3 + || va_arg (ap, int) != 4 + || va_arg (ap, int) != 5 + || va_arg (ap, int) != 6 + || va_arg (ap, int) != 7 + || va_arg (ap, int) != 8 + || va_arg (ap, int) != 9 + || va_arg (ap, int) != 10) + abort (); + + if (i > 0) + { + use_buffer (buf); + down (i - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + } +} + +int +main (void) +{ + down (1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr85388-6.c b/gcc/testsuite/gcc.dg/pr85388-6.c new file mode 100644 index 00000000000..23b5d8e3df6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr85388-6.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* } } } */ +/* { dg-require-effective-target cet } */ +/* { dg-require-effective-target split_stack } */ +/* { dg-options "-fsplit-stack -O2 -fcf-protection -mcet" } */ +/* { dg-options "-fsplit-stack -O2 -mno-accumulate-outgoing-args -fcf-protection -mcet" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ + +/* A case that used to fail on 32-bit x86 when optimizing and not + using -maccumulate-args. The stack adjustment of the alloca got + mixed up with the arguments pushed on the stack to the function + before the call of alloca. */ + +#include + +typedef struct { const char* s; int l; } s; + +typedef unsigned long long align16 __attribute__ ((aligned(16))); + +s gobats (const void *, int) __attribute__ ((noinline)); + +s +gobats (const void* p __attribute__ ((unused)), + int l __attribute__ ((unused))) +{ + s v; + v.s = 0; + v.l = 0; + return v; +} + +void check_aligned (void *p) __attribute__ ((noinline)); + +void +check_aligned (void *p) +{ + if (((__SIZE_TYPE__) p & 0xf) != 0) + abort (); +} + +void gap (void *) __attribute__ ((noinline)); + +void gap (void *p) +{ + align16 a; + check_aligned (&a); +} + +int +main (int argc, char **argv) +{ + s *space; + gobats(0, 16); + space = (s *) alloca(sizeof(s) + 1); + *space = (s){0, 16}; + gap(space); + return 0; +}