Support amd64 AVX.

gdb/ 2010-04-07 H.J. Lu <hongjiu.lu@intel.com> * amd64-linux-nat.c: Include "regset.h", "elf/common.h", <sys/uio.h> and "i386-xstate.h". (PTRACE_GETREGSET): New. (PTRACE_SETREGSET): Likewise. (have_ptrace_getregset): Likewise. (amd64_linux_gregset64_reg_offset): Include 16 upper YMM registers. (amd64_linux_gregset32_reg_offset): Include 8 upper YMM registers. (amd64_linux_fetch_inferior_registers): Support PTRACE_GETREGSET. (amd64_linux_store_inferior_registers): Likewise. (amd64_linux_read_description): Check and enable AVX target descriptions. * amd64-linux-tdep.c: Include "regset.h", "i386-linux-tdep.h" and "features/i386/amd64-avx-linux.c". (amd64_linux_regset_sections): New. (amd64_linux_core_read_description): Check and enable AVX target description. (amd64_linux_init_abi): Set xsave_xcr0_offset. Call set_gdbarch_core_regset_sections. (_initialize_amd64_linux_tdep): Call initialize_tdesc_amd64_avx_linux. * amd64-linux-tdep.h (AMD64_LINUX_ORIG_RAX_REGNUM): Replace AMD64_MXCSR_REGNUM with AMD64_YMM15H_REGNUM. (tdesc_amd64_avx_linux): New. (amd64_linux_update_xstateregset): Likewise. * amd64-tdep.c: Include "features/i386/amd64-avx.c". (amd64_ymm_names): New. (amd64_ymmh_names): Likewise. (amd64_register_name): Likewise. (amd64_supply_xstateregset): Likewise. (amd64_collect_xstateregset): Likewise. (amd64_supply_xsave): Likewise. (amd64_collect_xsave): Likewise. (AMD64_NUM_REGS): Removed. (amd64_dwarf_reg_to_regnum): Return %ymmN register number for %xmmN if AVX is available. (amd64_pseudo_register_name): Support pseudo YMM registers. (amd64_regset_from_core_section): Support .reg-xstate section. (amd64_init_abi): Set ymmh_register_names, num_ymm_regs and ymm0h_regnum. Call set_gdbarch_register_name. (amd64_init_abi): Call initialize_tdesc_amd64_avx. * amd64-tdep.h (amd64_regnum): Add AMD64_YMM0H_REGNUM and AMD64_YMM15H_REGNUM. (AMD64_NUM_REGS): New. (amd64_supply_xsave): Likewise. (amd64_collect_xsave): Likewise. (amd64_register_name): Removed. 
(amd64_register_type): Likewise. gdb/testsuite/ 2010-04-07 H.J. Lu <hongjiu.lu@intel.com> * gdb.arch/i386-avx.c: New. * gdb.arch/i386-avx.exp: Likewise. * gdb.arch/i386-cpuid.h: Updated from gcc 4.4.
2010-04-07 18:46:50 +00:00 · 2010-04-07 18:46:50 +00:00 · a055a18785
commit a055a18785
parent 31aeac7844
9 changed files with 638 additions and 78 deletions
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@ -1,3 +1,59 @@
+2010-04-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* amd64-linux-nat.c: Include "regset.h", "elf/common.h",
+	<sys/uio.h> and "i386-xstate.h".
+	(PTRACE_GETREGSET): New.
+	(PTRACE_SETREGSET): Likewise.
+	(have_ptrace_getregset): Likewise.
+	(amd64_linux_gregset64_reg_offset): Include 16 upper YMM
+	registers.
+	(amd64_linux_gregset32_reg_offset): Include 8 upper YMM
+	registers.
+	(amd64_linux_fetch_inferior_registers): Support PTRACE_GETREGSET.
+	(amd64_linux_store_inferior_registers): Likewise.
+	(amd64_linux_read_description): Check and enable AVX target
+	descriptions.
+
+	* amd64-linux-tdep.c: Include "regset.h", "i386-linux-tdep.h"
+	and "features/i386/amd64-avx-linux.c".
+	(amd64_linux_regset_sections): New.
+	(amd64_linux_core_read_description): Check and enable AVX
+	target description.
+	(amd64_linux_init_abi): Set xsave_xcr0_offset.  Call
+	set_gdbarch_core_regset_sections.
+	(_initialize_amd64_linux_tdep): Call
+	initialize_tdesc_amd64_avx_linux.
+
+	* amd64-linux-tdep.h (AMD64_LINUX_ORIG_RAX_REGNUM): Replace
+	AMD64_MXCSR_REGNUM with AMD64_YMM15H_REGNUM.
+	(tdesc_amd64_avx_linux): New.
+	(amd64_linux_update_xstateregset): Likewise.
+
+	* amd64-tdep.c: Include "features/i386/amd64-avx.c".
+	(amd64_ymm_names): New.
+	(amd64_ymmh_names): Likewise.
+	(amd64_register_name): Likewise.
+	(amd64_supply_xstateregset): Likewise.
+	(amd64_collect_xstateregset): Likewise.
+	(amd64_supply_xsave): Likewise.
+	(amd64_collect_xsave): Likewise.
+	(AMD64_NUM_REGS): Removed.
+	(amd64_dwarf_reg_to_regnum): Return %ymmN register number for
+	%xmmN if AVX is available.
+	(amd64_pseudo_register_name): Support pseudo YMM registers.
+	(amd64_regset_from_core_section): Support .reg-xstate section.
+	(amd64_init_abi): Set ymmh_register_names, num_ymm_regs
+	and ymm0h_regnum.  Call set_gdbarch_register_name.
+	(amd64_init_abi): Call initialize_tdesc_amd64_avx.
+
+	* amd64-tdep.h (amd64_regnum): Add AMD64_YMM0H_REGNUM and
+	AMD64_YMM15H_REGNUM.
+	(AMD64_NUM_REGS): New.
+	(amd64_supply_xsave): Likewise.
+	(amd64_collect_xsave): Likewise.
+	(amd64_register_name): Removed.
+	(amd64_register_type): Likewise.
+
 2010-04-07  H.J. Lu  <hongjiu.lu@intel.com>

 	* i387-tdep.c: Include "i386-xstate.h".
--- a/gdb/amd64-linux-nat.c
+++ b/gdb/amd64-linux-nat.c
@ -23,11 +23,14 @@
 #include "inferior.h"
 #include "gdbcore.h"
 #include "regcache.h"
+#include "regset.h"
 #include "linux-nat.h"
 #include "amd64-linux-tdep.h"

 #include "gdb_assert.h"
 #include "gdb_string.h"
+#include "elf/common.h"
+#include <sys/uio.h>
 #include <sys/ptrace.h>
 #include <sys/debugreg.h>
 #include <sys/syscall.h>
@ -51,6 +54,18 @@
 #include "i386-linux-tdep.h"
 #include "amd64-nat.h"
 #include "i386-nat.h"
+#include "i386-xstate.h"
+
+#ifndef PTRACE_GETREGSET
+#define PTRACE_GETREGSET	0x4204
+#endif
+
+#ifndef PTRACE_SETREGSET
+#define PTRACE_SETREGSET	0x4205
+#endif
+
+/* Does the current host support PTRACE_GETREGSET?  -1 if not yet probed.  */
+static int have_ptrace_getregset = -1;

 /* Mapping between the general-purpose registers in GNU/Linux x86-64
   `struct user' format and GDB's register cache layout.  */
@ -73,6 +88,8 @@ static int amd64_linux_gregset64_reg_offset[] =
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
  ORIG_RAX * 8
 };

@ -99,6 +116,7 @@ static int amd64_linux_gregset32_reg_offset[] =
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
  ORIG_RAX * 8			/* "orig_eax" */
 };

@ -183,10 +201,26 @@ amd64_linux_fetch_inferior_registers (struct target_ops *ops,
    {
      elf_fpregset_t fpregs;

-      if (ptrace (PTRACE_GETFPREGS, tid, 0, (long) &fpregs) < 0)
-	perror_with_name (_("Couldn't get floating point status"));
+      if (have_ptrace_getregset)
+	{
+	  char xstateregs[I386_XSTATE_MAX_SIZE];
+	  struct iovec iov;

-      amd64_supply_fxsave (regcache, -1, &fpregs);
+	  iov.iov_base = xstateregs;
+	  iov.iov_len = sizeof (xstateregs);
+	  if (ptrace (PTRACE_GETREGSET, tid,
+		      (unsigned int) NT_X86_XSTATE, (long) &iov) < 0)
+	    perror_with_name (_("Couldn't get extended state status"));
+
+	  amd64_supply_xsave (regcache, -1, xstateregs);
+	}
+      else
+	{
+	  if (ptrace (PTRACE_GETFPREGS, tid, 0, (long) &fpregs) < 0)
+	    perror_with_name (_("Couldn't get floating point status"));
+
+	  amd64_supply_fxsave (regcache, -1, &fpregs);
+	}
    }
 }

@ -226,15 +260,33 @@ amd64_linux_store_inferior_registers (struct target_ops *ops,
    {
      elf_fpregset_t fpregs;

-      if (ptrace (PTRACE_GETFPREGS, tid, 0, (long) &fpregs) < 0)
-	perror_with_name (_("Couldn't get floating point status"));
+      if (have_ptrace_getregset)
+	{
+	  char xstateregs[I386_XSTATE_MAX_SIZE];
+	  struct iovec iov;

-      amd64_collect_fxsave (regcache, regnum, &fpregs);
+	  iov.iov_base = xstateregs;
+	  iov.iov_len = sizeof (xstateregs);
+	  if (ptrace (PTRACE_GETREGSET, tid,
+		      (unsigned int) NT_X86_XSTATE, (long) &iov) < 0)
+	    perror_with_name (_("Couldn't get extended state status"));

-      if (ptrace (PTRACE_SETFPREGS, tid, 0, (long) &fpregs) < 0)
-	perror_with_name (_("Couldn't write floating point status"));
+	  amd64_collect_xsave (regcache, regnum, xstateregs, 0);

-      return;
+	  if (ptrace (PTRACE_SETREGSET, tid,
+		      (unsigned int) NT_X86_XSTATE, (long) &iov) < 0)
+	    perror_with_name (_("Couldn't write extended state status"));
+	}
+      else
+	{
+	  if (ptrace (PTRACE_GETFPREGS, tid, 0, (long) &fpregs) < 0)
+	    perror_with_name (_("Couldn't get floating point status"));
+
+	  amd64_collect_fxsave (regcache, regnum, &fpregs);
+
+	  if (ptrace (PTRACE_SETFPREGS, tid, 0, (long) &fpregs) < 0)
+	    perror_with_name (_("Couldn't write floating point status"));
+	}
    }
 }

@ -688,6 +740,8 @@ amd64_linux_read_description (struct target_ops *ops)
 {
  unsigned long cs;
  int tid;
+  int is_64bit;
+  static uint64_t xcr0;

  /* GNU/Linux LWP ID's are process ID's.  */
  tid = TIDGET (inferior_ptid);
@ -701,10 +755,46 @@ amd64_linux_read_description (struct target_ops *ops)
  if (errno != 0)
    perror_with_name (_("Couldn't get CS register"));

-  if (cs == AMD64_LINUX_USER64_CS)
-    return tdesc_amd64_linux;
+  is_64bit = cs == AMD64_LINUX_USER64_CS;
+
+  if (have_ptrace_getregset == -1)
+    {
+      uint64_t xstateregs[(I386_XSTATE_SSE_SIZE / sizeof (uint64_t))];
+      struct iovec iov;
+
+      iov.iov_base = xstateregs;
+      iov.iov_len = sizeof (xstateregs);
+
+      /* Check if PTRACE_GETREGSET works.  */
+      if (ptrace (PTRACE_GETREGSET, tid,
+		  (unsigned int) NT_X86_XSTATE, (long) &iov) < 0)
+	have_ptrace_getregset = 0;
+      else
+	{
+	  have_ptrace_getregset = 1;
+
+	  /* Get XCR0 from XSAVE extended state.  */
+	  xcr0 = xstateregs[(I386_LINUX_XSAVE_XCR0_OFFSET
+			     / sizeof (uint64_t))];
+	}
+    }
+
+  /* Check the native XCR0 only if PTRACE_GETREGSET is available.  */
+  if (have_ptrace_getregset
+      && (xcr0 & I386_XSTATE_AVX_MASK) == I386_XSTATE_AVX_MASK)
+    {
+      if (is_64bit)
+	return tdesc_amd64_avx_linux;
+      else
+	return tdesc_i386_avx_linux;
+    }
  else
-    return tdesc_i386_linux;
+    {
+      if (is_64bit)
+	return tdesc_amd64_linux;
+      else
+	return tdesc_i386_linux;
+    }
 }

 /* Provide a prototype to silence -Wmissing-prototypes.  */
--- a/gdb/amd64-linux-tdep.c
+++ b/gdb/amd64-linux-tdep.c
@ -28,8 +28,11 @@
 #include "symtab.h"
 #include "gdbtypes.h"
 #include "reggroups.h"
+#include "regset.h"
 #include "amd64-linux-tdep.h"
+#include "i386-linux-tdep.h"
 #include "linux-tdep.h"
+#include "i386-xstate.h"

 #include "gdb_string.h"

@ -38,6 +41,7 @@
 #include "xml-syscall.h"

 #include "features/i386/amd64-linux.c"
+#include "features/i386/amd64-avx-linux.c"

 /* The syscall's XML filename for i386.  */
 #define XML_SYSCALL_FILENAME_AMD64 "syscalls/amd64-linux.xml"
@ -45,6 +49,15 @@
 #include "record.h"
 #include "linux-record.h"

+/* Supported register note sections.  */
+static struct core_regset_section amd64_linux_regset_sections[] =
+{
+  { ".reg", 144, "general-purpose" },
+  { ".reg2", 512, "floating-point" },
+  { ".reg-xstate", I386_XSTATE_MAX_SIZE, "XSAVE extended state" },
+  { NULL, 0 }
+};
+
 /* Mapping between the general-purpose registers in `struct user'
   format and GDB's register cache layout.  */

@ -1250,12 +1263,17 @@ amd64_linux_core_read_description (struct gdbarch *gdbarch,
 				  bfd *abfd)
 {
  asection *section = bfd_get_section_by_name (abfd, ".reg2");
+  uint64_t xcr0;

  if (section == NULL)
    return NULL;

  /* Linux/x86-64.  */
-  return tdesc_amd64_linux;
+  xcr0 = i386_linux_core_read_xcr0 (gdbarch, target, abfd);
+  if ((xcr0 & I386_XSTATE_AVX_MASK) == I386_XSTATE_AVX_MASK)
+    return tdesc_amd64_avx_linux;
+  else
+    return tdesc_amd64_linux;
 }

 static void
@ -1297,6 +1315,8 @@ amd64_linux_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)
  tdep->sc_reg_offset = amd64_linux_sc_reg_offset;
  tdep->sc_num_regs = ARRAY_SIZE (amd64_linux_sc_reg_offset);

+  tdep->xsave_xcr0_offset = I386_LINUX_XSAVE_XCR0_OFFSET;
+
  /* GNU/Linux uses SVR4-style shared libraries.  */
  set_solib_svr4_fetch_link_map_offsets
    (gdbarch, svr4_lp64_fetch_link_map_offsets);
@ -1318,6 +1338,9 @@ amd64_linux_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)
  /* GNU/Linux uses SVR4-style shared libraries.  */
  set_gdbarch_skip_trampoline_code (gdbarch, find_solib_trampoline_target);

+  /* Install supported register note sections.  */
+  set_gdbarch_core_regset_sections (gdbarch, amd64_linux_regset_sections);
+
  set_gdbarch_core_read_description (gdbarch,
 				     amd64_linux_core_read_description);

@ -1517,4 +1540,5 @@ _initialize_amd64_linux_tdep (void)

  /* Initialize the Linux target description  */
  initialize_tdesc_amd64_linux ();
+  initialize_tdesc_amd64_avx_linux ();
 }
--- a/gdb/amd64-linux-tdep.h
+++ b/gdb/amd64-linux-tdep.h
@ -26,13 +26,14 @@
 /* Register number for the "orig_rax" register.  If this register
   contains a value >= 0 it is interpreted as the system call number
   that the kernel is supposed to restart.  */
-#define AMD64_LINUX_ORIG_RAX_REGNUM (AMD64_MXCSR_REGNUM + 1)
+#define AMD64_LINUX_ORIG_RAX_REGNUM (AMD64_YMM15H_REGNUM + 1)

 /* Total number of registers for GNU/Linux.  */
 #define AMD64_LINUX_NUM_REGS (AMD64_LINUX_ORIG_RAX_REGNUM + 1)

 /* Linux target description.  */
 extern struct target_desc *tdesc_amd64_linux;
+extern struct target_desc *tdesc_amd64_avx_linux;

 /* Enum that defines the syscall identifiers for amd64 linux.
   Used for process record/replay, these will be translated into
--- a/gdb/amd64-tdep.c
+++ b/gdb/amd64-tdep.c
@ -43,6 +43,7 @@
 #include "i387-tdep.h"

 #include "features/i386/amd64.c"
+#include "features/i386/amd64-avx.c"

 /* Note that the AMD64 architecture was previously known as x86-64.
   The latter is (forever) engraved into the canonical system name as
@ -71,8 +72,21 @@ static const char *amd64_register_names[] =
  "mxcsr",
 };

-/* Total number of registers.  */
-#define AMD64_NUM_REGS	ARRAY_SIZE (amd64_register_names)
+static const char *amd64_ymm_names[] = 
+{
+  "ymm0", "ymm1", "ymm2", "ymm3",
+  "ymm4", "ymm5", "ymm6", "ymm7",
+  "ymm8", "ymm9", "ymm10", "ymm11",
+  "ymm12", "ymm13", "ymm14", "ymm15"
+};
+
+static const char *amd64_ymmh_names[] = 
+{
+  "ymm0h", "ymm1h", "ymm2h", "ymm3h",
+  "ymm4h", "ymm5h", "ymm6h", "ymm7h",
+  "ymm8h", "ymm9h", "ymm10h", "ymm11h",
+  "ymm12h", "ymm13h", "ymm14h", "ymm15h"
+};

 /* The registers used to pass integer arguments during a function call.  */
 static int amd64_dummy_call_integer_regs[] =
@ -163,6 +177,8 @@ static const int amd64_dwarf_regmap_len =
 static int
 amd64_dwarf_reg_to_regnum (struct gdbarch *gdbarch, int reg)
 {
+  struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
+  int ymm0_regnum = tdep->ymm0_regnum;
  int regnum = -1;

  if (reg >= 0 && reg < amd64_dwarf_regmap_len)
@ -170,6 +186,9 @@ amd64_dwarf_reg_to_regnum (struct gdbarch *gdbarch, int reg)

  if (regnum == -1)
    warning (_("Unmapped DWARF Register #%d encountered."), reg);
+  else if (ymm0_regnum >= 0
+	   && i386_xmm_regnum_p (gdbarch, regnum))
+    regnum += ymm0_regnum - I387_XMM0_REGNUM (tdep);

  return regnum;
 }
@ -238,6 +257,19 @@ static const char *amd64_dword_names[] =
  "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
 };

+/* Return the name of register REGNUM, or the empty string if it is
+   an anonymous register. */
+
+static const char *
+amd64_register_name (struct gdbarch *gdbarch, int regnum)
+{
+  /* Hide the upper YMM registers.  */
+  if (i386_ymmh_regnum_p (gdbarch, regnum))
+    return "";
+
+  return tdesc_register_name (gdbarch, regnum);
+}
+
 /* Return the name of register REGNUM.  */

 static const char *
@ -246,6 +278,8 @@ amd64_pseudo_register_name (struct gdbarch *gdbarch, int regnum)
  struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
  if (i386_byte_regnum_p (gdbarch, regnum))
    return amd64_byte_names[regnum - tdep->al_regnum];
+  else if (i386_ymm_regnum_p (gdbarch, regnum))
+    return amd64_ymm_names[regnum - tdep->ymm0_regnum];
  else if (i386_word_regnum_p (gdbarch, regnum))
    return amd64_word_names[regnum - tdep->ax_regnum];
  else if (i386_dword_regnum_p (gdbarch, regnum))
@ -2176,6 +2210,28 @@ amd64_collect_fpregset (const struct regset *regset,
  amd64_collect_fxsave (regcache, regnum, fpregs);
 }

+/* Similar to amd64_supply_fpregset, but use XSAVE extended state.  */
+
+static void
+amd64_supply_xstateregset (const struct regset *regset,
+			   struct regcache *regcache, int regnum,
+			   const void *xstateregs, size_t len)
+{
+  const struct gdbarch_tdep *tdep = gdbarch_tdep (regset->arch);
+  amd64_supply_xsave (regcache, regnum, xstateregs);
+}
+
+/* Similar to amd64_collect_fpregset, but use XSAVE extended state.  */
+
+static void
+amd64_collect_xstateregset (const struct regset *regset,
+			    const struct regcache *regcache,
+			    int regnum, void *xstateregs, size_t len)
+{
+  const struct gdbarch_tdep *tdep = gdbarch_tdep (regset->arch);
+  amd64_collect_xsave (regcache, regnum, xstateregs, 1);
+}
+
 /* Return the appropriate register set for the core section identified
   by SECT_NAME and SECT_SIZE.  */

@ -2194,6 +2250,16 @@ amd64_regset_from_core_section (struct gdbarch *gdbarch,
      return tdep->fpregset;
    }

+  if (strcmp (sect_name, ".reg-xstate") == 0)
+    {
+      if (tdep->xstateregset == NULL)
+	tdep->xstateregset = regset_alloc (gdbarch,
+					   amd64_supply_xstateregset,
+					   amd64_collect_xstateregset);
+
+      return tdep->xstateregset;
+    }
+
  return i386_regset_from_core_section (gdbarch, sect_name, sect_size);
 }

@ -2256,6 +2322,13 @@ amd64_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)
  tdep->num_core_regs = AMD64_NUM_GREGS + I387_NUM_REGS;
  tdep->register_names = amd64_register_names;

+  if (tdesc_find_feature (tdesc, "org.gnu.gdb.i386.avx") != NULL)
+    {
+      tdep->ymmh_register_names = amd64_ymmh_names;
+      tdep->num_ymm_regs = 16;
+      tdep->ymm0h_regnum = AMD64_YMM0H_REGNUM;
+    }
+
  tdep->num_byte_regs = 20;
  tdep->num_word_regs = 16;
  tdep->num_dword_regs = 16;
@ -2269,6 +2342,8 @@ amd64_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)

  set_tdesc_pseudo_register_name (gdbarch, amd64_pseudo_register_name);

+  set_gdbarch_register_name (gdbarch, amd64_register_name);
+
  /* AMD64 has an FPU and 16 SSE registers.  */
  tdep->st0_regnum = AMD64_ST0_REGNUM;
  tdep->num_xmm_regs = 16;
@ -2349,6 +2424,7 @@ void
 _initialize_amd64_tdep (void)
 {
  initialize_tdesc_amd64 ();
+  initialize_tdesc_amd64_avx ();
 }


@ -2384,6 +2460,30 @@ amd64_supply_fxsave (struct regcache *regcache, int regnum,
    }
 }

+/* Similar to amd64_supply_fxsave, but use XSAVE extended state.  */
+
+void
+amd64_supply_xsave (struct regcache *regcache, int regnum,
+		    const void *xsave)
+{
+  struct gdbarch *gdbarch = get_regcache_arch (regcache);
+  struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
+
+  i387_supply_xsave (regcache, regnum, xsave);
+
+  if (xsave && gdbarch_ptr_bit (gdbarch) == 64)
+    {
+      const gdb_byte *regs = xsave;
+
+      if (regnum == -1 || regnum == I387_FISEG_REGNUM (tdep))
+	regcache_raw_supply (regcache, I387_FISEG_REGNUM (tdep),
+			     regs + 12);
+      if (regnum == -1 || regnum == I387_FOSEG_REGNUM (tdep))
+	regcache_raw_supply (regcache, I387_FOSEG_REGNUM (tdep),
+			     regs + 20);
+    }
+}
+
 /* Fill register REGNUM (if it is a floating-point or SSE register) in
   *FXSAVE with the value from REGCACHE.  If REGNUM is -1, do this for
   all registers.  This function doesn't touch any of the reserved
@ -2407,3 +2507,26 @@ amd64_collect_fxsave (const struct regcache *regcache, int regnum,
 	regcache_raw_collect (regcache, I387_FOSEG_REGNUM (tdep), regs + 20);
    }
 }
+
+/* Similar to amd64_collect_fxsave, but use XSAVE extended state.  */
+
+void
+amd64_collect_xsave (const struct regcache *regcache, int regnum,
+		     void *xsave, int gcore)
+{
+  struct gdbarch *gdbarch = get_regcache_arch (regcache);
+  struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch);
+  gdb_byte *regs = xsave;
+
+  i387_collect_xsave (regcache, regnum, xsave, gcore);
+
+  if (gdbarch_ptr_bit (gdbarch) == 64)
+    {
+      if (regnum == -1 || regnum == I387_FISEG_REGNUM (tdep))
+	regcache_raw_collect (regcache, I387_FISEG_REGNUM (tdep),
+			      regs + 12);
+      if (regnum == -1 || regnum == I387_FOSEG_REGNUM (tdep))
+	regcache_raw_collect (regcache, I387_FOSEG_REGNUM (tdep),
+			      regs + 20);
+    }
+}
--- a/gdb/amd64-tdep.h
+++ b/gdb/amd64-tdep.h
@ -61,12 +61,16 @@ enum amd64_regnum
  AMD64_FSTAT_REGNUM = AMD64_ST0_REGNUM + 9,
  AMD64_XMM0_REGNUM = 40,	/* %xmm0 */
  AMD64_XMM1_REGNUM,		/* %xmm1 */
-  AMD64_MXCSR_REGNUM = AMD64_XMM0_REGNUM + 16
+  AMD64_MXCSR_REGNUM = AMD64_XMM0_REGNUM + 16,
+  AMD64_YMM0H_REGNUM,		/* %ymm0h */
+  AMD64_YMM15H_REGNUM = AMD64_YMM0H_REGNUM + 15
 };

 /* Number of general purpose registers.  */
 #define AMD64_NUM_GREGS		24

+#define AMD64_NUM_REGS		(AMD64_YMM15H_REGNUM + 1)
+
 extern struct displaced_step_closure *amd64_displaced_step_copy_insn
  (struct gdbarch *gdbarch, CORE_ADDR from, CORE_ADDR to,
   struct regcache *regs);
@ -77,12 +81,6 @@ extern void amd64_displaced_step_fixup (struct gdbarch *gdbarch,

 extern void amd64_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch);

-/* Functions from amd64-tdep.c which may be needed on architectures
-   with extra registers.  */
-
-extern const char *amd64_register_name (struct gdbarch *gdbarch, int regnum);
-extern struct type *amd64_register_type (struct gdbarch *gdbarch, int regnum);
-
 /* Fill register REGNUM in REGCACHE with the appropriate
   floating-point or SSE register value from *FXSAVE.  If REGNUM is
   -1, do this for all registers.  This function masks off any of the
@ -91,6 +89,10 @@ extern struct type *amd64_register_type (struct gdbarch *gdbarch, int regnum);
 extern void amd64_supply_fxsave (struct regcache *regcache, int regnum,
 				 const void *fxsave);

+/* Similar to amd64_supply_fxsave, but use XSAVE extended state.  */
+extern void amd64_supply_xsave (struct regcache *regcache, int regnum,
+				const void *xsave);
+
 /* Fill register REGNUM (if it is a floating-point or SSE register) in
   *FXSAVE with the value from REGCACHE.  If REGNUM is -1, do this for
   all registers.  This function doesn't touch any of the reserved
@ -99,6 +101,10 @@ extern void amd64_supply_fxsave (struct regcache *regcache, int regnum,
 extern void amd64_collect_fxsave (const struct regcache *regcache, int regnum,
 				  void *fxsave);

+/* Similar to amd64_collect_fxsave, but use XSAVE extended state.  */
+extern void amd64_collect_xsave (const struct regcache *regcache,
+				 int regnum, void *xsave, int gcore);
+
 void amd64_classify (struct type *type, enum amd64_reg_class class[2]);


--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@ -1,3 +1,10 @@
+2010-04-07  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gdb.arch/i386-avx.c: New.
+	* gdb.arch/i386-avx.exp: Likewise.
+
+	* gdb.arch/i386-cpuid.h: Updated from gcc 4.4.
+
 2010-04-06  Doug Evans  <dje@google.com>

 	* gdb.base/source-test.gdb: New file.
--- a/gdb/testsuite/gdb.arch/i386-avx.c
+++ b/gdb/testsuite/gdb.arch/i386-avx.c
@ -0,0 +1,128 @@
+/* Test program for AVX registers.
+
+   Copyright 2010 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include "i386-cpuid.h"
+
+typedef struct {
+  float f[8];
+} v8sf_t;
+
+
+v8sf_t data[] =
+  {
+    { {  0.0,  0.125,  0.25,  0.375,  0.50,  0.625,  0.75,  0.875 } },
+    { {  1.0,  1.125,  1.25,  1.375,  1.50,  1.625,  1.75,  1.875 } },
+    { {  2.0,  2.125,  2.25,  2.375,  2.50,  2.625,  2.75,  2.875 } },
+    { {  3.0,  3.125,  3.25,  3.375,  3.50,  3.625,  3.75,  3.875 } },
+    { {  4.0,  4.125,  4.25,  4.375,  4.50,  4.625,  4.75,  4.875 } },
+    { {  5.0,  5.125,  5.25,  5.375,  5.50,  5.625,  5.75,  5.875 } },
+    { {  6.0,  6.125,  6.25,  6.375,  6.50,  6.625,  6.75,  6.875 } },
+    { {  7.0,  7.125,  7.25,  7.375,  7.50,  7.625,  7.75,  7.875 } },
+#ifdef __x86_64__
+    { {  8.0,  8.125,  8.25,  8.375,  8.50,  8.625,  8.75,  8.875 } },
+    { {  9.0,  9.125,  9.25,  9.375,  9.50,  9.625,  9.75,  9.875 } },
+    { { 10.0, 10.125, 10.25, 10.375, 10.50, 10.625, 10.75, 10.875 } },
+    { { 11.0, 11.125, 11.25, 11.375, 11.50, 11.625, 11.75, 11.875 } },
+    { { 12.0, 12.125, 12.25, 12.375, 12.50, 12.625, 12.75, 12.875 } },
+    { { 13.0, 13.125, 13.25, 13.375, 13.50, 13.625, 13.75, 13.875 } },
+    { { 14.0, 14.125, 14.25, 14.375, 14.50, 14.625, 14.75, 14.875 } },
+    { { 15.0, 15.125, 15.25, 15.375, 15.50, 15.625, 15.75, 15.875 } },
+#endif
+  };
+
+
+int
+have_avx (void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  if ((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE))
+    return 1;
+  else
+    return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+  if (have_avx ())
+    {
+      asm ("vmovaps 0(%0), %%ymm0\n\t"
+           "vmovaps 32(%0), %%ymm1\n\t"
+           "vmovaps 64(%0), %%ymm2\n\t"
+           "vmovaps 96(%0), %%ymm3\n\t"
+           "vmovaps 128(%0), %%ymm4\n\t"
+           "vmovaps 160(%0), %%ymm5\n\t"
+           "vmovaps 192(%0), %%ymm6\n\t"
+           "vmovaps 224(%0), %%ymm7\n\t"
+           : /* no output operands */
+           : "r" (data) 
+           : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
+#ifdef __x86_64__
+      asm ("vmovaps 256(%0), %%ymm8\n\t"
+           "vmovaps 288(%0), %%ymm9\n\t"
+           "vmovaps 320(%0), %%ymm10\n\t"
+           "vmovaps 352(%0), %%ymm11\n\t"
+           "vmovaps 384(%0), %%ymm12\n\t"
+           "vmovaps 416(%0), %%ymm13\n\t"
+           "vmovaps 448(%0), %%ymm14\n\t"
+           "vmovaps 480(%0), %%ymm15\n\t"
+           : /* no output operands */
+           : "r" (data) 
+           : "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15");
+#endif
+
+      asm ("nop"); /* first breakpoint here */
+
+      asm (
+           "vmovaps %%ymm0, 0(%0)\n\t"
+           "vmovaps %%ymm1, 32(%0)\n\t"
+           "vmovaps %%ymm2, 64(%0)\n\t"
+           "vmovaps %%ymm3, 96(%0)\n\t"
+           "vmovaps %%ymm4, 128(%0)\n\t"
+           "vmovaps %%ymm5, 160(%0)\n\t"
+           "vmovaps %%ymm6, 192(%0)\n\t"
+           "vmovaps %%ymm7, 224(%0)\n\t"
+           : /* no output operands */
+           : "r" (data) 
+           : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
+#ifdef __x86_64__
+      asm (
+           "vmovaps %%ymm8, 256(%0)\n\t"
+           "vmovaps %%ymm9, 288(%0)\n\t"
+           "vmovaps %%ymm10, 320(%0)\n\t"
+           "vmovaps %%ymm11, 352(%0)\n\t"
+           "vmovaps %%ymm12, 384(%0)\n\t"
+           "vmovaps %%ymm13, 416(%0)\n\t"
+           "vmovaps %%ymm14, 448(%0)\n\t"
+           "vmovaps %%ymm15, 480(%0)\n\t"
+           : /* no output operands */
+           : "r" (data) 
+           : "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15");
+#endif
+
+      puts ("Bye!"); /* second breakpoint here */
+    }
+
+  return 0;
+}
--- a/gdb/testsuite/gdb.arch/i386-cpuid.h
+++ b/gdb/testsuite/gdb.arch/i386-cpuid.h
@ -1,30 +1,186 @@
-/* Helper file for i386 platform.  Runtime check for MMX/SSE/SSE2 support.
+/* Helper file for i386 platform.  Runtime check for MMX/SSE/SSE2/AVX
+ * support. Copied from gcc 4.4.
+ *
+ * Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ * 
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */

-   Copyright 2004, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+/* %ecx */
+#define bit_SSE3	(1 << 0)
+#define bit_PCLMUL	(1 << 1)
+#define bit_SSSE3	(1 << 9)
+#define bit_FMA		(1 << 12)
+#define bit_CMPXCHG16B	(1 << 13)
+#define bit_SSE4_1	(1 << 19)
+#define bit_SSE4_2	(1 << 20)
+#define bit_MOVBE	(1 << 22)
+#define bit_POPCNT	(1 << 23)
+#define bit_AES		(1 << 25)
+#define bit_XSAVE	(1 << 26)
+#define bit_OSXSAVE	(1 << 27)
+#define bit_AVX		(1 << 28)

-   This file is part of GDB.
+/* %edx */
+#define bit_CMPXCHG8B	(1 << 8)
+#define bit_CMOV	(1 << 15)
+#define bit_MMX		(1 << 23)
+#define bit_FXSAVE	(1 << 24)
+#define bit_SSE		(1 << 25)
+#define bit_SSE2	(1 << 26)

-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
-   (at your option) any later version.
+/* Extended Features */
+/* %ecx */
+#define bit_LAHF_LM	(1 << 0)
+#define bit_ABM		(1 << 5)
+#define bit_SSE4a	(1 << 6)
+#define bit_XOP         (1 << 11)
+#define bit_LWP 	(1 << 15)
+#define bit_FMA4        (1 << 16)

-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+/* %edx */
+#define bit_LM		(1 << 29)
+#define bit_3DNOWP	(1 << 30)
+#define bit_3DNOW	(1 << 31)

-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

-/* Used by 20020523-2.c and i386-sse-6.c, and possibly others.  */
-/* Plagarized from 20020523-2.c.  */
-/* Plagarized from gcc.  */
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register.  */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))

-#define bit_CMOV (1 << 15)
-#define bit_MMX (1 << 23)
-#define bit_SSE (1 << 25)
-#define bit_SSE2 (1 << 26)
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchg{l}\t{%%}ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  */
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("xchgl\t%%ebx, %1\n\t"			\
+	   "cpuid\n\t"					\
+	   "xchgl\t%%ebx, %1\n\t"			\
+	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif
+#else
+#define __cpuid(level, a, b, c, d)			\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d)		\
+  __asm__ ("cpuid\n\t"					\
+	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+	   : "0" (level), "2" (count))
+#endif
+
+/* Return highest supported input value for cpuid instruction.  ext can
+   be either 0x0 or 0x80000000 to return highest supported value for
+   basic or extended cpuid information.  Function returns 0 if cpuid
+   is not supported or whatever cpuid returns in eax register.  If sig
+   pointer is non-null, then first four bytes of the signature
+   (as found in ebx register) are returned in location pointed by sig.  */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+  unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+#if __GNUC__ >= 3
+  /* See if we can use cpuid.  On AMD64 we always can.  */
+  __asm__ ("pushf{l|d}\n\t"
+	   "pushf{l|d}\n\t"
+	   "pop{l}\t%0\n\t"
+	   "mov{l}\t{%0, %1|%1, %0}\n\t"
+	   "xor{l}\t{%2, %0|%0, %2}\n\t"
+	   "push{l}\t%0\n\t"
+	   "popf{l|d}\n\t"
+	   "pushf{l|d}\n\t"
+	   "pop{l}\t%0\n\t"
+	   "popf{l|d}\n\t"
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#else
+/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
+   nor alternatives in i386 code.  */
+  __asm__ ("pushfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "movl\t%0, %1\n\t"
+	   "xorl\t%2, %0\n\t"
+	   "pushl\t%0\n\t"
+	   "popfl\n\t"
+	   "pushfl\n\t"
+	   "popl\t%0\n\t"
+	   "popfl\n\t"
+	   : "=&r" (__eax), "=&r" (__ebx)
+	   : "i" (0x00200000));
+#endif
+
+  if (!((__eax ^ __ebx) & 0x00200000))
+    return 0;
+#endif
+
+  /* Host supports cpuid.  Return highest supported cpuid input value.  */
+  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+  if (__sig)
+    *__sig = __ebx;
+
+  return __eax;
+}
+
+/* Return cpuid data for requested cpuid level, as found in returned
+   eax, ebx, ecx and edx registers.  The function checks if cpuid is
+   supported and returns 1 for valid cpuid information or 0 for
+   unsupported cpuid level.  All pointers are required to be non-null.  */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+	     unsigned int *__eax, unsigned int *__ebx,
+	     unsigned int *__ecx, unsigned int *__edx)
+{
+  unsigned int __ext = __level & 0x80000000;
+
+  if (__get_cpuid_max (__ext, 0) < __level)
+    return 0;
+
+  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+  return 1;
+}

 #ifndef NOINLINE
 #define NOINLINE __attribute__ ((noinline))
@ -35,41 +191,10 @@ unsigned int i386_cpuid (void) NOINLINE;
 unsigned int NOINLINE
 i386_cpuid (void)
 {
-  int fl1, fl2;
+  unsigned int eax, ebx, ecx, edx;

-#ifndef __x86_64__
-  /* See if we can use cpuid.  On AMD64 we always can.  */
-  __asm__ ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
-	   "pushl %0; popfl; pushfl; popl %0; popfl"
-	   : "=&r" (fl1), "=&r" (fl2)
-	   : "i" (0x00200000));
-  if (((fl1 ^ fl2) & 0x00200000) == 0)
-    return (0);
-#endif
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;

-  /* Host supports cpuid.  See if cpuid gives capabilities, try
-     CPUID(0).  Preserve %ebx and %ecx; cpuid insn clobbers these, we
-     don't need their CPUID values here, and %ebx may be the PIC
-     register.  */
-#ifdef __x86_64__
-  __asm__ ("pushq %%rcx; pushq %%rbx; cpuid; popq %%rbx; popq %%rcx"
-	   : "=a" (fl1) : "0" (0) : "rdx", "cc");
-#else
-  __asm__ ("pushl %%ecx; pushl %%ebx; cpuid; popl %%ebx; popl %%ecx"
-	   : "=a" (fl1) : "0" (0) : "edx", "cc");
-#endif
-  if (fl1 == 0)
-    return (0);
-
-  /* Invoke CPUID(1), return %edx; caller can examine bits to
-     determine what's supported.  */
-#ifdef __x86_64__
-  __asm__ ("pushq %%rcx; pushq %%rbx; cpuid; popq %%rbx; popq %%rcx"
-	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
-#else
-  __asm__ ("pushl %%ecx; pushl %%ebx; cpuid; popl %%ebx; popl %%ecx"
-	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
-#endif
-
-  return fl2;
+  return edx;
 }