re PR target/12902 (Invalid assembly generated when using SSE / xmmintrin.h)

PR target/12902
        * config/i386/i386.md (sse_movhps, sse_movlps): Remove.
        (sse_shufps): Change operand 3 to const_int_operand.
        (sse2_storelps): Fix typo in template.
        (sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
        * config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
        (ix86_expand_builtin): Likewise.

From-SVN: r92967
This commit is contained in:
Richard Henderson 2005-01-05 11:14:39 -08:00
parent a8182d3719
commit 2cdb314898
4 changed files with 143 additions and 59 deletions

View File

@ -1,4 +1,14 @@
2004-01-05 Julian Brown <julian@codesourcery.com>
2005-01-05 Richard Henderson <rth@redhat.com>
PR target/12902
* config/i386/i386.md (sse_movhps, sse_movlps): Remove.
(sse_shufps): Change operand 3 to const_int_operand.
(sse2_storelps): Fix typo in template.
(sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
* config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
(ix86_expand_builtin): Likewise.
2005-01-05 Julian Brown <julian@codesourcery.com>
* config/arm/arm.c (arm_return_in_memory): Treat complex types
as aggregates for AAPCS ABIs.

View File

@ -1,6 +1,6 @@
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004 Free Software Foundation, Inc.
2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of GCC.
@ -7645,11 +7645,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
else
emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
op0 = gen_lowpart (V4SFmode, op0);
m = adjust_address (op1, V4SFmode, 0);
emit_insn (gen_sse_movlps (op0, op0, m));
m = adjust_address (op1, V4SFmode, 8);
emit_insn (gen_sse_movhps (op0, op0, m));
m = adjust_address (op1, V2SFmode, 0);
emit_insn (gen_sse_loadlps (op0, op0, m));
m = adjust_address (op1, V2SFmode, 8);
emit_insn (gen_sse_loadhps (op0, op0, m));
}
}
else if (MEM_P (op0))
@ -7684,11 +7683,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
}
else
{
op1 = gen_lowpart (V4SFmode, op1);
m = adjust_address (op0, V4SFmode, 0);
emit_insn (gen_sse_movlps (m, m, op1));
m = adjust_address (op0, V4SFmode, 8);
emit_insn (gen_sse_movhps (m, m, op1));
m = adjust_address (op0, V2SFmode, 0);
emit_insn (gen_sse_storelps (m, op1));
m = adjust_address (op0, V2SFmode, 8);
emit_insn (gen_sse_storehps (m, op1));
return;
}
}
@ -13508,8 +13506,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_LOADLPS:
case IX86_BUILTIN_LOADHPD:
case IX86_BUILTIN_LOADLPD:
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
: CODE_FOR_sse2_loadlpd);
arg0 = TREE_VALUE (arglist);
@ -13535,28 +13533,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_STOREHPS:
case IX86_BUILTIN_STORELPS:
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
: CODE_FOR_sse_movlps);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
mode0 = insn_data[icode].operand[1].mode;
mode1 = insn_data[icode].operand[2].mode;
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return const0_rtx;
case IX86_BUILTIN_STOREHPD:
case IX86_BUILTIN_STORELPD:
icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
: fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
: fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
: CODE_FOR_sse2_storelpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));

View File

@ -1,6 +1,6 @@
;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
;; 2001, 2002, 2003, 2004
;; 2001, 2002, 2003, 2004, 2005
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
@ -20335,29 +20335,98 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(define_insn "sse_movhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(vec_merge:V4SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
;; Store the high V2SF of the source vector to the destination.
;; The pattern selects elements 2 and 3 of V4SF operand 1 into V2SF
;; operand 0.  Three alternatives are provided:
;;   0: memory destination, register source   -> movhps
;;   1: register destination, register source -> movhlps
;;   2: register destination, offsettable memory source -> "#", i.e. no
;;      assembly is emitted directly and the insn is expected to be split.
;; NOTE(review): two "mode" attribute lines follow the "type" attribute
;; (V4SF, then V2SF); this looks like the old and new sides of a diff
;; shown together -- confirm that only one is meant to remain.
(define_insn "sse_storehps"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
movhps\t{%1, %0|%0, %1}
movhlps\t{%1, %0|%0, %1}
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(set_attr "mode" "V2SF")])
(define_insn "sse_movlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(vec_merge:V4SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
;; Once reload has completed, selecting elements 2 and 3 of a V4SF that
;; lives in memory into a register is rewritten as an ordinary V2SF move
;; from the same memory reference re-addressed at byte offset 8.
(define_split
[(set (match_operand:V2SF 0 "register_operand" "")
(vec_select:V2SF
(match_operand:V4SF 1 "memory_operand" "")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE && reload_completed"
[(const_int 0)]
{
/* Offset 8 with V2SFmode addresses the upper half of the V4SF operand.  */
emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
DONE;
})
;; Load the high V2SF of the target vector from the source vector.
;; The result is elements 0 and 1 of V4SF operand 1 concatenated with
;; V2SF operand 2.  Operand 1 carries a "0" constraint in every
;; alternative, so it must be the same location as the destination.
;; Alternatives:
;;   0: register destination, memory operand 2   -> movhps
;;   1: register destination, register operand 2 -> movlhps
;;   2: offsettable memory destination, register operand 2 -> "#", i.e.
;;      no assembly is emitted directly and the insn is expected to be split.
;; NOTE(review): two "mode" attribute lines follow the "type" attribute
;; (V4SF, then V2SF); this looks like the old and new sides of a diff
;; shown together -- confirm that only one is meant to remain.
(define_insn "sse_loadhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
(vec_concat:V4SF
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
(parallel [(const_int 0) (const_int 1)]))
(match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
"TARGET_SSE"
"@
movhps\t{%2, %0|%0, %2}
movlhps\t{%2, %0|%0, %2}
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
(set_attr "mode" "V2SF")])
;; Once reload has completed, replacing the high half of a V4SF that
;; lives in memory with a V2SF register is rewritten as an ordinary V2SF
;; store to the same memory reference re-addressed at byte offset 8.
;; The low half needs no code: the pattern only matches when it is taken
;; from the destination itself (match_dup 0).
;;
;; The register source is numbered operand 1 so that it is the operand
;; the preparation statements below actually read; the pattern binds
;; only operands 0 and 1.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_dup 0)
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 1 "register_operand" "")))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* Offset 8 with V2SFmode addresses the upper half of the V4SF operand.  */
  emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[1]);
  DONE;
})
;; Store the low V2SF of the source vector to the destination.
;; This expander never emits the vec_select pattern itself: it takes the
;; V2SF low part of operand 1 and emits an ordinary move to operand 0,
;; then finishes with DONE.
(define_expand "sse_storelps"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
{
/* Reinterpret the V4SF source as its V2SF low part.  */
operands[1] = gen_lowpart (V2SFmode, operands[1]);
emit_move_insn (operands[0], operands[1]);
DONE;
})
;; Load the low V2SF of the target vector from the source vector.
;; The result is V2SF operand 2 concatenated with elements 2 and 3 of
;; V4SF operand 1.  Alternatives:
;;   0: register destination, memory operand 2, operand 1 tied to the
;;      destination -> movlps load
;;   1: register destination, operand 2 tied to the destination,
;;      operand 1 in a register -> shufps with an immediate
;;   2: memory destination, register operand 2, operand 1 tied to the
;;      destination -> movlps store
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(vec_concat:V4SF
(match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_SSE"
{
/* One output template per alternative, indexed by which_alternative.  */
static const char * const alt[] = {
"movlps\t{%2, %0|%0, %2}",
"shufps\t{%2, %1, %0|%0, %1, %2}",
"movlps\t{%2, %0|%0, %2}"
};
/* In alternative 1 operand 2 is tied to operand 0, so its slot is reused
to carry the shufps immediate.  0xe4 is binary 11 10 01 00, i.e. the
element selectors 0, 1, 2, 3 in order: the low two elements come from
the destination and the high two from operand 1.  */
if (which_alternative == 1)
operands[2] = GEN_INT (0xe4);
return alt[which_alternative];
}
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2SF")])
(define_expand "sse_loadss"
[(match_operand:V4SF 0 "register_operand" "")
@ -20405,10 +20474,9 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "immediate_operand" "i")]
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_SHUFFLE))]
"TARGET_SSE"
;; @@@ check operand order for intel/nonintel syntax
"shufps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
@ -23902,7 +23970,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
(parallel [(const_int 1)])))]
(parallel [(const_int 0)])))]
"TARGET_SSE2"
{
operands[1] = gen_lowpart (DFmode, operands[1]);
@ -23910,7 +23978,7 @@
DONE;
})
;; Load the load double of the target vector from the source scalar.
;; Load the low double of the target vector from the source scalar.
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
(vec_concat:V2DF

View File

@ -0,0 +1,25 @@
/* PR 12902 */
/* { dg-do compile } */
/* { dg-options "-O1 -msse" } */
#include <xmmintrin.h>
/* Overlays four ints, four floats and one __m128 in the same storage.
   Only the __m128 member is referenced by swizzle in this test.  */
typedef union
{
int i[4];
float f[4];
__m128 v;
} vector4_t;
/* Applies _mm_loadl_pi to b->v using the __m64 at A, and to c->v using
   the __m64 immediately following it, storing each result back in place.
   The function body is the code whose generated assembly the scan
   directives in this file check.  */
void
swizzle (const void *a, vector4_t * b, vector4_t * c)
{
b->v = _mm_loadl_pi (b->v, (__m64 *) a);
c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
}
/* While one legal rendering of each statement would be movaps;movlps;movaps,
we can implement this with just movlps;movlps. Since we do now, anything
less would be a regression. */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler "movlps" } } */