NEON vldN optimization

When combining multiple values as part of a NEON array load, do explicit shift/or rather than using gen_bfi. This avoids redundant mask operations. Signed-off-by: Paul Brook <paul@codesourcery.com>
2010-06-11 20:01:00 +01:00 · 2010-06-11 20:01:00 +01:00 · 41ba834146
commit 41ba834146
parent 0e2029a063
1 changed files with 4 additions and 2 deletions
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3854,7 +3854,8 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                            tcg_gen_addi_i32(addr, addr, stride);
                            tmp2 = gen_ld16u(addr, IS_USER(s));
                            tcg_gen_addi_i32(addr, addr, stride);
-                            gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
+                            tcg_gen_shli_i32(tmp2, tmp2, 16);
+                            tcg_gen_or_i32(tmp, tmp, tmp2);
                            dead_tmp(tmp2);
                            neon_store_reg(rd, pass, tmp);
                        } else {
@@ -3875,7 +3876,8 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                                if (n == 0) {
                                    tmp2 = tmp;
                                } else {
-                                    gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
+                                    tcg_gen_shli_i32(tmp, tmp, n * 8);
+                                    tcg_gen_or_i32(tmp2, tmp2, tmp);
                                    dead_tmp(tmp);
                                }
                            }