target/arm: Optimize aarch64 rev16
It is much shorter to reverse all 4 half-words in parallel than to extract, reverse, and deposit each in turn.

Suggested-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
d97dd988ec
commit
abb1066df3
@ -4043,25 +4043,13 @@ static void handle_rev16(DisasContext *s, unsigned int sf,
|
||||
TCGv_i64 tcg_rd = cpu_reg(s, rd);
|
||||
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
|
||||
TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
|
||||
|
||||
tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
|
||||
tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
|
||||
|
||||
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
|
||||
tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
|
||||
tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
|
||||
tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
|
||||
|
||||
if (sf) {
|
||||
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
|
||||
tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
|
||||
tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
|
||||
tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
|
||||
|
||||
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
|
||||
tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
|
||||
tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
|
||||
}
|
||||
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
|
||||
tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
|
||||
tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
|
||||
tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
|
||||
tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
|
||||
|
||||
tcg_temp_free_i64(tcg_tmp);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user