diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index e3c9f45926..e3b274502c 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -408,8 +408,11 @@ void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); #if TARGET_LONG_BITS == 64 @@ -417,14 +420,21 @@ void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); #define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 + #else #define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32 #define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32 #define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 #define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 #define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 #define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 #define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index c8fb403957..ffe55e908f 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -2678,6 +2678,13 @@ void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff << c); + tcg_gen_shli_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) { uint32_t mask = dup_const(MO_16, 0xffff << c); @@ -2736,6 +2743,13 @@ void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_gen_andi_i64(d, d, mask); } +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t mask = dup_const(MO_8, 0xff >> c); + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(d, d, mask); +} + void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) { uint32_t mask = dup_const(MO_16, 0xffff >> c); @@ -2808,6 +2822,20 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c) tcg_temp_free_i64(s); } +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) +{ + uint32_t s_mask = dup_const(MO_8, 0x80 >> c); + uint32_t c_mask = dup_const(MO_8, 0xff >> c); + TCGv_i32 s = tcg_temp_new_i32(); + + tcg_gen_shri_i32(d, a, c); + tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */ + tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */ + tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */ + tcg_gen_or_i32(d, d, s); /* include sign extension */ + tcg_temp_free_i32(s); +} + void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c) { uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);