tcg/s390x: Implement TCG_TARGET_HAS_sat_vec

The unsigned saturations are handled via generic code
using min/max.  The signed saturations are expanded using
double-sized arithmetic and a saturating pack.

Since all operations are done via expansion, do not
actually set TCG_TARGET_HAS_sat_vec.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-09-14 20:17:29 -07:00
parent 220db7a6c4
commit 4223c9c1c6
2 changed files with 66 additions and 0 deletions

View File

@ -291,7 +291,10 @@ typedef enum S390Opcode {
VRRc_VNO = 0xe76b,
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
VRRc_VS = 0xe7f7,
VRRa_VUPH = 0xe7d7,
VRRa_VUPL = 0xe7d6,
VRRc_VX = 0xe76d,
VRRf_VLVGP = 0xe762,
@ -2800,6 +2803,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
break;
case INDEX_op_s390_vuph_vec:
tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
break;
case INDEX_op_s390_vupl_vec:
tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
break;
case INDEX_op_s390_vpks_vec:
tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
@ -2842,6 +2855,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return -1;
case INDEX_op_mul_vec:
return vece < MO_64;
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:
return vece < MO_64 ? -1 : 0;
default:
return 0;
}
@ -2897,6 +2913,43 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
}
}
/*
 * Expand a signed saturating add/subtract of v1 and v2 into v0.
 *
 * Each input is unpacked with sign-extension into two temporaries via the
 * s390 vuph/vupl ops, add_sub_opc is applied at the next larger element
 * size, and the two results are combined into v0 with the saturating
 * pack op.  Callers must pass vece < MO_64.
 */
static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
{
    TCGv_vec hi_a = tcg_temp_new_vec(type);
    TCGv_vec hi_b = tcg_temp_new_vec(type);
    TCGv_vec lo_a = tcg_temp_new_vec(type);
    TCGv_vec lo_b = tcg_temp_new_vec(type);
    unsigned wide;

    tcg_debug_assert(vece < MO_64);

    /* Element size used for the intermediate arithmetic and the pack. */
    wide = vece + 1;

    /* Sign-extending unpack of both operands (vuph pair, then vupl pair). */
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(hi_a), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
              tcgv_vec_arg(hi_b), tcgv_vec_arg(v2));
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(lo_a), tcgv_vec_arg(v1));
    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
              tcgv_vec_arg(lo_b), tcgv_vec_arg(v2));

    /* Do the add/sub in place on the widened elements. */
    vec_gen_3(add_sub_opc, type, wide,
              tcgv_vec_arg(hi_a), tcgv_vec_arg(hi_a), tcgv_vec_arg(hi_b));
    vec_gen_3(add_sub_opc, type, wide,
              tcgv_vec_arg(lo_a), tcgv_vec_arg(lo_a), tcgv_vec_arg(lo_b));

    /* Saturating pack of the two widened results into the destination. */
    vec_gen_3(INDEX_op_s390_vpks_vec, type, wide,
              tcgv_vec_arg(v0), tcgv_vec_arg(hi_a), tcgv_vec_arg(lo_a));

    tcg_temp_free_vec(hi_a);
    tcg_temp_free_vec(hi_b);
    tcg_temp_free_vec(lo_a);
    tcg_temp_free_vec(lo_b);
}
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
@ -2920,6 +2973,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
tcg_temp_free_vec(t0);
break;
case INDEX_op_ssadd_vec:
expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
break;
case INDEX_op_sssub_vec:
expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
break;
default:
g_assert_not_reached();
}
@ -3080,6 +3140,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_sari_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_s390_vuph_vec:
case INDEX_op_s390_vupl_vec:
return C_O1_I1(v, v);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
@ -3099,6 +3161,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_smin_vec:
case INDEX_op_umax_vec:
case INDEX_op_umin_vec:
case INDEX_op_s390_vpks_vec:
return C_O1_I2(v, v, v);
case INDEX_op_rotls_vec:
case INDEX_op_shls_vec:

View File

@ -10,3 +10,6 @@
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
* consider these to be UNSPEC with names.
*/
/*
 * Opcodes used by the signed saturating add/sub expansion:
 * vuph/vupl unpack a vector with sign-extension, vpks packs with saturation.
 * NOTE(review): the numeric arguments appear to be output, input and
 * constant operand counts -- confirm against the DEF macro's definition.
 */
DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)