[ARM] Add ARMv8.2 FP16 vmul/vmla/vmls (by scalar)

gas/
  * config/tc-arm.c (do_neon_mac_maybe_scalar): Allow F16.
  * testsuite/gas/arm/armv8-2-fp16-simd.s: New tests.
  * testsuite/gas/arm/armv8-2-fp16-simd.d: New expected results.
  * testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for Thumb.
  * testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New warning results.
  * testsuite/gas/arm/simd_by_scalar_low_regbank.s: New test source.
  * testsuite/gas/arm/simd_by_scalar_low_regbank.d: New testcase.
  * testsuite/gas/arm/simd_by_scalar_low_regbank_thumb.d: Likewise for Thumb.
  * testsuite/gas/arm/simd_by_scalar_low_regbank.l: New warning results.

opcodes/
  * arm-dis.c: Support FP16 vmul, vmla, vmls (by scalar).
This commit is contained in:
Jiong Wang 2016-04-05 15:54:00 +01:00
parent 94e5c97160
commit 589a7d8830
12 changed files with 180 additions and 7 deletions

View File

@ -1,3 +1,15 @@
2016-04-05 Jiong Wang <jiong.wang@arm.com>
* config/tc-arm.c (do_neon_mac_maybe_scalar): Allow F16.
* testsuite/gas/arm/armv8-2-fp16-simd.s: New tests.
* testsuite/gas/arm/armv8-2-fp16-simd.d: New expected results.
* testsuite/gas/arm/armv8-2-fp16-simd-thumb.d: Likewise for Thumb.
* testsuite/gas/arm/armv8-2-fp16-simd-warning.l: New warning results.
* testsuite/gas/arm/simd_by_scalar_low_regbank.s: New test source.
* testsuite/gas/arm/simd_by_scalar_low_regbank.d: New testcase.
* testsuite/gas/arm/simd_by_scalar_low_regbank_thumb.d: Likewise for Thumb.
* testsuite/gas/arm/simd_by_scalar_low_regbank.l: New warning results.
2016-04-05 Claudiu Zissulescu <claziss@synopsys.com>
* config/tc-arc.c (assemble_insn): Prohibit pc-rel relocations for

View File

@ -14992,7 +14992,7 @@ do_neon_mac_maybe_scalar (void)
{
enum neon_shape rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
struct neon_type_el et = neon_check_type (3, rs,
N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY);
N_EQK, N_EQK, N_I16 | N_I32 | N_F_16_32 | N_KEY);
NEON_ENCODE (SCALAR, inst);
neon_mul_mac (et, neon_quad (rs));
}

View File

@ -145,3 +145,15 @@ Disassembly of section .text:
214: fff5 c5c4 vcle.f16 q14, q2, #0
218: ffb5 e602 vclt.f16 d14, d2, #0
21c: fff5 c644 vclt.f16 q14, q2, #0
220: ef90 7941 vmul.f16 d7, d0, d1\[0\]
224: ef98 4966 vmul.f16 d4, d8, d6\[2\]
228: ff90 49c8 vmul.f16 q2, q8, d0\[1\]
22c: ff90 49ef vmul.f16 q2, q8, d7\[3\]
230: ef94 2141 vmla.f16 d2, d4, d1\[0\]
234: ff98 4141 vmla.f16 q2, q4, d1\[0\]
238: ef94 2541 vmls.f16 d2, d4, d1\[0\]
23c: ff98 4541 vmls.f16 q2, q4, d1\[0\]
240: ef98 116f vmla.f16 d1, d8, d7\[3\]
244: ff90 21ef vmla.f16 q1, q8, d7\[3\]
248: ef98 156f vmls.f16 d1, d8, d7\[3\]
24c: ff90 25ef vmls.f16 q1, q8, d7\[3\]

View File

@ -135,3 +135,15 @@
[^:]*:224: Error: selected processor does not support fp16 instruction -- `vcle.f16 q14,q2,#0'
[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 d14,d2,#0'
[^:]*:224: Error: selected processor does not support fp16 instruction -- `vclt.f16 q14,q2,#0'
[^:]*:242: Error: selected processor does not support fp16 instruction -- `vmul.f16 d7,d0,d1\[0\]'
[^:]*:243: Error: selected processor does not support fp16 instruction -- `vmul.f16 d4,d8,d6\[2\]'
[^:]*:244: Error: selected processor does not support fp16 instruction -- `vmul.f16 q2,q8,d0\[1\]'
[^:]*:245: Error: selected processor does not support fp16 instruction -- `vmul.f16 q2,q8,d7\[3\]'
[^:]*:248: Error: selected processor does not support fp16 instruction -- `vmla.f16 d2,d4,d1\[0\]'
[^:]*:248: Error: selected processor does not support fp16 instruction -- `vmla.f16 q2,q4,d1\[0\]'
[^:]*:248: Error: selected processor does not support fp16 instruction -- `vmls.f16 d2,d4,d1\[0\]'
[^:]*:248: Error: selected processor does not support fp16 instruction -- `vmls.f16 q2,q4,d1\[0\]'
[^:]*:249: Error: selected processor does not support fp16 instruction -- `vmla.f16 d1,d8,d7\[3\]'
[^:]*:249: Error: selected processor does not support fp16 instruction -- `vmla.f16 q1,q8,d7\[3\]'
[^:]*:249: Error: selected processor does not support fp16 instruction -- `vmls.f16 d1,d8,d7\[3\]'
[^:]*:249: Error: selected processor does not support fp16 instruction -- `vmls.f16 q1,q8,d7\[3\]'

View File

@ -145,3 +145,15 @@ Disassembly of section .text:
214: f3f5c5c4 vcle.f16 q14, q2, #0
218: f3b5e602 vclt.f16 d14, d2, #0
21c: f3f5c644 vclt.f16 q14, q2, #0
220: f2907941 vmul.f16 d7, d0, d1\[0\]
224: f2984966 vmul.f16 d4, d8, d6\[2\]
228: f39049c8 vmul.f16 q2, q8, d0\[1\]
22c: f39049ef vmul.f16 q2, q8, d7\[3\]
230: f2942141 vmla.f16 d2, d4, d1\[0\]
234: f3984141 vmla.f16 q2, q4, d1\[0\]
238: f2942541 vmls.f16 d2, d4, d1\[0\]
23c: f3984541 vmls.f16 q2, q4, d1\[0\]
240: f298116f vmla.f16 d1, d8, d7\[3\]
244: f39021ef vmla.f16 q1, q8, d7\[3\]
248: f298156f vmls.f16 d1, d8, d7\[3\]
24c: f39025ef vmls.f16 q1, q8, d7\[3\]

View File

@ -222,3 +222,28 @@ func:
# neon_fcmp_imm0
f16_dq_fcmp_imm0 14, 2
.macro f16_d_by_scalar op reg0 reg1 reg2 idx
\op d\reg0, d\reg1, d\reg2[\idx]
.endm
.macro f16_q_by_scalar op reg0 reg1 reg2 idx
\op q\reg0, q\reg1, d\reg2[\idx]
.endm
.macro f16_dq_fmacmaybe_by_scalar reg0 reg1 reg2 idx
.irp op, vmla.f16, vmls.f16
\op d\reg0, d\reg1, d\reg2[\idx]
\op q\reg0, q\reg1, d\reg2[\idx]
.endr
.endm
# neon_mul (by scalar)
f16_d_by_scalar vmul.f16 7 0 1 0
f16_d_by_scalar vmul.f16 4 8 6 2
f16_q_by_scalar vmul.f16 2 8 0 1
f16_q_by_scalar vmul.f16 2 8 7 3
# neon_mac_maybe_scalar (by scalar)
f16_dq_fmacmaybe_by_scalar 2 4 1 0
f16_dq_fmacmaybe_by_scalar 1 8 7 3

View File

@ -0,0 +1,4 @@
#name: VMUL/VMLA/VMLS by scalar reg restriction
#source: simd_by_scalar_low_regbank.s
#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8
#error-output: simd_by_scalar_low_regbank.l

View File

@ -0,0 +1,49 @@
[^:]*: Assembler messages:
[^:]*:21: Error: scalar out of range for multiply instruction -- `vmul.i32 d3,d12,d7\[2\]'
[^:]*:21: Error: scalar out of range for multiply instruction -- `vmul.i32 q3,q12,d7\[2\]'
[^:]*:21: Error: scalar out of range for multiply instruction -- `vmul.f32 d3,d12,d7\[2\]'
[^:]*:21: Error: scalar out of range for multiply instruction -- `vmul.f32 q3,q12,d7\[2\]'
[^:]*:22: Error: scalar out of range for multiply instruction -- `vmul.i16 d4,d9,d8\[1\]'
[^:]*:22: Error: scalar out of range for multiply instruction -- `vmul.i16 q4,q9,d8\[1\]'
[^:]*:22: Error: scalar out of range for multiply instruction -- `vmul.f16 d4,d9,d8\[1\]'
[^:]*:22: Error: scalar out of range for multiply instruction -- `vmul.f16 q4,q9,d8\[1\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.i16 d13,d6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.i16 q13,q6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.f16 d13,d6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.f16 q13,q6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.i32 d13,d6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.i32 q13,q6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.f32 d13,d6,d15\[3\]'
[^:]*:23: Error: scalar out of range for multiply instruction -- `vmul.f32 q13,q6,d15\[3\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmla.i32 d5,d4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmla.i32 q5,q4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmla.f32 d5,d4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmla.f32 q5,q4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmls.i32 d5,d4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmls.i32 q5,q4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmls.f32 d5,d4,d6\[2\]'
[^:]*:25: Error: scalar out of range for multiply instruction -- `vmls.f32 q5,q4,d6\[2\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmla.i16 d4,d13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmla.i16 q4,q13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmla.f16 d4,d13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmla.f16 q4,q13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmls.i16 d4,d13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmls.i16 q4,q13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmls.f16 d4,d13,d10\[1\]'
[^:]*:26: Error: scalar out of range for multiply instruction -- `vmls.f16 q4,q13,d10\[1\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.i16 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.i16 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.i32 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.i32 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.f16 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.f16 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.f32 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmla.f32 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.i16 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.i16 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.i32 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.i32 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.f16 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.f16 q12,q6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.f32 d12,d6,d13\[3\]'
[^:]*:27: Error: scalar out of range for multiply instruction -- `vmls.f32 q12,q6,d13\[3\]'

View File

@ -0,0 +1,27 @@
.macro vmul_iter reg0 reg1 reg2 idx
.irp op, vmul.i16 vmul.f16 vmul.i32 vmul.f32
\op d\reg0, d\reg1, d\reg2[\idx]
\op q\reg0, q\reg1, d\reg2[\idx]
.endr
.endm
.macro vmul_acc_iter reg0 reg1 reg2 idx
.irp op, vmla.i16 vmla.i32 vmla.f16 vmla.f32 vmls.i16 vmls.i32 vmls.f16 vmls.f32
\op d\reg0, d\reg1, d\reg2[\idx]
\op q\reg0, q\reg1, d\reg2[\idx]
.endr
.endm
# There are two restrictions on the scalar operand:
# * The scalar operand is restricted to D0-D7 if size is 16bit wide,
# or D0 - D15 otherwise.
# * The scalar index should be within range, 0-3 if size is 16bit wide,
# 0-1 if size is 32bit wide.
vmul_iter 0 1 3 0
vmul_iter 3 12 7 2
vmul_iter 4 9 8 1
vmul_iter 13 6 15 3
vmul_acc_iter 2 7 1 0
vmul_acc_iter 5 4 6 2
vmul_acc_iter 4 13 10 1
vmul_acc_iter 12 6 13 3

View File

@ -0,0 +1,4 @@
#name: VMUL/VMLA/VMLS by scalar reg restriction (Thumb)
#source: simd_by_scalar_low_regbank.s
#as: -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mthumb
#error-output: simd_by_scalar_low_regbank.l

View File

@ -1,3 +1,7 @@
2016-04-05 Jiong Wang <jiong.wang@arm.com>
* arm-dis.c: Support FP16 vmul, vmla, vmls (by scalar).
2016-03-31 Andrew Burgess <andrew.burgess@embecosm.com>
* arc-opc.c (arc_operands): Fix operand flags for NPS_R_DST, and

View File

@ -1543,19 +1543,25 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800040, 0xff800f50, "vmla%c.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800140, 0xff800f50, "vmla%c.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
0xf2800140, 0xff900f50, "vmla%c.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf2900140, 0xffb00f50, "vmla%c.f16\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800340, 0xff800f50, "vqdmlal%c.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800440, 0xff800f50, "vmls%c.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800540, 0xff800f50, "vmls%c.f%20-21S6\t%12-15,22D, %16-19,7D, %D"},
0xf2800540, 0xff900f50, "vmls%c.f%20-21S6\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf2900540, 0xffb00f50, "vmls%c.f16\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800740, 0xff800f50, "vqdmlsl%c.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800840, 0xff800f50, "vmul%c.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800940, 0xff800f50, "vmul%c.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
0xf2800940, 0xff900f50, "vmul%c.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf2900940, 0xffb00f50, "vmul%c.f16\t%12-15,22D, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf2800b40, 0xff800f50, "vqdmull%c.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
@ -1565,15 +1571,21 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800040, 0xff800f50, "vmla%c.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800140, 0xff800f50, "vmla%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
0xf3800140, 0xff900f50, "vmla%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf3900140, 0xffb00f50, "vmla%c.f16\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800440, 0xff800f50, "vmls%c.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800540, 0xff800f50, "vmls%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
0xf3800540, 0xff900f50, "vmls%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf3900540, 0xffb00f50, "vmls%c.f16\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800840, 0xff800f50, "vmul%c.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800940, 0xff800f50, "vmul%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
0xf3800940, 0xff900f50, "vmul%c.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (ARM_EXT2_FP16_INST),
0xf3900940, 0xffb00f50, "vmul%c.f16\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),
0xf3800c40, 0xff800f50, "vqdmulh%c.s%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
{ARM_FEATURE_COPROC (FPU_NEON_EXT_V1),