arc: Update ctz/clz patterns

The ARCv2 ISA introduces dedicated instructions (ffs/fls) that can be used
to implement ctz/clz. This patch adds support for them when available.

Corner case (zero input), as seen in the instruction trace:
mov            r0,0x0 : (w0) r0 <= 0x00000000 *
ffs            r1,r0 : (w0) r1 <= 0x0000001f *
fls            r2,r0 : (w0) r2 <= 0x00000000 *

gcc/
2021-05-10  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.h (CLZ_DEFINED_VALUE_AT_ZERO): Define.
	(CTZ_DEFINED_VALUE_AT_ZERO): Likewise.
	* config/arc/arc.md (clrsbsi2): Cleanup pattern.
	(norm_f): Likewise.
	(ffs): Likewise.
	(ffs_f): Likewise.
	(clzsi2): Use fls instruction when available.
	(ctzsi2): Use ffs instruction when available.

Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
This commit is contained in:
Claudiu Zissulescu 2021-05-10 09:03:41 +03:00
parent 3b9eb2f8a8
commit b70c7c06aa
2 changed files with 36 additions and 23 deletions

View File

@ -1445,6 +1445,12 @@ do { \
*/ */
#define SHIFT_COUNT_TRUNCATED 1 #define SHIFT_COUNT_TRUNCATED 1
/* Defines if the CLZ result is undefined or has a useful value. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 31, 2)
/* Defines if the CTZ result is undefined or has a useful value. */
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 31, 2)
/* We assume that the store-condition-codes instructions store 0 for false /* We assume that the store-condition-codes instructions store 0 for false
and some other value for true. This is the value stored for true. */ and some other value for true. This is the value stored for true. */
#define STORE_FLAG_VALUE 1 #define STORE_FLAG_VALUE 1

View File

@ -4396,24 +4396,20 @@ core_3, archs4x, archs4xd, archs4xd_slow"
;; Instructions generated through builtins ;; Instructions generated through builtins
(define_insn "clrsbsi2" (define_insn "clrsbsi2"
[(set (match_operand:SI 0 "dest_reg_operand" "=w,w") [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))] (clrsb:SI (match_operand:SI 1 "general_operand" "rL,Cal")))]
"TARGET_NORM" "TARGET_NORM"
"@ "norm\\t%0,%1"
norm \t%0, %1
norm \t%0, %1"
[(set_attr "length" "4,8") [(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core,two_cycle_core")]) (set_attr "type" "two_cycle_core,two_cycle_core")])
(define_insn "norm_f" (define_insn "norm_f"
[(set (match_operand:SI 0 "dest_reg_operand" "=w,w") [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal"))) (clrsb:SI (match_operand:SI 1 "general_operand" "rL,Cal")))
(set (reg:CC_ZN CC_REG) (set (reg:CC_ZN CC_REG)
(compare:CC_ZN (match_dup 1) (const_int 0)))] (compare:CC_ZN (match_dup 1) (const_int 0)))]
"TARGET_NORM" "TARGET_NORM"
"@ "norm.f\\t%0,%1"
norm.f\t%0, %1
norm.f\t%0, %1"
[(set_attr "length" "4,8") [(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core,two_cycle_core")]) (set_attr "type" "two_cycle_core,two_cycle_core")])
@ -4443,7 +4439,17 @@ core_3, archs4x, archs4xd, archs4xd_slow"
(clz:SI (match_operand:SI 1 "register_operand" ""))) (clz:SI (match_operand:SI 1 "register_operand" "")))
(clobber (match_dup 2))])] (clobber (match_dup 2))])]
"TARGET_NORM" "TARGET_NORM"
"operands[2] = gen_rtx_REG (CC_ZNmode, CC_REG);") "
if (TARGET_V2)
{
/* ARCv2's FLS is a bit more optimal than using norm. */
rtx tmp = gen_reg_rtx (SImode);
emit_insn (gen_fls (tmp, operands[1]));
emit_insn (gen_subsi3 (operands[0], GEN_INT (31), tmp));
DONE;
}
operands[2] = gen_rtx_REG (CC_ZNmode, CC_REG);
")
(define_insn_and_split "*arc_clzsi2" (define_insn_and_split "*arc_clzsi2"
[(set (match_operand:SI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r")
@ -4475,8 +4481,13 @@ core_3, archs4x, archs4xd, archs4xd_slow"
(match_operand:SI 1 "register_operand" "")] (match_operand:SI 1 "register_operand" "")]
"TARGET_NORM" "TARGET_NORM"
" "
emit_insn (gen_arc_ctzsi2 (operands[0], operands[1])); if (TARGET_V2)
DONE; {
emit_insn (gen_ffs (operands[0], operands[1]));
DONE;
}
emit_insn (gen_arc_ctzsi2 (operands[0], operands[1]));
DONE;
") ")
(define_insn_and_split "arc_ctzsi2" (define_insn_and_split "arc_ctzsi2"
@ -5575,26 +5586,22 @@ core_3, archs4x, archs4xd, archs4xd_slow"
(set_attr "type" "misc")]) (set_attr "type" "misc")])
(define_insn "ffs" (define_insn "ffs"
[(set (match_operand:SI 0 "dest_reg_operand" "=w,w") [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")] (unspec:SI [(match_operand:SI 1 "general_operand" "rL,Cal")]
UNSPEC_ARC_FFS))] UNSPEC_ARC_FFS))]
"TARGET_NORM && TARGET_V2" "TARGET_NORM && TARGET_V2"
"@ "ffs\\t%0,%1"
ffs \t%0, %1
ffs \t%0, %1"
[(set_attr "length" "4,8") [(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core,two_cycle_core")]) (set_attr "type" "two_cycle_core,two_cycle_core")])
(define_insn "ffs_f" (define_insn "ffs_f"
[(set (match_operand:SI 0 "dest_reg_operand" "=w,w") [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")] (unspec:SI [(match_operand:SI 1 "general_operand" "rL,Cal")]
UNSPEC_ARC_FFS)) UNSPEC_ARC_FFS))
(set (reg:CC_ZN CC_REG) (set (reg:CC_ZN CC_REG)
(compare:CC_ZN (match_dup 1) (const_int 0)))] (compare:CC_ZN (match_dup 1) (const_int 0)))]
"TARGET_NORM && TARGET_V2" "TARGET_NORM && TARGET_V2"
"@ "ffs.f\\t%0,%1"
ffs.f\t%0, %1
ffs.f\t%0, %1"
[(set_attr "length" "4,8") [(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core,two_cycle_core")]) (set_attr "type" "two_cycle_core,two_cycle_core")])