re PR target/79170 (memcmp builtin expansion sequence can overflow)

2017-01-27  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	PR target/79170
	* gcc.dg/memcmp-1.c: Improved to catch failures seen in PR 79170.

2017-01-27  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	PR target/79170
	* config/rs6000/altivec.md (*setb_internal): Rename to setb_signed.
	(setb_unsigned): New pattern for setb with CCUNS.
	* config/rs6000/rs6000.c (expand_block_compare): Use a different
	subfc./subfe sequence to avoid overflow problems.  Generate a
	shorter sequence with cmpld/setb for power9.
	* config/rs6000/rs6000.md (subf<mode>3_carry_dot2): Add a new pattern
	for generating subfc. instruction.
	(cmpstrsi): Add TARGET_POPCNTD predicate as the generated sequence
	now uses this instruction.

From-SVN: r245041
Aaron Sawdey 2017-01-30 23:24:24 +00:00 committed by Aaron Sawdey
parent 8657c838e2
commit 3095f65123
6 changed files with 591 additions and 391 deletions
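
To make the overflow in the title concrete, here is a minimal C sketch (an illustration only, not part of the patch). The expansion compares 8-byte chunks as unsigned 64-bit values; deriving the memcmp result from the sign of their raw difference goes wrong once that difference no longer fits in a signed 64-bit value, which is why the new code derives the sign from the borrow of a subfc./subfe pair, or from cmpld/setb on power9.

#include <stdint.h>
#include <stdio.h>

/* Wrong: take the sign of the raw 64-bit difference.  */
static int chunk_cmp_by_subtract (uint64_t a, uint64_t b)
{
  int64_t d = (int64_t) (a - b);   /* can overflow the signed range */
  return (d > 0) - (d < 0);
}

/* Right: derive the ordering from the unsigned comparison itself, which
   is what the subfc./subfe and cmpld/setb sequences in this patch do.  */
static int chunk_cmp_by_compare (uint64_t a, uint64_t b)
{
  return (a > b) - (a < b);
}

int main (void)
{
  uint64_t a = 0x8000000000000000ULL, b = 0;   /* a > b as unsigned */
  printf ("subtract: %d  compare: %d\n",
          chunk_cmp_by_subtract (a, b), chunk_cmp_by_compare (a, b));
  /* Prints "subtract: -1  compare: 1"; the subtraction-based result has
     the wrong sign because a - b does not fit in int64_t.  */
  return 0;
}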

gcc/ChangeLog

@ -1,3 +1,16 @@
2017-01-30  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	PR target/79170
	* config/rs6000/altivec.md (*setb_internal): Rename to setb_signed.
	(setb_unsigned): New pattern for setb with CCUNS.
	* config/rs6000/rs6000.c (expand_block_compare): Use a different
	subfc./subfe sequence to avoid overflow problems.  Generate a
	shorter sequence with cmpld/setb for power9.
	* config/rs6000/rs6000.md (subf<mode>3_carry_dot2): Add a new pattern
	for generating subfc. instruction.
	(cmpstrsi): Add TARGET_POPCNTD predicate as the generated sequence
	now uses this instruction.

2017-01-30  Ian Lance Taylor  <iant@google.com>

	PR debug/79289

gcc/config/rs6000/altivec.md

@ -3838,7 +3838,7 @@
;; Otherwise, set operand 0 to 0. Note that the result stored into
;; register operand 0 is non-zero iff either the LT or GT bits are on
;; within condition register operand 1.
(define_insn "*setb_internal"
(define_insn "setb_signed"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y")
(const_int 0))
@ -3851,6 +3851,19 @@
"setb %0,%1"
[(set_attr "type" "logical")])
(define_insn "setb_unsigned"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
(if_then_else (gtu (match_dup 1)
(const_int 0))
(const_int 1)
(const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
;; Test byte within two ranges.
;;
;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx
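
As a rough C model (an assumption for exposition, not GCC code) of the value the setb patterns above describe: setb reads one CR field and produces -1, 1, or 0 in a GPR depending on whether the LT, GT, or EQ bit is set. For setb_unsigned the CR field is written by an unsigned compare such as cmpld, so the effect is:

/* Hypothetical model of setb consuming the result of an unsigned compare.  */
static inline int setb_unsigned_model (unsigned long a, unsigned long b)
{
  if (a < b)        /* CCUNS "less than" bit would be set */
    return -1;
  if (a > b)        /* CCUNS "greater than" bit would be set */
    return 1;
  return 0;         /* equal */
}

This is exactly the -1/0/1 shape memcmp needs, which is why the power9 path in expand_block_compare can finish with a single cmpld/setb pair.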

gcc/config/rs6000/rs6000.c

@ -17292,7 +17292,7 @@ rs6000_init_builtins (void)
TYPE_NAME (V16QI_type_node) = tdecl;
tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
TYPE_NAME ( bool_V16QI_type_node) = tdecl;
TYPE_NAME (bool_V16QI_type_node) = tdecl;
tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
@ -19458,24 +19458,31 @@ expand_block_compare (rtx operands[])
rtx src1 = orig_src1;
rtx src2 = orig_src2;
/* If this is not a fixed size compare, just call memcmp */
/* This case is complicated to handle because the subtract
with carry instructions do not generate the 64-bit
carry and so we must emit code to calculate it ourselves.
We choose not to implement this yet. */
if (TARGET_32BIT && TARGET_POWERPC64)
return false;
/* If this is not a fixed size compare, just call memcmp. */
if (!CONST_INT_P (bytes_rtx))
return false;
/* This must be a fixed size alignment */
/* This must be a fixed size alignment. */
if (!CONST_INT_P (align_rtx))
return false;
unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
/* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff */
/* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
|| SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
return false;
gcc_assert (GET_MODE (target) == SImode);
/* Anything to move? */
/* Anything to move? */
unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
if (bytes == 0)
return true;
@ -19490,6 +19497,13 @@ expand_block_compare (rtx operands[])
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
/* P7/P8 code uses cond for subfc. but P9 uses
it for cmpld which needs CCUNSmode. */
rtx cond;
if (TARGET_P9_MISC)
cond = gen_reg_rtx (CCUNSmode);
else
cond = gen_reg_rtx (CCmode);
/* If we have an LE target without ldbrx and word_mode is DImode,
then we must avoid using word_mode. */
@ -19512,27 +19526,35 @@ expand_block_compare (rtx operands[])
rtx convert_label = NULL;
rtx final_label = NULL;
/* Example of generated code for 11 bytes aligned 1 byte:
.L10:
ldbrx 10,6,9
ldbrx 9,7,9
subf. 9,9,10
bne 0,.L8
addi 9,4,7
lwbrx 10,0,9
addi 9,5,7
lwbrx 9,0,9
/* Example of generated code for 18 bytes aligned 1 byte.
Compiled with -fno-reorder-blocks for clarity.
ldbrx 10,31,8
ldbrx 9,7,8
subfc. 9,9,10
bne 0,.L6487
addi 9,12,8
addi 5,11,8
ldbrx 10,0,9
ldbrx 9,0,5
subfc. 9,9,10
bne 0,.L6487
addi 9,12,16
lhbrx 10,0,9
addi 9,11,16
lhbrx 9,0,9
subf 9,9,10
b .L9
.L8: # convert_label
cntlzd 9,9
addi 9,9,-1
xori 9,9,0x3f
.L9: # final_label
b .L6488
.p2align 4,,15
.L6487: #convert_label
popcntd 9,9
subfe 10,10,10
or 9,9,10
.L6488: #final_label
extsw 10,9
We start off with DImode and have a compare/branch to something
with a smaller mode then we will need a block with the DI->SI conversion
that may or may not be executed. */
We start off with DImode for two blocks that jump to the DI->SI conversion
if the difference is found there, then a final block of HImode that skips
the DI->SI conversion. */
while (bytes > 0)
{
@ -19600,26 +19622,18 @@ expand_block_compare (rtx operands[])
}
}
/* We previously did a block that need 64->32 conversion but
the current block does not, so a label is needed to jump
to the end. */
if (generate_6432_conversion && !final_label
&& GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
final_label = gen_label_rtx ();
/* Do we need a 64->32 conversion block? */
int remain = bytes - cmp_bytes;
if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
{
generate_6432_conversion = true;
if (remain > 0 && !convert_label)
convert_label = gen_label_rtx ();
}
if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
{
/* Target is larger than load size so we don't need to
reduce result size. */
/* We previously did a block that need 64->32 conversion but
the current block does not, so a label is needed to jump
to the end. */
if (generate_6432_conversion && !final_label)
final_label = gen_label_rtx ();
if (remain > 0)
{
/* This is not the last block, branch to the end if the result
@ -19627,11 +19641,12 @@ expand_block_compare (rtx operands[])
if (!final_label)
final_label = gen_label_rtx ();
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
rtx cond = gen_reg_rtx (CCmode);
rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
rtx cr = gen_reg_rtx (CCmode);
rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
emit_insn (gen_movsi (target,
gen_lowpart (SImode, tmp_reg_src2)));
rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
fin_ref, pc_rtx);
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
@ -19662,7 +19677,11 @@ expand_block_compare (rtx operands[])
}
else
{
/* Do we need a 64->32 conversion block? We need the 64->32
conversion even if target size == load_mode size because
the subtract generates one extra bit. */
generate_6432_conversion = true;
if (remain > 0)
{
if (!convert_label)
@ -19670,9 +19689,22 @@ expand_block_compare (rtx operands[])
/* Compare to zero and branch to convert_label if not zero. */
rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
rtx cond = gen_reg_rtx (CCmode);
rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
if (TARGET_P9_MISC)
{
/* Generate a compare, and convert with a setb later. */
rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
tmp_reg_src2);
emit_insn (gen_rtx_SET (cond, cmp));
}
else
/* Generate a subfc. and use the longer
sequence for conversion. */
if (TARGET_64BIT)
emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
tmp_reg_src1, cond));
else
emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
tmp_reg_src1, cond));
rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
cvt_ref, pc_rtx);
@ -19682,10 +19714,21 @@ expand_block_compare (rtx operands[])
}
else
{
/* Just do the subtract. Since this is the last block the
convert code will be generated immediately following. */
emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
tmp_reg_src2));
/* Just do the subtract/compare. Since this is the last block
the convert code will be generated immediately following. */
if (TARGET_P9_MISC)
{
rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
tmp_reg_src2);
emit_insn (gen_rtx_SET (cond, cmp));
}
else
if (TARGET_64BIT)
emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
tmp_reg_src1));
else
emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
tmp_reg_src1));
}
}
@ -19699,12 +19742,46 @@ expand_block_compare (rtx operands[])
emit_label (convert_label);
/* We need to produce DI result from sub, then convert to target SI
while maintaining <0 / ==0 / >0 properties.
Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
while maintaining <0 / ==0 / >0 properties. This sequence works:
subfc L,A,B
subfe H,H,H
popcntd L,L
rldimi L,H,6,0
This is an alternate one Segher cooked up if somebody
wants to expand this for something that doesn't have popcntd:
subfc L,a,b
subfe H,x,x
addic t,L,-1
subfe v,t,L
or z,v,H
And finally, p9 can just do this:
cmpld A,B
setb r */
if (TARGET_P9_MISC)
{
emit_insn (gen_setb_unsigned (target, cond));
}
else
{
if (TARGET_64BIT)
{
rtx tmp_reg_ca = gen_reg_rtx (DImode);
emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
}
else
{
rtx tmp_reg_ca = gen_reg_rtx (SImode);
emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
}
}
}
if (final_label)
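
The comment in the hunk above lists the sequences used to turn the 64-bit chunk difference into the 32-bit memcmp result. As an illustrative C model of the non-power9 path (an assumption for exposition, not the GCC code itself), with src1 and src2 being the byte-reversed chunks of the two arguments:

#include <stdint.h>

static int convert_model (uint64_t src1, uint64_t src2)
{
  uint64_t diff = src1 - src2;               /* subfc: the difference       */
  int no_borrow = (src1 >= src2);            /* subfc also sets CA          */
  int64_t h = no_borrow ? 0 : -1;            /* subfe H,H,H yields CA - 1   */
  int64_t pop = __builtin_popcountll (diff); /* popcntd: 0 iff chunks equal */
  int64_t res = pop | h;                     /* or: merge sign and nonzero  */
  /* res is -1 if src1 < src2, 0 if they are equal, and 1..64 if
     src1 > src2, so narrowing it to the SImode result cannot lose the
     sign the way a raw 64-bit subtraction could.  */
  return (int) res;
}

On power9 the same property is obtained more directly with cmpld into a CCUNS register followed by setb, handled by the setb_unsigned pattern added in altivec.md.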
@ -21246,7 +21323,7 @@ register_to_reg_type (rtx reg, bool *is_altivec)
regno = true_regnum (reg);
if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
return PSEUDO_REG_TYPE;
}
}
gcc_assert (regno >= 0);

gcc/config/rs6000/rs6000.md

@ -2068,6 +2068,35 @@
"subfic %0,%1,%2"
[(set_attr "type" "add")])
(define_insn_and_split "subf<mode>3_carry_dot2"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
(compare:CC (minus:P (match_operand:P 2 "gpc_reg_operand" "r,r")
(match_operand:P 1 "gpc_reg_operand" "r,r"))
(const_int 0)))
(set (match_operand:P 0 "gpc_reg_operand" "=r,r")
(minus:P (match_dup 2)
(match_dup 1)))
(set (reg:P CA_REGNO)
(leu:P (match_dup 1)
(match_dup 2)))]
"<MODE>mode == Pmode"
"@
subfc. %0,%1,%2
#"
"&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
[(parallel [(set (match_dup 0)
(minus:P (match_dup 2)
(match_dup 1)))
(set (reg:P CA_REGNO)
(leu:P (match_dup 1)
(match_dup 2)))])
(set (match_dup 3)
(compare:CC (match_dup 0)
(const_int 0)))]
""
[(set_attr "type" "add")
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
(define_insn "subf<mode>3_carry"
[(set (match_operand:P 0 "gpc_reg_operand" "=r")
@ -9146,11 +9175,11 @@
(match_operand:BLK 2)))
(use (match_operand:SI 3))
(use (match_operand:SI 4))])]
""
"TARGET_POPCNTD"
{
if (expand_block_compare (operands))
DONE;
else
else
FAIL;
})

gcc/testsuite/ChangeLog

@ -1,3 +1,8 @@
2017-01-30  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	PR target/79170
	* gcc.dg/memcmp-1.c: Improved to catch failures seen in PR 79170.

2017-01-30  Martin Sebor  <msebor@redhat.com>

	PR testsuite/79293

gcc/testsuite/gcc.dg/memcmp-1.c

@ -1,58 +1,124 @@
/* Test memcmp builtin expansion for compilation and proper execution. */
/* Test memcmp/strncmp builtin expansion for compilation and proper execution. */
/* { dg-do run } */
/* { dg-options "-O2" } */
/* { dg-require-effective-target ptr32plus } */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define RUN_TEST(SZ, ALIGN) test_memcmp_ ## SZ ## _ ## ALIGN ()
int lib_memcmp(const void *a, const void *b, size_t n) asm("memcmp");
int lib_strncmp(const char *a, const char *b, size_t n) asm("strncmp");
#define DEF_TEST(SZ, ALIGN) \
static void test_memcmp_ ## SZ ## _ ## ALIGN (void) { \
char one[3 * (SZ > 10 ? SZ : 10)]; \
char two[3 * (SZ > 10 ? SZ : 10)]; \
int i,j; \
for (i = 0 ; i < SZ ; i++) \
{ \
int r1; \
char *a = one + (i & 1) * ALIGN; \
char *b = two + (i & 1) * ALIGN; \
memset (a, '-', SZ); \
memset (b, '-', SZ); \
a[i] = '1'; \
b[i] = '2'; \
a[SZ] = 0; \
b[SZ] = 0; \
if (!((r1 = memcmp (b, a, SZ)) > 0)) \
{ \
abort (); \
} \
if (!((r1 = memcmp (a, b, SZ)) < 0)) \
{ \
abort (); \
} \
b[i] = '1'; \
if (!((r1 = memcmp (a, b, SZ)) == 0)) \
{ \
abort (); \
} \
for(j = i; j < SZ ; j++) \
{ \
a[j] = '1'; \
b[j] = '2'; \
} \
if (!((r1 = memcmp (b, a, SZ)) > 0)) \
{ \
abort (); \
} \
if (!((r1 = memcmp (a, b, SZ)) < 0)) \
{ \
abort (); \
} \
} \
}
#ifndef NRAND
#define NRAND 10000
#endif
#define MAX_SZ 200
static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, int),
void (test_strncmp)(const char *, const char *, int),
size_t sz, int align)
{
char buf1[MAX_SZ*2+10],buf2[MAX_SZ*2+10];
size_t test_sz = (sz<MAX_SZ)?sz:MAX_SZ;
size_t diff_pos, zero_pos;
uint32_t e;
int i,j,l;
for(l=0;l<sz;l++) {
for(i=0;i<NRAND/sz;i++) {
for(j=0;j<l;j++) {
buf1[j] = random() & 0xff;
buf2[j] = buf1[j];
}
for(j=l;j<sz;j++) {
buf1[j] = random() & 0xff;
buf2[j] = random() & 0xff;
}
}
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
}
for(diff_pos = ((test_sz>10)?(test_sz-10):0); diff_pos < test_sz+10; diff_pos++)
for(zero_pos = ((test_sz>10)?(test_sz-10):0); zero_pos < test_sz+10; zero_pos++)
{
memset(buf1, 'A', 2*test_sz);
memset(buf2, 'A', 2*test_sz);
buf2[diff_pos] = 'B';
buf1[zero_pos] = 0;
buf2[zero_pos] = 0;
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
(*test_memcmp)(buf2,buf1,-e);
(*test_memcmp)(buf2,buf2,0);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
(*test_strncmp)(buf2,buf1,-e);
(*test_strncmp)(buf2,buf2,0);
/* differing length: */
buf2[diff_pos] = 0;
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
memset(buf2+diff_pos,'B',sizeof(buf2)-diff_pos);
buf2[zero_pos] = 0;
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
(*test_memcmp)(buf2,buf1,-e);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
(*test_strncmp)(buf2,buf1,-e);
}
}
#define RUN_TEST(SZ, ALIGN) test_driver_memcmp (test_memcmp_ ## SZ ## _ ## ALIGN, test_strncmp_ ## SZ ## _ ## ALIGN, SZ, ALIGN);
#define DEF_TEST(SZ, ALIGN) \
static void test_memcmp_ ## SZ ## _ ## ALIGN (const char *str1, const char *str2, int expect) \
{ \
char three[8192] __attribute__ ((aligned (4096))); \
char four[8192] __attribute__ ((aligned (4096))); \
char *a, *b; \
int i,j,r; \
for (j = 0; j < 2; j++) \
{ \
for (i = 0; i < 2; i++) \
{ \
a = three+i*ALIGN+j*(4096-2*i*ALIGN); \
b = four+i*ALIGN+j*(4096-2*i*ALIGN); \
memcpy(a,str1,SZ); \
memcpy(b,str2,SZ); \
r = memcmp(a,b,SZ); \
if ( r < 0 && !(expect < 0) ) abort(); \
if ( r > 0 && !(expect > 0) ) abort(); \
if ( r == 0 && !(expect == 0) ) abort(); \
} \
} \
} \
static void test_strncmp_ ## SZ ## _ ## ALIGN (const char *str1, const char *str2, int expect) \
{ \
char three[8192] __attribute__ ((aligned (4096))); \
char four[8192] __attribute__ ((aligned (4096))); \
char *a, *b; \
int i,j,r; \
for (j = 0; j < 2; j++) \
{ \
for (i = 0; i < 2; i++) \
{ \
a = three+i*ALIGN+j*(4096-2*i*ALIGN); \
b = four+i*ALIGN+j*(4096-2*i*ALIGN); \
strcpy(a,str1); \
strcpy(b,str2); \
r = strncmp(a,b,SZ); \
if ( r < 0 && !(expect < 0) ) abort(); \
if ( r > 0 && !(expect > 0) ) abort(); \
if ( r == 0 && !(expect == 0) ) abort(); \
} \
} \
}
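
For a concrete instance of the macro pair, DEF_TEST(8,4) defines test_memcmp_8_4 and test_strncmp_8_4 above, and RUN_TEST(8,4) expands to

  test_driver_memcmp (test_memcmp_8_4, test_strncmp_8_4, 8, 4);

so each size/alignment combination is exercised twice: against random buffers whose expected result comes from the library memcmp/strncmp (reached through the lib_memcmp/lib_strncmp asm aliases), and inside the DEF_TEST helpers against copies placed at several offsets around a 4096-byte boundary.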
#ifdef TEST_ALL
DEF_TEST(1,1)
@ -300,305 +366,302 @@ DEF_TEST(49,2)
DEF_TEST(49,4)
DEF_TEST(49,8)
DEF_TEST(49,16)
DEF_TEST(100,1)
DEF_TEST(100,2)
DEF_TEST(100,4)
DEF_TEST(100,8)
DEF_TEST(100,16)
#else
DEF_TEST(3,1)
DEF_TEST(4,1)
DEF_TEST(4,2)
DEF_TEST(4,4)
DEF_TEST(5,1)
DEF_TEST(5,8)
DEF_TEST(6,1)
DEF_TEST(6,4)
DEF_TEST(6,8)
DEF_TEST(7,1)
DEF_TEST(7,2)
DEF_TEST(7,4)
DEF_TEST(7,8)
DEF_TEST(8,1)
DEF_TEST(8,2)
DEF_TEST(8,4)
DEF_TEST(8,8)
DEF_TEST(9,1)
DEF_TEST(16,1)
DEF_TEST(16,2)
DEF_TEST(16,4)
DEF_TEST(16,8)
DEF_TEST(16,16)
DEF_TEST(32,1)
DEF_TEST(32,2)
DEF_TEST(32,4)
DEF_TEST(32,8)
DEF_TEST(32,16)
DEF_TEST(100,1)
DEF_TEST(100,8)
#endif
int
main(int argc, char **argv)
{
#ifdef TEST_ALL
RUN_TEST(1,1);
RUN_TEST(1,2);
RUN_TEST(1,4);
RUN_TEST(1,8);
RUN_TEST(1,16);
RUN_TEST(2,1);
RUN_TEST(2,2);
RUN_TEST(2,4);
RUN_TEST(2,8);
RUN_TEST(2,16);
RUN_TEST(3,1);
RUN_TEST(3,2);
RUN_TEST(3,4);
RUN_TEST(3,8);
RUN_TEST(3,16);
RUN_TEST(4,1);
RUN_TEST(4,2);
RUN_TEST(4,4);
RUN_TEST(4,8);
RUN_TEST(4,16);
RUN_TEST(5,1);
RUN_TEST(5,2);
RUN_TEST(5,4);
RUN_TEST(5,8);
RUN_TEST(5,16);
RUN_TEST(6,1);
RUN_TEST(6,2);
RUN_TEST(6,4);
RUN_TEST(6,8);
RUN_TEST(6,16);
RUN_TEST(7,1);
RUN_TEST(7,2);
RUN_TEST(7,4);
RUN_TEST(7,8);
RUN_TEST(7,16);
RUN_TEST(8,1);
RUN_TEST(8,2);
RUN_TEST(8,4);
RUN_TEST(8,8);
RUN_TEST(8,16);
RUN_TEST(9,1);
RUN_TEST(9,2);
RUN_TEST(9,4);
RUN_TEST(9,8);
RUN_TEST(9,16);
RUN_TEST(10,1);
RUN_TEST(10,2);
RUN_TEST(10,4);
RUN_TEST(10,8);
RUN_TEST(10,16);
RUN_TEST(11,1);
RUN_TEST(11,2);
RUN_TEST(11,4);
RUN_TEST(11,8);
RUN_TEST(11,16);
RUN_TEST(12,1);
RUN_TEST(12,2);
RUN_TEST(12,4);
RUN_TEST(12,8);
RUN_TEST(12,16);
RUN_TEST(13,1);
RUN_TEST(13,2);
RUN_TEST(13,4);
RUN_TEST(13,8);
RUN_TEST(13,16);
RUN_TEST(14,1);
RUN_TEST(14,2);
RUN_TEST(14,4);
RUN_TEST(14,8);
RUN_TEST(14,16);
RUN_TEST(15,1);
RUN_TEST(15,2);
RUN_TEST(15,4);
RUN_TEST(15,8);
RUN_TEST(15,16);
RUN_TEST(16,1);
RUN_TEST(16,2);
RUN_TEST(16,4);
RUN_TEST(16,8);
RUN_TEST(16,16);
RUN_TEST(17,1);
RUN_TEST(17,2);
RUN_TEST(17,4);
RUN_TEST(17,8);
RUN_TEST(17,16);
RUN_TEST(18,1);
RUN_TEST(18,2);
RUN_TEST(18,4);
RUN_TEST(18,8);
RUN_TEST(18,16);
RUN_TEST(19,1);
RUN_TEST(19,2);
RUN_TEST(19,4);
RUN_TEST(19,8);
RUN_TEST(19,16);
RUN_TEST(20,1);
RUN_TEST(20,2);
RUN_TEST(20,4);
RUN_TEST(20,8);
RUN_TEST(20,16);
RUN_TEST(21,1);
RUN_TEST(21,2);
RUN_TEST(21,4);
RUN_TEST(21,8);
RUN_TEST(21,16);
RUN_TEST(22,1);
RUN_TEST(22,2);
RUN_TEST(22,4);
RUN_TEST(22,8);
RUN_TEST(22,16);
RUN_TEST(23,1);
RUN_TEST(23,2);
RUN_TEST(23,4);
RUN_TEST(23,8);
RUN_TEST(23,16);
RUN_TEST(24,1);
RUN_TEST(24,2);
RUN_TEST(24,4);
RUN_TEST(24,8);
RUN_TEST(24,16);
RUN_TEST(25,1);
RUN_TEST(25,2);
RUN_TEST(25,4);
RUN_TEST(25,8);
RUN_TEST(25,16);
RUN_TEST(26,1);
RUN_TEST(26,2);
RUN_TEST(26,4);
RUN_TEST(26,8);
RUN_TEST(26,16);
RUN_TEST(27,1);
RUN_TEST(27,2);
RUN_TEST(27,4);
RUN_TEST(27,8);
RUN_TEST(27,16);
RUN_TEST(28,1);
RUN_TEST(28,2);
RUN_TEST(28,4);
RUN_TEST(28,8);
RUN_TEST(28,16);
RUN_TEST(29,1);
RUN_TEST(29,2);
RUN_TEST(29,4);
RUN_TEST(29,8);
RUN_TEST(29,16);
RUN_TEST(30,1);
RUN_TEST(30,2);
RUN_TEST(30,4);
RUN_TEST(30,8);
RUN_TEST(30,16);
RUN_TEST(31,1);
RUN_TEST(31,2);
RUN_TEST(31,4);
RUN_TEST(31,8);
RUN_TEST(31,16);
RUN_TEST(32,1);
RUN_TEST(32,2);
RUN_TEST(32,4);
RUN_TEST(32,8);
RUN_TEST(32,16);
RUN_TEST(33,1);
RUN_TEST(33,2);
RUN_TEST(33,4);
RUN_TEST(33,8);
RUN_TEST(33,16);
RUN_TEST(34,1);
RUN_TEST(34,2);
RUN_TEST(34,4);
RUN_TEST(34,8);
RUN_TEST(34,16);
RUN_TEST(35,1);
RUN_TEST(35,2);
RUN_TEST(35,4);
RUN_TEST(35,8);
RUN_TEST(35,16);
RUN_TEST(36,1);
RUN_TEST(36,2);
RUN_TEST(36,4);
RUN_TEST(36,8);
RUN_TEST(36,16);
RUN_TEST(37,1);
RUN_TEST(37,2);
RUN_TEST(37,4);
RUN_TEST(37,8);
RUN_TEST(37,16);
RUN_TEST(38,1);
RUN_TEST(38,2);
RUN_TEST(38,4);
RUN_TEST(38,8);
RUN_TEST(38,16);
RUN_TEST(39,1);
RUN_TEST(39,2);
RUN_TEST(39,4);
RUN_TEST(39,8);
RUN_TEST(39,16);
RUN_TEST(40,1);
RUN_TEST(40,2);
RUN_TEST(40,4);
RUN_TEST(40,8);
RUN_TEST(40,16);
RUN_TEST(41,1);
RUN_TEST(41,2);
RUN_TEST(41,4);
RUN_TEST(41,8);
RUN_TEST(41,16);
RUN_TEST(42,1);
RUN_TEST(42,2);
RUN_TEST(42,4);
RUN_TEST(42,8);
RUN_TEST(42,16);
RUN_TEST(43,1);
RUN_TEST(43,2);
RUN_TEST(43,4);
RUN_TEST(43,8);
RUN_TEST(43,16);
RUN_TEST(44,1);
RUN_TEST(44,2);
RUN_TEST(44,4);
RUN_TEST(44,8);
RUN_TEST(44,16);
RUN_TEST(45,1);
RUN_TEST(45,2);
RUN_TEST(45,4);
RUN_TEST(45,8);
RUN_TEST(45,16);
RUN_TEST(46,1);
RUN_TEST(46,2);
RUN_TEST(46,4);
RUN_TEST(46,8);
RUN_TEST(46,16);
RUN_TEST(47,1);
RUN_TEST(47,2);
RUN_TEST(47,4);
RUN_TEST(47,8);
RUN_TEST(47,16);
RUN_TEST(48,1);
RUN_TEST(48,2);
RUN_TEST(48,4);
RUN_TEST(48,8);
RUN_TEST(48,16);
RUN_TEST(49,1);
RUN_TEST(49,2);
RUN_TEST(49,4);
RUN_TEST(49,8);
RUN_TEST(49,16);
RUN_TEST(1,1)
RUN_TEST(1,2)
RUN_TEST(1,4)
RUN_TEST(1,8)
RUN_TEST(1,16)
RUN_TEST(2,1)
RUN_TEST(2,2)
RUN_TEST(2,4)
RUN_TEST(2,8)
RUN_TEST(2,16)
RUN_TEST(3,1)
RUN_TEST(3,2)
RUN_TEST(3,4)
RUN_TEST(3,8)
RUN_TEST(3,16)
RUN_TEST(4,1)
RUN_TEST(4,2)
RUN_TEST(4,4)
RUN_TEST(4,8)
RUN_TEST(4,16)
RUN_TEST(5,1)
RUN_TEST(5,2)
RUN_TEST(5,4)
RUN_TEST(5,8)
RUN_TEST(5,16)
RUN_TEST(6,1)
RUN_TEST(6,2)
RUN_TEST(6,4)
RUN_TEST(6,8)
RUN_TEST(6,16)
RUN_TEST(7,1)
RUN_TEST(7,2)
RUN_TEST(7,4)
RUN_TEST(7,8)
RUN_TEST(7,16)
RUN_TEST(8,1)
RUN_TEST(8,2)
RUN_TEST(8,4)
RUN_TEST(8,8)
RUN_TEST(8,16)
RUN_TEST(9,1)
RUN_TEST(9,2)
RUN_TEST(9,4)
RUN_TEST(9,8)
RUN_TEST(9,16)
RUN_TEST(10,1)
RUN_TEST(10,2)
RUN_TEST(10,4)
RUN_TEST(10,8)
RUN_TEST(10,16)
RUN_TEST(11,1)
RUN_TEST(11,2)
RUN_TEST(11,4)
RUN_TEST(11,8)
RUN_TEST(11,16)
RUN_TEST(12,1)
RUN_TEST(12,2)
RUN_TEST(12,4)
RUN_TEST(12,8)
RUN_TEST(12,16)
RUN_TEST(13,1)
RUN_TEST(13,2)
RUN_TEST(13,4)
RUN_TEST(13,8)
RUN_TEST(13,16)
RUN_TEST(14,1)
RUN_TEST(14,2)
RUN_TEST(14,4)
RUN_TEST(14,8)
RUN_TEST(14,16)
RUN_TEST(15,1)
RUN_TEST(15,2)
RUN_TEST(15,4)
RUN_TEST(15,8)
RUN_TEST(15,16)
RUN_TEST(16,1)
RUN_TEST(16,2)
RUN_TEST(16,4)
RUN_TEST(16,8)
RUN_TEST(16,16)
RUN_TEST(17,1)
RUN_TEST(17,2)
RUN_TEST(17,4)
RUN_TEST(17,8)
RUN_TEST(17,16)
RUN_TEST(18,1)
RUN_TEST(18,2)
RUN_TEST(18,4)
RUN_TEST(18,8)
RUN_TEST(18,16)
RUN_TEST(19,1)
RUN_TEST(19,2)
RUN_TEST(19,4)
RUN_TEST(19,8)
RUN_TEST(19,16)
RUN_TEST(20,1)
RUN_TEST(20,2)
RUN_TEST(20,4)
RUN_TEST(20,8)
RUN_TEST(20,16)
RUN_TEST(21,1)
RUN_TEST(21,2)
RUN_TEST(21,4)
RUN_TEST(21,8)
RUN_TEST(21,16)
RUN_TEST(22,1)
RUN_TEST(22,2)
RUN_TEST(22,4)
RUN_TEST(22,8)
RUN_TEST(22,16)
RUN_TEST(23,1)
RUN_TEST(23,2)
RUN_TEST(23,4)
RUN_TEST(23,8)
RUN_TEST(23,16)
RUN_TEST(24,1)
RUN_TEST(24,2)
RUN_TEST(24,4)
RUN_TEST(24,8)
RUN_TEST(24,16)
RUN_TEST(25,1)
RUN_TEST(25,2)
RUN_TEST(25,4)
RUN_TEST(25,8)
RUN_TEST(25,16)
RUN_TEST(26,1)
RUN_TEST(26,2)
RUN_TEST(26,4)
RUN_TEST(26,8)
RUN_TEST(26,16)
RUN_TEST(27,1)
RUN_TEST(27,2)
RUN_TEST(27,4)
RUN_TEST(27,8)
RUN_TEST(27,16)
RUN_TEST(28,1)
RUN_TEST(28,2)
RUN_TEST(28,4)
RUN_TEST(28,8)
RUN_TEST(28,16)
RUN_TEST(29,1)
RUN_TEST(29,2)
RUN_TEST(29,4)
RUN_TEST(29,8)
RUN_TEST(29,16)
RUN_TEST(30,1)
RUN_TEST(30,2)
RUN_TEST(30,4)
RUN_TEST(30,8)
RUN_TEST(30,16)
RUN_TEST(31,1)
RUN_TEST(31,2)
RUN_TEST(31,4)
RUN_TEST(31,8)
RUN_TEST(31,16)
RUN_TEST(32,1)
RUN_TEST(32,2)
RUN_TEST(32,4)
RUN_TEST(32,8)
RUN_TEST(32,16)
RUN_TEST(33,1)
RUN_TEST(33,2)
RUN_TEST(33,4)
RUN_TEST(33,8)
RUN_TEST(33,16)
RUN_TEST(34,1)
RUN_TEST(34,2)
RUN_TEST(34,4)
RUN_TEST(34,8)
RUN_TEST(34,16)
RUN_TEST(35,1)
RUN_TEST(35,2)
RUN_TEST(35,4)
RUN_TEST(35,8)
RUN_TEST(35,16)
RUN_TEST(36,1)
RUN_TEST(36,2)
RUN_TEST(36,4)
RUN_TEST(36,8)
RUN_TEST(36,16)
RUN_TEST(37,1)
RUN_TEST(37,2)
RUN_TEST(37,4)
RUN_TEST(37,8)
RUN_TEST(37,16)
RUN_TEST(38,1)
RUN_TEST(38,2)
RUN_TEST(38,4)
RUN_TEST(38,8)
RUN_TEST(38,16)
RUN_TEST(39,1)
RUN_TEST(39,2)
RUN_TEST(39,4)
RUN_TEST(39,8)
RUN_TEST(39,16)
RUN_TEST(40,1)
RUN_TEST(40,2)
RUN_TEST(40,4)
RUN_TEST(40,8)
RUN_TEST(40,16)
RUN_TEST(41,1)
RUN_TEST(41,2)
RUN_TEST(41,4)
RUN_TEST(41,8)
RUN_TEST(41,16)
RUN_TEST(42,1)
RUN_TEST(42,2)
RUN_TEST(42,4)
RUN_TEST(42,8)
RUN_TEST(42,16)
RUN_TEST(43,1)
RUN_TEST(43,2)
RUN_TEST(43,4)
RUN_TEST(43,8)
RUN_TEST(43,16)
RUN_TEST(44,1)
RUN_TEST(44,2)
RUN_TEST(44,4)
RUN_TEST(44,8)
RUN_TEST(44,16)
RUN_TEST(45,1)
RUN_TEST(45,2)
RUN_TEST(45,4)
RUN_TEST(45,8)
RUN_TEST(45,16)
RUN_TEST(46,1)
RUN_TEST(46,2)
RUN_TEST(46,4)
RUN_TEST(46,8)
RUN_TEST(46,16)
RUN_TEST(47,1)
RUN_TEST(47,2)
RUN_TEST(47,4)
RUN_TEST(47,8)
RUN_TEST(47,16)
RUN_TEST(48,1)
RUN_TEST(48,2)
RUN_TEST(48,4)
RUN_TEST(48,8)
RUN_TEST(48,16)
RUN_TEST(49,1)
RUN_TEST(49,2)
RUN_TEST(49,4)
RUN_TEST(49,8)
RUN_TEST(49,16)
RUN_TEST(100,1)
RUN_TEST(100,2)
RUN_TEST(100,4)
RUN_TEST(100,8)
RUN_TEST(100,16)
#else
RUN_TEST(3,1);
RUN_TEST(4,1);
RUN_TEST(4,2);
RUN_TEST(4,4);
RUN_TEST(5,1);
RUN_TEST(6,1);
RUN_TEST(7,1);
RUN_TEST(8,1);
RUN_TEST(8,2);
RUN_TEST(8,4);
RUN_TEST(8,8);
RUN_TEST(9,1);
RUN_TEST(16,1);
RUN_TEST(16,2);
RUN_TEST(16,4);
RUN_TEST(16,8);
RUN_TEST(16,16);
RUN_TEST(32,1);
RUN_TEST(32,2);
RUN_TEST(32,4);
RUN_TEST(32,8);
RUN_TEST(32,16);
RUN_TEST(3,1)
RUN_TEST(4,1)
RUN_TEST(5,1)
RUN_TEST(5,8)
RUN_TEST(6,1)
RUN_TEST(6,4)
RUN_TEST(6,8)
RUN_TEST(7,1)
RUN_TEST(7,2)
RUN_TEST(7,4)
RUN_TEST(7,8)
RUN_TEST(8,1)
RUN_TEST(9,1)
RUN_TEST(16,1)
RUN_TEST(32,1)
RUN_TEST(100,1)
RUN_TEST(100,8)
#endif
return 0;
}