x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]

This code:

L(between_2_3):
	/* Load as big endian with overlapping loads and bswap to avoid
	   branches.  */
	movzwl	-2(%rdi, %rdx), %eax
	movzwl	-2(%rsi, %rdx), %ecx
	shll	$16, %eax
	shll	$16, %ecx
	movzwl	(%rdi), %edi
	movzwl	(%rsi), %esi
	orl	%edi, %eax
	orl	%esi, %ecx
	bswap	%eax
	bswap	%ecx
	subl	%ecx, %eax
	ret

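needs a saturating subtract because the full 32-bit register is used:
the difference of two arbitrary 32-bit values can overflow, so the
sign of the result may be wrong.
With this commit, only the lower 24 bits of the register are used,
so a regular subtraction suffices.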

The test case change adds coverage for these kinds of bugs.
This commit is contained in:
Florian Weimer 2017-06-23 17:23:44 +02:00
parent 7fa1d9462b
commit 3ec7c02cc3
3 changed files with 23 additions and 15 deletions

View File

@ -1,3 +1,10 @@
2017-06-23 Florian Weimer <fweimer@redhat.com>
[BZ #21662]
* sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S (between_2_3):
Use only 24 bits of the register before the subtraction.
* string/test-memcmp.c (check1): Check with different lengths.
2017-06-23 Gabriel F. T. Gomes <gftg@linux.vnet.ibm.com>
* sysdeps/ieee754/float128/Makefile (CFLAGS-strfromf128.c): Add

View File

@ -441,11 +441,12 @@ check1 (void)
n = 116;
for (size_t i = 0; i < n; i++)
{
exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
FOR_EACH_IMPL (impl, 0)
check_result (impl, s1 + i, s2 + i, n - i, exp_result);
}
for (size_t len = 0; len <= n - i; ++len)
{
exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, len);
FOR_EACH_IMPL (impl, 0)
check_result (impl, s1 + i, s2 + i, len, exp_result);
}
}
/* This test checks that memcmp doesn't overrun buffers. */

View File

@ -137,18 +137,18 @@ L(exit):
.p2align 4
L(between_2_3):
/* Load as big endian with overlapping loads and bswap to avoid
branches. */
movzwl -2(%rdi, %rdx), %eax
movzwl -2(%rsi, %rdx), %ecx
shll $16, %eax
shll $16, %ecx
movzwl (%rdi), %edi
movzwl (%rsi), %esi
orl %edi, %eax
orl %esi, %ecx
/* Load as big endian to avoid branches. */
movzwl (%rdi), %eax
movzwl (%rsi), %ecx
shll $8, %eax
shll $8, %ecx
bswap %eax
bswap %ecx
movzbl -1(%rdi, %rdx), %edi
movzbl -1(%rsi, %rdx), %esi
orl %edi, %eax
orl %esi, %ecx
/* Subtraction is okay because the upper 8 bits are zero. */
subl %ecx, %eax
ret