Add SSSE3-optimized implementation of str{,n}cmp for x86-64.
This commit is contained in:
parent
9083bcc5dc
commit
0fda545d5f
@ -1,5 +1,14 @@
|
||||
2009-08-07 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* sysdeps/x86_64/strcmp.S: Add support to compile with
|
||||
USE_SSSE3. In this case palignr is used.
|
||||
* sysdeps/x86_64/multiarch/strcmp.S (strcmp): If SSE4.2 is not
|
||||
available but SSSE3 is, pick __str{,n}cmp_ssse3.
|
||||
* sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
|
||||
Add strcmp-ssse3 and strncmp-ssse3.
|
||||
* sysdeps/x86_64/multiarch/strcmp-ssse3.S: New file.
|
||||
* sysdeps/x86_64/multiarch/strncmp-ssse3.S: New file.
|
||||
|
||||
* sysdeps/x86_64/multiarch/strcspn-c.c (STRCSPN_SSE42): Avoid
|
||||
warning through fake initialization.
|
||||
|
||||
|
@ -4,7 +4,7 @@ gen-as-const-headers += ifunc-defines.sym
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),string)
|
||||
sysdep_routines += stpncpy-c strncpy-c
|
||||
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||
CFLAGS-strcspn-c.c += -msse4
|
||||
|
3
sysdeps/x86_64/multiarch/strcmp-ssse3.S
Normal file
3
sysdeps/x86_64/multiarch/strcmp-ssse3.S
Normal file
@ -0,0 +1,3 @@
|
||||
#define USE_SSSE3 1
|
||||
#define STRCMP __strcmp_ssse3
|
||||
#include "../strcmp.S"
|
@ -34,6 +34,7 @@
|
||||
mov %r9, %r11
|
||||
|
||||
#define STRCMP_SSE42 __strncmp_sse42
|
||||
#define STRCMP_SSSE3 __strncmp_ssse3
|
||||
#define STRCMP_SSE2 __strncmp_sse2
|
||||
#define __GI_STRCMP __GI_strncmp
|
||||
#else
|
||||
@ -41,6 +42,7 @@
|
||||
#ifndef STRCMP
|
||||
#define STRCMP strcmp
|
||||
#define STRCMP_SSE42 __strcmp_sse42
|
||||
#define STRCMP_SSSE3 __strcmp_ssse3
|
||||
#define STRCMP_SSE2 __strcmp_sse2
|
||||
#define __GI_STRCMP __GI_strcmp
|
||||
#endif
|
||||
@ -60,10 +62,14 @@ ENTRY(STRCMP)
|
||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1: leaq STRCMP_SSE2(%rip), %rax
|
||||
testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
|
||||
jz 2f
|
||||
1:
|
||||
leaq STRCMP_SSE42(%rip), %rax
|
||||
testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
|
||||
jnz 2f
|
||||
leaq STRCMP_SSSE3(%rip), %rax
|
||||
testl $(1<<9), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
|
||||
jnz 2f
|
||||
leaq STRCMP_SSE2(%rip), %rax
|
||||
2: ret
|
||||
END(STRCMP)
|
||||
|
||||
|
4
sysdeps/x86_64/multiarch/strncmp-ssse3.S
Normal file
4
sysdeps/x86_64/multiarch/strncmp-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
||||
#define USE_SSSE3 1
|
||||
#define STRCMP __strncmp_ssse3
|
||||
#define USE_AS_STRNCMP
|
||||
#include "../strcmp.S"
|
@ -51,7 +51,12 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
.text
|
||||
#else
|
||||
.section .text.ssse3,"ax",@progbits
|
||||
#endif
|
||||
|
||||
ENTRY (BP_SYM (STRCMP))
|
||||
#ifdef NOT_IN_libc
|
||||
/* Simple version since we can't use SSE registers in ld.so. */
|
||||
@ -244,9 +249,13 @@ LABEL(gobble_ashr_1):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4 /* store for next cycle */
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $1, %xmm3
|
||||
pslldq $15, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -269,9 +278,13 @@ LABEL(gobble_ashr_1):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4 /* store for next cycle */
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $1, %xmm3
|
||||
pslldq $15, %xmm2
|
||||
pslldq $15, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -363,9 +376,13 @@ LABEL(gobble_ashr_2):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $2, %xmm3
|
||||
pslldq $14, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -389,9 +406,13 @@ LABEL(gobble_ashr_2):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $2, %xmm3
|
||||
pslldq $14, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $14, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -477,9 +498,13 @@ LABEL(gobble_ashr_3):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $3, %xmm3
|
||||
pslldq $13, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -503,9 +528,13 @@ LABEL(gobble_ashr_3):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $3, %xmm3
|
||||
pslldq $13, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $13, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -591,9 +620,13 @@ LABEL(gobble_ashr_4):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $4, %xmm3
|
||||
pslldq $12, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -617,9 +650,13 @@ LABEL(gobble_ashr_4):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $4, %xmm3
|
||||
pslldq $12, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $12, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -705,9 +742,13 @@ LABEL(gobble_ashr_5):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $5, %xmm3
|
||||
pslldq $11, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -731,9 +772,13 @@ LABEL(gobble_ashr_5):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $5, %xmm3
|
||||
pslldq $11, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $11, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -819,9 +864,13 @@ LABEL(gobble_ashr_6):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $6, %xmm3
|
||||
pslldq $10, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -845,9 +894,13 @@ LABEL(gobble_ashr_6):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $6, %xmm3
|
||||
pslldq $10, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $10, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -933,9 +986,13 @@ LABEL(gobble_ashr_7):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $7, %xmm3
|
||||
pslldq $9, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -959,9 +1016,13 @@ LABEL(gobble_ashr_7):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $7, %xmm3
|
||||
pslldq $9, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $9, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1047,9 +1108,13 @@ LABEL(gobble_ashr_8):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $8, %xmm3
|
||||
pslldq $8, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1073,9 +1138,13 @@ LABEL(gobble_ashr_8):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $8, %xmm3
|
||||
pslldq $8, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $8, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1161,9 +1230,13 @@ LABEL(gobble_ashr_9):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $9, %xmm3
|
||||
pslldq $7, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1187,9 +1260,13 @@ LABEL(gobble_ashr_9):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $9, %xmm3
|
||||
pslldq $7, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $7, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1275,9 +1352,13 @@ LABEL(gobble_ashr_10):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $10, %xmm3
|
||||
pslldq $6, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1301,9 +1382,13 @@ LABEL(gobble_ashr_10):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $10, %xmm3
|
||||
pslldq $6, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $6, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1389,9 +1474,13 @@ LABEL(gobble_ashr_11):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $11, %xmm3
|
||||
pslldq $5, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1415,9 +1504,13 @@ LABEL(gobble_ashr_11):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $11, %xmm3
|
||||
pslldq $5, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $5, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1503,9 +1596,13 @@ LABEL(gobble_ashr_12):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $12, %xmm3
|
||||
pslldq $4, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1529,9 +1626,13 @@ LABEL(gobble_ashr_12):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $12, %xmm3
|
||||
pslldq $4, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $4, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1617,9 +1718,13 @@ LABEL(gobble_ashr_13):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $13, %xmm3
|
||||
pslldq $3, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1643,9 +1748,13 @@ LABEL(gobble_ashr_13):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $13, %xmm3
|
||||
pslldq $3, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1731,9 +1840,13 @@ LABEL(gobble_ashr_14):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $14, %xmm3
|
||||
pslldq $2, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1757,9 +1870,13 @@ LABEL(gobble_ashr_14):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $14, %xmm3
|
||||
pslldq $2, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $2, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1847,9 +1964,13 @@ LABEL(gobble_ashr_15):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $15, %xmm3
|
||||
pslldq $1, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
@ -1873,9 +1994,13 @@ LABEL(gobble_ashr_15):
|
||||
movdqa (%rdi, %rcx), %xmm2
|
||||
movdqa %xmm2, %xmm4
|
||||
|
||||
#ifndef USE_SSSE3
|
||||
psrldq $15, %xmm3
|
||||
pslldq $1, %xmm2
|
||||
por %xmm3, %xmm2
|
||||
pslldq $1, %xmm2
|
||||
por %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#else
|
||||
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||
#endif
|
||||
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm2, %xmm1
|
||||
|
Loading…
Reference in New Issue
Block a user