lex.c (search_line_fast): Add new version to be used for Power8 and later targets when Altivec is enabled.

2014-10-03  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* lex.c (search_line_fast): Add new version to be used for Power8
	and later targets when Altivec is enabled.  Restrict the existing
	Altivec version to big-endian systems so that lvsr is not used on
	little endian, where it is deprecated.  Remove LE-specific code
	from the now-BE-only version.

From-SVN: r215873
This commit is contained in:
Bill Schmidt 2014-10-03 20:06:38 +00:00 committed by William Schmidt
parent 3c9aabbde5
commit 0ccaaab0e3
2 changed files with 112 additions and 11 deletions

View File

@ -1,3 +1,11 @@
2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* lex.c (search_line_fast): Add new version to be used for Power8
and later targets when Altivec is enabled. Restrict the existing
Altivec version to big-endian systems so that lvsr is not used on
little endian, where it is deprecated. Remove LE-specific code
from the now-BE-only version.
2014-10-02 Bernd Edlinger <bernd.edlinger@hotmail.de>
Jeff Law <law@redhat.com>

View File

@ -513,9 +513,111 @@ init_vectorized_lexer (void)
search_line_fast = impl;
}
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
/* A version of the fast scanner using AltiVec vectorized byte compares. */
/* A version of the fast scanner using AltiVec vectorized byte compares
and VSX unaligned loads (when VSX is available). This is otherwise
the same as the pre-GCC 5 version. */
/* Scan forward from S, 16 bytes at a time, and return a pointer to the
   first byte that is one of '\n', '\r', '\\' or '?'.  END is not
   consulted: the loop only stops when a block contains a match, so the
   caller presumably guarantees that such a byte exists in readable
   memory at or after S -- TODO confirm against the callers.  */
static const uchar *
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef __attribute__((altivec(vector))) unsigned char vc;

  /* One splatted vector per character we are looking for.  */
  const vc splat_nl = {
    '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n'
  };
  const vc splat_cr = {
    '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r'
  };
  const vc splat_bs = {
    '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\'
  };
  const vc splat_qm = {
    '?', '?', '?', '?',
    '?', '?', '?', '?',
    '?', '?', '?', '?',
    '?', '?', '?', '?'
  };
  const vc all_zero = { 0 };
  vc hits;

  /* Main loop: consume one 16-byte block per iteration.  */
  for (;;)
    {
      vc block, eq_nl, eq_cr, eq_bs, eq_qm;

      /* Load directly through S; no alignment fix-up is applied here.  */
      block = *((const vc *)s);
      s += 16;

      eq_nl = (vc) __builtin_vec_cmpeq(block, splat_nl);
      eq_cr = (vc) __builtin_vec_cmpeq(block, splat_cr);
      eq_bs = (vc) __builtin_vec_cmpeq(block, splat_bs);
      eq_qm = (vc) __builtin_vec_cmpeq(block, splat_qm);

      /* HITS holds 0xff in every byte lane that matched one of the
	 four characters and 0 elsewhere.  */
      hits = (eq_nl | eq_cr) | (eq_bs | eq_qm);

      /* Leave the loop as soon as any lane of HITS is non-zero.  The
	 call is the expansion of vec_any_ne(hits, all_zero).  */
      if (__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, hits, all_zero))
	break;
    }

  /* S was advanced past the block that matched; step back onto it.  */
  s -= 16;

  {
#define N (sizeof(vc) / sizeof(long))

    union {
      vc v;
      /* Statically assert that N is 2 or 4: any other value yields a
	 negative array size.  */
      unsigned long l[(N == 2 || N == 4) ? N : -1];
    } words;
    unsigned long word;
    unsigned long idx = 0;

    words.v = hits;

    /* Skip leading all-zero words of HITS, moving S along with them.
       The last word is taken without being tested, since the loop
       above only exits when some word is non-zero.  */
    while (idx < N - 1 && words.l[idx] == 0)
      {
	s += sizeof(unsigned long);
	idx++;
      }
    word = words.l[idx];

    /* WORD has 0xff in each matching byte.  The index of the first
       such byte is the index of its first set bit divided by 8,
       counted from the end of the word that sits lowest in memory.  */
#ifdef __BIG_ENDIAN__
    word = __builtin_clzl(word) >> 3;
#else
    word = __builtin_ctzl(word) >> 3;
#endif
    return s + word;

#undef N
  }
}
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
/* A version of the fast scanner using AltiVec vectorized byte compares.
This cannot be used for little endian because vec_lvsl/lvsr are
deprecated for little endian and the code won't work properly. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
so we can't compile this function without -maltivec on the command line
(or implied by some other switch). */
@ -557,13 +659,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
beginning with all ones and shifting in zeros according to the
mis-alignment. The LVSR instruction pulls the exact shift we
want from the address. */
#ifdef __BIG_ENDIAN__
mask = __builtin_vec_lvsr(0, s);
mask = __builtin_vec_perm(zero, ones, mask);
#else
mask = __builtin_vec_lvsl(0, s);
mask = __builtin_vec_perm(ones, zero, mask);
#endif
data &= mask;
/* While altivec loads mask addresses, we still need to align S so
@ -627,11 +724,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
/* L now contains 0xff in bytes for which we matched one of the
relevant characters. We can find the byte index by finding
its bit index and dividing by 8. */
#ifdef __BIG_ENDIAN__
l = __builtin_clzl(l) >> 3;
#else
l = __builtin_ctzl(l) >> 3;
#endif
return s + l;
#undef N