target-alpha: Add vector implementation for CMPBGE

While conditionalized on SSE2, it's a "portable" gcc generic vector
implementation, which could be enabled on other hosts.

Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2014-08-18 10:19:06 -07:00
parent 8d8d324e34
commit 32ad48abd7

View file

@ -60,6 +60,42 @@ uint64_t helper_zap(uint64_t val, uint64_t mask)
uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
{
#if defined(__SSE2__)
uint64_t r;
/* The cmpbge instruction is heavily used in the implementation of
every string function on Alpha. We can do much better than either
the default loop below, or even an unrolled version by using the
native vector support. */
{
typedef uint64_t Q __attribute__((vector_size(16)));
typedef uint8_t B __attribute__((vector_size(16)));
Q q1 = (Q){ op1, 0 };
Q q2 = (Q){ op2, 0 };
q1 = (Q)((B)q1 >= (B)q2);
r = q1[0];
}
/* Select only one bit from each byte. */
r &= 0x0101010101010101;
/* Collect the bits into the bottom byte. */
/* .......A.......B.......C.......D.......E.......F.......G.......H */
r |= r >> (8 - 1);
/* .......A......AB......BC......CD......DE......EF......FG......GH */
r |= r >> (16 - 2);
/* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
r |= r >> (32 - 4);
/* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
/* Return only the low 8 bits. */
return r & 0xff;
#else
uint8_t opa, opb, res;
int i;
@ -72,6 +108,7 @@ uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
}
}
return res;
#endif
}
uint64_t helper_minub8(uint64_t op1, uint64_t op2)