target/arm: Implement helper_mte_checkN

Fill out the stub that was added earlier.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200626033144.790098-27-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Richard Henderson 2020-06-25 20:31:24 -07:00 committed by Peter Maydell
parent 2e34ff45f3
commit 5add824855
2 changed files with 166 additions and 1 deletion

target/arm/internals.h

@@ -1321,6 +1321,8 @@ FIELD(MTEDESC, TSIZE, 14, 10)  /* mte_checkN only */
bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr);
uint64_t mte_check1(CPUARMState *env, uint32_t desc,
                    uint64_t ptr, uintptr_t ra);
uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
                    uint64_t ptr, uintptr_t ra);

static inline int allocation_tag_from_addr(uint64_t ptr)
{

target/arm/mte_helper.c

@@ -500,7 +500,170 @@ uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr)
/*
 * Perform an MTE checked access for multiple logical accesses.
 */

/**
 * checkN:
 * @mem: tag memory to test
 * @odd: true to begin testing at tags at odd nibble
 * @cmp: the tag to compare against
 * @count: number of tags to test
 *
 * Return the number of successful tests.
 * Thus a return value < @count indicates a failure.
 *
 * A note about sizes: count is expected to be small.
 *
 * The most common use will be LDP/STP of two integer registers,
 * which means 16 bytes of memory touching at most 2 tags, but
 * often the access is aligned and thus just 1 tag.
 *
 * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
 * touching at most 5 tags.  SVE LDR/STR (vector) with the default
 * vector length is also 64 bytes; the maximum architectural length
 * is 256 bytes touching at most 9 tags.
 *
 * The loop below uses 7 logical operations and 1 memory operation
 * per tag pair.  An implementation that loads an aligned word and
 * uses masking to ignore adjacent tags requires 18 logical operations
 * and thus does not begin to pay off until 6 tags, which, per the
 * survey above, is unlikely to be common.
 */
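/*
 * To illustrate the nibble packing that checkN walks: each tag byte holds
 * the even granule's tag in bits [3:0] and the odd granule's tag in bits
 * [7:4].  With ptr_tag = 0x3, cmp is replicated to 0x33; a tag byte of
 * 0x53 then yields diff = 0x60, so the even test (diff & 0x0f) passes and
 * the odd test (diff & 0xf0) fails.
 */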
static int checkN(uint8_t *mem, int odd, int cmp, int count)
{
    int n = 0, diff;

    /* Replicate the test tag and compare. */
    cmp *= 0x11;
    diff = *mem++ ^ cmp;

    if (odd) {
        goto start_odd;
    }
    while (1) {
        /* Test even tag. */
        if (unlikely((diff) & 0x0f)) {
            break;
        }
        if (++n == count) {
            break;
        }

    start_odd:
        /* Test odd tag. */
        if (unlikely((diff) & 0xf0)) {
            break;
        }
        if (++n == count) {
            break;
        }

        diff = *mem++ ^ cmp;
    }
    return n;
}
uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
                    uint64_t ptr, uintptr_t ra)
{
    int mmu_idx, ptr_tag, bit55;
    uint64_t ptr_last, ptr_end, prev_page, next_page;
    uint64_t tag_first, tag_end;
    uint64_t tag_byte_first, tag_byte_end;
    uint32_t esize, total, tag_count, tag_size, n, c;
    uint8_t *mem1, *mem2;
    MMUAccessType type;

    bit55 = extract64(ptr, 55, 1);

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return ptr;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    if (tcma_check(desc, bit55, ptr_tag)) {
        goto done;
    }

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
    esize = FIELD_EX32(desc, MTEDESC, ESIZE);
    total = FIELD_EX32(desc, MTEDESC, TSIZE);

    /* Find the addr of the end of the access, and of the last element. */
    ptr_end = ptr + total;
    ptr_last = ptr_end - esize;

    /* Round the bounds to the tag granule, and compute the number of tags. */
    tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE);
    tag_count = (tag_end - tag_first) / TAG_GRANULE;

    /* Round the bounds to twice the tag granule, and compute the bytes. */
    tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
    tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE);

    /* Locate the page boundaries. */
    prev_page = ptr & TARGET_PAGE_MASK;
    next_page = prev_page + TARGET_PAGE_SIZE;

    if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) {
        /* Memory access stays on one page. */
        tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE);
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total,
                                  MMU_DATA_LOAD, tag_size, ra);
        if (!mem1) {
            goto done;
        }
        /* Perform all of the comparisons. */
        n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
    } else {
        /* Memory access crosses to next page. */
        tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE);
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
                                  MMU_DATA_LOAD, tag_size, ra);

        tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE);
        mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
                                  ptr_end - next_page,
                                  MMU_DATA_LOAD, tag_size, ra);

        /*
         * Perform all of the comparisons.
         * Note the possible but unlikely case of the operation spanning
         * two pages that do not both have tagging enabled.
         */
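        /* c = the number of tag granules covered by the first page. */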
        n = c = (next_page - tag_first) / TAG_GRANULE;
        if (mem1) {
            n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
        }
        if (n == c) {
            if (!mem2) {
                goto done;
            }
            n += checkN(mem2, 0, ptr_tag, tag_count - c);
        }
    }

    /*
     * If we failed, we know which granule.  Compute the element that
     * is first in that granule, and signal failure on that element.
     */
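    /*
     * For example, an unaligned LDP (esize 8, total 16) whose ptr sits
     * 8 bytes into a granule covers two granules; if only the second
     * granule's tag mismatches (n == 1), fail_ofs becomes 8 and the
     * failure is reported at ptr + 8, the second register's element.
     */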
    if (unlikely(n < tag_count)) {
        uint64_t fail_ofs;

        fail_ofs = tag_first + n * TAG_GRANULE - ptr;
        fail_ofs = ROUND_UP(fail_ofs, esize);
        mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
    }

 done:
    return useronly_clean_ptr(ptr);
}

uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    return mte_checkN(env, desc, ptr, GETPC());
}
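
For readers who want to experiment with the tag-walk in isolation, the sketch below is a hypothetical, self-contained host program (not part of this patch; check_n is a simplified copy of checkN above without the unlikely() annotations) that runs the walk over a hand-packed tag buffer:

/*
 * Hypothetical standalone demo: a simplified copy of the checkN walk,
 * run against a hand-packed tag buffer on the host.
 */
#include <stdio.h>
#include <stdint.h>

/* Same walk as checkN above, minus the unlikely() annotations. */
static int check_n(const uint8_t *mem, int odd, int cmp, int count)
{
    int n = 0, diff;

    cmp *= 0x11;               /* replicate the 4-bit tag into both nibbles */
    diff = *mem++ ^ cmp;

    if (odd) {
        goto start_odd;
    }
    while (1) {
        if (diff & 0x0f) {     /* even granule's tag mismatches */
            break;
        }
        if (++n == count) {
            break;
        }
    start_odd:
        if (diff & 0xf0) {     /* odd granule's tag mismatches */
            break;
        }
        if (++n == count) {
            break;
        }
        diff = *mem++ ^ cmp;
    }
    return n;
}

int main(void)
{
    /*
     * Tags for six 16-byte granules, two per byte (even granule in the
     * low nibble): granules 0..4 carry tag 0x3, granule 5 carries 0x7.
     */
    const uint8_t tags[3] = { 0x33, 0x33, 0x73 };

    /* Five granules starting at an even granule: all match. */
    printf("%d\n", check_n(tags, 0, 0x3, 5));   /* prints 5 */

    /* Five granules starting at granule 1 (odd): the last one fails. */
    printf("%d\n", check_n(tags, 1, 0x3, 5));   /* prints 4 */

    return 0;
}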