d7fe639cab
The NVMe command set specification states the following for namespaces
formatted with end-to-end data protection:
  - If the Reference Tag Check bit of the PRCHK field is set to '1' and
    the namespace is formatted for Type 3 protection, then the
    controller:
      * should not compare the Protection Information Reference Tag
        field to the computed reference tag; and
      * may ignore the ILBRT and EILBRT fields. If a command is
        aborted as a result of the Reference Tag Check bit of the
        PRCHK field being set to '1', then that command should be
        aborted with a status code of Invalid Protection Information,
        but may be aborted with a status code of Invalid Field in
        Command.
Currently, QEMU compares the reference tag in nvme_dif_prchk whenever the
Reference Tag Check bit is set in the command. For Type 3 namespaces,
however, nvme_dif_check (the caller of nvme_dif_prchk) does not increment
the reference tag for each subsequent logical block. As a result, commands
that span more than one logical block on a Type 3 formatted namespace with
the Reference Tag Check bit set always fail with an End-to-end Reference
Tag Check Error.
Comply with the spec by explicitly handling the case where the Reference
Tag Check bit is set for a Type 3 formatted namespace.
Fixes: 146f720c55
("hw/block/nvme: end-to-end data protection")
Signed-off-by: Dmitry Tikhov <d.tihov@yadro.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
717 lines
21 KiB
C
717 lines
21 KiB
C
/*
|
|
* QEMU NVM Express End-to-End Data Protection support
|
|
*
|
|
* Copyright (c) 2021 Samsung Electronics Co., Ltd.
|
|
*
|
|
* Authors:
|
|
* Klaus Jensen <k.jensen@samsung.com>
|
|
* Gollu Appalanaidu <anaidu.gollu@samsung.com>
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qapi/error.h"
|
|
#include "sysemu/block-backend.h"
|
|
|
|
#include "nvme.h"
|
|
#include "dif.h"
|
|
#include "trace.h"
|
|
|
|
/*
 * Validate the reference tag check request carried in @prinfo against the
 * protection type the namespace is formatted with.
 *
 * - Type 1: the command reference tag must equal @slba masked to the
 *   reference tag width (48 bits when ns->pif is non-zero, 32 bits
 *   otherwise); a mismatch yields NVME_INVALID_PROT_INFO | NVME_DNR.
 * - Type 3: requesting the reference tag check at all yields
 *   NVME_INVALID_PROT_INFO.
 *
 * Returns NVME_SUCCESS in every other case, including when the reference tag
 * check bit is not set.
 */
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
                           uint64_t reftag)
{
    /* both rules below only apply when the reference tag check is requested */
    if (!(prinfo & NVME_PRINFO_PRCHK_REF)) {
        return NVME_SUCCESS;
    }

    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_1: {
        uint64_t reftag_mask = ns->pif ? 0xffffffffffff : 0xffffffff;

        if ((slba & reftag_mask) != reftag) {
            return NVME_INVALID_PROT_INFO | NVME_DNR;
        }

        break;
    }

    case NVME_ID_NS_DPS_TYPE_3:
        return NVME_INVALID_PROT_INFO;
    }

    return NVME_SUCCESS;
}
|
|
|
|
/* from Linux kernel (crypto/crct10dif_common.c) */
|
|
static uint16_t crc16_t10dif(uint16_t crc, const unsigned char *buffer,
|
|
size_t len)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
crc = (crc << 8) ^ crc16_t10dif_table[((crc >> 8) ^ buffer[i]) & 0xff];
|
|
}
|
|
|
|
return crc;
|
|
}
|
|
|
|
/* from Linux kernel (lib/crc64.c) */
|
|
static uint64_t crc64_nvme(uint64_t crc, const unsigned char *buffer,
|
|
size_t len)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
crc = (crc >> 8) ^ crc64_nvme_table[(crc & 0xff) ^ buffer[i]];
|
|
}
|
|
|
|
return crc ^ (uint64_t)~0;
|
|
}
|
|
|
|
/*
 * Generate 16-bit guard protection information for every logical block in
 * @buf (@len bytes) and store it in the matching per-block metadata chunk of
 * @mbuf.
 *
 * Per block, the guard is the CRC16 of the block data, extended over the
 * metadata bytes that precede the tuple when the tuple is not located in the
 * first bytes of the metadata. The application tag is @apptag and the
 * reference tag is *@reftag stored through cpu_to_be32(). *@reftag is
 * advanced by one per block unless the namespace is formatted for Type 3.
 *
 * @mlen is not consulted; iteration is bounded by @len only.
 */
static void nvme_dif_pract_generate_dif_crc16(NvmeNamespace *ns, uint8_t *buf,
                                              size_t len, uint8_t *mbuf,
                                              size_t mlen, uint16_t apptag,
                                              uint64_t *reftag)
{
    const bool pi_first = !!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT);
    const bool advance_reftag =
        NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3;
    /* offset of the tuple inside each metadata chunk */
    int16_t pil = pi_first ? 0 : ns->lbaf.ms - nvme_pi_tuple_size(ns);
    uint8_t *data = buf;
    uint8_t *data_end = buf + len;
    uint8_t *meta = mbuf;

    trace_pci_nvme_dif_pract_generate_dif_crc16(len, ns->lbasz,
                                                ns->lbasz + pil, apptag,
                                                *reftag);

    while (data < data_end) {
        NvmeDifTuple *tuple = (NvmeDifTuple *)(meta + pil);
        uint16_t guard = crc16_t10dif(0x0, data, ns->lbasz);

        if (pil) {
            /* metadata ahead of the tuple is covered by the guard as well */
            guard = crc16_t10dif(guard, meta, pil);
        }

        tuple->g16.guard = cpu_to_be16(guard);
        tuple->g16.apptag = cpu_to_be16(apptag);
        tuple->g16.reftag = cpu_to_be32(*reftag);

        if (advance_reftag) {
            (*reftag)++;
        }

        data += ns->lbasz;
        meta += ns->lbaf.ms;
    }
}
|
|
|
|
/*
 * Generate 64-bit guard protection information for every logical block in
 * @buf (@len bytes) and store it in the matching per-block metadata chunk of
 * @mbuf.
 *
 * Per block, the guard is the CRC64 of the block data, extended over the
 * metadata bytes that precede the tuple when the tuple is not located in the
 * first bytes of the metadata. The application tag is @apptag and the low 48
 * bits of *@reftag are stored big-endian in the six sr[] bytes. *@reftag is
 * advanced by one per block unless the namespace is formatted for Type 3.
 *
 * @mlen is not consulted; iteration is bounded by @len only.
 */
static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf,
                                              size_t len, uint8_t *mbuf,
                                              size_t mlen, uint16_t apptag,
                                              uint64_t *reftag)
{
    uint8_t *end = buf + len;
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        /*
         * Use the same helper as the 16-bit generator and the checker so the
         * tuple offset is computed identically on both sides.
         */
        pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
    }

    trace_pci_nvme_dif_pract_generate_dif_crc64(len, ns->lbasz,
                                                ns->lbasz + pil, apptag,
                                                *reftag);

    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);

        if (pil) {
            /*
             * crc64_nvme() returns its state inverted, so undo the inversion
             * before continuing over the metadata preceding the tuple.
             * Passing the finalized value unchanged would not yield the CRC
             * of the concatenated data and metadata.
             */
            crc = crc64_nvme(~crc, mbuf, pil);
        }

        dif->g64.guard = cpu_to_be64(crc);
        dif->g64.apptag = cpu_to_be16(apptag);

        /* 48-bit reference tag, most significant byte first */
        dif->g64.sr[0] = *reftag >> 40;
        dif->g64.sr[1] = *reftag >> 32;
        dif->g64.sr[2] = *reftag >> 24;
        dif->g64.sr[3] = *reftag >> 16;
        dif->g64.sr[4] = *reftag >> 8;
        dif->g64.sr[5] = *reftag;

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }
}
|
|
|
|
/*
 * Generate protection information for the blocks in @buf into @mbuf by
 * delegating to the generator that matches the namespace protection
 * information format (ns->pif). All arguments are forwarded unchanged.
 *
 * Aborts if ns->pif is neither NVME_PI_GUARD_16 nor NVME_PI_GUARD_64.
 */
void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
                                 uint64_t *reftag)
{
    if (ns->pif == NVME_PI_GUARD_16) {
        nvme_dif_pract_generate_dif_crc16(ns, buf, len, mbuf, mlen, apptag,
                                          reftag);
        return;
    }

    if (ns->pif == NVME_PI_GUARD_64) {
        nvme_dif_pract_generate_dif_crc64(ns, buf, len, mbuf, mlen, apptag,
                                          reftag);
        return;
    }

    /* unknown protection information format */
    abort();
}
|
|
|
|
/*
 * Check the 16-bit guard protection information tuple @dif of one logical
 * block.
 *
 * Checking is skipped entirely (NVME_SUCCESS) when the stored tags carry the
 * "disabled" pattern: application tag 0xffff for Type 1 and Type 2, and
 * additionally reference tag 0xffffffff for Type 3.
 *
 * Otherwise the checks selected in @prinfo run in this order, and the first
 * failure decides the return value:
 *   guard           -> NVME_E2E_GUARD_ERROR
 *   application tag -> NVME_E2E_APP_ERROR (compared under @appmask)
 *   reference tag   -> NVME_E2E_REF_ERROR
 *
 * @buf is the block data, @mbuf the start of the block's metadata and @pil
 * the number of metadata bytes preceding the tuple, which are covered by the
 * guard when non-zero.
 */
static uint16_t nvme_dif_prchk_crc16(NvmeNamespace *ns, NvmeDifTuple *dif,
                                     uint8_t *buf, uint8_t *mbuf, size_t pil,
                                     uint8_t prinfo, uint16_t apptag,
                                     uint16_t appmask, uint64_t reftag)
{
    const uint16_t stored_guard = be16_to_cpu(dif->g16.guard);
    const uint16_t stored_apptag = be16_to_cpu(dif->g16.apptag);
    const uint32_t stored_reftag = be32_to_cpu(dif->g16.reftag);
    bool checks_disabled = false;

    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        checks_disabled = stored_reftag == 0xffffffff &&
                          stored_apptag == 0xffff;
        break;

    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        checks_disabled = stored_apptag == 0xffff;
        break;
    }

    if (checks_disabled) {
        trace_pci_nvme_dif_prchk_disabled_crc16(stored_apptag, stored_reftag);

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        uint16_t computed = crc16_t10dif(0x0, buf, ns->lbasz);

        if (pil) {
            computed = crc16_t10dif(computed, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard_crc16(stored_guard, computed);

        if (stored_guard != computed) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(stored_apptag, apptag, appmask);

        if ((stored_apptag & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag_crc16(stored_reftag, reftag);

        if (stored_reftag != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}
|
|
|
|
/*
 * Check the 64-bit guard protection information tuple @dif of one logical
 * block.
 *
 * The stored 48-bit reference tag is reassembled from the six big-endian
 * sr[] bytes. Checking is skipped entirely (NVME_SUCCESS) when the stored
 * tags carry the "disabled" pattern: application tag 0xffff for Type 1 and
 * Type 2, and additionally reference tag 0xffffffffffff for Type 3.
 *
 * Otherwise the checks selected in @prinfo run in this order, and the first
 * failure decides the return value:
 *   guard           -> NVME_E2E_GUARD_ERROR
 *   application tag -> NVME_E2E_APP_ERROR (compared under @appmask)
 *   reference tag   -> NVME_E2E_REF_ERROR
 *
 * @buf is the block data, @mbuf the start of the block's metadata and @pil
 * the number of metadata bytes preceding the tuple, which are covered by the
 * guard when non-zero.
 */
static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif,
                                     uint8_t *buf, uint8_t *mbuf, size_t pil,
                                     uint8_t prinfo, uint16_t apptag,
                                     uint16_t appmask, uint64_t reftag)
{
    uint64_t r = 0;

    r |= (uint64_t)dif->g64.sr[0] << 40;
    r |= (uint64_t)dif->g64.sr[1] << 32;
    r |= (uint64_t)dif->g64.sr[2] << 24;
    r |= (uint64_t)dif->g64.sr[3] << 16;
    r |= (uint64_t)dif->g64.sr[4] << 8;
    r |= (uint64_t)dif->g64.sr[5];

    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
    case NVME_ID_NS_DPS_TYPE_3:
        if (r != 0xffffffffffff) {
            break;
        }

        /* fallthrough */
    case NVME_ID_NS_DPS_TYPE_1:
    case NVME_ID_NS_DPS_TYPE_2:
        if (be16_to_cpu(dif->g64.apptag) != 0xffff) {
            break;
        }

        /*
         * Report the application tag of the 64-bit tuple layout; the g16
         * member aliases part of the 64-bit guard instead.
         */
        trace_pci_nvme_dif_prchk_disabled_crc64(be16_to_cpu(dif->g64.apptag),
                                                r);

        return NVME_SUCCESS;
    }

    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);

        if (pil) {
            /*
             * crc64_nvme() returns its state inverted, so undo the inversion
             * before continuing over the metadata preceding the tuple.
             */
            crc = crc64_nvme(~crc, mbuf, pil);
        }

        trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc);

        if (be64_to_cpu(dif->g64.guard) != crc) {
            return NVME_E2E_GUARD_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_APP) {
        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g64.apptag), apptag,
                                        appmask);

        if ((be16_to_cpu(dif->g64.apptag) & appmask) != (apptag & appmask)) {
            return NVME_E2E_APP_ERROR;
        }
    }

    if (prinfo & NVME_PRINFO_PRCHK_REF) {
        trace_pci_nvme_dif_prchk_reftag_crc64(r, reftag);

        if (r != reftag) {
            return NVME_E2E_REF_ERROR;
        }
    }

    return NVME_SUCCESS;
}
|
|
|
|
/*
 * Check the protection information tuple of one logical block by delegating
 * to the checker that matches the namespace protection information format
 * (ns->pif). All arguments are forwarded unchanged and the checker's status
 * is returned.
 *
 * Aborts if ns->pif is neither NVME_PI_GUARD_16 nor NVME_PI_GUARD_64.
 */
static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
                               uint8_t *buf, uint8_t *mbuf, size_t pil,
                               uint8_t prinfo, uint16_t apptag,
                               uint16_t appmask, uint64_t reftag)
{
    if (ns->pif == NVME_PI_GUARD_16) {
        return nvme_dif_prchk_crc16(ns, dif, buf, mbuf, pil, prinfo, apptag,
                                    appmask, reftag);
    }

    if (ns->pif == NVME_PI_GUARD_64) {
        return nvme_dif_prchk_crc64(ns, dif, buf, mbuf, pil, prinfo, apptag,
                                    appmask, reftag);
    }

    /* unknown protection information format */
    abort();
}
|
|
|
|
/*
 * Verify the protection information of every logical block in @buf (@len
 * bytes) against the per-block metadata in @mbuf.
 *
 * The command-level reference tag settings are validated first through
 * nvme_check_prinfo(). Each block is then checked with nvme_dif_prchk()
 * using the checks selected in @prinfo, @apptag/@appmask and the current
 * *@reftag. *@reftag is advanced by one per block unless the namespace is
 * formatted for Type 3.
 *
 * Returns NVME_SUCCESS when every block passes, otherwise the status of the
 * first failing check. @mlen is not consulted; iteration is bounded by @len.
 *
 * Note that this function may modify @mbuf: see the comment on the first
 * block of the image below.
 */
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
                        uint8_t *mbuf, size_t mlen, uint8_t prinfo,
                        uint64_t slba, uint16_t apptag,
                        uint16_t appmask, uint64_t *reftag)
{
    uint8_t *bufp, *end = buf + len;
    int16_t pil = 0;
    uint16_t status;

    status = nvme_check_prinfo(ns, prinfo, slba, *reftag);
    if (status) {
        return status;
    }

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
    }

    trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);

    for (bufp = buf; bufp < end; bufp += ns->lbasz, mbuf += ns->lbaf.ms) {
        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
        status = nvme_dif_prchk(ns, dif, bufp, mbuf, pil, prinfo, apptag,
                                appmask, *reftag);
        if (status) {
            g_autofree uint8_t *zeroes = NULL;

            /*
             * The first block of a 'raw' image is always allocated, so we
             * cannot reliably know if the block is all zeroes or not. For
             * CRC16 this works fine because the T10 CRC16 is 0x0 for all
             * zeroes, but the Rocksoft CRC64 is not. Thus, if a guard error is
             * detected for the first block, check if it is zeroed and manually
             * set the protection information to all ones to disable protection
             * information checking.
             */
            if (status != NVME_E2E_GUARD_ERROR || slba != 0x0 || bufp != buf) {
                return status;
            }

            zeroes = g_malloc0(ns->lbasz);

            if (memcmp(bufp, zeroes, ns->lbasz) != 0) {
                /*
                 * The block holds real data, so the guard mismatch is a
                 * genuine error and must be reported rather than ignored.
                 */
                return status;
            }

            memset(mbuf + pil, 0xff, nvme_pi_tuple_size(ns));
        }

        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
            (*reftag)++;
        }
    }

    return NVME_SUCCESS;
}
|
|
|
|
/*
 * Disable protection information checking for logical blocks that the block
 * layer reports as zeroed.
 *
 * The data range starting at @slba that corresponds to @mlen bytes of
 * metadata is walked with bdrv_block_status(). For every extent flagged
 * BDRV_BLOCK_ZERO, the protection information tuple of each covered block in
 * @mbuf is overwritten with 0xff bytes.
 *
 * Returns NVME_SUCCESS, or NVME_INTERNAL_DEV_ERROR (after reporting the
 * error) if the block status query fails.
 */
uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
                               uint64_t slba)
{
    BlockDriverState *bs = blk_bs(ns->blkconf.blk);
    int64_t offset = nvme_l2b(ns, slba);
    /* data bytes still to be examined */
    int64_t remaining = (mlen / ns->lbaf.ms) << ns->lbaf.ds;
    /* byte offset into @mbuf matching the current data offset */
    int64_t meta_off = 0;
    int64_t pnum = 0;
    int16_t pil = 0;

    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
        pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
    }

    for (;;) {
        int64_t meta_span;
        bool zeroed;
        int ret;

        /* drop the extent handled by the previous iteration (0 at first) */
        remaining -= pnum;

        ret = bdrv_block_status(bs, offset, remaining, &pnum, NULL, NULL);
        if (ret < 0) {
            Error *err = NULL;

            error_setg_errno(&err, -ret, "unable to get block status");
            error_report_err(err);

            return NVME_INTERNAL_DEV_ERROR;
        }

        zeroed = !!(ret & BDRV_BLOCK_ZERO);

        trace_pci_nvme_block_status(offset, remaining, pnum, ret, zeroed);

        /* metadata bytes belonging to the blocks of this extent */
        meta_span = (pnum >> ns->lbaf.ds) * ns->lbaf.ms;

        if (zeroed) {
            uint8_t *chunk = mbuf + meta_off;
            uint8_t *chunk_end = chunk + meta_span;

            while (chunk < chunk_end) {
                memset(chunk + pil, 0xff, nvme_pi_tuple_size(ns));
                chunk += ns->lbaf.ms;
            }
        }

        meta_off += meta_span;
        offset += pnum;

        /* done once the reported extent covers everything that was left */
        if (pnum == remaining) {
            break;
        }
    }

    return NVME_SUCCESS;
}
|
|
|
|
static void nvme_dif_rw_cb(void *opaque, int ret)
|
|
{
|
|
NvmeBounceContext *ctx = opaque;
|
|
NvmeRequest *req = ctx->req;
|
|
NvmeNamespace *ns = req->ns;
|
|
BlockBackend *blk = ns->blkconf.blk;
|
|
|
|
trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk));
|
|
|
|
qemu_iovec_destroy(&ctx->data.iov);
|
|
g_free(ctx->data.bounce);
|
|
|
|
qemu_iovec_destroy(&ctx->mdata.iov);
|
|
g_free(ctx->mdata.bounce);
|
|
|
|
g_free(ctx);
|
|
|
|
nvme_rw_complete_cb(req, ret);
|
|
}
|
|
|
|
static void nvme_dif_rw_check_cb(void *opaque, int ret)
|
|
{
|
|
NvmeBounceContext *ctx = opaque;
|
|
NvmeRequest *req = ctx->req;
|
|
NvmeNamespace *ns = req->ns;
|
|
NvmeCtrl *n = nvme_ctrl(req);
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
|
|
uint16_t apptag = le16_to_cpu(rw->apptag);
|
|
uint16_t appmask = le16_to_cpu(rw->appmask);
|
|
uint64_t reftag = le32_to_cpu(rw->reftag);
|
|
uint64_t cdw3 = le32_to_cpu(rw->cdw3);
|
|
uint16_t status;
|
|
|
|
reftag |= cdw3 << 32;
|
|
|
|
trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
|
|
reftag);
|
|
|
|
if (ret) {
|
|
goto out;
|
|
}
|
|
|
|
status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size,
|
|
slba);
|
|
if (status) {
|
|
req->status = status;
|
|
goto out;
|
|
}
|
|
|
|
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
|
|
ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
|
|
slba, apptag, appmask, &reftag);
|
|
if (status) {
|
|
req->status = status;
|
|
goto out;
|
|
}
|
|
|
|
status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
|
|
NVME_TX_DIRECTION_FROM_DEVICE, req);
|
|
if (status) {
|
|
req->status = status;
|
|
goto out;
|
|
}
|
|
|
|
if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
|
|
goto out;
|
|
}
|
|
|
|
status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
|
|
NVME_TX_DIRECTION_FROM_DEVICE, req);
|
|
if (status) {
|
|
req->status = status;
|
|
}
|
|
|
|
out:
|
|
nvme_dif_rw_cb(ctx, ret);
|
|
}
|
|
|
|
static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret)
|
|
{
|
|
NvmeBounceContext *ctx = opaque;
|
|
NvmeRequest *req = ctx->req;
|
|
NvmeNamespace *ns = req->ns;
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
|
|
size_t mlen = nvme_m2b(ns, nlb);
|
|
uint64_t offset = nvme_moff(ns, slba);
|
|
BlockBackend *blk = ns->blkconf.blk;
|
|
|
|
trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk));
|
|
|
|
if (ret) {
|
|
goto out;
|
|
}
|
|
|
|
ctx->mdata.bounce = g_malloc(mlen);
|
|
|
|
qemu_iovec_reset(&ctx->mdata.iov);
|
|
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
|
|
|
|
req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0,
|
|
nvme_dif_rw_check_cb, ctx);
|
|
return;
|
|
|
|
out:
|
|
nvme_dif_rw_cb(ctx, ret);
|
|
}
|
|
|
|
static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret)
|
|
{
|
|
NvmeBounceContext *ctx = opaque;
|
|
NvmeRequest *req = ctx->req;
|
|
NvmeNamespace *ns = req->ns;
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
uint64_t offset = nvme_moff(ns, slba);
|
|
BlockBackend *blk = ns->blkconf.blk;
|
|
|
|
trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk));
|
|
|
|
if (ret) {
|
|
goto out;
|
|
}
|
|
|
|
req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0,
|
|
nvme_dif_rw_cb, ctx);
|
|
return;
|
|
|
|
out:
|
|
nvme_dif_rw_cb(ctx, ret);
|
|
}
|
|
|
|
uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
|
|
{
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
|
|
NvmeNamespace *ns = req->ns;
|
|
BlockBackend *blk = ns->blkconf.blk;
|
|
bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES;
|
|
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
size_t len = nvme_l2b(ns, nlb);
|
|
size_t mlen = nvme_m2b(ns, nlb);
|
|
size_t mapped_len = len;
|
|
int64_t offset = nvme_l2b(ns, slba);
|
|
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
|
|
uint16_t apptag = le16_to_cpu(rw->apptag);
|
|
uint16_t appmask = le16_to_cpu(rw->appmask);
|
|
uint64_t reftag = le32_to_cpu(rw->reftag);
|
|
uint64_t cdw3 = le32_to_cpu(rw->cdw3);
|
|
bool pract = !!(prinfo & NVME_PRINFO_PRACT);
|
|
NvmeBounceContext *ctx;
|
|
uint16_t status;
|
|
|
|
reftag |= cdw3 << 32;
|
|
|
|
trace_pci_nvme_dif_rw(pract, prinfo);
|
|
|
|
ctx = g_new0(NvmeBounceContext, 1);
|
|
ctx->req = req;
|
|
|
|
if (wrz) {
|
|
BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP;
|
|
|
|
if (prinfo & NVME_PRINFO_PRCHK_MASK) {
|
|
status = NVME_INVALID_PROT_INFO | NVME_DNR;
|
|
goto err;
|
|
}
|
|
|
|
if (pract) {
|
|
uint8_t *mbuf, *end;
|
|
int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
|
|
|
|
status = nvme_check_prinfo(ns, prinfo, slba, reftag);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
|
|
flags = 0;
|
|
|
|
ctx->mdata.bounce = g_malloc0(mlen);
|
|
|
|
qemu_iovec_init(&ctx->mdata.iov, 1);
|
|
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
|
|
|
|
mbuf = ctx->mdata.bounce;
|
|
end = mbuf + mlen;
|
|
|
|
if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) {
|
|
pil = 0;
|
|
}
|
|
|
|
for (; mbuf < end; mbuf += ns->lbaf.ms) {
|
|
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
|
|
|
|
switch (ns->pif) {
|
|
case NVME_PI_GUARD_16:
|
|
dif->g16.apptag = cpu_to_be16(apptag);
|
|
dif->g16.reftag = cpu_to_be32(reftag);
|
|
|
|
break;
|
|
|
|
case NVME_PI_GUARD_64:
|
|
dif->g64.guard = cpu_to_be64(0x6482d367eb22b64e);
|
|
dif->g64.apptag = cpu_to_be16(apptag);
|
|
|
|
dif->g64.sr[0] = reftag >> 40;
|
|
dif->g64.sr[1] = reftag >> 32;
|
|
dif->g64.sr[2] = reftag >> 24;
|
|
dif->g64.sr[3] = reftag >> 16;
|
|
dif->g64.sr[4] = reftag >> 8;
|
|
dif->g64.sr[5] = reftag;
|
|
|
|
break;
|
|
|
|
default:
|
|
abort();
|
|
}
|
|
|
|
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
|
|
case NVME_ID_NS_DPS_TYPE_1:
|
|
case NVME_ID_NS_DPS_TYPE_2:
|
|
reftag++;
|
|
}
|
|
}
|
|
}
|
|
|
|
req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags,
|
|
nvme_dif_rw_mdata_out_cb, ctx);
|
|
return NVME_NO_COMPLETE;
|
|
}
|
|
|
|
if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
|
|
mapped_len += mlen;
|
|
}
|
|
|
|
status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
|
|
ctx->data.bounce = g_malloc(len);
|
|
|
|
qemu_iovec_init(&ctx->data.iov, 1);
|
|
qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len);
|
|
|
|
if (req->cmd.opcode == NVME_CMD_READ) {
|
|
block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
|
|
BLOCK_ACCT_READ);
|
|
|
|
req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0,
|
|
nvme_dif_rw_mdata_in_cb, ctx);
|
|
return NVME_NO_COMPLETE;
|
|
}
|
|
|
|
status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size,
|
|
NVME_TX_DIRECTION_TO_DEVICE, req);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
|
|
ctx->mdata.bounce = g_malloc(mlen);
|
|
|
|
qemu_iovec_init(&ctx->mdata.iov, 1);
|
|
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
|
|
|
|
if (!(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
|
|
status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
|
|
NVME_TX_DIRECTION_TO_DEVICE, req);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
status = nvme_check_prinfo(ns, prinfo, slba, reftag);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
|
|
if (pract) {
|
|
/* splice generated protection information into the buffer */
|
|
nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size,
|
|
ctx->mdata.bounce, ctx->mdata.iov.size,
|
|
apptag, &reftag);
|
|
} else {
|
|
status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size,
|
|
ctx->mdata.bounce, ctx->mdata.iov.size, prinfo,
|
|
slba, apptag, appmask, &reftag);
|
|
if (status) {
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size,
|
|
BLOCK_ACCT_WRITE);
|
|
|
|
req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0,
|
|
nvme_dif_rw_mdata_out_cb, ctx);
|
|
|
|
return NVME_NO_COMPLETE;
|
|
|
|
err:
|
|
qemu_iovec_destroy(&ctx->data.iov);
|
|
g_free(ctx->data.bounce);
|
|
|
|
qemu_iovec_destroy(&ctx->mdata.iov);
|
|
g_free(ctx->mdata.bounce);
|
|
|
|
g_free(ctx);
|
|
|
|
return status;
|
|
}
|