From 17ccd01647960e760f9b3daa41bcdc6aabd2ddc9 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:50 +0200 Subject: [PATCH] igb: RX payload guest writting refactoring Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 18 ++-- hw/net/igb_core.c | 209 +++++++++++++++++++++++++-------------- tests/qtest/libqos/igb.c | 5 + 3 files changed, 148 insertions(+), 84 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index 91aae37c12..cc243b7562 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, } static void -e1000e_write_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) { while (data_len > 0) { uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, while (copy_size) { iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - e1000e_write_to_rx_buffers(core, ba, &bastate, - iov->iov_base + iov_ofs, iov_copy); + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, + iov->iov_base + + iov_ofs, + iov_copy); copy_size -= iov_copy; iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, if (desc_offset + desc_size >= total_size) { /* Simulate FCS checksum presence in the last descriptor */ - e1000e_write_to_rx_buffers(core, ba, &bastate, + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } } diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index e14035876b..6d2712eaab 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size) bufsize; } +static uint32_t +igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; +} + void igb_start_recv(IGBCore *core) { @@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +typedef struct IGBPacketRxDMAState { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + struct iovec *iov; + size_t iov_ofs; + bool is_first; + uint16_t written; + hwaddr ba; +} IGBPacketRxDMAState; + static inline void igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, hwaddr *buff_addr) @@ -1514,19 +1537,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, } } -static void -igb_write_to_rx_buffers(IGBCore *core, - PCIDevice *d, - hwaddr ba, - uint16_t *written, - const char *data, - dma_addr_t data_len) -{ - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; -} - static void igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) @@ -1552,6 +1562,93 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +static void +igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size) +{ + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } +} + +static void +igb_write_payload_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr ba, + uint16_t *written, + uint32_t cur_buf_len, + const char *data, + dma_addr_t data_len) +{ + trace_igb_rx_desc_buff_write(ba, *written, data, data_len); + pci_dma_write(d, ba + *written, data, data_len); + *written += data_len; +} + +static void +igb_write_payload_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + size_t *copy_size) +{ + static const uint32_t fcs_pad; + size_t iov_copy; + + /* Copy packet payload */ + while (*copy_size) { + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + pdma_st->iov->iov_base + + pdma_st->iov_ofs, + iov_copy); + + *copy_size -= iov_copy; + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } + + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + (const char *) &fcs_pad, + e1000x_fcs_len(core->mac)); + } +} + +static void +igb_write_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st) +{ + size_t copy_size; + + if (!pdma_st->ba) { + /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + return; + } + + if (pdma_st->desc_offset >= pdma_st->size) { + return; + } + + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); + copy_size = pdma_st->size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, ©_size); + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); +} + static void igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, const E1000E_RxRing *rxr, @@ -1561,91 +1658,51 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, PCIDevice *d; dma_addr_t base; union e1000_rx_desc_union desc; - size_t desc_size; - size_t desc_offset = 0; - size_t iov_ofs = 0; + const E1000ERingInfo *rxi; + size_t rx_desc_len; - struct iovec *iov = net_rx_pkt_get_iovec(pkt); - size_t size = net_rx_pkt_get_total_len(pkt); - size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000ERingInfo *rxi = rxr->i; - size_t bufsize = igb_rxbufsize(core, rxi); + IGBPacketRxDMAState pdma_st = {0}; + pdma_st.is_first = true; + pdma_st.size = net_rx_pkt_get_total_len(pkt); + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); + rxi = rxr->i; + rx_desc_len = core->rx_desc_len; + pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi); + pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi); + pdma_st.iov = net_rx_pkt_get_iovec(pkt); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); if (!d) { d = core->owner; } do { - hwaddr ba; - uint16_t written = 0; + pdma_st.written = 0; bool is_last = false; - desc_size = total_size - desc_offset; - - if (desc_size > bufsize) { - desc_size = bufsize; - } - if (igb_ring_empty(core, rxi)) { return; } base = igb_ring_head_descr(core, rxi); + pci_dma_read(d, base, &desc, rx_desc_len); + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - pci_dma_read(d, base, &desc, core->rx_desc_len); + igb_read_rx_descr(core, &desc, &pdma_st.ba); - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); - - igb_read_rx_descr(core, &desc, &ba); - - if (ba) { - if (desc_offset < size) { - static const uint32_t fcs_pad; - size_t iov_copy; - size_t copy_size = size - desc_offset; - if (copy_size > bufsize) { - copy_size = bufsize; - } - - /* Copy packet payload */ - while (copy_size) { - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - - igb_write_to_rx_buffers(core, d, ba, &written, - iov->iov_base + iov_ofs, iov_copy); - - copy_size -= iov_copy; - iov_ofs += iov_copy; - if (iov_ofs == iov->iov_len) { - iov++; - iov_ofs = 0; - } - } - - if (desc_offset + desc_size >= total_size) { - /* Simulate FCS checksum presence in the last descriptor */ - igb_write_to_rx_buffers(core, d, ba, &written, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } - desc_offset += desc_size; - if (desc_offset >= total_size) { + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); + pdma_st.desc_offset += pdma_st.desc_size; + if (pdma_st.desc_offset >= pdma_st.total_size) { is_last = true; } igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, written); - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); + rss_info, etqf, ts, pdma_st.written); + igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len); + igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN); + } while (pdma_st.desc_offset < pdma_st.total_size); - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); - - } while (desc_offset < total_size); - - igb_update_rx_stats(core, rxi, size, total_size); + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); } static bool diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index a603468beb..f40c4ec4cd 100644 --- a/tests/qtest/libqos/igb.c +++ b/tests/qtest/libqos/igb.c @@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj) E1000_RAH_AV | E1000_RAH_POOL_1 | le16_to_cpu(*(uint16_t *)(address + 4))); + /* Set supported receive descriptor mode */ + e1000e_macreg_write(&d->e1000e, + E1000_SRRCTL(0), + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); + /* Enable receive */ e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);