net: Introduce e1000e device emulation
This patch introduces emulation for the Intel 82574 adapter, AKA e1000e.
This implementation is derived from the e1000 emulation code, and
utilizes the TX/RX packet abstractions that were initially developed for
the vmxnet3 device. Although some parts of the introduced code may be
shared with e1000, the differences are substantial enough so that the
only shared resources for the two devices are the definitions in
hw/net/e1000_regs.h.
Similarly to vmxnet3, the new device uses virtio headers for task
offloads (for backends that support virtio extensions). Usage of
virtio headers may be forcibly disabled via a boolean device property
"vnet" (which is enabled by default). In such a case, task offloads
will be performed in software, in the same way as is done on
backends that do not support virtio headers.
The device code is split into two parts:
1. hw/net/e1000e.c: QEMU-specific code for a network device;
2. hw/net/e1000e_core.[hc]: Device emulation according to the spec.
The new device name is e1000e.
Intel specifications for the 82574 controller are available at:
http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
Throughput measurement results (iperf2):
Fedora 22 guest, TCP, RX
4 ++------------------------------------------+
| |
| X X X X X
3.5 ++ X X X X |
| X |
| |
3 ++ |
G | X |
b | |
/ 2.5 ++ |
s | |
| |
2 ++ |
| |
| |
1.5 X+ |
| |
+ + + + + + + + + + + +
1 ++--+---+---+---+---+---+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Fedora 22 guest, TCP, TX
18 ++-------------------------------------------+
| X |
16 ++ X X X X X
| X |
14 ++ |
| |
12 ++ |
G | X |
b 10 ++ |
/ | |
s 8 ++ |
| |
6 ++ X |
| |
4 ++ |
| X |
2 ++ X |
X + + + + + + + + + + +
0 ++--+---+---+---+---+----+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Fedora 22 guest, UDP, RX
3 ++------------------------------------------+
| X
| |
2.5 ++ |
| |
| |
2 ++ X |
G | |
b | |
/ 1.5 ++ |
s | X |
| |
1 ++ |
| |
| X |
0.5 ++ |
| X |
X + + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Fedora 22 guest, UDP, TX
1 ++------------------------------------------+
| X
0.9 ++ |
| |
0.8 ++ |
0.7 ++ |
| |
G 0.6 ++ |
b | |
/ 0.5 ++ |
s | X |
0.4 ++ |
| |
0.3 ++ |
0.2 ++ X |
| |
0.1 ++ X |
X X + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Windows 2012R2 guest, TCP, RX
3.2 ++------------------------------------------+
| X |
3 ++ |
| |
2.8 ++ |
| |
2.6 ++ X |
G | X X X X X
b 2.4 ++ X X |
/ | |
s 2.2 ++ |
| |
2 ++ |
| X X |
1.8 ++ |
| |
1.6 X+ |
+ + + + + + + + + + + +
1.4 ++--+---+---+---+---+---+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Windows 2012R2 guest, TCP, TX
14 ++-------------------------------------------+
| |
| X X
12 ++ |
| |
10 ++ |
| |
G | |
b 8 ++ |
/ | X |
s 6 ++ |
| |
| |
4 ++ X |
| |
2 ++ |
| X X X |
+ X X + + X X + + + + +
0 X+--+---+---+---+---+----+---+---+---+---+---+
32 64 128 256 512 1 2 4 8 16 32 64
B B B B B KB KB KB KB KB KB KB
Buffer size
Windows 2012R2 guest, UDP, RX
1.6 ++------------------------------------------X
| |
1.4 ++ |
| |
1.2 ++ |
| X |
| |
G 1 ++ |
b | |
/ 0.8 ++ |
s | |
0.6 ++ X |
| |
0.4 ++ |
| X |
| |
0.2 ++ X |
X + + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Windows 2012R2 guest, UDP, TX
0.6 ++------------------------------------------+
| X
| |
0.5 ++ |
| |
| |
0.4 ++ |
G | |
b | |
/ 0.3 ++ X |
s | |
| |
0.2 ++ |
| |
| X |
0.1 ++ |
| X |
X X + + + +
0 ++-------+--------+-------+--------+--------+
32 64 128 256 512 1
B B B B B KB
Datagram size
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-06-01 10:23:45 +02:00
|
|
|
/*
 * Core code for QEMU e1000e emulation
 *
 * Software developer's manuals:
 * http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
 *
 * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Leonid Bloch <leonid@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * Based on work done by:
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
|
|
|
/*
 * Sizing constants for the emulated device state.  They dimension the
 * arrays declared in struct E1000Core (mac[], phy[][], eeprom[], tx[],
 * eitr[] and friends).
 */
#define E1000E_PHY_PAGE_SIZE    (0x20)   /* second dimension of phy[][] */
#define E1000E_PHY_PAGES        (0x07)   /* first dimension of phy[][] */
#define E1000E_MAC_SIZE         (0x8000) /* number of uint32_t entries in mac[] */
#define E1000E_EEPROM_SIZE      (64)     /* number of uint16_t entries in eeprom[] */
#define E1000E_MSIX_VEC_NUM     (5)      /* length of the per-vector eitr arrays */
#define E1000E_NUM_QUEUES       (2)      /* length of the tx[] array */
|
|
|
|
|
|
|
|
/*
 * Forward declaration of the core device state.  It lets
 * E1000IntrDelayTimer hold an E1000ECore pointer before struct E1000Core
 * itself is defined further down.
 */
typedef struct E1000Core E1000ECore;
|
|
|
|
|
|
|
|
enum { PHY_R = BIT(0),
|
|
|
|
PHY_W = BIT(1),
|
|
|
|
PHY_RW = PHY_R | PHY_W,
|
|
|
|
PHY_ANYPAGE = BIT(2) };
|
|
|
|
|
|
|
|
typedef struct E1000IntrDelayTimer_st {
|
|
|
|
QEMUTimer *timer;
|
|
|
|
bool running;
|
|
|
|
uint32_t delay_reg;
|
|
|
|
uint32_t delay_resolution_ns;
|
|
|
|
E1000ECore *core;
|
|
|
|
} E1000IntrDelayTimer;
|
|
|
|
|
|
|
|
struct E1000Core {
|
|
|
|
uint32_t mac[E1000E_MAC_SIZE];
|
|
|
|
uint16_t phy[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE];
|
|
|
|
uint16_t eeprom[E1000E_EEPROM_SIZE];
|
|
|
|
|
|
|
|
uint32_t rxbuf_sizes[E1000_PSRCTL_BUFFS_PER_DESC];
|
|
|
|
uint32_t rx_desc_buf_size;
|
|
|
|
uint32_t rxbuf_min_shift;
|
|
|
|
uint8_t rx_desc_len;
|
|
|
|
|
|
|
|
QEMUTimer *autoneg_timer;
|
|
|
|
|
|
|
|
struct e1000e_tx {
|
|
|
|
e1000x_txd_props props;
|
|
|
|
|
|
|
|
bool skip_cp;
|
|
|
|
struct NetTxPkt *tx_pkt;
|
|
|
|
} tx[E1000E_NUM_QUEUES];
|
|
|
|
|
|
|
|
struct NetRxPkt *rx_pkt;
|
|
|
|
|
|
|
|
bool has_vnet;
|
|
|
|
int max_queue_num;
|
|
|
|
|
|
|
|
/* Interrupt moderation management */
|
|
|
|
uint32_t delayed_causes;
|
|
|
|
|
|
|
|
E1000IntrDelayTimer radv;
|
|
|
|
E1000IntrDelayTimer rdtr;
|
|
|
|
E1000IntrDelayTimer raid;
|
|
|
|
|
|
|
|
E1000IntrDelayTimer tadv;
|
|
|
|
E1000IntrDelayTimer tidv;
|
|
|
|
|
|
|
|
E1000IntrDelayTimer itr;
|
|
|
|
bool itr_intr_pending;
|
|
|
|
|
|
|
|
E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM];
|
|
|
|
bool eitr_intr_pending[E1000E_MSIX_VEC_NUM];
|
|
|
|
|
|
|
|
VMChangeStateEntry *vmstate;
|
|
|
|
|
|
|
|
uint32_t itr_guest_value;
|
|
|
|
uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM];
|
|
|
|
|
|
|
|
uint16_t vet;
|
|
|
|
|
|
|
|
uint8_t permanent_mac[ETH_ALEN];
|
|
|
|
|
|
|
|
NICState *owner_nic;
|
|
|
|
PCIDevice *owner;
|
|
|
|
void (*owner_start_recv)(PCIDevice *d);
|
|
|
|
};
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size);
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size);
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_pci_realize(E1000ECore *regs,
|
|
|
|
const uint16_t *eeprom_templ,
|
|
|
|
uint32_t eeprom_size,
|
|
|
|
const uint8_t *macaddr);
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_reset(E1000ECore *core);
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_pre_save(E1000ECore *core);
|
|
|
|
|
|
|
|
int
|
|
|
|
e1000e_core_post_load(E1000ECore *core);
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_set_link_status(E1000ECore *core);
|
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_core_pci_uninit(E1000ECore *core);
|
|
|
|
|
|
|
|
int
|
|
|
|
e1000e_can_receive(E1000ECore *core);
|
|
|
|
|
|
|
|
ssize_t
|
|
|
|
e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size);
|
|
|
|
|
|
|
|
ssize_t
|
|
|
|
e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt);
|
2016-09-15 08:14:24 +02:00
|
|
|
|
|
|
|
void
|
|
|
|
e1000e_start_recv(E1000ECore *core);
|