Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160705' into staging

ppc patch queue for 2016-07-05

Here's the current ppc, sPAPR and related drivers patch queue.

  * The big addition is dynamic DMA window support (this includes some
    core VFIO changes)
  * There are also several fixes to the MMU emulation for bugs
    introduced with the HV mode patches
  * Several other bugfixes and cleanups

Changes in v2:
  I messed up and forgot to make a fix in the last patch which BenH
  pointed out (introduced by my rebasing).  That's fixed in this
  version, and I'm replacing the tag in place with the revised
  version.

# gpg: Signature made Tue 05 Jul 2016 06:28:58 BST
# gpg:                using RSA key 0x6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>"
# gpg:                 aka "David Gibson (Red Hat) <dgibson@redhat.com>"
# gpg:                 aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E  87DC 6C38 CACA 20D9 B392

* remotes/dgibson/tags/ppc-for-2.7-20160705:
  ppc/hash64: Fix support for LPCR:ISL
  ppc/hash64: Add proper real mode translation support
  target-ppc: Return page shift from PTEG search
  target-ppc: Simplify HPTE matching
  target-ppc: Correct page size decoding in ppc_hash64_pteg_search()
  ppc: simplify ppc_hash64_hpte_page_shift_noslb()
  spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)
  vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)
  vfio: Add host side DMA window capabilities
  vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)
  spapr_iommu: Realloc guest visible TCE table when starting/stopping listening
  ppc: simplify max_smt initialization in ppc_cpu_realizefn()
  spapr: Ensure thread0 of CPU core is always realized first
  ppc: Fix xsrdpi, xvrdpi and xvrspi rounding

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 8662d7db39
Peter Maydell <peter.maydell@linaro.org>, 2016-07-05 11:14:27 +01:00
20 changed files with 1098 additions and 238 deletions


@@ -8,6 +8,7 @@ obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
obj-y += spapr_pci_vfio.o
endif
obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
# PowerPC 4xx boards
obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
obj-y += ppc4xx_pci.o


@@ -1771,6 +1771,13 @@ static void ppc_spapr_init(MachineState *machine)
spapr->vrma_adjust = 1;
spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
}
/* Actually we don't support unbounded RMA anymore since we
* added proper emulation of HV mode. The max we can get is
* 16G, which also happens to be what we configure for PAPR
* mode, so make sure we don't do anything bigger than that.
*/
spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
}
if (spapr->rma_size > node0_size) {
@@ -2489,7 +2496,12 @@ DEFINE_SPAPR_MACHINE(2_7, "2.7", true);
* pseries-2.6
*/
#define SPAPR_COMPAT_2_6 \
HW_COMPAT_2_6
HW_COMPAT_2_6 \
{ \
.driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\
.property = "ddw",\
.value = stringify(off),\
},
static void spapr_machine_2_6_instance_options(MachineState *machine)
{

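The compat entry above pins the new "ddw" property to "off" for pseries-2.6 (and, via the chained compat macros, everything older), so existing machine types keep their pre-DDW guest ABI across migration. A simplified, standalone sketch of what the macro contributes (QEMU's real GlobalProperty lives in hw/qdev-core.h; this block is illustrative only):

#include <stdio.h>

/* Simplified stand-in for QEMU's GlobalProperty; illustrative only. */
typedef struct {
    const char *driver;
    const char *property;
    const char *value;
} CompatProp;

int main(void)
{
    /* The entry SPAPR_COMPAT_2_6 appends after HW_COMPAT_2_6 */
    CompatProp ddw_off = {
        .driver   = "spapr-pci-host-bridge",  /* TYPE_SPAPR_PCI_HOST_BRIDGE */
        .property = "ddw",
        .value    = "off",                    /* stringify(off) */
    };

    printf("-global %s.%s=%s\n", ddw_off.driver, ddw_off.property,
           ddw_off.value);
    return 0;
}

On newer machine types the same knob should be reachable by hand, e.g. -global spapr-pci-host-bridge.ddw=off on the command line.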

@@ -259,9 +259,9 @@ out:
error_propagate(errp, local_err);
}
static int spapr_cpu_core_realize_child(Object *child, void *opaque)
static void spapr_cpu_core_realize_child(Object *child, Error **errp)
{
Error **errp = opaque, *local_err = NULL;
Error *local_err = NULL;
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
CPUState *cs = CPU(child);
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -269,15 +269,14 @@ static int spapr_cpu_core_realize_child(Object *child, void *opaque)
object_property_set_bool(child, true, "realized", &local_err);
if (local_err) {
error_propagate(errp, local_err);
return 1;
return;
}
spapr_cpu_init(spapr, cpu, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return 1;
return;
}
return 0;
}
static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
@@ -287,13 +286,13 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
const char *typename = object_class_get_name(sc->cpu_class);
size_t size = object_type_get_instance_size(typename);
Error *local_err = NULL;
Object *obj;
int i;
void *obj;
int i, j;
sc->threads = g_malloc0(size * cc->nr_threads);
for (i = 0; i < cc->nr_threads; i++) {
char id[32];
void *obj = sc->threads + i * size;
obj = sc->threads + i * size;
object_initialize(obj, size, typename);
snprintf(id, sizeof(id), "thread[%d]", i);
@@ -303,12 +302,16 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
}
object_unref(obj);
}
object_child_foreach(OBJECT(dev), spapr_cpu_core_realize_child, &local_err);
if (local_err) {
goto err;
} else {
return;
for (j = 0; j < cc->nr_threads; j++) {
obj = sc->threads + j * size;
spapr_cpu_core_realize_child(obj, &local_err);
if (local_err) {
goto err;
}
}
return;
err:
while (--i >= 0) {


@@ -83,12 +83,12 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong pte_index = args[1];
target_ulong pteh = args[2];
target_ulong ptel = args[3];
unsigned apshift, spshift;
unsigned apshift;
target_ulong raddr;
target_ulong index;
uint64_t token;
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel, &spshift);
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
if (!apshift) {
/* Bad page size encoding */
return H_PARAMETER;


@@ -156,6 +156,16 @@ static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu)
return 1ULL << tcet->page_shift;
}
static void spapr_tce_notify_started(MemoryRegion *iommu)
{
spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
}
static void spapr_tce_notify_stopped(MemoryRegion *iommu)
{
spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false);
}
static int spapr_tce_table_post_load(void *opaque, int version_id)
{
sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -236,6 +246,8 @@ static const VMStateDescription vmstate_spapr_tce_table = {
static MemoryRegionIOMMUOps spapr_iommu_ops = {
.translate = spapr_tce_translate_iommu,
.get_min_page_size = spapr_tce_get_min_page_size,
.notify_started = spapr_tce_notify_started,
.notify_stopped = spapr_tce_notify_stopped,
};
static int spapr_tce_table_realize(DeviceState *dev)


@@ -35,6 +35,7 @@
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "exec/address-spaces.h"
#include "exec/ram_addr.h"
#include <libfdt.h>
#include "trace.h"
#include "qemu/error-report.h"
@@ -45,6 +46,7 @@
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
#include "sysemu/hostmem.h"
#include "hw/vfio/vfio.h"
@@ -1087,12 +1089,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
void *fdt = NULL;
int fdt_start_offset = 0, fdt_size;
if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
spapr_tce_set_need_vfio(tcet, true);
}
fdt = create_device_tree(&fdt_size);
fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
if (!fdt_start_offset) {
@@ -1310,11 +1306,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
PCIBus *bus;
uint64_t msi_window_size = 4096;
sPAPRTCETable *tcet;
const unsigned windows_supported =
sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
if (sphb->index != (uint32_t)-1) {
hwaddr windows_base;
if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1)
if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1)
|| (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
|| (sphb->mem_win_addr != (hwaddr)-1)
|| (sphb->io_win_addr != (hwaddr)-1)) {
error_setg(errp, "Either \"index\" or other parameters must"
@@ -1329,7 +1328,9 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
sphb->dma_liobn = SPAPR_PCI_LIOBN(sphb->index, 0);
for (i = 0; i < windows_supported; ++i) {
sphb->dma_liobn[i] = SPAPR_PCI_LIOBN(sphb->index, i);
}
windows_base = SPAPR_PCI_WINDOW_BASE
+ sphb->index * SPAPR_PCI_WINDOW_SPACING;
@@ -1342,8 +1343,9 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
return;
}
if (sphb->dma_liobn == (uint32_t)-1) {
error_setg(errp, "LIOBN not specified for PHB");
if ((sphb->dma_liobn[0] == (uint32_t)-1) ||
((sphb->dma_liobn[1] == (uint32_t)-1) && (windows_supported > 1))) {
error_setg(errp, "LIOBN(s) not specified for PHB");
return;
}
@@ -1462,16 +1464,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
}
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn);
if (!tcet) {
error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
return;
/* DMA setup */
for (i = 0; i < windows_supported; ++i) {
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
if (!tcet) {
error_setg(errp, "Creating window#%d failed for %s",
i, sphb->dtbusname);
return;
}
memory_region_add_subregion_overlap(&sphb->iommu_root, 0,
spapr_tce_get_iommu(tcet), 0);
}
memory_region_add_subregion_overlap(&sphb->iommu_root, 0,
spapr_tce_get_iommu(tcet), 0);
sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
}
@@ -1488,13 +1492,19 @@ static int spapr_phb_children_reset(Object *child, void *opaque)
void spapr_phb_dma_reset(sPAPRPHBState *sphb)
{
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(sphb->dma_liobn);
int i;
sPAPRTCETable *tcet;
if (tcet && tcet->nb_table) {
spapr_tce_table_disable(tcet);
for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) {
tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
if (tcet && tcet->nb_table) {
spapr_tce_table_disable(tcet);
}
}
/* Register default 32bit DMA window */
tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
}
@@ -1516,7 +1526,8 @@ static void spapr_phb_reset(DeviceState *qdev)
static Property spapr_phb_properties[] = {
DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1),
DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn[0], -1),
DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1),
DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
SPAPR_PCI_MMIO_WIN_SIZE),
@@ -1528,6 +1539,11 @@ static Property spapr_phb_properties[] = {
/* Default DMA window is 0..1GB */
DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0),
DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000),
DEFINE_PROP_UINT64("dma64_win_addr", sPAPRPHBState, dma64_win_addr,
0x800000000000000ULL),
DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
(1ULL << 12) | (1ULL << 16)),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1604,7 +1620,7 @@ static const VMStateDescription vmstate_spapr_pci = {
.post_load = spapr_pci_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
VMSTATE_UINT32_EQUAL(dma_liobn[0], sPAPRPHBState),
VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
@@ -1780,6 +1796,15 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t interrupt_map_mask[] = {
cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
uint32_t ddw_applicable[] = {
cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
};
uint32_t ddw_extensions[] = {
cpu_to_be32(1),
cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
};
sPAPRTCETable *tcet;
PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
sPAPRFDT s_fdt;
@@ -1804,6 +1829,14 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
_FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR));
/* Dynamic DMA window */
if (phb->ddw_enabled) {
_FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
sizeof(ddw_applicable)));
_FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
&ddw_extensions, sizeof(ddw_extensions)));
}
/* Build the interrupt-map, this must match what is done
* in pci_spapr_map_irq
*/
@@ -1827,7 +1860,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
sizeof(interrupt_map)));
tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
if (!tcet) {
return -1;
}

hw/ppc/spapr_rtas_ddw.c (new file, 295 lines)

@@ -0,0 +1,295 @@
/*
* QEMU sPAPR Dynamic DMA windows support
*
* Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "qemu/error-report.h"
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "trace.h"
static int spapr_phb_get_active_win_num_cb(Object *child, void *opaque)
{
sPAPRTCETable *tcet;
tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
if (tcet && tcet->nb_table) {
++*(unsigned *)opaque;
}
return 0;
}
static unsigned spapr_phb_get_active_win_num(sPAPRPHBState *sphb)
{
unsigned ret = 0;
object_child_foreach(OBJECT(sphb), spapr_phb_get_active_win_num_cb, &ret);
return ret;
}
static int spapr_phb_get_free_liobn_cb(Object *child, void *opaque)
{
sPAPRTCETable *tcet;
tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
if (tcet && !tcet->nb_table) {
*(uint32_t *)opaque = tcet->liobn;
return 1;
}
return 0;
}
static unsigned spapr_phb_get_free_liobn(sPAPRPHBState *sphb)
{
uint32_t liobn = 0;
object_child_foreach(OBJECT(sphb), spapr_phb_get_free_liobn_cb, &liobn);
return liobn;
}
static uint32_t spapr_page_mask_to_query_mask(uint64_t page_mask)
{
int i;
uint32_t mask = 0;
const struct { int shift; uint32_t mask; } masks[] = {
{ 12, RTAS_DDW_PGSIZE_4K },
{ 16, RTAS_DDW_PGSIZE_64K },
{ 24, RTAS_DDW_PGSIZE_16M },
{ 25, RTAS_DDW_PGSIZE_32M },
{ 26, RTAS_DDW_PGSIZE_64M },
{ 27, RTAS_DDW_PGSIZE_128M },
{ 28, RTAS_DDW_PGSIZE_256M },
{ 34, RTAS_DDW_PGSIZE_16G },
};
for (i = 0; i < ARRAY_SIZE(masks); ++i) {
if (page_mask & (1ULL << masks[i].shift)) {
mask |= masks[i].mask;
}
}
return mask;
}
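To make the mapping above concrete, here is a self-contained sketch of the same translation, with mask values mirroring the RTAS_DDW_PGSIZE_* defines added by this series; the PHB's default "pgsz" of 4K|64K host pages comes out as 0x03:

#include <stdint.h>
#include <stdio.h>

/* Mask values mirror the RTAS_DDW_PGSIZE_* defines from this series. */
static uint32_t query_mask(uint64_t page_mask)
{
    static const struct { int shift; uint32_t mask; } masks[] = {
        { 12, 0x01 }, { 16, 0x02 }, { 24, 0x04 }, { 25, 0x08 },
        { 26, 0x10 }, { 27, 0x20 }, { 28, 0x40 }, { 34, 0x80 },
    };
    uint32_t mask = 0;
    unsigned i;

    for (i = 0; i < sizeof(masks) / sizeof(masks[0]); i++) {
        if (page_mask & (1ULL << masks[i].shift)) {
            mask |= masks[i].mask;
        }
    }
    return mask;
}

int main(void)
{
    /* Default "pgsz" property: (1ULL << 12) | (1ULL << 16) -> 0x03 */
    printf("0x%x\n", (unsigned)query_mask((1ULL << 12) | (1ULL << 16)));
    return 0;
}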
static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
uint64_t buid, max_window_size;
uint32_t avail, addr, pgmask = 0;
MachineState *machine = MACHINE(spapr);
if ((nargs != 3) || (nret != 5)) {
goto param_error_exit;
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
addr = rtas_ld(args, 0);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb || !sphb->ddw_enabled) {
goto param_error_exit;
}
/* Translate page mask to LoPAPR format */
pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask);
/*
* This is "Largest contiguous block of TCEs allocated specifically
* for (that is, are reserved for) this PE".
* Return the maximum number, i.e. the maximum supported RAM size, in 4K pages.
*/
if (machine->ram_size == machine->maxram_size) {
max_window_size = machine->ram_size;
} else {
MemoryHotplugState *hpms = &spapr->hotplug_memory;
max_window_size = hpms->base + memory_region_size(&hpms->mr);
}
avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, avail);
rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT);
rtas_st(rets, 3, pgmask);
rtas_st(rets, 4, 0); /* DMA migration mask, not supported */
trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask);
return;
param_error_exit:
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
static void rtas_ibm_create_pe_dma_window(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
sPAPRTCETable *tcet = NULL;
uint32_t addr, page_shift, window_shift, liobn;
uint64_t buid, win_addr;
int windows;
if ((nargs != 5) || (nret != 4)) {
goto param_error_exit;
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
addr = rtas_ld(args, 0);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb || !sphb->ddw_enabled) {
goto param_error_exit;
}
page_shift = rtas_ld(args, 3);
window_shift = rtas_ld(args, 4);
liobn = spapr_phb_get_free_liobn(sphb);
windows = spapr_phb_get_active_win_num(sphb);
if (!(sphb->page_size_mask & (1ULL << page_shift)) ||
(window_shift < page_shift)) {
goto param_error_exit;
}
if (!liobn || !sphb->ddw_enabled || windows == SPAPR_PCI_DMA_MAX_WINDOWS) {
goto hw_error_exit;
}
tcet = spapr_tce_find_by_liobn(liobn);
if (!tcet) {
goto hw_error_exit;
}
win_addr = (windows == 0) ? sphb->dma_win_addr : sphb->dma64_win_addr;
spapr_tce_table_enable(tcet, page_shift, win_addr,
1ULL << (window_shift - page_shift));
if (!tcet->nb_table) {
goto hw_error_exit;
}
trace_spapr_iommu_ddw_create(buid, addr, 1ULL << page_shift,
1ULL << window_shift, tcet->bus_offset, liobn);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, liobn);
rtas_st(rets, 2, tcet->bus_offset >> 32);
rtas_st(rets, 3, tcet->bus_offset & ((uint32_t) -1));
return;
hw_error_exit:
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
param_error_exit:
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
static void rtas_ibm_remove_pe_dma_window(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
sPAPRTCETable *tcet;
uint32_t liobn;
if ((nargs != 1) || (nret != 1)) {
goto param_error_exit;
}
liobn = rtas_ld(args, 0);
tcet = spapr_tce_find_by_liobn(liobn);
if (!tcet) {
goto param_error_exit;
}
sphb = SPAPR_PCI_HOST_BRIDGE(OBJECT(tcet)->parent);
if (!sphb || !sphb->ddw_enabled || !tcet->nb_table) {
goto param_error_exit;
}
spapr_tce_table_disable(tcet);
trace_spapr_iommu_ddw_remove(liobn);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
return;
param_error_exit:
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
static void rtas_ibm_reset_pe_dma_window(PowerPCCPU *cpu,
sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
sPAPRPHBState *sphb;
uint64_t buid;
uint32_t addr;
if ((nargs != 3) || (nret != 1)) {
goto param_error_exit;
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
addr = rtas_ld(args, 0);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb || !sphb->ddw_enabled) {
goto param_error_exit;
}
spapr_phb_dma_reset(sphb);
trace_spapr_iommu_ddw_reset(buid, addr);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
return;
param_error_exit:
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
static void spapr_rtas_ddw_init(void)
{
spapr_rtas_register(RTAS_IBM_QUERY_PE_DMA_WINDOW,
"ibm,query-pe-dma-window",
rtas_ibm_query_pe_dma_window);
spapr_rtas_register(RTAS_IBM_CREATE_PE_DMA_WINDOW,
"ibm,create-pe-dma-window",
rtas_ibm_create_pe_dma_window);
spapr_rtas_register(RTAS_IBM_REMOVE_PE_DMA_WINDOW,
"ibm,remove-pe-dma-window",
rtas_ibm_remove_pe_dma_window);
spapr_rtas_register(RTAS_IBM_RESET_PE_DMA_WINDOW,
"ibm,reset-pe-dma-window",
rtas_ibm_reset_pe_dma_window);
}
type_init(spapr_rtas_ddw_init)

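Taken together, the four handlers implement the LoPAPR DDW flow: the guest queries how many windows and which page sizes are available, creates a window by passing a page shift and a window shift, and can remove or reset it later. A rough sketch (not QEMU code) of the geometry implied by ibm,create-pe-dma-window; the TCE count below is exactly what rtas_ibm_create_pe_dma_window() hands to spapr_tce_table_enable():

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical request: 64K IOMMU pages, an 8G window */
    uint32_t page_shift = 16;
    uint32_t window_shift = 33;
    uint64_t nb_tces = 1ULL << (window_shift - page_shift);

    /* prints: 131072 TCEs cover a 8589934592-byte window */
    printf("%llu TCEs cover a %llu-byte window\n",
           (unsigned long long)nb_tces,
           (unsigned long long)(1ULL << window_shift));
    return 0;
}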

@@ -30,6 +30,10 @@ spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, un
spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d"
spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32
spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win_size, uint32_t pgmask) "buid=%"PRIx64" addr=%"PRIx32", %u windows available, max window size=%"PRIx64", mask=%"PRIx32
spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=%"PRIx64" addr=%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=%"PRIx64", liobn=%"PRIx32
spapr_iommu_ddw_remove(uint32_t liobn) "liobn=%"PRIx32
spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=%"PRIx64" addr=%"PRIx32
# hw/ppc/ppc.c
ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"


@@ -4,4 +4,5 @@ obj-$(CONFIG_PCI) += pci.o pci-quirks.o
obj-$(CONFIG_SOFTMMU) += platform.o
obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
obj-$(CONFIG_SOFTMMU) += amd-xgbe.o
obj-$(CONFIG_SOFTMMU) += spapr.o
endif


@@ -28,6 +28,7 @@
#include "exec/memory.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/kvm.h"
#ifdef CONFIG_KVM
#include "linux/kvm.h"
@@ -241,6 +242,44 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
return -errno;
}
static void vfio_host_win_add(VFIOContainer *container,
hwaddr min_iova, hwaddr max_iova,
uint64_t iova_pgsizes)
{
VFIOHostDMAWindow *hostwin;
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
if (ranges_overlap(hostwin->min_iova,
hostwin->max_iova - hostwin->min_iova + 1,
min_iova,
max_iova - min_iova + 1)) {
hw_error("%s: Overlapped IOMMU are not enabled", __func__);
}
}
hostwin = g_malloc0(sizeof(*hostwin));
hostwin->min_iova = min_iova;
hostwin->max_iova = max_iova;
hostwin->iova_pgsizes = iova_pgsizes;
QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
}
static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
hwaddr max_iova)
{
VFIOHostDMAWindow *hostwin;
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
QLIST_REMOVE(hostwin, hostwin_next);
return 0;
}
}
return -1;
}
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
return (!memory_region_is_ram(section->mr) &&
@@ -329,6 +368,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
Int128 llend, llsize;
void *vaddr;
int ret;
VFIOHostDMAWindow *hostwin;
bool hostwin_found;
if (vfio_listener_skipped_section(section)) {
trace_vfio_listener_region_add_skip(
@@ -354,7 +395,40 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
end = int128_get64(int128_sub(llend, int128_one()));
if ((iova < container->min_iova) || (end > container->max_iova)) {
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
VFIOHostDMAWindow *hostwin;
hwaddr pgsize = 0;
/* For now intersections are not allowed, we may relax this later */
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
if (ranges_overlap(hostwin->min_iova,
hostwin->max_iova - hostwin->min_iova + 1,
section->offset_within_address_space,
int128_get64(section->size))) {
ret = -1;
goto fail;
}
}
ret = vfio_spapr_create_window(container, section, &pgsize);
if (ret) {
goto fail;
}
vfio_host_win_add(container, section->offset_within_address_space,
section->offset_within_address_space +
int128_get64(section->size) - 1, pgsize);
}
hostwin_found = false;
QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
hostwin_found = true;
break;
}
}
if (!hostwin_found) {
error_report("vfio: IOMMU container %p can't map guest IOVA region"
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx,
container, iova, end);
@@ -369,10 +443,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
trace_vfio_listener_region_add_iommu(iova, end);
/*
* FIXME: We should do some checking to see if the
* capabilities of the host VFIO IOMMU are adequate to model
* the guest IOMMU
*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
* would be the right place to wire that up (tell the KVM
@@ -493,6 +563,18 @@ static void vfio_listener_region_del(MemoryListener *listener,
"0x%"HWADDR_PRIx") = %d (%m)",
container, iova, int128_get64(llsize), ret);
}
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
vfio_spapr_remove_window(container,
section->offset_within_address_space);
if (vfio_host_win_del(container,
section->offset_within_address_space,
section->offset_within_address_space +
int128_get64(section->size) - 1) < 0) {
hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
__func__, section->offset_within_address_space);
}
}
}
static const MemoryListener vfio_memory_listener = {
@@ -503,6 +585,9 @@ static const MemoryListener vfio_memory_listener = {
static void vfio_listener_release(VFIOContainer *container)
{
memory_listener_unregister(&container->listener);
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
memory_listener_unregister(&container->prereg_listener);
}
}
static struct vfio_info_cap_header *
@@ -861,8 +946,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
goto free_container_exit;
}
ret = ioctl(fd, VFIO_SET_IOMMU,
v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU);
container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
if (ret) {
error_report("vfio: failed to set iommu for container: %m");
ret = -errno;
@@ -876,19 +961,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
* existing Type1 IOMMUs generally support any IOVA we're
* going to actually try in practice.
*/
container->min_iova = 0;
container->max_iova = (hwaddr)-1;
/* Assume just 4K IOVA page size */
container->iova_pgsizes = 0x1000;
info.argsz = sizeof(info);
ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
/* Ignore errors */
if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
container->iova_pgsizes = info.iova_pgsizes;
if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
/* Assume 4k IOVA page size */
info.iova_pgsizes = 4096;
}
} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
struct vfio_iommu_spapr_tce_info info;
bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU);
ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
if (ret) {
@@ -896,7 +980,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
ret = -errno;
goto free_container_exit;
}
ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
container->iommu_type =
v2 ? VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;
ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
if (ret) {
error_report("vfio: failed to set iommu for container: %m");
ret = -errno;
@@ -908,30 +994,54 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
* when container fd is closed so we do not call it explicitly
* in this file.
*/
ret = ioctl(fd, VFIO_IOMMU_ENABLE);
if (ret) {
error_report("vfio: failed to enable container: %m");
ret = -errno;
goto free_container_exit;
if (!v2) {
ret = ioctl(fd, VFIO_IOMMU_ENABLE);
if (ret) {
error_report("vfio: failed to enable container: %m");
ret = -errno;
goto free_container_exit;
}
} else {
container->prereg_listener = vfio_prereg_listener;
memory_listener_register(&container->prereg_listener,
&address_space_memory);
if (container->error) {
memory_listener_unregister(&container->prereg_listener);
error_report("vfio: RAM memory listener initialization failed for container");
goto free_container_exit;
}
}
/*
* This only considers the host IOMMU's 32-bit window. At
* some point we need to add support for the optional 64-bit
* window and dynamic windows
*/
info.argsz = sizeof(info);
ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
if (ret) {
error_report("vfio: VFIO_IOMMU_SPAPR_TCE_GET_INFO failed: %m");
ret = -errno;
if (v2) {
memory_listener_unregister(&container->prereg_listener);
}
goto free_container_exit;
}
container->min_iova = info.dma32_window_start;
container->max_iova = container->min_iova + info.dma32_window_size - 1;
/* Assume just 4K IOVA pages for now */
container->iova_pgsizes = 0x1000;
if (v2) {
/*
* There is a default window in a just-created container.
* To make region_add/del simpler, we better remove this
* window now and let those iommu_listener callbacks
* create/remove them when needed.
*/
ret = vfio_spapr_remove_window(container, info.dma32_window_start);
if (ret) {
goto free_container_exit;
}
} else {
/* The default table uses 4K pages */
vfio_host_win_add(container, info.dma32_window_start,
info.dma32_window_start +
info.dma32_window_size - 1,
0x1000);
}
} else {
error_report("vfio: No available IOMMU models");
ret = -EINVAL;

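The window bookkeeping above (vfio_host_win_add() and the region_add hook) relies on ranges_overlap() from "qemu/range.h", which is a plain half-open interval test. A minimal equivalent, checking the default 32-bit window against a 64-bit window placed at the sPAPR default dma64_win_addr of 1ULL << 59:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Equivalent of qemu/range.h's ranges_overlap() for non-empty ranges:
 * [f1, f1+l1) and [f2, f2+l2) intersect iff neither ends before the
 * other begins. */
static bool overlap(uint64_t f1, uint64_t l1, uint64_t f2, uint64_t l2)
{
    return !(f1 + l1 <= f2 || f2 + l2 <= f1);
}

int main(void)
{
    /* 32-bit window [0, 1G) vs a 64-bit window at 1ULL << 59: disjoint,
     * so both can be registered on the same container. Prints 0. */
    printf("%d\n", overlap(0, 1ULL << 30, 1ULL << 59, 1ULL << 30));
    return 0;
}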
hw/vfio/spapr.c (new file, 210 lines)

@@ -0,0 +1,210 @@
/*
* DMA memory preregistration
*
* Authors:
* Alexey Kardashevskiy <aik@ozlabs.ru>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "trace.h"
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
{
if (memory_region_is_iommu(section->mr)) {
hw_error("Cannot possibly preregister IOMMU memory");
}
return !memory_region_is_ram(section->mr) ||
memory_region_is_skip_dump(section->mr);
}
static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa)
{
return memory_region_get_ram_ptr(section->mr) +
section->offset_within_region +
(gpa - section->offset_within_address_space);
}
static void vfio_prereg_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer,
prereg_listener);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
hwaddr page_mask = qemu_real_host_page_mask;
struct vfio_iommu_spapr_register_memory reg = {
.argsz = sizeof(reg),
.flags = 0,
};
if (vfio_prereg_listener_skipped_section(section)) {
trace_vfio_prereg_listener_region_add_skip(
section->offset_within_address_space,
section->offset_within_address_space +
int128_get64(int128_sub(section->size, int128_one())));
return;
}
if (unlikely((section->offset_within_address_space & ~page_mask) ||
(section->offset_within_region & ~page_mask) ||
(int128_get64(section->size) & ~page_mask))) {
error_report("%s received unaligned region", __func__);
return;
}
end = section->offset_within_address_space + int128_get64(section->size);
if (gpa >= end) {
return;
}
memory_region_ref(section->mr);
reg.vaddr = (uintptr_t) vfio_prereg_gpa_to_vaddr(section, gpa);
reg.size = end - gpa;
ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
trace_vfio_prereg_register(reg.vaddr, reg.size, ret ? -errno : 0);
if (ret) {
/*
* On the initfn path, store the first error in the container so we
* can gracefully fail. Runtime, there's not much we can do other
* than throw a hardware error.
*/
if (!container->initialized) {
if (!container->error) {
container->error = ret;
}
} else {
hw_error("vfio: Memory registering failed, unable to continue");
}
}
}
static void vfio_prereg_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainer *container = container_of(listener, VFIOContainer,
prereg_listener);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
hwaddr page_mask = qemu_real_host_page_mask;
struct vfio_iommu_spapr_register_memory reg = {
.argsz = sizeof(reg),
.flags = 0,
};
if (vfio_prereg_listener_skipped_section(section)) {
trace_vfio_prereg_listener_region_del_skip(
section->offset_within_address_space,
section->offset_within_address_space +
int128_get64(int128_sub(section->size, int128_one())));
return;
}
if (unlikely((section->offset_within_address_space & ~page_mask) ||
(section->offset_within_region & ~page_mask) ||
(int128_get64(section->size) & ~page_mask))) {
error_report("%s received unaligned region", __func__);
return;
}
end = section->offset_within_address_space + int128_get64(section->size);
if (gpa >= end) {
return;
}
reg.vaddr = (uintptr_t) vfio_prereg_gpa_to_vaddr(section, gpa);
reg.size = end - gpa;
ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0);
}
const MemoryListener vfio_prereg_listener = {
.region_add = vfio_prereg_listener_region_add,
.region_del = vfio_prereg_listener_region_del,
};
int vfio_spapr_create_window(VFIOContainer *container,
MemoryRegionSection *section,
hwaddr *pgsize)
{
int ret;
unsigned pagesize = memory_region_iommu_get_min_page_size(section->mr);
unsigned entries, pages;
struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
/*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
* would be the right place to wire that up (tell the KVM
* device emulation the VFIO iommu handles to use).
*/
create.window_size = int128_get64(section->size);
create.page_shift = ctz64(pagesize);
/*
* SPAPR host supports multilevel TCE tables, there is some
* heuristic to decide how many levels we want for our table:
* 0..64 = 1; 65..4096 = 2; 4097..262144 = 3; 262145.. = 4
*/
entries = create.window_size >> create.page_shift;
pages = MAX((entries * sizeof(uint64_t)) / getpagesize(), 1);
pages = MAX(pow2ceil(pages) - 1, 1); /* Round up */
create.levels = ctz64(pages) / 6 + 1;
ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
if (ret) {
error_report("Failed to create a window, ret = %d (%m)", ret);
return -errno;
}
if (create.start_addr != section->offset_within_address_space) {
vfio_spapr_remove_window(container, create.start_addr);
error_report("Host doesn't support DMA window at %"HWADDR_PRIx", must be %"PRIx64,
section->offset_within_address_space,
(uint64_t)create.start_addr);
return -EINVAL;
}
trace_vfio_spapr_create_window(create.page_shift,
create.window_size,
create.start_addr);
*pgsize = pagesize;
return 0;
}
int vfio_spapr_remove_window(VFIOContainer *container,
hwaddr offset_within_address_space)
{
struct vfio_iommu_spapr_tce_remove remove = {
.argsz = sizeof(remove),
.start_addr = offset_within_address_space,
};
int ret;
ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
if (ret) {
error_report("Failed to remove window at %"PRIx64,
(uint64_t)remove.start_addr);
return -errno;
}
trace_vfio_spapr_remove_window(offset_within_address_space);
return 0;
}

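The only subtle part of vfio_spapr_create_window() is the level heuristic: it sizes the TCE table itself, then picks how many levels of indirection the kernel should allocate. A sketch of the bands stated in the comment (assuming 4K host pages; the committed code derives the same bands with pow2ceil()/ctz64() arithmetic):

#include <stdint.h>
#include <stdio.h>

/* Bands from the comment in vfio_spapr_create_window(): table sizes of
 * 0..64 host pages use 1 level, 65..4096 use 2, 4097..262144 use 3,
 * anything larger 4. Assumes 4K host pages. */
static unsigned tce_levels(uint64_t window_size, unsigned page_shift)
{
    uint64_t entries = window_size >> page_shift;
    uint64_t pages = (entries * sizeof(uint64_t) + 4095) / 4096;

    if (pages <= 64) {
        return 1;
    } else if (pages <= 4096) {
        return 2;
    } else if (pages <= 262144) {
        return 3;
    }
    return 4;
}

int main(void)
{
    /* 1G of 64K pages: 16384 TCEs, 32 table pages -> 1 level */
    printf("%u\n", tce_levels(1ULL << 30, 16));
    /* 64G of 64K pages: 1M TCEs, 2048 table pages -> 2 levels */
    printf("%u\n", tce_levels(1ULL << 36, 16));
    return 0;
}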

@@ -115,3 +115,11 @@ vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d
vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd = %d"
# hw/vfio/spapr.c
vfio_prereg_listener_region_add_skip(uint64_t start, uint64_t end) "%"PRIx64" - %"PRIx64
vfio_prereg_listener_region_del_skip(uint64_t start, uint64_t end) "%"PRIx64" - %"PRIx64
vfio_prereg_register(uint64_t va, uint64_t size, int ret) "va=%"PRIx64" size=%"PRIx64" ret=%d"
vfio_prereg_unregister(uint64_t va, uint64_t size, int ret) "va=%"PRIx64" size=%"PRIx64" ret=%d"
vfio_spapr_create_window(int ps, uint64_t ws, uint64_t off) "pageshift=0x%x winsize=0x%"PRIx64" offset=0x%"PRIx64
vfio_spapr_remove_window(uint64_t off) "offset=%"PRIx64


@@ -32,6 +32,8 @@
#define SPAPR_PCI_HOST_BRIDGE(obj) \
OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
#define SPAPR_PCI_DMA_MAX_WINDOWS 2
typedef struct sPAPRPHBState sPAPRPHBState;
typedef struct spapr_pci_msi {
@@ -56,7 +58,7 @@ struct sPAPRPHBState {
hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
MemoryRegion memwindow, iowindow, msiwindow;
uint32_t dma_liobn;
uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS];
hwaddr dma_win_addr, dma_win_size;
AddressSpace iommu_as;
MemoryRegion iommu_root;
@@ -71,6 +73,10 @@ struct sPAPRPHBState {
spapr_pci_msi_mig *msi_devs;
QLIST_ENTRY(sPAPRPHBState) list;
bool ddw_enabled;
uint64_t page_size_mask;
uint64_t dma64_win_addr;
};
#define SPAPR_PCI_MAX_INDEX 255


@@ -416,6 +416,16 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
#define RTAS_OUT_NOT_AUTHORIZED -9002
#define RTAS_OUT_SYSPARM_PARAM_ERROR -9999
/* DDW pagesize mask values from ibm,query-pe-dma-window */
#define RTAS_DDW_PGSIZE_4K 0x01
#define RTAS_DDW_PGSIZE_64K 0x02
#define RTAS_DDW_PGSIZE_16M 0x04
#define RTAS_DDW_PGSIZE_32M 0x08
#define RTAS_DDW_PGSIZE_64M 0x10
#define RTAS_DDW_PGSIZE_128M 0x20
#define RTAS_DDW_PGSIZE_256M 0x40
#define RTAS_DDW_PGSIZE_16G 0x80
/* RTAS tokens */
#define RTAS_TOKEN_BASE 0x2000
@@ -457,8 +467,12 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
#define RTAS_IBM_SET_SLOT_RESET (RTAS_TOKEN_BASE + 0x23)
#define RTAS_IBM_CONFIGURE_PE (RTAS_TOKEN_BASE + 0x24)
#define RTAS_IBM_SLOT_ERROR_DETAIL (RTAS_TOKEN_BASE + 0x25)
#define RTAS_IBM_QUERY_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x26)
#define RTAS_IBM_CREATE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x27)
#define RTAS_IBM_REMOVE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x28)
#define RTAS_IBM_RESET_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x29)
#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x26)
#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2A)
/* RTAS ibm,get-system-parameter token values */
#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20


@@ -73,6 +73,8 @@ typedef struct VFIOContainer {
VFIOAddressSpace *space;
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
MemoryListener listener;
MemoryListener prereg_listener;
unsigned iommu_type;
int error;
bool initialized;
/*
@@ -80,9 +82,8 @@ typedef struct VFIOContainer {
* contiguous IOVA window. We may need to generalize that in
* future
*/
hwaddr min_iova, max_iova;
uint64_t iova_pgsizes;
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;
@@ -95,6 +96,13 @@ typedef struct VFIOGuestIOMMU {
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
} VFIOGuestIOMMU;
typedef struct VFIOHostDMAWindow {
hwaddr min_iova;
hwaddr max_iova;
uint64_t iova_pgsizes;
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
} VFIOHostDMAWindow;
typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODevice {
@@ -158,4 +166,12 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info);
#endif
extern const MemoryListener vfio_prereg_listener;
int vfio_spapr_create_window(VFIOContainer *container,
MemoryRegionSection *section,
hwaddr *pgsize);
int vfio_spapr_remove_window(VFIOContainer *container,
hwaddr offset_within_address_space);
#endif /* !HW_VFIO_VFIO_COMMON_H */


@@ -1047,6 +1047,8 @@ struct CPUPPCState {
uint64_t insns_flags2;
#if defined(TARGET_PPC64)
struct ppc_segment_page_sizes sps;
ppc_slb_t vrma_slb;
target_ulong rmls;
bool ci_large_pages;
#endif


@@ -2689,19 +2689,19 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
helper_float_check_status(env); \
}
VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_nearest_even, 1)
VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1)
VSX_ROUND(xsrdpic, 1, float64, VsrD(0), FLOAT_ROUND_CURRENT, 1)
VSX_ROUND(xsrdpim, 1, float64, VsrD(0), float_round_down, 1)
VSX_ROUND(xsrdpip, 1, float64, VsrD(0), float_round_up, 1)
VSX_ROUND(xsrdpiz, 1, float64, VsrD(0), float_round_to_zero, 1)
VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_nearest_even, 0)
VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_ties_away, 0)
VSX_ROUND(xvrdpic, 2, float64, VsrD(i), FLOAT_ROUND_CURRENT, 0)
VSX_ROUND(xvrdpim, 2, float64, VsrD(i), float_round_down, 0)
VSX_ROUND(xvrdpip, 2, float64, VsrD(i), float_round_up, 0)
VSX_ROUND(xvrdpiz, 2, float64, VsrD(i), float_round_to_zero, 0)
VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_nearest_even, 0)
VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_ties_away, 0)
VSX_ROUND(xvrspic, 4, float32, VsrW(i), FLOAT_ROUND_CURRENT, 0)
VSX_ROUND(xvrspim, 4, float32, VsrW(i), float_round_down, 0)
VSX_ROUND(xvrspip, 4, float32, VsrW(i), float_round_up, 0)

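The VSX change replaces float_round_nearest_even with float_round_ties_away for xsrdpi, xvrdpi and xvrspi: the ISA's "Round to Nearest" for these instructions resolves ties away from zero, not to even. Plain C shows the behavioural difference (rint() follows the default ties-to-even mode, round() is ties-away); compile with -lm:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Half-way cases are where the two modes disagree */
    printf("rint(0.5)=%.1f round(0.5)=%.1f\n", rint(0.5), round(0.5));
    printf("rint(2.5)=%.1f round(2.5)=%.1f\n", rint(2.5), round(2.5));
    /* prints: rint(0.5)=0.0 round(0.5)=1.0
     *         rint(2.5)=2.0 round(2.5)=3.0 */
    return 0;
}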

@@ -450,128 +450,6 @@ void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token)
}
}
/* Returns the effective page shift or 0. MPSS isn't supported yet so
* this will always be the slb_pshift or 0
*/
static uint32_t ppc_hash64_pte_size_decode(uint64_t pte1, uint32_t slb_pshift)
{
switch (slb_pshift) {
case 12:
return 12;
case 16:
if ((pte1 & 0xf000) == 0x1000) {
return 16;
}
return 0;
case 24:
if ((pte1 & 0xff000) == 0) {
return 24;
}
return 0;
}
return 0;
}
static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
uint32_t slb_pshift, bool secondary,
target_ulong ptem, ppc_hash_pte64_t *pte)
{
CPUPPCState *env = &cpu->env;
int i;
uint64_t token;
target_ulong pte0, pte1;
target_ulong pte_index;
pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
token = ppc_hash64_start_access(cpu, pte_index);
if (!token) {
return -1;
}
for (i = 0; i < HPTES_PER_GROUP; i++) {
pte0 = ppc_hash64_load_hpte0(cpu, token, i);
pte1 = ppc_hash64_load_hpte1(cpu, token, i);
if ((pte0 & HPTE64_V_VALID)
&& (secondary == !!(pte0 & HPTE64_V_SECONDARY))
&& HPTE64_V_COMPARE(pte0, ptem)) {
uint32_t pshift = ppc_hash64_pte_size_decode(pte1, slb_pshift);
if (pshift == 0) {
continue;
}
/* We don't do anything with pshift yet as qemu TLB only deals
* with 4K pages anyway
*/
pte->pte0 = pte0;
pte->pte1 = pte1;
ppc_hash64_stop_access(cpu, token);
return (pte_index + i) * HASH_PTE_SIZE_64;
}
}
ppc_hash64_stop_access(cpu, token);
/*
* We didn't find a valid entry.
*/
return -1;
}
static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
ppc_slb_t *slb, target_ulong eaddr,
ppc_hash_pte64_t *pte)
{
CPUPPCState *env = &cpu->env;
hwaddr pte_offset;
hwaddr hash;
uint64_t vsid, epnmask, epn, ptem;
/* The SLB store path should prevent any bad page size encodings
* getting in there, so: */
assert(slb->sps);
epnmask = ~((1ULL << slb->sps->page_shift) - 1);
if (slb->vsid & SLB_VSID_B) {
/* 1TB segment */
vsid = (slb->vsid & SLB_VSID_VSID) >> SLB_VSID_SHIFT_1T;
epn = (eaddr & ~SEGMENT_MASK_1T) & epnmask;
hash = vsid ^ (vsid << 25) ^ (epn >> slb->sps->page_shift);
} else {
/* 256M segment */
vsid = (slb->vsid & SLB_VSID_VSID) >> SLB_VSID_SHIFT;
epn = (eaddr & ~SEGMENT_MASK_256M) & epnmask;
hash = vsid ^ (epn >> slb->sps->page_shift);
}
ptem = (slb->vsid & SLB_VSID_PTEM) | ((epn >> 16) & HPTE64_V_AVPN);
/* Page address translation */
qemu_log_mask(CPU_LOG_MMU,
"htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
env->htab_base, env->htab_mask, hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU,
"0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n",
env->htab_base, env->htab_mask, vsid, ptem, hash);
pte_offset = ppc_hash64_pteg_search(cpu, hash, slb->sps->page_shift,
0, ptem, pte);
if (pte_offset == -1) {
/* Secondary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU,
"1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n", env->htab_base,
env->htab_mask, vsid, ptem, ~hash);
pte_offset = ppc_hash64_pteg_search(cpu, ~hash, slb->sps->page_shift, 1,
ptem, pte);
}
return pte_offset;
}
static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps,
uint64_t pte0, uint64_t pte1)
{
@@ -609,15 +487,128 @@ static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps,
return 0; /* Bad page size encoding */
}
static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash,
const struct ppc_one_seg_page_size *sps,
target_ulong ptem,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
CPUPPCState *env = &cpu->env;
int i;
uint64_t token;
target_ulong pte0, pte1;
target_ulong pte_index;
pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP;
token = ppc_hash64_start_access(cpu, pte_index);
if (!token) {
return -1;
}
for (i = 0; i < HPTES_PER_GROUP; i++) {
pte0 = ppc_hash64_load_hpte0(cpu, token, i);
pte1 = ppc_hash64_load_hpte1(cpu, token, i);
/* This compares V, B, H (secondary) and the AVPN */
if (HPTE64_V_COMPARE(pte0, ptem)) {
*pshift = hpte_page_shift(sps, pte0, pte1);
/*
* If there is no match, ignore the PTE, it could simply
* be for a different segment size encoding and the
* architecture specifies we should not match. Linux will
* potentially leave behind PTEs for the wrong base page
* size when demoting segments.
*/
if (*pshift == 0) {
continue;
}
/* We don't do anything with pshift yet as qemu TLB only deals
* with 4K pages anyway
*/
pte->pte0 = pte0;
pte->pte1 = pte1;
ppc_hash64_stop_access(cpu, token);
return (pte_index + i) * HASH_PTE_SIZE_64;
}
}
ppc_hash64_stop_access(cpu, token);
/*
* We didn't find a valid entry.
*/
return -1;
}
static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu,
ppc_slb_t *slb, target_ulong eaddr,
ppc_hash_pte64_t *pte, unsigned *pshift)
{
CPUPPCState *env = &cpu->env;
hwaddr pte_offset;
hwaddr hash;
uint64_t vsid, epnmask, epn, ptem;
const struct ppc_one_seg_page_size *sps = slb->sps;
/* The SLB store path should prevent any bad page size encodings
* getting in there, so: */
assert(sps);
/* If ISL is set in LPCR we need to clamp the page size to 4K */
if (env->spr[SPR_LPCR] & LPCR_ISL) {
/* We assume that when using TCG, 4k is the first entry of SPS */
sps = &env->sps.sps[0];
assert(sps->page_shift == 12);
}
epnmask = ~((1ULL << sps->page_shift) - 1);
if (slb->vsid & SLB_VSID_B) {
/* 1TB segment */
vsid = (slb->vsid & SLB_VSID_VSID) >> SLB_VSID_SHIFT_1T;
epn = (eaddr & ~SEGMENT_MASK_1T) & epnmask;
hash = vsid ^ (vsid << 25) ^ (epn >> sps->page_shift);
} else {
/* 256M segment */
vsid = (slb->vsid & SLB_VSID_VSID) >> SLB_VSID_SHIFT;
epn = (eaddr & ~SEGMENT_MASK_256M) & epnmask;
hash = vsid ^ (epn >> sps->page_shift);
}
ptem = (slb->vsid & SLB_VSID_PTEM) | ((epn >> 16) & HPTE64_V_AVPN);
ptem |= HPTE64_V_VALID;
/* Page address translation */
qemu_log_mask(CPU_LOG_MMU,
"htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx
" hash " TARGET_FMT_plx "\n",
env->htab_base, env->htab_mask, hash);
/* Primary PTEG lookup */
qemu_log_mask(CPU_LOG_MMU,
"0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n",
env->htab_base, env->htab_mask, vsid, ptem, hash);
pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift);
if (pte_offset == -1) {
/* Secondary PTEG lookup */
ptem |= HPTE64_V_SECONDARY;
qemu_log_mask(CPU_LOG_MMU,
"1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx
" vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx
" hash=" TARGET_FMT_plx "\n", env->htab_base,
env->htab_mask, vsid, ptem, ~hash);
pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift);
}
return pte_offset;
}
unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
uint64_t pte0, uint64_t pte1,
unsigned *seg_page_shift)
uint64_t pte0, uint64_t pte1)
{
CPUPPCState *env = &cpu->env;
int i;
if (!(pte0 & HPTE64_V_LARGE)) {
*seg_page_shift = 12;
return 12;
}
@@ -635,12 +626,10 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
shift = hpte_page_shift(sps, pte0, pte1);
if (shift) {
*seg_page_shift = sps->page_shift;
return shift;
}
}
*seg_page_shift = 0;
return 0;
}
@@ -701,11 +690,52 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
assert((rwx == 0) || (rwx == 1) || (rwx == 2));
/* Note on LPCR usage: 970 uses HID4, but our special variant
* of store_spr copies relevant fields into env->spr[SPR_LPCR].
* Similarly we filter unimplemented bits when storing into
* LPCR depending on the MMU version. This code can thus just
* use the LPCR "as-is".
*/
/* 1. Handle real mode accesses */
if (((rwx == 2) && (msr_ir == 0)) || ((rwx != 2) && (msr_dr == 0))) {
/* Translation is off */
/* In real mode the top 4 effective address bits are ignored */
/* Translation is supposedly "off" */
/* In real mode the top 4 effective address bits are (mostly) ignored */
raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
/* In HV mode, add HRMOR if top EA bit is clear */
if (msr_hv || !env->has_hv_mode) {
if (!(eaddr >> 63)) {
raddr |= env->spr[SPR_HRMOR];
}
} else {
/* Otherwise, check VPM for RMA vs VRMA */
if (env->spr[SPR_LPCR] & LPCR_VPM0) {
slb = &env->vrma_slb;
if (slb->sps) {
goto skip_slb_search;
}
/* Not much else to do here */
cs->exception_index = POWERPC_EXCP_MCHECK;
env->error_code = 0;
return 1;
} else if (raddr < env->rmls) {
/* RMA. Check bounds in RMLS */
raddr |= env->spr[SPR_RMOR];
} else {
/* The access failed, generate the appropriate interrupt */
if (rwx == 2) {
ppc_hash64_set_isi(cs, env, 0x08000000);
} else {
dsisr = 0x08000000;
if (rwx == 1) {
dsisr |= 0x02000000;
}
ppc_hash64_set_dsi(cs, env, eaddr, dsisr);
}
return 1;
}
}
tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
TARGET_PAGE_SIZE);
@@ -714,7 +744,6 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
/* 2. Translation is on, so look up the SLB */
slb = slb_lookup(cpu, eaddr);
if (!slb) {
if (rwx == 2) {
cs->exception_index = POWERPC_EXCP_ISEG;
@@ -727,6 +756,8 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
return 1;
}
skip_slb_search:
/* 3. Check for segment level no-execute violation */
if ((rwx == 2) && (slb->vsid & SLB_VSID_N)) {
ppc_hash64_set_isi(cs, env, 0x10000000);
@@ -734,7 +765,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
}
/* 4. Locate the PTE in the hash table */
pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte);
pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift);
if (pte_offset == -1) {
dsisr = 0x40000000;
if (rwx == 2) {
@@ -750,18 +781,6 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr,
qemu_log_mask(CPU_LOG_MMU,
"found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
/* Validate page size encoding */
apshift = hpte_page_shift(slb->sps, pte.pte0, pte.pte1);
if (!apshift) {
error_report("Bad page size encoding in HPTE 0x%"PRIx64" - 0x%"PRIx64
" @ 0x%"HWADDR_PRIx, pte.pte0, pte.pte1, pte_offset);
/* Not entirely sure what the right action here, but machine
* check seems reasonable */
cs->exception_index = POWERPC_EXCP_MCHECK;
env->error_code = 0;
return 1;
}
/* 5. Check access permissions */
pp_prot = ppc_hash64_pte_prot(cpu, slb, pte);
@@ -821,30 +840,44 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr)
{
CPUPPCState *env = &cpu->env;
ppc_slb_t *slb;
hwaddr pte_offset;
hwaddr pte_offset, raddr;
ppc_hash_pte64_t pte;
unsigned apshift;
/* Handle real mode */
if (msr_dr == 0) {
/* In real mode the top 4 effective address bits are ignored */
return addr & 0x0FFFFFFFFFFFFFFFULL;
raddr = addr & 0x0FFFFFFFFFFFFFFFULL;
/* In HV mode, add HRMOR if top EA bit is clear */
if ((msr_hv || !env->has_hv_mode) && !(addr >> 63)) {
return raddr | env->spr[SPR_HRMOR];
}
/* Otherwise, check VPM for RMA vs VRMA */
if (env->spr[SPR_LPCR] & LPCR_VPM0) {
slb = &env->vrma_slb;
if (!slb->sps) {
return -1;
}
} else if (raddr < env->rmls) {
/* RMA. Check bounds in RMLS */
return raddr | env->spr[SPR_RMOR];
} else {
return -1;
}
} else {
slb = slb_lookup(cpu, addr);
if (!slb) {
return -1;
}
}
slb = slb_lookup(cpu, addr);
if (!slb) {
return -1;
}
pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte);
pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift);
if (pte_offset == -1) {
return -1;
}
apshift = hpte_page_shift(slb->sps, pte.pte0, pte.pte1);
if (!apshift) {
return -1;
}
return deposit64(pte.pte1 & HPTE64_R_RPN, 0, apshift, addr)
& TARGET_PAGE_MASK;
}
@@ -883,6 +916,90 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
tlb_flush(CPU(cpu), 1);
}
void ppc_hash64_update_rmls(CPUPPCState *env)
{
uint64_t lpcr = env->spr[SPR_LPCR];
/*
* This is the full 4 bits encoding of POWER8. Previous
* CPUs only support a subset of these but the filtering
* is done when writing LPCR
*/
switch ((lpcr & LPCR_RMLS) >> LPCR_RMLS_SHIFT) {
case 0x8: /* 32MB */
env->rmls = 0x2000000ull;
break;
case 0x3: /* 64MB */
env->rmls = 0x4000000ull;
break;
case 0x7: /* 128MB */
env->rmls = 0x8000000ull;
break;
case 0x4: /* 256MB */
env->rmls = 0x10000000ull;
break;
case 0x2: /* 1GB */
env->rmls = 0x40000000ull;
break;
case 0x1: /* 16GB */
env->rmls = 0x400000000ull;
break;
default:
/* What to do here ??? */
env->rmls = 0;
}
}
void ppc_hash64_update_vrma(CPUPPCState *env)
{
const struct ppc_one_seg_page_size *sps = NULL;
target_ulong esid, vsid, lpcr;
ppc_slb_t *slb = &env->vrma_slb;
uint32_t vrmasd;
int i;
/* First clear it */
slb->esid = slb->vsid = 0;
slb->sps = NULL;
/* Is VRMA enabled ? */
lpcr = env->spr[SPR_LPCR];
if (!(lpcr & LPCR_VPM0)) {
return;
}
/* Make one up. Mostly ignore the ESID which will not be
* needed for translation
*/
vsid = SLB_VSID_VRMA;
vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
vsid |= (vrmasd << 4) & (SLB_VSID_L | SLB_VSID_LP);
esid = SLB_ESID_V;
for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
const struct ppc_one_seg_page_size *sps1 = &env->sps.sps[i];
if (!sps1->page_shift) {
break;
}
if ((vsid & SLB_VSID_LLP_MASK) == sps1->slb_enc) {
sps = sps1;
break;
}
}
if (!sps) {
error_report("Bad page size encoding esid 0x"TARGET_FMT_lx
" vsid 0x"TARGET_FMT_lx, esid, vsid);
return;
}
slb->vsid = vsid;
slb->esid = esid;
slb->sps = sps;
}
void helper_store_lpcr(CPUPPCState *env, target_ulong val)
{
uint64_t lpcr = 0;
@@ -938,4 +1055,6 @@ void helper_store_lpcr(CPUPPCState *env, target_ulong val)
;
}
env->spr[SPR_LPCR] = lpcr;
ppc_hash64_update_rmls(env);
ppc_hash64_update_vrma(env);
}


@@ -17,8 +17,9 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
target_ulong pte_index,
target_ulong pte0, target_ulong pte1);
unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
uint64_t pte0, uint64_t pte1,
unsigned *seg_page_shift);
uint64_t pte0, uint64_t pte1);
void ppc_hash64_update_vrma(CPUPPCState *env);
void ppc_hash64_update_rmls(CPUPPCState *env);
#endif
/*
@@ -37,6 +38,7 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
#define SLB_VSID_B_256M 0x0000000000000000ULL
#define SLB_VSID_B_1T 0x4000000000000000ULL
#define SLB_VSID_VSID 0x3FFFFFFFFFFFF000ULL
#define SLB_VSID_VRMA (0x0001FFFFFF000000ULL | SLB_VSID_B_1T)
#define SLB_VSID_PTEM (SLB_VSID_B | SLB_VSID_VSID)
#define SLB_VSID_KS 0x0000000000000800ULL
#define SLB_VSID_KP 0x0000000000000400ULL
@@ -63,7 +65,7 @@ unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
#define HPTE64_V_AVPN_SHIFT 7
#define HPTE64_V_AVPN 0x3fffffffffffff80ULL
#define HPTE64_V_AVPN_VAL(x) (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT)
#define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff80ULL))
#define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff83ULL))
#define HPTE64_V_LARGE 0x0000000000000004ULL
#define HPTE64_V_SECONDARY 0x0000000000000002ULL
#define HPTE64_V_VALID 0x0000000000000001ULL

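The widened HPTE64_V_COMPARE mask (0x...83 instead of 0x...80) is what lets the new ppc_hash64_pteg_search() drop its separate VALID/SECONDARY tests: the lookup ORs HPTE64_V_VALID (and HPTE64_V_SECONDARY for the second pass) into ptem, and one XOR then matches AVPN, B, H and V together. A tiny self-contained check with a hypothetical AVPN value:

#include <stdint.h>
#include <stdio.h>

#define HPTE64_V_SECONDARY 0x0000000000000002ULL
#define HPTE64_V_VALID     0x0000000000000001ULL
/* New mask: the low bits 0x83 pull H and V into the comparison */
#define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff83ULL))

int main(void)
{
    uint64_t avpn = 0x0000000000123480ULL;   /* hypothetical AVPN bits */
    uint64_t ptem = avpn | HPTE64_V_VALID;   /* primary-hash probe */
    uint64_t pte_primary = avpn | HPTE64_V_VALID;
    uint64_t pte_secondary = pte_primary | HPTE64_V_SECONDARY;

    printf("%d %d\n",
           HPTE64_V_COMPARE(pte_primary, ptem),    /* 1: matches */
           HPTE64_V_COMPARE(pte_secondary, ptem)); /* 0: H differs */
    return 0;
}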

@@ -8791,11 +8791,19 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
/* Set emulated LPCR to not send interrupts to hypervisor. Note that
* under KVM, the actual HW LPCR will be set differently by KVM itself,
* the settings below ensure proper operations with TCG in absence of
* a real hypervisor
* a real hypervisor.
*
* Clearing VPM0 will also cause us to use RMOR in mmu-hash64.c for
* real mode accesses, which thankfully defaults to 0 and isn't
* accessible in guest mode.
*/
lpcr->default_value &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV);
lpcr->default_value |= LPCR_LPES0 | LPCR_LPES1;
/* Set RMLS to the max (ie, 16G) */
lpcr->default_value &= ~LPCR_RMLS;
lpcr->default_value |= 1ull << LPCR_RMLS_SHIFT;
/* P7 and P8 has slightly different PECE bits, mostly because P8 adds
* bit 47 and 48 which are reserved on P7. Here we set them all, which
* will work as expected for both implementations
@@ -8811,6 +8819,10 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
/* Set a full AMOR so guest can use the AMR as it sees fit */
env->spr[SPR_AMOR] = amor->default_value = 0xffffffffffffffffull;
/* Update some env bits based on new LPCR value */
ppc_hash64_update_rmls(env);
ppc_hash64_update_vrma(env);
/* Tell KVM that we're in PAPR mode */
if (kvm_enabled()) {
kvmppc_set_papr(cpu);
@@ -9516,7 +9528,7 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
Error *local_err = NULL;
#if !defined(CONFIG_USER_ONLY)
int max_smt = kvm_enabled() ? kvmppc_smt_threads() : 1;
int max_smt = kvmppc_smt_threads();
#endif
#if !defined(CONFIG_USER_ONLY)