intel-iommu: Extend address width to 48 bits

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.

After enabling larger address width (48), we should be able to map
larger iova addresses in the guest. For example, a QEMU guest that
is configured with large memory ( >=1TB ). To check whether 48 bits
aw is enabled, we can grep in the guest dmesg output with line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty <prasad.singamsety@oracle.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
Prasad Singamsetty 2017-11-14 18:13:50 -05:00 committed by Michael S. Tsirkin
parent 92e5d85e83
commit 37f51384ae
4 changed files with 65 additions and 49 deletions

View File

@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
AcpiDmarDeviceScope *scope = NULL;
/* Root complex IOAPIC use one path[0] only */
size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
assert(iommu);
if (iommu->intr_supported) {
@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
}
dmar = acpi_data_push(table_data, sizeof(*dmar));
dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
dmar->host_address_width = intel_iommu->aw_bits - 1;
dmar->flags = dmar_flags;
/* DMAR Remapping Hardware Unit Definition structure */

View File

@ -521,9 +521,9 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}
static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
{
return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
}
/* Whether the pte indicates the address of the page frame */
@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
return true;
}
static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
{
uint32_t ce_agaw = vtd_ce_get_agaw(ce);
return 1ULL << MIN(ce_agaw, VTD_MGAW);
return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
uint8_t aw)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
return !(iova & ~(vtd_iova_limit(ce) - 1));
return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
}
/*
@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
*/
static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
bool *reads, bool *writes)
bool *reads, bool *writes, uint8_t aw_bits)
{
dma_addr_t addr = vtd_ce_get_slpt_base(ce);
uint32_t level = vtd_ce_get_level(ce);
@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint64_t slpte;
uint64_t access_right_check;
if (!vtd_iova_range_check(iova, ce)) {
if (!vtd_iova_range_check(iova, ce, aw_bits)) {
trace_vtd_err_dmar_iova_overflow(iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@ -714,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
*slpte_level = level;
return 0;
}
addr = vtd_get_slpte_addr(slpte);
addr = vtd_get_slpte_addr(slpte, aw_bits);
level--;
}
}
@ -732,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
* @read: whether parent level has read permission
* @write: whether parent level has write permission
* @notify_unmap: whether we should notify invalid entries
* @aw: maximum address width
*/
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint64_t end, vtd_page_walk_hook hook_fn,
void *private, uint32_t level,
bool read, bool write, bool notify_unmap)
void *private, uint32_t level, bool read,
bool write, bool notify_unmap, uint8_t aw)
{
bool read_cur, write_cur, entry_valid;
uint32_t offset;
@ -783,7 +785,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
entry.target_as = &address_space_memory;
entry.iova = iova & subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
entry.translated_addr = vtd_get_slpte_addr(slpte);
entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
entry.addr_mask = ~subpage_mask;
entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
if (!entry_valid && !notify_unmap) {
@ -803,10 +805,10 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
MIN(iova_next, end), hook_fn, private,
level - 1, read_cur, write_cur,
notify_unmap);
notify_unmap, aw);
if (ret < 0) {
return ret;
}
@ -827,25 +829,26 @@ next:
* @end: IOVA range end address (start <= addr < end)
* @hook_fn: the hook that to be called for each detected area
* @private: private data for the hook function
* @aw: maximum address width
*/
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
vtd_page_walk_hook hook_fn, void *private,
bool notify_unmap)
bool notify_unmap, uint8_t aw)
{
dma_addr_t addr = vtd_ce_get_slpt_base(ce);
uint32_t level = vtd_ce_get_level(ce);
if (!vtd_iova_range_check(start, ce)) {
if (!vtd_iova_range_check(start, ce, aw)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
if (!vtd_iova_range_check(end, ce)) {
if (!vtd_iova_range_check(end, ce, aw)) {
/* Fix end so that it reaches the maximum */
end = vtd_iova_limit(ce);
end = vtd_iova_limit(ce, aw);
}
return vtd_page_walk_level(addr, start, end, hook_fn, private,
level, true, true, notify_unmap);
level, true, true, notify_unmap, aw);
}
/* Map a device to its corresponding domain (context-entry) */
@ -867,7 +870,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
return -VTD_FR_ROOT_ENTRY_P;
}
if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(VTD_HOST_ADDRESS_WIDTH))) {
if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
trace_vtd_re_invalid(re.rsvd, re.val);
return -VTD_FR_ROOT_ENTRY_RSVD;
}
@ -884,7 +887,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(VTD_HOST_ADDRESS_WIDTH))) {
(ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_RSVD;
}
@ -1166,7 +1169,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
&reads, &writes);
&reads, &writes, s->aw_bits);
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
@ -1183,7 +1186,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
access_flags, level);
out:
entry->iova = addr & page_mask;
entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
entry->perm = access_flags;
return true;
@ -1200,7 +1203,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
{
s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
s->root_extended = s->root & VTD_RTADDR_RTT;
s->root &= VTD_RTADDR_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);
trace_vtd_reg_dmar_root(s->root, s->root_extended);
}
@ -1216,7 +1219,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
uint64_t value = 0;
value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
s->intr_root = value & VTD_IRTA_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits);
s->intr_eime = value & VTD_IRTA_EIME;
/* Notify global invalidation */
@ -1392,7 +1395,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
vtd_page_invalidate_notify_hook,
(void *)&vtd_as->iommu, true);
(void *)&vtd_as->iommu, true, s->aw_bits);
}
}
}
@ -1472,7 +1475,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
trace_vtd_inv_qi_enable(en);
if (en) {
s->iq = iqa_val & VTD_IQA_IQA_MASK(VTD_HOST_ADDRESS_WIDTH);
s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
/* 2^(x+8) entries */
s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
s->qi_enabled = true;
@ -2403,6 +2406,8 @@ static Property vtd_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_END_OF_LIST(),
};
@ -2758,6 +2763,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
hwaddr size;
hwaddr start = n->start;
hwaddr end = n->end;
IntelIOMMUState *s = as->iommu_state;
/*
* Note: all the codes in this function has a assumption that IOVA
@ -2765,12 +2771,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
* VT-d spec), otherwise we need to consider overflow of 64 bits.
*/
if (end > VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH)) {
if (end > VTD_ADDRESS_SIZE(s->aw_bits)) {
/*
* Don't need to unmap regions that is bigger than the whole
* VT-d supported address space size
*/
end = VTD_ADDRESS_SIZE(VTD_HOST_ADDRESS_WIDTH);
end = VTD_ADDRESS_SIZE(s->aw_bits);
}
assert(start <= end);
@ -2782,9 +2788,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
* suite the minimum available mask.
*/
int n = 64 - clz64(size);
if (n > VTD_MGAW) {
if (n > s->aw_bits) {
/* should not happen, but in case it happens, limit it */
n = VTD_MGAW;
n = s->aw_bits;
}
size = 1ULL << n;
}
@ -2844,7 +2850,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
PCI_FUNC(vtd_as->devfn),
VTD_CONTEXT_ENTRY_DID(ce.hi),
ce.hi, ce.lo);
vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false,
s->aw_bits);
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
@ -2859,7 +2866,6 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
static void vtd_init(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
uint8_t aw_bits = VTD_HOST_ADDRESS_WIDTH;
memset(s->csr, 0, DMAR_REG_SIZE);
memset(s->wmask, 0, DMAR_REG_SIZE);
@ -2878,21 +2884,24 @@ static void vtd_init(IntelIOMMUState *s)
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(VTD_HOST_ADDRESS_WIDTH);
VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
if (s->aw_bits == VTD_HOST_AW_48BIT) {
s->cap |= VTD_CAP_SAGAW_48bit;
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
/*
* Rsvd field masks for spte
*/
vtd_paging_entry_rsvd_field[0] = ~0ULL;
vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(aw_bits);
vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);
if (x86_iommu->intr_supported) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
@ -3029,6 +3038,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
}
}
/* Currently only address widths supported are 39 and 48 bits */
if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
(s->aw_bits != VTD_HOST_AW_48BIT)) {
error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
return false;
}
return true;
}

View File

@ -131,7 +131,7 @@
#define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK)
/* IVA_REG */
#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1))
#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL)
#define VTD_IVA_AM(val) ((val) & 0x3fULL)
/* GCMD_REG */
@ -197,7 +197,6 @@
#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */
#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
#define VTD_MGAW 39 /* Maximum Guest Address Width */
#define VTD_ADDRESS_SIZE(aw) (1ULL << (aw))
#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16)
#define VTD_MAMV 18ULL
@ -213,7 +212,6 @@
#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT)
/* 48-bit AGAW, 4-level page-table */
#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT)
#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit
/* IQT_REG */
#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
@ -252,7 +250,7 @@
#define VTD_FRCD_SID_MASK 0xffffULL
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL))
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
/* DMA Remapping Fault Conditions */
typedef enum VTDFaultReason {
@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4)
#define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4)
#define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \
((1ULL << VTD_MGAW) - 1))
#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL)
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL

View File

@ -304,6 +304,7 @@ struct IntelIOMMUState {
bool intr_eime; /* Extended interrupt mode enabled */
OnOffAuto intr_eim; /* Toggle for EIM cabability */
bool buggy_eim; /* Force buggy EIM unless eim=off */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
};
/* Find the VTD Address space associated with the given bus pointer,