qemu-e2k/target/ppc/mmu-hash64.h

177 lines
6.4 KiB
C
Raw Normal View History

#ifndef MMU_HASH64_H
#define MMU_HASH64_H
#ifndef CONFIG_USER_ONLY
#ifdef TARGET_PPC64
void dump_slb(FILE *f, fprintf_function cpu_fprintf, PowerPCCPU *cpu);
int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot,
target_ulong esid, target_ulong vsid);
hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr);
int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw,
int mmu_idx);
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
uint64_t pte0, uint64_t pte1);
void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu,
target_ulong pte_index,
target_ulong pte0, target_ulong pte1);
unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu,
uint64_t pte0, uint64_t pte1);
void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
void ppc_hash64_init(PowerPCCPU *cpu);
void ppc_hash64_finalize(PowerPCCPU *cpu);
void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu,
bool (*cb)(void *, uint32_t, uint32_t),
void *opaque);
#endif
/*
* SLB definitions
*/
/* Bits in the SLB ESID word */
#define SLB_ESID_ESID 0xFFFFFFFFF0000000ULL
#define SLB_ESID_V 0x0000000008000000ULL /* valid */
/* Bits in the SLB VSID word */
#define SLB_VSID_SHIFT 12
#define SLB_VSID_SHIFT_1T 24
#define SLB_VSID_SSIZE_SHIFT 62
#define SLB_VSID_B 0xc000000000000000ULL
#define SLB_VSID_B_256M 0x0000000000000000ULL
#define SLB_VSID_B_1T 0x4000000000000000ULL
#define SLB_VSID_VSID 0x3FFFFFFFFFFFF000ULL
#define SLB_VSID_VRMA (0x0001FFFFFF000000ULL | SLB_VSID_B_1T)
#define SLB_VSID_PTEM (SLB_VSID_B | SLB_VSID_VSID)
#define SLB_VSID_KS 0x0000000000000800ULL
#define SLB_VSID_KP 0x0000000000000400ULL
#define SLB_VSID_N 0x0000000000000200ULL /* no-execute */
#define SLB_VSID_L 0x0000000000000100ULL
#define SLB_VSID_C 0x0000000000000080ULL /* class */
#define SLB_VSID_LP 0x0000000000000030ULL
#define SLB_VSID_ATTR 0x0000000000000FFFULL
#define SLB_VSID_LLP_MASK (SLB_VSID_L | SLB_VSID_LP)
#define SLB_VSID_4K 0x0000000000000000ULL
#define SLB_VSID_64K 0x0000000000000110ULL
#define SLB_VSID_16M 0x0000000000000100ULL
#define SLB_VSID_16G 0x0000000000000120ULL
/*
* Hash page table definitions
*/
#define SDR_64_HTABORG 0x0FFFFFFFFFFC0000ULL
#define SDR_64_HTABSIZE 0x000000000000001FULL
#define PATE0_HTABORG 0x0FFFFFFFFFFC0000ULL
#define HPTES_PER_GROUP 8
#define HASH_PTE_SIZE_64 16
#define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP)
pseries: Implement HPT resizing This patch implements hypercalls allowing a PAPR guest to resize its own hash page table. This will eventually allow for more flexible memory hotplug. The implementation is partially asynchronous, handled in a special thread running the hpt_prepare_thread() function. The state of a pending resize is stored in SPAPR_MACHINE->pending_hpt. The H_RESIZE_HPT_PREPARE hypercall will kick off creation of a new HPT, or, if one is already in progress, monitor it for completion. If there is an existing HPT resize in progress that doesn't match the size specified in the call, it will cancel it, replacing it with a new one matching the given size. The H_RESIZE_HPT_COMMIT completes transition to a resized HPT, and can only be called successfully once H_RESIZE_HPT_PREPARE has successfully completed initialization of a new HPT. The guest must ensure that there are no concurrent accesses to the existing HPT while this is called (this effectively means stop_machine() for Linux guests). For now H_RESIZE_HPT_COMMIT goes through the whole old HPT, rehashing each HPTE into the new HPT. This can have quite high latency, but it seems to be of the order of typical migration downtime latencies for HPTs of size up to ~2GiB (which would be used in a 256GiB guest). In future we probably want to move more of the rehashing to the "prepare" phase, by having H_ENTER and other hcalls update both current and pending HPTs. That's a project for another day, but should be possible without any changes to the guest interface. Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-12 07:46:49 +02:00
#define HPTE64_V_SSIZE SLB_VSID_B
#define HPTE64_V_SSIZE_256M SLB_VSID_B_256M
#define HPTE64_V_SSIZE_1T SLB_VSID_B_1T
#define HPTE64_V_SSIZE_SHIFT 62
#define HPTE64_V_AVPN_SHIFT 7
#define HPTE64_V_AVPN 0x3fffffffffffff80ULL
#define HPTE64_V_AVPN_VAL(x) (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT)
#define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff83ULL))
pseries: Implement HPT resizing This patch implements hypercalls allowing a PAPR guest to resize its own hash page table. This will eventually allow for more flexible memory hotplug. The implementation is partially asynchronous, handled in a special thread running the hpt_prepare_thread() function. The state of a pending resize is stored in SPAPR_MACHINE->pending_hpt. The H_RESIZE_HPT_PREPARE hypercall will kick off creation of a new HPT, or, if one is already in progress, monitor it for completion. If there is an existing HPT resize in progress that doesn't match the size specified in the call, it will cancel it, replacing it with a new one matching the given size. The H_RESIZE_HPT_COMMIT completes transition to a resized HPT, and can only be called successfully once H_RESIZE_HPT_PREPARE has successfully completed initialization of a new HPT. The guest must ensure that there are no concurrent accesses to the existing HPT while this is called (this effectively means stop_machine() for Linux guests). For now H_RESIZE_HPT_COMMIT goes through the whole old HPT, rehashing each HPTE into the new HPT. This can have quite high latency, but it seems to be of the order of typical migration downtime latencies for HPTs of size up to ~2GiB (which would be used in a 256GiB guest). In future we probably want to move more of the rehashing to the "prepare" phase, by having H_ENTER and other hcalls update both current and pending HPTs. That's a project for another day, but should be possible without any changes to the guest interface. Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-05-12 07:46:49 +02:00
#define HPTE64_V_BOLTED 0x0000000000000010ULL
#define HPTE64_V_LARGE 0x0000000000000004ULL
#define HPTE64_V_SECONDARY 0x0000000000000002ULL
#define HPTE64_V_VALID 0x0000000000000001ULL
#define HPTE64_R_PP0 0x8000000000000000ULL
#define HPTE64_R_TS 0x4000000000000000ULL
#define HPTE64_R_KEY_HI 0x3000000000000000ULL
#define HPTE64_R_RPN_SHIFT 12
#define HPTE64_R_RPN 0x0ffffffffffff000ULL
#define HPTE64_R_FLAGS 0x00000000000003ffULL
#define HPTE64_R_PP 0x0000000000000003ULL
#define HPTE64_R_N 0x0000000000000004ULL
#define HPTE64_R_G 0x0000000000000008ULL
#define HPTE64_R_M 0x0000000000000010ULL
#define HPTE64_R_I 0x0000000000000020ULL
#define HPTE64_R_W 0x0000000000000040ULL
#define HPTE64_R_WIMG 0x0000000000000078ULL
#define HPTE64_R_C 0x0000000000000080ULL
#define HPTE64_R_R 0x0000000000000100ULL
#define HPTE64_R_KEY_LO 0x0000000000000e00ULL
#define HPTE64_R_KEY(x) ((((x) & HPTE64_R_KEY_HI) >> 57) | \
(((x) & HPTE64_R_KEY_LO) >> 9))
#define HPTE64_V_1TB_SEG 0x4000000000000000ULL
#define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL
/* Format changes for ARCH v3 */
#define HPTE64_V_COMMON_BITS 0x000fffffffffffffULL
#define HPTE64_R_3_0_SSIZE_SHIFT 58
#define HPTE64_R_3_0_SSIZE_MASK (3ULL << HPTE64_R_3_0_SSIZE_SHIFT)
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
struct ppc_hash_pte64 {
uint64_t pte0, pte1;
};
const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
hwaddr ptex, int n);
void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
hwaddr ptex, int n);
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu,
const ppc_hash_pte64_t *hptes, int i)
{
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
return ldq_p(&(hptes[i].pte0));
}
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu,
const ppc_hash_pte64_t *hptes, int i)
{
target/ppc: Cleanup HPTE accessors for 64-bit hash MMU Accesses to the hashed page table (HPT) are complicated by the fact that the HPT could be in one of three places: 1) Within guest memory - when we're emulating a full guest CPU at the hardware level (e.g. powernv, mac99, g3beige) 2) Within qemu, but outside guest memory - when we're emulating user and supervisor instructions within TCG, but instead of emulating the CPU's hypervisor mode, we just emulate a hypervisor's behaviour (pseries in TCG or KVM-PR) 3) Within the host kernel - a pseries machine using KVM-HV acceleration. Mostly accesses to the HPT are handled by KVM, but there are a few cases where qemu needs to access it via a special fd for the purpose. In order to batch accesses to the fd in case (3), we use a somewhat awkward ppc_hash64_start_access() / ppc_hash64_stop_access() pair, which for case (3) reads / releases several HPTEs from the kernel as a batch (usually a whole PTEG). For cases (1) & (2) it just returns an address value. The actual HPTE load helpers then need to interpret the returned token differently in the 3 cases. This patch keeps the same basic structure, but simplfiies the details. First start_access() / stop_access() are renamed to map_hptes() and unmap_hptes() to make their operation more obvious. Second, map_hptes() now always returns a qemu pointer, which can always be used in the same way by the load_hpte() helpers. In case (1) it comes from address_space_map() in case (2) directly from qemu's HPT buffer and in case (3) from a temporary buffer read from the KVM fd. While we're at it, make things a bit more consistent in terms of types and variable names: avoid variables named 'index' (it shadows index(3) which can lead to confusing results), use 'hwaddr ptex' for HPTE indices and uint64_t for each of the HPTE words, use ptex throughout the call stack instead of pte_offset in some places (we still need that at the bottom layer, but nowhere else). Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-02-27 06:03:41 +01:00
return ldq_p(&(hptes[i].pte1));
}
/*
* MMU Options
*/
struct PPCHash64PageSize {
uint32_t page_shift; /* Page shift (or 0) */
uint32_t pte_enc; /* Encoding in the HPTE (>>12) */
};
typedef struct PPCHash64PageSize PPCHash64PageSize;
struct PPCHash64SegmentPageSizes {
uint32_t page_shift; /* Base page shift of segment (or 0) */
uint32_t slb_enc; /* SLB encoding for BookS */
PPCHash64PageSize enc[PPC_PAGE_SIZES_MAX_SZ];
};
struct PPCHash64Options {
#define PPC_HASH64_1TSEG 0x00001
#define PPC_HASH64_AMR 0x00002
#define PPC_HASH64_CI_LARGEPAGE 0x00004
unsigned flags;
unsigned slb_size;
PPCHash64SegmentPageSizes sps[PPC_PAGE_SIZES_MAX_SZ];
};
extern const PPCHash64Options ppc_hash64_opts_basic;
extern const PPCHash64Options ppc_hash64_opts_POWER7;
static inline bool ppc_hash64_has(PowerPCCPU *cpu, unsigned feature)
{
return !!(cpu->hash64_opts->flags & feature);
}
#endif /* CONFIG_USER_ONLY */
#if defined(CONFIG_USER_ONLY) || !defined(TARGET_PPC64)
static inline void ppc_hash64_init(PowerPCCPU *cpu)
{
}
static inline void ppc_hash64_finalize(PowerPCCPU *cpu)
{
}
#endif
#endif /* MMU_HASH64_H */