diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index ba418aaa404c..886f8ea82499 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - struct hl_ctx *ctx = hdev->user_ctx; + struct hl_ctx *ctx; u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, @@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data) if (!hdev->mmu_enable) return 0; + if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) + ctx = hdev->kernel_ctx; + else + ctx = hdev->user_ctx; + if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return 0; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 106074466dca..4e41f2669e6d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -297,6 +297,11 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 }; +static int goya_mmu_clear_pgt_range(struct hl_device *hdev); +static int goya_mmu_set_dram_default_page(struct hl_device *hdev); +static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); +static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); + void goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -554,6 +559,10 @@ int goya_late_init(struct hl_device *hdev) return rc; } + rc = goya_mmu_add_mappings_for_device_cpu(hdev); + if (rc) + return rc; + rc = goya_init_cpu_queues(hdev); if (rc) return rc; @@ -2065,10 +2074,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) goya_disable_external_queues(hdev); goya_disable_internal_queues(hdev); - if (hard_reset) + if (hard_reset) { goya_disable_msix(hdev); - else + goya_mmu_remove_device_cpu_mappings(hdev); + } else { goya_sync_irqs(hdev); + } } /* @@ -4584,7 +4595,7 @@ int goya_context_switch(struct hl_device *hdev, u32 asid) return 0; } -int goya_mmu_clear_pgt_range(struct hl_device *hdev) +static int goya_mmu_clear_pgt_range(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct goya_device *goya = hdev->asic_specific; @@ -4598,7 +4609,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, 0, true); } -int goya_mmu_set_dram_default_page(struct hl_device *hdev) +static int goya_mmu_set_dram_default_page(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; u64 addr = hdev->asic_prop.mmu_dram_default_page_addr; @@ -4611,7 +4622,112 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev) return goya_memset_device_memory(hdev, addr, size, val, true); } -void goya_mmu_prepare(struct hl_device *hdev, u32 asid) +static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct goya_device *goya = hdev->asic_specific; + s64 off, cpu_off; + int rc; + + if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + return 0; + + for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) { + rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off, + prop->dram_base_address + off, PAGE_SIZE_2MB); + if (rc) { + dev_err(hdev->dev, "Map failed for address 0x%llx\n", + prop->dram_base_address + off); + goto unmap; + } + } + + if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { + rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, + hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB); + + if (rc) { + dev_err(hdev->dev, + "Map failed for CPU accessible memory\n"); + off -= PAGE_SIZE_2MB; + goto unmap; + } + } else { + for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) { + rc = hl_mmu_map(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + hdev->cpu_accessible_dma_address + cpu_off, + PAGE_SIZE_4KB); + if (rc) { + dev_err(hdev->dev, + "Map failed for CPU accessible memory\n"); + cpu_off -= PAGE_SIZE_4KB; + goto unmap_cpu; + } + } + } + + goya->device_cpu_mmu_mappings_done = true; + + return 0; + +unmap_cpu: + for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB) + if (hl_mmu_unmap(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + PAGE_SIZE_4KB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); +unmap: + for (; off >= 0 ; off -= PAGE_SIZE_2MB) + if (hl_mmu_unmap(hdev->kernel_ctx, + prop->dram_base_address + off, PAGE_SIZE_2MB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + prop->dram_base_address + off); + + return rc; +} + +void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct goya_device *goya = hdev->asic_specific; + u32 off, cpu_off; + + if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + return; + + if (!goya->device_cpu_mmu_mappings_done) + return; + + if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) { + if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR, + PAGE_SIZE_2MB)) + dev_warn(hdev->dev, + "Failed to unmap CPU accessible memory\n"); + } else { + for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) + if (hl_mmu_unmap(hdev->kernel_ctx, + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off, + PAGE_SIZE_4KB)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap address 0x%llx\n", + VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off); + } + + for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) + if (hl_mmu_unmap(hdev->kernel_ctx, + prop->dram_base_address + off, PAGE_SIZE_2MB)) + dev_warn_ratelimited(hdev->dev, + "Failed to unmap address 0x%llx\n", + prop->dram_base_address + off); + + goya->device_cpu_mmu_mappings_done = false; +} + +static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) { struct goya_device *goya = hdev->asic_specific; int i; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 066b1d306977..f8c611883dc1 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -126,6 +126,12 @@ #define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \ VA_DDR_SPACE_START) /* 128GB */ +#if (HL_CPU_ACCESSIBLE_MEM_SIZE != SZ_2M) +#error "HL_CPU_ACCESSIBLE_MEM_SIZE must be exactly 2MB to enable MMU mapping" +#endif + +#define VA_CPU_ACCESSIBLE_MEM_ADDR 0x8000000000ull + #define DMA_MAX_TRANSFER_SIZE U32_MAX #define HW_CAP_PLL 0x00000001 @@ -157,6 +163,7 @@ struct goya_device { u64 ddr_bar_cur_addr; u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE]; u32 hw_cap_initialized; + u8 device_cpu_mmu_mappings_done; }; void goya_get_fixed_properties(struct hl_device *hdev); @@ -204,10 +211,6 @@ int goya_armcp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, void *data); void goya_halt_coresight(struct hl_device *hdev); -void goya_mmu_prepare(struct hl_device *hdev, u32 asid); -int goya_mmu_clear_pgt_range(struct hl_device *hdev); -int goya_mmu_set_dram_default_page(struct hl_device *hdev); - int goya_suspend(struct hl_device *hdev); int goya_resume(struct hl_device *hdev); @@ -225,5 +228,6 @@ void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); +void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 0462b7727da7..5e4a631b3d88 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -320,10 +320,8 @@ struct hl_cs_job; #define HL_EQ_LENGTH 64 #define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) -/* KMD <-> ArmCP shared memory size (EQ + PQ + 2MB for packets) */ -#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \ - HL_QUEUE_SIZE_IN_BYTES + \ - SZ_2M) +/* KMD <-> ArmCP shared memory size */ +#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M /** * struct hl_hw_queue - describes a H/W transport queue.