diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e3ed08dca7b7..f8bbbb3a9504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) return r; } +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd) + r = kgd2kfd->pre_reset(adev->kfd); + + return r; +} + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd) + r = kgd2kfd->post_reset(adev->kfd); + + return r; +} + +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + amdgpu_device_gpu_recover(adev, NULL, false); +} + int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) @@ -461,6 +488,14 @@ err: return ret; } +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, !idle); +} + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { if (adev->kfd) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8418a3f4e9d..2f379c183ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); @@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); + +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea79908dac4c..ea3f698aef5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
@@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, unsigned long flags, end_jiffies; int retry; + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); @@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; unsigned int tmp; + if (adev->in_gpu_reset) + return -EIO; + for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; @@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) RREG32(mmVM_INVALIDATE_RESPONSE); return 0; } + +/** + * read_vmid_from_vmfault_reg - read the vmid from the VM fault status register + * @kgd: kgd device pointer + * + * Reads the vmid field from VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK). + */ +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); + + return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 19dd665e7307..f6e53e9352bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; struct vi_mqd *m = get_mqd(mqd); + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) @@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; unsigned int tmp; + if (adev->in_gpu_reset) + return -EIO; + for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1db60aa5b7f0..8efedfcb9dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) @@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) @@ -866,6 +871,9 @@
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + if (adev->in_gpu_reset) + return -EIO; + if (ring->ready) return invalidate_tlbs_with_kiq(adev, pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fa38a960ce00..8a707d8bbb1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,6 +1621,20 @@ bo_reserve_failed: return ret; } +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *mem) +{ + struct amdgpu_device *adev; + + adev = (struct amdgpu_device *)kgd; + if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + *mem = *adev->gmc.vm_fault_info; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + } + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ec53d8f96d06..13acef526c5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; + /* Block kfd */ + amdgpu_amdkfd_pre_reset(adev); + /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); @@ -3322,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (job && job->base.sched == &ring->sched) continue; - drm_sched_hw_job_reset(&ring->sched, &job->base); + drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); @@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); } + /*unlock kfd */ + amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6cb4948233cb..bb5a47a45790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -105,6 +105,8 @@ struct amdgpu_gmc { /* protects concurrent invalidation */ spinlock_t invalidate_lock; bool translate_further; + struct kfd_vm_fault_info *vm_fault_info; + atomic_t vm_fault_info_updated; const struct amdgpu_gmc_funcs *gmc_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5d7d7900ccab..9eedc9810004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, eaddr = eaddr & ((1 << shift) - 1); flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + if (vm->root.base.bo->shadow) + flags |= AMDGPU_GEM_CREATE_SHADOW; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else - flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW); + flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { @@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else + else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE) flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); @@ -2662,8 +2663,7 @@ error_free_sched_entity: * - pasid (old PASID is released, because compute manages its own PASIDs) * * Reinitializes the page directory to reflect the changed ATS - * setting. May leave behind an unused shadow BO for the page - * directory when switching from SDMA updates to CPU updates. + * setting. * * Returns: * 0 for success, -errno for errors. @@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } + /* Free the shadow bo for compute VM */ + amdgpu_bo_unref(&vm->root.base.bo->shadow); + error: amdgpu_bo_unreserve(vm->root.base.bo); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 10920f0bd85f..36dc367c4b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -28,6 +28,7 @@ #include "cik.h" #include "gmc_v7_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #include "bif/bif_4_1_sh_mask.h" @@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v7_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); @@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? 
true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 75f3ffb2891e..70fc97b59b4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "gmc_v8_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" @@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v8_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; if (amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", @@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? 
true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 49df6c791cfc..5d2475d5392c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -25,12 +25,39 @@ #include "cik_int.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, - const uint32_t *ih_ring_entry) + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) { const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + const struct kfd2kgd_calls *f2g = dev->kfd2kgd; unsigned int vmid, pasid; + /* This workaround is due to HW/FW limitation on Hawaii that + * VMID and PASID are not written into ih_ring_entry + */ + if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && + dev->device_info->asic_family == CHIP_HAWAII) { + struct cik_ih_ring_entry *tmp_ihre = + (struct cik_ih_ring_entry *)patched_ihre; + + *patched_flag = true; + *tmp_ihre = *ihre; + + vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); + pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); + + tmp_ihre->ring_id &= 0x000000ff; + tmp_ihre->ring_id |= vmid << 8; + tmp_ihre->ring_id |= pasid << 16; + + return (pasid != 0) && + vmid >= dev->vm_info.first_vmid_kfd && + vmid <= dev->vm_info.last_vmid_kfd; + } + /* Only handle interrupts from KFD VMIDs */ vmid = (ihre->ring_id & 0x0000ff00) >> 8; if (vmid < dev->vm_info.first_vmid_kfd || @@ -48,18 +75,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || + ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; } static void cik_event_interrupt_wq(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; uint32_t context_id = ihre->data & 0xfffffff; - - pasid = (ihre->ring_id & 0xffff0000) >> 16; + unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8; + unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16; if (pasid == 0) return; @@ -72,6 +100,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); + else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { + struct kfd_vm_fault_info info; + + kfd_process_vm_fault(dev->dqm, pasid); + + memset(&info, 0, sizeof(info)); + dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info); + if (!info.page_addr && !info.status) + return; + + if (info.vmid == vmid) + kfd_signal_vm_fault_event(dev, pasid, &info); + else + kfd_signal_vm_fault_event(dev, pasid, NULL); + } } const struct kfd_event_interrupt_class event_interrupt_class_cik = { diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 109298b9d507..76f8677a7926 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -20,8 +20,8 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef HSA_RADEON_CIK_INT_H_INCLUDED -#define HSA_RADEON_CIK_INT_H_INCLUDED +#ifndef CIK_INT_H_INCLUDED +#define CIK_INT_H_INCLUDED #include @@ -34,9 +34,10 @@ struct cik_ih_ring_entry { #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 -#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF +#define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92 +#define CIK_INTSRC_GFX_MEM_PROT_FAULT 0x93 #endif diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index f68aef02fc1f..3621efbd5759 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,18 +21,21 @@ */ static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820125, + 0xbf820001, 0xbf82012b, 0xb8f4f802, 0x89748674, 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, + 0x00000400, 0xbf850017, 0xc00a1e37, 0x00000000, 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, + 0xbf840005, 0x8f728374, + 0xb972e0c2, 0xbf800002, + 0xb9740002, 0xbe801d78, + 0xb8f5f803, 0x8675ff75, + 0x000001ff, 0xbf850002, + 0x80708470, 0x82718071, + 0x8671ff71, 0x0000ffff, + 0x8f728374, 0xb972e0c2, + 0xbf800002, 0xb9740002, 0xbe801f70, 0xb8f5f803, 0x8675ff75, 0x00000100, 0xbf840006, 0xbefa0080, @@ -168,7 +171,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x807c847c, 0x806eff6e, 0x00000400, 0xbf0a757c, 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, + 0xbf8200cd, 0xbef8007e, 0x8679ff7f, 0x0000ffff, 0x8779ff79, 0x00040000, 0xbefa0080, 0xbefb00ff, @@ -268,16 +271,18 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8f739773, 0xb976f807, 0x8671ff71, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, + 0x8f768374, 0xb976e0c2, + 0xbf800002, 0xb9740002, + 0xbf8a0000, 0x95807370, + 0xbf810000, 0x00000000, }; static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015a, + 0xbf820001, 0xbf82015d, 0xb8f8f802, 0x89788678, 0xb8f1f803, 0x866eff71, - 0x00000400, 0xbf850034, + 0x00000400, 0xbf850037, 0x866eff71, 0x00000800, 0xbf850003, 0x866eff71, 0x00000100, 0xbf840008, @@ -303,258 +308,261 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, - 0xb978f802, 0xbe801f6c, - 0x866dff6d, 0x0000ffff, - 0xbef00080, 0xb9700283, - 0xb8f02407, 0x8e709c70, - 0x876d706d, 0xb8f003c7, - 0x8e709b70, 0x876d706d, - 0xb8f0f807, 0x8670ff70, - 0x00007fff, 0xb970f807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x87708478, 0xb970f802, - 0xbf8e0002, 0xbf88fffe, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8f11605, - 0x80718171, 0x8e718671, - 0x80707170, 0x80707e70, - 0x8271807f, 0x8671ff71, - 0x0000ffff, 0xc0471cb8, - 0x00000040, 0xbf8cc07f, - 0xc04b1d38, 0x00000048, - 0xbf8cc07f, 0xc0431e78, - 0x00000058, 0xbf8cc07f, - 0xc0471eb8, 0x0000005c, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x8670ff7f, - 0x08000000, 0x8f708370, - 0x87777077, 0x8670ff7f, - 0x70000000, 0x8f708170, - 0x87777077, 0xbefb007c, - 0xbefa0080, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8f01605, 0x80708170, - 0x8e708670, 0x807a707a, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc007a, 0xc0611efa, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611b3a, 
0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611b7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611bba, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611bfa, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xb8f1f803, - 0xbefe007c, 0xbefc007a, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611a3a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xb8fbf801, - 0xbefe007c, 0xbefc007a, - 0xc0611efa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0x8670ff7f, - 0x04000000, 0xbeef0080, - 0x876f6f70, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbef00080, + 0xb9700283, 0xb8f02407, + 0x8e709c70, 0x876d706d, + 0xb8f003c7, 0x8e709b70, + 0x876d706d, 0xb8f0f807, + 0x8670ff70, 0x00007fff, + 0xb970f807, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x87708478, + 0xb970f802, 0xbf8e0002, + 0xbf88fffe, 0xb8f02a05, + 0x80708170, 0x8e708a70, 0xb8f11605, 0x80718171, - 0x8e718471, 0x8e768271, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747a74, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a717c, 0xbf85ffe7, - 0xbef40172, 0xbefa0080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0xe0724000, 0x7a1d0000, - 0xe0724100, 0x7a1d0100, - 0xe0724200, 0x7a1d0200, - 0xe0724300, 0x7a1d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8f14306, 0x8671c171, - 0xbf84002c, 0xbf8a0000, - 0x8670ff6f, 0x04000000, - 0xbf840028, 0x8e718671, - 0x8e718271, 0xbef60071, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f01605, - 0x80708170, 0x8e708670, - 0x807a707a, 0x807aff7a, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x7a1d0002, - 0x68040702, 0xd0c9006a, - 0x0000e302, 0xbf87fff7, - 0xbef70000, 0xbefa00ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8f12a05, - 0x80718171, 0x8e718271, - 0x8e768871, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a717c, 0xbf840015, - 0xbf11017c, 0x8071ff71, - 0x00001000, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x7a1d0000, 0xe0724100, - 0x7a1d0100, 0xe0724200, - 0x7a1d0200, 0xe0724300, - 0x7a1d0300, 0x807c847c, - 0x807aff7a, 0x00000400, - 0xbf0a717c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200d9, + 0x8e718671, 0x80707170, + 0x80707e70, 0x8271807f, + 0x8671ff71, 0x0000ffff, + 0xc0471cb8, 0x00000040, + 0xbf8cc07f, 0xc04b1d38, + 0x00000048, 0xbf8cc07f, + 0xc0431e78, 0x00000058, + 0xbf8cc07f, 0xc0471eb8, + 0x0000005c, 0xbf8cc07f, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, - 0x866eff7f, 0x04000000, - 0xbf84001e, 0xbefe00c1, - 0xbeff00c1, 
0xb8ef4306, - 0x866fc16f, 0xbf840019, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, + 0x8670ff7f, 0x08000000, + 0x8f708370, 0x87777077, + 0x8670ff7f, 0x70000000, + 0x8f708170, 0x87777077, + 0xbefb007c, 0xbefa0080, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0xbef60084, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbef80080, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bba, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bfa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xb8f1f803, 0xbefe007c, + 0xbefc007a, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xb8fbf801, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0x8670ff7f, 0x04000000, + 0xbeef0080, 0x876f6f70, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f11605, + 0x80718171, 0x8e718471, + 0x8e768271, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747a74, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a717c, + 0xbf85ffe7, 0xbef40172, + 0xbefa0080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f14306, + 0x8671c171, 0xbf84002c, + 0xbf8a0000, 0x8670ff6f, + 0x04000000, 0xbf840028, + 0x8e718671, 0x8e718271, + 0xbef60071, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0x807aff7a, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x7a1d0002, 0x68040702, + 0xd0c9006a, 0x0000e302, + 0xbf87fff7, 0xbef70000, + 0xbefa00ff, 0x00000400, 0xbefe00c1, 0xbeff00c1, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x8e76886f, + 0xb8f12a05, 0x80718171, + 0x8e718271, 0x8e768871, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0x806fff6f, - 0x00008000, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, + 0xbefc0084, 0xbf0a717c, + 0xbf840015, 0xbf11017c, + 0x8071ff71, 0x00001000, 0x7e000300, 0x7e020301, 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbf9c0000, - 0xe0524000, 
0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0x807c847c, 0x807aff7a, + 0x00000400, 0xbf0a717c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200dc, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x08000000, 0x8f6e836e, + 0x87776e77, 0x866eff7f, + 0x70000000, 0x8f6e816e, + 0x87776e77, 0x866eff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf840019, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbef80080, 0xbefe00c1, + 0xbeff00c1, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x8e76886f, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x806fff6f, 0x00008000, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe007a, - 0xbeff007b, 0x866f71ff, - 0x000003ff, 0xb96f4803, - 0x866f71ff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc0071cb7, 0x00000040, - 0xc00b1d37, 0x00000048, - 0xc0031e77, 0x00000058, - 0xc0071eb7, 0x0000005c, - 0xbf8cc07f, 0x866fff6d, - 0xf0000000, 0x8f6f9c6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x08000000, 0x8f6f9b6f, - 
0x8e6f8f6f, 0x876e6f6e, - 0x866fff70, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0xb970f802, 0xbf8a0000, + 0xbf8cc07f, 0xbefc006f, + 0xbefe007a, 0xbeff007b, + 0x866f71ff, 0x000003ff, + 0xb96f4803, 0x866f71ff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc0071cb7, + 0x00000040, 0xc00b1d37, + 0x00000048, 0xc0031e77, + 0x00000058, 0xc0071eb7, + 0x0000005c, 0xbf8cc07f, + 0x866fff6d, 0xf0000000, + 0x8f6f9c6f, 0x8e6f906f, + 0xbeee0080, 0x876e6f6e, + 0x866fff6d, 0x08000000, + 0x8f6f9b6f, 0x8e6f8f6f, + 0x876e6f6e, 0x866fff70, + 0x00800000, 0x8f6f976f, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8370, + 0xb96ee0c2, 0xbf800002, + 0xb9700002, 0xbf8a0000, 0x95806f6c, 0xbf810000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index a2a04bb64096..abe1a5da29fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -251,7 +255,7 @@ if (!EMU_RUN_HACK) s_waitcnt lgkmcnt(0) s_or_b32 ttmp7, ttmp8, ttmp9 s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler L_NO_NEXT_TRAP: @@ -262,7 +266,7 @@ L_NO_NEXT_TRAP: s_addc_u32 ttmp1, ttmp1, 0 L_EXCP_CASE: s_and_b32 ttmp1, ttmp1, 0xFFFF - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) s_rfe_b64 [ttmp0, ttmp1] end // ********* End handling of non-CWSR traps ******************* @@ -1053,7 +1057,7 @@ end s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time @@ -1134,3 +1138,11 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end + +function set_status_without_spi_prio(status, tmp) + // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. 
+ s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp + s_nop 0x2 // avoid S_SETREG => S_SETREG hazard + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 998be96be736..0bb9c577b3a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -317,7 +321,7 @@ L_EXCP_CASE: // Restore SQ_WAVE_STATUS. s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + set_status_without_spi_prio(s_save_status, ttmp2) s_rfe_b64 [ttmp0, ttmp1] end @@ -1120,7 +1124,7 @@ end s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time @@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround s_waitcnt lgkmcnt(0) end end + +function set_status_without_spi_prio(status, tmp) + // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. 
+ s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp + s_nop 0x2 // avoid S_SETREG => S_SETREG hazard + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f64c5551cdba..297b36c26a05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -122,6 +122,9 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); + if (kfd_is_locked()) + return -EAGAIN; + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", process->pasid, process->is_32bit_user_mode); @@ -389,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, return retval; } +static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, + void *data) +{ + int retval; + const int max_num_cus = 1024; + struct kfd_ioctl_set_cu_mask_args *args = data; + struct queue_properties properties; + uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; + size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); + + if ((args->num_cu_mask % 32) != 0) { + pr_debug("num_cu_mask 0x%x must be a multiple of 32", + args->num_cu_mask); + return -EINVAL; + } + + properties.cu_mask_count = args->num_cu_mask; + if (properties.cu_mask_count == 0) { + pr_debug("CU mask cannot be 0"); + return -EINVAL; + } + + /* To prevent an unreasonably large CU mask size, set an arbitrary + * limit of max_num_cus bits. We can then just drop any CU mask bits + * past max_num_cus bits and just use the first max_num_cus bits. + */ + if (properties.cu_mask_count > max_num_cus) { + pr_debug("CU mask cannot be greater than 1024 bits"); + properties.cu_mask_count = max_num_cus; + cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); + } + + properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL); + if (!properties.cu_mask) + return -ENOMEM; + + retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size); + if (retval) { + pr_debug("Could not copy CU mask from userspace"); + kfree(properties.cu_mask); + return -EFAULT; + } + + mutex_lock(&p->mutex); + + retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties); + + mutex_unlock(&p->mutex); + + if (retval) + kfree(properties.cu_mask); + + return retval; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -754,7 +812,6 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, { struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; - struct timespec64 time; dev = kfd_device_by_id(args->gpu_id); if (dev) @@ -766,11 +823,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, args->gpu_clock_counter = 0; /* No access to rdtsc. 
Using raw monotonic time */ - getrawmonotonic64(&time); - args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time); - - get_monotonic_boottime64(&time); - args->system_clock_counter = (uint64_t)timespec64_to_ns(&time); + args->cpu_clock_counter = ktime_get_raw_ns(); + args->system_clock_counter = ktime_get_boot_ns(); /* Since the counter is in nano-seconds we use 1GHz frequency */ args->system_clock_freq = 1000000000; @@ -1558,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, kfd_ioctl_unmap_memory_from_gpu, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, + kfd_ioctl_set_cu_mask, 0), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 296b3f230280..ee4996029a86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -189,6 +189,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, return 0; } +static struct kfd_mem_properties * +find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, + struct kfd_topology_device *dev) +{ + struct kfd_mem_properties *props; + + list_for_each_entry(props, &dev->mem_props, list) { + if (props->heap_type == heap_type + && props->flags == flags + && props->width == width) + return props; + } + + return NULL; +} /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct * topology device present in the device_list */ @@ -197,36 +212,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, { struct kfd_mem_properties *props; struct kfd_topology_device *dev; + uint32_t heap_type; + uint64_t size_in_bytes; + uint32_t flags = 0; + uint32_t width; pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", mem->proximity_domain); list_for_each_entry(dev, device_list, list) { if (mem->proximity_domain == dev->proximity_domain) { - props = kfd_alloc_struct(props); - if (!props) - return -ENOMEM; - /* We're on GPU node */ if (dev->node_props.cpu_cores_count == 0) { /* APU */ if (mem->visibility_type == 0) - props->heap_type = + heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; /* dGPU */ else - props->heap_type = mem->visibility_type; + heap_type = mem->visibility_type; } else - props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) - props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; + flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) - props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; + flags |= HSA_MEM_FLAGS_NON_VOLATILE; - props->size_in_bytes = + size_in_bytes = ((uint64_t)mem->length_high << 32) + mem->length_low; - props->width = mem->width; + width = mem->width; + + /* Multiple banks of the same type are aggregated into + * one. User mode doesn't care about multiple physical + * memory segments. It's managed as a single virtual + * heap for user mode. 
+ */ + props = find_subtype_mem(heap_type, flags, width, dev); + if (props) { + props->size_in_bytes += size_in_bytes; + break; + } + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->heap_type = heap_type; + props->flags = flags; + props->size_in_bytes = size_in_bytes; + props->width = width; dev->node_props.mem_banks_count++; list_add_tail(&props->list, &dev->mem_props); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index afb26f205d29..a3441b0e385b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -38,7 +38,6 @@ #include "kfd_dbgmgr.h" #include "kfd_dbgdev.h" #include "kfd_device_queue_manager.h" -#include "../../radeon/cik_reg.h" static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h index 03424c20920c..0619c777b47e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -60,6 +60,9 @@ enum { SH_REG_SIZE = SH_REG_END - SH_REG_BASE }; +/* SQ_CMD definitions */ +#define SQ_CMD 0x8DEC + enum SQ_IND_CMD_CMD { SQ_IND_CMD_CMD_NULL = 0x00000000, SQ_IND_CMD_CMD_HALT = 0x00000001, @@ -190,4 +193,38 @@ union ULARGE_INTEGER { void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, enum DBGDEV_TYPE type); +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +enum { + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits in order to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. 
*/ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + #endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 4bd6ebfaf425..ab37d36d9cd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -21,6 +21,8 @@ */ #include +#include + #include "kfd_priv.h" static struct dentry *debugfs_root; @@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file) return single_open(file, show, NULL); } +static ssize_t kfd_debugfs_hang_hws_write(struct file *file, + const char __user *user_buf, size_t size, loff_t *ppos) +{ + struct kfd_dev *dev; + char tmp[16]; + uint32_t gpu_id; + int ret = -EINVAL; + + memset(tmp, 0, 16); + if (size >= 16) { + pr_err("Invalid input for gpu id.\n"); + goto out; + } + if (copy_from_user(tmp, user_buf, size)) { + ret = -EFAULT; + goto out; + } + if (kstrtoint(tmp, 10, &gpu_id)) { + pr_err("Invalid input for gpu id.\n"); + goto out; + } + dev = kfd_device_by_id(gpu_id); + if (dev) { + kfd_debugfs_hang_hws(dev); + ret = size; + } else + pr_err("Cannot find device %d.\n", gpu_id); + +out: + return ret; +} + static const struct file_operations kfd_debugfs_fops = { .owner = THIS_MODULE, .open = kfd_debugfs_open, @@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = { .release = single_release, }; +static const struct file_operations kfd_debugfs_hang_hws_fops = { + .owner = THIS_MODULE, + .open = kfd_debugfs_open, + .read = seq_read, + .write = kfd_debugfs_hang_hws_write, + .llseek = seq_lseek, + .release = single_release, +}; + void kfd_debugfs_init(void) { struct dentry *ent; @@ -65,6 +108,11 @@ void kfd_debugfs_init(void) ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, &kfd_debugfs_fops); + + ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root, + NULL, + &kfd_debugfs_hang_hws_fops); + if (!ent) pr_warn("Failed to create rls in kfd debugfs\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7ee6cec2c060..1b048715ab8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -30,7 +30,13 @@ #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 -static atomic_t kfd_device_suspended = ATOMIC_INIT(0); + +/* + * kfd_locked is used to lock the kfd driver during suspend or reset + * once locked, kfd driver will stop any further GPU execution. + * create process (open) will return -EAGAIN. 
+ */ +static atomic_t kfd_locked = ATOMIC_INIT(0); #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { @@ -46,6 +52,7 @@ static const struct kfd_device_info kaveri_device_info = { .supports_cwsr = false, .needs_iommu_device = true, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info carrizo_device_info = { @@ -61,6 +68,22 @@ static const struct kfd_device_info carrizo_device_info = { .supports_cwsr = true, .needs_iommu_device = true, .needs_pci_atomics = false, + .num_sdma_engines = 2, +}; + +static const struct kfd_device_info raven_device_info = { + .asic_family = CHIP_RAVEN, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = true, + .needs_pci_atomics = true, + .num_sdma_engines = 1, }; #endif @@ -77,6 +100,7 @@ static const struct kfd_device_info hawaii_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info tonga_device_info = { @@ -91,6 +115,7 @@ static const struct kfd_device_info tonga_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info tonga_vf_device_info = { @@ -105,6 +130,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info fiji_device_info = { @@ -119,6 +145,7 @@ static const struct kfd_device_info fiji_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info fiji_vf_device_info = { @@ -133,6 +160,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; @@ -148,6 +176,7 @@ static const struct kfd_device_info polaris10_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info polaris10_vf_device_info = { @@ -162,6 +191,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info polaris11_device_info = { @@ -176,6 +206,7 @@ static const struct kfd_device_info polaris11_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info vega10_device_info = { @@ -190,6 +221,7 @@ static const struct kfd_device_info vega10_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info vega10_vf_device_info = { @@ -204,6 +236,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; @@ -241,6 +274,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ 
{ 0x9877, &carrizo_device_info }, /* Carrizo */ + { 0x15DD, &raven_device_info }, /* Raven */ #endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ @@ -514,13 +548,54 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfree(kfd); } +int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + if (!kfd->init_complete) + return 0; + kgd2kfd_suspend(kfd); + + /* hold dqm->lock to prevent further execution */ + dqm_lock(kfd->dqm); + + kfd_signal_reset_event(kfd); + return 0; +} + +/* + * Fix me. KFD won't be able to resume existing processes for now. + * We will keep all existing processes in an evicted state and + * wait for the processes to be terminated. + */ + +int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + int ret, count; + + if (!kfd->init_complete) + return 0; + + dqm_unlock(kfd->dqm); + + ret = kfd_resume(kfd); + if (ret) + return ret; + count = atomic_dec_return(&kfd_locked); + WARN_ONCE(count != 0, "KFD reset ref. error"); + return 0; +} + +bool kfd_is_locked(void) +{ + return (atomic_read(&kfd_locked) > 0); +} + void kgd2kfd_suspend(struct kfd_dev *kfd) { if (!kfd->init_complete) return; /* For first KFD device suspend all the KFD processes */ - if (atomic_inc_return(&kfd_device_suspended) == 1) + if (atomic_inc_return(&kfd_locked) == 1) kfd_suspend_all_processes(); kfd->dqm->ops.stop(kfd->dqm); @@ -539,7 +614,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) if (ret) return ret; - count = atomic_dec_return(&kfd_device_suspended); + count = atomic_dec_return(&kfd_locked); WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); if (count == 0) ret = kfd_resume_all_processes(); @@ -577,14 +652,24 @@ dqm_start_error: /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { + uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; + bool is_patched = false; + if (!kfd->init_complete) return; + if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { + dev_err_once(kfd_device, "Ring entry too small\n"); + return; + } + spin_lock(&kfd->interrupt_lock); if (kfd->interrupts_active - && interrupt_is_wanted(kfd, ih_ring_entry) - && enqueue_ih_ring_entry(kfd, ih_ring_entry)) + && interrupt_is_wanted(kfd, ih_ring_entry, + patched_ihre, &is_patched) + && enqueue_ih_ring_entry(kfd, + is_patched ?
patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); @@ -739,8 +824,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); - if ((*mem_obj) == NULL) + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!(*mem_obj)) return -ENOMEM; pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); @@ -857,3 +942,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) kfree(mem_obj); return 0; } + +#if defined(CONFIG_DEBUG_FS) + +/* This function will send a packet to the HIQ to hang the HWS, + * which will trigger a GPU reset and bring the HWS back to normal state + */ +int kfd_debugfs_hang_hws(struct kfd_dev *dev) +{ + int r = 0; + + if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { + pr_err("HWS is not enabled"); + return -EINVAL; + } + + r = pm_debugfs_hang_hws(&dev->dqm->packets); + if (!r) + r = dqm_debugfs_execute_queues(dev->dqm); + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 668ad07ebe1f..ec0d62a16e53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id); +static void kfd_process_hw_exception(struct work_struct *work); + static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { @@ -99,6 +101,17 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) return dqm->dev->shared_resources.num_pipe_per_mec; } +static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_sdma_engines; +} + +unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_sdma_engines + * KFD_SDMA_QUEUES_PER_ENGINE; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -240,7 +253,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, print_queue(q); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", @@ -297,7 +310,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, dqm->total_queue_count); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -346,10 +359,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd) + mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd_mgr) return -ENOMEM; retval = allocate_hqd(dqm, q); @@ -360,7 +373,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_deallocate_hqd; - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; @@ -374,15 +387,15 @@ static int create_compute_queue_nocpsch(struct
device_queue_manager *dqm, if (!q->properties.is_active) return 0; - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, - q->process->mm); + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); if (retval) goto out_uninit_mqd; return 0; out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_hqd: @@ -399,11 +412,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) + if (!mqd_mgr) return -ENOMEM; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { @@ -420,14 +433,14 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_doorbell(qpd, q); - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval == -ETIME) qpd->reset_wavefronts = true; - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); list_del(&q->list); if (list_empty(&qpd->queues_list)) { @@ -457,9 +470,9 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, { int retval; - mutex_lock(&dqm->lock); + dqm_lock(dqm); retval = destroy_queue_nocpsch_locked(dqm, qpd, q); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -467,19 +480,19 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; bool prev_active = false; - mutex_lock(&dqm->lock); + dqm_lock(dqm); pdd = kfd_get_process_device_data(q->device, q->process); if (!pdd) { retval = -ENODEV; goto out_unlock; } - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto out_unlock; } @@ -506,7 +519,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) { - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) { @@ -515,7 +528,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); /* * check active state vs. 
the previous state and modify @@ -533,44 +546,44 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } static struct mqd_manager *get_mqd_manager( struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) { - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; pr_debug("mqd type %d\n", type); - mqd = dqm->mqds[type]; - if (!mqd) { - mqd = mqd_manager_init(type, dqm->dev); - if (!mqd) + mqd_mgr = dqm->mqd_mgrs[type]; + if (!mqd_mgr) { + mqd_mgr = mqd_manager_init(type, dqm->dev); + if (!mqd_mgr) pr_err("mqd manager is NULL"); - dqm->mqds[type] = mqd; + dqm->mqd_mgrs[type] = mqd_mgr; } - return mqd; + return mqd_mgr; } static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct queue *q; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->evicted++ > 0) /* already evicted, do nothing */ goto out; @@ -582,16 +595,16 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_active) continue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { /* should not be here */ + if (!mqd_mgr) { /* should not be here */ pr_err("Cannot evict queue, mqd mgr is NULL\n"); retval = -ENOMEM; goto out; } q->properties.is_evicted = true; q->properties.is_active = false; - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) @@ -600,7 +613,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, } out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -611,7 +624,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, struct kfd_process_device *pdd; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->evicted++ > 0) /* already evicted, do nothing */ goto out; @@ -633,7 +646,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -641,7 +654,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct queue *q; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; uint32_t pd_base; int retval = 0; @@ -650,7 +663,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ goto out; if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ @@ -677,16 +690,16 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if 
(!q->properties.is_evicted) continue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { /* should not be here */ + if (!mqd_mgr) { /* should not be here */ pr_err("Cannot restore queue, mqd mgr is NULL\n"); retval = -ENOMEM; goto out; } q->properties.is_evicted = false; q->properties.is_active = true; - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) @@ -695,7 +708,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, } qpd->evicted = 0; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -711,7 +724,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ goto out; if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ @@ -739,7 +752,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, if (!retval) qpd->evicted = 0; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -761,7 +774,7 @@ static int register_process(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_add(&n->list, &dqm->queues); /* Update PD Base in QPD */ @@ -769,9 +782,10 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); - dqm->processes_count++; + if (dqm->processes_count++ == 0) + dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -786,20 +800,22 @@ static int unregister_process(struct device_queue_manager *dqm, list_empty(&qpd->queues_list) ? 
"empty" : "not empty"); retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_for_each_entry_safe(cur, next, &dqm->queues, list) { if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - dqm->processes_count--; + if (--dqm->processes_count == 0) + dqm->dev->kfd2kgd->set_compute_idle( + dqm->dev->kgd, true); goto out; } } /* qpd not found in dqm list */ retval = 1; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -838,7 +854,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) if (!dqm->allocated_queues) return -ENOMEM; - mutex_init(&dqm->lock); + mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; @@ -853,7 +869,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; return 0; } @@ -866,8 +882,8 @@ static void uninitialize(struct device_queue_manager *dqm) kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) - kfree(dqm->mqds[i]); - mutex_destroy(&dqm->lock); + kfree(dqm->mqd_mgrs[i]); + mutex_destroy(&dqm->lock_hidden); kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } @@ -901,7 +917,7 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id) { - if (sdma_queue_id >= CIK_SDMA_QUEUES) + if (sdma_queue_id >= get_num_sdma_queues(dqm)) return; dqm->sdma_bitmap |= (1 << sdma_queue_id); } @@ -910,19 +926,19 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; int retval; - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd) + mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + if (!mqd_mgr) return -ENOMEM; retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; - q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); + q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); retval = allocate_doorbell(qpd, q); if (retval) @@ -933,19 +949,20 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; - retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties, + NULL); if (retval) goto out_uninit_mqd; return 0; out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: @@ -1003,12 +1020,14 @@ static int initialize_cpsch(struct device_queue_manager *dqm) { pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); - mutex_init(&dqm->lock); + mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; 
- dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + + INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); return 0; } @@ -1041,9 +1060,11 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); + /* clear hang status when driver try to start the hw scheduler */ + dqm->is_hws_hang = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return 0; fail_allocate_vidmem: @@ -1055,9 +1076,9 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); @@ -1069,11 +1090,11 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new kernel queue because %d queues were already created\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return -EPERM; } @@ -1089,7 +1110,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->queue_count++; qpd->is_debug = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return 0; } @@ -1098,7 +1119,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; @@ -1110,18 +1131,18 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->total_queue_count--; pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", @@ -1135,19 +1156,19 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_unlock; q->properties.sdma_queue_id = - q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = - q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->sdma_id % get_num_sdma_engines(dqm); } retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto out_deallocate_doorbell; } @@ -1164,7 +1185,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; @@ -1188,7 
+1209,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; out_deallocate_doorbell: @@ -1197,7 +1218,8 @@ out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); + return retval; } @@ -1210,6 +1232,13 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, while (*fence_addr != fence_value) { if (time_after(jiffies, end_jiffies)) { pr_err("qcm fence wait loop timeout expired\n"); + /* In HWS case, this is used to halt the driver thread + * in order not to mess up CP states before doing + * scandumps for FW debugging. + */ + while (halt_if_hws_hang) + schedule(); + return -ETIME; } schedule(); @@ -1254,6 +1283,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, { int retval = 0; + if (dqm->is_hws_hang) + return -EIO; if (!dqm->active_runlist) return retval; @@ -1292,9 +1323,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, { int retval; + if (dqm->is_hws_hang) + return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + schedule_work(&dqm->hw_exception_work); return retval; } @@ -1306,7 +1341,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; bool preempt_all_queues; preempt_all_queues = false; @@ -1314,7 +1349,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, retval = 0; /* remove queue from list to prevent rescheduling after preemption */ - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->is_debug) { /* @@ -1326,9 +1361,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto failed; } @@ -1350,7 +1385,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); /* * Unconditionally decrement this counter, regardless of the queue's @@ -1360,14 +1395,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; failed: failed_try_destroy_debugged_queue: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1391,7 +1426,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, if (!dqm->asic_ops.set_cache_memory_policy) return retval; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (alternate_aperture_size == 0) { /* base > limit disables APE1 */ @@ -1437,7 +1472,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit); out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1468,7 +1503,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct device_process_node *cur, *next_dpn; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); /* Clear all user mode queues */ 
list_for_each_entry_safe(q, next, &qpd->queues_list, list) { @@ -1489,7 +1524,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, } } - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1500,14 +1535,14 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, int retval; struct queue *q, *next; struct kernel_queue *kq, *kq_next; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); /* Clean all kernel queues */ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { @@ -1542,7 +1577,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } retval = execute_queues_cpsch(dqm, filter, 0); - if (retval || qpd->reset_wavefronts) { + if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); qpd->reset_wavefronts = false; @@ -1550,19 +1585,19 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* lastly, free mqd resources */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto out; } list_del(&q->list); qpd->queue_count--; - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); } out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1683,6 +1718,30 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_process_vm_fault(struct device_queue_manager *dqm, + unsigned int pasid) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + int ret = 0; + + if (!p) + return -EINVAL; + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + kfd_unref_process(p); + + return ret; +} + +static void kfd_process_hw_exception(struct work_struct *work) +{ + struct device_queue_manager *dqm = container_of(work, + struct device_queue_manager, hw_exception_work); + dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); +} + #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, @@ -1746,8 +1805,8 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) } } - for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) { - for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) { + for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { + for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( dqm->dev->kgd, pipe, queue, &dump, &n_regs); if (r) @@ -1764,4 +1823,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return r; } +int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) +{ + int r = 0; + + dqm_lock(dqm); + dqm->active_runlist = true; + r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + dqm_unlock(dqm); + + return r; +} + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 59a6b1956932..00da3169a004 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ 
-26,15 +26,14 @@ #include #include +#include +#include #include "kfd_priv.h" #include "kfd_mqd_manager.h" #define KFD_UNMAP_LATENCY_MS (4000) #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) - -#define CIK_SDMA_QUEUES (4) -#define CIK_SDMA_QUEUES_PER_ENGINE (2) -#define CIK_SDMA_ENGINE_NUM (2) +#define KFD_SDMA_QUEUES_PER_ENGINE (2) struct device_process_node { struct qcm_process_device *qpd; @@ -170,11 +169,12 @@ struct device_queue_manager { struct device_queue_manager_ops ops; struct device_queue_manager_asic_ops asic_ops; - struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; + struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; struct packet_manager packets; struct kfd_dev *dev; - struct mutex lock; + struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ struct list_head queues; + unsigned int saved_flags; unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; @@ -190,6 +190,10 @@ struct device_queue_manager { struct kfd_mem_obj *fence_mem; bool active_runlist; int sched_policy; + + /* hw exception */ + bool is_hws_hang; + struct work_struct hw_exception_work; }; void device_queue_manager_init_cik( @@ -207,6 +211,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); +unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { @@ -219,4 +224,19 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) return (pdd->lds_base >> 60) & 0x0E; } +/* The DQM lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. 
+ */ +static inline void dqm_lock(struct device_queue_manager *dqm) +{ + mutex_lock(&dqm->lock_hidden); + dqm->saved_flags = memalloc_nofs_save(); +} +static inline void dqm_unlock(struct device_queue_manager *dqm) +{ + memalloc_nofs_restore(dqm->saved_flags); + mutex_unlock(&dqm->lock_hidden); +} + #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 79e5bcf6367c..417515332c35 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (vega10_noretry && + if (noretry && !dqm->dev->device_info->needs_iommu_device) qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index c3744d89352c..ebe79bf00145 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, *doorbell_off = kfd->doorbell_id_offset + inx; pr_debug("Get kernel queue doorbell\n" - " doorbell offset == 0x%08X\n" - " kernel address == %p\n", - *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); + " doorbell offset == 0x%08X\n" + " doorbell index == 0x%x\n", + *doorbell_off, inx); return kfd->doorbell_kernel_ptr + inx; } @@ -199,7 +199,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) { unsigned int inx; - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) + * sizeof(u32) / kfd->device_info->doorbell_size; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_available_index); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 5562e94e786a..e9f0e0a1b41c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -850,6 +850,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ev->memory_exception_data = *ev_data; } + if (type == KFD_EVENT_TYPE_MEMORY) { + dev_warn(kfd_device, + "Sending SIGSEGV to HSA Process with PID %d ", + p->lead_thread->pid); + send_sig(SIGSEGV, p->lead_thread, 0); + } + /* Send SIGTERM no event of type "type" has been found*/ if (send_signal) { if (send_sigterm) { @@ -904,34 +911,41 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, memory_exception_data.failure.NotPresent = 1; memory_exception_data.failure.NoExecute = 0; memory_exception_data.failure.ReadOnly = 0; - if (vma) { - if (vma->vm_start > address) { - memory_exception_data.failure.NotPresent = 1; - memory_exception_data.failure.NoExecute = 0; + if (vma && address >= vma->vm_start) { + memory_exception_data.failure.NotPresent = 0; + + if (is_write_requested && !(vma->vm_flags & VM_WRITE)) + memory_exception_data.failure.ReadOnly = 1; + else memory_exception_data.failure.ReadOnly = 0; - } else { - memory_exception_data.failure.NotPresent = 0; - if (is_write_requested && !(vma->vm_flags & VM_WRITE)) - memory_exception_data.failure.ReadOnly = 1; - else - memory_exception_data.failure.ReadOnly = 0; - if (is_execute_requested && !(vma->vm_flags & VM_EXEC)) - memory_exception_data.failure.NoExecute = 1; - else - 
memory_exception_data.failure.NoExecute = 0; - } + + if (is_execute_requested && !(vma->vm_flags & VM_EXEC)) + memory_exception_data.failure.NoExecute = 1; + else + memory_exception_data.failure.NoExecute = 0; } up_read(&mm->mmap_sem); mmput(mm); - mutex_lock(&p->event_mutex); + pr_debug("notpresent %d, noexecute %d, readonly %d\n", + memory_exception_data.failure.NotPresent, + memory_exception_data.failure.NoExecute, + memory_exception_data.failure.ReadOnly); - /* Lookup events by type and signal them */ - lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, - &memory_exception_data); + /* Workaround on Raven to not kill the process when memory is freed + * before IOMMU is able to finish processing all the excessive PPRs + */ + if (dev->device_info->asic_family != CHIP_RAVEN) { + mutex_lock(&p->event_mutex); + + /* Lookup events by type and signal them */ + lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, + &memory_exception_data); + + mutex_unlock(&p->event_mutex); + } - mutex_unlock(&p->event_mutex); kfd_unref_process(p); } #endif /* KFD_SUPPORT_IOMMU_V2 */ @@ -956,3 +970,67 @@ void kfd_signal_hw_exception_event(unsigned int pasid) mutex_unlock(&p->event_mutex); kfd_unref_process(p); } + +void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + struct kfd_vm_fault_info *info) +{ + struct kfd_event *ev; + uint32_t id; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct kfd_hsa_memory_exception_data memory_exception_data; + + if (!p) + return; /* Presumably process exited. */ + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + memory_exception_data.gpu_id = dev->id; + memory_exception_data.failure.imprecise = 1; + /* Set failure reason */ + if (info) { + memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; + memory_exception_data.failure.NotPresent = + info->prot_valid ? 1 : 0; + memory_exception_data.failure.NoExecute = + info->prot_exec ? 1 : 0; + memory_exception_data.failure.ReadOnly = + info->prot_write ? 
1 : 0; + memory_exception_data.failure.imprecise = 0; + } + mutex_lock(&p->event_mutex); + + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) + if (ev->type == KFD_EVENT_TYPE_MEMORY) { + ev->memory_exception_data = memory_exception_data; + set_event(ev); + } + + mutex_unlock(&p->event_mutex); + kfd_unref_process(p); +} + +void kfd_signal_reset_event(struct kfd_dev *dev) +{ + struct kfd_hsa_hw_exception_data hw_exception_data; + struct kfd_process *p; + struct kfd_event *ev; + unsigned int temp; + uint32_t id, idx; + + /* Whole gpu reset caused by GPU hang and memory is lost */ + memset(&hw_exception_data, 0, sizeof(hw_exception_data)); + hw_exception_data.gpu_id = dev->id; + hw_exception_data.memory_lost = 1; + + idx = srcu_read_lock(&kfd_processes_srcu); + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->event_mutex); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) + if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + ev->hw_exception_data = hw_exception_data; + set_event(ev); + } + mutex_unlock(&p->event_mutex); + } + srcu_read_unlock(&kfd_processes_srcu, idx); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index abca5bfebbff..c7ac6c73af86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -66,6 +66,7 @@ struct kfd_event { /* type specific data */ union { struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; }; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 37029baa3346..f836897bbf58 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -26,7 +26,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, - const uint32_t *ih_ring_entry) + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) { uint16_t source_id, client_id, pasid, vmid; const uint32_t *data = ih_ring_entry; @@ -57,7 +59,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, return source_id == SOC15_INTSRC_CP_END_OF_PIPE || source_id == SOC15_INTSRC_SDMA_TRAP || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || - source_id == SOC15_INTSRC_CP_BAD_OPCODE; + source_id == SOC15_INTSRC_CP_BAD_OPCODE || + client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2; } static void event_interrupt_wq_v9(struct kfd_dev *dev, @@ -82,7 +86,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, kfd_signal_hw_exception_event(pasid); else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_UTCL2) { - /* TODO */ + struct kfd_vm_fault_info info = {0}; + uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + + info.vmid = vmid; + info.mc_id = client_id; + info.page_addr = ih_ring_entry[4] | + (uint64_t)(ih_ring_entry[5] & 0xf) << 32; + info.prot_valid = ring_id & 0x08; + info.prot_read = ring_id & 0x10; + info.prot_write = ring_id & 0x20; + + kfd_process_vm_fault(dev->dqm, pasid); + kfd_signal_vm_fault_event(dev, pasid, &info); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index db6d9336b80d..c56ac47cd318 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work) ih_ring_entry); } 
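The GFXv9 hunk above assembles struct kfd_vm_fault_info directly from the SOC15 IH ring entry: dwords 4 and 5 carry the faulting page number, the ring-id field carries the protection bits, and kfd_signal_vm_fault_event() later shifts page_addr by PAGE_SHIFT to report the virtual address. A minimal standalone sketch of that decoding, not part of the patch (decode_gfx9_vm_fault is a hypothetical helper written only so the field math can be compiled and checked in isolation):

#include <stdbool.h>
#include <stdint.h>

struct vm_fault_decode {
	uint64_t page_addr;	/* faulting page number; shift left by PAGE_SHIFT for the VA */
	bool prot_valid;	/* translation was not valid */
	bool prot_read;
	bool prot_write;
};

/* ih_ring_entry points at the IH ring entry dwords; ring_id is the field the
 * patch obtains via SOC15_RING_ID_FROM_IH_ENTRY().
 */
static struct vm_fault_decode decode_gfx9_vm_fault(const uint32_t *ih_ring_entry,
						   uint16_t ring_id)
{
	struct vm_fault_decode d = {0};

	/* low 32 bits of the page number in dword 4, upper 4 bits in dword 5 */
	d.page_addr = ih_ring_entry[4] |
		      (uint64_t)(ih_ring_entry[5] & 0xf) << 32;
	d.prot_valid = ring_id & 0x08;
	d.prot_read  = ring_id & 0x10;
	d.prot_write = ring_id & 0x20;

	return d;
}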
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) +bool interrupt_is_wanted(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, bool *flag) { /* integer and bitwise OR so there is no boolean short-circuiting */ unsigned int wanted = 0; wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, - ih_ring_entry); + ih_ring_entry, patched_ihre, flag); return wanted != 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index c71817963eea..7a61f38c09e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -190,7 +190,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, { struct kfd_dev *dev; - dev_warn(kfd_device, + dev_warn_ratelimited(kfd_device, "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", PCI_BUS_NUM(pdev->devfn), PCI_SLOT(pdev->devfn), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 476951d8c91c..9f84b4d9fb88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, switch (type) { case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_HIQ: - kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, + kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, KFD_MQD_TYPE_HIQ); break; default: @@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, return false; } - if (!kq->mqd) + if (!kq->mqd_mgr) return false; prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); @@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; prop.eop_ring_buffer_address = kq->eop_gpu_addr; prop.eop_ring_buffer_size = PAGE_SIZE; + prop.cu_mask = NULL; if (init_queue(&kq->queue, &prop) != 0) goto err_init_queue; @@ -130,7 +131,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->queue->device = dev; kq->queue->process = kfd_get_process(current); - retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, + retval = kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd, &kq->queue->mqd_mem_obj, &kq->queue->gart_mqd_addr, &kq->queue->properties); @@ -142,9 +143,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, pr_debug("Assigning hiq to hqd\n"); kq->queue->pipe = KFD_CIK_HIQ_PIPE; kq->queue->queue = KFD_CIK_HIQ_QUEUE; - kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, - kq->queue->queue, &kq->queue->properties, - NULL); + kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd, + kq->queue->pipe, kq->queue->queue, + &kq->queue->properties, NULL); } else { /* allocate fence for DIQ */ @@ -182,7 +183,7 @@ err_get_kernel_doorbell: static void uninitialize(struct kernel_queue *kq) { if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) - kq->mqd->destroy_mqd(kq->mqd, + kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, kq->queue->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -191,7 +192,8 @@ static void uninitialize(struct kernel_queue *kq) else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); - kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj); + kq->mqd_mgr->uninit_mqd(kq->mqd_mgr, kq->queue->mqd, + kq->queue->mqd_mem_obj); kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, 
kq->wptr_mem); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 97aff2041a5d..a7116a939029 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -70,7 +70,7 @@ struct kernel_queue { /* data */ struct kfd_dev *dev; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct queue *queue; uint64_t pending_wptr64; uint32_t pending_wptr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 76bf2dc8aec4..6e1f5c7c2d4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, + .pre_reset = kgd2kfd_pre_reset, + .post_reset = kgd2kfd_post_reset, }; int sched_policy = KFD_SCHED_POLICY_HWS; @@ -61,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc, int cwsr_enable = 1; module_param(cwsr_enable, int, 0444); -MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))"); int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); @@ -83,13 +85,19 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); -int vega10_noretry; -module_param_named(noretry, vega10_noretry, int, 0644); +int noretry; +module_param(noretry, int, 0644); MODULE_PARM_DESC(noretry, - "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)"); + +int halt_if_hws_hang; +module_param(halt_if_hws_hang, int, 0644); +MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + static int amdkfd_init_completed; + int kgd2kfd_init(unsigned int interface_version, const struct kgd2kfd_calls **g2f) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 4b8eb506642b..3bc25ab84f34 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -21,7 +21,7 @@ * */ -#include "kfd_priv.h" +#include "kfd_mqd_manager.h" struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) @@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, return NULL; } + +void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, + const uint32_t *cu_mask, uint32_t cu_mask_count, + uint32_t *se_mask) +{ + struct kfd_cu_info cu_info; + uint32_t cu_per_sh[4] = {0}; + int i, se, cu = 0; + + mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); + + if (cu_mask_count > cu_info.cu_active_number) + cu_mask_count = cu_info.cu_active_number; + + for (se = 0; se < cu_info.num_shader_engines; se++) + for (i = 0; i < 4; i++) + cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]); + + /* Symmetrically map cu_mask to all SEs: + * cu_mask[0] bit0 -> se_mask[0] bit0; + * cu_mask[0] bit1 -> se_mask[1] bit0; + * ... (if # SE is 4) + * cu_mask[0] bit4 -> se_mask[0] bit1; + * ... 
+ */ + se = 0; + for (i = 0; i < cu_mask_count; i++) { + if (cu_mask[i / 32] & (1 << (i % 32))) + se_mask[se] |= 1 << cu; + + do { + se++; + if (se == cu_info.num_shader_engines) { + se = 0; + cu++; + } + } while (cu >= cu_per_sh[se] && cu < 32); + } +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 8972bcfbf701..4e84052d4e21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -93,4 +93,8 @@ struct mqd_manager { struct kfd_dev *dev; }; +void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, + const uint32_t *cu_mask, uint32_t cu_mask_count, + uint32_t *se_mask); + #endif /* KFD_MQD_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 06eaa218eba6..47243165a082 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) return (struct cik_sdma_rlc_registers *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("Update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && @@ -408,7 +435,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 684054ff02cd..f5fc3675f21e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + 
m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -55,7 +80,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, * instead of sub-allocation function. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!*mqd_mem_obj) return -ENOMEM; retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, @@ -198,6 +223,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && @@ -393,7 +420,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 481307b8b4db..b81fda3754da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) return (struct vi_sdma_mqd *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct vi_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("Update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && @@ -394,7 +421,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index c317feb43f69..1092631765cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -418,4 +418,30 @@ out: return 0; } +int pm_debugfs_hang_hws(struct packet_manager *pm) +{ + uint32_t *buffer, size; + int r = 0; + + size = pm->pmf->query_status_size; + mutex_lock(&pm->lock); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel 
queue\n"); + r = -ENOMEM; + goto out; + } + memset(buffer, 0x55, size); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + + pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", + buffer[0], buffer[1], buffer[2], buffer[3], + buffer[4], buffer[5], buffer[6]); +out: + mutex_unlock(&pm->lock); + return r; +} + + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5e3990bb4c4b..f971710f1c91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -73,7 +73,7 @@ /* * When working with cp scheduler we should assign the HIQ manually or via - * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot + * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot * definitions for Kaveri. In Kaveri only the first ME queues participates * in the cp scheduling taking that in mind we set the HIQ slot in the * second ME. @@ -142,7 +142,12 @@ extern int ignore_crat; /* * Set sh_mem_config.retry_disable on Vega10 */ -extern int vega10_noretry; +extern int noretry; + +/* + * Halt if HWS hang is detected + */ +extern int halt_if_hws_hang; /** * enum kfd_sched_policy @@ -180,9 +185,10 @@ enum cache_policy { struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, - const uint32_t *ih_ring_entry); + const uint32_t *ih_ring_entry, uint32_t *patched_ihre, + bool *patched_flag); void (*interrupt_wq)(struct kfd_dev *dev, - const uint32_t *ih_ring_entry); + const uint32_t *ih_ring_entry); }; struct kfd_device_info { @@ -197,6 +203,7 @@ struct kfd_device_info { bool supports_cwsr; bool needs_iommu_device; bool needs_pci_atomics; + unsigned int num_sdma_engines; }; struct kfd_mem_obj { @@ -415,6 +422,9 @@ struct queue_properties { uint32_t ctl_stack_size; uint64_t tba_addr; uint64_t tma_addr; + /* Relevant for CU */ + uint32_t cu_mask_count; /* Must be a multiple of 32 */ + uint32_t *cu_mask; }; /** @@ -806,12 +816,18 @@ int kfd_interrupt_init(struct kfd_dev *dev); void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); -bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); +bool interrupt_is_wanted(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, bool *flag); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd); +/* GPU reset */ +int kgd2kfd_pre_reset(struct kfd_dev *kfd); +int kgd2kfd_post_reset(struct kfd_dev *kfd); + /* amdkfd Apertures */ int kfd_init_apertures(struct kfd_process *process); @@ -838,6 +854,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); +int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid); /* Process Queue Manager */ struct process_queue_node { @@ -858,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); @@ -964,10 +983,17 
@@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + struct kfd_vm_fault_info *info); + +void kfd_signal_reset_event(struct kfd_dev *dev); + void kfd_flush_tlb(struct kfd_process_device *pdd); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); +bool kfd_is_locked(void); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) @@ -980,6 +1006,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data); int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); int pm_debugfs_runlist(struct seq_file *m, void *data); +int kfd_debugfs_hang_hws(struct kfd_dev *dev); +int pm_debugfs_hang_hws(struct packet_manager *pm); +int dqm_debugfs_execute_queues(struct device_queue_manager *dqm); + #else static inline void kfd_debugfs_init(void) {} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1d80b4f7c681..4694386cc623 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -244,6 +244,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) return ERR_PTR(-EINVAL); process = find_process(thread); + if (!process) + return ERR_PTR(-EINVAL); return process; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d65ce0436b31..c8cad9c078ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -186,8 +186,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= - CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { pr_err("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; @@ -209,7 +208,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { - pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } @@ -326,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + kfree(pqn->q->properties.cu_mask); + pqn->q->properties.cu_mask = NULL; uninit_queue(pqn->q); } @@ -366,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, return 0; } +int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p) +{ + int retval; + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_debug("No queue %d exists for update operation\n", qid); + return -EFAULT; + } + + /* Free the old CU mask memory if it is already allocated, then + * allocate memory for the new CU mask. 
+ */ + kfree(pqn->q->properties.cu_mask); + + pqn->q->properties.cu_mask_count = p->cu_mask_count; + pqn->q->properties.cu_mask = p->cu_mask; + + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); + if (retval != 0) + return retval; + + return 0; +} + struct kernel_queue *pqm_get_kernel_queue( struct process_queue_manager *pqm, unsigned int qid) @@ -387,7 +416,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) struct process_queue_node *pqn; struct queue *q; enum KFD_MQD_TYPE mqd_type; - struct mqd_manager *mqd_manager; + struct mqd_manager *mqd_mgr; int r = 0; list_for_each_entry(pqn, &pqm->queues, process_queue_list) { @@ -410,11 +439,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q->properties.type, q->device->id); continue; } - mqd_manager = q->device->dqm->ops.get_mqd_manager( + mqd_mgr = q->device->dqm->ops.get_mqd_manager( q->device->dqm, mqd_type); } else if (pqn->kq) { q = pqn->kq->queue; - mqd_manager = pqn->kq->mqd; + mqd_mgr = pqn->kq->mqd_mgr; switch (q->properties.type) { case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", @@ -434,7 +463,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) continue; } - r = mqd_manager->debugfs_show_mqd(m, q->mqd); + r = mqd_mgr->debugfs_show_mqd(m, q->mqd); if (r != 0) break; } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 5733fbee07f7..14391b06080c 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -47,6 +47,17 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_vm_fault_info { + uint64_t page_addr; + uint32_t vmid; + uint32_t mc_id; + uint32_t status; + bool prot_valid; + bool prot_read; + bool prot_write; + bool prot_exec; +}; + struct kfd_cu_info { uint32_t num_shader_engines; uint32_t num_shader_arrays_per_engine; @@ -259,6 +270,21 @@ struct tile_config { * IB to the corresponding ring (ring type). The IB is executed with the * specified VMID in a user mode context. * + * @get_vm_fault_info: Return information about a recent VM fault on + * GFXv7 and v8. If multiple VM faults occurred since the last call of + * this function, it will return information about the first of those + * faults. On GFXv9 VM fault information is fully contained in the IH + * packet and this function is not needed. + * + * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the + * IH ring entry. This function allows the KFD ISR to get the VMID + * from the fault status register as early as possible. + * + * @gpu_recover: let kgd reset gpu after kfd detect CPC hang + * + * @set_compute_idle: Indicates that compute is idle on a device. This + * can be used to change power profiles depending on compute activity. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. 
 *
@@ -374,6 +400,14 @@ struct kfd2kgd_calls {
 	int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
 			uint32_t vmid, uint64_t gpu_addr,
 			uint32_t *ib_cmd, uint32_t ib_len);
+
+	int (*get_vm_fault_info)(struct kgd_dev *kgd,
+			struct kfd_vm_fault_info *info);
+	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
+
+	void (*gpu_recover)(struct kgd_dev *kgd);
+
+	void (*set_compute_idle)(struct kgd_dev *kgd, bool idle);
 };
 
 /**
@@ -399,6 +433,10 @@ struct kfd2kgd_calls {
 * @schedule_evict_and_restore_process: Schedules work queue that will prepare
 * for safe eviction of KFD BOs that belong to the specified process.
 *
+ * @pre_reset: Notifies amdkfd that amdgpu is about to reset the gpu
+ *
+ * @post_reset: Notifies amdkfd that amdgpu has successfully reset the gpu
+ *
 * This structure contains function callback pointers so the kgd driver
 * will notify to the amdkfd about certain status changes.
 *
@@ -417,6 +455,8 @@ struct kgd2kfd_calls {
 	int (*resume_mm)(struct mm_struct *mm);
 	int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
 			struct dma_fence *fence);
+	int (*pre_reset)(struct kfd_dev *kfd);
+	int (*post_reset)(struct kfd_dev *kfd);
 };
 
 int kgd2kfd_init(unsigned interface_version,
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b4f5073dbac2..01674b56e14f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args {
 	__u32 queue_priority;	/* to KFD */
 };
 
+struct kfd_ioctl_set_cu_mask_args {
+	__u32 queue_id;		/* to KFD */
+	__u32 num_cu_mask;	/* to KFD */
+	__u64 cu_mask_ptr;	/* to KFD */
+};
+
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
 #define KFD_IOC_CACHE_POLICY_COHERENT 0
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -189,6 +195,15 @@ struct kfd_ioctl_dbg_wave_control_args {
 
 #define KFD_SIGNAL_EVENT_LIMIT			4096
 
+/* For kfd_event_data.hw_exception_data.reset_type. */
+#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET	0
+#define KFD_HW_EXCEPTION_PER_ENGINE_RESET	1
+
+/* For kfd_event_data.hw_exception_data.reset_cause. */
+#define KFD_HW_EXCEPTION_GPU_HANG	0
+#define KFD_HW_EXCEPTION_ECC		1
+
+
 struct kfd_ioctl_create_event_args {
 	__u64 event_page_offset;	/* from KFD */
 	__u32 event_trigger_data;	/* from KFD - signal events only */
@@ -219,7 +234,7 @@ struct kfd_memory_exception_failure {
 	__u32 NotPresent;	/* Page not present or supervisor privilege */
 	__u32 ReadOnly;		/* Write access to a read-only page */
 	__u32 NoExecute;	/* Execute access to a page marked NX */
-	__u32 pad;
+	__u32 imprecise;	/* Can't determine the exact fault address */
 };
 
 /* memory exception data*/
@@ -230,10 +245,19 @@ struct kfd_hsa_memory_exception_data {
 	__u32 pad;
 };
 
-/* Event data*/
+/* hw exception data */
+struct kfd_hsa_hw_exception_data {
+	__u32 reset_type;
+	__u32 reset_cause;
+	__u32 memory_lost;
+	__u32 gpu_id;
+};
+
+/* Event data */
 struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
+		struct kfd_hsa_hw_exception_data hw_exception_data;
 	};				/* From KFD */
 	__u64 kfd_event_data_ext;	/* pointer to an extension
 					   structure for future exception types */
@@ -448,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 #define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
 		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
 
+#define AMDKFD_IOC_SET_CU_MASK		\
+		AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1A
+#define AMDKFD_COMMAND_END		0x1B
 
 #endif
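
A minimal user-space sketch of the new AMDKFD_IOC_SET_CU_MASK ioctl (illustrative only, not part of the patch). It assumes a file descriptor opened on /dev/kfd, a queue id obtained from AMDKFD_IOC_CREATE_QUEUE, and the kfd_chardev.c handler added elsewhere in this series, which treats num_cu_mask as a bit count (one bit per compute unit) that must be a non-zero multiple of 32:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Hypothetical helper: restrict an existing user queue to the CUs whose bits
 * are set in "mask".  num_cu_bits is the number of valid bits in the mask
 * array (a multiple of 32); KFD copies the mask out of user memory, so the
 * caller keeps ownership of the buffer.
 */
static int set_queue_cu_mask(int kfd_fd, uint32_t queue_id,
			     const uint32_t *mask, uint32_t num_cu_bits)
{
	struct kfd_ioctl_set_cu_mask_args args;

	memset(&args, 0, sizeof(args));
	args.queue_id = queue_id;		/* from AMDKFD_IOC_CREATE_QUEUE */
	args.num_cu_mask = num_cu_bits;		/* e.g. 64 for two 32-bit words */
	args.cu_mask_ptr = (uintptr_t)mask;	/* user pointer, copied by KFD */

	return ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
}

On the kernel side the copied mask ends up in queue_properties.cu_mask via pqm_set_cu_mask() above, which is also why pqm_destroy_queue() now frees properties.cu_mask.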
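
The new hw_exception_data member of the kfd_event_data union rides on the existing KFD event machinery: kfd_signal_reset_event(), declared in kfd_priv.h above, fills it in for events of type KFD_IOC_EVENT_HW_EXCEPTION, and user space reads it back after AMDKFD_IOC_WAIT_EVENTS. A hedged sketch of the user-space side, assuming "ev" was filled in by such a wait (the report_hw_exception helper is hypothetical, not part of the patch):

#include <stdio.h>
#include <linux/kfd_ioctl.h>

/* Decode a HW-exception event delivered through the kfd_event_data union. */
static void report_hw_exception(const struct kfd_event_data *ev)
{
	const struct kfd_hsa_hw_exception_data *hw = &ev->hw_exception_data;

	fprintf(stderr, "GPU %u: %s reset, cause: %s, memory %s\n",
		hw->gpu_id,
		hw->reset_type == KFD_HW_EXCEPTION_WHOLE_GPU_RESET ?
			"whole-GPU" : "per-engine",
		hw->reset_cause == KFD_HW_EXCEPTION_ECC ? "ECC" : "GPU hang",
		hw->memory_lost ? "lost" : "preserved");
}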