From 0337976f40b0a9605c24797762b95b3414619d71 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jul 2018 14:41:00 +0200 Subject: [PATCH 01/36] drm/amdkfd: use modern ktime accessors getrawmonotonic64() and get_monotonic_boottime64() are deprecated because of the nonstandard naming. The replacement functions ktime_get_raw_ns() and ktime_get_boot_ns() also simplify the callers. Reviewed-by: Felix Kuehling Signed-off-by: Arnd Bergmann Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f64c5551cdba..7e717716b90e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -754,7 +754,6 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, { struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; - struct timespec64 time; dev = kfd_device_by_id(args->gpu_id); if (dev) @@ -766,11 +765,8 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, args->gpu_clock_counter = 0; /* No access to rdtsc. Using raw monotonic time */ - getrawmonotonic64(&time); - args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time); - - get_monotonic_boottime64(&time); - args->system_clock_counter = (uint64_t)timespec64_to_ns(&time); + args->cpu_clock_counter = ktime_get_raw_ns(); + args->system_clock_counter = ktime_get_boot_ns(); /* Since the counter is in nano-seconds we use 1GHz frequency */ args->system_clock_freq = 1000000000; From efeaed4d98eb4dc9ce01e1dca6d3778d180b272c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 11 Jul 2018 22:32:44 -0400 Subject: [PATCH 02/36] drm/amdkfd: Reliably prevent reclaim-FS while holding DQM lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed to prevent deadlocks when MMU notifiers run in reclaim-FS context and take the DQM lock for userptr evictions. Previously this was done by making all memory allocations under DQM locks GFP_NOIO. This is error-prone. Using memalloc_nofs_save/restore will reliably affect all memory allocations anywhere in the kernel while the DQM lock is held.
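For illustration, the pattern boils down to the following minimal sketch. The memalloc_nofs_save/restore API (linux/sched/mm.h) is real; the reduced structure and helper names here are illustrative stand-ins for the dqm_lock()/dqm_unlock() helpers added to kfd_device_queue_manager.h below.

#include <linux/mutex.h>
#include <linux/sched/mm.h>	/* memalloc_nofs_save/restore */

struct example_dqm {
	struct mutex lock_hidden;	/* only ever taken through the helpers */
	unsigned int saved_flags;	/* allocation flags saved while locked */
};

static inline void example_dqm_lock(struct example_dqm *dqm)
{
	mutex_lock(&dqm->lock_hidden);
	/* From here on, every allocation by this task behaves as GFP_NOFS */
	dqm->saved_flags = memalloc_nofs_save();
}

static inline void example_dqm_unlock(struct example_dqm *dqm)
{
	memalloc_nofs_restore(dqm->saved_flags);
	mutex_unlock(&dqm->lock_hidden);
}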
Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 85 ++++++++++--------- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 20 ++++- 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 668ad07ebe1f..f2f81d26db0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -240,7 +240,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, print_queue(q); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", @@ -297,7 +297,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, dqm->total_queue_count); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -457,9 +457,9 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, { int retval; - mutex_lock(&dqm->lock); + dqm_lock(dqm); retval = destroy_queue_nocpsch_locked(dqm, qpd, q); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -471,7 +471,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) struct kfd_process_device *pdd; bool prev_active = false; - mutex_lock(&dqm->lock); + dqm_lock(dqm); pdd = kfd_get_process_device_data(q->device, q->process); if (!pdd) { retval = -ENODEV; @@ -537,7 +537,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) &q->properties, q->process->mm); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -570,7 +570,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct kfd_process_device *pdd; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->evicted++ > 0) /* already evicted, do nothing */ goto out; @@ -600,7 +600,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, } out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -611,7 +611,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, struct kfd_process_device *pdd; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->evicted++ > 0) /* already evicted, do nothing */ goto out; @@ -633,7 +633,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -650,7 +650,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ goto out; if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ @@ -695,7 +695,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, } qpd->evicted = 0; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -711,7 +711,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ goto out; if (qpd->evicted > 1) { /* ref count still > 0, 
decrement & quit */ @@ -739,7 +739,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, if (!retval) qpd->evicted = 0; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -761,7 +761,7 @@ static int register_process(struct device_queue_manager *dqm, /* Retrieve PD base */ pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_add(&n->list, &dqm->queues); /* Update PD Base in QPD */ @@ -771,7 +771,7 @@ static int register_process(struct device_queue_manager *dqm, dqm->processes_count++; - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -786,7 +786,7 @@ static int unregister_process(struct device_queue_manager *dqm, list_empty(&qpd->queues_list) ? "empty" : "not empty"); retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_for_each_entry_safe(cur, next, &dqm->queues, list) { if (qpd == cur->qpd) { @@ -799,7 +799,7 @@ static int unregister_process(struct device_queue_manager *dqm, /* qpd not found in dqm list */ retval = 1; out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -838,7 +838,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) if (!dqm->allocated_queues) return -ENOMEM; - mutex_init(&dqm->lock); + mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; @@ -867,7 +867,7 @@ static void uninitialize(struct device_queue_manager *dqm) kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqds[i]); - mutex_destroy(&dqm->lock); + mutex_destroy(&dqm->lock_hidden); kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } @@ -1003,7 +1003,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) { pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); - mutex_init(&dqm->lock); + mutex_init(&dqm->lock_hidden); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; @@ -1041,9 +1041,9 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - mutex_lock(&dqm->lock); + dqm_lock(dqm); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return 0; fail_allocate_vidmem: @@ -1055,9 +1055,9 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); @@ -1069,11 +1069,11 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new kernel queue because %d queues were already created\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return -EPERM; } @@ -1089,7 +1089,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->queue_count++; qpd->is_debug = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return 0; } @@ -1098,7 +1098,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - mutex_lock(&dqm->lock); + dqm_lock(dqm); list_del(&kq->list); 
dqm->queue_count--; qpd->is_debug = false; @@ -1110,7 +1110,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->total_queue_count--; pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, @@ -1121,7 +1121,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", @@ -1188,7 +1188,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; out_deallocate_doorbell: @@ -1197,7 +1197,8 @@ out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); out_unlock: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); + return retval; } @@ -1314,7 +1315,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, retval = 0; /* remove queue from list to prevent rescheduling after preemption */ - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (qpd->is_debug) { /* @@ -1360,14 +1361,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; failed: failed_try_destroy_debugged_queue: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1391,7 +1392,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, if (!dqm->asic_ops.set_cache_memory_policy) return retval; - mutex_lock(&dqm->lock); + dqm_lock(dqm); if (alternate_aperture_size == 0) { /* base > limit disables APE1 */ @@ -1437,7 +1438,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit); out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1468,7 +1469,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct device_process_node *cur, *next_dpn; int retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); /* Clear all user mode queues */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { @@ -1489,7 +1490,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, } } - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } @@ -1507,7 +1508,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, retval = 0; - mutex_lock(&dqm->lock); + dqm_lock(dqm); /* Clean all kernel queues */ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { @@ -1562,7 +1563,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - mutex_unlock(&dqm->lock); + dqm_unlock(dqm); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 59a6b1956932..0a23ddac345e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -26,6 +26,8 @@ #include #include +#include +#include #include "kfd_priv.h" #include "kfd_mqd_manager.h" @@ -173,8 +175,9 @@ struct device_queue_manager { struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; struct 
packet_manager packets; struct kfd_dev *dev; - struct mutex lock; + struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ struct list_head queues; + unsigned int saved_flags; unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; @@ -219,4 +222,19 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) return (pdd->lds_base >> 60) & 0x0E; } +/* The DQM lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void dqm_lock(struct device_queue_manager *dqm) +{ + mutex_lock(&dqm->lock_hidden); + dqm->saved_flags = memalloc_nofs_save(); +} +static inline void dqm_unlock(struct device_queue_manager *dqm) +{ + memalloc_nofs_restore(dqm->saved_flags); + mutex_unlock(&dqm->lock_hidden); +} + #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ From 1cd106ecfc1f048db3795cc6aed8acb156ba6d4d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 11 Jul 2018 22:32:45 -0400 Subject: [PATCH 03/36] drm/amdkfd: Stop using GFP_NOIO explicitly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is no longer needed with the memalloc_nofs_save/restore in dqm_lock/unlock. Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7ee6cec2c060..48c505e83217 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -739,8 +739,8 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); - if ((*mem_obj) == NULL) + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!(*mem_obj)) return -ENOMEM; pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 06eaa218eba6..4872574f7a04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -408,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 684054ff02cd..ad5c9f80cccd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -55,7 +55,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, * instead of sub-allocation function. 
*/ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!*mqd_mem_obj) return -ENOMEM; retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, @@ -393,7 +393,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 481307b8b4db..89e4242e43e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; From a60d811b2bf45f2e776e00d71f0dc3c5043245d9 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 11 Jul 2018 22:32:46 -0400 Subject: [PATCH 04/36] drm/amdkfd: Fix race between scheduler and context restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scheduler may raise SQ_WAVE_STATUS.SPI_PRIO via SQ_CMD before context restore has completed. Restoring SPI_PRIO=0 after this point may cause context save to fail as the lower priority wavefronts are not selected for execution among spin-waiting wavefronts. Leave SPI_PRIO at its SPI-initialized or scheduler-raised value. v2: Also fix race with exception handler Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 498 +++++++++--------- .../drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 18 +- .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 16 +- 3 files changed, 282 insertions(+), 250 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index f68aef02fc1f..3621efbd5759 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,18 +21,21 @@ */ static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820125, + 0xbf820001, 0xbf82012b, 0xb8f4f802, 0x89748674, 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, + 0x00000400, 0xbf850017, 0xc00a1e37, 0x00000000, 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, + 0xbf840005, 0x8f728374, + 0xb972e0c2, 0xbf800002, + 0xb9740002, 0xbe801d78, + 0xb8f5f803, 0x8675ff75, + 0x000001ff, 0xbf850002, + 0x80708470, 0x82718071, + 0x8671ff71, 0x0000ffff, + 0x8f728374, 0xb972e0c2, + 0xbf800002, 0xb9740002, 0xbe801f70, 0xb8f5f803, 0x8675ff75, 0x00000100, 0xbf840006, 0xbefa0080, @@ -168,7 +171,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x807c847c, 0x806eff6e, 0x00000400, 0xbf0a757c, 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, + 0xbf8200cd, 0xbef8007e, 0x8679ff7f, 0x0000ffff, 0x8779ff79, 0x00040000, 0xbefa0080, 0xbefb00ff, @@ -268,16 +271,18 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8f739773, 0xb976f807, 0x8671ff71, 0x0000ffff, 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, + 0x8f768374, 0xb976e0c2, + 
0xbf800002, 0xb9740002, + 0xbf8a0000, 0x95807370, + 0xbf810000, 0x00000000, }; static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015a, + 0xbf820001, 0xbf82015d, 0xb8f8f802, 0x89788678, 0xb8f1f803, 0x866eff71, - 0x00000400, 0xbf850034, + 0x00000400, 0xbf850037, 0x866eff71, 0x00000800, 0xbf850003, 0x866eff71, 0x00000100, 0xbf840008, @@ -303,258 +308,261 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, - 0xb978f802, 0xbe801f6c, - 0x866dff6d, 0x0000ffff, - 0xbef00080, 0xb9700283, - 0xb8f02407, 0x8e709c70, - 0x876d706d, 0xb8f003c7, - 0x8e709b70, 0x876d706d, - 0xb8f0f807, 0x8670ff70, - 0x00007fff, 0xb970f807, - 0xbeee007e, 0xbeef007f, - 0xbefe0180, 0xbf900004, - 0x87708478, 0xb970f802, - 0xbf8e0002, 0xbf88fffe, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8f11605, - 0x80718171, 0x8e718671, - 0x80707170, 0x80707e70, - 0x8271807f, 0x8671ff71, - 0x0000ffff, 0xc0471cb8, - 0x00000040, 0xbf8cc07f, - 0xc04b1d38, 0x00000048, - 0xbf8cc07f, 0xc0431e78, - 0x00000058, 0xbf8cc07f, - 0xc0471eb8, 0x0000005c, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x8670ff7f, - 0x08000000, 0x8f708370, - 0x87777077, 0x8670ff7f, - 0x70000000, 0x8f708170, - 0x87777077, 0xbefb007c, - 0xbefa0080, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8f01605, 0x80708170, - 0x8e708670, 0x807a707a, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc007a, 0xc0611efa, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611b3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611b7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611bba, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611bfa, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xb8f1f803, - 0xbefe007c, 0xbefc007a, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611a3a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xb8fbf801, - 0xbefe007c, 0xbefc007a, - 0xc0611efa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0x8670ff7f, - 0x04000000, 0xbeef0080, - 0x876f6f70, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbef00080, + 0xb9700283, 0xb8f02407, + 0x8e709c70, 0x876d706d, + 0xb8f003c7, 0x8e709b70, + 0x876d706d, 0xb8f0f807, + 0x8670ff70, 0x00007fff, + 0xb970f807, 0xbeee007e, + 0xbeef007f, 0xbefe0180, + 0xbf900004, 0x87708478, + 0xb970f802, 0xbf8e0002, + 0xbf88fffe, 0xb8f02a05, + 0x80708170, 0x8e708a70, 0xb8f11605, 0x80718171, - 0x8e718471, 0x8e768271, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747a74, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a717c, 0xbf85ffe7, - 0xbef40172, 0xbefa0080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0xe0724000, 0x7a1d0000, - 0xe0724100, 0x7a1d0100, - 0xe0724200, 0x7a1d0200, - 0xe0724300, 
0x7a1d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8f14306, 0x8671c171, - 0xbf84002c, 0xbf8a0000, - 0x8670ff6f, 0x04000000, - 0xbf840028, 0x8e718671, - 0x8e718271, 0xbef60071, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f01605, - 0x80708170, 0x8e708670, - 0x807a707a, 0x807aff7a, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, - 0xd1060002, 0x00011103, - 0x7e0602ff, 0x00000200, - 0xbefc00ff, 0x00010000, - 0xbe800077, 0x8677ff77, - 0xff7fffff, 0x8777ff77, - 0x00058000, 0xd8ec0000, - 0x00000002, 0xbf8cc07f, - 0xe0765000, 0x7a1d0002, - 0x68040702, 0xd0c9006a, - 0x0000e302, 0xbf87fff7, - 0xbef70000, 0xbefa00ff, - 0x00000400, 0xbefe00c1, - 0xbeff00c1, 0xb8f12a05, - 0x80718171, 0x8e718271, - 0x8e768871, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a717c, 0xbf840015, - 0xbf11017c, 0x8071ff71, - 0x00001000, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x7a1d0000, 0xe0724100, - 0x7a1d0100, 0xe0724200, - 0x7a1d0200, 0xe0724300, - 0x7a1d0300, 0x807c847c, - 0x807aff7a, 0x00000400, - 0xbf0a717c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200d9, + 0x8e718671, 0x80707170, + 0x80707e70, 0x8271807f, + 0x8671ff71, 0x0000ffff, + 0xc0471cb8, 0x00000040, + 0xbf8cc07f, 0xc04b1d38, + 0x00000048, 0xbf8cc07f, + 0xc0431e78, 0x00000058, + 0xbf8cc07f, 0xc0471eb8, + 0x0000005c, 0xbf8cc07f, 0xbef4007e, 0x8675ff7f, 0x0000ffff, 0x8775ff75, 0x00040000, 0xbef60080, 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x08000000, - 0x8f6e836e, 0x87776e77, - 0x866eff7f, 0x70000000, - 0x8f6e816e, 0x87776e77, - 0x866eff7f, 0x04000000, - 0xbf84001e, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf840019, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, + 0x8670ff7f, 0x08000000, + 0x8f708370, 0x87777077, + 0x8670ff7f, 0x70000000, + 0x8f708170, 0x87777077, + 0xbefb007c, 0xbefa0080, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0xbef60084, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbef80080, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bba, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bfa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xb8f1f803, 0xbefe007c, + 0xbefc007a, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xb8fbf801, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0x8670ff7f, 0x04000000, + 0xbeef0080, 0x876f6f70, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f11605, + 0x80718171, 0x8e718471, + 0x8e768271, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747a74, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 
0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a717c, + 0xbf85ffe7, 0xbef40172, + 0xbefa0080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f14306, + 0x8671c171, 0xbf84002c, + 0xbf8a0000, 0x8670ff6f, + 0x04000000, 0xbf840028, + 0x8e718671, 0x8e718271, + 0xbef60071, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0x807aff7a, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x7a1d0002, 0x68040702, + 0xd0c9006a, 0x0000e302, + 0xbf87fff7, 0xbef70000, + 0xbefa00ff, 0x00000400, 0xbefe00c1, 0xbeff00c1, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x8e76886f, + 0xb8f12a05, 0x80718171, + 0x8e718271, 0x8e768871, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0x806fff6f, - 0x00008000, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, + 0xbefc0084, 0xbf0a717c, + 0xbf840015, 0xbf11017c, + 0x8071ff71, 0x00001000, 0x7e000300, 0x7e020301, 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0x807c847c, 0x807aff7a, + 0x00000400, 0xbf0a717c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200dc, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x08000000, 0x8f6e836e, + 0x87776e77, 0x866eff7f, + 0x70000000, 0x8f6e816e, + 0x87776e77, 0x866eff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf840019, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbef80080, 0xbefe00c1, + 0xbeff00c1, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x8e76886f, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x806fff6f, 0x00008000, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, 
+ 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe007a, - 0xbeff007b, 0x866f71ff, - 0x000003ff, 0xb96f4803, - 0x866f71ff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc0071cb7, 0x00000040, - 0xc00b1d37, 0x00000048, - 0xc0031e77, 0x00000058, - 0xc0071eb7, 0x0000005c, - 0xbf8cc07f, 0x866fff6d, - 0xf0000000, 0x8f6f9c6f, - 0x8e6f906f, 0xbeee0080, - 0x876e6f6e, 0x866fff6d, - 0x08000000, 0x8f6f9b6f, - 0x8e6f8f6f, 0x876e6f6e, - 0x866fff70, 0x00800000, - 0x8f6f976f, 0xb96ef807, - 0x866dff6d, 0x0000ffff, - 0x86fe7e7e, 0x86ea6a6a, - 0xb970f802, 0xbf8a0000, + 0xbf8cc07f, 0xbefc006f, + 0xbefe007a, 0xbeff007b, + 0x866f71ff, 0x000003ff, + 0xb96f4803, 0x866f71ff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc0071cb7, + 0x00000040, 0xc00b1d37, + 0x00000048, 0xc0031e77, + 0x00000058, 0xc0071eb7, + 0x0000005c, 0xbf8cc07f, + 0x866fff6d, 0xf0000000, + 0x8f6f9c6f, 0x8e6f906f, + 0xbeee0080, 0x876e6f6e, + 0x866fff6d, 0x08000000, + 0x8f6f9b6f, 0x8e6f8f6f, + 0x876e6f6e, 0x866fff70, + 0x00800000, 0x8f6f976f, + 0xb96ef807, 0x866dff6d, + 0x0000ffff, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8370, + 0xb96ee0c2, 0xbf800002, + 0xb9700002, 0xbf8a0000, 0x95806f6c, 0xbf810000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index a2a04bb64096..abe1a5da29fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -251,7 +255,7 @@ if (!EMU_RUN_HACK) s_waitcnt lgkmcnt(0) s_or_b32 ttmp7, ttmp8, ttmp9 s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) 
s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler L_NO_NEXT_TRAP: @@ -262,7 +266,7 @@ L_NO_NEXT_TRAP: s_addc_u32 ttmp1, ttmp1, 0 L_EXCP_CASE: s_and_b32 ttmp1, ttmp1, 0xFFFF - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) s_rfe_b64 [ttmp0, ttmp1] end // ********* End handling of non-CWSR traps ******************* @@ -1053,7 +1057,7 @@ end s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time @@ -1134,3 +1138,11 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end + +function set_status_without_spi_prio(status, tmp) + // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. + s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp + s_nop 0x2 // avoid S_SETREG => S_SETREG hazard + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +end diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 998be96be736..0bb9c577b3a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -103,6 +103,10 @@ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_STATUS_HALT_MASK = 0x2000 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -317,7 +321,7 @@ L_EXCP_CASE: // Restore SQ_WAVE_STATUS. 
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + set_status_without_spi_prio(s_save_status, ttmp2) s_rfe_b64 [ttmp0, ttmp1] end @@ -1120,7 +1124,7 @@ end s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time @@ -1212,3 +1216,11 @@ function ack_sqc_store_workaround s_waitcnt lgkmcnt(0) end end + +function set_status_without_spi_prio(status, tmp) + // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. + s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp + s_nop 0x2 // avoid S_SETREG => S_SETREG hazard + s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +end From e47cb828eb3fca3e8999a0b9aa053dda18552071 Mon Sep 17 00:00:00 2001 From: Wei Lu Date: Wed, 11 Jul 2018 22:32:47 -0400 Subject: [PATCH 05/36] drm/amdkfd: Fix error codes in kfd_get_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return ERR_PTR(-EINVAL) if kfd_get_process fails to find the process. This fixes kernel oopses when a child process calls KFD ioctls with a file descriptor inherited from the parent process. 
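For illustration, a minimal sketch of the caller side this fix assumes: kfd_get_process() now always returns either a valid pointer or an ERR_PTR, so callers can rely on the usual IS_ERR()/PTR_ERR() check instead of dereferencing a NULL return. Only kfd_get_process() and the err.h macros are taken from the real code; the wrapper below is hypothetical, the actual checks live in the kfd_chardev.c ioctl paths.

#include <linux/err.h>
#include <linux/sched.h>
#include "kfd_priv.h"	/* kfd_get_process(), struct kfd_process */

/* Hypothetical caller; shown only to illustrate the error handling. */
static int example_ioctl_prologue(struct kfd_process **out)
{
	struct kfd_process *process = kfd_get_process(current);

	if (IS_ERR(process))
		return PTR_ERR(process);	/* -EINVAL instead of a later NULL dereference */

	*out = process;
	return 0;
}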
Signed-off-by: Wei Lu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1d80b4f7c681..4694386cc623 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -244,6 +244,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) return ERR_PTR(-EINVAL); process = find_process(thread); + if (!process) + return ERR_PTR(-EINVAL); return process; } From 101fee63cbb0a3df9e54aaafbfad0ab5821a34e6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Wed, 11 Jul 2018 22:32:48 -0400 Subject: [PATCH 06/36] drm/amdkfd: send SIGSEGV to process upon KFD_EVENT_TYPE_MEMORY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Moses Reuben Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 5562e94e786a..3d5a8332e8c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -850,6 +850,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ev->memory_exception_data = *ev_data; } + if (type == KFD_EVENT_TYPE_MEMORY) { + dev_warn(kfd_device, + "Sending SIGSEGV to HSA Process with PID %d ", + p->lead_thread->pid); + send_sig(SIGSEGV, p->lead_thread, 0); + } + /* Send SIGTERM no event of type "type" has been found*/ if (send_signal) { if (send_sigterm) { From b97dfa27ef3ad3eddd2cb97a3b6a140d7037827a Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 11 Jul 2018 22:32:49 -0400 Subject: [PATCH 07/36] drm/amdgpu: save vm fault information for amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu saves the vm fault related information for KFD usage and keeps the copy until KFD reads it.
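For illustration, the handshake condensed from the hunks below (gmc_v7_0.c/gmc_v8_0.c act as the producer, amdgpu_amdkfd_gpuvm.c as the consumer). fill_fault_info() is a placeholder for the field assignments done in the real interrupt handlers; everything else mirrors the adev->gmc fields added by this patch.

/* Producer, in the GFX v7/v8 VM fault interrupt: record only if the
 * previous fault has been consumed, so the first fault since the last
 * read is the one that is kept. */
if (!atomic_read(&adev->gmc.vm_fault_info_updated)) {
	fill_fault_info(adev->gmc.vm_fault_info, status, addr);	/* placeholder */
	mb();	/* make the record visible before raising the flag */
	atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}

/* Consumer, amdgpu_amdkfd_gpuvm_get_vm_fault_info() called from KFD:
 * copy the record out, then clear the flag so the next fault can be
 * recorded. */
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
	*info = *adev->gmc.vm_fault_info;
	mb();
	atomic_set(&adev->gmc.vm_fault_info_updated, 0);
}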
Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 ++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 33 ++++++++++++++++++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 20 +++++++++++ 8 files changed, 105 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8418a3f4e9d..3dc76d9b4d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea79908dac4c..befc7c48b1cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 19dd665e7307..c68ef85f7753 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fa38a960ce00..8a707d8bbb1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,6 +1621,20 @@ bo_reserve_failed: return ret; } +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *mem) +{ + struct amdgpu_device *adev; + + adev = (struct amdgpu_device *)kgd; + if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + *mem = *adev->gmc.vm_fault_info; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + } + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. 
This means it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6cb4948233cb..bb5a47a45790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -105,6 +105,8 @@ struct amdgpu_gmc { /* protects concurrent invalidation */ spinlock_t invalidate_lock; bool translate_further; + struct kfd_vm_fault_info *vm_fault_info; + atomic_t vm_fault_info_updated; const struct amdgpu_gmc_funcs *gmc_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 10920f0bd85f..36dc367c4b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -28,6 +28,7 @@ #include "cik.h" #include "gmc_v7_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #include "bif/bif_4_1_sh_mask.h" @@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v7_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); @@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? 
true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 75f3ffb2891e..70fc97b59b4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "gmc_v8_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" @@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v8_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; if (amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", @@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 5733fbee07f7..28b11d105288 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -47,6 +47,17 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_vm_fault_info { + uint64_t page_addr; + uint32_t vmid; + uint32_t mc_id; + uint32_t status; + bool prot_valid; + bool prot_read; + bool prot_write; + bool prot_exec; +}; + struct kfd_cu_info { uint32_t num_shader_engines; uint32_t num_shader_arrays_per_engine; @@ -259,6 +270,12 @@ struct tile_config { * IB to the corresponding ring (ring type). The IB is executed with the * specified VMID in a user mode context. * + * @get_vm_fault_info: Return information about a recent VM fault on + * GFXv7 and v8. If multiple VM faults occurred since the last call of + * this function, it will return information about the first of those + * faults. On GFXv9 VM fault information is fully contained in the IH + * packet and this function is not needed. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. 
* @@ -374,6 +391,9 @@ struct kfd2kgd_calls { int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); + + int (*get_vm_fault_info)(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); }; /** From 2640c3facbd6e21e63c95f19588cc24913a263cd Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 11 Jul 2018 22:32:50 -0400 Subject: [PATCH 08/36] drm/amdkfd: Handle VM faults in KFD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Pre-GFX9, the amdgpu ISR saves the vm-fault status and address per vmid. amdkfd needs to get the information from amdgpu through the new get_vm_fault_info interface. On GFX9 and later, all the required information is in the IH ring. 2. amdkfd unmaps all queues from the faulting process and creates a new run-list without the guilty process. 3. amdkfd notifies the runtime of the vm fault trap via EVENT_TYPE_MEMORY Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdkfd/cik_event_interrupt.c | 25 +++++++++++-- drivers/gpu/drm/amd/amdkfd/cik_int.h | 2 + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 17 +++++++++ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 37 +++++++++++++++++++ .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 18 ++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++ include/uapi/linux/kfd_ioctl.h | 2 +- 7 files changed, 98 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 49df6c791cfc..cc33870e7edb 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -48,18 +48,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || + ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; } static void cik_event_interrupt_wq(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; uint32_t context_id = ihre->data & 0xfffffff; - - pasid = (ihre->ring_id & 0xffff0000) >> 16; + unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8; + unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16; if (pasid == 0) return; @@ -72,6 +73,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); + else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + + kfd_process_vm_fault(dev->dqm, pasid); + + memset(&info, 0, sizeof(info)); + dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info); + if (!info.page_addr && !info.status) + return; + + if (info.vmid == vmid) + kfd_signal_vm_fault_event(dev, pasid, &info); + else + kfd_signal_vm_fault_event(dev, pasid, NULL); + } } const struct kfd_event_interrupt_class event_interrupt_class_cik = { diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index 109298b9d507..a2079a04a673 100644 ---
a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -37,6 +37,8 @@ struct cik_ih_ring_entry { #define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF +#define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92 +#define CIK_INTSRC_GFX_MEM_PROT_FAULT 0x93 #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f2f81d26db0c..44fc2038770e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1684,6 +1684,23 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_process_vm_fault(struct device_queue_manager *dqm, + unsigned int pasid) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + int ret = 0; + + if (!p) + return -EINVAL; + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + kfd_unref_process(p); + + return ret; +} + #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 3d5a8332e8c0..b58a0e665ebc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -963,3 +963,40 @@ void kfd_signal_hw_exception_event(unsigned int pasid) mutex_unlock(&p->event_mutex); kfd_unref_process(p); } + +void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + struct kfd_vm_fault_info *info) +{ + struct kfd_event *ev; + uint32_t id; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct kfd_hsa_memory_exception_data memory_exception_data; + + if (!p) + return; /* Presumably process exited. */ + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + memory_exception_data.gpu_id = dev->id; + memory_exception_data.failure.imprecise = 1; + /* Set failure reason */ + if (info) { + memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; + memory_exception_data.failure.NotPresent = + info->prot_valid ? 1 : 0; + memory_exception_data.failure.NoExecute = + info->prot_exec ? 1 : 0; + memory_exception_data.failure.ReadOnly = + info->prot_write ? 
1 : 0; + memory_exception_data.failure.imprecise = 0; + } + mutex_lock(&p->event_mutex); + + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) + if (ev->type == KFD_EVENT_TYPE_MEMORY) { + ev->memory_exception_data = memory_exception_data; + set_event(ev); + } + + mutex_unlock(&p->event_mutex); + kfd_unref_process(p); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 37029baa3346..d6b64e692760 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -57,7 +57,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, return source_id == SOC15_INTSRC_CP_END_OF_PIPE || source_id == SOC15_INTSRC_SDMA_TRAP || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || - source_id == SOC15_INTSRC_CP_BAD_OPCODE; + source_id == SOC15_INTSRC_CP_BAD_OPCODE || + client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2; } static void event_interrupt_wq_v9(struct kfd_dev *dev, @@ -82,7 +84,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, kfd_signal_hw_exception_event(pasid); else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_UTCL2) { - /* TODO */ + struct kfd_vm_fault_info info = {0}; + uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + + info.vmid = vmid; + info.mc_id = client_id; + info.page_addr = ih_ring_entry[4] | + (uint64_t)(ih_ring_entry[5] & 0xf) << 32; + info.prot_valid = ring_id & 0x08; + info.prot_read = ring_id & 0x10; + info.prot_write = ring_id & 0x20; + + kfd_process_vm_fault(dev->dqm, pasid); + kfd_signal_vm_fault_event(dev, pasid, &info); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5e3990bb4c4b..91a3368421b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -838,6 +838,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); +int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid); /* Process Queue Manager */ struct process_queue_node { @@ -964,6 +965,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + struct kfd_vm_fault_info *info); + void kfd_flush_tlb(struct kfd_process_device *pdd); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b4f5073dbac2..46a54ab1e728 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -219,7 +219,7 @@ struct kfd_memory_exception_failure { __u32 NotPresent; /* Page not present or supervisor privilege */ __u32 ReadOnly; /* Write access to a read-only page */ __u32 NoExecute; /* Execute access to a page marked NX */ - __u32 pad; + __u32 imprecise; /* Can't determine the exact fault address */ }; /* memory exception data*/ From 58e698861255129a00765b69c0499bc0d044feb4 Mon Sep 17 00:00:00 2001 From: Lan Xiao Date: Wed, 11 Jul 2018 22:32:51 -0400 Subject: [PATCH 09/36] drm/amdkfd: fix zero reading of VMID and PASID for Hawaii MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Upon VM Fault, the VMID and PASID written by HW are zeros in Hawaii. Instead of reading from ih_ring_entry, read directly from the registers. This workaround fixes the soft hang issues caused by mishandled VM Faults on Hawaii. Signed-off-by: Lan Xiao Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 20 ++++++++++++- .../gpu/drm/amd/amdkfd/cik_event_interrupt.c | 29 ++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 14 +++++++-- .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 ++- drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 6 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++++-- .../gpu/drm/amd/include/kgd_kfd_interface.h | 5 ++++ 7 files changed, 77 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index befc7c48b1cf..b4a05c510c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) RREG32(mmVM_INVALIDATE_RESPONSE); return 0; } + + /** + * read_vmid_from_vmfault_reg - read the VMID from the VM fault status register + * + * @kgd: kgd device pointer + * + * Returns the VMID recorded in VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK).
+ */ +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); + + return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); +} diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index cc33870e7edb..5d2475d5392c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -25,12 +25,39 @@ #include "cik_int.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, - const uint32_t *ih_ring_entry) + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) { const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + const struct kfd2kgd_calls *f2g = dev->kfd2kgd; unsigned int vmid, pasid; + /* This workaround is due to HW/FW limitation on Hawaii that + * VMID and PASID are not written into ih_ring_entry + */ + if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && + dev->device_info->asic_family == CHIP_HAWAII) { + struct cik_ih_ring_entry *tmp_ihre = + (struct cik_ih_ring_entry *)patched_ihre; + + *patched_flag = true; + *tmp_ihre = *ihre; + + vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); + pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); + + tmp_ihre->ring_id &= 0x000000ff; + tmp_ihre->ring_id |= vmid << 8; + tmp_ihre->ring_id |= pasid << 16; + + return (pasid != 0) && + vmid >= dev->vm_info.first_vmid_kfd && + vmid <= dev->vm_info.last_vmid_kfd; + } + /* Only handle interrupts from KFD VMIDs */ vmid = (ihre->ring_id & 0x0000ff00) >> 8; if (vmid < dev->vm_info.first_vmid_kfd || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 48c505e83217..600751175760 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -577,14 +577,24 @@ dqm_start_error: /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { + uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; + bool is_patched = false; + if (!kfd->init_complete) return; + if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { + dev_err_once(kfd_device, "Ring entry too small\n"); + return; + } + spin_lock(&kfd->interrupt_lock); if (kfd->interrupts_active - && interrupt_is_wanted(kfd, ih_ring_entry) - && enqueue_ih_ring_entry(kfd, ih_ring_entry)) + && interrupt_is_wanted(kfd, ih_ring_entry, + patched_ihre, &is_patched) + && enqueue_ih_ring_entry(kfd, + is_patched ? 
patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d6b64e692760..f836897bbf58 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -26,7 +26,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, - const uint32_t *ih_ring_entry) + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) { uint16_t source_id, client_id, pasid, vmid; const uint32_t *data = ih_ring_entry; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index db6d9336b80d..c56ac47cd318 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work) ih_ring_entry); } -bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) +bool interrupt_is_wanted(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, bool *flag) { /* integer and bitwise OR so there is no boolean short-circuiting */ unsigned int wanted = 0; wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, - ih_ring_entry); + ih_ring_entry, patched_ihre, flag); return wanted != 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 91a3368421b1..cd5121d925e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -180,9 +180,10 @@ enum cache_policy { struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, - const uint32_t *ih_ring_entry); + const uint32_t *ih_ring_entry, uint32_t *patched_ihre, + bool *patched_flag); void (*interrupt_wq)(struct kfd_dev *dev, - const uint32_t *ih_ring_entry); + const uint32_t *ih_ring_entry); }; struct kfd_device_info { @@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev); void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); -bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); +bool interrupt_is_wanted(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, bool *flag); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 28b11d105288..76a30cbeee19 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -276,6 +276,10 @@ struct tile_config { * faults. On GFXv9 VM fault information is fully contained in the IH * packet and this function is not needed. * + * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the + * IH ring entry. This function allows the KFD ISR to get the VMID + * from the fault status register as early as possible. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. 
* @@ -394,6 +398,7 @@ struct kfd2kgd_calls { int (*get_vm_fault_info)(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); + uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); }; /** From 6d15ca0af0e070e67863c46b54d14f3664d925b4 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:52 -0400 Subject: [PATCH 10/36] drm/amd: Add gpu reset interfaces between amdgpu and amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 76a30cbeee19..7df5e4ab839f 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -280,6 +280,8 @@ struct tile_config { * IH ring entry. This function allows the KFD ISR to get the VMID * from the fault status register as early as possible. * + * @gpu_recover: let kgd reset the gpu after kfd detects a CPC hang + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -399,6 +401,8 @@ struct kfd2kgd_calls { int (*get_vm_fault_info)(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); + + void (*gpu_recover)(struct kgd_dev *kgd); }; /** @@ -424,6 +428,10 @@ struct kfd2kgd_calls { * @schedule_evict_and_restore_process: Schedules work queue that will prepare * for safe eviction of KFD BOs that belong to the specified process. * + * @pre_reset: Notifies amdkfd that amdgpu is about to reset the gpu + * + * @post_reset: Notifies amdkfd that amdgpu successfully reset the gpu + * * This structure contains function callback pointers so the kgd driver * will notify to the amdkfd about certain status changes. * @@ -442,6 +450,8 @@ struct kgd2kfd_calls { int (*resume_mm)(struct mm_struct *mm); int (*schedule_evict_and_restore_process)(struct mm_struct *mm, struct dma_fence *fence); + int (*pre_reset)(struct kfd_dev *kfd); + int (*post_reset)(struct kfd_dev *kfd); }; int kgd2kfd_init(unsigned interface_version, From 0c119abad7f0d7987c3ce4ea76b30bde76d0436e Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:53 -0400 Subject: [PATCH 11/36] drm/amd: Add kfd ioctl defines for hw_exception event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 46a54ab1e728..88d17c39dbf9 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -189,6 +189,15 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_SIGNAL_EVENT_LIMIT 4096 +/* For kfd_event_data.hw_exception_data.reset_type. */ +#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 +#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 + +/* For kfd_event_data.hw_exception_data.reset_cause.
*/ +#define KFD_HW_EXCEPTION_GPU_HANG 0 +#define KFD_HW_EXCEPTION_ECC 1 + + struct kfd_ioctl_create_event_args { __u64 event_page_offset; /* from KFD */ __u32 event_trigger_data; /* from KFD - signal events only */ @@ -230,10 +239,19 @@ struct kfd_hsa_memory_exception_data { __u32 pad; }; -/* Event data*/ +/* hw exception data */ +struct kfd_hsa_hw_exception_data { + uint32_t reset_type; + uint32_t reset_cause; + uint32_t memory_lost; + uint32_t gpu_id; +}; + +/* Event data */ struct kfd_event_data { union { struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; }; /* From KFD */ __u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */ From e3b7a967743c2e10457442ffafcb715a41255f28 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:54 -0400 Subject: [PATCH 12/36] drm/amdkfd: Add gpu reset interface and place holder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 ++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 600751175760..a8226d8b86cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -514,6 +514,16 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfree(kfd); } +int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + return 0; +} + +int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + return 0; +} + void kgd2kfd_suspend(struct kfd_dev *kfd) { if (!kfd->init_complete) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 76bf2dc8aec4..ee7bf07db472 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, + .pre_reset = kgd2kfd_pre_reset, + .post_reset = kgd2kfd_post_reset, }; int sched_policy = KFD_SCHED_POLICY_HWS; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cd5121d925e0..4bc8d5af419d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -815,6 +815,10 @@ bool interrupt_is_wanted(struct kfd_dev *dev, void kgd2kfd_suspend(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd); +/* GPU reset */ +int kgd2kfd_pre_reset(struct kfd_dev *kfd); +int kgd2kfd_post_reset(struct kfd_dev *kfd); + /* amdkfd Apertures */ int kfd_init_apertures(struct kfd_process *process); From 5c6dd71e597f33d517710affe3b8d8de253bc86d Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:55 -0400 Subject: [PATCH 13/36] drm/amdgpu: Call KFD reset handlers during GPU reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ 3 
files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e3ed08dca7b7..f3a3aba08c2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -243,6 +243,26 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) return r; } +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd) + r = kgd2kfd->pre_reset(adev->kfd); + + return r; +} + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd) + r = kgd2kfd->post_reset(adev->kfd); + + return r; +} + int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 3dc76d9b4d12..7dc551b8f682 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -126,6 +126,10 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ec53d8f96d06..b895584bb99f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; + /* Block kfd */ + amdgpu_amdkfd_pre_reset(adev); + /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); @@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); } + /*unlock kfd */ + amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); From e42051d2133b7912db99bd3a307c9219a88fe7c2 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:56 -0400 Subject: [PATCH 14/36] drm/amdkfd: Implement GPU reset handlers in KFD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lock KFD and evict existing queues on reset. Notify user mode by signaling hw_exception events. 
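In outline, the two handlers added below bracket the reset window. A condensed sketch of the sequence (init_complete checks, error handling and the reference-count warnings are omitted; the hunks below are authoritative):

	int kgd2kfd_pre_reset(struct kfd_dev *kfd)
	{
		kgd2kfd_suspend(kfd);        /* raise kfd_locked, evict process queues */
		dqm_lock(kfd->dqm);          /* block any further queue management */
		kfd_signal_reset_event(kfd); /* raise hw_exception events to user mode */
		return 0;
	}

	int kgd2kfd_post_reset(struct kfd_dev *kfd)
	{
		dqm_unlock(kfd->dqm);
		kfd_resume(kfd);             /* restart the device queue manager */
		atomic_dec(&kfd_locked);     /* let new kfd_open() calls through again */
		return 0;
	}

Existing processes remain evicted after the reset; only the kfd_locked state is dropped so that newly created processes can open the device.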
Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 43 ++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 27 +++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +++ 5 files changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7e717716b90e..b5338bff8cef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -122,6 +122,9 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); + if (kfd_is_locked()) + return -EAGAIN; + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", process->pasid, process->is_32bit_user_mode); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8226d8b86cc..9f63ac366284 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -30,7 +30,13 @@ #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 -static atomic_t kfd_device_suspended = ATOMIC_INIT(0); + +/* + * kfd_locked is used to lock the kfd driver during suspend or reset. + * Once locked, the kfd driver will stop any further GPU execution and + * create process (open) will return -EAGAIN. + */ +static atomic_t kfd_locked = ATOMIC_INIT(0); #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { @@ -516,21 +522,52 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) int kgd2kfd_pre_reset(struct kfd_dev *kfd) { + if (!kfd->init_complete) + return 0; + kgd2kfd_suspend(kfd); + + /* hold dqm->lock to prevent further execution */ + dqm_lock(kfd->dqm); + + kfd_signal_reset_event(kfd); return 0; } +/* + * Fixme: KFD won't be able to resume existing processes for now. + * We will keep all existing processes in an evicted state and + * wait for them to be terminated. + */ + int kgd2kfd_post_reset(struct kfd_dev *kfd) { + int ret, count; + + if (!kfd->init_complete) + return 0; + + dqm_unlock(kfd->dqm); + + ret = kfd_resume(kfd); + if (ret) + return ret; + count = atomic_dec_return(&kfd_locked); + WARN_ONCE(count != 0, "KFD reset ref. error"); return 0; } +bool kfd_is_locked(void) +{ + return (atomic_read(&kfd_locked) > 0); +} + void kgd2kfd_suspend(struct kfd_dev *kfd) { if (!kfd->init_complete) return; /* For first KFD device suspend all the KFD processes */ - if (atomic_inc_return(&kfd_device_suspended) == 1) + if (atomic_inc_return(&kfd_locked) == 1) kfd_suspend_all_processes(); kfd->dqm->ops.stop(kfd->dqm); @@ -549,7 +586,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) if (ret) return ret; - count = atomic_dec_return(&kfd_device_suspended); + count = atomic_dec_return(&kfd_locked); WARN_ONCE(count < 0, "KFD suspend / resume ref.
error"); if (count == 0) ret = kfd_resume_all_processes(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index b58a0e665ebc..820133cdef83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1000,3 +1000,30 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, mutex_unlock(&p->event_mutex); kfd_unref_process(p); } + +void kfd_signal_reset_event(struct kfd_dev *dev) +{ + struct kfd_hsa_hw_exception_data hw_exception_data; + struct kfd_process *p; + struct kfd_event *ev; + unsigned int temp; + uint32_t id, idx; + + /* Whole gpu reset caused by GPU hang and memory is lost */ + memset(&hw_exception_data, 0, sizeof(hw_exception_data)); + hw_exception_data.gpu_id = dev->id; + hw_exception_data.memory_lost = 1; + + idx = srcu_read_lock(&kfd_processes_srcu); + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->event_mutex); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; + idr_for_each_entry_continue(&p->event_idr, ev, id) + if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + ev->hw_exception_data = hw_exception_data; + set_event(ev); + } + mutex_unlock(&p->event_mutex); + } + srcu_read_unlock(&kfd_processes_srcu, idx); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index abca5bfebbff..c7ac6c73af86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -66,6 +66,7 @@ struct kfd_event { /* type specific data */ union { struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; }; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4bc8d5af419d..2e03d6c80aa0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -975,10 +975,14 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, struct kfd_vm_fault_info *info); +void kfd_signal_reset_event(struct kfd_dev *dev); + void kfd_flush_tlb(struct kfd_process_device *pdd); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); +bool kfd_is_locked(void); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) From 24da5a9ca6c225ecdc33ea9c7c1b3aa0debed203 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:57 -0400 Subject: [PATCH 15/36] drm/amdgpu: Enable the gpu reset from KFD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook up the gpu_recover callback from KFD to amdgpu to enable handling of GPU hangs detected by KFD. 
Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 + 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f3a3aba08c2f..454c22c722c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -263,6 +263,13 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) return r; } +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + amdgpu_device_gpu_recover(adev, NULL, false); +} + int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 7dc551b8f682..60207ea48bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -130,6 +130,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b4a05c510c75..70b275a711c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -218,7 +218,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, - .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg + .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .gpu_recover = amdgpu_amdkfd_gpu_reset }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index c68ef85f7753..6053b1d143ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -176,7 +176,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, + .gpu_recover = amdgpu_amdkfd_gpu_reset }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1db60aa5b7f0..56d29cfd8227 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -213,6 +213,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .gpu_recover = amdgpu_amdkfd_gpu_reset }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) 
From 73ea648d921ed5c58895c8592321456fe12b697f Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:58 -0400 Subject: [PATCH 16/36] drm/amdkfd: Implement hang detection in KFD and call amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reset will be performed in a new hw_exception work thread to handle HWS hang without blocking the thread that detected the hang. Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 21 ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 4 ++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 44fc2038770e..6b59eab39fbe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -61,6 +61,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id); +static void kfd_process_hw_exception(struct work_struct *work); + static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { @@ -1010,6 +1012,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_runlist = false; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); + return 0; } @@ -1042,6 +1046,8 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); dqm_lock(dqm); + /* clear hang status when driver try to start the hw scheduler */ + dqm->is_hws_hang = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1255,6 +1261,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, { int retval = 0; + if (dqm->is_hws_hang) + return -EIO; if (!dqm->active_runlist) return retval; @@ -1293,9 +1301,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, { int retval; + if (dqm->is_hws_hang) + return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + schedule_work(&dqm->hw_exception_work); return retval; } @@ -1543,7 +1555,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } retval = execute_queues_cpsch(dqm, filter, 0); - if (retval || qpd->reset_wavefronts) { + if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); qpd->reset_wavefronts = false; @@ -1701,6 +1713,13 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, return ret; } +static void kfd_process_hw_exception(struct work_struct *work) +{ + struct device_queue_manager *dqm = container_of(work, + struct device_queue_manager, hw_exception_work); + dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); +} + #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 0a23ddac345e..70179a6826f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -193,6 +193,10 @@ struct device_queue_manager { struct kfd_mem_obj *fence_mem; bool active_runlist; int sched_policy; + + /* hw exception */ + bool is_hws_hang; + struct work_struct hw_exception_work; }; void device_queue_manager_init_cik( From b5d21aac33179e55807d0fb0c0e1f694c1dce2c3 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:32:59 -0400 Subject: [PATCH 17/36] drm/amdgpu: Don't use shadow BO for compute context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute contexts cannot keep going after a GPU reset. Currently the process must terminate. In the future a process may be able to recreate its context from scratch. Either way, there is no need to restore the GPUVM page table from shadow BOs. Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5d7d7900ccab..9eedc9810004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, eaddr = eaddr & ((1 << shift) - 1); flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + if (vm->root.base.bo->shadow) + flags |= AMDGPU_GEM_CREATE_SHADOW; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else - flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW); + flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { @@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else + else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE) flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); @@ -2662,8 +2663,7 @@ error_free_sched_entity: * - pasid (old PASID is released, because compute manages its own PASIDs) * * Reinitializes the page directory to reflect the changed ATS - * setting. May leave behind an unused shadow BO for the page - * directory when switching from SDMA updates to CPU updates. + * setting. * * Returns: * 0 for success, -errno for errors.
@@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } + /* Free the shadow bo for compute VM */ + amdgpu_bo_unref(&vm->root.base.bo->shadow); + error: amdgpu_bo_unreserve(vm->root.base.bo); return r; From 67ccea60591a0515c885af9bf8dfb932a905457f Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:33:00 -0400 Subject: [PATCH 18/36] drm/amdgpu: Check NULL pointer for job before reset job's ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit job could be NULL when amdgpu_device_gpu_recover is called Signed-off-by: Shaoyun Liu Reviewed-by: Andrey Grodzovsky Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b895584bb99f..13acef526c5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3325,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (job && job->base.sched == &ring->sched) continue; - drm_sched_hw_job_reset(&ring->sched, &job->base); + drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); From 951df6d9cfd07f205f1905bf3b27d994612e0614 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:33:01 -0400 Subject: [PATCH 19/36] drm/amdkfd: Fix kernel queue 64 bit doorbell offset calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bitmap index calculation should reverse the logic used on allocation so it will clear the same bit used on allocation Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index c3744d89352c..ebe79bf00145 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, *doorbell_off = kfd->doorbell_id_offset + inx; pr_debug("Get kernel queue doorbell\n" - " doorbell offset == 0x%08X\n" - " kernel address == %p\n", - *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); + " doorbell offset == 0x%08X\n" + " doorbell index == 0x%x\n", + *doorbell_off, inx); return kfd->doorbell_kernel_ptr + inx; } @@ -199,7 +199,8 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) { unsigned int inx; - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) + * sizeof(u32) / kfd->device_info->doorbell_size; mutex_lock(&kfd->doorbell_mutex); __clear_bit(inx, kfd->doorbell_available_index); From bff418a2ee5df84bd75ac43659af7289ec1bab1c Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:33:02 -0400 Subject: [PATCH 20/36] drm/amdgpu: Avoid invalidate tlbs when gpu is on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: 
Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 70b275a711c2..2551a4547393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -886,6 +886,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; unsigned int tmp; + if (adev->in_gpu_reset) + return -EIO; + for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6053b1d143ae..55749ce4f166 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -846,6 +846,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; unsigned int tmp; + if (adev->in_gpu_reset) + return -EIO; + for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 56d29cfd8227..847f6c1e3624 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -867,6 +867,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int vmid; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + if (adev->in_gpu_reset) + return -EIO; + if (ring->ready) return invalidate_tlbs_with_kiq(adev, pasid); From 1b0bfcff463f390c4032ebe36a4d5fb777c00a4c Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:33:03 -0400 Subject: [PATCH 21/36] drm/amdgpu: Avoid destroy hqd when GPU is on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 2551a4547393..98eac00e5d32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -575,6 +575,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, unsigned long flags, end_jiffies; int retry; + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 55749ce4f166..ad563b1ee547 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -570,6 +570,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, int retry; struct vi_mqd *m = get_mqd(mqd); + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 847f6c1e3624..3b07d37b6fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -680,6 +680,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); + if (adev->in_gpu_reset) + return -EIO; + acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) From a29ec470b19e58044005973301f233e0b20ed8c4 Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Wed, 11 Jul 2018 22:33:04 -0400 Subject: [PATCH 22/36] drm/amdkfd: Add debugfs interface to trigger HWS hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Shaoyun Liu Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 48 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 +++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 12 +++++ .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 ++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++ 5 files changed, 113 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 4bd6ebfaf425..ab37d36d9cd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -21,6 +21,8 @@ */ #include +#include + #include "kfd_priv.h" static struct dentry *debugfs_root; @@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file) return single_open(file, show, NULL); } +static ssize_t kfd_debugfs_hang_hws_write(struct file *file, + const char __user *user_buf, size_t size, loff_t *ppos) +{ + struct kfd_dev *dev; + char tmp[16]; + uint32_t gpu_id; + int ret = -EINVAL; + + memset(tmp, 0, 16); + if (size >= 16) { + pr_err("Invalid input for gpu id.\n"); + goto out; + } + if (copy_from_user(tmp, user_buf, size)) { + ret = -EFAULT; + goto out; + } + if (kstrtoint(tmp, 10, &gpu_id)) { + pr_err("Invalid input for gpu id.\n"); + goto out; + } + dev = kfd_device_by_id(gpu_id); + if (dev) { + kfd_debugfs_hang_hws(dev); + ret = size; + } else + pr_err("Cannot find device %d.\n", gpu_id); + +out: + return ret; +} + static const struct file_operations kfd_debugfs_fops = { .owner = THIS_MODULE, .open = kfd_debugfs_open, @@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = { .release = single_release, }; +static const struct file_operations kfd_debugfs_hang_hws_fops = { + .owner = THIS_MODULE, + .open = kfd_debugfs_open, + .read = seq_read, + .write = kfd_debugfs_hang_hws_write, + .llseek = seq_lseek, + .release = single_release, +}; + void kfd_debugfs_init(void) { struct dentry *ent; @@ -65,6 +108,11 @@ void kfd_debugfs_init(void) ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, &kfd_debugfs_fops); + + ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root, + NULL, + &kfd_debugfs_hang_hws_fops); + if (!ent) pr_warn("Failed to create rls in kfd debugfs\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9f63ac366284..8faa8db3eba5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -914,3 +914,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) kfree(mem_obj); return 0; } + +#if defined(CONFIG_DEBUG_FS) + +/* This function will send a package to HIQ to 
hang the HWS + * which will trigger a GPU reset and bring the HWS back to normal state + */ +int kfd_debugfs_hang_hws(struct kfd_dev *dev) +{ + int r = 0; + + if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { + pr_err("HWS is not enabled"); + return -EINVAL; + } + + r = pm_debugfs_hang_hws(&dev->dqm->packets); + if (!r) + r = dqm_debugfs_execute_queues(dev->dqm); + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6b59eab39fbe..32e93b53e7e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1801,4 +1801,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return r; } +int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) +{ + int r = 0; + + dqm_lock(dqm); + dqm->active_runlist = true; + r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + dqm_unlock(dqm); + + return r; +} + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index c317feb43f69..1092631765cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -418,4 +418,30 @@ out: return 0; } +int pm_debugfs_hang_hws(struct packet_manager *pm) +{ + uint32_t *buffer, size; + int r = 0; + + size = pm->pmf->query_status_size; + mutex_lock(&pm->lock); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + r = -ENOMEM; + goto out; + } + memset(buffer, 0x55, size); + pm->priv_queue->ops.submit_packet(pm->priv_queue); + + pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", + buffer[0], buffer[1], buffer[2], buffer[3], + buffer[4], buffer[5], buffer[6]); +out: + mutex_unlock(&pm->lock); + return r; +} + + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2e03d6c80aa0..d9bf70b52857 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -995,6 +995,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data); int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); int pm_debugfs_runlist(struct seq_file *m, void *data); +int kfd_debugfs_hang_hws(struct kfd_dev *dev); +int pm_debugfs_hang_hws(struct packet_manager *pm); +int dqm_debugfs_execute_queues(struct device_queue_manager *dqm); + #else static inline void kfd_debugfs_init(void) {} From 0e9a860c72ec387140a0feb4b8d9a6d0004e9316 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 11 Jul 2018 22:33:05 -0400 Subject: [PATCH 23/36] drm/amdkfd: Introduce KFD module parameter halt_if_hws_hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids triggering a GPU reset or otherwise changing the HW state. Instead KFD will hang, which allows HW debugging tools to analyze the problem. 
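The effect of the flag, condensed from the fence-timeout hunk below: when the HWS fails to signal the scheduler fence in time, the driver thread parks itself instead of returning -ETIME, leaving the CP state untouched for inspection:

	if (time_after(jiffies, end_jiffies)) {
		pr_err("qcm fence wait loop timeout expired\n");
		/* with halt_if_hws_hang set, spin here instead of failing */
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

Since the parameter is registered with 0644 permissions it should also be togglable at runtime, presumably via /sys/module/amdkfd/parameters/halt_if_hws_hang when the driver is loaded as the amdkfd module, or set on the modprobe command line.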
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 32e93b53e7e8..5d05d125c3c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1217,6 +1217,13 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, while (*fence_addr != fence_value) { if (time_after(jiffies, end_jiffies)) { pr_err("qcm fence wait loop timeout expired\n"); + /* In HWS case, this is used to halt the driver thread + * in order not to mess up CP states before doing + * scandumps for FW debugging. + */ + while (halt_if_hws_hang) + schedule(); + return -ETIME; } schedule(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index ee7bf07db472..3a8c15ad0c64 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -92,6 +92,10 @@ MODULE_PARM_DESC(noretry, static int amdkfd_init_completed; +int halt_if_hws_hang; +module_param(halt_if_hws_hang, int, 0644); +MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + int kgd2kfd_init(unsigned int interface_version, const struct kgd2kfd_calls **g2f) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d9bf70b52857..8473e7b3dcc2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -144,6 +144,11 @@ extern int ignore_crat; */ extern int vega10_noretry; +/* + * Halt if HWS hang is detected + */ +extern int halt_if_hws_hang; + /** * enum kfd_sched_policy * From 2b281977f5e072171bbd9e562c1dd27f0fe8461f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 11 Jul 2018 22:33:06 -0400 Subject: [PATCH 24/36] drm/amdkfd: Use module parameters noretry as the internal variable name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes all module parameters use the same form. Meanwhile clean up the surrounding code. 
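Concretely, taken from the hunk below, the declaration moves from the _named form, which only existed to bridge the mismatched parameter and variable names, to the plain form where both share one name:

	/* before: parameter "noretry" backed by a differently named variable */
	int vega10_noretry;
	module_param_named(noretry, vega10_noretry, int, 0644);

	/* after: variable renamed, so the simple form suffices */
	int noretry;
	module_param(noretry, int, 0644);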
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 14 ++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 79e5bcf6367c..417515332c35 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (vega10_noretry && + if (noretry && !dqm->dev->device_info->needs_iommu_device) qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 3a8c15ad0c64..6e1f5c7c2d4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -63,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc, int cwsr_enable = 1; module_param(cwsr_enable, int, 0444); -MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))"); int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); @@ -85,17 +85,19 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); -int vega10_noretry; -module_param_named(noretry, vega10_noretry, int, 0644); +int noretry; +module_param(noretry, int, 0644); MODULE_PARM_DESC(noretry, - "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); - -static int amdkfd_init_completed; + "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled (default), 1 = retry disabled)"); int halt_if_hws_hang; module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + +static int amdkfd_init_completed; + + int kgd2kfd_init(unsigned int interface_version, const struct kgd2kfd_calls **g2f) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8473e7b3dcc2..0646eda473df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -142,7 +142,7 @@ extern int ignore_crat; /* * Set sh_mem_config.retry_disable on Vega10 */ -extern int vega10_noretry; +extern int noretry; /* * Halt if HWS hang is detected From 8d5f355290880331e265d4c3f3b66c805969f18e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 11 Jul 2018 22:33:07 -0400 Subject: [PATCH 25/36] drm/amdkfd: Replace mqd with mqd_mgr as the variable name for mqd_manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will make reading code much easier. 
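The gain is that struct queue already carries a member named mqd (the MQD buffer itself), so a local struct mqd_manager * that was also called mqd read ambiguously next to it. After the rename a typical call site, as in the hunks below, keeps the two apart at a glance:

	struct mqd_manager *mqd_mgr;	/* per-type manager: init/load/destroy callbacks */

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);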
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 105 +++++++++--------- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 17 +-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 2 +- .../amd/amdkfd/kfd_process_queue_manager.c | 8 +- 5 files changed, 68 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5d05d125c3c1..97c9f10ff473 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -348,10 +348,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd) + mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd_mgr) return -ENOMEM; retval = allocate_hqd(dqm, q); @@ -362,7 +362,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_deallocate_hqd; - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; @@ -376,15 +376,15 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (!q->properties.is_active) return 0; - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, - q->process->mm); + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); if (retval) goto out_uninit_mqd; return 0; out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_hqd: @@ -401,11 +401,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) + if (!mqd_mgr) return -ENOMEM; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { @@ -422,14 +422,14 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_doorbell(qpd, q); - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval == -ETIME) qpd->reset_wavefronts = true; - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); list_del(&q->list); if (list_empty(&qpd->queues_list)) { @@ -469,7 +469,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; bool prev_active = false; @@ -479,9 +479,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = -ENODEV; goto out_unlock; } - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { 
retval = -ENOMEM; goto out_unlock; } @@ -508,7 +508,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) { - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) { @@ -517,7 +517,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); /* * check active state vs. the previous state and modify @@ -535,7 +535,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || q->properties.type == KFD_QUEUE_TYPE_SDMA)) - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); out_unlock: @@ -546,29 +546,29 @@ out_unlock: static struct mqd_manager *get_mqd_manager( struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) { - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; pr_debug("mqd type %d\n", type); - mqd = dqm->mqds[type]; - if (!mqd) { - mqd = mqd_manager_init(type, dqm->dev); - if (!mqd) + mqd_mgr = dqm->mqd_mgrs[type]; + if (!mqd_mgr) { + mqd_mgr = mqd_manager_init(type, dqm->dev); + if (!mqd_mgr) pr_err("mqd manager is NULL"); - dqm->mqds[type] = mqd; + dqm->mqd_mgrs[type] = mqd_mgr; } - return mqd; + return mqd_mgr; } static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct queue *q; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; int retval = 0; @@ -584,16 +584,16 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_active) continue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { /* should not be here */ + if (!mqd_mgr) { /* should not be here */ pr_err("Cannot evict queue, mqd mgr is NULL\n"); retval = -ENOMEM; goto out; } q->properties.is_evicted = true; q->properties.is_active = false; - retval = mqd->destroy_mqd(mqd, q->mqd, + retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) @@ -643,7 +643,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct queue *q; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; uint32_t pd_base; int retval = 0; @@ -679,16 +679,16 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) continue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { /* should not be here */ + if (!mqd_mgr) { /* should not be here */ pr_err("Cannot restore queue, mqd mgr is NULL\n"); retval = -ENOMEM; goto out; } q->properties.is_evicted = false; q->properties.is_active = true; - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + retval = 
mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) @@ -868,7 +868,7 @@ static void uninitialize(struct device_queue_manager *dqm) kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) - kfree(dqm->mqds[i]); + kfree(dqm->mqd_mgrs[i]); mutex_destroy(&dqm->lock_hidden); kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } @@ -912,11 +912,11 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; int retval; - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd) + mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); + if (!mqd_mgr) return -ENOMEM; retval = allocate_sdma_queue(dqm, &q->sdma_id); @@ -935,19 +935,20 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; - retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties, + NULL); if (retval) goto out_uninit_mqd; return 0; out_uninit_mqd: - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: @@ -1123,7 +1124,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; retval = 0; @@ -1150,10 +1151,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto out_deallocate_doorbell; } @@ -1170,7 +1171,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; - retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) goto out_deallocate_doorbell; @@ -1326,7 +1327,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; bool preempt_all_queues; preempt_all_queues = false; @@ -1346,9 +1347,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto failed; } @@ -1370,7 +1371,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); /* * Unconditionally decrement this counter, regardless of the queue's @@ -1520,7 +1521,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, int retval; struct queue *q, *next; struct kernel_queue *kq, *kq_next; - struct mqd_manager *mqd; + 
struct mqd_manager *mqd_mgr; struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; @@ -1570,15 +1571,15 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* lastly, free mqd resources */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd = dqm->ops.get_mqd_manager(dqm, + mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd) { + if (!mqd_mgr) { retval = -ENOMEM; goto out; } list_del(&q->list); qpd->queue_count--; - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); } out: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 70179a6826f4..52e708c4f9a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -172,7 +172,7 @@ struct device_queue_manager { struct device_queue_manager_ops ops; struct device_queue_manager_asic_ops asic_ops; - struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; + struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; struct packet_manager packets; struct kfd_dev *dev; struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 476951d8c91c..2c8897e9073d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, switch (type) { case KFD_QUEUE_TYPE_DIQ: case KFD_QUEUE_TYPE_HIQ: - kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, + kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, KFD_MQD_TYPE_HIQ); break; default: @@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, return false; } - if (!kq->mqd) + if (!kq->mqd_mgr) return false; prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); @@ -130,7 +130,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->queue->device = dev; kq->queue->process = kfd_get_process(current); - retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, + retval = kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd, &kq->queue->mqd_mem_obj, &kq->queue->gart_mqd_addr, &kq->queue->properties); @@ -142,9 +142,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, pr_debug("Assigning hiq to hqd\n"); kq->queue->pipe = KFD_CIK_HIQ_PIPE; kq->queue->queue = KFD_CIK_HIQ_QUEUE; - kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, - kq->queue->queue, &kq->queue->properties, - NULL); + kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd, + kq->queue->pipe, kq->queue->queue, + &kq->queue->properties, NULL); } else { /* allocate fence for DIQ */ @@ -182,7 +182,7 @@ err_get_kernel_doorbell: static void uninitialize(struct kernel_queue *kq) { if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) - kq->mqd->destroy_mqd(kq->mqd, + kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, kq->queue->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -191,7 +191,8 @@ static void uninitialize(struct kernel_queue *kq) else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); - kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj); + kq->mqd_mgr->uninit_mqd(kq->mqd_mgr, kq->queue->mqd, + kq->queue->mqd_mem_obj); kfd_gtt_sa_free(kq->dev, 
kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 97aff2041a5d..a7116a939029 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -70,7 +70,7 @@ struct kernel_queue { /* data */ struct kfd_dev *dev; - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; struct queue *queue; uint64_t pending_wptr64; uint32_t pending_wptr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d65ce0436b31..f85e5c06010e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -387,7 +387,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) struct process_queue_node *pqn; struct queue *q; enum KFD_MQD_TYPE mqd_type; - struct mqd_manager *mqd_manager; + struct mqd_manager *mqd_mgr; int r = 0; list_for_each_entry(pqn, &pqm->queues, process_queue_list) { @@ -410,11 +410,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q->properties.type, q->device->id); continue; } - mqd_manager = q->device->dqm->ops.get_mqd_manager( + mqd_mgr = q->device->dqm->ops.get_mqd_manager( q->device->dqm, mqd_type); } else if (pqn->kq) { q = pqn->kq->queue; - mqd_manager = pqn->kq->mqd; + mqd_mgr = pqn->kq->mqd_mgr; switch (q->properties.type) { case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", @@ -434,7 +434,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) continue; } - r = mqd_manager->debugfs_show_mqd(m, q->mqd); + r = mqd_mgr->debugfs_show_mqd(m, q->mqd); if (r != 0) break; } From e7016d8e6f1b92888cc6093fd4e09d9f85934eb9 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 11 Jul 2018 22:33:08 -0400 Subject: [PATCH 26/36] drm/amdkfd: Clean up reference of radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/cik_int.h | 5 +-- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 37 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- .../amd/amdkfd/kfd_process_queue_manager.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h index a2079a04a673..76f8677a7926 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_int.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h @@ -20,8 +20,8 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef HSA_RADEON_CIK_INT_H_INCLUDED -#define HSA_RADEON_CIK_INT_H_INCLUDED +#ifndef CIK_INT_H_INCLUDED +#define CIK_INT_H_INCLUDED #include @@ -34,7 +34,6 @@ struct cik_ih_ring_entry { #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 -#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 #define CIK_INTSRC_SDMA_TRAP 0xE0 #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF #define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index afb26f205d29..a3441b0e385b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -38,7 +38,6 @@ #include "kfd_dbgmgr.h" #include "kfd_dbgdev.h" #include "kfd_device_queue_manager.h" -#include "../../radeon/cik_reg.h" static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h index 03424c20920c..0619c777b47e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -60,6 +60,9 @@ enum { SH_REG_SIZE = SH_REG_END - SH_REG_BASE }; +/* SQ_CMD definitions */ +#define SQ_CMD 0x8DEC + enum SQ_IND_CMD_CMD { SQ_IND_CMD_CMD_NULL = 0x00000000, SQ_IND_CMD_CMD_HALT = 0x00000001, @@ -190,4 +193,38 @@ union ULARGE_INTEGER { void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, enum DBGDEV_TYPE type); +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +enum { + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits in order to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + #endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0646eda473df..37d179ed51dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -73,7 +73,7 @@ /* * When working with cp scheduler we should assign the HIQ manually or via - * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot + * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot * definitions for Kaveri. In Kaveri only the first ME queues participates * in the cp scheduling taking that in mind we set the HIQ slot in the * second ME. 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index f85e5c06010e..1303b1457950 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -209,7 +209,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { - pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } From f3ed5df84c268235fb1d489ab8979af423da02de Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:43 -0400 Subject: [PATCH 27/36] drm/amdkfd: Consolidate duplicate memory banks info in topology If there are several memory banks that has the same properties in CRAT, we aggregate them into one memory bank. This cleans up memory banks on APUs (e.g. Raven) where the CRAT reports each memory channel as a separate bank. This only confuses user mode, which only deals with virtual memory. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 57 +++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 296b3f230280..ee4996029a86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -189,6 +189,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, return 0; } +static struct kfd_mem_properties * +find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, + struct kfd_topology_device *dev) +{ + struct kfd_mem_properties *props; + + list_for_each_entry(props, &dev->mem_props, list) { + if (props->heap_type == heap_type + && props->flags == flags + && props->width == width) + return props; + } + + return NULL; +} /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct * topology device present in the device_list */ @@ -197,36 +212,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, { struct kfd_mem_properties *props; struct kfd_topology_device *dev; + uint32_t heap_type; + uint64_t size_in_bytes; + uint32_t flags = 0; + uint32_t width; pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", mem->proximity_domain); list_for_each_entry(dev, device_list, list) { if (mem->proximity_domain == dev->proximity_domain) { - props = kfd_alloc_struct(props); - if (!props) - return -ENOMEM; - /* We're on GPU node */ if (dev->node_props.cpu_cores_count == 0) { /* APU */ if (mem->visibility_type == 0) - props->heap_type = + heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; /* dGPU */ else - props->heap_type = mem->visibility_type; + heap_type = mem->visibility_type; } else - props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) - props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; + flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) - props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; + flags |= HSA_MEM_FLAGS_NON_VOLATILE; - props->size_in_bytes = + size_in_bytes = ((uint64_t)mem->length_high << 32) + mem->length_low; - props->width = 
mem->width; + width = mem->width; + + /* Multiple banks of the same type are aggregated into + * one. User mode doesn't care about multiple physical + * memory segments. It's managed as a single virtual + * heap for user mode. + */ + props = find_subtype_mem(heap_type, flags, width, dev); + if (props) { + props->size_in_bytes += size_in_bytes; + break; + } + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->heap_type = heap_type; + props->flags = flags; + props->size_in_bytes = size_in_bytes; + props->width = width; dev->node_props.mem_banks_count++; list_add_tail(&props->list, &dev->mem_props); From 98bb92222eef6ac022f352aa4224f4c94a119199 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:44 -0400 Subject: [PATCH 28/36] drm/amdkfd: Make SDMA engine number an ASIC-dependent variable On Raven there is only one SDMA engine instead of previously assumed two, so we need to adapt our code to this new scenario. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 ++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 29 +++++++++++++------ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 6 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + .../amd/amdkfd/kfd_process_queue_manager.c | 3 +- 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8faa8db3eba5..572235cdd061 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -52,6 +52,7 @@ static const struct kfd_device_info kaveri_device_info = { .supports_cwsr = false, .needs_iommu_device = true, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info carrizo_device_info = { @@ -67,6 +68,7 @@ static const struct kfd_device_info carrizo_device_info = { .supports_cwsr = true, .needs_iommu_device = true, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; #endif @@ -83,6 +85,7 @@ static const struct kfd_device_info hawaii_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info tonga_device_info = { @@ -97,6 +100,7 @@ static const struct kfd_device_info tonga_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info tonga_vf_device_info = { @@ -111,6 +115,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .supports_cwsr = false, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info fiji_device_info = { @@ -125,6 +130,7 @@ static const struct kfd_device_info fiji_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info fiji_vf_device_info = { @@ -139,6 +145,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; @@ -154,6 +161,7 @@ static const struct kfd_device_info polaris10_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info polaris10_vf_device_info = { @@ -168,6 +176,7 @@ static const 
struct kfd_device_info polaris10_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info polaris11_device_info = { @@ -182,6 +191,7 @@ static const struct kfd_device_info polaris11_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = true, + .num_sdma_engines = 2, }; static const struct kfd_device_info vega10_device_info = { @@ -196,6 +206,7 @@ static const struct kfd_device_info vega10_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; static const struct kfd_device_info vega10_vf_device_info = { @@ -210,6 +221,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .supports_cwsr = true, .needs_iommu_device = false, .needs_pci_atomics = false, + .num_sdma_engines = 2, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 97c9f10ff473..ace94d6e54cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -101,6 +101,17 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) return dqm->dev->shared_resources.num_pipe_per_mec; } +static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_sdma_engines; +} + +unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_sdma_engines + * KFD_SDMA_QUEUES_PER_ENGINE; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -855,7 +866,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; return 0; } @@ -903,7 +914,7 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, static void deallocate_sdma_queue(struct device_queue_manager *dqm, unsigned int sdma_queue_id) { - if (sdma_queue_id >= CIK_SDMA_QUEUES) + if (sdma_queue_id >= get_num_sdma_queues(dqm)) return; dqm->sdma_bitmap |= (1 << sdma_queue_id); } @@ -923,8 +934,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; - q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); + q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); retval = allocate_doorbell(qpd, q); if (retval) @@ -1011,7 +1022,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; + dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1142,9 +1153,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_unlock; q->properties.sdma_queue_id = - q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = - q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->sdma_id % get_num_sdma_engines(dqm); } retval = allocate_doorbell(qpd, q); @@ -1791,8 +1802,8 @@ int 
dqm_debugfs_hqds(struct seq_file *m, void *data) } } - for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) { - for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) { + for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { + for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { r = dqm->dev->kfd2kgd->hqd_sdma_dump( dqm->dev->kgd, pipe, queue, &dump, &n_regs); if (r) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 52e708c4f9a5..00da3169a004 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -33,10 +33,7 @@ #define KFD_UNMAP_LATENCY_MS (4000) #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) - -#define CIK_SDMA_QUEUES (4) -#define CIK_SDMA_QUEUES_PER_ENGINE (2) -#define CIK_SDMA_ENGINE_NUM (2) +#define KFD_SDMA_QUEUES_PER_ENGINE (2) struct device_process_node { struct qcm_process_device *qpd; @@ -214,6 +211,7 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); +unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 37d179ed51dc..ca83254719fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -203,6 +203,7 @@ struct kfd_device_info { bool supports_cwsr; bool needs_iommu_device; bool needs_pci_atomics; + unsigned int num_sdma_engines; }; struct kfd_mem_obj { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 1303b1457950..eb4e5fb4f2f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -186,8 +186,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= - CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) { + if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { pr_err("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; From eab69801cf4388aeba2c730ce4db746ae164eada Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:45 -0400 Subject: [PATCH 29/36] drm/amdkfd: Avoid flooding dmesg on Raven due to IOMMU issues On Raven Invalid PPRs (peripheral page requests) can be reported because multiple PPRs can be still queued when memory is freed. Apply a rate limit to avoid flooding the log in this case. 
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index c71817963eea..7a61f38c09e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -190,7 +190,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, { struct kfd_dev *dev; - dev_warn(kfd_device, + dev_warn_ratelimited(kfd_device, "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", PCI_BUS_NUM(pdev->devfn), PCI_SLOT(pdev->devfn), From 8725aecac331954e0827d6bed7be02eb7b8f1e9e Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:46 -0400 Subject: [PATCH 30/36] drm/amdkfd: Workaround to accommodate Raven too many PPR issue On Raven multiple PPRs can be queued up by the hardware. When the first of those requests is handled by the IOMMU driver, the memory access succeeds. After that the application may be done with the memory and unmap it. At that point the page table entries are invalidated, but there are still outstanding duplicate PPRs for those addresses. When the IOMMU driver processes those duplicate requests, it finds invalid page table entries and triggers an invalid PPR fault. As a workaround, don't signal invalid PPR faults on Raven to avoid segfaulting applications that haven't done anything wrong. As a side effect, real GPU memory access faults may go unnoticed by the application. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 820133cdef83..4dcacce2db86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -932,13 +932,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, up_read(&mm->mmap_sem); mmput(mm); - mutex_lock(&p->event_mutex); + pr_debug("notpresent %d, noexecute %d, readonly %d\n", + memory_exception_data.failure.NotPresent, + memory_exception_data.failure.NoExecute, + memory_exception_data.failure.ReadOnly); - /* Lookup events by type and signal them */ - lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, - &memory_exception_data); + /* Workaround on Raven to not kill the process when memory is freed + * before IOMMU is able to finish processing all the excessive PPRs + */ + if (dev->device_info->asic_family != CHIP_RAVEN) { + mutex_lock(&p->event_mutex); + + /* Lookup events by type and signal them */ + lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, + &memory_exception_data); + + mutex_unlock(&p->event_mutex); + } - mutex_unlock(&p->event_mutex); kfd_unref_process(p); } #endif /* KFD_SUPPORT_IOMMU_V2 */ From 359cecdd499783abcf4ba6589066db8d8cb58e88 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:47 -0400 Subject: [PATCH 31/36] drm/amdkfd: Optimize out some duplicated code in kfd_signal_iommu_event() memory_exception_data is already initialized for not-present faults. It only needs to be overridden for permission faults. 
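For illustration only (not part of this patch): a condensed sketch of the resulting logic. The helper name fill_failure_bits is made up here; in the patch the code stays inline in kfd_signal_iommu_event().

static void fill_failure_bits(struct kfd_hsa_memory_exception_data *ex,
			      struct vm_area_struct *vma,
			      unsigned long address,
			      bool is_write_requested,
			      bool is_execute_requested)
{
	/* The defaults already describe a not-present fault */
	ex->failure.NotPresent = 1;
	ex->failure.NoExecute = 0;
	ex->failure.ReadOnly = 0;

	/* Only a fault on a mapped address needs to override them */
	if (vma && address >= vma->vm_start) {
		ex->failure.NotPresent = 0;
		ex->failure.ReadOnly =
			is_write_requested && !(vma->vm_flags & VM_WRITE);
		ex->failure.NoExecute =
			is_execute_requested && !(vma->vm_flags & VM_EXEC);
	}
}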
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 26 +++++++++++-------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4dcacce2db86..e9f0e0a1b41c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -911,22 +911,18 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, memory_exception_data.failure.NotPresent = 1; memory_exception_data.failure.NoExecute = 0; memory_exception_data.failure.ReadOnly = 0; - if (vma) { - if (vma->vm_start > address) { - memory_exception_data.failure.NotPresent = 1; - memory_exception_data.failure.NoExecute = 0; + if (vma && address >= vma->vm_start) { + memory_exception_data.failure.NotPresent = 0; + + if (is_write_requested && !(vma->vm_flags & VM_WRITE)) + memory_exception_data.failure.ReadOnly = 1; + else memory_exception_data.failure.ReadOnly = 0; - } else { - memory_exception_data.failure.NotPresent = 0; - if (is_write_requested && !(vma->vm_flags & VM_WRITE)) - memory_exception_data.failure.ReadOnly = 1; - else - memory_exception_data.failure.ReadOnly = 0; - if (is_execute_requested && !(vma->vm_flags & VM_EXEC)) - memory_exception_data.failure.NoExecute = 1; - else - memory_exception_data.failure.NoExecute = 0; - } + + if (is_execute_requested && !(vma->vm_flags & VM_EXEC)) + memory_exception_data.failure.NoExecute = 1; + else + memory_exception_data.failure.NoExecute = 0; } up_read(&mm->mmap_sem); From 4d663df6588709e8763b976310117aa0f9825bc6 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 13 Jul 2018 16:17:48 -0400 Subject: [PATCH 32/36] drm/amdkfd: Enable Raven for KFD Add DID and kfd_device_info for Raven. 
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 572235cdd061..1b048715ab8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -70,6 +70,21 @@ static const struct kfd_device_info carrizo_device_info = { .needs_pci_atomics = false, .num_sdma_engines = 2, }; + +static const struct kfd_device_info raven_device_info = { + .asic_family = CHIP_RAVEN, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = true, + .needs_pci_atomics = true, + .num_sdma_engines = 1, +}; #endif static const struct kfd_device_info hawaii_device_info = { @@ -259,6 +274,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ + { 0x15DD, &raven_device_info }, /* Raven */ #endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ From a7fe68a1e8e4bce007505f729bc33e427c540386 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sat, 14 Jul 2018 19:05:58 -0400 Subject: [PATCH 33/36] drm/amd: Add CU-masking ioctl definition to kfd_ioctl.h Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 88d17c39dbf9..01674b56e14f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args { __u32 queue_priority; /* to KFD */ }; +struct kfd_ioctl_set_cu_mask_args { + __u32 queue_id; /* to KFD */ + __u32 num_cu_mask; /* to KFD */ + __u64 cu_mask_ptr; /* to KFD */ +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -466,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) +#define AMDKFD_IOC_SET_CU_MASK \ + AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1A +#define AMDKFD_COMMAND_END 0x1B #endif From 39e7f331864d2b9e30d5f3fd2121e182b2c9c8a9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sat, 14 Jul 2018 19:05:59 -0400 Subject: [PATCH 34/36] drm/amdkfd: Add CU-masking ioctl to KFD CU-masking allows a KFD client to control the set of CUs used by a user mode queue for executing compute dispatches. This can be used for optimizing the partitioning of the GPU and minimize conflicts between concurrent tasks. 
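For context, a minimal userspace sketch of how a client could drive the new ioctl (this example is not part of the patch; it assumes kfd_fd is an open file descriptor for /dev/kfd and queue_id was returned by an earlier queue-creation ioctl):

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Restrict an existing user mode queue to the first 48 CUs of a 64-bit mask */
static int set_queue_cu_mask(int kfd_fd, uint32_t queue_id)
{
	uint32_t cu_mask[2] = { 0xffffffff, 0x0000ffff };
	struct kfd_ioctl_set_cu_mask_args args = {
		.queue_id = queue_id,
		.num_cu_mask = 64,	/* in bits, must be a multiple of 32 */
		.cu_mask_ptr = (uintptr_t)cu_mask,
	};

	if (ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args) < 0) {
		perror("AMDKFD_IOC_SET_CU_MASK");
		return -1;
	}
	return 0;
}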
Signed-off-by: Flora Cui Signed-off-by: Kent Russell Signed-off-by: Eric Huang Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 58 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 41 ++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 4 ++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 27 +++++++++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 27 +++++++++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 27 +++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++ .../amd/amdkfd/kfd_process_queue_manager.c | 30 ++++++++++ 9 files changed, 219 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b5338bff8cef..297b36c26a05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -392,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, return retval; } +static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, + void *data) +{ + int retval; + const int max_num_cus = 1024; + struct kfd_ioctl_set_cu_mask_args *args = data; + struct queue_properties properties; + uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; + size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); + + if ((args->num_cu_mask % 32) != 0) { + pr_debug("num_cu_mask 0x%x must be a multiple of 32", + args->num_cu_mask); + return -EINVAL; + } + + properties.cu_mask_count = args->num_cu_mask; + if (properties.cu_mask_count == 0) { + pr_debug("CU mask cannot be 0"); + return -EINVAL; + } + + /* To prevent an unreasonably large CU mask size, set an arbitrary + * limit of max_num_cus bits. We can then just drop any CU mask bits + * past max_num_cus bits and just use the first max_num_cus bits. 
+ */ + if (properties.cu_mask_count > max_num_cus) { + pr_debug("CU mask cannot be greater than 1024 bits"); + properties.cu_mask_count = max_num_cus; + cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); + } + + properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL); + if (!properties.cu_mask) + return -ENOMEM; + + retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size); + if (retval) { + pr_debug("Could not copy CU mask from userspace"); + kfree(properties.cu_mask); + return -EFAULT; + } + + mutex_lock(&p->mutex); + + retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties); + + mutex_unlock(&p->mutex); + + if (retval) + kfree(properties.cu_mask); + + return retval; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1557,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, kfd_ioctl_unmap_memory_from_gpu, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, + kfd_ioctl_set_cu_mask, 0), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 2c8897e9073d..9f84b4d9fb88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; prop.eop_ring_buffer_address = kq->eop_gpu_addr; prop.eop_ring_buffer_size = PAGE_SIZE; + prop.cu_mask = NULL; if (init_queue(&kq->queue, &prop) != 0) goto err_init_queue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 4b8eb506642b..3bc25ab84f34 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -21,7 +21,7 @@ * */ -#include "kfd_priv.h" +#include "kfd_mqd_manager.h" struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) @@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, return NULL; } + +void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, + const uint32_t *cu_mask, uint32_t cu_mask_count, + uint32_t *se_mask) +{ + struct kfd_cu_info cu_info; + uint32_t cu_per_sh[4] = {0}; + int i, se, cu = 0; + + mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); + + if (cu_mask_count > cu_info.cu_active_number) + cu_mask_count = cu_info.cu_active_number; + + for (se = 0; se < cu_info.num_shader_engines; se++) + for (i = 0; i < 4; i++) + cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]); + + /* Symmetrically map cu_mask to all SEs: + * cu_mask[0] bit0 -> se_mask[0] bit0; + * cu_mask[0] bit1 -> se_mask[1] bit0; + * ... (if # SE is 4) + * cu_mask[0] bit4 -> se_mask[0] bit1; + * ... 
+ */ + se = 0; + for (i = 0; i < cu_mask_count; i++) { + if (cu_mask[i / 32] & (1 << (i % 32))) + se_mask[se] |= 1 << cu; + + do { + se++; + if (se == cu_info.num_shader_engines) { + se = 0; + cu++; + } + } while (cu >= cu_per_sh[se] && cu < 32); + } +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 8972bcfbf701..4e84052d4e21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -93,4 +93,8 @@ struct mqd_manager { struct kfd_dev *dev; }; +void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, + const uint32_t *cu_mask, uint32_t cu_mask_count, + uint32_t *se_mask); + #endif /* KFD_MQD_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4872574f7a04..47243165a082 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) return (struct cik_sdma_rlc_registers *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("Update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index ad5c9f80cccd..f5fc3675f21e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -41,6 +41,31 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -198,6 +223,8 @@ static int update_mqd(struct 
mqd_manager *mm, void *mqd, if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 89e4242e43e7..b81fda3754da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -43,6 +43,31 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) return (struct vi_sdma_mqd *)mqd; } +static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct vi_mqd *m; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + + if (q->cu_mask_count == 0) + return; + + mqd_symmetrically_map_cu_mask(mm, + q->cu_mask, q->cu_mask_count, se_mask); + + m = get_mqd(mqd); + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + + pr_debug("Update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, + m->compute_static_thread_mgmt_se2, + m->compute_static_thread_mgmt_se3); +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + update_cu_mask(mm, mqd, q); + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0 && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ca83254719fc..f971710f1c91 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -422,6 +422,9 @@ struct queue_properties { uint32_t ctl_stack_size; uint64_t tba_addr; uint64_t tma_addr; + /* Relevant for CU */ + uint32_t cu_mask_count; /* Must be a multiple of 32 */ + uint32_t *cu_mask; }; /** @@ -872,6 +875,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index eb4e5fb4f2f2..c8cad9c078ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -325,6 +325,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + kfree(pqn->q->properties.cu_mask); + pqn->q->properties.cu_mask = NULL; uninit_queue(pqn->q); } @@ -365,6 +367,34 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, return 0; } +int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p) +{ + int retval; + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_debug("No queue %d exists for update 
operation\n", qid); + return -EFAULT; + } + + /* Free the old CU mask memory if it is already allocated, then + * allocate memory for the new CU mask. + */ + kfree(pqn->q->properties.cu_mask); + + pqn->q->properties.cu_mask_count = p->cu_mask_count; + pqn->q->properties.cu_mask = p->cu_mask; + + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); + if (retval != 0) + return retval; + + return 0; +} + struct kernel_queue *pqm_get_kernel_queue( struct process_queue_manager *pqm, unsigned int qid) From 01c097dbfc03ac1357b9c8a1f52fd43d636bc7f7 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 16 Jul 2018 19:10:36 -0400 Subject: [PATCH 35/36] drm/amdgpu: Add kfd2kgd.set_compute_idle interface This allows automatic switching to the compute power profile depending on compute activity. Signed-off-by: Felix Kuehling Reviewed-by: Eric Huang Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 ++- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 +++++ 6 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 454c22c722c2..f8bbbb3a9504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -488,6 +488,14 @@ err: return ret; } +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, !idle); +} + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { if (adev->kfd) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 60207ea48bf5..2f379c183ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 98eac00e5d32..ea3f698aef5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -219,7 +219,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .submit_ib = amdgpu_amdkfd_submit_ib, .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, - .gpu_recover = amdgpu_amdkfd_gpu_reset + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index ad563b1ee547..f6e53e9352bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -177,7 
+177,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, - .gpu_recover = amdgpu_amdkfd_gpu_reset + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 3b07d37b6fd4..8efedfcb9dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -213,7 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, - .gpu_recover = amdgpu_amdkfd_gpu_reset + .gpu_recover = amdgpu_amdkfd_gpu_reset, + .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 7df5e4ab839f..14391b06080c 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -282,6 +282,9 @@ struct tile_config { * * @gpu_recover: let kgd reset gpu after kfd detect CPC hang * + * @set_compute_idle: Indicates that compute is idle on a device. This + * can be used to change power profiles depending on compute activity. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -403,6 +406,8 @@ struct kfd2kgd_calls { uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); void (*gpu_recover)(struct kgd_dev *kgd); + + void (*set_compute_idle)(struct kgd_dev *kgd, bool idle); }; /** From b5aa3f4aef724e9c0f626dcf69948b22efcc5176 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 16 Jul 2018 19:10:37 -0400 Subject: [PATCH 36/36] drm/amdkfd: Call kfd2kgd.set_compute_idle User mode queue submissions don't go through KFD. Therefore we don't know exactly when compute is idle or not idle. We use the existence of user mode queues on a device as an approximation. register_process is called when the first queue of a process is created. Conversely unregister_process is called when the last queue is destroyed. The first process that is registered takes compute out of idle. The last process that unregisters sets compute back to idle.
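In other words, processes_count acts as a reference count and the power profile is only switched on the 0->1 and 1->0 transitions, under the DQM lock. A simplified sketch (the helper names are hypothetical; the real change is inlined in register_process/unregister_process in the diff below):

/* Called with the DQM lock held */
static void dqm_compute_get(struct device_queue_manager *dqm)
{
	if (dqm->processes_count++ == 0)
		dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);
}

static void dqm_compute_put(struct device_queue_manager *dqm)
{
	if (--dqm->processes_count == 0)
		dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, true);
}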
Signed-off-by: Felix Kuehling Reviewed-by: Eric Huang Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ace94d6e54cf..ec0d62a16e53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -782,7 +782,8 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); - dqm->processes_count++; + if (dqm->processes_count++ == 0) + dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false); dqm_unlock(dqm); @@ -805,7 +806,9 @@ static int unregister_process(struct device_queue_manager *dqm, if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - dqm->processes_count--; + if (--dqm->processes_count == 0) + dqm->dev->kfd2kgd->set_compute_idle( + dqm->dev->kgd, true); goto out; } }