diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 5d83fec6dfdd..76310726ee9c 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -72,6 +72,7 @@ #if IS_ENABLED(CONFIG_HMM) +struct hmm; /* * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page @@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) } +#if IS_ENABLED(CONFIG_HMM_MIRROR) +/* + * Mirroring: how to synchronize device page table with CPU page table. + * + * A device driver that is participating in HMM mirroring must always + * synchronize with CPU page table updates. For this, device drivers can either + * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device + * drivers can decide to register one mirror per device per process, or just + * one mirror per process for a group of devices. The pattern is: + * + * int device_bind_address_space(..., struct mm_struct *mm, ...) + * { + * struct device_address_space *das; + * + * // Device driver specific initialization, and allocation of das + * // which contains an hmm_mirror struct as one of its fields. + * ... + * das->mirror.ops = &device_mirror_ops; + * ret = hmm_mirror_register(&das->mirror, mm); + * if (ret) { + * // Cleanup on error + * return ret; + * } + * + * // Other device driver specific initialization + * ... + * } + * + * Once an hmm_mirror is registered for an address space, the device driver + * will get callbacks through sync_cpu_device_pagetables() operation (see + * hmm_mirror_ops struct). + * + * Device driver must not free the struct containing the hmm_mirror struct + * before calling hmm_mirror_unregister(). The expected usage is to do that when + * the device driver is unbinding from an address space. + * + * + * void device_unbind_address_space(struct device_address_space *das) + * { + * // Device driver specific cleanup + * ... + * + * hmm_mirror_unregister(&das->mirror); + * + * // Other device driver specific cleanup, and now das can be freed + * ... 
+ * } + */ + +struct hmm_mirror; + +/* + * enum hmm_update_type - type of update + * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) + */ +enum hmm_update_type { + HMM_UPDATE_INVALIDATE, +}; + +/* + * struct hmm_mirror_ops - HMM mirror device operations callback + * + * @sync_cpu_device_pagetables: callback to synchronize device page tables + */ +struct hmm_mirror_ops { + /* sync_cpu_device_pagetables() - synchronize page tables + * + * @mirror: pointer to struct hmm_mirror + * @update_type: type of update that occurred to the CPU page table + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * + * This callback ultimately originates from mmu_notifiers when the CPU + * page table is updated. The device driver must update its page table + * in response to this callback. The update_type argument tells what + * action to perform. + * + * The device driver must not return from this callback until the device + * page tables are completely updated (TLBs flushed, etc); this is a + * synchronous call. + */ + void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + enum hmm_update_type update_type, + unsigned long start, + unsigned long end); +}; + +/* + * struct hmm_mirror - mirror struct for a device driver + * + * @hmm: pointer to struct hmm (which is unique per mm_struct) + * @ops: device driver callback for HMM mirror operations + * @list: for list of mirrors of a given mm + * + * Each address space (mm_struct) being mirrored by a device must register one + * instance of an hmm_mirror struct with HMM. HMM will track the list of all + * mirrors for each mm_struct. + */ +struct hmm_mirror { + struct hmm *hmm; + const struct hmm_mirror_ops *ops; + struct list_head list; +}; + +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); +void hmm_mirror_unregister(struct hmm_mirror *mirror); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + /* Below are for HMM internal use only! 
Not to be used by device driver! */ void hmm_mm_destroy(struct mm_struct *mm); diff --git a/mm/Kconfig b/mm/Kconfig index 037fa26d16a2..254db99f263d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -705,6 +705,18 @@ config ARCH_HAS_HMM config HMM bool +config HMM_MIRROR + bool "HMM mirror CPU page table into a device page table" + depends on ARCH_HAS_HMM + select MMU_NOTIFIER + select HMM + help + Select HMM_MIRROR if you want to mirror range of the CPU page table of a + process into a device page table. Here, mirror means "keep synchronized". + Prerequisites: the device must provide the ability to write-protect its + page tables (at PAGE_SIZE granularity), and must be able to recover from + the resulting potential page faults. + config FRAME_VECTOR bool diff --git a/mm/hmm.c b/mm/hmm.c index de032ff9e576..d37daf9edcd3 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -21,16 +21,27 @@ #include #include #include +#include #ifdef CONFIG_HMM +static const struct mmu_notifier_ops hmm_mmu_notifier_ops; + /* * struct hmm - HMM per mm struct * * @mm: mm struct this HMM struct is bound to + * @sequence: sequence number bumped on each CPU page table invalidation start + * @mirrors: list of mirrors for this mm + * @mmu_notifier: mmu notifier to track updates to CPU page table + * @mirrors_sem: read/write semaphore protecting the mirrors list */ struct hmm { struct mm_struct *mm; + atomic_t sequence; + struct list_head mirrors; + struct mmu_notifier mmu_notifier; + struct rw_semaphore mirrors_sem; }; /* @@ -43,27 +54,48 @@ struct hmm { */ static struct hmm *hmm_register(struct mm_struct *mm) { - if (!mm->hmm) { - struct hmm *hmm = NULL; - - hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); - if (!hmm) - return NULL; - hmm->mm = mm; - - spin_lock(&mm->page_table_lock); - if (!mm->hmm) - mm->hmm = hmm; - else - kfree(hmm); - spin_unlock(&mm->page_table_lock); - } + struct hmm *hmm = READ_ONCE(mm->hmm); + bool cleanup = false; /* * The hmm struct can only be freed once the mm_struct goes away, * hence we 
should always have pre-allocated an new hmm struct * above. */ + if (hmm) + return hmm; + + hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); + if (!hmm) + return NULL; + INIT_LIST_HEAD(&hmm->mirrors); + init_rwsem(&hmm->mirrors_sem); + atomic_set(&hmm->sequence, 0); + hmm->mmu_notifier.ops = NULL; + hmm->mm = mm; + + /* + * We should only get here if we hold the mmap_sem in write mode, i.e. on + * registration of the first mirror through hmm_mirror_register(). + */ + hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { + kfree(hmm); + return NULL; + } + + spin_lock(&mm->page_table_lock); + if (!mm->hmm) + mm->hmm = hmm; + else + cleanup = true; + spin_unlock(&mm->page_table_lock); + + if (cleanup) { + mmu_notifier_unregister(&hmm->mmu_notifier, mm); + kfree(hmm); + } + return mm->hmm; } @@ -72,3 +104,94 @@ void hmm_mm_destroy(struct mm_struct *mm) kfree(mm->hmm); } #endif /* CONFIG_HMM */ + +#if IS_ENABLED(CONFIG_HMM_MIRROR) +static void hmm_invalidate_range(struct hmm *hmm, + enum hmm_update_type action, + unsigned long start, + unsigned long end) +{ + struct hmm_mirror *mirror; + + down_read(&hmm->mirrors_sem); + list_for_each_entry(mirror, &hmm->mirrors, list) + mirror->ops->sync_cpu_device_pagetables(mirror, action, + start, end); + up_read(&hmm->mirrors_sem); +} + +static void hmm_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + atomic_inc(&hmm->sequence); +} + +static void hmm_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end); +} + +static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { + .invalidate_range_start = hmm_invalidate_range_start, + .invalidate_range_end = hmm_invalidate_range_end, +}; + +/* + * 
hmm_mirror_register() - register a mirror against an mm + * + * @mirror: new mirror struct to register + * @mm: mm to register against + * + * To start mirroring a process address space, the device driver must register + * an HMM mirror struct with mirror->ops set. Returns 0, -EINVAL or -ENOMEM. + * + * THE mm->mmap_sem MUST BE HELD IN WRITE MODE ! + */ +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) +{ + /* Sanity check */ + if (!mm || !mirror || !mirror->ops) + return -EINVAL; + + mirror->hmm = hmm_register(mm); + if (!mirror->hmm) + return -ENOMEM; + + down_write(&mirror->hmm->mirrors_sem); + list_add(&mirror->list, &mirror->hmm->mirrors); + up_write(&mirror->hmm->mirrors_sem); + + return 0; +} +EXPORT_SYMBOL(hmm_mirror_register); + +/* + * hmm_mirror_unregister() - unregister a mirror + * + * @mirror: mirror struct to unregister + * + * Stop mirroring a process address space, and clean up. + */ +void hmm_mirror_unregister(struct hmm_mirror *mirror) +{ + struct hmm *hmm = mirror->hmm; + + down_write(&hmm->mirrors_sem); + list_del(&mirror->list); + up_write(&hmm->mirrors_sem); +} +EXPORT_SYMBOL(hmm_mirror_unregister); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */