Two fixes for the interrupt subsystem:

- Make the handling of the firmware node consistent and do not free the
    node after the domain has been created successfully. The core code
    stores a pointer to it which can lead to a use after free or double
    free.
 
    This used to "work" because the pointer was not stored when the initial
    code was written, but at some point later it was required to store
    it. Of course nobody noticed that the existing users break that way.
 
  - Handle affinity setting on inactive interrupts correctly when
    hierarchical irq domains are enabled. When interrupts are inactive with
    the modern hierarchical irqdomain design, the interrupt chips are not
    necessarily in a state where affinity changes can be handled. The legacy
    irq chip design allowed this because interrupts are immediately fully
    initialized at allocation time. X86 has a hacky workaround for this, but
    other implementations do not. This cased malfunction on GIC-V3. Instead
    of playing whack a mole to find all affected drivers, change the core
    code to store the requested affinity setting and then establish it when
    the interrupt is allocated, which makes the X86 hack go away.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl8UP+4THHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoSuZD/9tNPR4fIDt4mC9ciSvwSqGTV+q1y1D
 zhXSDro4cJNjzy/9D475IJqOlvchaF9Nfun55b60Q6vnA4VN8G+kABEaG8uwr8mV
 ijTB4f0qKfW/9kUDTJRScq3nNmC3miqg8ZFgFEn6Ecxj3NHmwidATIi5sF6f/XVG
 DdhL0Jys7ycNeGBf7yIKbT5/NOULMHYy9rK1NDAeBo9u3klvmrwrHgdNsiDDhEaU
 KlHtwuQLCdjFY3Lf67YpSah+Hx/gXPI1VHUxDDFRoFmC4RlB0VjyXGydjsisOrSQ
 Cl2gnkQ6VOlLaLbN38nmia9nyb6npzE5iK1h9EDcaRhBACG9O23Bdo+YZYxl6BOP
 mXuyIVKJYczJEp7j1fGHW/aNCoEqC8dGVyN7toxMVfGZmF12JzMSt4SYItPeSjFC
 bPNPRCscpiMOQdgwgO0woK1764V46g1BlmxXtJRdWB4iwWgXcryaz65xzSfNeZF4
 0+TvdYs2FYjxwwIyWj8xJ3Npe1lKhH+06DA6gziwJt1u4it8rl82UcqMFyf/ty1w
 o5LHyMBWYm7SJXSeaZZj+nv7moJKJnmRYKnpry21cUzsK/vQEPX0vqhwh4dSFN3O
 BaBocDsOk+9wkmUwi6haP+6+vpadAFQrsqVhURtwc6OVSWn2/vsf2ZH5P36xwFWD
 tlFanb8hX9y2NQ==
 =elM3
 -----END PGP SIGNATURE-----

Merge tag 'irq-urgent-2020-07-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into master

Pull irq fixes from Thomas Gleixner:
 "Two fixes for the interrupt subsystem:

   - Make the handling of the firmware node consistent and do not free
     the node after the domain has been created successfully. The core
     code stores a pointer to it which can lead to a use after free or
     double free.

     This used to "work" because the pointer was not stored when the
     initial code was written, but at some point later it was required
     to store it. Of course nobody noticed that the existing users break
     that way.

   - Handle affinity setting on inactive interrupts correctly when
     hierarchical irq domains are enabled.

     When interrupts are inactive with the modern hierarchical irqdomain
     design, the interrupt chips are not necessarily in a state where
     affinity changes can be handled. The legacy irq chip design allowed
     this because interrupts are immediately fully initialized at
     allocation time. X86 has a hacky workaround for this, but other
     implementations do not.

     This cased malfunction on GIC-V3. Instead of playing whack a mole
     to find all affected drivers, change the core code to store the
     requested affinity setting and then establish it when the interrupt
     is allocated, which makes the X86 hack go away"

* tag 'irq-urgent-2020-07-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  genirq/affinity: Handle affinity setting on inactive interrupts correctly
  irqdomain/treewide: Keep firmware node unconditionally allocated
This commit is contained in:
Linus Torvalds 2020-07-19 11:53:08 -07:00
commit 9413cd7792
11 changed files with 76 additions and 42 deletions

View File

@ -627,9 +627,10 @@ static int bridge_probe(struct platform_device *pdev)
return -ENOMEM;
domain = irq_domain_create_hierarchy(parent, 0, 8, fn,
&bridge_domain_ops, NULL);
irq_domain_free_fwnode(fn);
if (!domain)
if (!domain) {
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
pci_set_flags(PCI_PROBE_ONLY);

View File

@ -2316,12 +2316,12 @@ static int mp_irqdomain_create(int ioapic)
ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
(void *)(long)ioapic);
/* Release fw handle if it was allocated above */
if (!cfg->dev)
irq_domain_free_fwnode(fn);
if (!ip->irqdomain)
if (!ip->irqdomain) {
/* Release fw handle if it was allocated above */
if (!cfg->dev)
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
ip->irqdomain->parent = parent;

View File

@ -263,12 +263,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
msi_default_domain =
pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
parent);
irq_domain_free_fwnode(fn);
}
if (!msi_default_domain)
if (!msi_default_domain) {
irq_domain_free_fwnode(fn);
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
else
} else {
msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}
}
#ifdef CONFIG_IRQ_REMAP
@ -301,7 +302,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
if (!fn)
return NULL;
d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
irq_domain_free_fwnode(fn);
if (!d)
irq_domain_free_fwnode(fn);
return d;
}
#endif
@ -364,7 +366,8 @@ static struct irq_domain *dmar_get_irq_domain(void)
if (fn) {
dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
x86_vector_domain);
irq_domain_free_fwnode(fn);
if (!dmar_domain)
irq_domain_free_fwnode(fn);
}
out:
mutex_unlock(&dmar_lock);
@ -489,7 +492,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
}
d = msi_create_irq_domain(fn, domain_info, parent);
irq_domain_free_fwnode(fn);
if (!d) {
irq_domain_free_fwnode(fn);
kfree(domain_info);
}
return d;
}

View File

@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
trace_vector_activate(irqd->irq, apicd->is_managed,
apicd->can_reserve, reserve);
/* Nothing to do for fixed assigned vectors */
if (!apicd->can_reserve && !apicd->is_managed)
return 0;
raw_spin_lock_irqsave(&vector_lock, flags);
if (reserve || irqd_is_managed_and_shutdown(irqd))
if (!apicd->can_reserve && !apicd->is_managed)
assign_irq_vector_any_locked(irqd);
else if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@ -709,7 +707,6 @@ int __init arch_early_irq_init(void)
x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
NULL);
BUG_ON(x86_vector_domain == NULL);
irq_domain_free_fwnode(fn);
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
@ -775,20 +772,10 @@ void lapic_offline(void)
static int apic_set_affinity(struct irq_data *irqd,
const struct cpumask *dest, bool force)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
int err;
/*
* Core code can call here for inactive interrupts. For inactive
* interrupts which use managed or reservation mode there is no
* point in going through the vector assignment right now as the
* activation will assign a vector which fits the destination
* cpumask. Let the core code store the destination mask and be
* done with it.
*/
if (!irqd_is_activated(irqd) &&
(apicd->is_managed || apicd->can_reserve))
return IRQ_SET_MASK_OK;
if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
return -EIO;
raw_spin_lock(&vector_lock);
cpumask_and(vector_searchmask, dest, cpu_online_mask);

View File

@ -167,9 +167,10 @@ static struct irq_domain *uv_get_irq_domain(void)
goto out;
uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL);
irq_domain_free_fwnode(fn);
if (uv_domain)
uv_domain->parent = x86_vector_domain;
else
irq_domain_free_fwnode(fn);
out:
mutex_unlock(&uv_lock);

View File

@ -3985,9 +3985,10 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
if (!fn)
return -ENOMEM;
iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu);
irq_domain_free_fwnode(fn);
if (!iommu->ir_domain)
if (!iommu->ir_domain) {
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
iommu->ir_domain->parent = arch_get_ir_parent_domain();
iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain,

View File

@ -155,7 +155,10 @@ static int __init hyperv_prepare_irq_remapping(void)
0, IOAPIC_REMAPPING_ENTRY, fn,
&hyperv_ir_domain_ops, NULL);
irq_domain_free_fwnode(fn);
if (!ioapic_ir_domain) {
irq_domain_free_fwnode(fn);
return -ENOMEM;
}
/*
* Hyper-V doesn't provide irq remapping function for

View File

@ -563,8 +563,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
0, INTR_REMAP_TABLE_ENTRIES,
fn, &intel_ir_domain_ops,
iommu);
irq_domain_free_fwnode(fn);
if (!iommu->ir_domain) {
irq_domain_free_fwnode(fn);
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
goto out_free_bitmap;
}

View File

@ -142,10 +142,11 @@ static int ioc3_irq_domain_setup(struct ioc3_priv_data *ipd, int irq)
goto err;
domain = irq_domain_create_linear(fn, 24, &ioc3_irq_domain_ops, ipd);
if (!domain)
if (!domain) {
irq_domain_free_fwnode(fn);
goto err;
}
irq_domain_free_fwnode(fn);
ipd->domain = domain;
irq_set_chained_handler_and_data(irq, ioc3_irq_handler, domain);

View File

@ -546,9 +546,10 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
x86_vector_domain);
irq_domain_free_fwnode(fn);
if (!vmd->irq_domain)
if (!vmd->irq_domain) {
irq_domain_free_fwnode(fn);
return -ENODEV;
}
pci_add_resource(&resources, &vmd->resources[0]);
pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);

View File

@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_desc *desc)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
}
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static void irq_validate_effective_affinity(struct irq_data *data)
{
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
@ -205,9 +205,19 @@ static void irq_validate_effective_affinity(struct irq_data *data)
return;
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
#endif
}
static inline void irq_init_effective_affinity(struct irq_data *data,
const struct cpumask *mask)
{
cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
}
#else
static inline void irq_validate_effective_affinity(struct irq_data *data) { }
static inline void irq_init_effective_affinity(struct irq_data *data,
const struct cpumask *mask) { }
#endif
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct irq_data *data,
return ret;
}
static bool irq_set_affinity_deactivated(struct irq_data *data,
const struct cpumask *mask, bool force)
{
struct irq_desc *desc = irq_data_to_desc(data);
/*
* If the interrupt is not yet activated, just store the affinity
* mask and do not call the chip driver at all. On activation the
* driver has to make sure anyway that the interrupt is in a
* useable state so startup works.
*/
if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
return false;
cpumask_copy(desc->irq_common_data.affinity, mask);
irq_init_effective_affinity(data, mask);
irqd_set(data, IRQD_AFFINITY_SET);
return true;
}
int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (!chip || !chip->irq_set_affinity)
return -EINVAL;
if (irq_set_affinity_deactivated(data, mask, force))
return 0;
if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
ret = irq_try_set_affinity(data, mask, force);
} else {