$NetBSD: patch-XSA237,v 1.1 2017/10/17 10:57:34 bouyer Exp $

From: Jan Beulich <jbeulich@suse.com>
Subject: x86: don't allow MSI pIRQ mapping on unowned device

MSI setup should be permitted only for existing devices owned by the
respective guest (the operation may still be carried out by the domain
controlling that guest).

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- xen/arch/x86/irq.c.orig
+++ xen/arch/x86/irq.c
@@ -1961,7 +1961,10 @@ int map_domain_pirq(
         if ( !cpu_has_apic )
             goto done;
 
-        pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
+        pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
+        if ( !pdev )
+            goto done;
+
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
         {
From: Jan Beulich <jbeulich@suse.com>
Subject: x86: enforce proper privilege when (un)mapping pIRQ-s

(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE*
actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET.
This in turn requires bypassing the XSM check in physdev_unmap_pirq()
for the HVM emuirq case just like is being done in physdev_map_pirq().
The primary goal security wise, however, is to no longer allow HVM
guests, by specifying their own domain ID instead of DOMID_SELF, to
enter code paths intended for PV guest and the control domains of HVM
guests only.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- xen/arch/x86/physdev.c.orig
+++ xen/arch/x86/physdev.c
@@ -110,7 +110,7 @@ int physdev_map_pirq(domid_t domid, int
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_map_domain_pirq(XSM_TARGET, d);
+    ret = xsm_map_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
@@ -255,13 +255,14 @@ int physdev_map_pirq(domid_t domid, int
 int physdev_unmap_pirq(domid_t domid, int pirq)
 {
     struct domain *d;
-    int ret;
+    int ret = 0;
 
     d = rcu_lock_domain_by_any_id(domid);
     if ( d == NULL )
         return -ESRCH;
 
-    ret = xsm_unmap_domain_pirq(XSM_TARGET, d);
+    if ( domid != DOMID_SELF || !is_hvm_domain(d) )
+        ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d);
     if ( ret )
         goto free_domain;
 
--- xen/include/xsm/dummy.h.orig
+++ xen/include/xsm/dummy.h
@@ -446,7 +446,7 @@ static XSM_INLINE char *xsm_show_irq_sid
 
 static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
@@ -458,7 +458,7 @@ static XSM_INLINE int xsm_map_domain_irq
 
 static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d)
 {
-    XSM_ASSERT_ACTION(XSM_TARGET);
+    XSM_ASSERT_ACTION(XSM_DM_PRIV);
     return xsm_default_action(action, current->domain, d);
 }
 
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/MSI: disallow redundant enabling

At the moment, Xen attempts to allow redundant enabling of MSI by
having pci_enable_msi() return 0, and point to the existing MSI
descriptor, when the msi already exists.

Unfortunately, if subsequent errors are encountered, the cleanup
paths assume pci_enable_msi() had done full initialization, and
hence undo everything that was assumed to be done by that
function without also undoing other setup that would normally
occur only after that function was called (in map_domain_pirq()
itself).

Rather than try to make the redundant enabling case work properly, just
forbid it entirely by having pci_enable_msi() return -EEXIST when MSI
is already set up.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- xen/arch/x86/msi.c.orig
+++ xen/arch/x86/msi.c
@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
                msi->irq, msi->seg, msi->bus,
                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_
     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
     if ( old_desc )
     {
-        printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
+        printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
                msi->irq, msi->seg, msi->bus,
                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
-        *desc = old_desc;
-        return 0;
+        return -EEXIST;
     }
 
     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths

Mappings that had been set up before should not be torn down when
handling unrelated errors.

This is part of XSA-237.

Reported-by: HW42 <hw42@ipsumj.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- xen/arch/x86/irq.c.orig
+++ xen/arch/x86/irq.c
@@ -1249,7 +1249,8 @@ static int prepare_domain_irq_pirq(struc
         return -ENOMEM;
     }
     *pinfo = info;
-    return 0;
+
+    return !!err;
 }
 
 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
@@ -1292,7 +1293,10 @@ int init_domain_irq_mapping(struct domai
             continue;
         err = prepare_domain_irq_pirq(d, i, i, &info);
         if ( err )
+        {
+            ASSERT(err < 0);
             break;
+        }
         set_domain_irq_pirq(d, i, info);
     }
 
@@ -1900,6 +1904,7 @@ int map_domain_pirq(
     struct pirq *info;
     struct irq_desc *desc;
     unsigned long flags;
+    DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
 
     ASSERT(spin_is_locked(&d->event_lock));
 
@@ -1943,8 +1948,10 @@ int map_domain_pirq(
     }
 
     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
-    if ( ret )
+    if ( ret < 0 )
         goto revoke;
+    if ( !ret )
+        __set_bit(0, prepared);
 
     desc = irq_to_desc(irq);
 
@@ -2016,8 +2023,10 @@ int map_domain_pirq(
             irq = create_irq(NUMA_NO_NODE);
             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
                            : irq;
-            if ( ret )
+            if ( ret < 0 )
                 break;
+            if ( !ret )
+                __set_bit(nr, prepared);
             msi_desc[nr].irq = irq;
 
             if ( irq_permit_access(d, irq) != 0 )
@@ -2050,15 +2059,15 @@ int map_domain_pirq(
                 desc->msi_desc = NULL;
                 spin_unlock_irqrestore(&desc->lock, flags);
             }
-            while ( nr-- )
+            while ( nr )
             {
                 if ( irq >= 0 && irq_deny_access(d, irq) )
                     printk(XENLOG_G_ERR
                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
                            d->domain_id, irq, pirq);
-                if ( info )
+                if ( info && test_bit(nr, prepared) )
                     cleanup_domain_irq_pirq(d, irq, info);
-                info = pirq_info(d, pirq + nr);
+                info = pirq_info(d, pirq + --nr);
                 irq = info->arch.irq;
             }
             msi_desc->irq = -1;
@@ -2074,12 +2083,14 @@ int map_domain_pirq(
         spin_lock_irqsave(&desc->lock, flags);
         set_domain_irq_pirq(d, irq, info);
         spin_unlock_irqrestore(&desc->lock, flags);
+        ret = 0;
     }
 
 done:
     if ( ret )
     {
-        cleanup_domain_irq_pirq(d, irq, info);
+        if ( test_bit(0, prepared) )
+            cleanup_domain_irq_pirq(d, irq, info);
  revoke:
         if ( irq_deny_access(d, irq) )
             printk(XENLOG_G_ERR
--- xen/arch/x86/physdev.c.orig
+++ xen/arch/x86/physdev.c
@@ -185,7 +185,7 @@ int physdev_map_pirq(domid_t domid, int
         }
         else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
         {
-            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+            if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS )
                 ret = -EDOM;
             else if ( msi->entry_nr != 1 && !iommu_intremap )
                 ret = -EOPNOTSUPP;
--- xen/include/asm-x86/msi.h.orig
+++ xen/include/asm-x86/msi.h
@@ -55,6 +55,8 @@
 /* MAX fixed pages reserved for mapping MSIX tables. */
 #define FIX_MSIX_MAX_PAGES              512
 
+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
+
 struct msi_info {
     u16 seg;
     u8 bus;
From: Jan Beulich <jbeulich@suse.com>
Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook

The caller and the FLASK implementation of xsm_unmap_domain_irq()
disagreed about what the "data" argument points to in the MSI case:
Change both sides to pass/take a PCI device.

This is part of XSA-237.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- xen/arch/x86/irq.c.orig
+++ xen/arch/x86/irq.c
@@ -2141,7 +2141,8 @@ int unmap_domain_pirq(struct domain *d,
         nr = msi_desc->msi.nvec;
     }
 
-    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
+    ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
+                               msi_desc ? msi_desc->dev : NULL);
     if ( ret )
         goto done;
 
--- xen/xsm/flask/hooks.c.orig
+++ xen/xsm/flask/hooks.c
@@ -897,8 +897,8 @@ static int flask_unmap_domain_msi (struc
                                    u32 *sid, struct avc_audit_data *ad)
 {
 #ifdef HAS_PCI
-    struct msi_info *msi = data;
-    u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn;
+    const struct pci_dev *pdev = data;
+    u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn;
 
     AVC_AUDIT_DATA_INIT(ad, DEV);
     ad->device = machine_bdf;
