$NetBSD: patch-XSA328,v 1.1 2020/07/16 09:57:17 bouyer Exp $

From: Jan Beulich <jbeulich@suse.com>
Subject: x86/EPT: ept_set_middle_entry() related adjustments

ept_split_super_page() wants to further modify the newly allocated
table, so have ept_set_middle_entry() return the mapped pointer rather
than tearing it down and then getting re-established right again.

Similarly ept_next_level() wants to hand back a mapped pointer of
the next level page, so re-use the one established by
ept_set_middle_entry() in case that path was taken.

Pull the setting of suppress_ve ahead of insertion into the higher level
table, and don't have ept_split_super_page() set the field a 2nd time.

This is part of XSA-328.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- xen/arch/x86/mm/p2m-ept.c.orig
+++ xen/arch/x86/mm/p2m-ept.c
@@ -228,8 +228,9 @@ static void ept_p2m_type_to_flags(struct
 #define GUEST_TABLE_SUPER_PAGE  2
 #define GUEST_TABLE_POD_PAGE    3
 
-/* Fill in middle levels of ept table */
-static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
+/* Fill in middle level of ept table; return pointer to mapped new table. */
+static ept_entry_t *ept_set_middle_entry(struct p2m_domain *p2m,
+                                         ept_entry_t *ept_entry)
 {
     mfn_t mfn;
     ept_entry_t *table;
@@ -237,7 +238,12 @@ static int ept_set_middle_entry(struct p
 
     mfn = p2m_alloc_ptp(p2m, 0);
     if ( mfn_eq(mfn, INVALID_MFN) )
-        return 0;
+        return NULL;
+
+    table = map_domain_page(mfn);
+
+    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+        table[i].suppress_ve = 1;
 
     ept_entry->epte = 0;
     ept_entry->mfn = mfn_x(mfn);
@@ -249,14 +255,7 @@ static int ept_set_middle_entry(struct p
 
     ept_entry->suppress_ve = 1;
 
-    table = map_domain_page(mfn);
-
-    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
-        table[i].suppress_ve = 1;
-
-    unmap_domain_page(table);
-
-    return 1;
+    return table;
 }
 
 /* free ept sub tree behind an entry */
@@ -294,10 +293,10 @@ static bool_t ept_split_super_page(struc
 
     ASSERT(is_epte_superpage(ept_entry));
 
-    if ( !ept_set_middle_entry(p2m, &new_ept) )
+    table = ept_set_middle_entry(p2m, &new_ept);
+    if ( !table )
         return 0;
 
-    table = map_domain_page(_mfn(new_ept.mfn));
     trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
 
     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
@@ -308,7 +307,6 @@ static bool_t ept_split_super_page(struc
         epte->sp = (level > 1);
         epte->mfn += i * trunk;
         epte->snp = (iommu_enabled && iommu_snoop);
-        epte->suppress_ve = 1;
 
         ept_p2m_type_to_flags(p2m, epte, epte->sa_p2mt, epte->access);
 
@@ -347,8 +345,7 @@ static int ept_next_level(struct p2m_dom
                           ept_entry_t **table, unsigned long *gfn_remainder,
                           int next_level)
 {
-    unsigned long mfn;
-    ept_entry_t *ept_entry, e;
+    ept_entry_t *ept_entry, *next = NULL, e;
     u32 shift, index;
 
     shift = next_level * EPT_TABLE_ORDER;
@@ -373,19 +370,17 @@ static int ept_next_level(struct p2m_dom
         if ( read_only )
             return GUEST_TABLE_MAP_FAILED;
 
-        if ( !ept_set_middle_entry(p2m, ept_entry) )
+        next = ept_set_middle_entry(p2m, ept_entry);
+        if ( !next )
             return GUEST_TABLE_MAP_FAILED;
-        else
-            e = atomic_read_ept_entry(ept_entry); /* Refresh */
+        /* e is now stale and hence may not be used anymore below. */
     }
-
     /* The only time sp would be set here is if we had hit a superpage */
-    if ( is_epte_superpage(&e) )
+    else if ( is_epte_superpage(&e) )
         return GUEST_TABLE_SUPER_PAGE;
 
-    mfn = e.mfn;
     unmap_domain_page(*table);
-    *table = map_domain_page(_mfn(mfn));
+    *table = next ?: map_domain_page(_mfn(e.mfn));
     *gfn_remainder &= (1UL << shift) - 1;
     return GUEST_TABLE_NORMAL_PAGE;
 }
From: <security@xenproject.org>
Subject: x86/ept: atomically modify entries in ept_next_level

ept_next_level was passing a live PTE pointer to ept_set_middle_entry,
which was then modified without taking into account that the PTE could
be part of a live EPT table. This wasn't a security issue because the
pages returned by p2m_alloc_ptp are zeroed, so adding such an entry
before actually initializing it didn't allow a guest to access
physical memory addresses it wasn't supposed to access.

This is part of XSA-328.

Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- xen/arch/x86/mm/p2m-ept.c.orig
+++ xen/arch/x86/mm/p2m-ept.c
@@ -348,6 +348,8 @@ static int ept_next_level(struct p2m_dom
     ept_entry_t *ept_entry, *next = NULL, e;
     u32 shift, index;
 
+    ASSERT(next_level);
+
     shift = next_level * EPT_TABLE_ORDER;
 
     index = *gfn_remainder >> shift;
@@ -364,16 +366,20 @@ static int ept_next_level(struct p2m_dom
 
     if ( !is_epte_present(&e) )
     {
+        int rc;
+
         if ( e.sa_p2mt == p2m_populate_on_demand )
             return GUEST_TABLE_POD_PAGE;
 
         if ( read_only )
             return GUEST_TABLE_MAP_FAILED;
 
-        next = ept_set_middle_entry(p2m, ept_entry);
+        next = ept_set_middle_entry(p2m, &e);
         if ( !next )
             return GUEST_TABLE_MAP_FAILED;
-        /* e is now stale and hence may not be used anymore below. */
+
+        rc = atomic_write_ept_entry(ept_entry, e, next_level);
+        ASSERT(rc == 0);
     }
     /* The only time sp would be set here is if we had hit a superpage */
     else if ( is_epte_superpage(&e) )

this has to be applied after patch-XSA328

From: <security@xenproject.org>
Subject: x86/ept: flush cache when modifying PTEs and sharing page tables

Modifications made to the page tables by EPT code need to be written
to memory when the page tables are shared with the IOMMU, as Intel
IOMMUs can be non-coherent and thus require changes to be written to
memory in order to be visible to the IOMMU.

In order to achieve this make sure data is written back to memory
after writing an EPT entry when the recalc bit is not set in
atomic_write_ept_entry. If such bit is set, the entry will be
adjusted and atomic_write_ept_entry will be called a second time
without the recalc bit set. Note that when splitting a super page the
new tables resulting of the split should also be written back.

Failure to do so can allow devices behind the IOMMU access to the
stale super page, or cause coherency issues as changes made by the
processor to the page tables are not visible to the IOMMU.

This allows to remove the VT-d specific iommu_pte_flush helper, since
the cache write back is now performed by atomic_write_ept_entry, and
hence iommu_iotlb_flush can be used to flush the IOMMU TLB. The newly
used method (iommu_iotlb_flush) can result in less flushes, since it
might sometimes be called rightly with 0 flags, in which case it
becomes a no-op.

This is part of XSA-321.

Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- xen/arch/x86/mm/p2m-ept.c.orig
+++ xen/arch/x86/mm/p2m-ept.c
@@ -394,6 +394,9 @@
         if ( !next )
             return GUEST_TABLE_MAP_FAILED;
 
+        if ( iommu_hap_pt_share )
+            iommu_sync_cache(next, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
+
         rc = atomic_write_ept_entry(ept_entry, e, next_level);
         ASSERT(rc == 0);
     }
