$NetBSD: patch-XSA226,v 1.1 2017/10/17 10:57:34 bouyer Exp $

From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: don't use possibly unbounded tail calls

There is no guarantee that the compiler would actually translate them
to branches instead of calls, so only ones with a known recursion limit
are okay:
- __release_grant_for_copy() can call itself only once, as
  __acquire_grant_for_copy() won't permit use of multi-level transitive
  grants,
- __acquire_grant_for_copy() is fine to call itself with the last
  argument false, as that prevents further recursion,
- __acquire_grant_for_copy() must not call itself to recover from an
  observed change to the active entry's pin count

This is part of CVE-2017-12135 / XSA-226.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- xen/common/compat/grant_table.c.orig
+++ xen/common/compat/grant_table.c
@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int cmd,
                 rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n);
             if ( rc > 0 )
             {
-                ASSERT(rc < n);
-                i -= n - rc;
-                n = rc;
+                ASSERT(rc <= n);
+                i -= rc;
+                n -= rc;
             }
             if ( rc >= 0 )
             {
--- xen/common/grant_table.c.orig
+++ xen/common/grant_table.c
@@ -2089,8 +2089,10 @@ __release_grant_for_copy(
 
     if ( td != rd )
     {
-        /* Recursive calls, but they're tail calls, so it's
-           okay. */
+        /*
+         * Recursive calls, but they're bounded (acquire permits only a single
+         * level of transitivity), so it's okay.
+         */
         if ( released_write )
             __release_grant_for_copy(td, trans_gref, 0);
         else if ( released_read )
@@ -2241,10 +2243,11 @@ __acquire_grant_for_copy(
                 return rc;
             }
 
-            /* We dropped the lock, so we have to check that nobody
-               else tried to pin (or, for that matter, unpin) the
-               reference in *this* domain.  If they did, just give up
-               and try again. */
+            /*
+             * We dropped the lock, so we have to check that nobody else tried
+             * to pin (or, for that matter, unpin) the reference in *this*
+             * domain.  If they did, just give up and tell the caller to retry.
+             */
             if ( act->pin != old_pin )
             {
                 __fixup_status_for_copy_pin(act, status);
@@ -2252,9 +2255,8 @@ __acquire_grant_for_copy(
                 active_entry_release(act);
                 read_unlock(&rgt->lock);
                 put_page(*page);
-                return __acquire_grant_for_copy(rd, gref, ldom, readonly,
-                                                frame, page, page_off, length,
-                                                allow_transitive);
+                *page = NULL;
+                return ERESTART;
             }
 
             /* The actual remote remote grant may or may not be a
@@ -2560,7 +2562,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(src);
         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2570,7 +2572,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(dest);
         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2579,6 +2581,14 @@ static int gnttab_copy_one(const struct
     return rc;
 }
 
+/*
+ * gnttab_copy(), other than the various other helpers of
+ * do_grant_table_op(), returns (besides possible error indicators)
+ * "count - i" rather than "i" to ensure that even if no progress
+ * was made at all (perhaps due to gnttab_copy_one() returning a
+ * positive value) a non-zero value is being handed back (zero needs
+ * to be avoided, as that means "success, all done").
+ */
 static long gnttab_copy(
     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
 {
@@ -2592,7 +2602,7 @@ static long gnttab_copy(
     {
         if ( i && hypercall_preempt_check() )
         {
-            rc = i;
+            rc = count - i;
             break;
         }
 
@@ -2602,13 +2612,20 @@ static long gnttab_copy(
             break;
         }
 
-        op.status = gnttab_copy_one(&op, &dest, &src);
-        if ( op.status != GNTST_okay )
+        rc = gnttab_copy_one(&op, &dest, &src);
+        if ( rc > 0 )
+        {
+            rc = count - i;
+            break;
+        }
+        if ( rc != GNTST_okay )
         {
             gnttab_copy_release_buf(&src);
             gnttab_copy_release_buf(&dest);
         }
 
+        op.status = rc;
+        rc = 0;
         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
         {
             rc = -EFAULT;
@@ -3146,6 +3163,7 @@ do_grant_table_op(
         rc = gnttab_copy(copy, count);
         if ( rc > 0 )
         {
+            rc = count - rc;
             guest_handle_add_offset(copy, rc);
             uop = guest_handle_cast(copy, void);
         }
From: Jan Beulich <jbeulich@suse.com>
Subject: gnttab: fix transitive grant handling

Processing of transitive grants must not use the fast path, or else
reference counting breaks due to the skipped recursive call to
__acquire_grant_for_copy() (its __release_grant_for_copy()
counterpart occurs independent of original pin count). Furthermore
after re-acquiring temporarily dropped locks we need to verify no grant
properties changed if the original pin count was non-zero; checking
just the pin counts is sufficient only for well-behaved guests. As a
result, __release_grant_for_copy() needs to mirror that new behavior.

Furthermore a __release_grant_for_copy() invocation was missing on the
retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
needs to bail out upon encountering a transitive grant.

This is part of CVE-2017-12135 / XSA-226.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- xen/common/grant_table.c.orig
+++ xen/common/grant_table.c
@@ -2036,13 +2036,8 @@ __release_grant_for_copy(
     unsigned long r_frame;
     uint16_t *status;
     grant_ref_t trans_gref;
-    int released_read;
-    int released_write;
     struct domain *td;
 
-    released_read = 0;
-    released_write = 0;
-
     read_lock(&rgt->lock);
 
     act = active_entry_acquire(rgt, gref);
@@ -2072,17 +2067,11 @@ __release_grant_for_copy(
 
         act->pin -= GNTPIN_hstw_inc;
         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
-        {
-            released_write = 1;
             gnttab_clear_flag(_GTF_writing, status);
-        }
     }
 
     if ( !act->pin )
-    {
         gnttab_clear_flag(_GTF_reading, status);
-        released_read = 1;
-    }
 
     active_entry_release(act);
     read_unlock(&rgt->lock);
@@ -2090,13 +2079,10 @@ __release_grant_for_copy(
     if ( td != rd )
     {
         /*
-         * Recursive calls, but they're bounded (acquire permits only a single
+         * Recursive call, but it is bounded (acquire permits only a single
          * level of transitivity), so it's okay.
          */
-        if ( released_write )
-            __release_grant_for_copy(td, trans_gref, 0);
-        else if ( released_read )
-            __release_grant_for_copy(td, trans_gref, 1);
+        __release_grant_for_copy(td, trans_gref, readonly);
 
         rcu_unlock_domain(td);
     }
@@ -2170,8 +2156,108 @@ __acquire_grant_for_copy(
                  act->domid, ldom, act->pin);
 
     old_pin = act->pin;
-    if ( !act->pin ||
-         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
+    {
+        if ( (!old_pin || (!readonly &&
+                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
+             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
+                                  status)) != GNTST_okay )
+            goto unlock_out;
+
+        if ( !allow_transitive )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant when transitivity not allowed\n");
+
+        trans_domid = sha2->transitive.trans_domid;
+        trans_gref = sha2->transitive.gref;
+        barrier(); /* Stop the compiler from re-loading
+                      trans_domid from shared memory */
+        if ( trans_domid == rd->domain_id )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grants cannot be self-referential\n");
+
+        /*
+         * We allow the trans_domid == ldom case, which corresponds to a
+         * grant being issued by one domain, sent to another one, and then
+         * transitively granted back to the original domain.  Allowing it
+         * is easy, and means that you don't need to go out of your way to
+         * avoid it in the guest.
+         */
+
+        /* We need to leave the rrd locked during the grant copy. */
+        td = rcu_lock_domain_by_id(trans_domid);
+        if ( td == NULL )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant referenced bad domain %d\n",
+                     trans_domid);
+
+        /*
+         * __acquire_grant_for_copy() could take the lock on the
+         * remote table (if rd == td), so we have to drop the lock
+         * here and reacquire.
+         */
+        active_entry_release(act);
+        read_unlock(&rgt->lock);
+
+        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
+                                      readonly, &grant_frame, page,
+                                      &trans_page_off, &trans_length, 0);
+
+        read_lock(&rgt->lock);
+        act = active_entry_acquire(rgt, gref);
+
+        if ( rc != GNTST_okay )
+        {
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            read_unlock(&rgt->lock);
+            return rc;
+        }
+
+        /*
+         * We dropped the lock, so we have to check that the grant didn't
+         * change, and that nobody else tried to pin/unpin it. If anything
+         * changed, just give up and tell the caller to retry.
+         */
+        if ( rgt->gt_version != 2 ||
+             act->pin != old_pin ||
+             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
+                          act->start != trans_page_off ||
+                          act->length != trans_length ||
+                          act->trans_domain != td ||
+                          act->trans_gref != trans_gref ||
+                          !act->is_sub_page)) )
+        {
+            __release_grant_for_copy(td, trans_gref, readonly);
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            read_unlock(&rgt->lock);
+            put_page(*page);
+            *page = NULL;
+            return ERESTART;
+        }
+
+        if ( !old_pin )
+        {
+            act->domid = ldom;
+            act->start = trans_page_off;
+            act->length = trans_length;
+            act->trans_domain = td;
+            act->trans_gref = trans_gref;
+            act->frame = grant_frame;
+            act->gfn = -1ul;
+            /*
+             * The actual remote remote grant may or may not be a sub-page,
+             * but we always treat it as one because that blocks mappings of
+             * transitive grants.
+             */
+            act->is_sub_page = 1;
+        }
+    }
+    else if ( !old_pin ||
+              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
     {
         if ( (rc = _set_status(rgt->gt_version, ldom,
                                readonly, 0, shah, act,
@@ -2192,79 +2278,6 @@ __acquire_grant_for_copy(
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
         }
-        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
-        {
-            if ( !allow_transitive )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant when transitivity not allowed\n");
-
-            trans_domid = sha2->transitive.trans_domid;
-            trans_gref = sha2->transitive.gref;
-            barrier(); /* Stop the compiler from re-loading
-                          trans_domid from shared memory */
-            if ( trans_domid == rd->domain_id )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grants cannot be self-referential\n");
-
-            /* We allow the trans_domid == ldom case, which
-               corresponds to a grant being issued by one domain, sent
-               to another one, and then transitively granted back to
-               the original domain.  Allowing it is easy, and means
-               that you don't need to go out of your way to avoid it
-               in the guest. */
-
-            /* We need to leave the rrd locked during the grant copy */
-            td = rcu_lock_domain_by_id(trans_domid);
-            if ( td == NULL )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant referenced bad domain %d\n",
-                         trans_domid);
-
-            /*
-             * __acquire_grant_for_copy() could take the lock on the
-             * remote table (if rd == td), so we have to drop the lock
-             * here and reacquire
-             */
-            active_entry_release(act);
-            read_unlock(&rgt->lock);
-
-            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
-                                          readonly, &grant_frame, page,
-                                          &trans_page_off, &trans_length, 0);
-
-            read_lock(&rgt->lock);
-            act = active_entry_acquire(rgt, gref);
-
-            if ( rc != GNTST_okay ) {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                read_unlock(&rgt->lock);
-                return rc;
-            }
-
-            /*
-             * We dropped the lock, so we have to check that nobody else tried
-             * to pin (or, for that matter, unpin) the reference in *this*
-             * domain.  If they did, just give up and tell the caller to retry.
-             */
-            if ( act->pin != old_pin )
-            {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                read_unlock(&rgt->lock);
-                put_page(*page);
-                *page = NULL;
-                return ERESTART;
-            }
-
-            /* The actual remote remote grant may or may not be a
-               sub-page, but we always treat it as one because that
-               blocks mappings of transitive grants. */
-            is_sub_page = 1;
-            act->gfn = -1ul;
-        }
         else if ( !(sha2->hdr.flags & GTF_sub_page) )
         {
             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
@@ -2696,10 +2709,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     case 2:
         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
         {
-            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
-                  GTF_permit_access) &&
-                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
+            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
             {
+            case GTF_permit_access:
+                 if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
+                     break;
+                 /* fall through */
+            case GTF_transitive:
                 gdprintk(XENLOG_WARNING,
                          "tried to change grant table version to 1 with non-representable entries\n");
                 res = -ERANGE;
