$NetBSD: patch-XSA115-c,v 1.1 2020/12/17 16:48:12 bouyer Exp $

From e92f3dfeaae21a335e666c9247954424e34e5c56 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:37 +0200
Subject: [PATCH 01/10] tools/xenstore: allow removing child of a node
 exceeding quota

An unprivileged user of Xenstore is not allowed to write nodes with a
size exceeding a global quota, while privileged users like dom0 are
allowed to write such nodes. The size of a node is the needed space
to store all node specific data, this includes the names of all
children of the node.

When deleting a node its parent has to be modified by removing the
name of the to be deleted child from it.

This results in the strange situation that an unprivileged owner of a
node might not succeed in deleting that node in case its parent is
exceeding the quota of that unprivileged user (it might have been
written by dom0), as the user is not allowed to write the updated
parent node.

Fix that by not checking the quota when writing a node for the
purpose of removing a child's name only.

The same applies to transaction handling: a node being read during a
transaction is written to the transaction specific area and it should
not be tested for exceeding the quota, as it might not be owned by
the reader and presumably the original write would have failed if the
node is owned by the reader.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c        | 20 +++++++++++---------
 tools/xenstore/xenstored_core.h        |  3 ++-
 tools/xenstore/xenstored_transaction.c |  2 +-
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 97ceabf9642d..b43e1018babd 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -417,7 +417,8 @@ static struct node *read_node(struct connection *conn, const void *ctx,
 	return node;
 }
 
-int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node)
+int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+		   bool no_quota_check)
 {
 	TDB_DATA data;
 	void *p;
@@ -427,7 +428,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node)
 		+ node->num_perms*sizeof(node->perms[0])
 		+ node->datalen + node->childlen;
 
-	if (domain_is_unprivileged(conn) &&
+	if (!no_quota_check && domain_is_unprivileged(conn) &&
 	    data.dsize >= quota_max_entry_size) {
 		errno = ENOSPC;
 		return errno;
@@ -455,14 +456,15 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node)
 	return 0;
 }
 
-static int write_node(struct connection *conn, struct node *node)
+static int write_node(struct connection *conn, struct node *node,
+		      bool no_quota_check)
 {
 	TDB_DATA key;
 
 	if (access_node(conn, node, NODE_ACCESS_WRITE, &key))
 		return errno;
 
-	return write_node_raw(conn, &key, node);
+	return write_node_raw(conn, &key, node, no_quota_check);
 }
 
 static enum xs_perm_type perm_for_conn(struct connection *conn,
@@ -999,7 +1001,7 @@ static struct node *create_node(struct connection *conn, const void *ctx,
 	/* We write out the nodes down, setting destructor in case
 	 * something goes wrong. */
 	for (i = node; i; i = i->parent) {
-		if (write_node(conn, i)) {
+		if (write_node(conn, i, false)) {
 			domain_entry_dec(conn, i);
 			return NULL;
 		}
@@ -1039,7 +1041,7 @@ static int do_write(struct connection *conn, struct buffered_data *in)
 	} else {
 		node->data = in->buffer + offset;
 		node->datalen = datalen;
-		if (write_node(conn, node))
+		if (write_node(conn, node, false))
 			return errno;
 	}
 
@@ -1115,7 +1117,7 @@ static int remove_child_entry(struct connection *conn, struct node *node,
 	size_t childlen = strlen(node->children + offset);
 	memdel(node->children, offset, childlen + 1, node->childlen);
 	node->childlen -= childlen + 1;
-	return write_node(conn, node);
+	return write_node(conn, node, true);
 }
 
 
@@ -1254,7 +1256,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 	node->num_perms = num;
 	domain_entry_inc(conn, node);
 
-	if (write_node(conn, node))
+	if (write_node(conn, node, false))
 		return errno;
 
 	fire_watches(conn, in, name, false);
@@ -1514,7 +1516,7 @@ static void manual_node(const char *name, const char *child)
 	if (child)
 		node->childlen = strlen(child) + 1;
 
-	if (write_node(NULL, node))
+	if (write_node(NULL, node, false))
 		barf_perror("Could not create initial node %s", name);
 	talloc_free(node);
 }
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 56a279cfbb47..3cb1c235a101 100644
--- tools/xenstore/xenstored_core.h.orig
+++ tools/xenstore/xenstored_core.h
@@ -149,7 +149,8 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
 char *canonicalize(struct connection *conn, const void *ctx, const char *node);
 
 /* Write a node to the tdb data base. */
-int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node);
+int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+		   bool no_quota_check);
 
 /* Get this node, checking we have permissions. */
 struct node *get_node(struct connection *conn,
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
index 2824f7b359b8..e87897573469 100644
--- tools/xenstore/xenstored_transaction.c.orig
+++ tools/xenstore/xenstored_transaction.c
@@ -276,7 +276,7 @@ int access_node(struct connection *conn, struct node *node,
 			i->check_gen = true;
 			if (node->generation != NO_GENERATION) {
 				set_tdb_key(trans_name, &local_key);
-				ret = write_node_raw(conn, &local_key, node);
+				ret = write_node_raw(conn, &local_key, node, true);
 				if (ret)
 					goto err;
 				i->ta_node = true;
-- 
2.17.1

From e8076f73de65c4816f69d6ebf75839c706145fcd Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:38 +0200
Subject: [PATCH 02/10] tools/xenstore: ignore transaction id for [un]watch

Instead of ignoring the transaction id for XS_WATCH and XS_UNWATCH
commands as it is documented in docs/misc/xenstore.txt, it is tested
for validity today.

Really ignore the transaction id for XS_WATCH and XS_UNWATCH.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index b43e1018babd..bb2f9fd4e76e 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -1268,13 +1268,17 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 static struct {
 	const char *str;
 	int (*func)(struct connection *conn, struct buffered_data *in);
+	unsigned int flags;
+#define XS_FLAG_NOTID		(1U << 0)	/* Ignore transaction id. */
 } const wire_funcs[XS_TYPE_COUNT] = {
 	[XS_CONTROL]           = { "CONTROL",           do_control },
 	[XS_DIRECTORY]         = { "DIRECTORY",         send_directory },
 	[XS_READ]              = { "READ",              do_read },
 	[XS_GET_PERMS]         = { "GET_PERMS",         do_get_perms },
-	[XS_WATCH]             = { "WATCH",             do_watch },
-	[XS_UNWATCH]           = { "UNWATCH",           do_unwatch },
+	[XS_WATCH]             =
+	    { "WATCH",         do_watch,        XS_FLAG_NOTID },
+	[XS_UNWATCH]           =
+	    { "UNWATCH",       do_unwatch,      XS_FLAG_NOTID },
 	[XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start },
 	[XS_TRANSACTION_END]   = { "TRANSACTION_END",   do_transaction_end },
 	[XS_INTRODUCE]         = { "INTRODUCE",         do_introduce },
@@ -1296,7 +1300,7 @@ static struct {
 
 static const char *sockmsg_string(enum xsd_sockmsg_type type)
 {
-	if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].str)
+	if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str)
 		return wire_funcs[type].str;
 
 	return "**UNKNOWN**";
@@ -1311,7 +1315,14 @@ static void process_message(struct connection *conn, struct buffered_data *in)
 	enum xsd_sockmsg_type type = in->hdr.msg.type;
 	int ret;
 
-	trans = transaction_lookup(conn, in->hdr.msg.tx_id);
+	if ((unsigned int)type >= XS_TYPE_COUNT || !wire_funcs[type].func) {
+		eprintf("Client unknown operation %i", type);
+		send_error(conn, ENOSYS);
+		return;
+	}
+
+	trans = (wire_funcs[type].flags & XS_FLAG_NOTID)
+		? NULL : transaction_lookup(conn, in->hdr.msg.tx_id);
 	if (IS_ERR(trans)) {
 		send_error(conn, -PTR_ERR(trans));
 		return;
@@ -1320,12 +1331,7 @@ static void process_message(struct connection *conn, struct buffered_data *in)
 	assert(conn->transaction == NULL);
 	conn->transaction = trans;
 
-	if ((unsigned)type < XS_TYPE_COUNT && wire_funcs[type].func)
-		ret = wire_funcs[type].func(conn, in);
-	else {
-		eprintf("Client unknown operation %i", type);
-		ret = ENOSYS;
-	}
+	ret = wire_funcs[type].func(conn, in);
 	if (ret)
 		send_error(conn, ret);
 
-- 
2.17.1

From b8c6dbb67ebb449126023446a7d209eedf966537 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:39 +0200
Subject: [PATCH 03/10] tools/xenstore: fix node accounting after failed node
 creation

When a node creation fails the number of nodes of the domain should be
the same as before the failed node creation. In case of failure when
trying to create a node requiring to create one or more intermediate
nodes as well (e.g. when /a/b/c/d is to be created, but /a/b isn't
existing yet) it might happen that the number of nodes of the creating
domain is not reset to the value it had before.

So move the quota accounting out of construct_node() and into the node
write loop in create_node() in order to be able to undo the accounting
in case of an error in the intermediate node destructor.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Acked-by: Julien Grall <jgrall@amazon.com>
---
 tools/xenstore/xenstored_core.c | 37 ++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bb2f9fd4e76e..db9b9ca7957d 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -925,11 +925,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx,
 	if (!parent)
 		return NULL;
 
-	if (domain_entry(conn) >= quota_nb_entry_per_domain) {
-		errno = ENOSPC;
-		return NULL;
-	}
-
 	/* Add child to parent. */
 	base = basename(name);
 	baselen = strlen(base) + 1;
@@ -962,7 +957,6 @@ static struct node *construct_node(struct connection *conn, const void *ctx,
 	node->children = node->data = NULL;
 	node->childlen = node->datalen = 0;
 	node->parent = parent;
-	domain_entry_inc(conn, node);
 	return node;
 
 nomem:
@@ -982,6 +976,9 @@ static int destroy_node(void *_node)
 	key.dsize = strlen(node->name);
 
 	tdb_delete(tdb_ctx, key);
+
+	domain_entry_dec(talloc_parent(node), node);
+
 	return 0;
 }
 
@@ -998,18 +995,34 @@ static struct node *create_node(struct connection *conn, const void *ctx,
 	node->data = data;
 	node->datalen = datalen;
 
-	/* We write out the nodes down, setting destructor in case
-	 * something goes wrong. */
+	/*
+	 * We write out the nodes bottom up.
+	 * All new created nodes will have i->parent set, while the final
+	 * node will be already existing and won't have i->parent set.
+	 * New nodes are subject to quota handling.
+	 * Initially set a destructor for all new nodes removing them from
+	 * TDB again and undoing quota accounting for the case of an error
+	 * during the write loop.
+	 */
 	for (i = node; i; i = i->parent) {
-		if (write_node(conn, i, false)) {
-			domain_entry_dec(conn, i);
+		/* i->parent is set for each new node, so check quota. */
+		if (i->parent &&
+		    domain_entry(conn) >= quota_nb_entry_per_domain) {
+			errno = ENOSPC;
 			return NULL;
 		}
-		talloc_set_destructor(i, destroy_node);
+		if (write_node(conn, i, false))
+			return NULL;
+
+		/* Account for new node, set destructor for error case. */
+		if (i->parent) {
+			domain_entry_inc(conn, i);
+			talloc_set_destructor(i, destroy_node);
+		}
 	}
 
 	/* OK, now remove destructors so they stay around */
-	for (i = node; i; i = i->parent)
+	for (i = node; i->parent; i = i->parent)
 		talloc_set_destructor(i, NULL);
 	return node;
 }
-- 
2.17.1

From 318aa75bd0c05423e717ad0b64adb204282025db Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:40 +0200
Subject: [PATCH 04/10] tools/xenstore: simplify and rename check_event_node()

There is no path which allows to call check_event_node() without a
event name. So don't let the result depend on the name being NULL and
add an assert() covering that case.

Rename the function to check_special_event() to better match the
semantics.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_watch.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index 7dedca60dfd6..f2f1bed47cc6 100644
--- tools/xenstore/xenstored_watch.c.orig
+++ tools/xenstore/xenstored_watch.c
@@ -47,13 +47,11 @@ struct watch
 	char *node;
 };
 
-static bool check_event_node(const char *node)
+static bool check_special_event(const char *name)
 {
-	if (!node || !strstarts(node, "@")) {
-		errno = EINVAL;
-		return false;
-	}
-	return true;
+	assert(name);
+
+	return strstarts(name, "@");
 }
 
 /* Is child a subnode of parent, or equal? */
@@ -87,7 +85,7 @@ static void add_event(struct connection *conn,
 	unsigned int len;
 	char *data;
 
-	if (!check_event_node(name)) {
+	if (!check_special_event(name)) {
 		/* Can this conn load node, or see that it doesn't exist? */
 		struct node *node = get_node(conn, ctx, name, XS_PERM_READ);
 		/*
-- 
2.17.1

From c625fae44aedc246776b52eb1173cf847a3d4d80 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:41 +0200
Subject: [PATCH 05/10] tools/xenstore: check privilege for
 XS_IS_DOMAIN_INTRODUCED

The Xenstore command XS_IS_DOMAIN_INTRODUCED should be possible for
privileged domains only (the only user in the tree is the xenpaging
daemon).

Instead of having the privilege test for each command introduce a
per-command flag for that purpose.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c   | 24 ++++++++++++++++++------
 tools/xenstore/xenstored_domain.c |  7 ++-----
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index db9b9ca7957d..6afd58431111 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -1283,8 +1283,10 @@ static struct {
 	int (*func)(struct connection *conn, struct buffered_data *in);
 	unsigned int flags;
 #define XS_FLAG_NOTID		(1U << 0)	/* Ignore transaction id. */
+#define XS_FLAG_PRIV		(1U << 1)	/* Privileged domain only. */
 } const wire_funcs[XS_TYPE_COUNT] = {
-	[XS_CONTROL]           = { "CONTROL",           do_control },
+	[XS_CONTROL]           =
+	    { "CONTROL",       do_control,      XS_FLAG_PRIV },
 	[XS_DIRECTORY]         = { "DIRECTORY",         send_directory },
 	[XS_READ]              = { "READ",              do_read },
 	[XS_GET_PERMS]         = { "GET_PERMS",         do_get_perms },
@@ -1294,8 +1296,10 @@ static struct {
 	    { "UNWATCH",       do_unwatch,      XS_FLAG_NOTID },
 	[XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start },
 	[XS_TRANSACTION_END]   = { "TRANSACTION_END",   do_transaction_end },
-	[XS_INTRODUCE]         = { "INTRODUCE",         do_introduce },
-	[XS_RELEASE]           = { "RELEASE",           do_release },
+	[XS_INTRODUCE]         =
+	    { "INTRODUCE",     do_introduce,    XS_FLAG_PRIV },
+	[XS_RELEASE]           =
+	    { "RELEASE",       do_release,      XS_FLAG_PRIV },
 	[XS_GET_DOMAIN_PATH]   = { "GET_DOMAIN_PATH",   do_get_domain_path },
 	[XS_WRITE]             = { "WRITE",             do_write },
 	[XS_MKDIR]             = { "MKDIR",             do_mkdir },
@@ -1304,9 +1308,11 @@ static struct {
 	[XS_WATCH_EVENT]       = { "WATCH_EVENT",       NULL },
 	[XS_ERROR]             = { "ERROR",             NULL },
 	[XS_IS_DOMAIN_INTRODUCED] =
-			{ "IS_DOMAIN_INTRODUCED", do_is_domain_introduced },
-	[XS_RESUME]            = { "RESUME",            do_resume },
-	[XS_SET_TARGET]        = { "SET_TARGET",        do_set_target },
+	    { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced, XS_FLAG_PRIV },
+	[XS_RESUME]            =
+	    { "RESUME",        do_resume,       XS_FLAG_PRIV },
+	[XS_SET_TARGET]        =
+	    { "SET_TARGET",    do_set_target,   XS_FLAG_PRIV },
 	[XS_RESET_WATCHES]     = { "RESET_WATCHES",     do_reset_watches },
 	[XS_DIRECTORY_PART]    = { "DIRECTORY_PART",    send_directory_part },
 };
@@ -1334,6 +1340,12 @@ static void process_message(struct connection *conn, struct buffered_data *in)
 		return;
 	}
 
+	if ((wire_funcs[type].flags & XS_FLAG_PRIV) &&
+	    domain_is_unprivileged(conn)) {
+		send_error(conn, EACCES);
+		return;
+	}
+
 	trans = (wire_funcs[type].flags & XS_FLAG_NOTID)
 		? NULL : transaction_lookup(conn, in->hdr.msg.tx_id);
 	if (IS_ERR(trans)) {
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 1eae703ef680..0e2926e2a3d0 100644
--- tools/xenstore/xenstored_domain.c.orig
+++ tools/xenstore/xenstored_domain.c
@@ -377,7 +377,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in)
 	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
 		return EINVAL;
 
-	if (domain_is_unprivileged(conn) || !conn->can_write)
+	if (!conn->can_write)
 		return EACCES;
 
 	domid = atoi(vec[0]);
@@ -445,7 +445,7 @@ int do_set_target(struct connection *conn, struct buffered_data *in)
 	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
 		return EINVAL;
 
-	if (domain_is_unprivileged(conn) || !conn->can_write)
+	if (!conn->can_write)
 		return EACCES;
 
 	domid = atoi(vec[0]);
@@ -480,9 +480,6 @@ static struct domain *onearg_domain(struct connection *conn,
 	if (!domid)
 		return ERR_PTR(-EINVAL);
 
-	if (domain_is_unprivileged(conn))
-		return ERR_PTR(-EACCES);
-
 	return find_connected_domain(domid);
 }
 
-- 
2.17.1

From 461c880600175c06e23a63e62d9f1ccab755d708 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:42 +0200
Subject: [PATCH 06/10] tools/xenstore: rework node removal

Today a Xenstore node is being removed by deleting it from the parent
first and then deleting itself and all its children. This results in
stale entries remaining in the data base in case e.g. a memory
allocation is failing during processing. This would result in the
rather strange behavior to be able to read a node (as its still in the
data base) while not being visible in the tree view of Xenstore.

Fix that by deleting the nodes from the leaf side instead of starting
at the root.

As fire_watches() is now called from _rm() the ctx parameter needs a
const attribute.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c  | 99 ++++++++++++++++----------------
 tools/xenstore/xenstored_watch.c |  4 +-
 tools/xenstore/xenstored_watch.h |  2 +-
 3 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 6afd58431111..1cb729a2cd5f 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -1087,74 +1087,76 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in)
 	return 0;
 }
 
-static void delete_node(struct connection *conn, struct node *node)
-{
-	unsigned int i;
-	char *name;
-
-	/* Delete self, then delete children.  If we crash, then the worst
-	   that can happen is the children will continue to take up space, but
-	   will otherwise be unreachable. */
-	delete_node_single(conn, node);
-
-	/* Delete children, too. */
-	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
-		struct node *child;
-
-		name = talloc_asprintf(node, "%s/%s", node->name,
-				       node->children + i);
-		child = name ? read_node(conn, node, name) : NULL;
-		if (child) {
-			delete_node(conn, child);
-		}
-		else {
-			trace("delete_node: Error deleting child '%s/%s'!\n",
-			      node->name, node->children + i);
-			/* Skip it, we've already deleted the parent. */
-		}
-		talloc_free(name);
-	}
-}
-
-
 /* Delete memory using memmove. */
 static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
 {
 	memmove(mem + off, mem + off + len, total - off - len);
 }
 
-
-static int remove_child_entry(struct connection *conn, struct node *node,
-			      size_t offset)
+static void remove_child_entry(struct connection *conn, struct node *node,
+			       size_t offset)
 {
 	size_t childlen = strlen(node->children + offset);
+
 	memdel(node->children, offset, childlen + 1, node->childlen);
 	node->childlen -= childlen + 1;
-	return write_node(conn, node, true);
+	if (write_node(conn, node, true))
+		corrupt(conn, "Can't update parent node '%s'", node->name);
 }
 
-
-static int delete_child(struct connection *conn,
-			struct node *node, const char *childname)
+static void delete_child(struct connection *conn,
+			 struct node *node, const char *childname)
 {
 	unsigned int i;
 
 	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
 		if (streq(node->children+i, childname)) {
-			return remove_child_entry(conn, node, i);
+			remove_child_entry(conn, node, i);
+			return;
 		}
 	}
 	corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
-	return ENOENT;
 }
 
+static int delete_node(struct connection *conn, struct node *parent,
+		       struct node *node)
+{
+	char *name;
+
+	/* Delete children. */
+	while (node->childlen) {
+		struct node *child;
+
+		name = talloc_asprintf(node, "%s/%s", node->name,
+				       node->children);
+		child = name ? read_node(conn, node, name) : NULL;
+		if (child) {
+			if (delete_node(conn, node, child))
+				return errno;
+		} else {
+			trace("delete_node: Error deleting child '%s/%s'!\n",
+			      node->name, node->children);
+			/* Quit deleting. */
+			errno = ENOMEM;
+			return errno;
+		}
+		talloc_free(name);
+	}
+
+	delete_node_single(conn, node);
+	delete_child(conn, parent, basename(node->name));
+	talloc_free(node);
+
+	return 0;
+}
 
 static int _rm(struct connection *conn, const void *ctx, struct node *node,
 	       const char *name)
 {
-	/* Delete from parent first, then if we crash, the worst that can
-	   happen is the child will continue to take up space, but will
-	   otherwise be unreachable. */
+	/*
+	 * Deleting node by node, so the result is always consistent even in
+	 * case of a failure.
+	 */
 	struct node *parent;
 	char *parentname = get_parent(ctx, name);
 
@@ -1165,11 +1167,13 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
 	if (!parent)
 		return (errno == ENOMEM) ? ENOMEM : EINVAL;
 
-	if (delete_child(conn, parent, basename(name)))
-		return EINVAL;
-
-	delete_node(conn, node);
-	return 0;
+	/*
+	 * Fire the watches now, when we can still see the node permissions.
+	 * This fine as we are single threaded and the next possible read will
+	 * be handled only after the node has been really removed.
+	 */
+	fire_watches(conn, ctx, name, true);
+	return delete_node(conn, parent, node);
 }
 
 
@@ -1207,7 +1211,6 @@ static int do_rm(struct connection *conn, struct buffered_data *in)
 	if (ret)
 		return ret;
 
-	fire_watches(conn, in, name, true);
 	send_ack(conn, XS_RM);
 
 	return 0;
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index f2f1bed47cc6..f0bbfe7a6dc6 100644
--- tools/xenstore/xenstored_watch.c.orig
+++ tools/xenstore/xenstored_watch.c
@@ -77,7 +77,7 @@ static bool is_child(const char *child, const char *parent)
  * Temporary memory allocations are done with ctx.
  */
 static void add_event(struct connection *conn,
-		      void *ctx,
+		      const void *ctx,
 		      struct watch *watch,
 		      const char *name)
 {
@@ -121,7 +121,7 @@ static void add_event(struct connection *conn,
  * Check whether any watch events are to be sent.
  * Temporary memory allocations are done with ctx.
  */
-void fire_watches(struct connection *conn, void *ctx, const char *name,
+void fire_watches(struct connection *conn, const void *ctx, const char *name,
 		  bool recurse)
 {
 	struct connection *i;
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
index c72ea6a68542..54d4ea7e0d41 100644
--- tools/xenstore/xenstored_watch.h.orig
+++ tools/xenstore/xenstored_watch.h
@@ -25,7 +25,7 @@ int do_watch(struct connection *conn, struct buffered_data *in);
 int do_unwatch(struct connection *conn, struct buffered_data *in);
 
 /* Fire all watches: recurse means all the children are affected (ie. rm). */
-void fire_watches(struct connection *conn, void *tmp, const char *name,
+void fire_watches(struct connection *conn, const void *tmp, const char *name,
 		  bool recurse);
 
 void conn_delete_all_watches(struct connection *conn);
-- 
2.17.1

From 6ca2e14b43aecc79effc1a0cd528a4aceef44d42 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:43 +0200
Subject: [PATCH 07/10] tools/xenstore: fire watches only when removing a
 specific node

Instead of firing all watches for removing a subtree in one go, do so
only when the related node is being removed.

The watches for the top-most node being removed include all watches
including that node, while watches for nodes below that are only fired
if they are matching exactly. This avoids firing any watch more than
once when removing a subtree.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c  | 11 ++++++-----
 tools/xenstore/xenstored_watch.c | 13 ++++++++-----
 tools/xenstore/xenstored_watch.h |  4 ++--
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 1cb729a2cd5f..d7c025616ead 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -1118,8 +1118,8 @@ static void delete_child(struct connection *conn,
 	corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
 }
 
-static int delete_node(struct connection *conn, struct node *parent,
-		       struct node *node)
+static int delete_node(struct connection *conn, const void *ctx,
+		       struct node *parent, struct node *node)
 {
 	char *name;
 
@@ -1131,7 +1131,7 @@ static int delete_node(struct connection *conn, struct node *parent,
 				       node->children);
 		child = name ? read_node(conn, node, name) : NULL;
 		if (child) {
-			if (delete_node(conn, node, child))
+			if (delete_node(conn, ctx, node, child))
 				return errno;
 		} else {
 			trace("delete_node: Error deleting child '%s/%s'!\n",
@@ -1143,6 +1143,7 @@ static int delete_node(struct connection *conn, struct node *parent,
 		talloc_free(name);
 	}
 
+	fire_watches(conn, ctx, node->name, true);
 	delete_node_single(conn, node);
 	delete_child(conn, parent, basename(node->name));
 	talloc_free(node);
@@ -1172,8 +1173,8 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
 	 * This fine as we are single threaded and the next possible read will
 	 * be handled only after the node has been really removed.
 	 */
-	fire_watches(conn, ctx, name, true);
-	return delete_node(conn, parent, node);
+	fire_watches(conn, ctx, name, false);
+	return delete_node(conn, ctx, parent, node);
 }
 
 
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index f0bbfe7a6dc6..3836675459fa 100644
--- tools/xenstore/xenstored_watch.c.orig
+++ tools/xenstore/xenstored_watch.c
@@ -122,7 +122,7 @@ static void add_event(struct connection *conn,
  * Temporary memory allocations are done with ctx.
  */
 void fire_watches(struct connection *conn, const void *ctx, const char *name,
-		  bool recurse)
+		  bool exact)
 {
 	struct connection *i;
 	struct watch *watch;
@@ -134,10 +134,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
 	/* Create an event for each watch. */
 	list_for_each_entry(i, &connections, list) {
 		list_for_each_entry(watch, &i->watches, list) {
-			if (is_child(name, watch->node))
-				add_event(i, ctx, watch, name);
-			else if (recurse && is_child(watch->node, name))
-				add_event(i, ctx, watch, watch->node);
+			if (exact) {
+				if (streq(name, watch->node))
+					add_event(i, ctx, watch, name);
+			} else {
+				if (is_child(name, watch->node))
+					add_event(i, ctx, watch, name);
+			}
 		}
 	}
 }
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
index 54d4ea7e0d41..1b3c80d3dda1 100644
--- tools/xenstore/xenstored_watch.h.orig
+++ tools/xenstore/xenstored_watch.h
@@ -24,9 +24,9 @@
 int do_watch(struct connection *conn, struct buffered_data *in);
 int do_unwatch(struct connection *conn, struct buffered_data *in);
 
-/* Fire all watches: recurse means all the children are affected (ie. rm). */
+/* Fire all watches: !exact means all the children are affected (ie. rm). */
 void fire_watches(struct connection *conn, const void *tmp, const char *name,
-		  bool recurse);
+		  bool exact);
 
 void conn_delete_all_watches(struct connection *conn);
 
-- 
2.17.1

From 2d4f410899bf59e112c107f371c3d164f8a592f8 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:44 +0200
Subject: [PATCH 08/10] tools/xenstore: introduce node_perms structure

There are several places in xenstored using a permission array and the
size of that array. Introduce a new struct node_perms containing both.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c   | 79 +++++++++++++++----------------
 tools/xenstore/xenstored_core.h   |  8 +++-
 tools/xenstore/xenstored_domain.c | 12 ++---
 3 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index d7c025616ead..fe9943113b9f 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -401,14 +401,14 @@ static struct node *read_node(struct connection *conn, const void *ctx,
 	/* Datalen, childlen, number of permissions */
 	hdr = (void *)data.dptr;
 	node->generation = hdr->generation;
-	node->num_perms = hdr->num_perms;
+	node->perms.num = hdr->num_perms;
 	node->datalen = hdr->datalen;
 	node->childlen = hdr->childlen;
 
 	/* Permissions are struct xs_permissions. */
-	node->perms = hdr->perms;
+	node->perms.p = hdr->perms;
 	/* Data is binary blob (usually ascii, no nul). */
-	node->data = node->perms + node->num_perms;
+	node->data = node->perms.p + node->perms.num;
 	/* Children is strings, nul separated. */
 	node->children = node->data + node->datalen;
 
@@ -425,7 +425,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
 	struct xs_tdb_record_hdr *hdr;
 
 	data.dsize = sizeof(*hdr)
-		+ node->num_perms*sizeof(node->perms[0])
+		+ node->perms.num * sizeof(node->perms.p[0])
 		+ node->datalen + node->childlen;
 
 	if (!no_quota_check && domain_is_unprivileged(conn) &&
@@ -437,12 +437,13 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
 	data.dptr = talloc_size(node, data.dsize);
 	hdr = (void *)data.dptr;
 	hdr->generation = node->generation;
-	hdr->num_perms = node->num_perms;
+	hdr->num_perms = node->perms.num;
 	hdr->datalen = node->datalen;
 	hdr->childlen = node->childlen;
 
-	memcpy(hdr->perms, node->perms, node->num_perms*sizeof(node->perms[0]));
-	p = hdr->perms + node->num_perms;
+	memcpy(hdr->perms, node->perms.p,
+	       node->perms.num * sizeof(*node->perms.p));
+	p = hdr->perms + node->perms.num;
 	memcpy(p, node->data, node->datalen);
 	p += node->datalen;
 	memcpy(p, node->children, node->childlen);
@@ -468,8 +469,7 @@ static int write_node(struct connection *conn, struct node *node,
 }
 
 static enum xs_perm_type perm_for_conn(struct connection *conn,
-				       struct xs_permissions *perms,
-				       unsigned int num)
+				       const struct node_perms *perms)
 {
 	unsigned int i;
 	enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
@@ -478,16 +478,16 @@ static enum xs_perm_type perm_for_conn(struct connection *conn,
 		mask &= ~XS_PERM_WRITE;
 
 	/* Owners and tools get it all... */
-	if (!domain_is_unprivileged(conn) || perms[0].id == conn->id
-                || (conn->target && perms[0].id == conn->target->id))
+	if (!domain_is_unprivileged(conn) || perms->p[0].id == conn->id
+                || (conn->target && perms->p[0].id == conn->target->id))
 		return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
 
-	for (i = 1; i < num; i++)
-		if (perms[i].id == conn->id
-                        || (conn->target && perms[i].id == conn->target->id))
-			return perms[i].perms & mask;
+	for (i = 1; i < perms->num; i++)
+		if (perms->p[i].id == conn->id
+                        || (conn->target && perms->p[i].id == conn->target->id))
+			return perms->p[i].perms & mask;
 
-	return perms[0].perms & mask;
+	return perms->p[0].perms & mask;
 }
 
 /*
@@ -534,7 +534,7 @@ static int ask_parents(struct connection *conn, const void *ctx,
 		return 0;
 	}
 
-	*perm = perm_for_conn(conn, node->perms, node->num_perms);
+	*perm = perm_for_conn(conn, &node->perms);
 	return 0;
 }
 
@@ -580,8 +580,7 @@ struct node *get_node(struct connection *conn,
 	node = read_node(conn, ctx, name);
 	/* If we don't have permission, we don't have node. */
 	if (node) {
-		if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
-		    != perm) {
+		if ((perm_for_conn(conn, &node->perms) & perm) != perm) {
 			errno = EACCES;
 			node = NULL;
 		}
@@ -757,16 +756,15 @@ const char *onearg(struct buffered_data *in)
 	return in->buffer;
 }
 
-static char *perms_to_strings(const void *ctx,
-			      struct xs_permissions *perms, unsigned int num,
+static char *perms_to_strings(const void *ctx, const struct node_perms *perms,
 			      unsigned int *len)
 {
 	unsigned int i;
 	char *strings = NULL;
 	char buffer[MAX_STRLEN(unsigned int) + 1];
 
-	for (*len = 0, i = 0; i < num; i++) {
-		if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer)))
+	for (*len = 0, i = 0; i < perms->num; i++) {
+		if (!xs_perm_to_string(&perms->p[i], buffer, sizeof(buffer)))
 			return NULL;
 
 		strings = talloc_realloc(ctx, strings, char,
@@ -945,13 +943,13 @@ static struct node *construct_node(struct connection *conn, const void *ctx,
 		goto nomem;
 
 	/* Inherit permissions, except unprivileged domains own what they create */
-	node->num_perms = parent->num_perms;
-	node->perms = talloc_memdup(node, parent->perms,
-				    node->num_perms * sizeof(node->perms[0]));
-	if (!node->perms)
+	node->perms.num = parent->perms.num;
+	node->perms.p = talloc_memdup(node, parent->perms.p,
+				      node->perms.num * sizeof(*node->perms.p));
+	if (!node->perms.p)
 		goto nomem;
 	if (domain_is_unprivileged(conn))
-		node->perms[0].id = conn->id;
+		node->perms.p[0].id = conn->id;
 
 	/* No children, no data */
 	node->children = node->data = NULL;
@@ -1228,7 +1226,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in)
 	if (!node)
 		return errno;
 
-	strings = perms_to_strings(node, node->perms, node->num_perms, &len);
+	strings = perms_to_strings(node, &node->perms, &len);
 	if (!strings)
 		return errno;
 
@@ -1239,13 +1237,12 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in)
 
 static int do_set_perms(struct connection *conn, struct buffered_data *in)
 {
-	unsigned int num;
-	struct xs_permissions *perms;
+	struct node_perms perms;
 	char *name, *permstr;
 	struct node *node;
 
-	num = xs_count_strings(in->buffer, in->used);
-	if (num < 2)
+	perms.num = xs_count_strings(in->buffer, in->used);
+	if (perms.num < 2)
 		return EINVAL;
 
 	/* First arg is node name. */
@@ -1256,21 +1253,21 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 		return errno;
 
 	permstr = in->buffer + strlen(in->buffer) + 1;
-	num--;
+	perms.num--;
 
-	perms = talloc_array(node, struct xs_permissions, num);
-	if (!perms)
+	perms.p = talloc_array(node, struct xs_permissions, perms.num);
+	if (!perms.p)
 		return ENOMEM;
-	if (!xs_strings_to_perms(perms, num, permstr))
+	if (!xs_strings_to_perms(perms.p, perms.num, permstr))
 		return errno;
 
 	/* Unprivileged domains may not change the owner. */
-	if (domain_is_unprivileged(conn) && perms[0].id != node->perms[0].id)
+	if (domain_is_unprivileged(conn) &&
+	    perms.p[0].id != node->perms.p[0].id)
 		return EPERM;
 
 	domain_entry_dec(conn, node);
 	node->perms = perms;
-	node->num_perms = num;
 	domain_entry_inc(conn, node);
 
 	if (write_node(conn, node, false))
@@ -1545,8 +1542,8 @@ static void manual_node(const char *name, const char *child)
 		barf_perror("Could not allocate initial node %s", name);
 
 	node->name = name;
-	node->perms = &perms;
-	node->num_perms = 1;
+	node->perms.p = &perms;
+	node->perms.num = 1;
 	node->children = (char *)child;
 	if (child)
 		node->childlen = strlen(child) + 1;
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 3cb1c235a101..193d93142636 100644
--- tools/xenstore/xenstored_core.h.orig
+++ tools/xenstore/xenstored_core.h
@@ -109,6 +109,11 @@ struct connection
 };
 extern struct list_head connections;
 
+struct node_perms {
+	unsigned int num;
+	struct xs_permissions *p;
+};
+
 struct node {
 	const char *name;
 
@@ -120,8 +125,7 @@ struct node {
 #define NO_GENERATION ~((uint64_t)0)
 
 	/* Permissions. */
-	unsigned int num_perms;
-	struct xs_permissions *perms;
+	struct node_perms perms;
 
 	/* Contents. */
 	unsigned int datalen;
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 0e2926e2a3d0..dc51cdfa9aa7 100644
--- tools/xenstore/xenstored_domain.c.orig
+++ tools/xenstore/xenstored_domain.c
@@ -657,12 +657,12 @@ void domain_entry_inc(struct connection *conn, struct node *node)
 	if (!conn)
 		return;
 
-	if (node->perms && node->perms[0].id != conn->id) {
+	if (node->perms.p && node->perms.p[0].id != conn->id) {
 		if (conn->transaction) {
 			transaction_entry_inc(conn->transaction,
-				node->perms[0].id);
+				node->perms.p[0].id);
 		} else {
-			d = find_domain_by_domid(node->perms[0].id);
+			d = find_domain_by_domid(node->perms.p[0].id);
 			if (d)
 				d->nbentry++;
 		}
@@ -683,12 +683,12 @@ void domain_entry_dec(struct connection *conn, struct node *node)
 	if (!conn)
 		return;
 
-	if (node->perms && node->perms[0].id != conn->id) {
+	if (node->perms.p && node->perms.p[0].id != conn->id) {
 		if (conn->transaction) {
 			transaction_entry_dec(conn->transaction,
-				node->perms[0].id);
+				node->perms.p[0].id);
 		} else {
-			d = find_domain_by_domid(node->perms[0].id);
+			d = find_domain_by_domid(node->perms.p[0].id);
 			if (d && d->nbentry)
 				d->nbentry--;
 		}
-- 
2.17.1

From cddf74031b3c8a108e8fd7db0bf56e9c2809d3e2 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:45 +0200
Subject: [PATCH 09/10] tools/xenstore: allow special watches for privileged
 callers only

The special watches "@introduceDomain" and "@releaseDomain" should be
allowed for privileged callers only, as they allow to gain information
about presence of other guests on the host. So send watch events for
those watches via privileged connections only.

In order to allow for disaggregated setups where e.g. driver domains
need to make use of those special watches add support for calling
"set permissions" for those special nodes, too.

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 docs/misc/xenstore.txt            |  5 +++
 tools/xenstore/xenstored_core.c   | 27 ++++++++------
 tools/xenstore/xenstored_core.h   |  2 ++
 tools/xenstore/xenstored_domain.c | 60 +++++++++++++++++++++++++++++++
 tools/xenstore/xenstored_domain.h |  5 +++
 tools/xenstore/xenstored_watch.c  |  4 +++
 6 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/docs/misc/xenstore.txt b/docs/misc/xenstore.txt
index 6f8569d5760f..32969eb3fecd 100644
--- docs/misc/xenstore.txt.orig
+++ docs/misc/xenstore.txt
@@ -170,6 +170,9 @@ SET_PERMS		<path>|<perm-as-string>|+?
 		n<domid>	no access
 	See http://wiki.xen.org/wiki/XenBus section
 	`Permissions' for details of the permissions system.
+	It is possible to set permissions for the special watch paths
+	"@introduceDomain" and "@releaseDomain" to enable receiving those
+	watches in unprivileged domains.
 
 ---------- Watches ----------
 
@@ -194,6 +197,8 @@ WATCH			<wpath>|<token>|?
 	    @releaseDomain 	occurs on any domain crash or
 				shutdown, and also on RELEASE
 				and domain destruction
+	<wspecial> events are sent to privileged callers or explicitly
+	via SET_PERMS enabled domains only.
 
 	When a watch is first set up it is triggered once straight
 	away, with <path> equal to <wpath>.  Watches may be triggered
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index fe9943113b9f..720bec269dd3 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -468,8 +468,8 @@ static int write_node(struct connection *conn, struct node *node,
 	return write_node_raw(conn, &key, node, no_quota_check);
 }
 
-static enum xs_perm_type perm_for_conn(struct connection *conn,
-				       const struct node_perms *perms)
+enum xs_perm_type perm_for_conn(struct connection *conn,
+				const struct node_perms *perms)
 {
 	unsigned int i;
 	enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
@@ -1245,22 +1245,29 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 	if (perms.num < 2)
 		return EINVAL;
 
-	/* First arg is node name. */
-	/* We must own node to do this (tools can do this too). */
-	node = get_node_canonicalized(conn, in, in->buffer, &name,
-				      XS_PERM_WRITE | XS_PERM_OWNER);
-	if (!node)
-		return errno;
-
 	permstr = in->buffer + strlen(in->buffer) + 1;
 	perms.num--;
 
-	perms.p = talloc_array(node, struct xs_permissions, perms.num);
+	perms.p = talloc_array(in, struct xs_permissions, perms.num);
 	if (!perms.p)
 		return ENOMEM;
 	if (!xs_strings_to_perms(perms.p, perms.num, permstr))
 		return errno;
 
+	/* First arg is node name. */
+	if (strstarts(in->buffer, "@")) {
+		if (set_perms_special(conn, in->buffer, &perms))
+			return errno;
+		send_ack(conn, XS_SET_PERMS);
+		return 0;
+	}
+
+	/* We must own node to do this (tools can do this too). */
+	node = get_node_canonicalized(conn, in, in->buffer, &name,
+				      XS_PERM_WRITE | XS_PERM_OWNER);
+	if (!node)
+		return errno;
+
 	/* Unprivileged domains may not change the owner. */
 	if (domain_is_unprivileged(conn) &&
 	    perms.p[0].id != node->perms.p[0].id)
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 193d93142636..f3da6bbc943d 100644
--- tools/xenstore/xenstored_core.h.orig
+++ tools/xenstore/xenstored_core.h
@@ -165,6 +165,8 @@ struct node *get_node(struct connection *conn,
 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
 void check_store(void);
 void corrupt(struct connection *conn, const char *fmt, ...);
+enum xs_perm_type perm_for_conn(struct connection *conn,
+				const struct node_perms *perms);
 
 /* Is this a valid node name? */
 bool is_valid_nodename(const char *node);
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index dc51cdfa9aa7..7afabe0ae084 100644
--- tools/xenstore/xenstored_domain.c.orig
+++ tools/xenstore/xenstored_domain.c
@@ -41,6 +41,9 @@ static evtchn_port_t virq_port;
 
 xenevtchn_handle *xce_handle = NULL;
 
+static struct node_perms dom_release_perms;
+static struct node_perms dom_introduce_perms;
+
 struct domain
 {
 	struct list_head list;
@@ -589,6 +592,59 @@ void restore_existing_connections(void)
 {
 }
 
+static int set_dom_perms_default(struct node_perms *perms)
+{
+	perms->num = 1;
+	perms->p = talloc_array(NULL, struct xs_permissions, perms->num);
+	if (!perms->p)
+		return -1;
+	perms->p->id = 0;
+	perms->p->perms = XS_PERM_NONE;
+
+	return 0;
+}
+
+static struct node_perms *get_perms_special(const char *name)
+{
+	if (!strcmp(name, "@releaseDomain"))
+		return &dom_release_perms;
+	if (!strcmp(name, "@introduceDomain"))
+		return &dom_introduce_perms;
+	return NULL;
+}
+
+int set_perms_special(struct connection *conn, const char *name,
+		      struct node_perms *perms)
+{
+	struct node_perms *p;
+
+	p = get_perms_special(name);
+	if (!p)
+		return EINVAL;
+
+	if ((perm_for_conn(conn, p) & (XS_PERM_WRITE | XS_PERM_OWNER)) !=
+	    (XS_PERM_WRITE | XS_PERM_OWNER))
+		return EACCES;
+
+	p->num = perms->num;
+	talloc_free(p->p);
+	p->p = perms->p;
+	talloc_steal(NULL, perms->p);
+
+	return 0;
+}
+
+bool check_perms_special(const char *name, struct connection *conn)
+{
+	struct node_perms *p;
+
+	p = get_perms_special(name);
+	if (!p)
+		return false;
+
+	return perm_for_conn(conn, p) & XS_PERM_READ;
+}
+
 static int dom0_init(void) 
 { 
 	evtchn_port_t port;
@@ -610,6 +666,10 @@ static int dom0_init(void)
 
 	xenevtchn_notify(xce_handle, dom0->port);
 
+	if (set_dom_perms_default(&dom_release_perms) ||
+	    set_dom_perms_default(&dom_introduce_perms))
+		return -1;
+
 	return 0; 
 }
 
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
index 56ae01597475..259183962a9c 100644
--- tools/xenstore/xenstored_domain.h.orig
+++ tools/xenstore/xenstored_domain.h
@@ -65,6 +65,11 @@ void domain_watch_inc(struct connection *conn);
 void domain_watch_dec(struct connection *conn);
 int domain_watch(struct connection *conn);
 
+/* Special node permission handling. */
+int set_perms_special(struct connection *conn, const char *name,
+		      struct node_perms *perms);
+bool check_perms_special(const char *name, struct connection *conn);
+
 /* Write rate limiting */
 
 #define WRL_FACTOR   1000 /* for fixed-point arithmetic */
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index 3836675459fa..f4e289362eb6 100644
--- tools/xenstore/xenstored_watch.c.orig
+++ tools/xenstore/xenstored_watch.c
@@ -133,6 +133,10 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
 
 	/* Create an event for each watch. */
 	list_for_each_entry(i, &connections, list) {
+		/* introduce/release domain watches */
+		if (check_special_event(name) && !check_perms_special(name, i))
+			continue;
+
 		list_for_each_entry(watch, &i->watches, list) {
 			if (exact) {
 				if (streq(name, watch->node))
-- 
2.17.1

From e57b7687b43b033fe45e755e285efbe67bc71921 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 11 Jun 2020 16:12:46 +0200
Subject: [PATCH 10/10] tools/xenstore: avoid watch events for nodes without
 access

Today watch events are sent regardless of the access rights of the
node the event is sent for. This enables any guest to e.g. setup a
watch for "/" in order to have a detailed record of all Xenstore
modifications.

Modify that by sending only watch events for nodes that the watcher
has a chance to see otherwise (either via direct reads or by querying
the children of a node). This includes cases where the visibility of
a node for a watcher is changing (permissions being removed).

This is part of XSA-115.

Signed-off-by: Juergen Gross <jgross@suse.com>
[julieng: Handle rebase conflict]
Reviewed-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 tools/xenstore/xenstored_core.c        | 28 +++++-----
 tools/xenstore/xenstored_core.h        | 15 ++++--
 tools/xenstore/xenstored_domain.c      |  6 +--
 tools/xenstore/xenstored_transaction.c | 21 +++++++-
 tools/xenstore/xenstored_watch.c       | 75 +++++++++++++++++++-------
 tools/xenstore/xenstored_watch.h       |  2 +-
 6 files changed, 104 insertions(+), 43 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index 720bec269dd3..1c2845454560 100644
--- tools/xenstore/xenstored_core.c.orig
+++ tools/xenstore/xenstored_core.c
@@ -358,8 +358,8 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx,
  * If it fails, returns NULL and sets errno.
  * Temporary memory allocations will be done with ctx.
  */
-static struct node *read_node(struct connection *conn, const void *ctx,
-			      const char *name)
+struct node *read_node(struct connection *conn, const void *ctx,
+		       const char *name)
 {
 	TDB_DATA key, data;
 	struct xs_tdb_record_hdr *hdr;
@@ -494,7 +494,7 @@ enum xs_perm_type perm_for_conn(struct connection *conn,
  * Get name of node parent.
  * Temporary memory allocations are done with ctx.
  */
-static char *get_parent(const void *ctx, const char *node)
+char *get_parent(const void *ctx, const char *node)
 {
 	char *parent;
 	char *slash = strrchr(node + 1, '/');
@@ -566,10 +566,10 @@ static int errno_from_parents(struct connection *conn, const void *ctx,
  * If it fails, returns NULL and sets errno.
  * Temporary memory allocations are done with ctx.
  */
-struct node *get_node(struct connection *conn,
-		      const void *ctx,
-		      const char *name,
-		      enum xs_perm_type perm)
+static struct node *get_node(struct connection *conn,
+			     const void *ctx,
+			     const char *name,
+			     enum xs_perm_type perm)
 {
 	struct node *node;
 
@@ -1056,7 +1056,7 @@ static int do_write(struct connection *conn, struct buffered_data *in)
 			return errno;
 	}
 
-	fire_watches(conn, in, name, false);
+	fire_watches(conn, in, name, node, false, NULL);
 	send_ack(conn, XS_WRITE);
 
 	return 0;
@@ -1078,7 +1078,7 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in)
 		node = create_node(conn, in, name, NULL, 0);
 		if (!node)
 			return errno;
-		fire_watches(conn, in, name, false);
+		fire_watches(conn, in, name, node, false, NULL);
 	}
 	send_ack(conn, XS_MKDIR);
 
@@ -1141,7 +1141,7 @@ static int delete_node(struct connection *conn, const void *ctx,
 		talloc_free(name);
 	}
 
-	fire_watches(conn, ctx, node->name, true);
+	fire_watches(conn, ctx, node->name, node, true, NULL);
 	delete_node_single(conn, node);
 	delete_child(conn, parent, basename(node->name));
 	talloc_free(node);
@@ -1165,13 +1165,14 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
 	parent = read_node(conn, ctx, parentname);
 	if (!parent)
 		return (errno == ENOMEM) ? ENOMEM : EINVAL;
+	node->parent = parent;
 
 	/*
 	 * Fire the watches now, when we can still see the node permissions.
 	 * This fine as we are single threaded and the next possible read will
 	 * be handled only after the node has been really removed.
 	 */
-	fire_watches(conn, ctx, name, false);
+	fire_watches(conn, ctx, name, node, false, NULL);
 	return delete_node(conn, ctx, parent, node);
 }
 
@@ -1237,7 +1238,7 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in)
 
 static int do_set_perms(struct connection *conn, struct buffered_data *in)
 {
-	struct node_perms perms;
+	struct node_perms perms, old_perms;
 	char *name, *permstr;
 	struct node *node;
 
@@ -1273,6 +1274,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 	    perms.p[0].id != node->perms.p[0].id)
 		return EPERM;
 
+	old_perms = node->perms;
 	domain_entry_dec(conn, node);
 	node->perms = perms;
 	domain_entry_inc(conn, node);
@@ -1280,7 +1282,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
 	if (write_node(conn, node, false))
 		return errno;
 
-	fire_watches(conn, in, name, false);
+	fire_watches(conn, in, name, node, false, &old_perms);
 	send_ack(conn, XS_SET_PERMS);
 
 	return 0;
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index f3da6bbc943d..e050b27cbdde 100644
--- tools/xenstore/xenstored_core.h.orig
+++ tools/xenstore/xenstored_core.h
@@ -152,15 +152,17 @@ void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
 /* Canonicalize this path if possible. */
 char *canonicalize(struct connection *conn, const void *ctx, const char *node);
 
+/* Get access permissions. */
+enum xs_perm_type perm_for_conn(struct connection *conn,
+				const struct node_perms *perms);
+
 /* Write a node to the tdb data base. */
 int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
 		   bool no_quota_check);
 
-/* Get this node, checking we have permissions. */
-struct node *get_node(struct connection *conn,
-		      const void *ctx,
-		      const char *name,
-		      enum xs_perm_type perm);
+/* Get a node from the tdb data base. */
+struct node *read_node(struct connection *conn, const void *ctx,
+		       const char *name);
 
 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
 void check_store(void);
@@ -171,6 +173,9 @@ enum xs_perm_type perm_for_conn(struct connection *conn,
 /* Is this a valid node name? */
 bool is_valid_nodename(const char *node);
 
+/* Get name of parent node. */
+char *get_parent(const void *ctx, const char *node);
+
 /* Tracing infrastructure. */
 void trace_create(const void *data, const char *type);
 void trace_destroy(const void *data, const char *type);
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 7afabe0ae084..711a11b18ad6 100644
--- tools/xenstore/xenstored_domain.c.orig
+++ tools/xenstore/xenstored_domain.c
@@ -206,7 +206,7 @@ static int destroy_domain(void *_domain)
 			unmap_interface(domain->interface);
 	}
 
-	fire_watches(NULL, domain, "@releaseDomain", false);
+	fire_watches(NULL, domain, "@releaseDomain", NULL, false, NULL);
 
 	wrl_domain_destroy(domain);
 
@@ -244,7 +244,7 @@ static void domain_cleanup(void)
 	}
 
 	if (notify)
-		fire_watches(NULL, NULL, "@releaseDomain", false);
+		fire_watches(NULL, NULL, "@releaseDomain", NULL, false, NULL);
 }
 
 /* We scan all domains rather than use the information given here. */
@@ -410,7 +410,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in)
 		/* Now domain belongs to its connection. */
 		talloc_steal(domain->conn, domain);
 
-		fire_watches(NULL, in, "@introduceDomain", false);
+		fire_watches(NULL, in, "@introduceDomain", NULL, false, NULL);
 	} else if ((domain->mfn == mfn) && (domain->conn != conn)) {
 		/* Use XS_INTRODUCE for recreating the xenbus event-channel. */
 		if (domain->port)
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
index e87897573469..a7d8c5d475ec 100644
--- tools/xenstore/xenstored_transaction.c.orig
+++ tools/xenstore/xenstored_transaction.c
@@ -114,6 +114,9 @@ struct accessed_node
 	/* Generation count (or NO_GENERATION) for conflict checking. */
 	uint64_t generation;
 
+	/* Original node permissions. */
+	struct node_perms perms;
+
 	/* Generation count checking required? */
 	bool check_gen;
 
@@ -260,6 +263,15 @@ int access_node(struct connection *conn, struct node *node,
 		i->node = talloc_strdup(i, node->name);
 		if (!i->node)
 			goto nomem;
+		if (node->generation != NO_GENERATION && node->perms.num) {
+			i->perms.p = talloc_array(i, struct xs_permissions,
+						  node->perms.num);
+			if (!i->perms.p)
+				goto nomem;
+			i->perms.num = node->perms.num;
+			memcpy(i->perms.p, node->perms.p,
+			       i->perms.num * sizeof(*i->perms.p));
+		}
 
 		introduce = true;
 		i->ta_node = false;
@@ -368,9 +380,14 @@ static int finalize_transaction(struct connection *conn,
 				talloc_free(data.dptr);
 				if (ret)
 					goto err;
-			} else if (tdb_delete(tdb_ctx, key))
+				fire_watches(conn, trans, i->node, NULL, false,
+					     i->perms.p ? &i->perms : NULL);
+			} else {
+				fire_watches(conn, trans, i->node, NULL, false,
+					     i->perms.p ? &i->perms : NULL);
+				if (tdb_delete(tdb_ctx, key))
 					goto err;
-			fire_watches(conn, trans, i->node, false);
+			}
 		}
 
 		if (i->ta_node && tdb_delete(tdb_ctx, ta_key))
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
index f4e289362eb6..71c108ea99f1 100644
--- tools/xenstore/xenstored_watch.c.orig
+++ tools/xenstore/xenstored_watch.c
@@ -85,22 +85,6 @@ static void add_event(struct connection *conn,
 	unsigned int len;
 	char *data;
 
-	if (!check_special_event(name)) {
-		/* Can this conn load node, or see that it doesn't exist? */
-		struct node *node = get_node(conn, ctx, name, XS_PERM_READ);
-		/*
-		 * XXX We allow EACCES here because otherwise a non-dom0
-		 * backend driver cannot watch for disappearance of a frontend
-		 * xenstore directory. When the directory disappears, we
-		 * revert to permissions of the parent directory for that path,
-		 * which will typically disallow access for the backend.
-		 * But this breaks device-channel teardown!
-		 * Really we should fix this better...
-		 */
-		if (!node && errno != ENOENT && errno != EACCES)
-			return;
-	}
-
 	if (watch->relative_path) {
 		name += strlen(watch->relative_path);
 		if (*name == '/') /* Could be "" */
@@ -117,12 +101,60 @@ static void add_event(struct connection *conn,
 	talloc_free(data);
 }
 
+/*
+ * Check permissions of a specific watch to fire:
+ * Either the node itself or its parent have to be readable by the connection
+ * the watch has been setup for. In case a watch event is created due to
+ * changed permissions we need to take the old permissions into account, too.
+ */
+static bool watch_permitted(struct connection *conn, const void *ctx,
+			    const char *name, struct node *node,
+			    struct node_perms *perms)
+{
+	enum xs_perm_type perm;
+	struct node *parent;
+	char *parent_name;
+
+	if (perms) {
+		perm = perm_for_conn(conn, perms);
+		if (perm & XS_PERM_READ)
+			return true;
+	}
+
+	if (!node) {
+		node = read_node(conn, ctx, name);
+		if (!node)
+			return false;
+	}
+
+	perm = perm_for_conn(conn, &node->perms);
+	if (perm & XS_PERM_READ)
+		return true;
+
+	parent = node->parent;
+	if (!parent) {
+		parent_name = get_parent(ctx, node->name);
+		if (!parent_name)
+			return false;
+		parent = read_node(conn, ctx, parent_name);
+		if (!parent)
+			return false;
+	}
+
+	perm = perm_for_conn(conn, &parent->perms);
+
+	return perm & XS_PERM_READ;
+}
+
 /*
  * Check whether any watch events are to be sent.
  * Temporary memory allocations are done with ctx.
+ * We need to take the (potential) old permissions of the node into account
+ * as a watcher losing permissions to access a node should receive the
+ * watch event, too.
  */
 void fire_watches(struct connection *conn, const void *ctx, const char *name,
-		  bool exact)
+		  struct node *node, bool exact, struct node_perms *perms)
 {
 	struct connection *i;
 	struct watch *watch;
@@ -134,8 +166,13 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
 	/* Create an event for each watch. */
 	list_for_each_entry(i, &connections, list) {
 		/* introduce/release domain watches */
-		if (check_special_event(name) && !check_perms_special(name, i))
-			continue;
+		if (check_special_event(name)) {
+			if (!check_perms_special(name, i))
+				continue;
+		} else {
+			if (!watch_permitted(i, ctx, name, node, perms))
+				continue;
+		}
 
 		list_for_each_entry(watch, &i->watches, list) {
 			if (exact) {
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
index 1b3c80d3dda1..03094374f379 100644
--- tools/xenstore/xenstored_watch.h.orig
+++ tools/xenstore/xenstored_watch.h
@@ -26,7 +26,7 @@ int do_unwatch(struct connection *conn, struct buffered_data *in);
 
 /* Fire all watches: !exact means all the children are affected (ie. rm). */
 void fire_watches(struct connection *conn, const void *tmp, const char *name,
-		  bool exact);
+		  struct node *node, bool exact, struct node_perms *perms);
 
 void conn_delete_all_watches(struct connection *conn);
 
-- 
2.17.1

