dovecot-2.1: director: Implemented ability to remove directors from a running ring.

dovecot at dovecot.org dovecot at dovecot.org
Sat May 19 21:18:29 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.1/rev/42cca8a1d179
changeset: 14535:42cca8a1d179
user:      Timo Sirainen <tss at iki.fi>
date:      Sat May 19 21:18:04 2012 +0300
description:
director: Implemented ability to remove directors from a running ring.
Also added doveadm command for adding a new director to a running ring.

diffstat:

 src/director/director-connection.c |   44 ++++++++++++--
 src/director/director-host.c       |   35 +++++++++++-
 src/director/director-host.h       |    9 ++-
 src/director/director.c            |  110 ++++++++++++++++++++++++++++++++++--
 src/director/director.h            |   15 ++++-
 src/director/doveadm-connection.c  |   60 +++++++++++++++++++-
 src/director/main.c                |    5 +-
 src/doveadm/doveadm-director.c     |   67 ++++++++++++++++++++++
 8 files changed, 326 insertions(+), 19 deletions(-)

diffs (truncated from 614 to 300 lines):

diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-connection.c
--- a/src/director/director-connection.c	Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-connection.c	Sat May 19 21:18:04 2012 +0300
@@ -383,7 +383,12 @@
 	   elsewhere with CONNECT. however, before disconnecting it verify
 	   first that our left side is actually still functional.
 	*/
+	i_assert(conn->host == NULL);
 	conn->host = director_host_get(dir, &ip, port);
+	/* the host shouldn't be removed at this point, but if for some
+	   reason it is we don't want to crash */
+	conn->host->removed = FALSE;
+	director_host_ref(conn->host);
 	/* make sure we don't keep old sequence values across restarts */
 	conn->host->last_seq = 0;
 
@@ -587,6 +592,10 @@
 			/* ignore updates to ourself */
 			return TRUE;
 		}
+		if (host->removed) {
+			/* ignore re-adds of removed directors */
+			return TRUE;
+		}
 
 		/* already have this. just reset its last_network_failure
 		   timestamp, since it might be up now. */
@@ -598,18 +607,32 @@
 		}
 	} else {
 		/* save the director and forward it */
-		director_host_add(conn->dir, &ip, port);
+		host = director_host_add(conn->dir, &ip, port);
 		forward = TRUE;
 	}
 	if (forward) {
-		director_update_send(conn->dir,
-			director_connection_get_host(conn),
-			t_strdup_printf("DIRECTOR\t%s\t%u\n",
-					net_ip2addr(&ip), port));
+		director_notify_ring_added(host,
+			director_connection_get_host(conn));
 	}
 	return TRUE;
 }
 
+static bool director_cmd_director_remove(struct director_connection *conn,
+					 const char *const *args)
+{
+	struct director_host *host;
+	struct ip_addr ip;
+	unsigned int port;
+
+	if (!director_args_parse_ip_port(conn, args, &ip, &port))
+		return FALSE;
+
+	host = director_host_lookup(conn->dir, &ip, port);
+	if (host != NULL && !host->removed)
+		director_ring_remove(host, director_connection_get_host(conn));
+	return TRUE;
+}
+
 static bool
 director_cmd_host_hand_start(struct director_connection *conn,
 			     const char *const *args)
@@ -659,7 +682,7 @@
 	*_args = args + 3;
 
 	host = director_host_lookup(conn->dir, &ip, port);
-	if (host == NULL) {
+	if (host == NULL || host->removed) {
 		/* director is already gone, but we can't be sure if this
 		   command was sent everywhere. re-send it as if it was from
 		   ourself. */
@@ -1191,6 +1214,8 @@
 		return director_cmd_user_killed_everywhere(conn, args);
 	if (strcmp(cmd, "DIRECTOR") == 0)
 		return director_cmd_director(conn, args);
+	if (strcmp(cmd, "DIRECTOR-REMOVE") == 0)
+		return director_cmd_director_remove(conn, args);
 	if (strcmp(cmd, "SYNC") == 0)
 		return director_connection_sync(conn, args);
 	if (strcmp(cmd, "CONNECT") == 0)
@@ -1279,6 +1304,8 @@
 	struct director_host *const *hostp;
 
 	array_foreach(&conn->dir->dir_hosts, hostp) {
+		if ((*hostp)->removed)
+			continue;
 		str_printfa(str, "DIRECTOR\t%s\t%u\n",
 			    net_ip2addr(&(*hostp)->ip), (*hostp)->port);
 	}
@@ -1433,12 +1460,15 @@
 {
 	struct director_connection *conn;
 
+	i_assert(!host->removed);
+
 	/* make sure we don't keep old sequence values across restarts */
 	host->last_seq = 0;
 
 	conn = director_connection_init_common(dir, fd);
 	conn->name = i_strdup_printf("%s/out", host->name);
 	conn->host = host;
+	director_host_ref(host);
 	conn->io = io_add(conn->fd, IO_WRITE,
 			  director_connection_connected, conn);
 	return conn;
@@ -1471,6 +1501,8 @@
 	}
 	if (dir->right == conn)
 		dir->right = NULL;
+	if (conn->host != NULL)
+		director_host_unref(conn->host);
 
 	if (conn->user_iter != NULL)
 		user_directory_iter_deinit(&conn->user_iter);
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-host.c
--- a/src/director/director-host.c	Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-host.c	Sat May 19 21:18:04 2012 +0300
@@ -29,6 +29,8 @@
 	struct director_host *host;
 
 	host = i_new(struct director_host, 1);
+	host->dir = dir;
+	host->refcount = 1;
 	host->ip = *ip;
 	host->port = port;
 	host->name = i_strdup_printf("%s:%u", net_ip2addr(ip), port);
@@ -41,8 +43,39 @@
 	return host;
 }
 
-void director_host_free(struct director_host *host)
+void director_host_free(struct director_host **_host)
 {
+	struct director_host *host = *_host;
+
+	i_assert(host->refcount == 1);
+
+	*_host = NULL;
+	director_host_unref(host);
+}
+
+void director_host_ref(struct director_host *host)
+{
+	i_assert(host->refcount > 0);
+	host->refcount++;
+}
+
+void director_host_unref(struct director_host *host)
+{
+	struct director_host *const *hosts;
+	unsigned int i, count;
+
+	i_assert(host->refcount > 0);
+
+	if (--host->refcount > 0)
+		return;
+
+	hosts = array_get(&host->dir->dir_hosts, &count);
+	for (i = 0; i < count; i++) {
+		if (hosts[i] == host) {
+			array_delete(&host->dir->dir_hosts, i, 1);
+			break;
+		}
+	}
 	i_free(host->name);
 	i_free(host);
 }
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-host.h
--- a/src/director/director-host.h	Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-host.h	Sat May 19 21:18:04 2012 +0300
@@ -6,6 +6,9 @@
 struct director;
 
 struct director_host {
+	struct director *dir;
+	int refcount;
+
 	struct ip_addr ip;
 	unsigned int port;
 
@@ -22,12 +25,16 @@
 	time_t last_protocol_failure;
 	/* we are this director */
 	unsigned int self:1;
+	unsigned int removed:1;
 };
 
 struct director_host *
 director_host_add(struct director *dir, const struct ip_addr *ip,
 		  unsigned int port);
-void director_host_free(struct director_host *host);
+void director_host_free(struct director_host **host);
+
+void director_host_ref(struct director_host *host);
+void director_host_unref(struct director_host *host);
 
 struct director_host *
 director_host_get(struct director *dir, const struct ip_addr *ip,
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director.c
--- a/src/director/director.c	Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director.c	Sat May 19 21:18:04 2012 +0300
@@ -20,6 +20,7 @@
 #define DIRECTOR_SYNC_TIMEOUT_MSECS (5*1000)
 #define DIRECTOR_RING_MIN_WAIT_SECS 20
 #define DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS 1000
+#define DIRECTOR_DELAYED_DIR_REMOVE_MSECS (1000*30)
 
 static bool director_is_self_ip_set(struct director *dir)
 {
@@ -127,8 +128,8 @@
 static struct director_host *
 director_get_preferred_right_host(struct director *dir)
 {
-	struct director_host *const *hosts;
-	unsigned int count, self_idx;
+	struct director_host *const *hosts, *host;
+	unsigned int i, count, self_idx;
 
 	hosts = array_get(&dir->dir_hosts, &count);
 	if (count == 1) {
@@ -137,7 +138,13 @@
 	}
 
 	self_idx = director_find_self_idx(dir);
-	return hosts[(self_idx + 1) % count];
+	for (i = 0; i < count; i++) {
+		host = hosts[(self_idx + i + 1) % count];
+		if (!host->removed)
+			return host;
+	}
+	/* self, with some removed hosts */
+	return NULL;
 }
 
 static bool director_wait_for_others(struct director *dir)
@@ -177,6 +184,9 @@
 	for (i = 1; i < count; i++) {
 		unsigned int idx = (self_idx + i) % count;
 
+		if (hosts[idx]->removed)
+			continue;
+
 		if (hosts[idx]->last_network_failure +
 		    DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
 			/* connection failed recently, don't try retrying here */
@@ -408,6 +418,79 @@
 		director_connection_uncork(*connp);
 }
 
+void director_notify_ring_added(struct director_host *added_host,
+				struct director_host *src)
+{
+	const char *cmd;
+
+	cmd = t_strdup_printf("DIRECTOR\t%s\t%u\n",
+			      net_ip2addr(&added_host->ip), added_host->port);
+	director_update_send(added_host->dir, src, cmd);
+}
+
+static void director_delayed_dir_remove_timeout(struct director *dir)
+{
+	struct director_host *const *hosts, *host;
+	unsigned int i, count;
+
+	timeout_remove(&dir->to_remove_dirs);
+
+	hosts = array_get(&dir->dir_hosts, &count);
+	for (i = 0; i < count; ) {
+		if (hosts[i]->removed) {
+			host = hosts[i];
+			director_host_free(&host);
+			hosts = array_get(&dir->dir_hosts, &count);
+		} else {
+			i++;
+		}
+	}
+}
+
+void director_ring_remove(struct director_host *removed_host,
+			  struct director_host *src)
+{
+	struct director *dir = removed_host->dir;
+	struct director_connection *const *conns, *conn;
+	unsigned int i, count;
+	const char *cmd;
+
+	if (removed_host->self) {
+		/* others will just disconnect us */
+		return;
+	}
+
+	/* mark the host as removed and fully remove it later. this delay is
+	   needed, because the removal may trigger director reconnections,
+	   which may send the director back and we don't want to re-add it */
+	removed_host->removed = TRUE;
+	if (dir->to_remove_dirs == NULL) {
+		dir->to_remove_dirs =


More information about the dovecot-cvs mailing list