dovecot-2.0: director: A lot of fixes.

dovecot at dovecot.org dovecot at dovecot.org
Fri Jun 18 21:21:55 EEST 2010


details:   http://hg.dovecot.org/dovecot-2.0/rev/f0fb8151c6b4
changeset: 11585:f0fb8151c6b4
user:      Timo Sirainen <tss at iki.fi>
date:      Fri Jun 18 19:21:50 2010 +0100
description:
director: A lot of fixes.

diffstat:

 src/director/director-connection.c |  81 +++++++++++++++++++++++++++++-----------
 src/director/director-connection.h |   2 +
 src/director/director-request.c    |   6 ++-
 src/director/director.c            |  22 ++++++++++-
 src/director/director.h            |   7 +---
 src/director/doveadm-connection.c  |   5 +-
 src/director/mail-host.c           |   4 +-
 src/director/main.c                |   3 +-
 8 files changed, 91 insertions(+), 39 deletions(-)

diffs (truncated from 355 to 300 lines):

diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/director-connection.c
--- a/src/director/director-connection.c	Fri Jun 18 19:20:38 2010 +0100
+++ b/src/director/director-connection.c	Fri Jun 18 19:21:50 2010 +0100
@@ -49,6 +49,7 @@
 	unsigned int me_received:1;
 	unsigned int handshake_received:1;
 	unsigned int ignore_host_events:1;
+	unsigned int handshake_sending_hosts:1;
 };
 
 static void director_connection_ping(struct director_connection *conn);
@@ -176,12 +177,17 @@
 			net_ip2addr(&user->host->ip),
 			net_ip2addr(&host->ip));
 
-		/* change the host anyway. we'll also need to remove the user
-		   from the old host's user_count, because we can't keep track
-		   of the user for more than one host */
-		user->host->user_count--;
-		user->host = host;
-		user->host->user_count++;
+		/* we want all the directors to redirect the user to the same
+		   server, but we don't want two directors fighting over which
+		   server it belongs to, so always use the lower IP address */
+		if (net_ip_cmp(&user->host->ip, &host->ip) > 0) {
+			/* change the host. we'll also need to remove the user
+			   from the old host's user_count, because we can't
+			   keep track of the user for more than one host */
+			user->host->user_count--;
+			user->host = host;
+			user->host->user_count++;
+		}
 		ret = TRUE;
 	}
 	*user_r = user;
@@ -265,6 +271,7 @@
 		/* ignore whatever remote sends */
 		conn->ignore_host_events = TRUE;
 	}
+	conn->handshake_sending_hosts = TRUE;
 	return TRUE;
 }
 
@@ -285,6 +292,7 @@
 	if (conn->ignore_host_events) {
 		/* remote is sending hosts in a handshake, but it doesn't have
 		   a completed ring and we do. */
+		i_assert(conn->handshake_sending_hosts);
 		return TRUE;
 	}
 
@@ -297,9 +305,8 @@
 	}
 
 	if (update) {
-		/* FIXME: 1) shouldn't be unconditional, 2) if we're not
-		   handshaking, we should do SYNC before making it visible */
-		host->vhost_count = vhost_count;
+		mail_host_set_vhost_count(conn->dir->mail_hosts,
+					  host, vhost_count);
 		director_update_host(conn->dir, conn->host, host);
 	}
 	return TRUE;
@@ -365,7 +372,8 @@
 		}
 	}
 
-	if (dir->left != NULL && dir->right != NULL) {
+	if (dir->left != NULL && dir->right != NULL &&
+	    dir->left->handshake_received && dir->right->handshake_received) {
 		/* we're connected to both directors. see if the ring is
 		   finished by sending a SYNC. if we get it back, it's done. */
 		dir->sync_seq = ++dir->self_host->last_seq;
@@ -424,20 +432,28 @@
 	/* only incoming connections get DIRECTOR and HOST lists */
 	if (conn->in && strcmp(cmd, "DIRECTOR") == 0 && conn->me_received)
 		return director_cmd_director(conn, args);
-	if (conn->in && strcmp(cmd, "HOST") == 0 && conn->me_received)
+
+	if (strcmp(cmd, "HOST") == 0) {
+		/* allow hosts from all connections always,
+		   this could be a host update */
 		return director_cmd_host(conn, args);
-	if (strcmp(cmd, "HOST-HAND-START") == 0)
-		return director_cmd_host_hand_start(conn, args);
-	if (strcmp(cmd, "HOST-HAND-END") == 0) {
-		conn->ignore_host_events = TRUE;
+	}
+	if (conn->handshake_sending_hosts &&
+	    strcmp(cmd, "HOST-HAND-END") == 0) {
+		conn->ignore_host_events = FALSE;
+		conn->handshake_sending_hosts = FALSE;
 		return TRUE;
 	}
+	if (conn->in && strcmp(cmd, "HOST-HAND-START") == 0 &&
+	    conn->me_received)
+		return director_cmd_host_hand_start(conn, args);
 
 	/* only incoming connections get a USER list */
 	if (conn->in && strcmp(cmd, "USER") == 0 && conn->me_received)
 		return director_handshake_cmd_user(conn, args);
 	/* both get DONE */
-	if (strcmp(cmd, "DONE") == 0 && !conn->handshake_received) {
+	if (strcmp(cmd, "DONE") == 0 && !conn->handshake_received &&
+	    !conn->handshake_sending_hosts) {
 		director_handshake_cmd_done(conn);
 		return TRUE;
 	}
@@ -476,6 +492,7 @@
 static bool director_connection_sync(struct director_connection *conn,
 				     const char *const *args, const char *line)
 {
+	struct director *dir = conn->dir;
 	struct director_host *host;
 	struct ip_addr ip;
 	unsigned int port, seq;
@@ -489,26 +506,39 @@
 
 	/* find the originating director. if we don't see it, it was already
 	   removed and we can ignore this sync. */
-	host = director_host_lookup(conn->dir, &ip, port);
+	host = director_host_lookup(dir, &ip, port);
 	if (host == NULL)
 		return TRUE;
 
 	if (host->self) {
-		if (conn->dir->sync_seq != seq) {
+		if (dir->sync_seq != seq) {
 			/* stale SYNC event */
 			return TRUE;
 		}
-		if (conn->dir->ring_handshaked)
+		if (!dir->ring_handshaked) {
+			/* the ring is handshaked */
+			director_set_ring_handshaked(dir);
 			return TRUE;
+		}
 
-		/* the ring is handshaked */
-		director_set_ring_handshaked(conn->dir);
+		if (dir->ring_synced) {
+			i_error("Received SYNC from %s (seq=%u) "
+				"while already synced", conn->name, seq);
+			return TRUE;
+		}
+
+		if (dir->debug) {
+			i_debug("Ring is synced (%s sent seq=%u)",
+				conn->name, seq);
+		}
+		dir->ring_synced = TRUE;
+		director_set_state_changed(dir);
 		return TRUE;
 	}
 
 	/* forward it to the connection on right */
-	if (conn->dir->right != NULL) {
-		director_connection_send(conn->dir->right,
+	if (dir->right != NULL) {
+		director_connection_send(dir->right,
 					 t_strconcat(line, "\n", NULL));
 	}
 	return TRUE;
@@ -884,3 +914,8 @@
 				    director_connection_ping_timeout, conn);
 	director_connection_send(conn, "PING\n");
 }
+
+const char *director_connection_get_name(struct director_connection *conn)
+{
+	return conn->name;
+}
diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/director-connection.h
--- a/src/director/director-connection.h	Fri Jun 18 19:20:38 2010 +0100
+++ b/src/director/director-connection.h	Fri Jun 18 19:21:50 2010 +0100
@@ -17,4 +17,6 @@
 				     struct director_host *skip_host,
 				     const char *data);
 
+const char *director_connection_get_name(struct director_connection *conn);
+
 #endif
diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/director-request.c
--- a/src/director/director-request.c	Fri Jun 18 19:20:38 2010 +0100
+++ b/src/director/director-request.c	Fri Jun 18 19:21:50 2010 +0100
@@ -75,7 +75,7 @@
 	if (!dir->ring_handshaked) {
 		/* delay requests until ring handshaking is complete */
 		if (!dir->ring_handshake_warning_sent) {
-			i_warning("Delaying connections until all "
+			i_warning("Delaying requests until all "
 				  "directors have connected");
 			dir->ring_handshake_warning_sent = TRUE;
 		}
@@ -86,8 +86,10 @@
 	if (user != NULL)
 		user_directory_refresh(dir->users, user);
 	else {
-		if (array_count(&dir->desynced_host_changes) != 0) {
+		if (!dir->ring_synced) {
 			/* delay adding new users until ring is again synced */
+			if (dir->debug)
+				i_debug("Delaying request until ring is synced");
 			return FALSE;
 		}
 		host = mail_host_get_by_hash(dir->mail_hosts,
diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/director.c
--- a/src/director/director.c	Fri Jun 18 19:20:38 2010 +0100
+++ b/src/director/director.c	Fri Jun 18 19:21:50 2010 +0100
@@ -137,9 +137,26 @@
 		i_debug("Director ring handshaked");
 
 	dir->ring_handshaked = TRUE;
+	dir->ring_synced = TRUE;
 	director_set_state_changed(dir);
 }
 
+static void director_sync(struct director *dir)
+{
+	/* we're synced again, once we receive this SYNC back */
+	dir->sync_seq++;
+	dir->ring_synced = FALSE;
+
+	if (dir->debug) {
+		i_debug("Ring is desynced (seq=%u, sending SYNC to %s)",
+			dir->sync_seq, director_connection_get_name(dir->right));
+	}
+
+	director_connection_send(dir->right, t_strdup_printf(
+		"SYNC\t%s\t%u\t%u\n", net_ip2addr(&dir->self_ip),
+		dir->self_port, dir->sync_seq));
+}
+
 void director_update_host(struct director *dir, struct director_host *src,
 			  struct mail_host *host)
 {
@@ -147,6 +164,7 @@
 
 	director_update_send(dir, src, t_strdup_printf(
 		"HOST\t%s\t%u\n", net_ip2addr(&host->ip), host->vhost_count));
+	director_sync(dir);
 }
 
 void director_remove_host(struct director *dir, struct director_host *src,
@@ -156,6 +174,7 @@
 		"HOST-REMOVE\t%s\n", net_ip2addr(&host->ip)));
 	user_directory_remove_host(dir->users, host);
 	mail_host_remove(dir->mail_hosts, host);
+	director_sync(dir);
 }
 
 void director_flush_host(struct director *dir, struct director_host *src,
@@ -164,6 +183,7 @@
 	director_update_send(dir, src, t_strdup_printf(
 		"HOST-FLUSH\t%s\n", net_ip2addr(&host->ip)));
 	user_directory_remove_host(dir->users, host);
+	director_sync(dir);
 }
 
 void director_update_user(struct director *dir, struct director_host *src,
@@ -204,7 +224,6 @@
 	dir->state_change_callback = callback;
 	i_array_init(&dir->dir_hosts, 16);
 	i_array_init(&dir->pending_requests, 16);
-	i_array_init(&dir->desynced_host_changes, 16);
 	dir->users = user_directory_init(set->director_user_expire);
 	dir->mail_hosts = mail_hosts_init();
 	return dir;
@@ -229,7 +248,6 @@
 		timeout_remove(&dir->to_request);
 	array_foreach(&dir->dir_hosts, hostp)
 		director_host_free(*hostp);
-	array_free(&dir->desynced_host_changes);
 	array_free(&dir->pending_requests);
 	array_free(&dir->dir_hosts);
 	i_free(dir);
diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/director.h
--- a/src/director/director.h	Fri Jun 18 19:20:38 2010 +0100
+++ b/src/director/director.h	Fri Jun 18 19:21:50 2010 +0100
@@ -49,18 +49,13 @@
 	/* director hosts are sorted by IP (and port) */
 	ARRAY_DEFINE(dir_hosts, struct director_host *);
 
-	/* this array contains host changes done by directors.
-	   while it's non-empty, new user mappings can't be added, because
-	   different directors may see different hosts. SYNC events remove
-	   these changes. */
-	ARRAY_DEFINE(desynced_host_changes, struct director_host_change);
-
 	unsigned int sync_seq;
 
 	/* director ring handshaking is complete.
 	   director can start serving clients. */
 	unsigned int ring_handshaked:1;
 	unsigned int ring_handshake_warning_sent:1;
+	unsigned int ring_synced:1;
 	unsigned int debug:1;
 };
 
diff -r e6f15ba78007 -r f0fb8151c6b4 src/director/doveadm-connection.c


More information about the dovecot-cvs mailing list