dovecot-2.1: director: Redesigned connection handling and error ...

dovecot at dovecot.org dovecot at dovecot.org
Tue Apr 3 05:58:44 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.1/rev/b43ae3805f5f
changeset: 14373:b43ae3805f5f
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Apr 03 05:58:29 2012 +0300
description:
director: Redesigned connection handling and error handling.
Director now accepts all connections from everywhere and syncs them until
the handshaking is finished. At that point it finally decides if this is
a connection that should be used as our left/right connection, or if it
should be disconnected. This should make connecting more reliable,
especially if one of the directors sends broken handshake or has other
trouble.

diffstat:

 src/director/director-connection.c |  539 ++++++++++++++++++++++--------------
 src/director/director-connection.h |    4 +-
 src/director/director.c            |   40 +-
 src/director/director.h            |    4 +
 4 files changed, 357 insertions(+), 230 deletions(-)

diffs (truncated from 929 to 300 lines):

diff -r 544a8c4705e5 -r b43ae3805f5f src/director/director-connection.c
--- a/src/director/director-connection.c	Tue Apr 03 00:50:12 2012 +0300
+++ b/src/director/director-connection.c	Tue Apr 03 05:58:29 2012 +0300
@@ -7,8 +7,10 @@
 
    VERSION
    ME
-   <wait for remote handshake>
+   <wait for DONE from remote handshake>
    DONE
+   <make this connection our "left" connection, potentially disconnecting
+   another one>
 
    Outgoing director connections send:
 
@@ -21,6 +23,9 @@
    [0..n] USER
    <possibly other non-handshake commands between USERs>
    DONE
+   <wait for DONE from remote>
+   <make this connection our "right" connection, potentially disconnecting
+   another one>
 */
 
 #include "lib.h"
@@ -44,10 +49,16 @@
 #define MAX_INBUF_SIZE 1024
 #define MAX_OUTBUF_SIZE (1024*1024*10)
 #define OUTBUF_FLUSH_THRESHOLD (1024*128)
-/* Max idling time while connecting/handshaking before disconnecting */
-#define DIRECTOR_CONNECTION_INIT_TIMEOUT_MSECS (10*1000)
-/* How long to wait for PONG after PING request */
-#define DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS (10*1000)
+/* Max idling time before "ME" command must have been received,
+   or we'll disconnect. */
+#define DIRECTOR_CONNECTION_ME_TIMEOUT_MSECS (2*1000)
+/* Max idling time before "DONE" command must have been received,
+   or we'll disconnect. */
+#define DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS (30*1000)
+/* How long to wait for PONG for an idling connection */
+#define DIRECTOR_CONNECTION_PING_IDLE_TIMEOUT_MSECS (2*1000)
+/* Maximum time to wait for PONG reply */
+#define DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS (60*1000)
 /* How long to wait to send PING when connection is idle */
 #define DIRECTOR_CONNECTION_PING_INTERVAL_MSECS (15*1000)
 /* How long to wait before sending PING while waiting for SYNC reply */
@@ -56,6 +67,14 @@
    mark the host as failed so we won't try to reconnect to it immediately */
 #define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 10
 
+#if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS
+#  error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low
+#endif
+
+#if DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_IDLE_TIMEOUT_MSECS
+#  error DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS is too low
+#endif
+
 #define CMD_IS_USER_HANDHAKE(args) \
 	(str_array_length(args) > 2)
 
@@ -72,7 +91,7 @@
 	struct io *io;
 	struct istream *input;
 	struct ostream *output;
-	struct timeout *to, *to_ping;
+	struct timeout *to, *to_ping, *to_pong;
 
 	struct user_directory_iter *user_iter;
 
@@ -88,9 +107,10 @@
 	unsigned int handshake_sending_hosts:1;
 	unsigned int ping_waiting:1;
 	unsigned int synced:1;
+	unsigned int wrong_host:1;
+	unsigned int verifying_left:1;
 };
 
-static void director_connection_ping(struct director_connection *conn);
 static void director_connection_disconnected(struct director_connection **conn);
 
 static void ATTR_FORMAT(2, 3)
@@ -104,6 +124,172 @@
 	va_end(args);
 }
 
+static void
+director_connection_init_timeout(struct director_connection *conn)
+{
+	unsigned int secs = ioloop_time - conn->created;
+
+	if (!conn->connected) {
+		i_error("director(%s): Connect timed out (%u secs)",
+			conn->name, secs);
+	} else if (!conn->me_received) {
+		i_error("director(%s): Handshaking ME timed out (%u secs)",
+			conn->name, secs);
+	} else {
+		i_error("director(%s): Handshaking DONE timed out (%u secs)",
+			conn->name, secs);
+	}
+	director_connection_disconnected(&conn);
+}
+
+static void
+director_connection_set_ping_timeout(struct director_connection *conn)
+{
+	unsigned int msecs;
+
+	msecs = conn->synced || !conn->handshake_received ?
+		DIRECTOR_CONNECTION_PING_INTERVAL_MSECS :
+		DIRECTOR_CONNECTION_PING_SYNC_INTERVAL_MSECS;
+
+	timeout_remove(&conn->to_ping);
+	conn->to_ping = timeout_add(msecs, director_connection_ping, conn);
+}
+
+static void director_connection_send_connect(struct director_connection *conn,
+					     struct director_host *host)
+{
+	const char *connect_str;
+
+	connect_str = t_strdup_printf("CONNECT\t%s\t%u\n",
+				      net_ip2addr(&host->ip), host->port);
+	director_connection_send(conn, connect_str);
+	(void)o_stream_flush(conn->output);
+	o_stream_uncork(conn->output);
+}
+
+static void director_connection_assigned(struct director_connection *conn)
+{
+	struct director *dir = conn->dir;
+
+	if (dir->left != NULL && dir->right != NULL) {
+		/* we're connected to both directors. see if the ring is
+		   finished by sending a SYNC. if we get it back, it's done. */
+		dir->sync_seq++;
+		director_set_ring_unsynced(dir);
+		director_sync_send(dir, dir->self_host, dir->sync_seq,
+				   DIRECTOR_VERSION_MINOR);
+	}
+	director_connection_set_ping_timeout(conn);
+}
+
+static bool director_connection_assign_left(struct director_connection *conn)
+{
+	struct director *dir = conn->dir;
+
+	i_assert(conn->in);
+	i_assert(dir->left != conn);
+
+	/* make sure this is the correct incoming connection */
+	if (conn->host->self) {
+		i_error("Connection from self, dropping");
+		return FALSE;
+	} else if (dir->left == NULL) {
+		/* no conflicts yet */
+	} else if (dir->left->host == conn->host) {
+		i_info("Dropping existing connection %s "
+		       "in favor of its new connection %s",
+		       dir->left->host->name, conn->host->name);
+		director_connection_deinit(&dir->left);
+	} else if (dir->left->verifying_left) {
+		/* we're waiting to verify if our current left is still
+		   working. if we don't receive a PONG, the current left
+		   gets disconnected and a new left gets assigned. if we do
+		   receive a PONG, we'll wait until the current left
+		   disconnects us and then reassign the new left. */
+		return TRUE;
+	} else if (director_host_cmp_to_self(dir->left->host, conn->host,
+					     dir->self_host) < 0) {
+		/* the old connection is the correct one.
+		   refer the client there (FIXME: do we ever get here?) */
+		i_warning("Director connection %s tried to connect to "
+			  "us, should use %s instead",
+			  conn->name, dir->left->host->name);
+		director_connection_send_connect(conn, dir->left->host);
+		return FALSE;
+	} else {
+		/* this new connection is the correct one, but wait until the
+		   old connection gets disconnected before using this one.
+		   that guarantees that the director inserting itself into
+		   the ring has finished handshaking its left side, so the
+		   switch will be fast. */
+		return TRUE;
+	}
+	dir->left = conn;
+	i_free(conn->name);
+	conn->name = i_strdup_printf("%s/left", conn->host->name);
+	director_connection_assigned(conn);
+	return TRUE;
+}
+
+static void director_assign_left(struct director *dir)
+{
+	struct director_connection *conn, *const *connp;
+
+	array_foreach(&dir->connections, connp) {
+		conn = *connp;
+
+		if (conn->in && conn->handshake_received && conn != dir->left) {
+			/* either use this or disconnect it */
+			if (!director_connection_assign_left(conn)) {
+				/* we don't want this */
+				director_connection_deinit(&dir->left);
+				director_assign_left(dir);
+				break;
+			}
+		}
+	}
+}
+
+static bool director_has_outgoing_connections(struct director *dir)
+{
+	struct director_connection *const *connp;
+
+	array_foreach(&dir->connections, connp) {
+		if (!(*connp)->in)
+			return TRUE;
+	}
+	return FALSE;
+}
+
+static bool director_connection_assign_right(struct director_connection *conn)
+{
+	struct director *dir = conn->dir;
+
+	i_assert(!conn->in);
+
+	if (dir->right != NULL) {
+		/* see if we should disconnect or keep the existing
+		   connection. */
+		if (director_host_cmp_to_self(conn->host, dir->right->host,
+					      dir->self_host) <= 0) {
+			/* the old connection is the correct one */
+			i_warning("Aborting incorrect outgoing connection to %s "
+				  "(already connected to correct one: %s)",
+				  conn->host->name, dir->right->host->name);
+			conn->wrong_host = TRUE;
+			return FALSE;
+		}
+		i_info("Replacing right director connection %s with %s",
+		       dir->right->host->name, conn->host->name);
+		director_connection_deinit(&dir->right);
+	}
+	dir->right = conn;
+	i_free(conn->name);
+	conn->name = i_strdup_printf("%s/right", conn->host->name);
+	director_connection_assigned(conn);
+	return TRUE;
+}
+
 static bool
 director_args_parse_ip_port(struct director_connection *conn,
 			    const char *const *args,
@@ -128,7 +314,6 @@
 			    const char *const *args)
 {
 	struct director *dir = conn->dir;
-	struct director_host *host;
 	const char *connect_str;
 	struct ip_addr ip;
 	unsigned int port;
@@ -148,75 +333,55 @@
 			net_ip2addr(&ip), port);
 		return FALSE;
 	}
-	host = director_host_get(dir, &ip, port);
-	/* the host is up now, make sure we can connect to it immediately
-	   if needed */
-	host->last_failed = 0;
 	conn->me_received = TRUE;
 
+	timeout_remove(&conn->to_ping);
+	conn->to_ping = timeout_add(DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS,
+				    director_connection_init_timeout, conn);
+
 	if (!conn->in)
 		return TRUE;
 
-	i_free(conn->name);
-	conn->name = i_strdup_printf("%s/left", host->name);
-	conn->host = host;
+	/* Incoming connection:
+
+	   a) we don't have an established ring yet. make sure we're connecting
+	   to our right side (which might become our left side).
+
+	   b) it's our current "left" connection. the previous connection
+	   is most likely dead.
+
+	   c) we have an existing ring. tell our current "left" to connect to
+	   it with CONNECT command.
+
+	   d) the incoming connection doesn't belong to us at all, refer it
+	   elsewhere with CONNECT. however, before disconnecting it verify
+	   first that our left side is actually still functional.
+	*/
+	conn->host = director_host_get(dir, &ip, port);
 	/* make sure we don't keep old sequence values across restarts */


More information about the dovecot-cvs mailing list