dovecot-2.2: director: Don't communicate with directors that rec...

dovecot at dovecot.org dovecot at dovecot.org
Sun May 20 03:26:31 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/366b9e5fc85c
changeset: 14432:366b9e5fc85c
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Apr 09 08:17:52 2012 +0300
description:
director: Don't communicate with directors that recently sent invalid input.
Track network and protocol failures separately. If a director had sent
invalid protocol data within last 60 seconds, don't try to connect to it and
don't allow it to connect to us.

diffstat:

 src/director/director-connection.c |  56 ++++++++++++++++++++++++++-----------
 src/director/director-host.h       |   5 ++-
 src/director/director.c            |  14 ++++++--
 src/director/director.h            |   4 ++
 src/director/doveadm-connection.c  |   6 +++-
 5 files changed, 61 insertions(+), 24 deletions(-)

diffs (229 lines):

diff -r 084064444f89 -r 366b9e5fc85c src/director/director-connection.c
--- a/src/director/director-connection.c	Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director-connection.c	Mon Apr 09 08:17:52 2012 +0300
@@ -65,7 +65,7 @@
 #define DIRECTOR_CONNECTION_PING_SYNC_INTERVAL_MSECS 1000
 /* If outgoing director connection exists for less than this many seconds,
    mark the host as failed so we won't try to reconnect to it immediately */
-#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 10
+#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 40
 
 #if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS
 #  error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low
@@ -113,6 +113,7 @@
 };
 
 static void director_connection_disconnected(struct director_connection **conn);
+static void director_connection_protocol_error(struct director_connection **conn);
 
 static void ATTR_FORMAT(2, 3)
 director_cmd_error(struct director_connection *conn, const char *fmt, ...)
@@ -318,6 +319,7 @@
 	const char *connect_str;
 	struct ip_addr ip;
 	unsigned int port;
+	time_t next_comm_attempt;
 
 	if (!director_args_parse_ip_port(conn, args, &ip, &port))
 		return FALSE;
@@ -362,10 +364,19 @@
 	/* make sure we don't keep old sequence values across restarts */
 	conn->host->last_seq = 0;
 
-	if (dir->left == NULL) {
+	next_comm_attempt = conn->host->last_protocol_failure +
+		DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS;
+	if (next_comm_attempt > ioloop_time) {
+		/* the director recently sent invalid protocol data,
+		   don't try retrying yet */
+		i_error("director(%s): Remote sent invalid protocol data recently, "
+			"waiting %u secs before allowing further communication",
+			conn->name, (unsigned int)(next_comm_attempt-ioloop_time));
+		return FALSE;
+	} else if (dir->left == NULL) {
 		/* a) - just in case the left is also our right side reset
 		   its failed state, so we can connect to it */
-		conn->host->last_failed = 0;
+		conn->host->last_network_failure = 0;
 		if (!director_has_outgoing_connections(dir))
 			director_connect(dir);
 	} else if (dir->left->host == conn->host) {
@@ -548,9 +559,9 @@
 
 	host = director_host_lookup(conn->dir, &ip, port);
 	if (host != NULL) {
-		/* already have this. just reset its last_failed timestamp,
-		   since it might be up now. */
-		host->last_failed = 0;
+		/* already have this. just reset its last_network_failure
+		   timestamp, since it might be up now. */
+		host->last_network_failure = 0;
 		return TRUE;
 	}
 
@@ -853,7 +864,7 @@
 
 	/* the host is up now, make sure we can connect to it immediately
 	   if needed */
-	conn->host->last_failed = 0;
+	conn->host->last_network_failure = 0;
 
 	conn->handshake_received = TRUE;
 	if (conn->in) {
@@ -1024,7 +1035,7 @@
 
 	host = director_host_get(conn->dir, &ip, port);
 	/* reset failure timestamp so we'll actually try to connect there. */
-	host->last_failed = 0;
+	host->last_network_failure = 0;
 
 	/* remote suggests us to connect elsewhere */
 	if (dir->right != NULL &&
@@ -1193,7 +1204,7 @@
 		/* buffer full */
 		i_error("BUG: Director %s sent us more than %d bytes",
 			conn->name, MAX_INBUF_SIZE);
-		director_connection_disconnected(&conn);
+		director_connection_protocol_error(&conn);
 		return;
 	}
 
@@ -1208,7 +1219,7 @@
 				i_debug("director(%s): Invalid input, disconnecting",
 					conn->name);
 			}
-			director_connection_disconnected(&conn);
+			director_connection_protocol_error(&conn);
 			break;
 		}
 	}
@@ -1394,13 +1405,6 @@
 	if (dir->debug && conn->host != NULL)
 		i_debug("Disconnecting from %s", conn->host->name);
 
-	if (conn->host != NULL && !conn->wrong_host &&
-	    (!conn->handshake_received ||
-	     conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time)) {
-		/* avoid reconnecting back here immediately */
-		conn->host->last_failed = ioloop_time;
-	}
-
 	conns = array_get(&dir->connections, &count);
 	for (i = 0; i < count; i++) {
 		if (conns[i] == conn) {
@@ -1449,6 +1453,24 @@
 	struct director_connection *conn = *_conn;
 	struct director *dir = conn->dir;
 
+	if (conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time) {
+		/* connection didn't exist for very long, assume it has a
+		   network problem */
+		conn->host->last_network_failure = ioloop_time;
+	}
+
+	director_connection_deinit(_conn);
+	if (dir->right == NULL)
+		director_connect(dir);
+}
+
+void director_connection_protocol_error(struct director_connection **_conn)
+{
+	struct director_connection *conn = *_conn;
+	struct director *dir = conn->dir;
+
+	conn->host->last_protocol_failure = ioloop_time;
+
 	director_connection_deinit(_conn);
 	if (dir->right == NULL)
 		director_connect(dir);
diff -r 084064444f89 -r 366b9e5fc85c src/director/director-host.h
--- a/src/director/director-host.h	Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director-host.h	Mon Apr 09 08:17:52 2012 +0300
@@ -17,8 +17,9 @@
 	   it can be ignored (or: it must be ignored to avoid potential command
 	   loops) */
 	unsigned int last_seq;
-	/* Last time host was detected to be down/broken */
-	time_t last_failed;
+	/* Last time host was detected to be down */
+	time_t last_network_failure;
+	time_t last_protocol_failure;
 	/* we are this director */
 	unsigned int self:1;
 };
diff -r 084064444f89 -r 366b9e5fc85c src/director/director.c
--- a/src/director/director.c	Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director.c	Mon Apr 09 08:17:52 2012 +0300
@@ -110,13 +110,13 @@
 	port = dir->test_port != 0 ? dir->test_port : host->port;
 	fd = net_connect_ip(&host->ip, port, &dir->self_ip);
 	if (fd == -1) {
-		host->last_failed = ioloop_time;
+		host->last_network_failure = ioloop_time;
 		i_error("connect(%s) failed: %m", host->name);
 		return -1;
 	}
 	/* Reset timestamp so that director_connect() won't skip this host
 	   while we're still trying to connect to it */
-	host->last_failed = 0;
+	host->last_network_failure = 0;
 
 	director_connection_init_out(dir, fd, host);
 	return 0;
@@ -151,9 +151,15 @@
 	for (i = 1; i < count; i++) {
 		unsigned int idx = (self_idx + i) % count;
 
-		if (hosts[idx]->last_failed +
+		if (hosts[idx]->last_network_failure +
 		    DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
-			/* failed recently, don't try retrying here */
+			/* connection failed recently, don't try retrying here */
+			continue;
+		}
+		if (hosts[idx]->last_protocol_failure +
+		    DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) {
+			/* the director recently sent invalid protocol data,
+			   don't try retrying yet */
 			continue;
 		}
 
diff -r 084064444f89 -r 366b9e5fc85c src/director/director.h
--- a/src/director/director.h	Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director.h	Mon Apr 09 08:17:52 2012 +0300
@@ -11,6 +11,10 @@
 /* weak users supported in protocol v1.1+ */
 #define DIRECTOR_VERSION_WEAK_USERS 1
 
+/* Minimum time between even attempting to communicate with a director that
+   failed due to a protocol error. */
+#define DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS 60
+
 struct director;
 struct mail_host;
 struct user;
diff -r 084064444f89 -r 366b9e5fc85c src/director/doveadm-connection.c
--- a/src/director/doveadm-connection.c	Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/doveadm-connection.c	Mon Apr 09 08:17:52 2012 +0300
@@ -99,6 +99,7 @@
 	string_t *str = t_str_new(1024);
 	const char *type;
 	bool left, right;
+	time_t last_failed;
 
 	array_foreach(&dir->dir_hosts, hostp) {
 		const struct director_host *host = *hostp;
@@ -116,9 +117,12 @@
 			type = "right";
 		else
 			type = "";
+
+		last_failed = I_MAX(host->last_network_failure,
+				    host->last_protocol_failure);
 		str_printfa(str, "%s\t%u\t%s\t%lu\n",
 			    net_ip2addr(&host->ip), host->port, type,
-			    (unsigned long)host->last_failed);
+			    (unsigned long)last_failed);
 	}
 	str_append_c(str, '\n');
 	o_stream_send(conn->output, str_data(str), str_len(str));


More information about the dovecot-cvs mailing list