dovecot-2.2: director: Improved logging related to disconnections.

dovecot at dovecot.org dovecot at dovecot.org
Wed Sep 26 18:01:29 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/27d3289e1f5c
changeset: 15132:27d3289e1f5c
user:      Timo Sirainen <tss at iki.fi>
date:      Wed Sep 12 00:20:47 2012 +0300
description:
director: Improved logging related to disconnections.

diffstat:

 src/director/director-connection.c |  135 ++++++++++++++++++++++++++----------
 src/director/director-connection.h |    3 +-
 src/director/director.c            |    9 +-
 src/director/director.h            |    4 +-
 4 files changed, 109 insertions(+), 42 deletions(-)

diffs (truncated from 359 to 300 lines):

diff -r 24e791bbcf69 -r 27d3289e1f5c src/director/director-connection.c
--- a/src/director/director-connection.c	Wed Sep 12 00:04:34 2012 +0300
+++ b/src/director/director-connection.c	Wed Sep 12 00:20:47 2012 +0300
@@ -71,6 +71,8 @@
    mark the host as failed so we won't try to reconnect to it immediately */
 #define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 40
 #define DIRECTOR_WAIT_DISCONNECT_SECS 10
+#define DIRECTOR_HANDSHAKE_WARN_SECS 29
+#define DIRECTOR_HANDSHAKE_BYTES_LOG_MIN_SECS (60*30)
 
 #if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS
 #  error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low
@@ -92,6 +94,8 @@
 	/* for incoming connections the director host isn't known until
 	   ME-line is received */
 	struct director_host *host;
+	/* this is set only for wrong connections: */
+	struct director_host *connect_request_to;
 
 	int fd;
 	struct io *io;
@@ -118,7 +122,9 @@
 };
 
 static void director_connection_disconnected(struct director_connection **conn);
-static void director_connection_reconnect(struct director_connection **conn);
+static void director_connection_reconnect(struct director_connection **conn,
+					  const char *reason);
+static void director_connection_log_disconnect(struct director_connection *conn);
 
 static void ATTR_FORMAT(2, 3)
 director_cmd_error(struct director_connection *conn, const char *fmt, ...)
@@ -169,7 +175,9 @@
 
 static void director_connection_wait_timeout(struct director_connection *conn)
 {
-	director_connection_deinit(&conn);
+	director_connection_log_disconnect(conn);
+	director_connection_deinit(&conn,
+		"Timeout waiting for disconnect after CONNECT");
 }
 
 static void director_connection_send_connect(struct director_connection *conn,
@@ -186,6 +194,12 @@
 	(void)o_stream_flush(conn->output);
 	o_stream_uncork(conn->output);
 
+	/* wait for a while for the remote to disconnect, so it will hopefully
+	   see our CONNECT command. we'll also log the warning later to avoid
+	   multiple log lines about it. */
+	conn->connect_request_to = host;
+	director_host_ref(conn->connect_request_to);
+
 	conn->to_disconnect =
 		timeout_add(DIRECTOR_WAIT_DISCONNECT_SECS*1000,
 			    director_connection_wait_timeout, conn);
@@ -220,10 +234,10 @@
 	} else if (dir->left == NULL) {
 		/* no conflicts yet */
 	} else if (dir->left->host == conn->host) {
-		i_info("Dropping existing connection %s "
-		       "in favor of its new connection %s",
-		       dir->left->host->name, conn->host->name);
-		director_connection_deinit(&dir->left);
+		i_warning("Replacing left director connection %s with %s",
+			  dir->left->host->name, conn->host->name);
+		director_connection_deinit(&dir->left, t_strdup_printf(
+			"Replacing with %s", conn->host->name));
 	} else if (dir->left->verifying_left) {
 		/* we're waiting to verify if our current left is still
 		   working. if we don't receive a PONG, the current left
@@ -235,9 +249,6 @@
 					     dir->self_host) < 0) {
 		/* the old connection is the correct one.
 		   refer the client there (FIXME: do we ever get here?) */
-		i_warning("Director connection %s tried to connect to "
-			  "us, should use %s instead",
-			  conn->name, dir->left->host->name);
 		director_connection_send_connect(conn, dir->left->host);
 		return TRUE;
 	} else {
@@ -267,7 +278,8 @@
 			/* either use this or disconnect it */
 			if (!director_connection_assign_left(conn)) {
 				/* we don't want this */
-				director_connection_deinit(&conn);
+				director_connection_deinit(&conn,
+					"Unwanted incoming connection");
 				director_assign_left(dir);
 				break;
 			}
@@ -304,9 +316,10 @@
 			conn->wrong_host = TRUE;
 			return FALSE;
 		}
-		i_info("Replacing right director connection %s with %s",
-		       dir->right->host->name, conn->host->name);
-		director_connection_deinit(&dir->right);
+		i_warning("Replacing right director connection %s with %s",
+			  dir->right->host->name, conn->host->name);
+		director_connection_deinit(&dir->right, t_strdup_printf(
+			"Replacing with %s", conn->host->name));
 	}
 	dir->right = conn;
 	i_free(conn->name);
@@ -410,7 +423,8 @@
 	} else if (dir->left->host == conn->host) {
 		/* b) */
 		i_assert(dir->left != conn);
-		director_connection_deinit(&dir->left);
+		director_connection_deinit(&dir->left,
+			"Replacing with new incoming connection");
 	} else if (director_host_cmp_to_self(conn->host, dir->left->host,
 					     dir->self_host) < 0) {
 		/* c) */
@@ -919,14 +933,19 @@
 static bool director_handshake_cmd_done(struct director_connection *conn)
 {
 	struct director *dir = conn->dir;
+	unsigned int handshake_secs = time(NULL) - conn->created;
+	string_t *str;
 
-	if (dir->debug) {
-		unsigned int secs = time(NULL)-conn->created;
-
-		i_debug("director(%s): Handshake took %u secs, "
-			"bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T,
-			conn->name, secs, conn->input->v_offset,
-			conn->output->offset);
+	if (handshake_secs >= DIRECTOR_HANDSHAKE_WARN_SECS || dir->debug) {
+		str = t_str_new(128);
+		str_printfa(str, "director(%s): Handshake took %u secs, "
+			    "bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T,
+			    conn->name, handshake_secs, conn->input->v_offset,
+			    conn->output->offset);
+		if (handshake_secs >= DIRECTOR_HANDSHAKE_WARN_SECS)
+			i_warning("%s", str_c(str));
+		else
+			i_debug("%s", str_c(str));
 	}
 
 	/* the host is up now, make sure we can connect to it immediately
@@ -1143,12 +1162,8 @@
 		if (conn->in && conn != dir->left && conn->me_received &&
 		    conn->to_disconnect == NULL &&
 		    director_host_cmp_to_self(dir->left->host, conn->host,
-					      dir->self_host) < 0) {
-			i_warning("Director connection %s tried to connect to "
-				  "us, should use %s instead",
-				  conn->name, dir->left->host->name);
+					      dir->self_host) < 0)
 			director_connection_send_connect(conn, dir->left->host);
-		}
 	}
 }
 
@@ -1220,6 +1235,11 @@
 		return director_connection_sync(conn, args);
 	if (strcmp(cmd, "CONNECT") == 0)
 		return director_cmd_connect(conn, args);
+	if (strcmp(cmd, "QUIT") == 0) {
+		i_warning("Director %s disconnected us with reason: %s",
+			  conn->name, t_strarray_join(args, " "));
+		return FALSE;
+	}
 
 	director_cmd_error(conn, "Unknown command %s", cmd);
 	return FALSE;
@@ -1247,6 +1267,37 @@
 	return ret;
 }
 
+static void
+director_connection_log_disconnect(struct director_connection *conn)
+{
+	unsigned int secs = ioloop_time - conn->created;
+	string_t *str = t_str_new(128);
+
+	i_assert(conn->connected);
+
+	if (conn->connect_request_to != NULL) {
+		i_warning("Director %s tried to connect to us, "
+			  "should use %s instead",
+			  conn->name, conn->connect_request_to->name);
+		return;
+	}
+
+	str_printfa(str, "Director %s disconnected: ", conn->name);
+	str_append(str, "Connection closed");
+	if (errno != 0 && errno != EPIPE)
+		str_printfa(str, ": %m");
+
+	str_printfa(str, " (connected %u secs, "
+		    "in=%"PRIuUOFF_T" out=%"PRIuUOFF_T,
+		    secs, conn->input->v_offset, conn->output->offset);
+
+	if (!conn->me_received)
+		str_append(str, ", handshake ME not received");
+	else if (!conn->handshake_received)
+		str_append(str, ", handshake DONE not received");
+	i_error("%s", str_c(str));
+}
+
 static void director_connection_input(struct director_connection *conn)
 {
 	struct director *dir = conn->dir;
@@ -1258,16 +1309,14 @@
 		return;
 	case -1:
 		/* disconnected */
-		i_error("Director %s disconnected%s", conn->name,
-			conn->handshake_received ? "" :
-			" before handshake finished");
+		director_connection_log_disconnect(conn);
 		director_connection_disconnected(&conn);
 		return;
 	case -2:
 		/* buffer full */
 		director_cmd_error(conn, "Director sent us more than %d bytes",
 				   MAX_INBUF_SIZE);
-		director_connection_reconnect(&conn);
+		director_connection_reconnect(&conn, "Too long input line");
 		return;
 	}
 
@@ -1289,7 +1338,8 @@
 		} T_END;
 
 		if (!ret) {
-			director_connection_reconnect(&conn);
+			director_connection_reconnect(&conn, t_strdup_printf(
+				"Invalid input: %s", line));
 			break;
 		}
 	}
@@ -1474,7 +1524,8 @@
 	return conn;
 }
 
-void director_connection_deinit(struct director_connection **_conn)
+void director_connection_deinit(struct director_connection **_conn,
+				const char *remote_reason)
 {
 	struct director_connection *const *conns, *conn = *_conn;
 	struct director *dir = conn->dir;
@@ -1482,8 +1533,15 @@
 
 	*_conn = NULL;
 
-	if (dir->debug && conn->host != NULL)
-		i_debug("Disconnecting from %s", conn->host->name);
+	if (dir->debug && conn->host != NULL) {
+		i_debug("Disconnecting from %s: %s",
+			conn->host->name, remote_reason);
+	}
+	if (*remote_reason != '\0' &&
+	    conn->minor_version >= DIRECTOR_VERSION_QUIT) {
+		o_stream_send_str(conn->output, t_strdup_printf(
+			"QUIT\t%s\n", remote_reason));
+	}
 
 	conns = array_get(&dir->connections, &count);
 	for (i = 0; i < count; i++) {
@@ -1504,6 +1562,8 @@
 	if (conn->host != NULL)
 		director_host_unref(conn->host);
 
+	if (conn->connect_request_to != NULL)
+		director_host_unref(conn->connect_request_to);
 	if (conn->user_iter != NULL)
 		user_directory_iter_deinit(&conn->user_iter);
 	if (conn->to_disconnect != NULL)
@@ -1542,17 +1602,18 @@
 		conn->host->last_network_failure = ioloop_time;
 	}
 
-	director_connection_deinit(_conn);
+	director_connection_deinit(_conn, "");
 	if (dir->right == NULL)
 		director_connect(dir);
 }
 
-void director_connection_reconnect(struct director_connection **_conn)
+static void director_connection_reconnect(struct director_connection **_conn,
+					  const char *reason)
 {
 	struct director_connection *conn = *_conn;
 	struct director *dir = conn->dir;
 
-	director_connection_deinit(_conn);
+	director_connection_deinit(_conn, reason);
 	if (dir->right == NULL)
 		director_connect(dir);
 }
diff -r 24e791bbcf69 -r 27d3289e1f5c src/director/director-connection.h
--- a/src/director/director-connection.h	Wed Sep 12 00:04:34 2012 +0300
+++ b/src/director/director-connection.h	Wed Sep 12 00:20:47 2012 +0300
@@ -10,7 +10,8 @@
 struct director_connection *
 director_connection_init_out(struct director *dir, int fd,


More information about the dovecot-cvs mailing list