dovecot-2.2: director: Don't communicate with directors that rec...
dovecot at dovecot.org
dovecot at dovecot.org
Sun May 20 03:26:31 EEST 2012
details: http://hg.dovecot.org/dovecot-2.2/rev/366b9e5fc85c
changeset: 14432:366b9e5fc85c
user: Timo Sirainen <tss at iki.fi>
date: Mon Apr 09 08:17:52 2012 +0300
description:
director: Don't communicate with directors that recently sent invalid input.
Track network and protocol failures separately. If a director had sent
invalid protocol data within last 60 seconds, don't try to connect to it and
don't allow it to connect to us.
diffstat:
src/director/director-connection.c | 56 ++++++++++++++++++++++++++-----------
src/director/director-host.h | 5 ++-
src/director/director.c | 14 ++++++--
src/director/director.h | 4 ++
src/director/doveadm-connection.c | 6 +++-
5 files changed, 61 insertions(+), 24 deletions(-)
diffs (229 lines):
diff -r 084064444f89 -r 366b9e5fc85c src/director/director-connection.c
--- a/src/director/director-connection.c Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director-connection.c Mon Apr 09 08:17:52 2012 +0300
@@ -65,7 +65,7 @@
#define DIRECTOR_CONNECTION_PING_SYNC_INTERVAL_MSECS 1000
/* If outgoing director connection exists for less than this many seconds,
mark the host as failed so we won't try to reconnect to it immediately */
-#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 10
+#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 40
#if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS
# error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low
@@ -113,6 +113,7 @@
};
static void director_connection_disconnected(struct director_connection **conn);
+static void director_connection_protocol_error(struct director_connection **conn);
static void ATTR_FORMAT(2, 3)
director_cmd_error(struct director_connection *conn, const char *fmt, ...)
@@ -318,6 +319,7 @@
const char *connect_str;
struct ip_addr ip;
unsigned int port;
+ time_t next_comm_attempt;
if (!director_args_parse_ip_port(conn, args, &ip, &port))
return FALSE;
@@ -362,10 +364,19 @@
/* make sure we don't keep old sequence values across restarts */
conn->host->last_seq = 0;
- if (dir->left == NULL) {
+ next_comm_attempt = conn->host->last_protocol_failure +
+ DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS;
+ if (next_comm_attempt > ioloop_time) {
+ /* the director recently sent invalid protocol data,
+ don't try retrying yet */
+ i_error("director(%s): Remote sent invalid protocol data recently, "
+ "waiting %u secs before allowing further communication",
+ conn->name, (unsigned int)(next_comm_attempt-ioloop_time));
+ return FALSE;
+ } else if (dir->left == NULL) {
/* a) - just in case the left is also our right side reset
its failed state, so we can connect to it */
- conn->host->last_failed = 0;
+ conn->host->last_network_failure = 0;
if (!director_has_outgoing_connections(dir))
director_connect(dir);
} else if (dir->left->host == conn->host) {
@@ -548,9 +559,9 @@
host = director_host_lookup(conn->dir, &ip, port);
if (host != NULL) {
- /* already have this. just reset its last_failed timestamp,
- since it might be up now. */
- host->last_failed = 0;
+ /* already have this. just reset its last_network_failure
+ timestamp, since it might be up now. */
+ host->last_network_failure = 0;
return TRUE;
}
@@ -853,7 +864,7 @@
/* the host is up now, make sure we can connect to it immediately
if needed */
- conn->host->last_failed = 0;
+ conn->host->last_network_failure = 0;
conn->handshake_received = TRUE;
if (conn->in) {
@@ -1024,7 +1035,7 @@
host = director_host_get(conn->dir, &ip, port);
/* reset failure timestamp so we'll actually try to connect there. */
- host->last_failed = 0;
+ host->last_network_failure = 0;
/* remote suggests us to connect elsewhere */
if (dir->right != NULL &&
@@ -1193,7 +1204,7 @@
/* buffer full */
i_error("BUG: Director %s sent us more than %d bytes",
conn->name, MAX_INBUF_SIZE);
- director_connection_disconnected(&conn);
+ director_connection_protocol_error(&conn);
return;
}
@@ -1208,7 +1219,7 @@
i_debug("director(%s): Invalid input, disconnecting",
conn->name);
}
- director_connection_disconnected(&conn);
+ director_connection_protocol_error(&conn);
break;
}
}
@@ -1394,13 +1405,6 @@
if (dir->debug && conn->host != NULL)
i_debug("Disconnecting from %s", conn->host->name);
- if (conn->host != NULL && !conn->wrong_host &&
- (!conn->handshake_received ||
- conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time)) {
- /* avoid reconnecting back here immediately */
- conn->host->last_failed = ioloop_time;
- }
-
conns = array_get(&dir->connections, &count);
for (i = 0; i < count; i++) {
if (conns[i] == conn) {
@@ -1449,6 +1453,24 @@
struct director_connection *conn = *_conn;
struct director *dir = conn->dir;
+ if (conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time) {
+ /* connection didn't exist for very long, assume it has a
+ network problem */
+ conn->host->last_network_failure = ioloop_time;
+ }
+
+ director_connection_deinit(_conn);
+ if (dir->right == NULL)
+ director_connect(dir);
+}
+
+void director_connection_protocol_error(struct director_connection **_conn)
+{
+ struct director_connection *conn = *_conn;
+ struct director *dir = conn->dir;
+
+ conn->host->last_protocol_failure = ioloop_time;
+
director_connection_deinit(_conn);
if (dir->right == NULL)
director_connect(dir);
diff -r 084064444f89 -r 366b9e5fc85c src/director/director-host.h
--- a/src/director/director-host.h Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director-host.h Mon Apr 09 08:17:52 2012 +0300
@@ -17,8 +17,9 @@
it can be ignored (or: it must be ignored to avoid potential command
loops) */
unsigned int last_seq;
- /* Last time host was detected to be down/broken */
- time_t last_failed;
+ /* Last time host was detected to be down */
+ time_t last_network_failure;
+ time_t last_protocol_failure;
/* we are this director */
unsigned int self:1;
};
diff -r 084064444f89 -r 366b9e5fc85c src/director/director.c
--- a/src/director/director.c Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director.c Mon Apr 09 08:17:52 2012 +0300
@@ -110,13 +110,13 @@
port = dir->test_port != 0 ? dir->test_port : host->port;
fd = net_connect_ip(&host->ip, port, &dir->self_ip);
if (fd == -1) {
- host->last_failed = ioloop_time;
+ host->last_network_failure = ioloop_time;
i_error("connect(%s) failed: %m", host->name);
return -1;
}
/* Reset timestamp so that director_connect() won't skip this host
while we're still trying to connect to it */
- host->last_failed = 0;
+ host->last_network_failure = 0;
director_connection_init_out(dir, fd, host);
return 0;
@@ -151,9 +151,15 @@
for (i = 1; i < count; i++) {
unsigned int idx = (self_idx + i) % count;
- if (hosts[idx]->last_failed +
+ if (hosts[idx]->last_network_failure +
DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
- /* failed recently, don't try retrying here */
+ /* connection failed recently, don't try retrying here */
+ continue;
+ }
+ if (hosts[idx]->last_protocol_failure +
+ DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) {
+ /* the director recently sent invalid protocol data,
+ don't try retrying yet */
continue;
}
diff -r 084064444f89 -r 366b9e5fc85c src/director/director.h
--- a/src/director/director.h Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/director.h Mon Apr 09 08:17:52 2012 +0300
@@ -11,6 +11,10 @@
/* weak users supported in protocol v1.1+ */
#define DIRECTOR_VERSION_WEAK_USERS 1
+/* Minimum time between even attempting to communicate with a director that
+ failed due to a protocol error. */
+#define DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS 60
+
struct director;
struct mail_host;
struct user;
diff -r 084064444f89 -r 366b9e5fc85c src/director/doveadm-connection.c
--- a/src/director/doveadm-connection.c Mon Apr 09 07:52:25 2012 +0300
+++ b/src/director/doveadm-connection.c Mon Apr 09 08:17:52 2012 +0300
@@ -99,6 +99,7 @@
string_t *str = t_str_new(1024);
const char *type;
bool left, right;
+ time_t last_failed;
array_foreach(&dir->dir_hosts, hostp) {
const struct director_host *host = *hostp;
@@ -116,9 +117,12 @@
type = "right";
else
type = "";
+
+ last_failed = I_MAX(host->last_network_failure,
+ host->last_protocol_failure);
str_printfa(str, "%s\t%u\t%s\t%lu\n",
net_ip2addr(&host->ip), host->port, type,
- (unsigned long)host->last_failed);
+ (unsigned long)last_failed);
}
str_append_c(str, '\n');
o_stream_send(conn->output, str_data(str), str_len(str));
More information about the dovecot-cvs
mailing list