dovecot-2.2: director: Redesigned connection handling and error ...
dovecot at dovecot.org
dovecot at dovecot.org
Sun May 20 03:26:31 EEST 2012
details: http://hg.dovecot.org/dovecot-2.2/rev/b43ae3805f5f
changeset: 14409:b43ae3805f5f
user: Timo Sirainen <tss at iki.fi>
date: Tue Apr 03 05:58:29 2012 +0300
description:
director: Redesigned connection handling and error handling.
Director now accepts all connections from everywhere and syncs them until
the handshaking is finished. At that point it finally decides if this is
a connection that should be used as our left/right connection, or if it
should be disconnected. This should make connecting more reliable,
especially if one of the directors sends broken handshake or has other
trouble.
diffstat:
src/director/director-connection.c | 539 ++++++++++++++++++++++--------------
src/director/director-connection.h | 4 +-
src/director/director.c | 40 +-
src/director/director.h | 4 +
4 files changed, 357 insertions(+), 230 deletions(-)
diffs (truncated from 929 to 300 lines):
diff -r 544a8c4705e5 -r b43ae3805f5f src/director/director-connection.c
--- a/src/director/director-connection.c Tue Apr 03 00:50:12 2012 +0300
+++ b/src/director/director-connection.c Tue Apr 03 05:58:29 2012 +0300
@@ -7,8 +7,10 @@
VERSION
ME
- <wait for remote handshake>
+ <wait for DONE from remote handshake>
DONE
+ <make this connection our "left" connection, potentially disconnecting
+ another one>
Outgoing director connections send:
@@ -21,6 +23,9 @@
[0..n] USER
<possibly other non-handshake commands between USERs>
DONE
+ <wait for DONE from remote>
+ <make this connection our "right" connection, potentially disconnecting
+ another one>
*/
#include "lib.h"
@@ -44,10 +49,16 @@
#define MAX_INBUF_SIZE 1024
#define MAX_OUTBUF_SIZE (1024*1024*10)
#define OUTBUF_FLUSH_THRESHOLD (1024*128)
-/* Max idling time while connecting/handshaking before disconnecting */
-#define DIRECTOR_CONNECTION_INIT_TIMEOUT_MSECS (10*1000)
-/* How long to wait for PONG after PING request */
-#define DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS (10*1000)
+/* Max idling time before "ME" command must have been received,
+ or we'll disconnect. */
+#define DIRECTOR_CONNECTION_ME_TIMEOUT_MSECS (2*1000)
+/* Max idling time before "DONE" command must have been received,
+ or we'll disconnect. */
+#define DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS (30*1000)
+/* How long to wait for PONG for an idling connection */
+#define DIRECTOR_CONNECTION_PING_IDLE_TIMEOUT_MSECS (2*1000)
+/* Maximum time to wait for PONG reply */
+#define DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS (60*1000)
/* How long to wait to send PING when connection is idle */
#define DIRECTOR_CONNECTION_PING_INTERVAL_MSECS (15*1000)
/* How long to wait before sending PING while waiting for SYNC reply */
@@ -56,6 +67,14 @@
mark the host as failed so we won't try to reconnect to it immediately */
#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 10
+#if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS
+# error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low
+#endif
+
+#if DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_IDLE_TIMEOUT_MSECS
+# error DIRECTOR_CONNECTION_PONG_TIMEOUT_MSECS is too low
+#endif
+
#define CMD_IS_USER_HANDHAKE(args) \
(str_array_length(args) > 2)
@@ -72,7 +91,7 @@
struct io *io;
struct istream *input;
struct ostream *output;
- struct timeout *to, *to_ping;
+ struct timeout *to, *to_ping, *to_pong;
struct user_directory_iter *user_iter;
@@ -88,9 +107,10 @@
unsigned int handshake_sending_hosts:1;
unsigned int ping_waiting:1;
unsigned int synced:1;
+ unsigned int wrong_host:1;
+ unsigned int verifying_left:1;
};
-static void director_connection_ping(struct director_connection *conn);
static void director_connection_disconnected(struct director_connection **conn);
static void ATTR_FORMAT(2, 3)
@@ -104,6 +124,172 @@
va_end(args);
}
+static void
+director_connection_init_timeout(struct director_connection *conn)
+{
+ unsigned int secs = ioloop_time - conn->created;
+
+ if (!conn->connected) {
+ i_error("director(%s): Connect timed out (%u secs)",
+ conn->name, secs);
+ } else if (!conn->me_received) {
+ i_error("director(%s): Handshaking ME timed out (%u secs)",
+ conn->name, secs);
+ } else {
+ i_error("director(%s): Handshaking DONE timed out (%u secs)",
+ conn->name, secs);
+ }
+ director_connection_disconnected(&conn);
+}
+
+static void
+director_connection_set_ping_timeout(struct director_connection *conn)
+{
+ unsigned int msecs;
+
+ msecs = conn->synced || !conn->handshake_received ?
+ DIRECTOR_CONNECTION_PING_INTERVAL_MSECS :
+ DIRECTOR_CONNECTION_PING_SYNC_INTERVAL_MSECS;
+
+ timeout_remove(&conn->to_ping);
+ conn->to_ping = timeout_add(msecs, director_connection_ping, conn);
+}
+
+static void director_connection_send_connect(struct director_connection *conn,
+ struct director_host *host)
+{
+ const char *connect_str;
+
+ connect_str = t_strdup_printf("CONNECT\t%s\t%u\n",
+ net_ip2addr(&host->ip), host->port);
+ director_connection_send(conn, connect_str);
+ (void)o_stream_flush(conn->output);
+ o_stream_uncork(conn->output);
+}
+
+static void director_connection_assigned(struct director_connection *conn)
+{
+ struct director *dir = conn->dir;
+
+ if (dir->left != NULL && dir->right != NULL) {
+ /* we're connected to both directors. see if the ring is
+ finished by sending a SYNC. if we get it back, it's done. */
+ dir->sync_seq++;
+ director_set_ring_unsynced(dir);
+ director_sync_send(dir, dir->self_host, dir->sync_seq,
+ DIRECTOR_VERSION_MINOR);
+ }
+ director_connection_set_ping_timeout(conn);
+}
+
+static bool director_connection_assign_left(struct director_connection *conn)
+{
+ struct director *dir = conn->dir;
+
+ i_assert(conn->in);
+ i_assert(dir->left != conn);
+
+ /* make sure this is the correct incoming connection */
+ if (conn->host->self) {
+ i_error("Connection from self, dropping");
+ return FALSE;
+ } else if (dir->left == NULL) {
+ /* no conflicts yet */
+ } else if (dir->left->host == conn->host) {
+ i_info("Dropping existing connection %s "
+ "in favor of its new connection %s",
+ dir->left->host->name, conn->host->name);
+ director_connection_deinit(&dir->left);
+ } else if (dir->left->verifying_left) {
+ /* we're waiting to verify if our current left is still
+ working. if we don't receive a PONG, the current left
+ gets disconnected and a new left gets assigned. if we do
+ receive a PONG, we'll wait until the current left
+ disconnects us and then reassign the new left. */
+ return TRUE;
+ } else if (director_host_cmp_to_self(dir->left->host, conn->host,
+ dir->self_host) < 0) {
+ /* the old connection is the correct one.
+ refer the client there (FIXME: do we ever get here?) */
+ i_warning("Director connection %s tried to connect to "
+ "us, should use %s instead",
+ conn->name, dir->left->host->name);
+ director_connection_send_connect(conn, dir->left->host);
+ return FALSE;
+ } else {
+ /* this new connection is the correct one, but wait until the
+ old connection gets disconnected before using this one.
+ that guarantees that the director inserting itself into
+ the ring has finished handshaking its left side, so the
+ switch will be fast. */
+ return TRUE;
+ }
+ dir->left = conn;
+ i_free(conn->name);
+ conn->name = i_strdup_printf("%s/left", conn->host->name);
+ director_connection_assigned(conn);
+ return TRUE;
+}
+
+static void director_assign_left(struct director *dir)
+{
+ struct director_connection *conn, *const *connp;
+
+ array_foreach(&dir->connections, connp) {
+ conn = *connp;
+
+ if (conn->in && conn->handshake_received && conn != dir->left) {
+ /* either use this or disconnect it */
+ if (!director_connection_assign_left(conn)) {
+ /* we don't want this */
+ director_connection_deinit(&dir->left);
+ director_assign_left(dir);
+ break;
+ }
+ }
+ }
+}
+
+static bool director_has_outgoing_connections(struct director *dir)
+{
+ struct director_connection *const *connp;
+
+ array_foreach(&dir->connections, connp) {
+ if (!(*connp)->in)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static bool director_connection_assign_right(struct director_connection *conn)
+{
+ struct director *dir = conn->dir;
+
+ i_assert(!conn->in);
+
+ if (dir->right != NULL) {
+ /* see if we should disconnect or keep the existing
+ connection. */
+ if (director_host_cmp_to_self(conn->host, dir->right->host,
+ dir->self_host) <= 0) {
+ /* the old connection is the correct one */
+ i_warning("Aborting incorrect outgoing connection to %s "
+ "(already connected to correct one: %s)",
+ conn->host->name, dir->right->host->name);
+ conn->wrong_host = TRUE;
+ return FALSE;
+ }
+ i_info("Replacing right director connection %s with %s",
+ dir->right->host->name, conn->host->name);
+ director_connection_deinit(&dir->right);
+ }
+ dir->right = conn;
+ i_free(conn->name);
+ conn->name = i_strdup_printf("%s/right", conn->host->name);
+ director_connection_assigned(conn);
+ return TRUE;
+}
+
static bool
director_args_parse_ip_port(struct director_connection *conn,
const char *const *args,
@@ -128,7 +314,6 @@
const char *const *args)
{
struct director *dir = conn->dir;
- struct director_host *host;
const char *connect_str;
struct ip_addr ip;
unsigned int port;
@@ -148,75 +333,55 @@
net_ip2addr(&ip), port);
return FALSE;
}
- host = director_host_get(dir, &ip, port);
- /* the host is up now, make sure we can connect to it immediately
- if needed */
- host->last_failed = 0;
conn->me_received = TRUE;
+ timeout_remove(&conn->to_ping);
+ conn->to_ping = timeout_add(DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS,
+ director_connection_init_timeout, conn);
+
if (!conn->in)
return TRUE;
- i_free(conn->name);
- conn->name = i_strdup_printf("%s/left", host->name);
- conn->host = host;
+ /* Incoming connection:
+
+ a) we don't have an established ring yet. make sure we're connecting
+ to our right side (which might become our left side).
+
+ b) it's our current "left" connection. the previous connection
+ is most likely dead.
+
+ c) we have an existing ring. tell our current "left" to connect to
+ it with CONNECT command.
+
+ d) the incoming connection doesn't belong to us at all, refer it
+ elsewhere with CONNECT. however, before disconnecting it verify
+ first that our left side is actually still functional.
+ */
+ conn->host = director_host_get(dir, &ip, port);
/* make sure we don't keep old sequence values across restarts */
More information about the dovecot-cvs
mailing list