dovecot-2.2: director: Detect if directors' hosts have become desynced by sending hosts_hash in SYNC parameter.
dovecot at dovecot.org
dovecot at dovecot.org
Mon Oct 12 12:49:32 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/8f225e43e6e3
changeset: 19293:8f225e43e6e3
user: Timo Sirainen <tss at iki.fi>
date: Mon Oct 12 15:47:46 2015 +0300
description:
director: Detect if directors' hosts have become desynced by sending hosts_hash in SYNC parameter.
Also fix up such a situation by resending all HOSTs.
diffstat:
src/director/director-connection.c | 47 ++++++++++++++++++++++++++++++++--
src/director/director-host.h | 4 ++
src/director/director.c | 51 ++++++++++++++++++++++++++-----------
src/director/director.h | 3 +-
4 files changed, 84 insertions(+), 21 deletions(-)
diffs (234 lines):
diff -r bae8efd8b5b3 -r 8f225e43e6e3 src/director/director-connection.c
--- a/src/director/director-connection.c Mon Oct 12 15:41:55 2015 +0300
+++ b/src/director/director-connection.c Mon Oct 12 15:47:46 2015 +0300
@@ -228,7 +228,8 @@
dir->sync_seq++;
director_set_ring_unsynced(dir);
director_sync_send(dir, dir->self_host, dir->sync_seq,
- DIRECTOR_VERSION_MINOR, ioloop_time);
+ DIRECTOR_VERSION_MINOR, ioloop_time,
+ mail_hosts_hash(dir->mail_hosts));
}
director_connection_set_ping_timeout(conn);
}
@@ -1243,7 +1244,7 @@
director_connection_sync_host(struct director_connection *conn,
struct director_host *host,
uint32_t seq, unsigned int minor_version,
- unsigned int timestamp)
+ unsigned int timestamp, unsigned int hosts_hash)
{
struct director *dir = conn->dir;
@@ -1261,6 +1262,16 @@
successfully connected to both directions */
i_assert(dir->left != NULL && dir->right != NULL);
+ if (hosts_hash != 0 &&
+ hosts_hash != mail_hosts_hash(conn->dir->mail_hosts)) {
+ i_error("director(%s): Hosts unexpectedly changed during SYNC reply - resending"
+ "(seq=%u, old hosts_hash=%u, new hosts_hash=%u)",
+ conn->name, seq, hosts_hash,
+ mail_hosts_hash(dir->mail_hosts));
+ (void)director_resend_sync(dir);
+ return FALSE;
+ }
+
dir->ring_min_version = minor_version;
if (!dir->ring_handshaked) {
/* the ring is handshaked */
@@ -1311,10 +1322,32 @@
return FALSE;
}
+ if (hosts_hash != 0 &&
+ hosts_hash != mail_hosts_hash(conn->dir->mail_hosts)) {
+ if (host->desynced_hosts_hash != hosts_hash) {
+ dir_debug("Ignore director %s stale SYNC request whose hosts don't match us "
+ "(seq=%u, remote hosts_hash=%u, my hosts_hash=%u)",
+ net_ip2addr(&host->ip), seq, hosts_hash,
+ mail_hosts_hash(dir->mail_hosts));
+ host->desynced_hosts_hash = hosts_hash;
+ return FALSE;
+ }
+ /* we'll get here only if we received a SYNC twice
+ with the same wrong hosts_hash. FIXME: this gets
+ triggered unnecessarily sometimes if hosts are
+ changing rapidly. */
+ i_error("director(%s): Director %s SYNC request hosts don't match us - resending hosts "
+ "(seq=%u, remote hosts_hash=%u, my hosts_hash=%u)",
+ conn->name, net_ip2addr(&host->ip), seq,
+ hosts_hash, mail_hosts_hash(dir->mail_hosts));
+ director_resend_hosts(dir);
+ return FALSE;
+ }
+ host->desynced_hosts_hash = 0;
if (dir->right != NULL) {
/* forward it to the connection on right */
director_sync_send(dir, host, seq, minor_version,
- timestamp);
+ timestamp, hosts_hash);
}
}
return TRUE;
@@ -1328,6 +1361,7 @@
struct ip_addr ip;
in_port_t port;
unsigned int arg_count, seq, minor_version = 0, timestamp = ioloop_time;
+ unsigned int hosts_hash = 0;
arg_count = str_array_length(args);
if (arg_count < 3 ||
@@ -1344,13 +1378,18 @@
director_cmd_error(conn, "Invalid parameters");
return FALSE;
}
+ if (arg_count >= 6 && str_to_uint(args[5], &hosts_hash) < 0) {
+ director_cmd_error(conn, "Invalid parameters");
+ return FALSE;
+ }
/* find the originating director. if we don't see it, it was already
removed and we can ignore this sync. */
host = director_host_lookup(dir, &ip, port);
if (host != NULL) {
if (!director_connection_sync_host(conn, host, seq,
- minor_version, timestamp))
+ minor_version, timestamp,
+ hosts_hash))
return TRUE;
}
diff -r bae8efd8b5b3 -r 8f225e43e6e3 src/director/director-host.h
--- a/src/director/director-host.h Mon Oct 12 15:41:55 2015 +0300
+++ b/src/director/director-host.h Mon Oct 12 15:47:46 2015 +0300
@@ -23,6 +23,10 @@
/* use these to avoid infinitely sending SYNCs for directors that
aren't connected in the ring. */
unsigned int last_sync_seq, last_sync_seq_counter, last_sync_timestamp;
+ /* whenever we receive a SYNC with stale hosts_hash, set this. if it's
+ already set and equals the hosts_hash of the newly received SYNC,
+ re-send our hosts to everybody in case they somehow got out of sync. */
+ unsigned int desynced_hosts_hash;
/* Last time host was detected to be down */
time_t last_network_failure;
time_t last_protocol_failure;
diff -r bae8efd8b5b3 -r 8f225e43e6e3 src/director/director.c
--- a/src/director/director.c Mon Oct 12 15:41:55 2015 +0300
+++ b/src/director/director.c Mon Oct 12 15:47:46 2015 +0300
@@ -321,7 +321,7 @@
void director_sync_send(struct director *dir, struct director_host *host,
uint32_t seq, unsigned int minor_version,
- unsigned int timestamp)
+ unsigned int timestamp, unsigned int hosts_hash)
{
string_t *str;
@@ -331,7 +331,8 @@
if (minor_version > 0 &&
director_connection_get_minor_version(dir->right) > 0) {
/* only minor_version>0 supports extra parameters */
- str_printfa(str, "\t%u\t%u", minor_version, timestamp);
+ str_printfa(str, "\t%u\t%u\t%u", minor_version,
+ timestamp, hosts_hash);
}
str_append_c(str, '\n');
director_connection_send(dir->right, str_c(str));
@@ -349,7 +350,8 @@
/* send a new SYNC in case the previous one got dropped */
dir->self_host->last_sync_timestamp = ioloop_time;
director_sync_send(dir, dir->self_host, dir->sync_seq,
- DIRECTOR_VERSION_MINOR, ioloop_time);
+ DIRECTOR_VERSION_MINOR, ioloop_time,
+ mail_hosts_hash(dir->mail_hosts));
if (dir->to_sync != NULL)
timeout_reset(dir->to_sync);
return TRUE;
@@ -412,7 +414,8 @@
director_connection_set_synced(dir->left, FALSE);
director_connection_set_synced(dir->right, FALSE);
director_sync_send(dir, dir->self_host, dir->sync_seq,
- DIRECTOR_VERSION_MINOR, ioloop_time);
+ DIRECTOR_VERSION_MINOR, ioloop_time,
+ mail_hosts_hash(dir->mail_hosts));
}
void director_sync_freeze(struct director *dir)
@@ -515,21 +518,13 @@
DIRECTOR_VERSION_RING_REMOVE, cmd);
}
-void director_update_host(struct director *dir, struct director_host *src,
- struct director_host *orig_src,
- struct mail_host *host)
+static void
+director_send_host(struct director *dir, struct director_host *src,
+ struct director_host *orig_src,
+ struct mail_host *host)
{
string_t *str;
- /* update state in case this is the first mail host being added */
- director_set_state_changed(dir);
-
- dir_debug("Updating host %s vhost_count=%u "
- "down=%d last_updown_change=%ld (hosts_hash=%u)",
- net_ip2addr(&host->ip), host->vhost_count, host->down,
- (long)host->last_updown_change,
- mail_hosts_hash(dir->mail_hosts));
-
if (orig_src == NULL) {
orig_src = dir->self_host;
orig_src->last_seq++;
@@ -556,6 +551,30 @@
}
str_append_c(str, '\n');
director_update_send(dir, src, str_c(str));
+}
+
+void director_resend_hosts(struct director *dir)
+{
+ struct mail_host *const *hostp;
+
+ array_foreach(mail_hosts_get(dir->mail_hosts), hostp)
+ director_send_host(dir, dir->self_host, NULL, *hostp);
+}
+
+void director_update_host(struct director *dir, struct director_host *src,
+ struct director_host *orig_src,
+ struct mail_host *host)
+{
+ /* update state in case this is the first mail host being added */
+ director_set_state_changed(dir);
+
+ dir_debug("Updating host %s vhost_count=%u "
+ "down=%d last_updown_change=%ld (hosts_hash=%u)",
+ net_ip2addr(&host->ip), host->vhost_count, host->down,
+ (long)host->last_updown_change,
+ mail_hosts_hash(dir->mail_hosts));
+
+ director_send_host(dir, src, orig_src, host);
host->desynced = TRUE;
director_sync(dir);
diff -r bae8efd8b5b3 -r 8f225e43e6e3 src/director/director.h
--- a/src/director/director.h Mon Oct 12 15:41:55 2015 +0300
+++ b/src/director/director.h Mon Oct 12 15:47:46 2015 +0300
@@ -115,7 +115,7 @@
void director_set_state_changed(struct director *dir);
void director_sync_send(struct director *dir, struct director_host *host,
uint32_t seq, unsigned int minor_version,
- unsigned int timestamp);
+ unsigned int timestamp, unsigned int hosts_hash);
bool director_resend_sync(struct director *dir);
void director_notify_ring_added(struct director_host *added_host,
@@ -126,6 +126,7 @@
void director_update_host(struct director *dir, struct director_host *src,
struct director_host *orig_src,
struct mail_host *host) ATTR_NULL(3);
+void director_resend_hosts(struct director *dir);
void director_remove_host(struct director *dir, struct director_host *src,
struct director_host *orig_src,
struct mail_host *host) ATTR_NULL(2, 3);
More information about the dovecot-cvs
mailing list