dovecot-2.2: director: Implemented ability to remove directors from a running ring.
dovecot at dovecot.org
dovecot at dovecot.org
Sun May 20 03:26:36 EEST 2012
details: http://hg.dovecot.org/dovecot-2.2/rev/42cca8a1d179
changeset: 14571:42cca8a1d179
user: Timo Sirainen <tss at iki.fi>
date: Sat May 19 21:18:04 2012 +0300
description:
director: Implemented ability to remove directors from a running ring.
Also added doveadm command for adding a new director to a running ring.
diffstat:
src/director/director-connection.c | 44 ++++++++++++--
src/director/director-host.c | 35 +++++++++++-
src/director/director-host.h | 9 ++-
src/director/director.c | 110 ++++++++++++++++++++++++++++++++++--
src/director/director.h | 15 ++++-
src/director/doveadm-connection.c | 60 +++++++++++++++++++-
src/director/main.c | 5 +-
src/doveadm/doveadm-director.c | 67 ++++++++++++++++++++++
8 files changed, 326 insertions(+), 19 deletions(-)
diffs (truncated from 614 to 300 lines):
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-connection.c
--- a/src/director/director-connection.c Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-connection.c Sat May 19 21:18:04 2012 +0300
@@ -383,7 +383,12 @@
elsewhere with CONNECT. however, before disconnecting it verify
first that our left side is actually still functional.
*/
+ i_assert(conn->host == NULL);
conn->host = director_host_get(dir, &ip, port);
+ /* the host shouldn't be removed at this point, but if for some
+ reason it is we don't want to crash */
+ conn->host->removed = FALSE;
+ director_host_ref(conn->host);
/* make sure we don't keep old sequence values across restarts */
conn->host->last_seq = 0;
@@ -587,6 +592,10 @@
/* ignore updates to ourself */
return TRUE;
}
+ if (host->removed) {
+ /* ignore re-adds of removed directors */
+ return TRUE;
+ }
/* already have this. just reset its last_network_failure
timestamp, since it might be up now. */
@@ -598,18 +607,32 @@
}
} else {
/* save the director and forward it */
- director_host_add(conn->dir, &ip, port);
+ host = director_host_add(conn->dir, &ip, port);
forward = TRUE;
}
if (forward) {
- director_update_send(conn->dir,
- director_connection_get_host(conn),
- t_strdup_printf("DIRECTOR\t%s\t%u\n",
- net_ip2addr(&ip), port));
+ director_notify_ring_added(host,
+ director_connection_get_host(conn));
}
return TRUE;
}
+static bool director_cmd_director_remove(struct director_connection *conn,
+ const char *const *args)
+{
+ struct director_host *host;
+ struct ip_addr ip;
+ unsigned int port;
+
+ if (!director_args_parse_ip_port(conn, args, &ip, &port))
+ return FALSE;
+
+ host = director_host_lookup(conn->dir, &ip, port);
+ if (host != NULL && !host->removed)
+ director_ring_remove(host, director_connection_get_host(conn));
+ return TRUE;
+}
+
static bool
director_cmd_host_hand_start(struct director_connection *conn,
const char *const *args)
@@ -659,7 +682,7 @@
*_args = args + 3;
host = director_host_lookup(conn->dir, &ip, port);
- if (host == NULL) {
+ if (host == NULL || host->removed) {
/* director is already gone, but we can't be sure if this
command was sent everywhere. re-send it as if it was from
ourself. */
@@ -1191,6 +1214,8 @@
return director_cmd_user_killed_everywhere(conn, args);
if (strcmp(cmd, "DIRECTOR") == 0)
return director_cmd_director(conn, args);
+ if (strcmp(cmd, "DIRECTOR-REMOVE") == 0)
+ return director_cmd_director_remove(conn, args);
if (strcmp(cmd, "SYNC") == 0)
return director_connection_sync(conn, args);
if (strcmp(cmd, "CONNECT") == 0)
@@ -1279,6 +1304,8 @@
struct director_host *const *hostp;
array_foreach(&conn->dir->dir_hosts, hostp) {
+ if ((*hostp)->removed)
+ continue;
str_printfa(str, "DIRECTOR\t%s\t%u\n",
net_ip2addr(&(*hostp)->ip), (*hostp)->port);
}
@@ -1433,12 +1460,15 @@
{
struct director_connection *conn;
+ i_assert(!host->removed);
+
/* make sure we don't keep old sequence values across restarts */
host->last_seq = 0;
conn = director_connection_init_common(dir, fd);
conn->name = i_strdup_printf("%s/out", host->name);
conn->host = host;
+ director_host_ref(host);
conn->io = io_add(conn->fd, IO_WRITE,
director_connection_connected, conn);
return conn;
@@ -1471,6 +1501,8 @@
}
if (dir->right == conn)
dir->right = NULL;
+ if (conn->host != NULL)
+ director_host_unref(conn->host);
if (conn->user_iter != NULL)
user_directory_iter_deinit(&conn->user_iter);
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-host.c
--- a/src/director/director-host.c Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-host.c Sat May 19 21:18:04 2012 +0300
@@ -29,6 +29,8 @@
struct director_host *host;
host = i_new(struct director_host, 1);
+ host->dir = dir;
+ host->refcount = 1;
host->ip = *ip;
host->port = port;
host->name = i_strdup_printf("%s:%u", net_ip2addr(ip), port);
@@ -41,8 +43,39 @@
return host;
}
-void director_host_free(struct director_host *host)
+void director_host_free(struct director_host **_host)
{
+ struct director_host *host = *_host;
+
+ i_assert(host->refcount == 1);
+
+ *_host = NULL;
+ director_host_unref(host);
+}
+
+void director_host_ref(struct director_host *host)
+{
+ i_assert(host->refcount > 0);
+ host->refcount++;
+}
+
+void director_host_unref(struct director_host *host)
+{
+ struct director_host *const *hosts;
+ unsigned int i, count;
+
+ i_assert(host->refcount > 0);
+
+ if (--host->refcount > 0)
+ return;
+
+ hosts = array_get(&host->dir->dir_hosts, &count);
+ for (i = 0; i < count; i++) {
+ if (hosts[i] == host) {
+ array_delete(&host->dir->dir_hosts, i, 1);
+ break;
+ }
+ }
i_free(host->name);
i_free(host);
}
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director-host.h
--- a/src/director/director-host.h Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director-host.h Sat May 19 21:18:04 2012 +0300
@@ -6,6 +6,9 @@
struct director;
struct director_host {
+ struct director *dir;
+ int refcount;
+
struct ip_addr ip;
unsigned int port;
@@ -22,12 +25,16 @@
time_t last_protocol_failure;
/* we are this director */
unsigned int self:1;
+ unsigned int removed:1;
};
struct director_host *
director_host_add(struct director *dir, const struct ip_addr *ip,
unsigned int port);
-void director_host_free(struct director_host *host);
+void director_host_free(struct director_host **host);
+
+void director_host_ref(struct director_host *host);
+void director_host_unref(struct director_host *host);
struct director_host *
director_host_get(struct director *dir, const struct ip_addr *ip,
diff -r 40f958c7643b -r 42cca8a1d179 src/director/director.c
--- a/src/director/director.c Sat May 19 21:16:42 2012 +0300
+++ b/src/director/director.c Sat May 19 21:18:04 2012 +0300
@@ -20,6 +20,7 @@
#define DIRECTOR_SYNC_TIMEOUT_MSECS (5*1000)
#define DIRECTOR_RING_MIN_WAIT_SECS 20
#define DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS 1000
+#define DIRECTOR_DELAYED_DIR_REMOVE_MSECS (1000*30)
static bool director_is_self_ip_set(struct director *dir)
{
@@ -127,8 +128,8 @@
static struct director_host *
director_get_preferred_right_host(struct director *dir)
{
- struct director_host *const *hosts;
- unsigned int count, self_idx;
+ struct director_host *const *hosts, *host;
+ unsigned int i, count, self_idx;
hosts = array_get(&dir->dir_hosts, &count);
if (count == 1) {
@@ -137,7 +138,13 @@
}
self_idx = director_find_self_idx(dir);
- return hosts[(self_idx + 1) % count];
+ for (i = 0; i < count; i++) {
+ host = hosts[(self_idx + i + 1) % count];
+ if (!host->removed)
+ return host;
+ }
+ /* self, with some removed hosts */
+ return NULL;
}
static bool director_wait_for_others(struct director *dir)
@@ -177,6 +184,9 @@
for (i = 1; i < count; i++) {
unsigned int idx = (self_idx + i) % count;
+ if (hosts[idx]->removed)
+ continue;
+
if (hosts[idx]->last_network_failure +
DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
/* connection failed recently, don't try retrying here */
@@ -408,6 +418,79 @@
director_connection_uncork(*connp);
}
+void director_notify_ring_added(struct director_host *added_host,
+ struct director_host *src)
+{
+ const char *cmd;
+
+ cmd = t_strdup_printf("DIRECTOR\t%s\t%u\n",
+ net_ip2addr(&added_host->ip), added_host->port);
+ director_update_send(added_host->dir, src, cmd);
+}
+
+static void director_delayed_dir_remove_timeout(struct director *dir)
+{
+ struct director_host *const *hosts, *host;
+ unsigned int i, count;
+
+ timeout_remove(&dir->to_remove_dirs);
+
+ hosts = array_get(&dir->dir_hosts, &count);
+ for (i = 0; i < count; ) {
+ if (hosts[i]->removed) {
+ host = hosts[i];
+ director_host_free(&host);
+ hosts = array_get(&dir->dir_hosts, &count);
+ } else {
+ i++;
+ }
+ }
+}
+
+void director_ring_remove(struct director_host *removed_host,
+ struct director_host *src)
+{
+ struct director *dir = removed_host->dir;
+ struct director_connection *const *conns, *conn;
+ unsigned int i, count;
+ const char *cmd;
+
+ if (removed_host->self) {
+ /* others will just disconnect us */
+ return;
+ }
+
+ /* mark the host as removed and fully remove it later. this delay is
+ needed, because the removal may trigger director reconnections,
+ which may send the director back and we don't want to re-add it */
+ removed_host->removed = TRUE;
+ if (dir->to_remove_dirs == NULL) {
+ dir->to_remove_dirs =
[... diff truncated by the list archive ...]
More information about the dovecot-cvs mailing list