dovecot-2.1: auth: Throttle SQL auth worker process creation if they can't connect to database.

dovecot at dovecot.org dovecot at dovecot.org
Thu Jan 19 17:47:08 EET 2012


details:   http://hg.dovecot.org/dovecot-2.1/rev/7175320feafc
changeset: 13958:7175320feafc
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Jan 19 17:46:52 2012 +0200
description:
auth: Throttle SQL auth worker process creation if they can't connect to database.

diffstat:

 src/auth/auth-settings.c      |   5 +++
 src/auth/auth-worker-client.c |  41 +++++++++++++++++++++++++-
 src/auth/auth-worker-client.h |   3 +
 src/auth/auth-worker-server.c |  68 +++++++++++++++++++++++++++++++++++++++++-
 src/auth/db-sql.c             |  20 ++++++++++++
 src/auth/db-sql.h             |   3 +
 src/auth/main.c               |  16 ++-------
 src/auth/passdb-sql.c         |   4 +-
 src/auth/userdb-sql.c         |   6 +++-
 9 files changed, 149 insertions(+), 17 deletions(-)

diffs (truncated from 377 to 300 lines):

diff -r 1fa75cada826 -r 7175320feafc src/auth/auth-settings.c
--- a/src/auth/auth-settings.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-settings.c	Thu Jan 19 17:46:52 2012 +0200
@@ -282,6 +282,11 @@
 	if (set->debug)
 		set->verbose = TRUE;
 
+	if (set->worker_max_count == 0) {
+		*error_r = "auth_worker_max_count must be above zero";
+		return FALSE;
+	}
+
 	if (set->cache_size > 0 && set->cache_size < 1024) {
 		/* probably a configuration error.
 		   older versions used megabyte numbers */
diff -r 1fa75cada826 -r 7175320feafc src/auth/auth-worker-client.c
--- a/src/auth/auth-worker-client.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-client.c	Thu Jan 19 17:46:52 2012 +0200
@@ -8,6 +8,7 @@
 #include "ostream.h"
 #include "hex-binary.h"
 #include "str.h"
+#include "process-title.h"
 #include "master-service.h"
 #include "auth-request.h"
 #include "auth-worker-client.h"
@@ -30,6 +31,7 @@
 
 	unsigned int version_received:1;
 	unsigned int dbhash_received:1;
+	unsigned int error_sent:1;
 };
 
 struct auth_worker_list_context {
@@ -40,10 +42,23 @@
 };
 
 struct auth_worker_client *auth_worker_client;
+static bool auth_worker_client_error = FALSE;
 
 static void auth_worker_input(struct auth_worker_client *client);
 static int auth_worker_output(struct auth_worker_client *client);
 
+void auth_worker_refresh_proctitle(const char *state)
+{
+	if (!global_auth_settings->verbose_proctitle || !worker)
+		return;
+
+	if (auth_worker_client_error)
+		state = "error";
+	else if (auth_worker_client == NULL)
+		state = "waiting for connection";
+	process_title_set(t_strdup_printf("worker: %s", state));
+}
+
 static void
 auth_worker_client_check_throttle(struct auth_worker_client *client)
 {
@@ -673,6 +688,8 @@
 	auth_worker_refresh_proctitle(CLIENT_STATE_HANDSHAKE);
 
 	auth_worker_client = client;
+	if (auth_worker_client_error)
+		auth_worker_client_send_error();
 	return client;
 }
 
@@ -694,8 +711,8 @@
 	client->fd = -1;
 	auth_worker_client_unref(&client);
 
-	auth_worker_refresh_proctitle(NULL);
 	auth_worker_client = NULL;
+	auth_worker_refresh_proctitle("");
 	master_service_client_connection_destroyed(master_service);
 }
 
@@ -712,3 +729,25 @@
 	o_stream_unref(&client->output);
 	i_free(client);
 }
+
+void auth_worker_client_send_error(void)
+{
+	auth_worker_client_error = TRUE;
+	if (auth_worker_client != NULL &&
+	    !auth_worker_client->error_sent) {
+		o_stream_send_str(auth_worker_client->output, "ERROR\n");
+		auth_worker_client->error_sent = TRUE;
+	}
+	auth_worker_refresh_proctitle("");
+}
+
+void auth_worker_client_send_success(void)
+{
+	auth_worker_client_error = FALSE;
+	if (auth_worker_client != NULL &&
+	    auth_worker_client->error_sent) {
+		o_stream_send_str(auth_worker_client->output, "SUCCESS\n");
+		auth_worker_client->error_sent = FALSE;
+	}
+	auth_worker_refresh_proctitle(CLIENT_STATE_IDLE);
+}
diff -r 1fa75cada826 -r 7175320feafc src/auth/auth-worker-client.h
--- a/src/auth/auth-worker-client.h	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-client.h	Thu Jan 19 17:46:52 2012 +0200
@@ -11,6 +11,9 @@
 void auth_worker_client_destroy(struct auth_worker_client **client);
 void auth_worker_client_unref(struct auth_worker_client **client);
 
+void auth_worker_client_send_error(void);
+void auth_worker_client_send_success(void);
+
 const char *auth_worker_client_get_state(struct auth_worker_client *client);
 
 #endif
diff -r 1fa75cada826 -r 7175320feafc src/auth/auth-worker-server.c
--- a/src/auth/auth-worker-server.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-server.c	Thu Jan 19 17:46:52 2012 +0200
@@ -41,14 +41,16 @@
 	struct auth_worker_request *request;
 	unsigned int id_counter;
 
+	unsigned int received_error:1;
 	unsigned int shutdown:1;
 };
 
 static ARRAY_DEFINE(connections, struct auth_worker_connection *) = ARRAY_INIT;
-static unsigned int idle_count;
+static unsigned int idle_count = 0, auth_workers_with_errors = 0;
 static ARRAY_DEFINE(worker_request_array, struct auth_worker_request *);
 static struct aqueue *worker_request_queue;
 static time_t auth_worker_last_warn;
+static unsigned int auth_workers_throttle_count;
 
 static const char *worker_socket_path;
 
@@ -150,7 +152,7 @@
 	struct auth_worker_connection *conn;
 	int fd;
 
-	if (array_count(&connections) >= global_auth_settings->worker_max_count)
+	if (array_count(&connections) >= auth_workers_throttle_count)
 		return NULL;
 
 	fd = net_connect_unix_with_retries(worker_socket_path, 5000);
@@ -189,6 +191,12 @@
 
 	*_conn = NULL;
 
+	if (conn->received_error) {
+		i_assert(auth_workers_with_errors > 0);
+		i_assert(auth_workers_with_errors <= array_count(&connections));
+		auth_workers_with_errors--;
+	}
+
 	array_foreach(&connections, conns) {
 		if (*conns == conn) {
 			idx = array_foreach_idx(&connections, conns);
@@ -260,6 +268,51 @@
 		io_remove(&conn->io);
 }
 
+static bool auth_worker_error(struct auth_worker_connection *conn)
+{
+	if (conn->received_error)
+		return TRUE;
+	conn->received_error = TRUE;
+	auth_workers_with_errors++;
+	i_assert(auth_workers_with_errors <= array_count(&connections));
+
+	if (auth_workers_with_errors == 1) {
+		/* this is the only failing auth worker connection.
+		   don't create new ones until this one sends SUCCESS. */
+		auth_workers_throttle_count = array_count(&connections);
+		return TRUE;
+	}
+
+	/* too many auth workers, reduce them */
+	i_assert(array_count(&connections) > 1);
+	if (auth_workers_throttle_count >= array_count(&connections))
+		auth_workers_throttle_count = array_count(&connections)-1;
+	else if (auth_workers_throttle_count > 1)
+		auth_workers_throttle_count--;
+	auth_worker_destroy(&conn, "Internal auth worker failure", FALSE);
+	return FALSE;
+}
+
+static void auth_worker_success(struct auth_worker_connection *conn)
+{
+	unsigned int max_count = global_auth_settings->worker_max_count;
+
+	if (!conn->received_error)
+		return;
+
+	i_assert(auth_workers_with_errors > 0);
+	i_assert(auth_workers_with_errors <= array_count(&connections));
+	auth_workers_with_errors--;
+
+	if (auth_workers_with_errors == 0) {
+		/* all workers are succeeding now, set the limit back to
+		   original. */
+		auth_workers_throttle_count = max_count;
+	} else if (auth_workers_throttle_count < max_count)
+		auth_workers_throttle_count++;
+	conn->received_error = FALSE;
+}
+
 static void worker_input(struct auth_worker_connection *conn)
 {
 	const char *line, *id_str;
@@ -286,6 +339,15 @@
 			conn->shutdown = TRUE;
 			continue;
 		}
+		if (strcmp(line, "ERROR") == 0) {
+			if (!auth_worker_error(conn))
+				return;
+			continue;
+		}
+		if (strcmp(line, "SUCCESS") == 0) {
+			auth_worker_success(conn);
+			continue;
+		}
 		id_str = line;
 		line = strchr(line, '\t');
 		if (line == NULL ||
@@ -358,6 +420,8 @@
 void auth_worker_server_init(void)
 {
 	worker_socket_path = "auth-worker";
+	auth_workers_throttle_count = global_auth_settings->worker_max_count;
+	i_assert(auth_workers_throttle_count > 0);
 
 	i_array_init(&worker_request_array, 128);
 	worker_request_queue = aqueue_init(&worker_request_array.arr);
diff -r 1fa75cada826 -r 7175320feafc src/auth/db-sql.c
--- a/src/auth/db-sql.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/db-sql.c	Thu Jan 19 17:46:52 2012 +0200
@@ -6,6 +6,7 @@
 
 #include "settings.h"
 #include "auth-request.h"
+#include "auth-worker-client.h"
 #include "db-sql.h"
 
 #include <stddef.h>
@@ -129,6 +130,25 @@
 	pool_unref(&conn->pool);
 }
 
+void db_sql_connect(struct sql_connection *conn)
+{
+	if (sql_connect(conn->db) < 0 && worker) {
+		/* auth worker's sql connection failed. we can't do anything
+		   useful until the connection works. there's no point in
+		   having tons of worker processes all logging failures,
+		   so tell the auth master to stop creating new workers (and
+		   maybe close old ones). this handling is especially useful if
+		   we reach the max. number of connections for sql server. */
+		auth_worker_client_send_error();
+	}
+}
+
+void db_sql_success(struct sql_connection *conn ATTR_UNUSED)
+{
+	if (worker)
+		auth_worker_client_send_success();
+}
+
 void db_sql_check_userdb_warning(struct sql_connection *conn)
 {
 	if (worker || conn->userdb_used || conn->set.userdb_warning_disable)
diff -r 1fa75cada826 -r 7175320feafc src/auth/db-sql.h
--- a/src/auth/db-sql.h	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/db-sql.h	Thu Jan 19 17:46:52 2012 +0200
@@ -34,6 +34,9 @@
 struct sql_connection *db_sql_init(const char *config_path, bool userdb);
 void db_sql_unref(struct sql_connection **conn);
 
+void db_sql_connect(struct sql_connection *conn);
+void db_sql_success(struct sql_connection *conn);
+
 void db_sql_check_userdb_warning(struct sql_connection *conn);
 
 #endif
diff -r 1fa75cada826 -r 7175320feafc src/auth/main.c
--- a/src/auth/main.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/main.c	Thu Jan 19 17:46:52 2012 +0200
@@ -71,16 +71,6 @@
 		auth_request_state_count[AUTH_REQUEST_STATE_USERDB]));
 }
 
-void auth_worker_refresh_proctitle(const char *state)
-{
-	if (!global_auth_settings->verbose_proctitle || !worker)
-		return;
-
-	if (state == NULL)
-		state = "waiting for connection";


More information about the dovecot-cvs mailing list