[dovecot-cvs] dovecot/src/plugins/fts fts-storage.c,1.13,1.14

tss at dovecot.org tss at dovecot.org
Thu Dec 21 13:16:35 UTC 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts
In directory talvi:/tmp/cvs-serv24065

Modified Files:
	fts-storage.c 
Log Message:
Make indexing non-blocking. Optimize also header searches.



Index: fts-storage.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/plugins/fts/fts-storage.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- fts-storage.c	20 Dec 2006 21:26:37 -0000	1.13
+++ fts-storage.c	21 Dec 2006 13:16:32 -0000	1.14
@@ -17,6 +17,8 @@
 	*((void **)array_idx_modifiable(&(obj)->module_contexts, \
 					fts_storage_module_id))
 
+#define FTS_SEARCH_NONBLOCK_COUNT 10
+
 struct fts_mailbox {
 	struct mailbox_vfuncs super;
 	struct fts_backend *backend_exact;
@@ -30,10 +32,25 @@
 	ARRAY_TYPE(seq_range) result;
 	unsigned int result_pos;
 
+	struct mail_search_arg *args, *best_arg;
 	struct fts_backend *backend;
+	struct fts_storage_build_context *build_ctx;
+
 	unsigned int locked:1;
 };
 
+struct fts_storage_build_context {
+	struct mail_search_context *search_ctx;
+	struct mail_search_seqset seqset;
+	struct mail_search_arg search_arg;
+	struct mail *mail;
+	struct fts_backend_build_context *build;
+
+	uint32_t uid;
+	string_t *headers;
+	bool save_part;
+};
+
 struct fts_transaction_context {
 	bool expunges;
 };
@@ -83,13 +100,6 @@
 	return 0;
 }
 
-struct fts_storage_build_context {
-	struct fts_backend_build_context *build;
-	uint32_t uid;
-	string_t *headers;
-	bool save_part;
-};
-
 static int fts_build_mail_flush(struct fts_storage_build_context *ctx)
 {
 	if (str_len(ctx->headers) == 0)
@@ -140,8 +150,7 @@
 	return fts_build_mail_flush(ctx);
 }
 
-static int
-fts_build_mail(struct fts_storage_build_context *ctx, struct mail *mail)
+static int fts_build_mail(struct fts_storage_build_context *ctx)
 {
 	struct istream *input;
 	struct message_parser_ctx *parser;
@@ -150,9 +159,9 @@
 	struct message_part *prev_part, *skip_part;
 	int ret;
 
-	ctx->uid = mail->uid;
+	ctx->uid = ctx->mail->uid;
 
-	input = mail_get_stream(mail, NULL, NULL);
+	input = mail_get_stream(ctx->mail, NULL, NULL);
 	if (input == NULL)
 		return -1;
 
@@ -196,7 +205,7 @@
 					break;
 			}
 		} else {
-			if (fts_backend_build_more(ctx->build, mail->uid,
+			if (fts_backend_build_more(ctx->build, ctx->mail->uid,
 						   block.data,
 						   block.size) < 0) {
 				ret = -1;
@@ -209,16 +218,14 @@
 	return ret;
 }
 
-static int fts_build_new(struct fts_backend *backend,
-			 struct mailbox_transaction_context *t)
+static int fts_build_init(struct fts_search_context *fctx,
+			  struct mailbox_transaction_context *t)
 {
-	struct fts_storage_build_context ctx;
-	struct mail_search_context *search_ctx;
+	struct fts_backend *backend = fctx->backend;
+	struct fts_storage_build_context *ctx;
+	struct fts_backend_build_context *build;
 	struct mail_search_seqset seqset;
-	struct mail_search_arg search_arg;
-	struct mail *mail;
 	uint32_t last_uid, last_uid_locked;
-	int ret = 0;
 
 	if (fts_backend_get_last_uid(backend, &last_uid) < 0)
 		return -1;
@@ -232,8 +239,7 @@
 		return 0;
 	}
 
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.build = fts_backend_build_init(backend, &last_uid_locked);
+	build = fts_backend_build_init(backend, &last_uid_locked);
 	if (last_uid != last_uid_locked) {
 		/* changed, need to get again the sequences */
 		i_assert(last_uid < last_uid_locked);
@@ -241,138 +247,292 @@
 		last_uid = last_uid_locked;
 		if (mailbox_get_uids(t->box, last_uid+1, (uint32_t)-1,
 				     &seqset.seq1, &seqset.seq2) < 0) {
-			(void)fts_backend_build_deinit(ctx.build);
+			(void)fts_backend_build_deinit(build);
 			return -1;
 		}
 		if (seqset.seq1 == 0) {
 			/* no new messages */
-			(void)fts_backend_build_deinit(ctx.build);
+			(void)fts_backend_build_deinit(build);
 			return 0;
 		}
 	}
 
-	memset(&search_arg, 0, sizeof(search_arg));
-	search_arg.type = SEARCH_SEQSET;
-	search_arg.value.seqset = &seqset;
 
-	ctx.headers = str_new(default_pool, 512);
-	mail = mail_alloc(t, 0, NULL);
-	search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
-	while (mailbox_search_next(search_ctx, mail) > 0) {
-		t_push();
-		ret = fts_build_mail(&ctx, mail);
-		t_pop();
+	ctx = i_new(struct fts_storage_build_context, 1);
+	ctx->build = build;
+	ctx->seqset = seqset;
+	ctx->search_arg.type = SEARCH_SEQSET;
+	ctx->search_arg.value.seqset = &ctx->seqset;
 
-		if (ret < 0)
-			break;
-	}
-	if (mailbox_search_deinit(&search_ctx) < 0)
+	ctx->headers = str_new(default_pool, 512);
+	ctx->mail = mail_alloc(t, 0, NULL);
+	ctx->search_ctx = mailbox_search_init(t, NULL, &ctx->search_arg, NULL);
+
+	fctx->build_ctx = ctx;
+	return 0;
+}
+
+static int fts_build_deinit(struct fts_storage_build_context *ctx)
+{
+	int ret = 0;
+
+	if (mailbox_search_deinit(&ctx->search_ctx) < 0)
 		ret = -1;
-	mail_free(&mail);
+	mail_free(&ctx->mail);
 
-	if (fts_backend_build_deinit(ctx.build) < 0)
+	if (fts_backend_build_deinit(ctx->build) < 0)
 		ret = -1;
-	str_free(&ctx.headers);
+	str_free(&ctx->headers);
+	i_free(ctx);
 	return ret;
 }
 
-#define SEARCH_ARG_IS_OPTIMIZABLE(backend, arg) \
-	((arg)->type == SEARCH_BODY_FAST || (arg)->type == SEARCH_TEXT_FAST || \
-	 ((backend) != NULL && \
-	  ((backend)->flags & FTS_BACKEND_FLAG_EXACT_LOOKUPS) != 0 && \
-	  ((arg)->type == SEARCH_BODY || (arg)->type == SEARCH_TEXT)))
+static int fts_build_more(struct fts_storage_build_context *ctx)
+{
+	unsigned int count = 0;
+	int ret;
 
+	while (mailbox_search_next(ctx->search_ctx, ctx->mail) > 0) {
+		t_push();
+		ret = fts_build_mail(ctx);
+		t_pop();
 
-static struct mail_search_context *
-fts_mailbox_search_init(struct mailbox_transaction_context *t,
-			const char *charset, struct mail_search_arg *args,
-			const enum mail_sort_type *sort_program)
-{
-	struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
-	struct mail_search_context *ctx;
-	struct fts_search_context *fctx;
-	struct fts_backend *backend = NULL;
-	struct mail_search_arg *tmp;
-	ARRAY_TYPE(seq_range) uid_result;
+		if (ret < 0)
+			return -1;
 
-	ctx = fbox->super.search_init(t, charset, args, sort_program);
+		if (++count == FTS_SEARCH_NONBLOCK_COUNT)
+			return 0;
+	}
+	return 1;
+}
 
-	fctx = i_new(struct fts_search_context, 1);
-	array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx);
+static void fts_search_filter_args(struct fts_search_context *fctx,
+				   struct mail_search_arg *args,
+				   ARRAY_TYPE(seq_range) *uid_result)
+{
+	const char *key;
 
-	if (fbox->backend_exact == NULL && fbox->backend_fast == NULL)
-		return ctx;
+	for (; args != NULL; args = args->next) {
+		switch (args->type) {
+		case SEARCH_BODY_FAST:
+		case SEARCH_TEXT_FAST:
+			if ((fctx->backend->flags &
+			     FTS_BACKEND_FLAG_EXACT_LOOKUPS) == 0)
+				break;
+			/* fall through */
+		case SEARCH_BODY:
+		case SEARCH_TEXT:
+		case SEARCH_HEADER:
+			if (args == fctx->best_arg) {
+				/* already handled this one */
+				break;
+			}
 
-	/* FIXME: handle AND/OR. Maybe also header lookups? */
+			key = args->value.str;
+			if (*key == '\0') {
+				i_assert(args->type == SEARCH_HEADER);
 
-	/* first check if we can use backend_fast */
-	for (tmp = args; tmp != NULL; tmp = tmp->next) {
-		if (args->type == SEARCH_BODY_FAST ||
-		    args->type == SEARCH_TEXT_FAST)
-			break;
-	}
-	if (tmp != NULL) {
-		/* yes */
-		backend = fbox->backend_fast;
-	} else if (fbox->backend_exact != NULL) {
-		/* nope. what about backend_exact? */
-		for (tmp = args; tmp != NULL; tmp = tmp->next) {
-			if (args->type == SEARCH_BODY ||
-			    args->type == SEARCH_TEXT) {
-				backend = fbox->backend_exact;
+				/* we're only checking the existence
+				   of the header. */
+				key = args->hdr_field_name;
+			}
+
+			if (fts_backend_filter(fctx->backend, key,
+					       uid_result) < 0) {
+				/* failed, but we already have limited
+				   the search, so just ignore this */
 				break;
 			}
+			if (args->type != SEARCH_HEADER &&
+			    (fctx->backend->flags &
+			     FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
+				args->match_always = TRUE;
+				args->result = 1;
+			}
+			break;
+		case SEARCH_OR:
+		case SEARCH_SUB:
+			fts_search_filter_args(fctx, args->value.subargs,
+					       uid_result);
+			break;
+		default:
+			break;
 		}
 	}
-	args = tmp;
-
-	if (backend == NULL)
-		return ctx;
+}
 
-	/* update the backend */
-	if (fts_build_new(backend, t) < 0)
-		return ctx;
+static void fts_search_init(struct mailbox *box,
+			    struct fts_search_context *fctx)
+{
+	struct fts_backend *backend = fctx->backend;
+	const char *key;
+	ARRAY_TYPE(seq_range) uid_result;
 
 	if (fts_backend_lock(backend) <= 0)
-		return ctx;
-	fctx->backend = backend;
+		return;
 	fctx->locked = TRUE;
 
+	key = fctx->best_arg->value.str;
+	if (*key == '\0') {
+		i_assert(fctx->best_arg->type == SEARCH_HEADER);
+
+		/* we're only checking the existence
+		   of the header. */
+		key = fctx->best_arg->hdr_field_name;
+	}
+
 	i_array_init(&uid_result, 64);
-	if (fts_backend_lookup(backend, args->value.str, &uid_result) < 0) {
+	if (fts_backend_lookup(backend, key, &uid_result) < 0) {
 		/* failed, fallback to reading everything */
 		array_free(&uid_result);
+		return;
 	}
 
 	if ((backend->flags & FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
-		args->match_always = TRUE;
-		args->result = 1;
+		fctx->best_arg->match_always = TRUE;
+		fctx->best_arg->result = 1;
 	}
-	args = args->next;
-	while (args != NULL) {
-		if (SEARCH_ARG_IS_OPTIMIZABLE(backend, args)) {
-			if ((backend->flags &
-			     FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) {
+
+	fts_search_filter_args(fctx, fctx->args, &uid_result);
+
+	(void)uid_range_to_seq(box, &uid_result, &fctx->result);
+	array_free(&uid_result);
+}
+
+static bool arg_is_better(const struct mail_search_arg *new_arg,
+			  const struct mail_search_arg *old_arg)
+{
+	if (old_arg == NULL)
+		return TRUE;
+
+	/* prefer not to use headers. they have a larger possibility of
+	   having lots of identical strings */
+	if (old_arg->type == SEARCH_HEADER)
+		return TRUE;
+	else if (new_arg->type == SEARCH_HEADER)
+		return FALSE;
+
+	return strlen(new_arg->value.str) > strlen(old_arg->value.str);
+}
+
+static void fts_search_args_check(struct mail_search_arg *args,
+				  bool *have_fast_r, bool *have_exact_r,
+				  struct mail_search_arg **best_fast_arg,
+				  struct mail_search_arg **best_exact_arg)
+{
+	for (; args != NULL; args = args->next) {
+		switch (args->type) {
+		case SEARCH_BODY_FAST:
+		case SEARCH_TEXT_FAST:
+			if (*args->value.str == '\0') {
+				/* this matches everything */
 				args->match_always = TRUE;
 				args->result = 1;
+				break;
 			}
-			if (fts_backend_filter(backend, args->value.str,
-					       &uid_result) < 0) {
-				/* failed, but we already have limited
-				   the search, so just ignore this */
+			if (arg_is_better(args, *best_fast_arg)) {
+				*best_fast_arg = args;
+				*have_fast_r = TRUE;
+			}
+			break;
+		case SEARCH_BODY:
+		case SEARCH_TEXT:
+			if (*args->value.str == '\0') {
+				/* this matches everything */
+				args->match_always = TRUE;
+				args->result = 1;
 				break;
 			}
+		case SEARCH_HEADER:
+			if (arg_is_better(args, *best_exact_arg)) {
+				*best_exact_arg = args;
+				*have_exact_r = TRUE;
+			}
+			break;
+		case SEARCH_OR:
+		case SEARCH_SUB:
+			fts_search_args_check(args->value.subargs,
+					      have_fast_r, have_exact_r,
+					      best_fast_arg, best_exact_arg);
+			break;
+		default:
+			break;
 		}
-		args = args->next;
 	}
+}
 
-	if (array_is_created(&uid_result)) {
-		(void)uid_range_to_seq(t->box, &uid_result, &fctx->result);
-		array_free(&uid_result);
+static struct mail_search_context *
+fts_mailbox_search_init(struct mailbox_transaction_context *t,
+			const char *charset, struct mail_search_arg *args,
+			const enum mail_sort_type *sort_program)
+{
+	struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
+	struct mail_search_context *ctx;
+	struct fts_search_context *fctx;
+	struct mail_search_arg *best_fast_arg, *best_exact_arg;
+	bool have_fast, have_exact;
+
+	ctx = fbox->super.search_init(t, charset, args, sort_program);
+
+	fctx = i_new(struct fts_search_context, 1);
+	fctx->args = args;
+	array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx);
+
+	if (fbox->backend_exact == NULL && fbox->backend_fast == NULL)
+		return ctx;
+
+	have_fast = have_exact = FALSE;
+	best_fast_arg = best_exact_arg = NULL;
+	fts_search_args_check(args, &have_fast, &have_exact,
+			      &best_fast_arg, &best_exact_arg);
+	if (have_fast && fbox->backend_fast != NULL) {
+		/* use fast backend whenever possible */
+		fctx->backend = fbox->backend_fast;
+		fctx->best_arg = best_fast_arg;
+	} else if (have_exact || have_fast) {
+		fctx->backend = fbox->backend_exact;
+		fctx->best_arg = arg_is_better(best_exact_arg, best_fast_arg) ?
+			best_exact_arg : best_fast_arg;
 	}
+
+	if (fctx->backend != NULL) {
+		if (fts_build_init(fctx, t) < 0)
+			fctx->backend = NULL;
+		else if (fctx->build_ctx == NULL) {
+			/* the index was up to date */
+			fts_search_init(t->box, fctx);
+		}
+	}
+
 	return ctx;
 }
 
+static int fts_mailbox_search_next_nonblock(struct mail_search_context *ctx,
+					    struct mail *mail, bool *tryagain_r)
+{
+	struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
+	struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+	int ret;
+
+	if (fctx->build_ctx != NULL) {
+		/* still building the index */
+		ret = fts_build_more(fctx->build_ctx);
+		if (ret == 0) {
+			*tryagain_r = TRUE;
+			return 0;
+		}
+
+		/* finished / error */
+		fts_build_deinit(fctx->build_ctx);
+		fctx->build_ctx = NULL;
+
+		if (ret == 0)
+			fts_search_init(ctx->transaction->box, fctx);
+	}
+	return fbox->super.search_next_nonblock(ctx, mail, tryagain_r);
+
+}
+
 static int fts_mailbox_search_next_update_seq(struct mail_search_context *ctx)
 {
 	struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
@@ -418,6 +578,11 @@
 	struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
 	struct fts_search_context *fctx = FTS_CONTEXT(ctx);
 
+	if (fctx->build_ctx != NULL) {
+		/* the search was cancelled */
+		fts_build_deinit(fctx->build_ctx);
+	}
+
 	if (fctx->locked)
 		fts_backend_unlock(fctx->backend);
 
@@ -574,6 +739,7 @@
 	fbox->super = box->v;
 	box->v.close = fts_mailbox_close;
 	box->v.search_init = fts_mailbox_search_init;
+	box->v.search_next_nonblock = fts_mailbox_search_next_nonblock;
 	box->v.search_next_update_seq = fts_mailbox_search_next_update_seq;
 	box->v.search_deinit = fts_mailbox_search_deinit;
 	box->v.mail_alloc = fts_mail_alloc;



More information about the dovecot-cvs mailing list