dovecot: Simplified and optimized the sorting code.

dovecot at dovecot.org dovecot at dovecot.org
Sat Dec 8 21:36:25 EET 2007


details:   http://hg.dovecot.org/dovecot/rev/09e1e8d4aa53
changeset: 6968:09e1e8d4aa53
user:      Timo Sirainen <tss at iki.fi>
date:      Sat Dec 08 21:36:20 2007 +0200
description:
Simplified and optimized the sorting code.

diffstat:

1 file changed, 174 insertions(+), 279 deletions(-)
src/lib-storage/index/index-sort.c |  453 +++++++++++++-----------------------

diffs (truncated from 607 to 300 lines):

diff -r 4c3002f3cd51 -r 09e1e8d4aa53 src/lib-storage/index/index-sort.c
--- a/src/lib-storage/index/index-sort.c	Sat Dec 08 21:28:46 2007 +0200
+++ b/src/lib-storage/index/index-sort.c	Sat Dec 08 21:36:20 2007 +0200
@@ -53,28 +53,21 @@ struct mail_search_sort_program {
 	const char *primary_sort_header;
 	struct mail *temp_mail;
 
-	ARRAY_TYPE(mail_sort_node) nodes;
+	ARRAY_TYPE(mail_sort_node) nodes, all_nodes;
 	const struct mail_sort_node *nodes_ptr;
 	unsigned int nodes_count, iter_idx;
 
-	ARRAY_TYPE(mail_sort_node) all_nodes;
-
 	uint32_t ext_id;
-	uint32_t prev_seq, last_sorted_seq;
+	unsigned int first_missing_sort_id_idx;
 
 	unsigned int reverse:1;
-	unsigned int skipped_mails:1;
 	unsigned int sort_ids_added:1;
+	unsigned int missing_sort_ids:1;
 };
 
 struct sort_cmp_context {
 	struct mail_search_sort_program *program;
 	struct mail *mail;
-
-	uint32_t cache_seq;
-	enum mail_sort_type cache_type;
-	uint32_t cache_value;
-	const char *cache_str;
 };
 
 static struct sort_cmp_context static_node_cmp_context;
@@ -168,19 +161,21 @@ static const char *get_first_mailbox(str
 	return addr != NULL ? addr->mailbox : "";
 }
 
-static const char *
-sort_header_get(enum mail_sort_type sort_type, struct mail *mail, uint32_t seq)
+static void
+sort_header_get(string_t *dest, enum mail_sort_type sort_type,
+		struct mail *mail, uint32_t seq)
 {
 	const char *str;
-	string_t *buf;
 
 	mail_set_seq(mail, seq);
 	switch (sort_type & MAIL_SORT_MASK) {
 	case MAIL_SORT_SUBJECT:
 		if (mail_get_first_header(mail, "Subject", &str) <= 0)
-			return "";
-		return imap_get_base_subject_cased(pool_datastack_create(),
-						   str, NULL);
+			return;
+		str = imap_get_base_subject_cased(pool_datastack_create(),
+						  str, NULL);
+		str_append(dest, str);
+		return;
 	case MAIL_SORT_CC:
 		str = get_first_mailbox(mail, "Cc");
 		break;
@@ -194,9 +189,7 @@ sort_header_get(enum mail_sort_type sort
 		i_unreached();
 	}
 
-	buf = t_str_new(128);
-	(void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, buf);
-	return str_c(buf);
+	(void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, dest);
 }
 
 static uint32_t sort_get_arrival(struct mail *mail)
@@ -259,23 +252,19 @@ static int sort_node_cmp_type(struct sor
 	case MAIL_SORT_TO:
 	case MAIL_SORT_SUBJECT:
 		T_FRAME_BEGIN {
-			const char *str1, *str2;
-
-			str1 = n1->seq == ctx->cache_seq &&
-				ctx->cache_type == sort_type ? ctx->cache_str :
-				sort_header_get(sort_type, ctx->mail, n1->seq);
-			str2 = sort_header_get(sort_type, ctx->mail, n2->seq);
-
-			ret = strcmp(str1, str2);
+			string_t *str1, *str2;
+
+			str1 = t_str_new(256);
+			str2 = t_str_new(256);
+			sort_header_get(str1, sort_type, ctx->mail, n1->seq);
+			sort_header_get(str2, sort_type, ctx->mail, n2->seq);
+
+			ret = strcmp(str_c(str1), str_c(str2));
 		} T_FRAME_END;
 		break;
 	case MAIL_SORT_ARRIVAL:
-		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
-			time1 = ctx->cache_value;
-		else {
-			mail_set_seq(ctx->mail, n1->seq);
-			time1 = sort_get_arrival(ctx->mail);
-		}
+		mail_set_seq(ctx->mail, n1->seq);
+		time1 = sort_get_arrival(ctx->mail);
 
 		mail_set_seq(ctx->mail, n2->seq);
 		time2 = sort_get_arrival(ctx->mail);
@@ -284,12 +273,8 @@ static int sort_node_cmp_type(struct sor
 			(time1 > time2 ? 1 : 0);
 		break;
 	case MAIL_SORT_DATE:
-		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
-			time1 = ctx->cache_value;
-		else {
-			mail_set_seq(ctx->mail, n1->seq);
-			time1 = sort_get_date(ctx->mail);
-		}
+		mail_set_seq(ctx->mail, n1->seq);
+		time1 = sort_get_date(ctx->mail);
 
 		mail_set_seq(ctx->mail, n2->seq);
 		time2 = sort_get_date(ctx->mail);
@@ -298,12 +283,8 @@ static int sort_node_cmp_type(struct sor
 			(time1 > time2 ? 1 : 0);
 		break;
 	case MAIL_SORT_SIZE:
-		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
-			size1 = ctx->cache_value;
-		else {
-			mail_set_seq(ctx->mail, n1->seq);
-			size1 = sort_get_size(ctx->mail);
-		}
+		mail_set_seq(ctx->mail, n1->seq);
+		size1 = sort_get_size(ctx->mail);
 
 		mail_set_seq(ctx->mail, n2->seq);
 		size2 = sort_get_size(ctx->mail);
@@ -341,31 +322,24 @@ static int sort_node_cmp(const void *p1,
 	return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2);
 }
 
-static int sort_node_cmp_no_sort_id(const void *p1, const void *p2)
+static int sort_node_cmp_nozero_sort_id(const void *p1, const void *p2)
 {
 	struct sort_cmp_context *ctx = &static_node_cmp_context;
-
-	return sort_node_cmp_type(ctx, ctx->program->sort_program, p1, p2);
-}
-
-static void
-index_sort_save_ids(struct mail_search_sort_program *program,
-		    uint32_t first_seq)
-{
-	struct index_transaction_context *t =
-		(struct index_transaction_context *)program->t;
-	const struct mail_sort_node *nodes;
-	unsigned int i, count;
-
-	nodes = array_get(&program->all_nodes, &count);
-	for (i = 0; i < count; i++) {
-		if (nodes[i].seq < first_seq)
-			continue;
-
-		i_assert(nodes[i].sort_id != 0);
-		mail_index_update_ext(t->trans, nodes[i].seq,
-				      program->ext_id, &nodes[i].sort_id, NULL);
-	}
+	const struct mail_sort_node *n1 = p1, *n2 = p2;
+	const enum mail_sort_type *sort_program;
+
+	/* Use sort IDs only if both have them */
+	if (n1->sort_id != 0 && n2->sort_id != 0) {
+		if (n1->sort_id < n2->sort_id)
+			return -1;
+		if (n1->sort_id > n2->sort_id)
+			return 1;
+		sort_program = ctx->program->sort_program + 1;
+	} else {
+		sort_program = ctx->program->sort_program;
+	}
+
+	return sort_node_cmp_type(ctx, ctx->program->sort_program, n1, n2);
 }
 
 static bool
@@ -375,19 +349,18 @@ index_sort_add_ids_range(struct mail_sea
 {
 	struct mail_sort_node *nodes;
 	unsigned int i, count;
-	const char *last_str = "";
 	uint32_t prev_id = 0, last_id = (uint32_t)-1;
-	string_t *prev_str;
-	const char *str;
+	string_t *last_str, *prev_str, *str;
 	unsigned int skip;
 
+	last_str = t_str_new(256);
 	nodes = array_get_modifiable(&program->all_nodes, &count);
 	if (nodes[idx2].sort_id != 0) {
 		i_assert(idx1 != idx2);
 		last_id = nodes[idx2].sort_id;
 
-		last_str = sort_header_get(program->sort_program[0], mail,
-					   nodes[idx2].seq);
+		sort_header_get(last_str, program->sort_program[0], mail,
+				nodes[idx2].seq);
 		idx2--;
 	}
 
@@ -395,19 +368,21 @@ index_sort_add_ids_range(struct mail_sea
 	if (nodes[idx1].sort_id != 0) {
 		prev_id = nodes[idx1].sort_id;
 
-		str_append(prev_str,
-			   sort_header_get(program->sort_program[0], mail,
-					   nodes[idx1].seq));
+		sort_header_get(prev_str, program->sort_program[0], mail,
+				nodes[idx1].seq);
 		idx1++;
 	}
 
+	str = str_new(default_pool, 256);
 	for (i = idx1; i <= idx2; i++) {
-		str = sort_header_get(program->sort_program[0], mail,
-				      nodes[i].seq);
-
-		if (i == idx2 && strcmp(str, last_str) == 0)
+		T_FRAME(
+			sort_header_get(str, program->sort_program[0], mail,
+					nodes[i].seq);
+		);
+
+		if (i == idx2 && str_equals(str, last_str))
 			nodes[i].sort_id = last_id;
-		else if (strcmp(str, str_c(prev_str)) == 0 && prev_id != 0)
+		else if (prev_id != 0 && str_equals(str, prev_str) == 0)
 			nodes[i].sort_id = prev_id;
 		else {
 			/* divide the available space so that each message gets
@@ -420,14 +395,16 @@ index_sort_add_ids_range(struct mail_sea
 			if (nodes[i].sort_id == last_id) {
 				/* we ran out of ID space. have to renumber
 				   the IDs. */
+				str_free(&str);
 				return FALSE;
 			}
 
 			prev_id = nodes[i].sort_id;
 			str_truncate(prev_str, 0);
-			str_append(prev_str, str);
-		}
-	}
+			str_append_str(prev_str, str);
+		}
+	}
+	str_free(&str);
 	return TRUE;
 }
 
@@ -516,7 +493,6 @@ static void index_sort_preset_sort_ids(s
 				       uint32_t last_seq)
 {
 	struct mail_sort_node node;
-	struct mail *mail;
 	uint32_t (*get_sort_id)(struct mail *);
 
 	switch (program->sort_program[0] & MAIL_SORT_MASK) {
@@ -534,114 +510,77 @@ static void index_sort_preset_sort_ids(s
 	}
 
 	/* add the missing nodes with their sort_ids */
-	mail = program->temp_mail;
 	node.seq = array_count(&program->all_nodes) + 1;
 	for (; node.seq <= last_seq; node.seq++) {
-		mail_set_seq(mail, node.seq);
-		node.sort_id = get_sort_id(mail);
+		mail_set_seq(program->temp_mail, node.seq);
+		node.sort_id = get_sort_id(program->temp_mail);
+
 		i_assert(node.sort_id != 0);
 		array_append(&program->all_nodes, &node, 1);
 	}
-
-	/* @UNSAFE: and sort them */
-	memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
-	static_node_cmp_context.program = program;
-	static_node_cmp_context.mail = mail;
-
-	qsort(array_idx_modifiable(&program->all_nodes, 0), last_seq,
-	      sizeof(struct mail_sort_node), sort_node_cmp);
-}
-
-static void index_sort_cache_seq(struct sort_cmp_context *ctx,
-				 enum mail_sort_type sort_type, uint32_t seq)
-{
-	ctx->cache_seq = seq;
-	ctx->cache_type = sort_type & MAIL_SORT_MASK;
-
-	mail_set_seq(ctx->mail, seq);


More information about the dovecot-cvs mailing list