dovecot-1.2: Thread index fixes and optimizations.

dovecot at dovecot.org dovecot at dovecot.org
Thu Jun 26 19:12:17 EEST 2008


details:   http://hg.dovecot.org/dovecot-1.2/rev/04a3be30e5a6
changeset: 7952:04a3be30e5a6
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Jun 26 19:11:56 2008 +0300
description:
Thread index fixes and optimizations.

diffstat:

9 files changed, 765 insertions(+), 213 deletions(-)
doc/thread-refs.txt                          |   15 -
src/lib-index/mail-hash.c                    |   33 ++
src/lib-index/mail-hash.h                    |   10 
src/lib-storage/index/Makefile.am            |    1 
src/lib-storage/index/index-thread-finish.c  |  113 ++++++---
src/lib-storage/index/index-thread-links.c   |  208 +++++++++-------
src/lib-storage/index/index-thread-list.c    |  325 ++++++++++++++++++++++++++
src/lib-storage/index/index-thread-private.h |   73 ++++-
src/lib-storage/index/index-thread.c         |  200 ++++++++++------

diffs (truncated from 1581 to 300 lines):

diff -r 5e3b995372d4 -r 04a3be30e5a6 doc/thread-refs.txt
--- a/doc/thread-refs.txt	Thu Jun 26 08:49:02 2008 +0300
+++ b/doc/thread-refs.txt	Thu Jun 26 19:11:56 2008 +0300
@@ -62,17 +62,18 @@ node {
   parent: Pointer to parent node. Children pointers aren't required.
   uid: Message's UID (0 = expunged or never even existed)
 
-  link_refcount: Number of messages referencing "this node" -> "parent node"
-    (e.g. "References: a b" would increase this message's reference as well as
-    b's reference, regardless of how the links are really added to the thread
-    tree).
+  parent_link_refcount: Number of messages containing "this message" -> "parent
+    message" link, i.e. "number of links to parent node". However since parents
+    can change, not all of these references might be from our current child
+    nodes. When this refcount reaches 0, it means we must detach from our
+    parent.
   expunge_rebuilds: If this message gets expunged, rebuild the thread tree.
   parent_unref_rebuilds: If a link between this node and its child gets
     unreferenced, rebuild the thread tree.
 }
 
 link_reference(parent_node, child_node)
-  parent_node.link_refcount++
+  child_node.parent_link_refcount++
   if is_ancestor(child_node, parent_node)
     // child_node is an ancestor of parent_node. Adding child_node ->
     // parent_node would introduce a loop. If any messages referencing the
@@ -144,8 +145,8 @@ unref_link(parent, child)
   if parent.parent_unref_rebuilds
     return false
 
-  child.link_refcount--
-  if child.link_refcount == 0
+  child.parent_link_refcount--
+  if child.parent_link_refcount == 0
     child.parent = NIL
   return true  
 
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-index/mail-hash.c
--- a/src/lib-index/mail-hash.c	Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-index/mail-hash.c	Thu Jun 26 19:11:56 2008 +0300
@@ -232,6 +232,8 @@ mail_hash_header_init(struct mail_hash *
 
 	hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE);
 	hdr->record_count = 1; /* [0] always exists */
+	hdr->reset_counter = hash->hdr == NULL ? ioloop_time :
+		hash->hdr->reset_counter + 1;
 }
 
 static bool mail_hash_check_header(struct mail_hash *hash,
@@ -603,6 +605,11 @@ void mail_hash_unlock(struct mail_hash *
 	hash->lock_type = F_UNLCK;
 }
 
+int mail_hash_get_lock_type(struct mail_hash *hash)
+{
+	return hash->lock_type;
+}
+
 int mail_hash_create_excl_locked(struct mail_hash *hash)
 {
 	bool created;
@@ -696,6 +703,7 @@ int mail_hash_transaction_write(struct m
 	int fd = hash->fd;
 
 	i_assert(hash->lock_type == F_WRLCK);
+	i_assert(trans->hdr.hashed_count <= trans->hdr.record_count);
 
 	if (trans->failed)
 		return -1;
@@ -726,7 +734,8 @@ int mail_hash_transaction_write(struct m
 		trans->base_count * trans->hdr.record_size;
 	if (fd == -1) {
 		temp_path = t_strconcat(hash->filepath, ".tmp", NULL);
-		fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+		fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC,
+			  hash->index->mode);
 		if (fd == -1) {
 			if (ENOSPACE(errno)) {
 				hash->index->nodiskspace = TRUE;
@@ -807,6 +816,17 @@ bool mail_hash_transaction_is_broken(str
 	return trans->failed;
 }
 
+bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans)
+{
+	return trans->hash->in_memory;
+}
+
+struct mail_hash *
+mail_hash_transaction_get_hash(struct mail_hash_transaction *trans)
+{
+	return trans->hash;
+}
+
 void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
 					 const char *error)
 {
@@ -966,6 +986,11 @@ static void mail_hash_insert_idx(struct 
 	if (trans->hdr.first_hole_idx != 0) {
 		/* allocate the record from the first hole */
 		idx = trans->hdr.first_hole_idx;
+		if (idx >= trans->hdr.record_count) {
+			mail_hash_transaction_set_corrupted(trans,
+				"Corrupted first_hole_idx");
+			idx = 0;
+		}
 		rec = mail_hash_idx(trans, idx);
 
 		memcpy(&trans->hdr.first_hole_idx, rec + 1,
@@ -1104,8 +1129,8 @@ void mail_hash_remove(struct mail_hash_t
 						"Too low hashed_count");
 			return;
 		}
-		trans->hdr.hashed_count--;
-	}
-
+	}
+
+	trans->hdr.hashed_count--;
 	mail_hash_remove_idx(trans, idx);
 }
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-index/mail-hash.h
--- a/src/lib-index/mail-hash.h	Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-index/mail-hash.h	Thu Jun 26 19:11:56 2008 +0300
@@ -47,6 +47,8 @@ struct mail_hash_header {
 	uint32_t last_uid;
 	/* Number of message records (records with non-zero UID) */
 	uint32_t message_count;
+	/* Increased every time the hash is reset */
+	uint32_t reset_counter;
 };
 
 struct mail_hash_record {
@@ -95,6 +97,8 @@ int mail_hash_lock_exclusive(struct mail
 int mail_hash_lock_exclusive(struct mail_hash *hash,
 			     enum mail_hash_lock_flags flags);
 void mail_hash_unlock(struct mail_hash *hash);
+/* Returns the current locking state (F_UNLCK, F_RDLCK, F_WRLCK) */
+int mail_hash_get_lock_type(struct mail_hash *hash);
 
 struct mail_hash_transaction *
 mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size);
@@ -103,6 +107,12 @@ void mail_hash_transaction_end(struct ma
 /* Returns TRUE if transaction is in broken state because of an earlier
    I/O error or detected file corruption. */
 bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans);
+/* Returns TRUE if hash is currently being updated in memory. */
+bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans);
+
+/* Returns the hash structure of the transaction. */
+struct mail_hash *
+mail_hash_transaction_get_hash(struct mail_hash_transaction *trans);
 
 /* Clear the entire hash file's contents. */
 void mail_hash_reset(struct mail_hash_transaction *trans);
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-storage/index/Makefile.am
--- a/src/lib-storage/index/Makefile.am	Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-storage/index/Makefile.am	Thu Jun 26 19:11:56 2008 +0300
@@ -26,6 +26,7 @@ libstorage_index_a_SOURCES = \
 	index-thread.c \
 	index-thread-finish.c \
 	index-thread-links.c \
+	index-thread-list.c \
 	index-transaction.c
 
 headers = \
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-storage/index/index-thread-finish.c
--- a/src/lib-storage/index/index-thread-finish.c	Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-storage/index/index-thread-finish.c	Thu Jun 26 19:11:56 2008 +0300
@@ -37,6 +37,7 @@ struct thread_finish_context {
 
 	struct mail *tmp_mail;
 	struct mail_hash_transaction *hash_trans;
+	struct mail_thread_list_context *hash_list_ctx;
 
 	ARRAY_DEFINE(roots, struct mail_thread_root_node);
 	ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node);
@@ -127,16 +128,24 @@ static int mail_thread_child_node_cmp(co
 	return 0;
 }
 
+static uint32_t
+thread_lookup_existing(struct thread_finish_context *ctx, uint32_t idx)
+{
+	const struct mail_thread_node *node;
+
+	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+	i_assert(MAIL_INDEX_NODE_EXISTS(node));
+	i_assert(node->uid_or_id != 0);
+	return node->uid_or_id;
+}
+
 static int
 thread_child_node_fill(struct thread_finish_context *ctx,
 		       struct mail_thread_child_node *child)
 {
-	const struct mail_thread_node *node;
 	int tz;
 
-	node = mail_hash_lookup_idx(ctx->hash_trans, child->idx);
-	i_assert(node->uid != 0 && node->exists);
-	child->uid = node->uid;
+	child->uid = thread_lookup_existing(ctx, child->idx);
 
 	if (!mail_set_uid(ctx->tmp_mail, child->uid)) {
 		/* the UID should have existed. we would have rebuild
@@ -164,7 +173,6 @@ thread_sort_children(struct thread_finis
 		     ARRAY_TYPE(mail_thread_child_node) *sorted_children)
 {
 	const struct mail_thread_shadow_node *shadows;
-	const struct mail_thread_node *node;
 	struct mail_thread_child_node child, *children;
 	unsigned int count;
 
@@ -177,9 +185,7 @@ thread_sort_children(struct thread_finis
 	i_assert(child.idx != 0);
 	if (shadows[child.idx].next_sibling_idx == 0) {
 		/* only child - don't bother setting sort date */
-		node = mail_hash_lookup_idx(ctx->hash_trans, child.idx);
-		i_assert(node->uid != 0 && node->exists);
-		child.uid = node->uid;
+		child.uid = thread_lookup_existing(ctx, child.idx);
 
 		array_append(sorted_children, &child, 1);
 		return 0;
@@ -202,12 +208,11 @@ static int gather_base_subjects(struct t
 {
 	struct subject_gather_context gather_ctx;
 	struct mail_thread_root_node *roots;
-	const struct mail_thread_node *node;
 	const char *subject;
 	unsigned int i, count;
 	ARRAY_TYPE(mail_thread_child_node) sorted_children;
 	const struct mail_thread_child_node *children;
-	uint32_t idx;
+	uint32_t idx, uid;
 	int ret = 0;
 
 	memset(&gather_ctx, 0, sizeof(gather_ctx));
@@ -240,10 +245,8 @@ static int gather_base_subjects(struct t
 			continue;
 		}
 
-		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-		i_assert(node->uid != 0 && node->exists);
-
-		if (!mail_set_uid(ctx->tmp_mail, node->uid)) {
+		uid = thread_lookup_existing(ctx, idx);
+		if (!mail_set_uid(ctx->tmp_mail, uid)) {
 			/* the UID should have existed. we would have rebuild
 			   the thread tree otherwise. */
 			mail_hash_transaction_set_corrupted(ctx->hash_trans,
@@ -443,7 +446,8 @@ static int sort_root_nodes_ref2(struct t
 	roots = array_get_modifiable(&ctx->roots, &root_count);
 	for (idx = 1; idx < record_count; idx++) {
 		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) || !node->exists)
+		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) ||
+		    !MAIL_INDEX_NODE_EXISTS(node))
 			continue;
 
 		child.idx = idx;
@@ -467,14 +471,17 @@ static int sort_root_nodes_ref2(struct t
 	return 0;
 }
 
-static void mail_thread_create_shadows(struct thread_finish_context *ctx,
-				       uint32_t record_count)
-{
+static int mail_thread_create_shadows(struct thread_finish_context *ctx,
+				      uint32_t record_count)
+{
+	struct mail_thread_list_update_context *thread_list_ctx;
 	struct mail_thread_node *node, *parent;
 	struct mail_thread_root_node root;
 	struct mail_thread_child_node child;
+	struct mail_hash *hash;
 	uint8_t *referenced;
 	uint32_t idx, i, j, parent_idx, alloc_size, max;
+	int ret = 0;
 
 	ctx->use_sent_date = FALSE;
 
@@ -485,26 +492,42 @@ static void mail_thread_create_shadows(s
 	referenced = i_new(uint8_t, alloc_size);
 	for (idx = 1; idx < record_count; idx++) {
 		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec))
+		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) {
+			/* @UNSAFE: mark deleted records as referenced, so we
+			   don't waste time with them */
+			referenced[idx / 8] |= 1 << (idx % 8);
 			continue;
-
-		if (node->exists) {
+		}
+
+		if (MAIL_INDEX_NODE_EXISTS(node)) {
 			/* @UNSAFE: don't remove existing nodes */


More information about the dovecot-cvs mailing list