dovecot-1.2: Rewrote thread indexing code. It's a lot simpler and takes less disk space.

dovecot at dovecot.org dovecot at dovecot.org
Mon Sep 1 15:17:15 EEST 2008


details:   http://hg.dovecot.org/dovecot-1.2/rev/70b53e9b232e
changeset: 8146:70b53e9b232e
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Sep 01 15:17:00 2008 +0300
description:
Rewrote thread indexing code. It's a lot simpler and takes less disk space.
We no longer try to keep a hash table and the entire thread tree stored on
disk. Instead we keep on disk a simple mapping from a Message-ID string
(actually just a "uid, ref#" pointer) to a unique index number, read it into
memory and use it to build the thread tree. After the initial build the
thread tree is still updated incrementally.

diffstat:

18 files changed, 2308 insertions(+), 2510 deletions(-)
.hgignore                                    |    1 
src/imap/cmd-thread.c                        |   21 
src/lib-index/Makefile.am                    |    4 
src/lib-index/mail-hash.c                    | 1136 ------------------------
src/lib-index/mail-hash.h                    |  151 ---
src/lib-index/mail-index-private.h           |    5 
src/lib-index/mail-index-strmap.c            | 1224 ++++++++++++++++++++++++++
src/lib-index/mail-index-strmap.h            |   79 +
src/lib-index/mail-index.c                   |   47 
src/lib-storage/index/Makefile.am            |    1 
src/lib-storage/index/index-search.c         |    3 
src/lib-storage/index/index-thread-finish.c  |  296 +-----
src/lib-storage/index/index-thread-links.c   |  526 +++--------
src/lib-storage/index/index-thread-private.h |  113 --
src/lib-storage/index/index-thread.c         | 1013 ++++++++++-----------
src/lib-storage/mail-thread.h                |    6 
src/util/Makefile.am                         |   15 
src/util/threadview.c                        |  177 +++

diffs (truncated from 5447 to 300 lines):

diff -r b296beccb70e -r 70b53e9b232e .hgignore
--- a/.hgignore	Mon Sep 01 15:11:54 2008 +0300
+++ b/.hgignore	Mon Sep 01 15:17:00 2008 +0300
@@ -78,5 +78,6 @@ src/util/logview
 src/util/logview
 src/util/maildirlock
 src/util/rawlog
+src/util/threadview
 src/plugins/quota/rquota_xdr.c
 src/plugins/quota/rquota.h
diff -r b296beccb70e -r 70b53e9b232e src/imap/cmd-thread.c
--- a/src/imap/cmd-thread.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/imap/cmd-thread.c	Mon Sep 01 15:17:00 2008 +0300
@@ -80,27 +80,18 @@ static int imap_thread(struct client_com
 {
 	struct mail_thread_context *ctx;
 	string_t *str;
-	bool reset = FALSE;
 	int ret;
 
 	i_assert(thread_type == MAIL_THREAD_REFERENCES ||
 		 thread_type == MAIL_THREAD_REFERENCES2);
 
 	str = str_new(default_pool, 1024);
-	for (;;) {
-		ret = mail_thread_init(cmd->client->mailbox, reset,
-				       search_args, &ctx);
-		if (ret == 0) {
-			ret = imap_thread_write_reply(ctx, str, thread_type,
-						      !cmd->uid);
-			mail_thread_deinit(&ctx);
-		}
-
-		if (ret == 0 || reset)
-			break;
-		/* try again with in-memory hash */
-		reset = TRUE;
-		str_truncate(str, 0);
+	ret = mail_thread_init(cmd->client->mailbox,
+			       search_args, &ctx);
+	if (ret == 0) {
+		ret = imap_thread_write_reply(ctx, str, thread_type,
+					      !cmd->uid);
+		mail_thread_deinit(&ctx);
 	}
 
 	if (ret == 0) {
diff -r b296beccb70e -r 70b53e9b232e src/lib-index/Makefile.am
--- a/src/lib-index/Makefile.am	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-index/Makefile.am	Mon Sep 01 15:17:00 2008 +0300
@@ -12,7 +12,6 @@ libindex_a_SOURCES = \
 	mail-cache-lookup.c \
 	mail-cache-transaction.c \
 	mail-cache-sync-update.c \
-        mail-hash.c \
         mail-index.c \
         mail-index-dummy-view.c \
         mail-index-fsck.c \
@@ -21,6 +20,7 @@ libindex_a_SOURCES = \
         mail-index-modseq.c \
         mail-index-transaction.c \
         mail-index-transaction-view.c \
+        mail-index-strmap.c \
         mail-index-sync.c \
         mail-index-sync-ext.c \
         mail-index-sync-keywords.c \
@@ -38,10 +38,10 @@ headers = \
 headers = \
 	mail-cache.h \
 	mail-cache-private.h \
-        mail-hash.h \
 	mail-index.h \
         mail-index-modseq.h \
 	mail-index-private.h \
+        mail-index-strmap.h \
 	mail-index-sync-private.h \
 	mail-index-transaction-private.h \
 	mail-index-view-private.h \
diff -r b296beccb70e -r 70b53e9b232e src/lib-index/mail-hash.c
--- a/src/lib-index/mail-hash.c	Mon Sep 01 15:11:54 2008 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1136 +0,0 @@
-/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
-
-#include "lib.h"
-#include "ioloop.h"
-#include "array.h"
-#include "primes.h"
-#include "nfs-workarounds.h"
-#include "file-dotlock.h"
-#include "file-set-size.h"
-#include "read-full.h"
-#include "write-full.h"
-#include "mmap-util.h"
-#include "nfs-workarounds.h"
-#include "mail-index-private.h"
-#include "mail-hash.h"
-
-#include <stdio.h>
-#include <stddef.h>
-#include <utime.h>
-#include <sys/stat.h>
-
-/* How large to create the file initially */
-#define FILE_SIZE_INIT_PERCENTAGE 120
-/* How much larger to grow the file when it needs to be done */
-#define MAIL_HASH_GROW_PERCENTAGE 20
-/* Minimum hash size to use */
-#define MAIL_HASH_MIN_SIZE 109
-
-#define MAIL_HASH_SHRINK_PRESSURE 0.3
-#define MAIL_HASH_GROW_PRESSURE 2
-
-#define MAIL_HASH_TIMEOUT_SECS 60
-
-struct mail_hash {
-	struct mail_index *index;
-
-	hash_callback_t *key_hash_cb;
-	mail_hash_ctx_cmp_callback_t *key_compare_cb;
-	mail_hash_remap_callback_t *remap_callback;
-	hash_callback_t *rec_hash_cb;
-	void *cb_context;
-	unsigned int transaction_count;
-
-	char *filepath;
-	char *suffix;
-	int fd;
-	unsigned int record_size;
-
-	dev_t dev;
-	ino_t ino;
-
-	void *mmap_base;
-	size_t mmap_size;
-
-	time_t mtime, mapped_mtime;
-	size_t change_offset_start, change_offset_end;
-
-	int lock_type;
-	struct file_lock *file_lock;
-	struct dotlock *dotlock;
-	struct dotlock_settings dotlock_settings;
-
-	const struct mail_hash_header *hdr;
-
-	unsigned int in_memory:1;
-	unsigned int recreate:1;
-	unsigned int recreated:1;
-};
-
-#define HASH_RECORD_IDX(trans, idx) \
-	PTR_OFFSET((trans)->records_base, (idx) * (trans)->hdr.record_size)
-
-struct mail_hash_transaction {
-	struct mail_hash *hash;
-
-	struct mail_hash_header hdr;
-	/* hash size is [hdr.hash_size] */
-	uint32_t *hash_base;
-	/* record [0] is always unused */
-	void *records_base;
-	/* number of records in records_base.
-	   base_count + inserts.count == hdr.record_count */
-	unsigned int base_count;
-
-	/* bit array of modified data. each bit represents 1024 bytes of the
-	   hash file. used only for data read into memory from hash (not
-	   for mmaped data) */
-	ARRAY_TYPE(uint32_t) updates;
-	/* Records inserted within this transaction */
-	ARRAY_TYPE(mail_hash_record) inserts;
-	unsigned int next_grow_hashed_count;
-
-	uint32_t *hash_buf;
-	uint32_t records_base_1; /* used as records_base if base_count=1 */
-
-	unsigned int failed:1;
-	unsigned int mapped:1;
-};
-
-struct mail_hash_iterate_context {
-	struct mail_hash_transaction *trans;
-	uint32_t next_idx;
-	unsigned int iter_count;
-};
-
-const struct dotlock_settings default_dotlock_settings = {
-	MEMBER(temp_prefix) NULL,
-	MEMBER(lock_suffix) NULL,
-
-	MEMBER(timeout) 10,
-	MEMBER(stale_timeout) 30
-};
-
-static void mail_hash_set_syscall_error(struct mail_hash *hash,
-					const char *function)
-{
-	if (ENOSPACE(errno)) {
-		hash->index->nodiskspace = TRUE;
-		return;
-	}
-
-	mail_index_set_error(hash->index,
-			     "%s failed with index hash file %s: %m",
-			     function, hash->filepath);
-}
-
-void mail_hash_set_corrupted(struct mail_hash *hash, const char *error)
-{
-	mail_index_set_error(hash->index, "Corrupted index hash file %s: %s",
-			     hash->filepath, error);
-	if (unlink(hash->filepath) < 0 && errno != ENOENT)
-		mail_hash_set_syscall_error(hash, "unlink()");
-}
-
-static inline struct mail_hash_record *
-mail_hash_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	if (idx < trans->base_count)
-		return HASH_RECORD_IDX(trans, idx);
-
-	i_assert(idx < trans->hdr.record_count);
-	return array_idx_modifiable(&trans->inserts, idx - trans->base_count);
-}
-
-static void mail_hash_file_close(struct mail_hash *hash)
-{
-	i_assert(hash->transaction_count == 0);
-
-	if (hash->file_lock != NULL)
-		file_lock_free(&hash->file_lock);
-
-	if (hash->mmap_base != NULL) {
-		if (munmap(hash->mmap_base, hash->mmap_size) < 0)
-			mail_hash_set_syscall_error(hash, "munmap()");
-		hash->mmap_base = NULL;
-		hash->mmap_size = 0;
-	}
-	hash->ino = 0;
-	hash->mapped_mtime = 0;
-
-	if (hash->fd != -1) {
-		if (close(hash->fd) < 0)
-			mail_hash_set_syscall_error(hash, "close()");
-		hash->fd = -1;
-	}
-
-	hash->hdr = NULL;
-	hash->recreate = FALSE;
-	hash->recreated = FALSE;
-}
-
-struct mail_hash *
-mail_hash_alloc(struct mail_index *index, const char *suffix,
-		unsigned int record_size,
-		hash_callback_t *key_hash_cb,
-		hash_callback_t *rec_hash_cb,
-		mail_hash_ctx_cmp_callback_t *key_compare_cb,
-		mail_hash_remap_callback_t *remap_callback,
-		void *context)
-{
-	struct mail_hash *hash;
-
-	i_assert(record_size >= sizeof(struct mail_hash_record));
-
-	hash = i_new(struct mail_hash, 1);
-	hash->index = index;
-	hash->in_memory = MAIL_INDEX_IS_IN_MEMORY(index) || suffix == NULL;
-	hash->filepath = hash->in_memory ? i_strdup("(in-memory hash)") :
-		i_strconcat(index->filepath, suffix, NULL);
-	i_assert(hash->filepath != NULL);
-
-	hash->suffix = i_strdup(suffix);
-	hash->record_size = record_size;
-	hash->fd = -1;
-	hash->lock_type = F_UNLCK;
-	hash->dotlock_settings = default_dotlock_settings;
-	hash->dotlock_settings.use_excl_lock = index->use_excl_dotlocks;
-	hash->dotlock_settings.nfs_flush = index->nfs_flush;
-
-	hash->key_hash_cb = key_hash_cb;
-	hash->rec_hash_cb = rec_hash_cb;
-	hash->key_compare_cb = key_compare_cb;
-	hash->remap_callback = remap_callback,
-	hash->cb_context = context;
-	return hash;
-}
-
-void mail_hash_free(struct mail_hash **_hash)
-{
-	struct mail_hash *hash = *_hash;
-
-	*_hash = NULL;
-
-	mail_hash_file_close(hash);
-	i_free(hash->filepath);
-	i_free(hash->suffix);
-	i_free(hash);
-}


More information about the dovecot-cvs mailing list