dovecot-1.2: Thread index fixes and optimizations.
dovecot at dovecot.org
dovecot at dovecot.org
Thu Jun 26 19:12:17 EEST 2008
details: http://hg.dovecot.org/dovecot-1.2/rev/04a3be30e5a6
changeset: 7952:04a3be30e5a6
user: Timo Sirainen <tss at iki.fi>
date: Thu Jun 26 19:11:56 2008 +0300
description:
Thread index fixes and optimizations.
diffstat:
9 files changed, 765 insertions(+), 213 deletions(-)
doc/thread-refs.txt | 15 -
src/lib-index/mail-hash.c | 33 ++
src/lib-index/mail-hash.h | 10
src/lib-storage/index/Makefile.am | 1
src/lib-storage/index/index-thread-finish.c | 113 ++++++---
src/lib-storage/index/index-thread-links.c | 208 +++++++++-------
src/lib-storage/index/index-thread-list.c | 325 ++++++++++++++++++++++++++
src/lib-storage/index/index-thread-private.h | 73 ++++-
src/lib-storage/index/index-thread.c | 200 ++++++++++------
diffs (truncated from 1581 to 300 lines):
diff -r 5e3b995372d4 -r 04a3be30e5a6 doc/thread-refs.txt
--- a/doc/thread-refs.txt Thu Jun 26 08:49:02 2008 +0300
+++ b/doc/thread-refs.txt Thu Jun 26 19:11:56 2008 +0300
@@ -62,17 +62,18 @@ node {
parent: Pointer to parent node. Children pointers aren't required.
uid: Message's UID (0 = expunged or never even existed)
- link_refcount: Number of messages referencing "this node" -> "parent node"
- (e.g. "References: a b" would increase this message's reference as well as
- b's reference, regardless of how the links are really added to the thread
- tree).
+ parent_link_refcount: Number of messages containing "this message" -> "parent
+ message" link, i.e. "number of links to parent node". However since parents
+ can change, not all of these references might be from our current child
+ nodes. When this refcount reaches 0, it means we must detach from our
+ parent.
expunge_rebuilds: If this message gets expunged, rebuild the thread tree.
parent_unref_rebuilds: If a link between this node and its child gets
unreferenced, rebuild the thread tree.
}
link_reference(parent_node, child_node)
- parent_node.link_refcount++
+ child_node.parent_link_refcount++
if is_ancestor(child_node, parent_node)
// child_node is an ancestor of parent_node. Adding child_node ->
// parent_node would introduce a loop. If any messages referencing the
@@ -144,8 +145,8 @@ unref_link(parent, child)
if parent.parent_unref_rebuilds
return false
- child.link_refcount--
- if child.link_refcount == 0
+ child.parent_link_refcount--
+ if child.parent_link_refcount == 0
child.parent = NIL
return true
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-index/mail-hash.c
--- a/src/lib-index/mail-hash.c Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-index/mail-hash.c Thu Jun 26 19:11:56 2008 +0300
@@ -232,6 +232,8 @@ mail_hash_header_init(struct mail_hash *
hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE);
hdr->record_count = 1; /* [0] always exists */
+ hdr->reset_counter = hash->hdr == NULL ? ioloop_time :
+ hash->hdr->reset_counter + 1;
}
static bool mail_hash_check_header(struct mail_hash *hash,
@@ -603,6 +605,11 @@ void mail_hash_unlock(struct mail_hash *
hash->lock_type = F_UNLCK;
}
+int mail_hash_get_lock_type(struct mail_hash *hash)
+{
+ return hash->lock_type;
+}
+
int mail_hash_create_excl_locked(struct mail_hash *hash)
{
bool created;
@@ -696,6 +703,7 @@ int mail_hash_transaction_write(struct m
int fd = hash->fd;
i_assert(hash->lock_type == F_WRLCK);
+ i_assert(trans->hdr.hashed_count <= trans->hdr.record_count);
if (trans->failed)
return -1;
@@ -726,7 +734,8 @@ int mail_hash_transaction_write(struct m
trans->base_count * trans->hdr.record_size;
if (fd == -1) {
temp_path = t_strconcat(hash->filepath, ".tmp", NULL);
- fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC,
+ hash->index->mode);
if (fd == -1) {
if (ENOSPACE(errno)) {
hash->index->nodiskspace = TRUE;
@@ -807,6 +816,17 @@ bool mail_hash_transaction_is_broken(str
return trans->failed;
}
+bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans)
+{
+ return trans->hash->in_memory;
+}
+
+struct mail_hash *
+mail_hash_transaction_get_hash(struct mail_hash_transaction *trans)
+{
+ return trans->hash;
+}
+
void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
const char *error)
{
@@ -966,6 +986,11 @@ static void mail_hash_insert_idx(struct
if (trans->hdr.first_hole_idx != 0) {
/* allocate the record from the first hole */
idx = trans->hdr.first_hole_idx;
+ if (idx >= trans->hdr.record_count) {
+ mail_hash_transaction_set_corrupted(trans,
+ "Corrupted first_hole_idx");
+ idx = 0;
+ }
rec = mail_hash_idx(trans, idx);
memcpy(&trans->hdr.first_hole_idx, rec + 1,
@@ -1104,8 +1129,8 @@ void mail_hash_remove(struct mail_hash_t
"Too low hashed_count");
return;
}
- trans->hdr.hashed_count--;
- }
-
+ }
+
+ trans->hdr.hashed_count--;
mail_hash_remove_idx(trans, idx);
}
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-index/mail-hash.h
--- a/src/lib-index/mail-hash.h Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-index/mail-hash.h Thu Jun 26 19:11:56 2008 +0300
@@ -47,6 +47,8 @@ struct mail_hash_header {
uint32_t last_uid;
/* Number of message records (records with non-zero UID) */
uint32_t message_count;
+ /* Increased every time the hash is reset */
+ uint32_t reset_counter;
};
struct mail_hash_record {
@@ -95,6 +97,8 @@ int mail_hash_lock_exclusive(struct mail
int mail_hash_lock_exclusive(struct mail_hash *hash,
enum mail_hash_lock_flags flags);
void mail_hash_unlock(struct mail_hash *hash);
+/* Returns the current locking state (F_UNLCK, F_RDLCK, F_WRLCK) */
+int mail_hash_get_lock_type(struct mail_hash *hash);
struct mail_hash_transaction *
mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size);
@@ -103,6 +107,12 @@ void mail_hash_transaction_end(struct ma
/* Returns TRUE if transaction is in broken state because of an earlier
I/O error or detected file corruption. */
bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans);
+/* Returns TRUE if hash is currently being updated in memory. */
+bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans);
+
+/* Returns the hash structure of the transaction. */
+struct mail_hash *
+mail_hash_transaction_get_hash(struct mail_hash_transaction *trans);
/* Clear the entire hash file's contents. */
void mail_hash_reset(struct mail_hash_transaction *trans);
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-storage/index/Makefile.am
--- a/src/lib-storage/index/Makefile.am Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-storage/index/Makefile.am Thu Jun 26 19:11:56 2008 +0300
@@ -26,6 +26,7 @@ libstorage_index_a_SOURCES = \
index-thread.c \
index-thread-finish.c \
index-thread-links.c \
+ index-thread-list.c \
index-transaction.c
headers = \
diff -r 5e3b995372d4 -r 04a3be30e5a6 src/lib-storage/index/index-thread-finish.c
--- a/src/lib-storage/index/index-thread-finish.c Thu Jun 26 08:49:02 2008 +0300
+++ b/src/lib-storage/index/index-thread-finish.c Thu Jun 26 19:11:56 2008 +0300
@@ -37,6 +37,7 @@ struct thread_finish_context {
struct mail *tmp_mail;
struct mail_hash_transaction *hash_trans;
+ struct mail_thread_list_context *hash_list_ctx;
ARRAY_DEFINE(roots, struct mail_thread_root_node);
ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node);
@@ -127,16 +128,24 @@ static int mail_thread_child_node_cmp(co
return 0;
}
+static uint32_t
+thread_lookup_existing(struct thread_finish_context *ctx, uint32_t idx)
+{
+ const struct mail_thread_node *node;
+
+ node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+ i_assert(MAIL_INDEX_NODE_EXISTS(node));
+ i_assert(node->uid_or_id != 0);
+ return node->uid_or_id;
+}
+
static int
thread_child_node_fill(struct thread_finish_context *ctx,
struct mail_thread_child_node *child)
{
- const struct mail_thread_node *node;
int tz;
- node = mail_hash_lookup_idx(ctx->hash_trans, child->idx);
- i_assert(node->uid != 0 && node->exists);
- child->uid = node->uid;
+ child->uid = thread_lookup_existing(ctx, child->idx);
if (!mail_set_uid(ctx->tmp_mail, child->uid)) {
/* the UID should have existed. we would have rebuild
@@ -164,7 +173,6 @@ thread_sort_children(struct thread_finis
ARRAY_TYPE(mail_thread_child_node) *sorted_children)
{
const struct mail_thread_shadow_node *shadows;
- const struct mail_thread_node *node;
struct mail_thread_child_node child, *children;
unsigned int count;
@@ -177,9 +185,7 @@ thread_sort_children(struct thread_finis
i_assert(child.idx != 0);
if (shadows[child.idx].next_sibling_idx == 0) {
/* only child - don't bother setting sort date */
- node = mail_hash_lookup_idx(ctx->hash_trans, child.idx);
- i_assert(node->uid != 0 && node->exists);
- child.uid = node->uid;
+ child.uid = thread_lookup_existing(ctx, child.idx);
array_append(sorted_children, &child, 1);
return 0;
@@ -202,12 +208,11 @@ static int gather_base_subjects(struct t
{
struct subject_gather_context gather_ctx;
struct mail_thread_root_node *roots;
- const struct mail_thread_node *node;
const char *subject;
unsigned int i, count;
ARRAY_TYPE(mail_thread_child_node) sorted_children;
const struct mail_thread_child_node *children;
- uint32_t idx;
+ uint32_t idx, uid;
int ret = 0;
memset(&gather_ctx, 0, sizeof(gather_ctx));
@@ -240,10 +245,8 @@ static int gather_base_subjects(struct t
continue;
}
- node = mail_hash_lookup_idx(ctx->hash_trans, idx);
- i_assert(node->uid != 0 && node->exists);
-
- if (!mail_set_uid(ctx->tmp_mail, node->uid)) {
+ uid = thread_lookup_existing(ctx, idx);
+ if (!mail_set_uid(ctx->tmp_mail, uid)) {
/* the UID should have existed. we would have rebuild
the thread tree otherwise. */
mail_hash_transaction_set_corrupted(ctx->hash_trans,
@@ -443,7 +446,8 @@ static int sort_root_nodes_ref2(struct t
roots = array_get_modifiable(&ctx->roots, &root_count);
for (idx = 1; idx < record_count; idx++) {
node = mail_hash_lookup_idx(ctx->hash_trans, idx);
- if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) || !node->exists)
+ if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) ||
+ !MAIL_INDEX_NODE_EXISTS(node))
continue;
child.idx = idx;
@@ -467,14 +471,17 @@ static int sort_root_nodes_ref2(struct t
return 0;
}
-static void mail_thread_create_shadows(struct thread_finish_context *ctx,
- uint32_t record_count)
-{
+static int mail_thread_create_shadows(struct thread_finish_context *ctx,
+ uint32_t record_count)
+{
+ struct mail_thread_list_update_context *thread_list_ctx;
struct mail_thread_node *node, *parent;
struct mail_thread_root_node root;
struct mail_thread_child_node child;
+ struct mail_hash *hash;
uint8_t *referenced;
uint32_t idx, i, j, parent_idx, alloc_size, max;
+ int ret = 0;
ctx->use_sent_date = FALSE;
@@ -485,26 +492,42 @@ static void mail_thread_create_shadows(s
referenced = i_new(uint8_t, alloc_size);
for (idx = 1; idx < record_count; idx++) {
node = mail_hash_lookup_idx(ctx->hash_trans, idx);
- if (MAIL_HASH_RECORD_IS_DELETED(&node->rec))
+ if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) {
+ /* @UNSAFE: mark deleted records as referenced, so we
+ don't waste time with them */
+ referenced[idx / 8] |= 1 << (idx % 8);
continue;
-
- if (node->exists) {
+ }
+
+ if (MAIL_INDEX_NODE_EXISTS(node)) {
/* @UNSAFE: don't remove existing nodes */
More information about the dovecot-cvs
mailing list