[dovecot-cvs] dovecot/src/lib-index mail-hash.c, 1.24, 1.25 mail-hash.h, 1.10, 1.11

tss at dovecot.org tss at dovecot.org
Sun Nov 26 15:20:14 UTC 2006


Update of /var/lib/cvs/dovecot/src/lib-index
In directory talvi:/tmp/cvs-serv13569/lib-index

Modified Files:
	mail-hash.c mail-hash.h 
Log Message:
Resize the hash when needed. Also other fixes and cleanups.



Index: mail-hash.c
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-index/mail-hash.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- mail-hash.c	4 Nov 2006 18:48:11 -0000	1.24
+++ mail-hash.c	26 Nov 2006 15:20:11 -0000	1.25
@@ -1,8 +1,5 @@
 /* Copyright (C) 2006 Timo Sirainen */
 
-/* FIXME: we need to be able to rebuild the hash table when it gets
-   too small or large */
-
 #include "lib.h"
 #include "ioloop.h"
 #include "array.h"
@@ -16,6 +13,7 @@
 #include "mail-index-private.h"
 #include "mail-hash.h"
 
+#include <stdio.h>
 #include <stddef.h>
 #include <utime.h>
 #include <sys/stat.h>
@@ -24,17 +22,21 @@
 #define FILE_SIZE_INIT_PERCENTAGE 120
 /* How much larger to grow the file when it needs to be done */
 #define MAIL_HASH_GROW_PERCENTAGE 20
+/* Minimum hash size to use */
+#define MAIL_HASH_MIN_SIZE 109
 
 #define MAIL_HASH_TIMEOUT_SECS 3
 
 struct mail_hash {
 	struct mail_index *index;
 
-	hash_callback_t *hash_cb;
+	hash_callback_t *key_hash_cb;
 	hash_ctx_cmp_callback_t *key_compare_cb;
+	hash_callback_t *rec_hash_cb;
 	void *cb_context;
 
 	char *filepath;
+	char *suffix;
 	int fd;
 
 	dev_t dev;
@@ -99,7 +101,7 @@
 	size_t offset = offsetof(struct mail_hash_header, corrupted);
 	struct stat st;
 
-	if (MAIL_HASH_IS_IN_MEMORY(hash))
+	if (hash->fd == -1)
 		return 0;
 
 	hash->hdr->corrupted = set ? 1 : 0;
@@ -212,6 +214,8 @@
 	hash->hdr = NULL;
 	hash->hash_base = NULL;
 	hash->records_base = NULL;
+
+	hash->locked = FALSE;
 }
 
 static int mail_hash_file_map_finish(struct mail_hash *hash)
@@ -410,7 +414,7 @@
 	}
 }
 
-static int mail_hash_file_open(struct mail_hash *hash)
+static int mail_hash_file_open(struct mail_hash *hash, bool lock)
 {
 	int ret;
 
@@ -422,21 +426,27 @@
 		return -1;
 	}
 
-	if (mail_hash_file_lock(hash, F_RDLCK) <= 0)
-		return -1;
+	if (!lock) {
+		if (mail_hash_file_lock(hash, F_RDLCK) <= 0)
+			return -1;
 
-	ret = mail_hash_file_map(hash, FALSE);
-	if (hash->fd != -1)
-		(void)mail_hash_file_lock(hash, F_UNLCK);
+		ret = mail_hash_file_map(hash, FALSE);
+		if (hash->fd != -1)
+			(void)mail_hash_file_lock(hash, F_UNLCK);
+	} else {
+		if (mail_hash_file_lock(hash, F_WRLCK) <= 0)
+			return -1;
+
+		hash->locked = TRUE;
+		ret = mail_hash_file_map(hash, TRUE);
+	}
 	return ret;
 }
 
 static void
-mail_hash_header_init(struct mail_hash *hash, struct mail_hash_header *hdr,
-		      uoff_t *file_size_r)
+mail_hash_header_init(struct mail_hash *hash, unsigned int initial_count,
+		      struct mail_hash_header *hdr, uoff_t *file_size_r)
 {
-	unsigned int message_count;
-
 	memset(hdr, 0, sizeof(*hdr));
 	hdr->version = MAIL_HASH_VERSION;
 	hdr->base_header_size = sizeof(*hdr);
@@ -446,16 +456,19 @@
 	   uid_validity may be 0 */
 	hdr->uid_validity = hash->index->hdr->uid_validity;
 
-	message_count = I_MAX(hash->index->hdr->messages_count, 25);
-	hdr->hash_size = primes_closest(I_MAX(message_count * 2, 109));
+	if (initial_count == 0)
+		initial_count = I_MAX(hash->index->hdr->messages_count, 25);
+	hdr->hash_size = I_MAX(primes_closest(initial_count * 2),
+			       MAIL_HASH_MIN_SIZE);
 
 	*file_size_r = hdr->header_size +
 		hdr->hash_size * sizeof(uint32_t) +
-		hash->record_size * (message_count *
+		hash->record_size * (initial_count *
 				     FILE_SIZE_INIT_PERCENTAGE / 100);
 }
 
-static int mail_hash_file_create(struct mail_hash *hash)
+static int
+mail_hash_file_create(struct mail_hash *hash, unsigned int initial_count)
 {
 	struct dotlock *dotlock;
 	struct mail_hash_header hdr;
@@ -468,7 +481,7 @@
 		return -1;
 	}
 
-	mail_hash_header_init(hash, &hdr, &file_size);
+	mail_hash_header_init(hash, initial_count, &hdr, &file_size);
 	if (write_full(fd, &hdr, sizeof(hdr)) < 0 ||
 	    file_set_size(fd, file_size) < 0) {
 		mail_hash_set_syscall_error(hash, "write()");
@@ -483,12 +496,13 @@
 	return 0;
 }
 
-static void mail_hash_create_in_memory(struct mail_hash *hash)
+static void mail_hash_create_in_memory(struct mail_hash *hash,
+				       unsigned int initial_count)
 {
 	struct mail_hash_header hdr;
 	uoff_t file_size;
 
-	mail_hash_header_init(hash, &hdr, &file_size);
+	mail_hash_header_init(hash, initial_count, &hdr, &file_size);
 
 	hash->mmap_size = file_size;
 	hash->mmap_base = mmap_anon(hash->mmap_size);
@@ -506,8 +520,10 @@
 
 struct mail_hash *
 mail_hash_open(struct mail_index *index, const char *suffix,
-	       enum mail_hash_open_flags flags,
-	       unsigned int record_size, hash_callback_t *hash_cb,
+	       enum mail_hash_open_flags flags, unsigned int record_size,
+	       unsigned int initial_count,
+	       hash_callback_t *key_hash_cb,
+	       hash_callback_t *rec_hash_cb,
 	       hash_ctx_cmp_callback_t *key_compare_cb,
 	       void *context)
 {
@@ -521,16 +537,18 @@
 	hash->filepath = (flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ?
 		i_strdup("(in-memory hash)") :
 		i_strconcat(index->filepath, suffix, NULL);
+	hash->suffix = i_strdup(suffix);
 	hash->record_size = record_size;
 	hash->fd = -1;
 
-	hash->hash_cb = hash_cb;
+	hash->key_hash_cb = key_hash_cb;
+	hash->rec_hash_cb = rec_hash_cb;
 	hash->key_compare_cb = key_compare_cb;
 	hash->cb_context = context;
 
 	ret = MAIL_INDEX_IS_IN_MEMORY(hash->index) ||
 		(flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ? -1 :
-		mail_hash_file_open(hash);
+		mail_hash_file_open(hash, FALSE);
 
 	if (ret <= 0 && (flags & MAIL_HASH_OPEN_FLAG_CREATE) == 0) {
 		/* we don't want to create the hash */
@@ -539,12 +557,12 @@
 	}
 	if (ret == 0) {
 		/* not found or broken, recreate it */
-		ret = mail_hash_reset(hash);
+		ret = mail_hash_reset(hash, initial_count);
 	}
 	if (ret < 0) {
 		/* fallback to in-memory hash */
 		mail_hash_file_close(hash);
-		mail_hash_create_in_memory(hash);
+		mail_hash_create_in_memory(hash, initial_count);
 	}
 	return hash;
 }
@@ -557,28 +575,26 @@
 
 	mail_hash_file_close(hash);
 	i_free(hash->filepath);
+	i_free(hash->suffix);
 	i_free(hash);
 }
 
-int mail_hash_reset(struct mail_hash *hash)
+int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count)
 {
+	bool locked = hash->locked;
 	int ret;
 
 	mail_hash_file_close(hash);
 
-	ret = mail_hash_file_create(hash);
+	ret = mail_hash_file_create(hash, initial_count);
 	if (ret == 0) {
 		/* should work now, try opening again */
-		ret = mail_hash_file_open(hash);
+		ret = mail_hash_file_open(hash, locked);
 		if (ret == 0) {
 			mail_hash_set_corrupted(hash,
 				"Newly created hash file broken");
 			return -1;
 		}
-		if (ret > 0 && hash->locked) {
-			if (mail_hash_file_lock(hash, F_WRLCK) <= 0)
-				return -1;
-		}
 	}
 	return ret < 0 ? -1 : 0;
 }
@@ -589,13 +605,13 @@
 
 	mail_hash_file_close(hash);
 
-	if ((ret = mail_hash_file_open(hash)) < 0)
+	if ((ret = mail_hash_file_open(hash, FALSE)) < 0)
 		return -1;
 	if (ret > 0)
 		return 0;
 
 	/* not found or broken, recreate it */
-	return mail_hash_reset(hash);
+	return mail_hash_reset(hash, 0);
 }
 
 static int mail_hash_reopen_if_needed(struct mail_hash *hash)
@@ -671,7 +687,7 @@
 	unsigned int hash_idx;
 	uint32_t idx;
 
-	hash_idx = hash->hash_cb(key) % hash->hdr->hash_size;
+	hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size;
 	for (idx = hash->hash_base[hash_idx]; idx != 0; ) {
 		if (idx > hash->hdr->record_count) {
 			mail_hash_set_corrupted(hash,
@@ -773,8 +789,8 @@
 	return 0;
 }
 
-int mail_hash_insert(struct mail_hash *hash, const void *key,
-		     const void *value, uint32_t *idx_r)
+static int mail_hash_insert_with_hash(struct mail_hash *hash, const void *value,
+				      uint32_t hash_key, uint32_t *idx_r)
 {
 	struct mail_hash_record *rec;
 	uint32_t idx, *idx_p;
@@ -807,13 +823,11 @@
 	}
 
 	memcpy(rec, value, hash->record_size);
-
 	if (mail_hash_update_header(hash, rec, FALSE) < 0)
 		return -1;
 
-	if (key != NULL) {
-		idx_p = &hash->hash_base[hash->hash_cb(key) %
-					 hash->hdr->hash_size];
+	if (hash_key != 0) {
+		idx_p = &hash->hash_base[hash_key % hash->hdr->hash_size];
 		while (*idx_p != 0) {
 			rec = HASH_RECORD_IDX(hash, *idx_p);
 			idx_p = &rec->next_idx;
@@ -822,11 +836,26 @@
 		if (mail_hash_mark_update(hash, idx_p, sizeof(*idx_p)) < 0)
 			return -1;
 		*idx_p = idx;
+
+		hash->hdr->hashed_count++;
 	}
+
 	*idx_r = idx;
 	return 0;
 }
 
+int mail_hash_insert(struct mail_hash *hash, const void *key,
+		     const void *value, uint32_t *idx_r)
+{
+	uint32_t hash_key = key == NULL ? 0 : hash->key_hash_cb(key);
+
+	i_assert((key == NULL && hash->rec_hash_cb(value) == 0) ||
+		 (key != NULL && hash_key != 0 &&
+		  hash->rec_hash_cb(value) != 0));
+
+	return mail_hash_insert_with_hash(hash, value, hash_key, idx_r);
+}
+
 int mail_hash_remove(struct mail_hash *hash, const void *key)
 {
 	return mail_hash_remove_idx(hash, 0, key);
@@ -836,12 +865,12 @@
 {
 	struct mail_hash_record *rec = NULL;
 	unsigned int hash_idx;
-	uint32_t *idx_p;
+	uint32_t hash_key, *idx_p;
 
 	i_assert(idx != 0 || key != NULL);
 
 	if (key != NULL) {
-		hash_idx = hash->hash_cb(key) % hash->hdr->hash_size;
+		hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size;
 		for (idx_p = &hash->hash_base[hash_idx]; *idx_p != 0; ) {
 			if (*idx_p > hash->hdr->record_count) {
 				mail_hash_set_corrupted(hash,
@@ -881,6 +910,15 @@
 		hash->hdr->message_count--;
 	}
 
+	hash_key = hash->rec_hash_cb(rec);
+	if (hash_key != 0) {
+		if (hash->hdr->hashed_count == 0) {
+			mail_hash_set_corrupted(hash, "Too low hashed_count");
+			return -1;
+		}
+		hash->hdr->hashed_count--;
+	}
+
 	if (idx == hash->hdr->record_count) {
 		hash->hdr->record_count--;
 	} else {
@@ -946,3 +984,77 @@
 
 	return mail_hash_update_header(hash, rec, had_uid);
 }
+
+int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count)
+{
+	struct mail_hash *tmp_hash;
+	const struct mail_hash_record *rec;
+	const char *tmp_filename;
+	uint32_t hash_key, idx, new_idx;
+	float nodes_per_list;
+	int ret = 0;
+
+	if (MAIL_HASH_IS_IN_MEMORY(hash))
+		return 0;
+
+	i_assert(hash->locked);
+
+	nodes_per_list = (float)(hash->hdr->hashed_count + grow_count) /
+		(float)hash->hdr->hash_size;
+	if ((nodes_per_list > 0.3 && nodes_per_list < 2.0) ||
+	    hash->hdr->hash_size <= MAIL_HASH_MIN_SIZE)
+		return 0;
+
+	/* create a temporary hash */
+	tmp_hash = mail_hash_open(hash->index,
+				  t_strconcat(hash->suffix, ".tmp", NULL),
+				  MAIL_HASH_OPEN_FLAG_CREATE,
+				  hash->record_size,
+				  hash->hdr->hashed_count + grow_count,
+				  hash->key_hash_cb,
+				  hash->rec_hash_cb,
+				  hash->key_compare_cb,
+				  hash->cb_context);
+	if (tmp_hash == NULL)
+		return -1;
+
+	/* populate */
+	for (idx = 1; idx <= hash->hdr->record_count; idx++) {
+		rec = HASH_RECORD_IDX(hash, idx);
+		hash_key = hash->rec_hash_cb(rec);
+
+		if (mail_hash_insert_with_hash(tmp_hash, rec, hash_key,
+					       &new_idx) < 0) {
+			ret = -1;
+			break;
+		}
+		i_assert(idx == new_idx);
+	}
+	(void)mail_hash_file_write_changes(tmp_hash);
+
+	tmp_filename = t_strdup(tmp_hash->filepath);
+	mail_hash_free(&tmp_hash);
+	if (ret < 0) {
+		(void)unlink(tmp_filename);
+		return -1;
+	}
+
+	/* replace the old */
+	if (rename(tmp_filename, hash->filepath) < 0) {
+		mail_hash_set_syscall_error(hash, "rename()");
+		(void)unlink(tmp_filename);
+		return -1;
+	}
+
+	/* reopen the hash */
+	mail_hash_file_close(hash);
+	if ((ret = mail_hash_file_open(hash, TRUE)) < 0)
+		return -1;
+	if (ret == 0) {
+		mail_hash_set_corrupted(hash,
+			"Newly created hash file broken");
+		return -1;
+	}
+
+	return 0;
+}

Index: mail-hash.h
===================================================================
RCS file: /var/lib/cvs/dovecot/src/lib-index/mail-hash.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- mail-hash.h	30 Jul 2006 23:07:46 -0000	1.10
+++ mail-hash.h	26 Nov 2006 15:20:11 -0000	1.11
@@ -30,6 +30,8 @@
 	uint32_t record_count;
 	/* Number of message records (records with non-zero UID) */
 	uint32_t message_count;
+	/* Number of messages with non-zero hash */
+	uint32_t hashed_count;
 
 	/* UID validity. */
 	uint32_t uid_validity;
@@ -57,21 +59,24 @@
 	MAIL_HASH_OPEN_FLAG_IN_MEMORY	= 0x02
 };
 
-/* Returns hash code. */
-typedef unsigned int hash_ctx_callback_t(const void *p, void *context);
 /* Returns 0 if the pointers are equal. */
 typedef bool hash_ctx_cmp_callback_t(const void *key, const void *data,
 				     void *context);
 
 struct mail_hash *
 mail_hash_open(struct mail_index *index, const char *suffix,
-	       enum mail_hash_open_flags flags,
-	       unsigned int record_size, hash_callback_t *hash_cb,
+	       enum mail_hash_open_flags flags, unsigned int record_size,
+	       unsigned int initial_count,
+	       hash_callback_t *key_hash_cb,
+	       hash_callback_t *rec_hash_cb,
 	       hash_ctx_cmp_callback_t *key_compare_cb,
 	       void *context);
 void mail_hash_free(struct mail_hash **hash);
 
-int mail_hash_reset(struct mail_hash *hash);
+/* If reset or resize fails, the hash file is closed and the hash is in
+   unusable state until mail_hash_lock() succeeds. */
+int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count);
+int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count);
 
 /* Lock hash file. Returns 1 if we locked the file, 0 if timeouted or hash
    is in memory, -1 if error. */
@@ -83,7 +88,8 @@
 int mail_hash_lookup(struct mail_hash *hash, const void *key,
 		     const void **value_r, uint32_t *idx_r);
 /* Remember that inserting may cause existing returned values to be
-   invalidated */
+   invalidated. If key=NULL, it's not inserted into hash table. Note that
+   hash=0 equals to key=NULL insert, so a valid hash value must never be 0. */
 int mail_hash_insert(struct mail_hash *hash, const void *key,
 		     const void *value, uint32_t *idx_r);
 int mail_hash_remove(struct mail_hash *hash, const void *key);



More information about the dovecot-cvs mailing list