dovecot-2.2: Merged changes from v2.1 tree.

dovecot at dovecot.org dovecot at dovecot.org
Wed Jan 9 07:02:36 EET 2013


details:   http://hg.dovecot.org/dovecot-2.2/rev/ec9b98df8c72
changeset: 15616:ec9b98df8c72
user:      Timo Sirainen <tss at iki.fi>
date:      Wed Jan 09 07:02:28 2013 +0200
description:
Merged changes from v2.1 tree.

diffstat:

 src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c |  63 +++++++++++----
 1 files changed, 47 insertions(+), 16 deletions(-)

diffs (146 lines):

diff -r a0493fa018fa -r ec9b98df8c72 src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c
--- a/src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c	Wed Jan 09 05:43:09 2013 +0200
+++ b/src/lib-storage/index/dbox-multi/mdbox-storage-rebuild.c	Wed Jan 09 07:02:28 2013 +0200
@@ -21,10 +21,13 @@
 #include <unistd.h>
 
 struct mdbox_rebuild_msg {
+	struct mdbox_rebuild_msg *guid_hash_next;
+
 	guid_128_t guid_128;
 	uint32_t file_id;
 	uint32_t offset;
-	uint32_t size;
+	uint32_t rec_size;
+	uoff_t mail_size;
 	uint32_t map_uid;
 
 	uint16_t refcount;
@@ -106,9 +109,9 @@
 	if ((*m1)->offset > (*m2)->offset)
 		return 1;
 
-	if ((*m1)->size < (*m2)->size)
+	if ((*m1)->rec_size < (*m2)->rec_size)
 		return -1;
-	if ((*m1)->size > (*m2)->size)
+	if ((*m1)->rec_size > (*m2)->rec_size)
 		return 1;
 	return 0;
 }
@@ -176,7 +179,8 @@
 		rec = p_new(ctx->pool, struct mdbox_rebuild_msg, 1);
 		rec->file_id = file_id;
 		rec->offset = offset;
-		rec->size = file->input->v_offset - offset;
+		rec->rec_size = file->input->v_offset - offset;
+		rec->mail_size = dbox_file_get_plaintext_size(file);
 		mail_generate_guid_128_hash(guid, rec->guid_128);
 		i_assert(!guid_128_is_empty(rec->guid_128));
 		array_append(&ctx->msgs, &rec, 1);
@@ -185,17 +189,26 @@
 		old_rec = hash_table_lookup(ctx->guid_hash, guid_p);
 		if (old_rec == NULL)
 			hash_table_insert(ctx->guid_hash, guid_p, rec);
-		else if (rec->size == old_rec->size) {
-			/* duplicate. save this as a refcount=0 to map,
-			   so it will eventually be deleted. */
+		else if (rec->mail_size == old_rec->mail_size) {
+			/* two mails' GUID and size are the same, which quite
+			   likely means that their contents are the same as
+			   well. we'll compare the mail sizes instead of the
+			   record sizes, because the records' metadata may
+			   differ.
+
+			   save this duplicate mail with refcount=0 to the map,
+			   so it will eventually be purged. */
 			rec->seen_zero_ref_in_map = TRUE;
 		} else {
 			/* duplicate GUID, but not a duplicate message. */
 			i_error("mdbox %s: Duplicate GUID %s in "
-				"m.%u:%u and m.%u:%u",
+				"m.%u:%u (size=%"PRIuUOFF_T") and m.%u:%u "
+				"(size=%"PRIuUOFF_T")",
 				ctx->storage->storage_dir, guid,
-				old_rec->file_id, old_rec->offset,
-				rec->file_id, rec->offset);
+				old_rec->file_id, old_rec->offset, old_rec->mail_size,
+				rec->file_id, rec->offset, rec->mail_size);
+			rec->guid_hash_next = old_rec->guid_hash_next;
+			old_rec->guid_hash_next = rec;
 		}
 	}
 	if (ret < 0)
@@ -292,7 +305,7 @@
 
 		rec.file_id = msgs[i]->file_id;
 		rec.offset = msgs[i]->offset;
-		rec.size = msgs[i]->size;
+		rec.size = msgs[i]->rec_size;
 
 		msgs[i]->map_uid = next_uid++;
 		mail_index_append(ctx->atomic->sync_trans,
@@ -326,7 +339,7 @@
 		   the (file_id, offset, size) triplet */
 		search_msg.file_id = rec.rec.file_id;
 		search_msg.offset = rec.rec.offset;
-		search_msg.size = rec.rec.size;
+		search_msg.rec_size = rec.rec.size;
 		pos = array_bsearch(&ctx->msgs, &search_msgp,
 				    mdbox_rebuild_msg_offset_cmp);
 		if (pos == NULL || (*pos)->map_uid != 0) {
@@ -361,6 +374,20 @@
 	return pos == NULL ? NULL : *pos;
 }
 
+static bool
+guid_hash_have_map_uid(struct mdbox_rebuild_msg **recp, uint32_t map_uid)
+{
+	struct mdbox_rebuild_msg *rec;
+
+	for (rec = *recp; rec != NULL; rec = rec->guid_hash_next) {
+		if (rec->map_uid == map_uid) {
+			*recp = rec;
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
 static void
 rebuild_mailbox_multi(struct mdbox_storage_rebuild_context *ctx,
 		      struct index_rebuild_context *rebuild_ctx,
@@ -405,17 +432,21 @@
 			   still try to look it up using map_uid. */
 			rec = map_uid == 0 ? NULL :
 				rebuild_lookup_map_uid(ctx, map_uid);
-		} else if (map_uid != rec->map_uid) {
+			map_uid = rec->map_uid;
+		} else if (!guid_hash_have_map_uid(&rec, map_uid)) {
 			/* message's GUID and map_uid point to different
 			   physical messages. assume that GUID is correct and
 			   map_uid is wrong. */
+			map_uid = rec->map_uid;
 		} else {
-			/* everything was ok */
+			/* everything was ok. use this specific record's
+			   map_uid to avoid duplicating mails in case the same
+			   GUID exists multiple times */
 		}
 
 		if (rec != NULL) T_BEGIN {
 			/* keep this message. add it to mailbox index. */
-			i_assert(rec->map_uid != 0);
+			i_assert(map_uid != 0);
 			rec->refcount++;
 
 			mail_index_lookup_uid(view, old_seq, &uid);
@@ -423,7 +454,7 @@
 			index_rebuild_index_metadata(rebuild_ctx,
 						     new_seq, uid);
 
-			new_dbox_rec.map_uid = rec->map_uid;
+			new_dbox_rec.map_uid = map_uid;
 			mail_index_update_ext(trans, new_seq, mbox->ext_id,
 					      &new_dbox_rec, NULL);
 			mail_index_update_ext(trans, new_seq, mbox->guid_ext_id,


More information about the dovecot-cvs mailing list