dovecot-2.2: dsync: If same GUID already exists in storage, try ...

dovecot at dovecot.org dovecot at dovecot.org
Tue Jan 20 01:25:05 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/39d00448490f
changeset: 18180:39d00448490f
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Jan 20 03:23:29 2015 +0200
description:
dsync: If a mail with the same GUID already exists in storage, try to copy it
instead of recreating the mail. This way most mailbox backends can save disk
space by doing only a reference count update.

This feature isn't enabled by default. To enable it, a virtual "All Mails"
mailbox must first be configured using the virtual plugin, and that mailbox
must then be given to doveadm as the -a parameter, e.g.:
doveadm sync -a "Virtual/All Mails" ...

Currently this is implemented by reading through all the GUIDs in the
virtual mailbox. This of course isn't very efficient for things like
incremental replication. An upcoming conversation plugin will keep track of
all the mails' GUIDs, so in the future replication should be able to use
this functionality efficiently as well.

diffstat:

 src/doveadm/doveadm-dsync.c              |    7 +-
 src/doveadm/dsync/dsync-brain-mailbox.c  |    3 +-
 src/doveadm/dsync/dsync-brain-private.h  |    1 +
 src/doveadm/dsync/dsync-brain.c          |   18 +++
 src/doveadm/dsync/dsync-brain.h          |    4 +
 src/doveadm/dsync/dsync-ibc-pipe.c       |    1 +
 src/doveadm/dsync/dsync-ibc-stream.c     |    8 +-
 src/doveadm/dsync/dsync-ibc.h            |    3 +
 src/doveadm/dsync/dsync-mailbox-import.c |  146 +++++++++++++++++++++++++-----
 src/doveadm/dsync/dsync-mailbox-import.h |    1 +
 10 files changed, 165 insertions(+), 27 deletions(-)

diffs (truncated from 503 to 300 lines):

diff -r e83c50fda315 -r 39d00448490f src/doveadm/doveadm-dsync.c
--- a/src/doveadm/doveadm-dsync.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/doveadm-dsync.c	Tue Jan 20 03:23:29 2015 +0200
@@ -37,7 +37,7 @@
 #include <ctype.h>
 #include <sys/wait.h>
 
-#define DSYNC_COMMON_GETOPT_ARGS "+1dEfg:l:m:n:NPr:Rs:t:Ux:"
+#define DSYNC_COMMON_GETOPT_ARGS "+1a:dEfg:l:m:n:NPr:Rs:t:Ux:"
 #define DSYNC_REMOTE_CMD_EXIT_WAIT_SECS 30
 /* The broken_char is mainly set to get a proper error message when trying to
    convert a mailbox with a name that can't be used properly translated between
@@ -56,6 +56,7 @@
 	struct doveadm_mail_cmd_context ctx;
 	enum dsync_brain_sync_type sync_type;
 	const char *mailbox;
+	const char *virtual_all_box;
 	guid_128_t mailbox_guid;
 	const char *state_input, *rawlog_path;
 	ARRAY_TYPE(const_string) exclude_mailboxes;
@@ -543,6 +544,7 @@
 	}
 	set.sync_since_timestamp = ctx->sync_since_timestamp;
 	set.sync_box = ctx->mailbox;
+	set.virtual_all_box = ctx->virtual_all_box;
 	memcpy(set.sync_box_guid, ctx->mailbox_guid, sizeof(set.sync_box_guid));
 	set.lock_timeout_secs = ctx->lock_timeout;
 	set.state = ctx->state_input;
@@ -909,6 +911,9 @@
 		ctx->oneway = TRUE;
 		ctx->backup = TRUE;
 		break;
+	case 'a':
+		ctx->virtual_all_box = optarg;
+		break;
 	case 'd':
 		ctx->default_replica_location = TRUE;
 		break;
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-brain-mailbox.c
--- a/src/doveadm/dsync/dsync-brain-mailbox.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-brain-mailbox.c	Tue Jan 20 03:23:29 2015 +0200
@@ -213,7 +213,8 @@
 		import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128;
 
 	brain->box_importer = brain->backup_send ? NULL :
-		dsync_mailbox_import_init(brain->box, brain->log_scan,
+		dsync_mailbox_import_init(brain->box, brain->virtual_all_box,
+					  brain->log_scan,
 					  last_common_uid, last_common_modseq,
 					  last_common_pvt_modseq,
 					  remote_dsync_box->uid_next,
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-brain-private.h
--- a/src/doveadm/dsync/dsync-brain-private.h	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-brain-private.h	Tue Jan 20 03:23:29 2015 +0200
@@ -52,6 +52,7 @@
 	const char *process_title_prefix;
 	ARRAY(struct mail_namespace *) sync_namespaces;
 	const char *sync_box;
+	struct mailbox *virtual_all_box;
 	guid_128_t sync_box_guid;
 	const char *const *exclude_mailboxes;
 	enum dsync_brain_sync_type sync_type;
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-brain.c
--- a/src/doveadm/dsync/dsync-brain.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-brain.c	Tue Jan 20 03:23:29 2015 +0200
@@ -141,6 +141,16 @@
 		(flags & DSYNC_BRAIN_FLAG_NO_MAIL_PREFETCH) != 0;
 }
 
+static void
+dsync_brain_open_virtual_all_box(struct dsync_brain *brain,
+				 const char *vname)
+{
+	struct mail_namespace *ns;
+
+	ns = mail_namespace_find(brain->user->namespaces, vname);
+	brain->virtual_all_box = mailbox_alloc(ns->list, vname, 0);
+}
+
 struct dsync_brain *
 dsync_brain_master_init(struct mail_user *user, struct dsync_ibc *ibc,
 			enum dsync_brain_sync_type sync_type,
@@ -185,6 +195,9 @@
 	brain->master_brain = TRUE;
 	dsync_brain_set_flags(brain, flags);
 
+	if (set->virtual_all_box != NULL)
+		dsync_brain_open_virtual_all_box(brain, set->virtual_all_box);
+
 	if (sync_type != DSYNC_BRAIN_SYNC_TYPE_STATE)
 		;
 	else if (dsync_mailbox_states_import(brain->mailbox_states, brain->pool,
@@ -207,6 +220,7 @@
 	ibc_set.sync_ns_prefixes = sync_ns_str == NULL ?
 		NULL : str_c(sync_ns_str);
 	ibc_set.sync_box = set->sync_box;
+	ibc_set.virtual_all_box = set->virtual_all_box;
 	ibc_set.exclude_mailboxes = set->exclude_mailboxes;
 	ibc_set.sync_since_timestamp = set->sync_since_timestamp;
 	memcpy(ibc_set.sync_box_guid, set->sync_box_guid,
@@ -295,6 +309,8 @@
 
 	if (brain->box != NULL)
 		dsync_brain_sync_mailbox_deinit(brain);
+	if (brain->virtual_all_box != NULL)
+		mailbox_free(&brain->virtual_all_box);
 	if (brain->local_tree_iter != NULL)
 		dsync_mailbox_tree_iter_deinit(&brain->local_tree_iter);
 	if (brain->local_mailbox_tree != NULL)
@@ -465,6 +481,8 @@
 	brain->purge = (ibc_set->brain_flags &
 			DSYNC_BRAIN_FLAG_PURGE_REMOTE) != 0;
 
+	if (ibc_set->virtual_all_box != NULL)
+		dsync_brain_open_virtual_all_box(brain, ibc_set->virtual_all_box);
 	dsync_brain_mailbox_trees_init(brain);
 
 	if (brain->sync_type == DSYNC_BRAIN_SYNC_TYPE_STATE)
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-brain.h
--- a/src/doveadm/dsync/dsync-brain.h	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-brain.h	Tue Jan 20 03:23:29 2015 +0200
@@ -47,6 +47,10 @@
 	ARRAY(struct mail_namespace *) sync_namespaces;
 	/* Sync only this mailbox name */
 	const char *sync_box;
+	/* Use this virtual \All mailbox to be able to copy mails with the same
+	   GUID instead of saving them twice. With most storages this results
+	   in less disk space usage. */
+	const char *virtual_all_box;
 	/* Sync only this mailbox GUID */
 	guid_128_t sync_box_guid;
 	/* Exclude these mailboxes from the sync. They can contain '*'
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-ibc-pipe.c
--- a/src/doveadm/dsync/dsync-ibc-pipe.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-ibc-pipe.c	Tue Jan 20 03:23:29 2015 +0200
@@ -167,6 +167,7 @@
 	item->u.set.sync_ns_prefixes =
 		p_strdup(item->pool, set->sync_ns_prefixes);
 	item->u.set.sync_box = p_strdup(item->pool, set->sync_box);
+	item->u.set.virtual_all_box = p_strdup(item->pool, set->virtual_all_box);
 	item->u.set.exclude_mailboxes = set->exclude_mailboxes == NULL ? NULL :
 		p_strarray_dup(item->pool, set->exclude_mailboxes);
 	memcpy(item->u.set.sync_box_guid, set->sync_box_guid,
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-ibc-stream.c
--- a/src/doveadm/dsync/dsync-ibc-stream.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-ibc-stream.c	Tue Jan 20 03:23:29 2015 +0200
@@ -76,7 +76,7 @@
 	  	"debug sync_visible_namespaces exclude_mailboxes  "
 	  	"send_mail_requests backup_send backup_recv lock_timeout "
 	  	"no_mail_sync no_backup_overwrite purge_remote "
-		"sync_since_timestamp"
+		"sync_since_timestamp virtual_all_box"
 	},
 	{ .name = "mailbox_state",
 	  .chr = 'S',
@@ -613,6 +613,10 @@
 	}
 	if (set->sync_box != NULL)
 		dsync_serializer_encode_add(encoder, "sync_box", set->sync_box);
+	if (set->virtual_all_box != NULL) {
+		dsync_serializer_encode_add(encoder, "virtual_all_box",
+					    set->virtual_all_box);
+	}
 	if (set->exclude_mailboxes != NULL) {
 		string_t *substr = t_str_new(64);
 		unsigned int i;
@@ -710,6 +714,8 @@
 		set->sync_ns_prefixes = p_strdup(pool, value);
 	if (dsync_deserializer_decode_try(decoder, "sync_box", &value))
 		set->sync_box = p_strdup(pool, value);
+	if (dsync_deserializer_decode_try(decoder, "virtual_all_box", &value))
+		set->virtual_all_box = p_strdup(pool, value);
 	if (dsync_deserializer_decode_try(decoder, "sync_box_guid", &value) &&
 	    guid_128_from_string(value, set->sync_box_guid) < 0) {
 		dsync_ibc_input_error(ibc, decoder,
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-ibc.h
--- a/src/doveadm/dsync/dsync-ibc.h	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-ibc.h	Tue Jan 20 03:23:29 2015 +0200
@@ -47,6 +47,9 @@
 	const char *sync_ns_prefixes;
 	/* if non-NULL, sync only this mailbox name */
 	const char *sync_box;
+	/* if non-NULL, use this mailbox for finding messages with GUIDs and
+	   copying them instead of saving them again. */
+	const char *virtual_all_box;
 	/* if non-empty, sync only this mailbox GUID */
 	guid_128_t sync_box_guid;
 	/* Exclude these mailboxes from the sync. They can contain '*'
diff -r e83c50fda315 -r 39d00448490f src/doveadm/dsync/dsync-mailbox-import.c
--- a/src/doveadm/dsync/dsync-mailbox-import.c	Tue Jan 20 03:14:37 2015 +0200
+++ b/src/doveadm/dsync/dsync-mailbox-import.c	Tue Jan 20 03:23:29 2015 +0200
@@ -36,6 +36,9 @@
 	uint32_t local_uid;
 	/* the original remote UID, or 0 if exists only remotely */
 	uint32_t remote_uid;
+	/* UID for the mail in the virtual \All mailbox */
+	uint32_t virtual_all_uid;
+
 	unsigned int uid_in_local:1;
 	unsigned int uid_is_usable:1;
 	unsigned int skip:1;
@@ -64,6 +67,10 @@
 	struct mail_search_context *search_ctx;
 	struct mail *mail, *ext_mail;
 
+	struct mailbox *virtual_all_box;
+	struct mailbox_transaction_context *virtual_trans;
+	struct mail *virtual_mail;
+
 	struct mail *cur_mail;
 	const char *cur_guid;
 	const char *cur_hdr_hash;
@@ -108,9 +115,10 @@
 	unsigned int delete_mailbox:1;
 };
 
-static void dsync_mailbox_save_newmails(struct dsync_mailbox_importer *importer,
+static bool dsync_mailbox_save_newmails(struct dsync_mailbox_importer *importer,
 					const struct dsync_mail *mail,
-					struct importer_new_mail *all_newmails);
+					struct importer_new_mail *all_newmails,
+					bool remote_mail);
 static int dsync_mailbox_import_commit(struct dsync_mailbox_importer *importer,
 				       bool final);
 
@@ -170,6 +178,7 @@
 
 struct dsync_mailbox_importer *
 dsync_mailbox_import_init(struct mailbox *box,
+			  struct mailbox *virtual_all_box,
 			  struct dsync_transaction_log_scan *log_scan,
 			  uint32_t last_common_uid,
 			  uint64_t last_common_modseq,
@@ -190,6 +199,7 @@
 	importer = p_new(pool, struct dsync_mailbox_importer, 1);
 	importer->pool = pool;
 	importer->box = box;
+	importer->virtual_all_box = virtual_all_box;
 	importer->last_common_uid = last_common_uid;
 	importer->last_common_modseq = last_common_modseq;
 	importer->last_common_pvt_modseq = last_common_pvt_modseq;
@@ -1666,17 +1676,17 @@
 
 static int
 dsync_mailbox_import_local_uid(struct dsync_mailbox_importer *importer,
-			       uint32_t uid, const char *guid,
+			       struct mail *mail, uint32_t uid, const char *guid,
 			       struct dsync_mail *dmail_r)
 {
 	const char *error_field, *errstr;
 	enum mail_error error;
 
-	if (!mail_set_uid(importer->mail, uid))
+	if (!mail_set_uid(mail, uid))
 		return 0;
 
-	if (dsync_mail_fill(importer->mail, TRUE, dmail_r, &error_field) < 0) {
-		errstr = mailbox_get_last_error(importer->mail->box, &error);
+	if (dsync_mail_fill(mail, TRUE, dmail_r, &error_field) < 0) {
+		errstr = mailbox_get_last_error(mail->box, &error);
 		if (error == MAIL_ERROR_EXPUNGED)
 			return 0;
 
@@ -1834,13 +1844,32 @@
 	   other instances */
 	local_n = 0; seq_range_array_iter_init(&local_iter, local_uids);
 	while (seq_range_array_iter_nth(&local_iter, local_n++, &local_uid)) {
-		if (dsync_mailbox_import_local_uid(importer, local_uid,
-						   all_newmails->guid,
+		if (dsync_mailbox_import_local_uid(importer, importer->mail,
+						   local_uid, all_newmails->guid,
 						   &dmail) > 0) {
-			dsync_mailbox_save_newmails(importer, &dmail,
-						    all_newmails);
+			if (dsync_mailbox_save_newmails(importer, &dmail,
+							all_newmails, FALSE))
+				return TRUE;
+		}
+	}
+	return FALSE;
+}
+
+static bool
+dsync_mailbox_import_try_virtual_all(struct dsync_mailbox_importer *importer,
+				     struct importer_new_mail *all_newmails)
+{
+	struct dsync_mail dmail;
+
+	if (all_newmails->virtual_all_uid == 0)
+		return FALSE;
+
+	if (dsync_mailbox_import_local_uid(importer, importer->virtual_mail,
+					   all_newmails->virtual_all_uid,
+					   all_newmails->guid, &dmail) > 0) {
+		if (dsync_mailbox_save_newmails(importer, &dmail,
+						all_newmails, FALSE))
 			return TRUE;
-		}
 	}
 	return FALSE;


More information about the dovecot-cvs mailing list