dovecot: Drop fields that haven't been used for 30 days when compressing.

dovecot at dovecot.org dovecot at dovecot.org
Mon Aug 13 20:19:39 EEST 2007


details:   http://hg.dovecot.org/dovecot/rev/205ee38f10d1
changeset: 6296:205ee38f10d1
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Aug 13 20:16:55 2007 +0300
description:
Drop fields that haven't been used for 30 days when compressing.

diffstat:

4 files changed, 100 insertions(+), 32 deletions(-)
src/lib-index/mail-cache-compress.c    |   94 ++++++++++++++++++++++----------
src/lib-index/mail-cache-fields.c      |   29 ++++++++-
src/lib-index/mail-cache-private.h     |    5 +
src/lib-index/mail-cache-transaction.c |    4 +

diffs (287 lines):

diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-compress.c
--- a/src/lib-index/mail-cache-compress.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-compress.c	Mon Aug 13 20:16:55 2007 +0300
@@ -18,6 +18,8 @@ struct mail_cache_copy_context {
 
 	buffer_t *buffer, *field_seen;
 	ARRAY_DEFINE(bitmask_pos, unsigned int);
+	uint32_t *field_file_map;
+
 	uint8_t field_seen_value;
 	bool new_msg;
 };
@@ -44,15 +46,19 @@ mail_cache_compress_field(struct mail_ca
 mail_cache_compress_field(struct mail_cache_copy_context *ctx,
 			  const struct mail_cache_iterate_field *field)
 {
-	uint32_t field_idx = field->field_idx;
         struct mail_cache_field *cache_field;
 	enum mail_cache_decision_type dec;
+	uint32_t file_field_idx, size32;
 	uint8_t *field_seen;
-	uint32_t size32;
-
-	cache_field = &ctx->cache->fields[field_idx].field;
-
-	field_seen = buffer_get_space_unsafe(ctx->field_seen, field_idx, 1);
+
+	file_field_idx = ctx->field_file_map[field->field_idx];
+	if (file_field_idx == (uint32_t)-1)
+		return;
+
+	cache_field = &ctx->cache->fields[field->field_idx].field;
+
+	field_seen = buffer_get_space_unsafe(ctx->field_seen,
+					     field->field_idx, 1);
 	if (*field_seen == ctx->field_seen_value) {
 		/* duplicate */
 		if (cache_field->type == MAIL_CACHE_FIELD_BITMASK)
@@ -70,7 +76,7 @@ mail_cache_compress_field(struct mail_ca
 			return;
 	}
 
-	buffer_append(ctx->buffer, &field_idx, sizeof(field_idx));
+	buffer_append(ctx->buffer, &file_field_idx, sizeof(file_field_idx));
 
 	if (cache_field->field_size == (unsigned int)-1) {
 		size32 = (uint32_t)field->size;
@@ -101,6 +107,38 @@ get_next_file_seq(struct mail_cache *cac
 		file_seq = cache->hdr->file_seq + 1;
 
 	return file_seq != 0 ? file_seq : 1;
+}
+
+static void
+mail_cache_compress_get_fields(struct mail_cache_copy_context *ctx,
+			       unsigned int used_fields_count)
+{
+	struct mail_cache *cache = ctx->cache;
+	unsigned int i, j, idx;
+
+	/* Make mail_cache_header_fields_get() return the fields in
+	   the same order as we saved them. */
+	memcpy(cache->field_file_map, ctx->field_file_map,
+	       sizeof(uint32_t) * cache->fields_count);
+
+	/* reverse mapping */
+	cache->file_fields_count = used_fields_count;
+	i_free(cache->file_field_map);
+	cache->file_field_map = used_fields_count == 0 ? NULL :
+		i_new(unsigned int, used_fields_count);
+	for (i = j = 0; i < cache->fields_count; i++) {
+		idx = cache->field_file_map[i];
+		if (idx != (uint32_t)-1) {
+			i_assert(idx < used_fields_count &&
+				 cache->file_field_map[idx] == 0);
+			cache->file_field_map[idx] = i;
+			j++;
+		}
+	}
+	i_assert(j == used_fields_count);
+
+	buffer_set_used_size(ctx->buffer, 0);
+	mail_cache_header_fields_get(cache, ctx->buffer);
 }
 
 static int
@@ -117,8 +155,9 @@ mail_cache_copy(struct mail_cache *cache
 	struct mail_cache_header hdr;
 	struct mail_cache_record cache_rec;
 	struct ostream *output;
-	buffer_t *buffer;
-	uint32_t i, message_count, seq, first_new_seq, ext_offset;
+	uint32_t message_count, seq, first_new_seq, ext_offset;
+	unsigned int i, used_fields_count, orig_fields_count;
+	time_t max_drop_time;
 
 	view = mail_index_transaction_get_view(trans);
 
@@ -151,7 +190,20 @@ mail_cache_copy(struct mail_cache *cache
 	ctx.buffer = buffer_create_dynamic(default_pool, 4096);
 	ctx.field_seen = buffer_create_dynamic(default_pool, 64);
 	ctx.field_seen_value = 0;
+	ctx.field_file_map = t_new(uint32_t, cache->fields_count);
 	t_array_init(&ctx.bitmask_pos, 32);
+
+	/* @UNSAFE: drop unused fields and create a field mapping for
+	   used fields */
+	max_drop_time = idx_hdr->day_stamp - MAIL_CACHE_FIELD_DROP_SECS;
+	orig_fields_count = cache->fields_count;
+	for (i = used_fields_count = 0; i < orig_fields_count; i++) {
+		if (cache->fields[i].last_used < max_drop_time)
+			cache->fields[i].used = FALSE;
+
+		ctx.field_file_map[i] = !cache->fields[i].used ? (uint32_t)-1 :
+			used_fields_count++;
+	}
 
 	t_array_init(ext_offsets, message_count);
 	for (seq = 1; seq <= message_count; seq++) {
@@ -190,25 +242,11 @@ mail_cache_copy(struct mail_cache *cache
 		array_append(ext_offsets, &ext_offset, 1);
 	}
 	i_assert(array_count(ext_offsets) == message_count);
-
-	if (cache->fields_count != 0) {
-		hdr.field_header_offset =
-			mail_index_uint32_to_offset(output->offset);
-
-		/* we wrote everything using our internal field ids. so we want
-		   mail_cache_header_fields_get() to use them and ignore any
-		   existing id mappings in the old cache file. */
-		cache->file_fields_count = 0;
-		for (i = 0; i < cache->fields_count; i++)
-                        cache->field_file_map[i] = (uint32_t)-1;
-
-		t_push();
-		buffer = buffer_create_dynamic(pool_datastack_create(), 256);
-		mail_cache_header_fields_get(cache, buffer);
-		o_stream_send(output, buffer_get_data(buffer, NULL),
-			      buffer_get_used_size(buffer));
-		t_pop();
-	}
+	i_assert(orig_fields_count == cache->fields_count);
+
+	hdr.field_header_offset = mail_index_uint32_to_offset(output->offset);
+	mail_cache_compress_get_fields(&ctx, used_fields_count);
+	o_stream_send(output, ctx.buffer->data, ctx.buffer->used);
 
 	hdr.used_file_size = output->offset;
 	buffer_free(ctx.buffer);
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-fields.c
--- a/src/lib-index/mail-cache-fields.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-fields.c	Mon Aug 13 20:16:55 2007 +0300
@@ -211,6 +211,7 @@ int mail_cache_header_fields_read(struct
 	const char *p, *names, *end;
 	void *orig_key, *orig_value;
 	unsigned int new_fields_count;
+	time_t max_drop_time;
 	uint32_t offset, i;
 
 	if (mail_cache_header_fields_get_offset(cache, &offset) < 0)
@@ -274,6 +275,9 @@ int mail_cache_header_fields_read(struct
 	/* clear the old mapping */
 	for (i = 0; i < cache->fields_count; i++)
 		cache->field_file_map[i] = (uint32_t)-1;
+
+	max_drop_time = cache->index->map->hdr.day_stamp -
+		MAIL_CACHE_FIELD_DROP_SECS;
 
 	memset(&field, 0, sizeof(field));
 	for (i = 0; i < field_hdr->fields_count; i++) {
@@ -317,12 +321,19 @@ int mail_cache_header_fields_read(struct
 				"Duplicated field in header: %s", names);
 			return -1;
 		}
+		cache->fields[field.idx].used = TRUE;
+
 		cache->field_file_map[field.idx] = i;
 		cache->file_field_map[i] = field.idx;
 
 		/* update last_used if it's newer than ours */
 		if (last_used[i] > cache->fields[field.idx].last_used)
 			cache->fields[field.idx].last_used = last_used[i];
+
+		if (cache->fields[field.idx].last_used < max_drop_time) {
+			/* time to drop this field */
+			cache->need_compress_file_seq = cache->hdr->file_seq;
+		}
 
                 names = p + 1;
 	}
@@ -341,7 +352,8 @@ static void copy_to_buf(struct mail_cach
 		buffer_append(dest, data, size);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
 		data = CONST_PTR_OFFSET(&cache->fields[i], offset);
 		buffer_append(dest, data, size);
@@ -362,7 +374,8 @@ static void copy_to_buf_byte(struct mail
 		buffer_append(dest, &byte, 1);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
 		data = CONST_PTR_OFFSET(&cache->fields[i], offset);
 		byte = (uint8_t)*data;
@@ -435,7 +448,12 @@ void mail_cache_header_fields_get(struct
 	uint32_t i;
 
 	memset(&hdr, 0, sizeof(hdr));
-	hdr.fields_count = cache->fields_count;
+	hdr.fields_count = cache->file_fields_count;
+	for (i = 0; i < cache->fields_count; i++) {
+		if (cache->field_file_map[i] == (uint32_t)-1 &&
+		    cache->fields[i].used)
+			hdr.fields_count++;
+	}
 	buffer_append(dest, &hdr, sizeof(hdr));
 
 	/* we have to keep the field order for the existing fields. */
@@ -451,14 +469,17 @@ void mail_cache_header_fields_get(struct
 	i_assert(buffer_get_used_size(dest) == sizeof(hdr) +
 		 (sizeof(uint32_t)*2 + 2) * hdr.fields_count);
 
+	/* add fields' names */
 	for (i = 0; i < cache->file_fields_count; i++) {
 		field = cache->file_field_map[i];
 		name = cache->fields[field].field.name;
 		buffer_append(dest, name, strlen(name)+1);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
+
 		name = cache->fields[i].field.name;
 		buffer_append(dest, name, strlen(name)+1);
 	}
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-private.h
--- a/src/lib-index/mail-cache-private.h	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-private.h	Mon Aug 13 20:16:55 2007 +0300
@@ -6,6 +6,9 @@
 #include "mail-cache.h"
 
 #define MAIL_CACHE_VERSION 1
+
+/* Drop fields that haven't been accessed for n seconds */
+#define MAIL_CACHE_FIELD_DROP_SECS (3600*24*30)
 
 /* Never compress the file if it's smaller than this */
 #define MAIL_CACHE_COMPRESS_MIN_SIZE (1024*50)
@@ -112,6 +115,8 @@ struct mail_cache_field_private {
 	uint32_t uid_highwater;
 	uint32_t last_used;
 
+	/* Unused fields aren't written to cache file */
+	unsigned int used:1;
 	unsigned int decision_dirty:1;
 };
 
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-transaction.c
--- a/src/lib-index/mail-cache-transaction.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-transaction.c	Mon Aug 13 20:16:55 2007 +0300
@@ -1,6 +1,7 @@
 /* Copyright (C) 2003-2004 Timo Sirainen */
 
 #include "lib.h"
+#include "ioloop.h"
 #include "array.h"
 #include "buffer.h"
 #include "file-cache.h"
@@ -672,6 +673,9 @@ static int mail_cache_header_add_field(s
 	uint32_t offset, hdr_offset;
 	int ret = 0;
 
+	ctx->cache->fields[field_idx].last_used = ioloop_time;
+	ctx->cache->fields[field_idx].used = TRUE;
+
 	if ((ret = mail_cache_transaction_lock(ctx)) <= 0) {
 		/* create the cache file if it doesn't exist yet */
 		if (ctx->tried_compression)


More information about the dovecot-cvs mailing list