dovecot: Drop fields that haven't been used for 30 days when compressing.
dovecot at dovecot.org
dovecot at dovecot.org
Mon Aug 13 20:19:39 EEST 2007
details: http://hg.dovecot.org/dovecot/rev/205ee38f10d1
changeset: 6296:205ee38f10d1
user: Timo Sirainen <tss at iki.fi>
date: Mon Aug 13 20:16:55 2007 +0300
description:
Drop fields that haven't been used for 30 days when compressing.
diffstat:
4 files changed, 100 insertions(+), 32 deletions(-)
src/lib-index/mail-cache-compress.c | 94 ++++++++++++++++++++++----------
src/lib-index/mail-cache-fields.c | 29 ++++++++-
src/lib-index/mail-cache-private.h | 5 +
src/lib-index/mail-cache-transaction.c | 4 +
diffs (287 lines):
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-compress.c
--- a/src/lib-index/mail-cache-compress.c Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-compress.c Mon Aug 13 20:16:55 2007 +0300
@@ -18,6 +18,8 @@ struct mail_cache_copy_context {
buffer_t *buffer, *field_seen;
ARRAY_DEFINE(bitmask_pos, unsigned int);
+ uint32_t *field_file_map;
+
uint8_t field_seen_value;
bool new_msg;
};
@@ -44,15 +46,19 @@ mail_cache_compress_field(struct mail_ca
mail_cache_compress_field(struct mail_cache_copy_context *ctx,
const struct mail_cache_iterate_field *field)
{
- uint32_t field_idx = field->field_idx;
struct mail_cache_field *cache_field;
enum mail_cache_decision_type dec;
+ uint32_t file_field_idx, size32;
uint8_t *field_seen;
- uint32_t size32;
-
- cache_field = &ctx->cache->fields[field_idx].field;
-
- field_seen = buffer_get_space_unsafe(ctx->field_seen, field_idx, 1);
+
+ file_field_idx = ctx->field_file_map[field->field_idx];
+ if (file_field_idx == (uint32_t)-1)
+ return;
+
+ cache_field = &ctx->cache->fields[field->field_idx].field;
+
+ field_seen = buffer_get_space_unsafe(ctx->field_seen,
+ field->field_idx, 1);
if (*field_seen == ctx->field_seen_value) {
/* duplicate */
if (cache_field->type == MAIL_CACHE_FIELD_BITMASK)
@@ -70,7 +76,7 @@ mail_cache_compress_field(struct mail_ca
return;
}
- buffer_append(ctx->buffer, &field_idx, sizeof(field_idx));
+ buffer_append(ctx->buffer, &file_field_idx, sizeof(file_field_idx));
if (cache_field->field_size == (unsigned int)-1) {
size32 = (uint32_t)field->size;
@@ -101,6 +107,38 @@ get_next_file_seq(struct mail_cache *cac
file_seq = cache->hdr->file_seq + 1;
return file_seq != 0 ? file_seq : 1;
+}
+
+static void
+mail_cache_compress_get_fields(struct mail_cache_copy_context *ctx,
+ unsigned int used_fields_count)
+{
+ struct mail_cache *cache = ctx->cache;
+ unsigned int i, j, idx;
+
+ /* Make mail_cache_header_fields_get() return the fields in
+ the same order as we saved them. */
+ memcpy(cache->field_file_map, ctx->field_file_map,
+ sizeof(uint32_t) * cache->fields_count);
+
+ /* reverse mapping */
+ cache->file_fields_count = used_fields_count;
+ i_free(cache->file_field_map);
+ cache->file_field_map = used_fields_count == 0 ? NULL :
+ i_new(unsigned int, used_fields_count);
+ for (i = j = 0; i < cache->fields_count; i++) {
+ idx = cache->field_file_map[i];
+ if (idx != (uint32_t)-1) {
+ i_assert(idx < used_fields_count &&
+ cache->file_field_map[idx] == 0);
+ cache->file_field_map[idx] = i;
+ j++;
+ }
+ }
+ i_assert(j == used_fields_count);
+
+ buffer_set_used_size(ctx->buffer, 0);
+ mail_cache_header_fields_get(cache, ctx->buffer);
}
static int
@@ -117,8 +155,9 @@ mail_cache_copy(struct mail_cache *cache
struct mail_cache_header hdr;
struct mail_cache_record cache_rec;
struct ostream *output;
- buffer_t *buffer;
- uint32_t i, message_count, seq, first_new_seq, ext_offset;
+ uint32_t message_count, seq, first_new_seq, ext_offset;
+ unsigned int i, used_fields_count, orig_fields_count;
+ time_t max_drop_time;
view = mail_index_transaction_get_view(trans);
@@ -151,7 +190,20 @@ mail_cache_copy(struct mail_cache *cache
ctx.buffer = buffer_create_dynamic(default_pool, 4096);
ctx.field_seen = buffer_create_dynamic(default_pool, 64);
ctx.field_seen_value = 0;
+ ctx.field_file_map = t_new(uint32_t, cache->fields_count);
t_array_init(&ctx.bitmask_pos, 32);
+
+ /* @UNSAFE: drop unused fields and create a field mapping for
+ used fields */
+ max_drop_time = idx_hdr->day_stamp - MAIL_CACHE_FIELD_DROP_SECS;
+ orig_fields_count = cache->fields_count;
+ for (i = used_fields_count = 0; i < orig_fields_count; i++) {
+ if (cache->fields[i].last_used < max_drop_time)
+ cache->fields[i].used = FALSE;
+
+ ctx.field_file_map[i] = !cache->fields[i].used ? (uint32_t)-1 :
+ used_fields_count++;
+ }
t_array_init(ext_offsets, message_count);
for (seq = 1; seq <= message_count; seq++) {
@@ -190,25 +242,11 @@ mail_cache_copy(struct mail_cache *cache
array_append(ext_offsets, &ext_offset, 1);
}
i_assert(array_count(ext_offsets) == message_count);
-
- if (cache->fields_count != 0) {
- hdr.field_header_offset =
- mail_index_uint32_to_offset(output->offset);
-
- /* we wrote everything using our internal field ids. so we want
- mail_cache_header_fields_get() to use them and ignore any
- existing id mappings in the old cache file. */
- cache->file_fields_count = 0;
- for (i = 0; i < cache->fields_count; i++)
- cache->field_file_map[i] = (uint32_t)-1;
-
- t_push();
- buffer = buffer_create_dynamic(pool_datastack_create(), 256);
- mail_cache_header_fields_get(cache, buffer);
- o_stream_send(output, buffer_get_data(buffer, NULL),
- buffer_get_used_size(buffer));
- t_pop();
- }
+ i_assert(orig_fields_count == cache->fields_count);
+
+ hdr.field_header_offset = mail_index_uint32_to_offset(output->offset);
+ mail_cache_compress_get_fields(&ctx, used_fields_count);
+ o_stream_send(output, ctx.buffer->data, ctx.buffer->used);
hdr.used_file_size = output->offset;
buffer_free(ctx.buffer);
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-fields.c
--- a/src/lib-index/mail-cache-fields.c Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-fields.c Mon Aug 13 20:16:55 2007 +0300
@@ -211,6 +211,7 @@ int mail_cache_header_fields_read(struct
const char *p, *names, *end;
void *orig_key, *orig_value;
unsigned int new_fields_count;
+ time_t max_drop_time;
uint32_t offset, i;
if (mail_cache_header_fields_get_offset(cache, &offset) < 0)
@@ -274,6 +275,9 @@ int mail_cache_header_fields_read(struct
/* clear the old mapping */
for (i = 0; i < cache->fields_count; i++)
cache->field_file_map[i] = (uint32_t)-1;
+
+ max_drop_time = cache->index->map->hdr.day_stamp -
+ MAIL_CACHE_FIELD_DROP_SECS;
memset(&field, 0, sizeof(field));
for (i = 0; i < field_hdr->fields_count; i++) {
@@ -317,12 +321,19 @@ int mail_cache_header_fields_read(struct
"Duplicated field in header: %s", names);
return -1;
}
+ cache->fields[field.idx].used = TRUE;
+
cache->field_file_map[field.idx] = i;
cache->file_field_map[i] = field.idx;
/* update last_used if it's newer than ours */
if (last_used[i] > cache->fields[field.idx].last_used)
cache->fields[field.idx].last_used = last_used[i];
+
+ if (cache->fields[field.idx].last_used < max_drop_time) {
+ /* time to drop this field */
+ cache->need_compress_file_seq = cache->hdr->file_seq;
+ }
names = p + 1;
}
@@ -341,7 +352,8 @@ static void copy_to_buf(struct mail_cach
buffer_append(dest, data, size);
}
for (i = 0; i < cache->fields_count; i++) {
- if (cache->field_file_map[i] != (uint32_t)-1)
+ if (cache->field_file_map[i] != (uint32_t)-1 ||
+ !cache->fields[i].used)
continue;
data = CONST_PTR_OFFSET(&cache->fields[i], offset);
buffer_append(dest, data, size);
@@ -362,7 +374,8 @@ static void copy_to_buf_byte(struct mail
buffer_append(dest, &byte, 1);
}
for (i = 0; i < cache->fields_count; i++) {
- if (cache->field_file_map[i] != (uint32_t)-1)
+ if (cache->field_file_map[i] != (uint32_t)-1 ||
+ !cache->fields[i].used)
continue;
data = CONST_PTR_OFFSET(&cache->fields[i], offset);
byte = (uint8_t)*data;
@@ -435,7 +448,12 @@ void mail_cache_header_fields_get(struct
uint32_t i;
memset(&hdr, 0, sizeof(hdr));
- hdr.fields_count = cache->fields_count;
+ hdr.fields_count = cache->file_fields_count;
+ for (i = 0; i < cache->fields_count; i++) {
+ if (cache->field_file_map[i] == (uint32_t)-1 &&
+ cache->fields[i].used)
+ hdr.fields_count++;
+ }
buffer_append(dest, &hdr, sizeof(hdr));
/* we have to keep the field order for the existing fields. */
@@ -451,14 +469,17 @@ void mail_cache_header_fields_get(struct
i_assert(buffer_get_used_size(dest) == sizeof(hdr) +
(sizeof(uint32_t)*2 + 2) * hdr.fields_count);
+ /* add fields' names */
for (i = 0; i < cache->file_fields_count; i++) {
field = cache->file_field_map[i];
name = cache->fields[field].field.name;
buffer_append(dest, name, strlen(name)+1);
}
for (i = 0; i < cache->fields_count; i++) {
- if (cache->field_file_map[i] != (uint32_t)-1)
+ if (cache->field_file_map[i] != (uint32_t)-1 ||
+ !cache->fields[i].used)
continue;
+
name = cache->fields[i].field.name;
buffer_append(dest, name, strlen(name)+1);
}
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-private.h
--- a/src/lib-index/mail-cache-private.h Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-private.h Mon Aug 13 20:16:55 2007 +0300
@@ -6,6 +6,9 @@
#include "mail-cache.h"
#define MAIL_CACHE_VERSION 1
+
+/* Drop fields that haven't been accessed for n seconds */
+#define MAIL_CACHE_FIELD_DROP_SECS (3600*24*30)
/* Never compress the file if it's smaller than this */
#define MAIL_CACHE_COMPRESS_MIN_SIZE (1024*50)
@@ -112,6 +115,8 @@ struct mail_cache_field_private {
uint32_t uid_highwater;
uint32_t last_used;
+ /* Unused fields aren't written to cache file */
+ unsigned int used:1;
unsigned int decision_dirty:1;
};
diff -r 30904b20782d -r 205ee38f10d1 src/lib-index/mail-cache-transaction.c
--- a/src/lib-index/mail-cache-transaction.c Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-transaction.c Mon Aug 13 20:16:55 2007 +0300
@@ -1,6 +1,7 @@
/* Copyright (C) 2003-2004 Timo Sirainen */
#include "lib.h"
+#include "ioloop.h"
#include "array.h"
#include "buffer.h"
#include "file-cache.h"
@@ -672,6 +673,9 @@ static int mail_cache_header_add_field(s
uint32_t offset, hdr_offset;
int ret = 0;
+ ctx->cache->fields[field_idx].last_used = ioloop_time;
+ ctx->cache->fields[field_idx].used = TRUE;
+
if ((ret = mail_cache_transaction_lock(ctx)) <= 0) {
/* create the cache file if it doesn't exist yet */
if (ctx->tried_compression)
More information about the dovecot-cvs
mailing list