dovecot: Fixes to FTS handling.

dovecot at dovecot.org dovecot at dovecot.org
Mon Dec 3 11:25:53 EET 2007


details:   http://hg.dovecot.org/dovecot/rev/8c779f3e16be
changeset: 6905:8c779f3e16be
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Dec 03 11:25:49 2007 +0200
description:
Fixes to FTS handling.

diffstat:

5 files changed, 118 insertions(+), 94 deletions(-)
src/plugins/fts-squat/squat-trie-private.h |    6 +
src/plugins/fts-squat/squat-trie.c         |  114 +++++++++++++++++++---------
src/plugins/fts/fts-api.c                  |   57 +++++---------
src/plugins/fts/fts-storage.c              |   34 ++------
src/plugins/fts/fts-storage.h              |    1 

diffs (truncated from 458 to 300 lines):

diff -r 275d22eb25ba -r 8c779f3e16be src/plugins/fts-squat/squat-trie-private.h
--- a/src/plugins/fts-squat/squat-trie-private.h	Mon Dec 03 11:02:44 2007 +0200
+++ b/src/plugins/fts-squat/squat-trie-private.h	Mon Dec 03 11:25:49 2007 +0200
@@ -20,6 +20,10 @@ struct squat_file_header {
 	uint32_t root_unused_uids;
 	uint32_t root_next_uid;
 	uint32_t root_uidlist_idx;
+
+	uint8_t partial_len;
+	uint8_t full_len;
+	uint8_t normalize_map[256];
 };
 
 /*
@@ -119,7 +123,7 @@ struct squat_trie {
 	void *mmap_base;
 	size_t mmap_size;
 
-	unsigned char normalize_map[256];
+	unsigned char default_normalize_map[256];
 
 	unsigned int corrupted:1;
 };
diff -r 275d22eb25ba -r 8c779f3e16be src/plugins/fts-squat/squat-trie.c
--- a/src/plugins/fts-squat/squat-trie.c	Mon Dec 03 11:02:44 2007 +0200
+++ b/src/plugins/fts-squat/squat-trie.c	Mon Dec 03 11:25:49 2007 +0200
@@ -17,10 +17,12 @@
 #include <time.h>
 #include <sys/mman.h>
 
+#define DEFAULT_NORMALIZE_MAP_CHARS \
+	"EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&"
+#define DEFAULT_PARTIAL_LEN 4
+#define DEFAULT_FULL_LEN 4
+
 #define MAX_FAST_LEVEL 3
-#define MAX_PARTIAL_LEN 4
-#define MAX_FULL_LEN 4
-
 #define SEQUENTIAL_COUNT 46
 
 struct squat_trie_build_context {
@@ -62,33 +64,34 @@ static void squat_trie_normalize_map_bui
 static void squat_trie_normalize_map_build(struct squat_trie *trie)
 {
 	static unsigned char valid_chars[] =
-		"EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&";
+		DEFAULT_NORMALIZE_MAP_CHARS;
 	unsigned int i, j;
 
-	memset(trie->normalize_map, 0, sizeof(trie->normalize_map));
+	memset(trie->default_normalize_map, 0,
+	       sizeof(trie->default_normalize_map));
 
 #if 1
 	for (i = 0, j = 1; i < sizeof(valid_chars)-1; i++) {
 		unsigned char chr = valid_chars[i];
 
 		if (chr >= 'A' && chr <= 'Z')
-			trie->normalize_map[chr-'A'+'a'] = j;
-		trie->normalize_map[chr] = j++;
+			trie->default_normalize_map[chr-'A'+'a'] = j;
+		trie->default_normalize_map[chr] = j++;
 	}
 	i_assert(j <= SEQUENTIAL_COUNT);
 
 	for (i = 128; i < 256; i++)
-		trie->normalize_map[i] = j++;
+		trie->default_normalize_map[i] = j++;
 #else
 	for (i = 0; i < sizeof(valid_chars)-1; i++) {
 		unsigned char chr = valid_chars[i];
 
 		if (chr >= 'A' && chr <= 'Z')
-			trie->normalize_map[chr-'A'+'a'] = chr;
-		trie->normalize_map[chr] = chr;
+			trie->default_normalize_map[chr-'A'+'a'] = chr;
+		trie->default_normalize_map[chr] = chr;
 	}
 	for (i = 128; i < 256; i++)
-		trie->normalize_map[i] = i_toupper(i);
+		trie->default_normalize_map[i] = i_toupper(i);
 #endif
 }
 
@@ -113,6 +116,7 @@ static void node_free(struct squat_trie 
 
 static void squat_trie_clear(struct squat_trie *trie)
 {
+	trie->corrupted = FALSE;
 	node_free(trie, &trie->root);
 	memset(&trie->root, 0, sizeof(trie->root));
 	memset(&trie->hdr, 0, sizeof(trie->hdr));
@@ -166,6 +170,13 @@ static void squat_trie_header_init(struc
 	trie->hdr.version = SQUAT_TRIE_VERSION;
 	trie->hdr.indexid = ioloop_time;
 	trie->hdr.uidvalidity = trie->uidvalidity;
+	trie->hdr.partial_len = DEFAULT_PARTIAL_LEN;
+	trie->hdr.full_len = DEFAULT_FULL_LEN;
+
+	i_assert(sizeof(trie->hdr.normalize_map) ==
+		 sizeof(trie->default_normalize_map));
+	memcpy(trie->hdr.normalize_map, trie->default_normalize_map,
+	       sizeof(trie->hdr.normalize_map));
 }
 
 static int squat_trie_open_fd(struct squat_trie *trie)
@@ -694,16 +705,15 @@ squat_build_word(struct squat_trie *trie
 squat_build_word(struct squat_trie *trie, uint32_t uid,
 		 const unsigned char *data, unsigned int size)
 {
-#if MAX_PARTIAL_LEN > 0
 	unsigned int i;
 
 	for (i = size - 1; i > 0; i--) {
 		if (squat_build_add(trie, uid, data + i,
-				    I_MIN(MAX_PARTIAL_LEN, size - i)) < 0)
+				    I_MIN(trie->hdr.partial_len, size - i)) < 0)
 			return -1;
 	}
-#endif
-	return squat_build_add(trie, uid, data, I_MIN(size, MAX_FULL_LEN));
+	return squat_build_add(trie, uid, data,
+			       I_MIN(size, trie->hdr.full_len));
 }
 
 static unsigned char *
@@ -715,7 +725,7 @@ squat_data_normalize(struct squat_trie *
 
 	dest = t_malloc(size);
 	for (i = 0; i < size; i++)
-		dest[i] = trie->normalize_map[data[i]];
+		dest[i] = trie->hdr.normalize_map[data[i]];
 	return dest;
 }
 
@@ -862,7 +872,7 @@ squat_trie_iterate_uidlist_init(struct s
 	ctx = i_new(struct squat_trie_iterate_context, 1);
 	ctx->trie = trie;
 	ctx->cur.node = &trie->root;
-	i_array_init(&ctx->parents, MAX_FULL_LEN*2);
+	i_array_init(&ctx->parents, trie->hdr.partial_len*2);
 	return ctx;
 }
 
@@ -981,6 +991,23 @@ static int squat_trie_renumber_uidlists(
 	return squat_uidlist_rebuild_finish(ctx, ret < 0);
 }
 
+static bool squat_trie_check_header(struct squat_trie *trie)
+{
+	if (trie->hdr.version != SQUAT_TRIE_VERSION ||
+	    trie->hdr.uidvalidity != trie->uidvalidity)
+		return FALSE;
+
+	if (trie->hdr.partial_len > trie->hdr.full_len) {
+		i_error("Corrupted %s: partial len > full len", trie->path);
+		return FALSE;
+	}
+	if (trie->hdr.full_len == 0) {
+		i_error("Corrupted %s: full len=0", trie->path);
+		return FALSE;
+	}
+	return TRUE;
+}
+
 static int squat_trie_map_header(struct squat_trie *trie)
 {
 	if (trie->locked_file_size == 0) {
@@ -1008,8 +1035,7 @@ static int squat_trie_map_header(struct 
 
 	if (trie->hdr.root_offset == 0)
 		return 0;
-	if (trie->hdr.version != SQUAT_TRIE_VERSION ||
-	    trie->hdr.uidvalidity != trie->uidvalidity) {
+	if (!squat_trie_check_header(trie)) {
 		squat_trie_delete(trie);
 		squat_trie_close(trie);
 		squat_trie_header_init(trie);
@@ -1082,7 +1108,7 @@ int squat_trie_build_init(struct squat_t
 	ctx->trie = trie;
 	ctx->first_uid = trie->root.next_uid;
 
-	*last_uid_r = I_MAX(trie->root.next_uid/2, 1) - 1;
+	*last_uid_r = I_MAX((trie->root.next_uid+1)/2, 1) - 1;
 	*ctx_r = ctx;
 	return 0;
 }
@@ -1131,6 +1157,8 @@ static int squat_trie_write(struct squat
 	ret = squat_write_nodes(ctx);
 	ctx->output = NULL;
 
+	if (trie->corrupted)
+		ret = -1;
 	if (ret == 0) {
 		trie->hdr.used_file_size = output->offset;
 		o_stream_seek(output, 0);
@@ -1194,7 +1222,7 @@ int squat_trie_get_last_uid(struct squat
 			return -1;
 	}
 
-	*last_uid_r = I_MAX(trie->root.next_uid/2, 1) - 1;
+	*last_uid_r = I_MAX((trie->root.next_uid+1)/2, 1) - 1;
 	return 0;
 }
 
@@ -1334,10 +1362,10 @@ squat_trie_lookup_partial(struct squat_t
 	int ret;
 
 	do {
-		if (size <= MAX_PARTIAL_LEN)
+		if (size <= ctx->trie->hdr.partial_len)
 			block_len = size;
 		else
-			block_len = MAX_PARTIAL_LEN;
+			block_len = ctx->trie->hdr.partial_len;
 		block = data + size - block_len;
 
 		ret = squat_trie_lookup_data(ctx->trie, block, block_len,
@@ -1357,8 +1385,28 @@ squat_trie_lookup_partial(struct squat_t
 			seq_range_array_remove_invert_range(ctx->maybe_uids,
 							    &ctx->tmp_uids2);
 		}
-	} while (--size >= MAX_PARTIAL_LEN);
+	} while (--size >= ctx->trie->hdr.partial_len);
 	return 1;
+}
+
+static void squat_trie_add_unknown(struct squat_trie *trie,
+				   ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	struct seq_range *range, new_range;
+	unsigned int count;
+	uint32_t last_uid;
+
+	last_uid = I_MAX((trie->root.next_uid+1)/2, 1) - 1;
+
+	range = array_get_modifiable(maybe_uids, &count);
+	if (count > 0 && range[count-1].seq2 == last_uid) {
+		/* increase the range */
+		range[count-1].seq2 = (uint32_t)-1;
+	} else {
+		new_range.seq1 = last_uid + 1;
+		new_range.seq2 = (uint32_t)-1;
+		array_append(maybe_uids, &new_range, 1);
+	}
 }
 
 int squat_trie_lookup(struct squat_trie *trie, const char *str,
@@ -1407,11 +1455,12 @@ int squat_trie_lookup(struct squat_trie 
 							i - start);
 		}
 		t_pop();
-		return ret < 0 ? -1 :
-			(array_count(maybe_uids) > 0 ? 1 : 0);
-	}
-
-	if (MAX_FULL_LEN > MAX_PARTIAL_LEN || size <= MAX_PARTIAL_LEN) {
+		squat_trie_add_unknown(trie, maybe_uids);
+		return ret < 0 ? -1 : 0;
+	}
+
+	if (size <= trie->hdr.partial_len ||
+	    trie->hdr.full_len > trie->hdr.partial_len) {
 		ret = squat_trie_lookup_data(trie, data, size, &ctx.tmp_uids);
 		if (ret > 0) {
 			squat_trie_filter_type(type, &ctx.tmp_uids,
@@ -1421,7 +1470,7 @@ int squat_trie_lookup(struct squat_trie 
 		array_clear(definite_uids);
 	}
 
-	if (size <= MAX_PARTIAL_LEN || MAX_PARTIAL_LEN == 0) {
+	if (size <= trie->hdr.partial_len || trie->hdr.partial_len == 0) {
 		/* we have the result */
 		array_clear(maybe_uids);
 	} else {
@@ -1429,9 +1478,8 @@ int squat_trie_lookup(struct squat_trie 
 						i - start);
 	}
 	t_pop();
-	return ret < 0 ? -1 :
-		(array_count(maybe_uids) > 0 ||
-		 array_count(definite_uids) > 0 ? 1 : 0);
+	squat_trie_add_unknown(trie, maybe_uids);
+	return ret < 0 ? -1 : 0;
 }
 
 struct squat_uidlist *squat_trie_get_uidlist(struct squat_trie *trie)
diff -r 275d22eb25ba -r 8c779f3e16be src/plugins/fts/fts-api.c
--- a/src/plugins/fts/fts-api.c	Mon Dec 03 11:02:44 2007 +0200
+++ b/src/plugins/fts/fts-api.c	Mon Dec 03 11:25:49 2007 +0200
@@ -144,16 +144,8 @@ int fts_backend_lookup(struct fts_backen
 
 	ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT,
 				definite_uids, maybe_uids);
-	if (ret <= 0) {
-		if (unlikely(ret < 0))
-			return -1;
-		i_assert(array_count(definite_uids) == 0 &&
-			 array_count(maybe_uids) == 0);


More information about the dovecot-cvs mailing list