dovecot-2.2: fts-lucene: Added mime_parts parameter to index MIME parts as separate documents.

dovecot at dovecot.org dovecot at dovecot.org
Thu Apr 17 14:42:26 UTC 2014


details:   http://hg.dovecot.org/dovecot-2.2/rev/cba90b79fa48
changeset: 17242:cba90b79fa48
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Apr 17 16:42:02 2014 +0200
description:
fts-lucene: Added mime_parts parameter to index MIME parts as separate documents.
This is in preparation for being able to actually read such information and
show it to clients via some new IMAP extension.

Also it might be better to index some MIME parts together to avoid wasting
disk space, but for now this is all or nothing. For example anything that is
"message body" could probably be indexed without the part number at all.

diffstat:

 src/plugins/fts-lucene/doveadm-fts-lucene.c |   5 +++-
 src/plugins/fts-lucene/fts-backend-lucene.c |  13 ++++++++++-
 src/plugins/fts-lucene/fts-lucene-plugin.c  |   4 +++
 src/plugins/fts-lucene/fts-lucene-plugin.h  |   1 +
 src/plugins/fts-lucene/lucene-wrapper.cc    |  31 +++++++++++++++++++++++++---
 src/plugins/fts-lucene/lucene-wrapper.h     |   6 ++--
 6 files changed, 50 insertions(+), 10 deletions(-)

diffs (220 lines):

diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/doveadm-fts-lucene.c
--- a/src/plugins/fts-lucene/doveadm-fts-lucene.c	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c	Thu Apr 17 16:42:02 2014 +0200
@@ -34,7 +34,10 @@
 			memcpy(prev_guid, rec->mailbox_guid, sizeof(prev_guid));
 			printf("%s: ", guid_128_to_string(prev_guid));
 		}
-		printf("%u,", rec->uid);
+		printf("%u", rec->uid);
+		if (rec->part_num != 0)
+			printf("[%u]", rec->part_num);
+		printf("\n");
 	}
 	printf("\n");
 	if (lucene_index_iter_deinit(&iter) < 0)
diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/fts-backend-lucene.c
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Thu Apr 17 16:42:02 2014 +0200
@@ -5,6 +5,7 @@
 #include "hash.h"
 #include "hex-binary.h"
 #include "strescape.h"
+#include "message-part.h"
 #include "mail-namespace.h"
 #include "mail-storage-private.h"
 #include "fts-expunge-log.h"
@@ -41,7 +42,7 @@
 	uint32_t last_indexed_uid;
 	char *first_box_vname;
 
-	uint32_t uid;
+	uint32_t uid, part_num;
 	char *hdr_name;
 
 	unsigned int added_msgs;
@@ -49,6 +50,7 @@
 
 	bool lucene_opened;
 	bool last_indexed_uid_set;
+	bool mime_parts;
 };
 
 static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend)
@@ -203,11 +205,14 @@
 	struct lucene_fts_backend *backend =
 		(struct lucene_fts_backend *)_backend;
 	struct lucene_fts_backend_update_context *ctx;
+	struct fts_lucene_user *fuser =
+		FTS_LUCENE_USER_CONTEXT(_backend->ns->user);
 
 	i_assert(!backend->updating);
 
 	ctx = i_new(struct lucene_fts_backend_update_context, 1);
 	ctx->ctx.backend = _backend;
+	ctx->mime_parts = fuser->set.mime_parts;
 	backend->updating = TRUE;
 	return &ctx->ctx;
 }
@@ -375,6 +380,8 @@
 	}
 
 	ctx->uid = key->uid;
+	if (ctx->mime_parts)
+		ctx->part_num = message_part_to_idx(key->part);
 	return TRUE;
 }
 
@@ -385,6 +392,7 @@
 		(struct lucene_fts_backend_update_context *)_ctx;
 
 	ctx->uid = 0;
+	ctx->part_num = 0;
 	i_free_and_null(ctx->hdr_name);
 }
 
@@ -405,7 +413,8 @@
 
 	T_BEGIN {
 		ret = lucene_index_build_more(backend->index, ctx->uid,
-					      data, size, ctx->hdr_name);
+					      ctx->part_num, data, size,
+					      ctx->hdr_name);
 	} T_END;
 	return ret;
 }
diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/fts-lucene-plugin.c
--- a/src/plugins/fts-lucene/fts-lucene-plugin.c	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.c	Thu Apr 17 16:42:02 2014 +0200
@@ -32,6 +32,8 @@
 			set->normalize = TRUE;
 		} else if (strcmp(*tmp, "no_snowball") == 0) {
 			set->no_snowball = TRUE;
+		} else if (strcmp(*tmp, "mime_parts") == 0) {
+			set->mime_parts = TRUE;
 		} else {
 			i_error("fts_lucene: Invalid setting: %s", *tmp);
 			return -1;
@@ -79,6 +81,8 @@
 		crc = crc32_str_more(crc, "n");
 	if (set->no_snowball)
 		crc = crc32_str_more(crc, "s");
+	/* don't include mime_parts here, since changing it doesn't
+	   necessarily need the index to be rebuilt */
 	return crc;
 }
 
diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/fts-lucene-plugin.h
--- a/src/plugins/fts-lucene/fts-lucene-plugin.h	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.h	Thu Apr 17 16:42:02 2014 +0200
@@ -14,6 +14,7 @@
 	const char *whitespace_chars;
 	bool normalize;
 	bool no_snowball;
+	bool mime_parts;
 };
 
 struct fts_lucene_user {
diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/lucene-wrapper.cc
--- a/src/plugins/fts-lucene/lucene-wrapper.cc	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc	Thu Apr 17 16:42:02 2014 +0200
@@ -72,7 +72,7 @@
 	ARRAY(struct lucene_analyzer) analyzers;
 
 	Document *doc;
-	uint32_t prev_uid;
+	uint32_t prev_uid, prev_part_idx;
 };
 
 struct rescan_context {
@@ -307,6 +307,22 @@
 	return 0;
 }
 
+static uint32_t
+lucene_doc_get_part(struct lucene_index *index, Document *doc)
+{
+	Field *field = doc->getField(_T("part"));
+	const TCHAR *part = field == NULL ? NULL : field->stringValue();
+	if (part == NULL)
+		return 0;
+
+	uint32_t num = 0;
+	while (*part != 0) {
+		num = num*10 + (*part - '0');
+		part++;
+	}
+	return num;
+}
+
 int lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
 {
 	int ret = 0;
@@ -510,20 +526,25 @@
 }
 
 int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
-			    const unsigned char *data, size_t size,
-			    const char *hdr_name)
+			    uint32_t part_idx, const unsigned char *data,
+			    size_t size, const char *hdr_name)
 {
 	wchar_t id[MAX_INT_STRLEN];
 	size_t namesize, datasize;
 
-	if (uid != index->prev_uid) {
+	if (uid != index->prev_uid || part_idx != index->prev_part_idx) {
 		if (lucene_index_build_flush(index) < 0)
 			return -1;
 		index->prev_uid = uid;
+		index->prev_part_idx = part_idx;
 
 		index->doc = _CLNEW Document();
 		swprintf(id, N_ELEMENTS(id), L"%u", uid);
 		index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+		if (part_idx != 0) {
+			swprintf(id, N_ELEMENTS(id), L"%u", part_idx);
+			index->doc->add(*_CLNEW Field(_T("part"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+		}
 		index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
 	}
 
@@ -573,6 +594,7 @@
 		return 0;
 	}
 	index->prev_uid = 0;
+	index->prev_part_idx = 0;
 
 	if (index->writer == NULL) {
 		lucene_index_close(index);
@@ -1497,6 +1519,7 @@
 	(void)fts_lucene_get_mailbox_guid(iter->index, doc,
 					  iter->rec.mailbox_guid);
 	(void)lucene_doc_get_uid(iter->index, doc, &iter->rec.uid);
+	iter->rec.part_num = lucene_doc_get_part(iter->index, doc);
 	return &iter->rec;
 }
 
diff -r 7809bc519633 -r cba90b79fa48 src/plugins/fts-lucene/lucene-wrapper.h
--- a/src/plugins/fts-lucene/lucene-wrapper.h	Thu Apr 17 16:35:05 2014 +0200
+++ b/src/plugins/fts-lucene/lucene-wrapper.h	Thu Apr 17 16:42:02 2014 +0200
@@ -12,7 +12,7 @@
 
 struct lucene_index_record {
 	guid_128_t mailbox_guid;
-	uint32_t uid;
+	uint32_t uid, part_num;
 };
 
 HASH_TABLE_DEFINE_TYPE(wguid_result, wchar_t *, struct fts_result *);
@@ -31,8 +31,8 @@
 
 int lucene_index_build_init(struct lucene_index *index);
 int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
-			    const unsigned char *data, size_t size,
-			    const char *hdr_name);
+			    uint32_t part_num, const unsigned char *data,
+			    size_t size, const char *hdr_name);
 int lucene_index_build_deinit(struct lucene_index *index);
 
 void lucene_index_close(struct lucene_index *index);


More information about the dovecot-cvs mailing list