[dovecot-cvs] dovecot/src/lib-storage/index index-search.c,1.41,1.42

cras at procontrol.fi cras at procontrol.fi
Wed Nov 13 13:08:20 EET 2002


Update of /home/cvs/dovecot/src/lib-storage/index
In directory danu:/tmp/cvs-serv7219/lib-storage/index

Modified Files:
	index-search.c 
Log Message:
SEARCH CHARSET now works properly with message bodies, and in general body
searching works more correctly by decoding base64/qp data. Non-text MIME
parts are currently not included in the search; that could be made optional.
Also, the body is parsed separately for each keyword; that could be
optimized.

Changed base64_decode() behaviour so that it can accept non-base64 data as
well, i.e. line feeds etc.



Index: index-search.c
===================================================================
RCS file: /home/cvs/dovecot/src/lib-storage/index/index-search.c,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -d -r1.41 -r1.42
--- index-search.c	3 Nov 2002 10:11:47 -0000	1.41
+++ index-search.c	13 Nov 2002 11:08:18 -0000	1.42
@@ -7,6 +7,7 @@
 #include "rfc822-tokenize.h"
 #include "rfc822-date.h"
 #include "message-size.h"
+#include "message-body-search.h"
 #include "message-header-search.h"
 #include "imap-date.h"
 #include "imap-envelope.h"
@@ -24,6 +25,9 @@
 		(arg)->result = !(arg)->not ? (res) : -(res); \
 	} STMT_END
 
+#define TXT_UNKNOWN_CHARSET "Unknown charset"
+#define TXT_INVALID_SEARCH_KEY "Invalid search key"
+
 typedef struct {
 	Pool hdr_pool;
 	IndexMailbox *ibox;
@@ -44,12 +48,10 @@
 } SearchHeaderContext;
 
 typedef struct {
-	MailSearchArg *args;
-	const char *msg;
-	size_t size;
-
-	size_t max_searchword_len;
-} SearchTextContext;
+        SearchIndexContext *index_ctx;
+	IBuffer *inbuf;
+	MessagePart *part;
+} SearchBodyContext;
 
 static int msgset_contains(const char *set, unsigned int match_num,
 			   unsigned int max_num)
@@ -320,7 +322,7 @@
 						  &unknown_charset);
 	if (arg->context == NULL) {
 		ctx->error = unknown_charset ?
-			"Unknown charset" : "Invalid search key";
+			TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
 	}
 
 	return arg->context;
@@ -519,72 +521,28 @@
 	}
 }
 
-static void search_text(MailSearchArg *arg, SearchTextContext *ctx)
+static void search_body(MailSearchArg *arg, void *context)
 {
-	const char *p;
-	size_t i, len, max;
+	SearchBodyContext *ctx = context;
+	int ret, unknown_charset;
 
-	if (arg->result != 0)
+	if (ctx->index_ctx->error != NULL)
 		return;
 
-	len = strlen(arg->value.str);
-	if (len > ctx->max_searchword_len)
-		ctx->max_searchword_len = len;
+	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY) {
+		i_buffer_seek(ctx->inbuf, 0);
+		ret = message_body_search(arg->value.str,
+					  ctx->index_ctx->charset,
+					  &unknown_charset, ctx->inbuf,
+					  ctx->part);
 
-	if (ctx->size >= len) {
-		max = ctx->size-len;
-		for (i = 0, p = ctx->msg; i <= max; i++, p++) {
-			if (i_toupper(*p) == arg->value.str[0] &&
-			    strncasecmp(p, arg->value.str, len) == 0) {
-				/* match */
-				ARG_SET_RESULT(arg, 1);
-				return;
-			}
+		if (ret < 0) {
+			ctx->index_ctx->error = unknown_charset ?
+				TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
 		}
-	}
-}
-
-static void search_text_body(MailSearchArg *arg, void *context)
-{
-	SearchTextContext *ctx = context;
 
-	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY)
-		search_text(arg, ctx);
-}
-
-static void search_arg_match_data(IBuffer *inbuf, MailSearchArg *args,
-				  MailSearchForeachFunc search_func)
-{
-	SearchTextContext ctx;
-	const unsigned char *data;
-	size_t size, max_searchword_len;
-
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.args = args;
-
-	/* first get the max. search keyword length */
-	mail_search_args_foreach(args, search_func, &ctx);
-        max_searchword_len = ctx.max_searchword_len;
-
-	/* do this in blocks: read data, compare it for all search words, skip
-	   for block size - (strlen(largest_searchword)-1) and continue. */
-	while (i_buffer_read_data(inbuf, &data, &size,
-				  max_searchword_len-1) > 0) {
-		ctx.msg = (const char *) data;
-		ctx.size = size;
-		mail_search_args_foreach(args, search_func, &ctx);
-		i_buffer_skip(inbuf, size - (max_searchword_len-1));
-	}
-
-	if (size > 0) {
-		/* last block */
-		ctx.msg = (const char *) data;
-		ctx.size = size;
-		mail_search_args_foreach(args, search_func, &ctx);
-		i_buffer_skip(inbuf, size);
+		ARG_SET_RESULT(arg, ret > 0);
 	}
-
-	i_buffer_set_read_limit(inbuf, 0);
 }
 
 static int search_arg_match_text(MailSearchArg *args, SearchIndexContext *ctx)
@@ -606,22 +564,23 @@
 		SearchHeaderContext hdr_ctx;
 
 		memset(&hdr_ctx, 0, sizeof(hdr_ctx));
-
-		/* header checks */
 		hdr_ctx.index_context = ctx;
 		hdr_ctx.custom_header = TRUE;
 		hdr_ctx.args = args;
+
 		message_parse_header(NULL, inbuf, &hdr_size,
 				     search_header, &hdr_ctx);
 	}
 
 	if (have_text || have_body) {
-		if (inbuf->v_offset == 0) {
-			/* skip over headers */
-			i_buffer_skip(inbuf, hdr_size.physical_size);
-		}
+		SearchBodyContext body_ctx;
 
-		search_arg_match_data(inbuf, args, search_text_body);
+		memset(&body_ctx, 0, sizeof(body_ctx));
+		body_ctx.index_ctx = ctx;
+		body_ctx.inbuf = inbuf;
+		body_ctx.part = imap_msgcache_get_parts(search_open_cache(ctx));
+
+		mail_search_args_foreach(args, search_body, &body_ctx);
 	}
 	return TRUE;
 }




More information about the dovecot-cvs mailing list