dovecot: Add only valid UTF-8 data to Squat index.

dovecot (dovecot at dovecot.org)
Mon Jan 14 06:53:01 EET 2008


details:   http://hg.dovecot.org/dovecot/rev/f1220b37d7f7
changeset: 7160:f1220b37d7f7
user:      Timo Sirainen <tss at iki.fi>
date:      Mon Jan 14 06:52:53 2008 +0200
description:
Add only valid UTF-8 data to Squat index.

diffstat:

1 file changed, 12 insertions(+), 2 deletions(-)
src/plugins/fts-squat/squat-test.c |   14 ++++++++++++--

diffs (46 lines):

diff -r 2de2058a5cdc -r f1220b37d7f7 src/plugins/fts-squat/squat-test.c
--- a/src/plugins/fts-squat/squat-test.c	Mon Jan 14 06:52:37 2008 +0200
+++ b/src/plugins/fts-squat/squat-test.c	Mon Jan 14 06:52:53 2008 +0200
@@ -4,6 +4,7 @@
 #include "array.h"
 #include "file-lock.h"
 #include "istream.h"
+#include "unichar.h"
 #include "squat-trie.h"
 #include "squat-uidlist.h"
 
@@ -39,6 +40,7 @@ int main(int argc ATTR_UNUSED, char *arg
 	struct stat trie_st, uidlist_st;
 	ARRAY_TYPE(seq_range) definite_uids, maybe_uids;
 	char *line, *str, buf[4096];
+	buffer_t *valid;
 	int ret, fd;
 	unsigned int last = 0, seq = 1, node_count, uidlist_count;
 	enum squat_index_type index_type;
@@ -66,6 +68,7 @@ int main(int argc ATTR_UNUSED, char *arg
 	if (squat_trie_build_init(trie, &last_uid, &build_ctx) < 0)
 		return 1;
 
+	valid = buffer_create_dynamic(default_pool, 4096);
 	input = i_stream_create_fd(fd, 0, FALSE);
 	ret = 0;
 	while (ret == 0 && (line = i_stream_read_next_line(input)) != NULL) {
@@ -111,9 +114,16 @@ int main(int argc ATTR_UNUSED, char *arg
 
 		index_type = data_header ? SQUAT_INDEX_TYPE_HEADER :
 			SQUAT_INDEX_TYPE_BODY;
-		ret = squat_trie_build_more(build_ctx, seq, index_type,
-					    (const void *)line, strlen(line));
+
+		buffer_set_used_size(valid, 0);
+		uni_utf8_get_valid_data((const unsigned char *)line,
+					strlen(line), valid);
+		if (valid->used > 0) {
+			ret = squat_trie_build_more(build_ctx, seq, index_type,
+						    valid->data, valid->used);
+		}
 	}
+	buffer_free(&valid);
 	if (squat_trie_build_deinit(&build_ctx) < 0)
 		ret = -1;
 	if (ret < 0) {


More information about the dovecot-cvs mailing list