dovecot-2.2: lib-fts: Fixed handling tokens that contain only apostrophes

dovecot at dovecot.org dovecot at dovecot.org
Thu May 21 10:39:16 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/5b902db0cabc
changeset: 18733:5b902db0cabc
user:      Timo Sirainen <tss at iki.fi>
date:      Thu May 21 06:35:59 2015 -0400
description:
lib-fts: Fixed handling tokens that contain only apostrophes

diffstat:

 src/lib-fts/fts-tokenizer-generic.c |  9 +++------
 src/lib-fts/test-fts-tokenizer.c    |  2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diffs (34 lines):

diff -r 6c655ce3b857 -r 5b902db0cabc src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c	Thu May 21 06:29:15 2015 -0400
+++ b/src/lib-fts/fts-tokenizer-generic.c	Thu May 21 06:35:59 2015 -0400
@@ -234,15 +234,12 @@
 		    fts_apostrophe_word_break(tok, c)) {
 			len = char_start_i - start;
 			tok_append_truncated(tok, data + start, len);
-			if (tok->token->used == 0) {
-				start = i + char_size;
-				continue;
-			}
-
-			if (fts_tokenizer_generic_simple_current_token(tok, token_r)) {
+			if (tok->token->used > 0 &&
+			    fts_tokenizer_generic_simple_current_token(tok, token_r)) {
 				*skip_r = i + char_size;
 				return 1;
 			}
+			start = i + char_size;
 		}
 	}
 	/* word boundary not found yet */
diff -r 6c655ce3b857 -r 5b902db0cabc src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c	Thu May 21 06:29:15 2015 -0400
+++ b/src/lib-fts/test-fts-tokenizer.c	Thu May 21 06:35:59 2015 -0400
@@ -29,7 +29,7 @@
 
 	"1.",
 
-	"'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
+	"' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
 
 	/* whitespace: with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and
 	   U+205A(e2 81 9a) and U+205F(e2 81 9f) */


More information about the dovecot-cvs mailing list