dovecot-2.2: lib-fts: Added fts_tokenizer_reset()

dovecot at dovecot.org dovecot at dovecot.org
Sat May 9 10:54:31 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/2dca6925bd88
changeset: 18580:2dca6925bd88
user:      Timo Sirainen <tss at iki.fi>
date:      Sat May 09 13:52:37 2015 +0300
description:
lib-fts: Added fts_tokenizer_reset()

diffstat:

 src/lib-fts/fts-tokenizer-address.c |  11 +++++++++++
 src/lib-fts/fts-tokenizer-generic.c |  14 ++++++++++++++
 src/lib-fts/fts-tokenizer-private.h |   1 +
 src/lib-fts/fts-tokenizer.c         |   5 +++++
 src/lib-fts/fts-tokenizer.h         |   4 ++++
 src/lib-fts/test-fts-tokenizer.c    |  19 ++++++++++++++++---
 6 files changed, 51 insertions(+), 3 deletions(-)

diffs (153 lines):

diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 13:52:37 2015 +0300
@@ -197,6 +197,16 @@
 		str_append_n(tok->parent_data, data, size);
 }
 
+static void fts_tokenizer_email_address_reset(struct fts_tokenizer *_tok)
+{
+	struct email_address_fts_tokenizer *tok =
+		(struct email_address_fts_tokenizer *)_tok;
+
+	tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
+	str_truncate(tok->last_word, 0);
+	str_truncate(tok->parent_data, 0);
+}
+
 static int
 fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                                  const unsigned char *data, size_t size,
@@ -290,6 +300,7 @@
 static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
 	fts_tokenizer_email_address_create,
 	fts_tokenizer_email_address_destroy,
+	fts_tokenizer_email_address_reset,
 	fts_tokenizer_email_address_next
 };
 
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-generic.c
--- a/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 13:52:37 2015 +0300
@@ -137,6 +137,17 @@
 	return is_word_break(c);
 }
 
+static void fts_tokenizer_generic_reset(struct fts_tokenizer *_tok)
+{
+	struct generic_fts_tokenizer *tok =
+		(struct generic_fts_tokenizer *)_tok;
+
+	tok->prev_letter = LETTER_TYPE_NONE;
+	tok->prev_prev_letter = LETTER_TYPE_NONE;
+	tok->last_size = 0;
+	buffer_set_used_size(tok->token, 0);
+}
+
 static int
 fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
                                   const unsigned char *data, size_t size,
@@ -580,6 +591,7 @@
 static const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs = {
 	fts_tokenizer_generic_create,
 	fts_tokenizer_generic_destroy,
+	fts_tokenizer_generic_reset,
 	fts_tokenizer_generic_next
 };
 
@@ -592,10 +604,12 @@
 const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple = {
 	fts_tokenizer_generic_create,
 	fts_tokenizer_generic_destroy,
+	fts_tokenizer_generic_reset,
 	fts_tokenizer_generic_next_simple
 };
 const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29 = {
 	fts_tokenizer_generic_create,
 	fts_tokenizer_generic_destroy,
+	fts_tokenizer_generic_reset,
 	fts_tokenizer_generic_next_tr29
 };
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer-private.h
--- a/src/lib-fts/fts-tokenizer-private.h	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-private.h	Sat May 09 13:52:37 2015 +0300
@@ -10,6 +10,7 @@
 		      struct fts_tokenizer **tokenizer_r, const char **error_r);
 	void (*destroy)(struct fts_tokenizer *tok);
 
+	void (*reset)(struct fts_tokenizer *tok);
 	int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
 	            size_t size, size_t *skip_r, const char **token_r);
 };
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer.c
--- a/src/lib-fts/fts-tokenizer.c	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c	Sat May 09 13:52:37 2015 +0300
@@ -157,6 +157,11 @@
 	return ret;
 }
 
+void fts_tokenizer_reset(struct fts_tokenizer *tok)
+{
+	tok->v->reset(tok);
+}
+
 int
 fts_tokenizer_next(struct fts_tokenizer *tok,
                    const unsigned char *data, size_t size, const char **token_r)
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/fts-tokenizer.h
--- a/src/lib-fts/fts-tokenizer.h	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h	Sat May 09 13:52:37 2015 +0300
@@ -63,6 +63,9 @@
 void fts_tokenizer_ref(struct fts_tokenizer *tok);
 void fts_tokenizer_unref(struct fts_tokenizer **tok);
 
+/* Reset FTS tokenizer state */
+void fts_tokenizer_reset(struct fts_tokenizer *tok);
+
 /*
    Returns 1 if token was returned, 0 if input was non-blocking and
    more data is needed, -1 if EOF/error.
@@ -82,4 +85,5 @@
                    const unsigned char *data, size_t size, const char **token_r);
 
 const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
+
 #endif
diff -r 363397c3701e -r 2dca6925bd88 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 13:46:37 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 13:52:37 2015 +0300
@@ -555,18 +555,31 @@
 	}
 	test_assert(*eopp == NULL);
 
+	/* make sure state is forgotten at EOF */
 	test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token) == 0);
-	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+		    strcmp(token, "foo") == 0);
 	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
 
 	test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token) == 0);
-	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+		    strcmp(token, "bar@baz") == 0);
 	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
 
 	test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token) == 0);
-	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0);
+	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+		    strcmp(token, "foo") == 0);
 	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
 
+	/* test reset explicitly */
+	test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token) == 0);
+	fts_tokenizer_reset(tok);
+	test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token) == 0);
+	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 &&
+		    strcmp(token, "b@c") == 0);
+	test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0);
+
+
 	fts_tokenizer_unref(&tok);
 	fts_tokenizer_unref(&gen_tok);
 	fts_tokenizers_deinit();


More information about the dovecot-cvs mailing list