[dovecot-cvs] dovecot/src/plugins/fts-lucene .cvsignore, NONE, 1.1 Makefile.am, NONE, 1.1 fts-backend-lucene.c, NONE, 1.1 fts-lucene-plugin.c, NONE, 1.1 fts-lucene-plugin.h, NONE, 1.1 lucene-wrapper.cc, NONE, 1.1 lucene-wrapper.h, NONE, 1.1

cras at dovecot.org cras at dovecot.org
Mon Sep 18 02:15:56 EEST 2006


Update of /var/lib/cvs/dovecot/src/plugins/fts-lucene
In directory talvi:/tmp/cvs-serv17064/src/plugins/fts-lucene

Added Files:
	.cvsignore Makefile.am fts-backend-lucene.c 
	fts-lucene-plugin.c fts-lucene-plugin.h lucene-wrapper.cc 
	lucene-wrapper.h 
Log Message:
--with-lucene now enables Lucene full text search indexing. Note that using
it violates the IMAP RFC. It also seems to have problems finding text that
contains special characters, such as email addresses.

You can nevertheless enable it by loading the fts and fts_lucene plugins and
setting fts=lucene in the plugin section.



--- NEW FILE: .cvsignore ---
*.la
*.lo
*.o
.deps
.libs
Makefile
Makefile.in
so_locations

--- NEW FILE: Makefile.am ---
# Header search paths: Dovecot core libraries and the generic fts plugin.
# NOTE(review): -I/usr/lib is presumably needed for a CLucene header that is
# installed under the library directory -- confirm.
AM_CPPFLAGS = \
	-I$(top_srcdir)/src/lib \
	-I$(top_srcdir)/src/lib-mail \
	-I$(top_srcdir)/src/lib-storage \
	-I$(top_srcdir)/src/plugins/fts \
	-I/usr/lib

# Build a loadable module without a version suffix.
lib02_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version

# NOTE(review): the lib02_ prefix presumably orders this plugin after the
# generic fts plugin at load time -- confirm.
module_LTLIBRARIES = \
	lib02_fts_lucene_plugin.la

# The plugin links against the CLucene library.
lib02_fts_lucene_plugin_la_LIBADD = \
	-lclucene

lib02_fts_lucene_plugin_la_SOURCES = \
	fts-lucene-plugin.c \
	fts-backend-lucene.c \
	lucene-wrapper.cc

noinst_HEADERS = \
	fts-lucene-plugin.h

# For each of the imap and lda module subdirectories: create the directory,
# remove any old link and symlink the plugin from the parent module directory.
install-exec-local:
	for d in imap lda; do \
	  $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
	  rm -f $(DESTDIR)$(moduledir)/$$d/lib02_fts_lucene_plugin.so; \
	  $(LN_S) ../lib02_fts_lucene_plugin.so $(DESTDIR)$(moduledir)/$$d; \
	done

--- NEW FILE: fts-backend-lucene.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "lucene-wrapper.h"
#include "fts-lucene-plugin.h"

/* Lucene implementation of an FTS backend. The generic backend struct is the
   first member, so the functions below cast a struct fts_backend pointer back
   to this struct. */
struct lucene_fts_backend {
	struct fts_backend backend;
	/* index handle created by lucene_index_init() */
	struct lucene_index *index;

	/* set by lucene_index_build_init() and updated to the UID given to each
	   fts_backend_lucene_build_more() call */
	uint32_t last_uid;
};

/* Allocate a Lucene backend whose index is stored at the given path.
   Returns the embedded generic backend. */
static struct fts_backend *fts_backend_lucene_init(const char *path)
{
	struct lucene_fts_backend *lucene_backend =
		i_new(struct lucene_fts_backend, 1);

	lucene_backend->index = lucene_index_init(path);
	/* start from the backend template (name, flags, vfuncs) */
	lucene_backend->backend = fts_backend_lucene;
	return &lucene_backend->backend;
}

/* Release the Lucene index handle and then the backend itself. */
static void fts_backend_lucene_deinit(struct fts_backend *_backend)
{
	struct lucene_fts_backend *lucene_backend =
		(struct lucene_fts_backend *)_backend;
	struct lucene_index *index = lucene_backend->index;

	lucene_index_deinit(index);
	i_free(lucene_backend);
}

/* Begin an index build.

   Always returns a newly allocated build context; if the index could not be
   prepared the context is marked as failed. *last_uid_r is set to the
   backend's last_uid value after the attempt. */
static struct fts_backend_build_context *
fts_backend_lucene_build_init(struct fts_backend *_backend, uint32_t *last_uid_r)
{
	struct lucene_fts_backend *lucene_backend =
		(struct lucene_fts_backend *)_backend;
	struct fts_backend_build_context *build_ctx =
		i_new(struct fts_backend_build_context, 1);
	int ret;

	build_ctx->backend = _backend;

	ret = lucene_index_build_init(lucene_backend->index,
				      &lucene_backend->last_uid);
	if (ret < 0)
		build_ctx->failed = TRUE;

	*last_uid_r = lucene_backend->last_uid;
	return build_ctx;
}

/* Feed one block of text belonging to message uid into the index.

   Returns -1 immediately if the build context has already failed, otherwise
   the result of lucene_index_build_more(). UIDs must not decrease between
   calls. */
static int
fts_backend_lucene_build_more(struct fts_backend_build_context *ctx,
			      uint32_t uid, const unsigned char *data,
			      size_t size)
{
	struct lucene_fts_backend *backend;

	if (ctx->failed)
		return -1;

	backend = (struct lucene_fts_backend *)ctx->backend;

	i_assert(uid >= backend->last_uid);
	backend->last_uid = uid;

	return lucene_index_build_more(backend->index, uid, data, size);
}

/* Finish an index build and free the build context.

   Returns -1 if the build had already failed or if finishing the index
   (lucene_index_build_deinit()) fails, 0 otherwise. */
static int
fts_backend_lucene_build_deinit(struct fts_backend_build_context *ctx)
{
	struct lucene_fts_backend *backend =
		(struct lucene_fts_backend *)ctx->backend;
	int ret = ctx->failed ? -1 : 0;

	/* don't report success if the index couldn't be finished */
	if (lucene_index_build_deinit(backend->index) < 0)
		ret = -1;
	i_free(ctx);
	return ret;
}

/* Look up key from the Lucene index; matches are added to result.
   Returns whatever lucene_index_lookup() returns. */
static int
fts_backend_lucene_lookup(struct fts_backend *_backend, const char *key,
			 ARRAY_TYPE(seq_range) *result)
{
	struct lucene_fts_backend *lucene_backend =
		(struct lucene_fts_backend *)_backend;
	struct lucene_index *index = lucene_backend->index;

	return lucene_index_lookup(index, key, result);
}

/* Filter result using key via the Lucene index.
   Returns whatever lucene_index_filter() returns. */
static int
fts_backend_lucene_filter(struct fts_backend *_backend, const char *key,
			 ARRAY_TYPE(seq_range) *result)
{
	struct lucene_fts_backend *lucene_backend =
		(struct lucene_fts_backend *)_backend;
	struct lucene_index *index = lucene_backend->index;

	return lucene_index_filter(index, key, result);
}

/* Backend template: copied into every backend created by
   fts_backend_lucene_init() and registered by the plugin. */
struct fts_backend fts_backend_lucene = {
	/* backend name */
	"lucene",
	/* NOTE(review): the meaning of this flag is defined by struct
	   fts_backend in fts-api-private.h -- confirm there */
	TRUE,

	{
		fts_backend_lucene_init,
		fts_backend_lucene_deinit,
		fts_backend_lucene_build_init,
		fts_backend_lucene_build_more,
		fts_backend_lucene_build_deinit,
		fts_backend_lucene_lookup,
		fts_backend_lucene_filter
	}
};

--- NEW FILE: fts-lucene-plugin.c ---
/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "fts-lucene-plugin.h"

/* Plugin entry point: register the Lucene backend with the fts plugin. */
void fts_lucene_plugin_init(void)
{
	fts_backend_register(&fts_backend_lucene);
}

/* Plugin exit point: unregister the Lucene backend by its name. */
void fts_lucene_plugin_deinit(void)
{
	fts_backend_unregister(fts_backend_lucene.name);
}

--- NEW FILE: fts-lucene-plugin.h ---
#ifndef __FTS_LUCENE_PLUGIN_H
#define __FTS_LUCENE_PLUGIN_H

#include "fts-api-private.h"

/* Lucene backend definition (defined in fts-backend-lucene.c) */
extern struct fts_backend fts_backend_lucene;

/* Register / unregister the Lucene backend */
void fts_lucene_plugin_init(void);
void fts_lucene_plugin_deinit(void);

#endif

--- NEW FILE: lucene-wrapper.cc ---
/* Copyright (C) 2006 Timo Sirainen */

extern "C" {
#include "lib.h"
#include "str-sanitize.h"
#include "lucene-wrapper.h"
};
#include <CLucene.h>

using namespace lucene::document;
using namespace lucene::index;
using namespace lucene::search;
using namespace lucene::queryParser;

/* State of one Lucene index. The CLucene objects are created on demand and
   all released by lucene_index_close(). */
struct lucene_index {
	/* copy of the index path given to lucene_index_init() */
	char *path;

	/* opened by lucene_index_open() if the index exists */
	IndexReader *reader;
	/* created by lucene_index_build_init() */
	IndexWriter *writer;
	/* created by lucene_index_open_search() */
	IndexSearcher *searcher;
	/* used by both the writer and the query parser */
	lucene::analysis::standard::StandardAnalyzer *analyzer;

	/* document currently being built for prev_uid, or NULL */
	Document *doc;
	/* prev_uid: UID of the document in doc.
	   last_uid: last UID found in the index by lucene_index_build_init() */
	uint32_t prev_uid, last_uid;
};

/* Number of bytes to skip for a UTF-8 sequence, indexed by its first byte:
   0x00..0xBF -> 1, 0xC0..0xDF -> 2, 0xE0..0xEF -> 3, 0xF0..0xF7 -> 4,
   0xF8..0xFB -> 5, 0xFC..0xFD -> 6 and 0xFE..0xFF -> 1. */
static const uint8_t utf8_skip_table[256] = {
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

/* Allocate an index handle for the given path. Nothing is opened yet; the
   path is copied. */
struct lucene_index *lucene_index_init(const char *path)
{
	struct lucene_index *new_index = i_new(struct lucene_index, 1);

	new_index->path = i_strdup(path);
	return new_index;
}

/* Delete the reader, writer, searcher and analyzer objects of the index.
   NOTE(review): _CLDELETE is CLucene's delete macro; it presumably also
   resets the pointer to NULL so the objects can be recreated -- confirm. */
static void lucene_index_close(struct lucene_index *index)
{
	_CLDELETE(index->reader);
	_CLDELETE(index->writer);
	_CLDELETE(index->searcher);
	_CLDELETE(index->analyzer);
}

/* Close the index and free the handle together with its path copy. */
void lucene_index_deinit(struct lucene_index *index)
{
	lucene_index_close(index);
	i_free(index->path);
	i_free(index);
}

/* Make sure the index reader is open.

   Returns 1 if the reader is open, 0 if no index exists at the path and
   -1 if opening it failed (the error is logged). */
static int lucene_index_open(struct lucene_index *index)
{
	if (index->reader == NULL) {
		if (!IndexReader::indexExists(index->path))
			return 0;

		try {
			index->reader = IndexReader::open(index->path);
		} catch (CLuceneError &err) {
			i_error("lucene: IndexReader::open(%s): %s", index->path, err.what());
			return -1;
		}
	}
	return 1;
}

/* Make sure the index can be searched: reader open, analyzer and searcher
   created.

   Returns 1 when the searcher is available; otherwise the 0 / -1 result of
   lucene_index_open() is passed through. */
static int lucene_index_open_search(struct lucene_index *index)
{
	if (index->searcher == NULL) {
		int open_ret = lucene_index_open(index);

		if (open_ret <= 0)
			return open_ret;

		if (index->analyzer == NULL) {
			index->analyzer =
				_CLNEW lucene::analysis::standard::StandardAnalyzer();
		}

		index->searcher = _CLNEW IndexSearcher(index->reader);
	}
	return 1;
}

static int lucene_doc_get_uid(struct lucene_index *index,
			      Document *doc, uint32_t *uid_r)
{
	Field *field = doc->getField(_T("uid"));
	TCHAR *uid = field == NULL ? NULL : field->stringValue();
	if (uid == NULL) {
		i_error("lucene: Corrupted FTS index %s: No UID for document",
			index->path);
		return -1;
	}

	uint32_t num = 0;
	while (*uid != 0) {
		num = num*10 + (*uid - '0');
		uid++;
	}
	*uid_r = num;
	return 0;
}

static int
lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
{
	int32_t max_docnum = index->reader->maxDoc();

	if (max_docnum == 0) {
		*last_uid_r = 0;
		return 0;
	}

	Document *doc = index->reader->document(max_docnum-1);
	if (lucene_doc_get_uid(index, doc, last_uid_r) < 0) {
		_CLDELETE(doc);
		return -1;
	}
	_CLDELETE(doc);
	return 0;
}

int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
{
	uint32_t last_uid = 0;

	if (lucene_index_open(index) < 0)
		return -1;

	if (index->reader == NULL)
		index->last_uid = 0;
	else {
		if (lucene_index_get_last_uid(index, &index->last_uid) < 0)
			return -1;
	}
	*last_uid_r = index->last_uid;

	if (index->writer != NULL)
		return 0;

	bool exists = IndexReader::indexExists(index->path);
	index->analyzer = _CLNEW lucene::analysis::standard::StandardAnalyzer();
	try {
		index->writer = _CLNEW IndexWriter(index->path,
						   index->analyzer, !exists);
	} catch (CLuceneError &err) {
		i_error("lucene: IndexWriter(%s) failed: %s",
			index->path, err.what());
		return -1;
	}

	index->writer->setMaxFieldLength(MAX_INT_STRLEN);
	return 0;
}

/* Count the UTF-8 characters in data, stopping at the first NUL byte or
   after size bytes, whichever comes first. size may be (size_t)-1 for
   NUL-terminated strings, so no end pointer is computed from it.

   The sequence length is taken from the first byte of each character; a
   sequence reaching beyond size triggers an assert. */
static unsigned int utf8_strlen_n(const void *datap, size_t size)
{
	const unsigned char *data = (const unsigned char *)datap;
	unsigned int len = 0;
	size_t i;

	for (i = 0; i < size && data[i] != '\0'; ) {
		i += utf8_skip_table[data[i]];
		i_assert(i <= size);
		len++;
	}
	return len;
}

/* Write the document being built (if any) to the index and release it.

   Returns 0 on success or when there is nothing to flush, -1 if adding the
   document failed (the error is logged). The document is released in both
   cases. */
static int lucene_index_build_flush(struct lucene_index *index)
{
	if (index->doc == NULL)
		return 0;

	int ret;
	try {
		index->writer->addDocument(index->doc);
		ret = 0;
	} catch (CLuceneError &err) {
		i_error("lucene: IndexWriter::addDocument(%s) failed: %s",
			index->path, err.what());
		ret = -1;
	}

	_CLDELETE(index->doc);
	index->doc = NULL;
	return ret;
}

/* Add one block of text to the document being built for message uid.

   The data is converted from UTF-8 to wide characters and added as another
   "contents" field. When uid differs from the previous call's UID, the
   previous document is first flushed to the index and a new document with a
   "uid" field is started. The document is written to the index only by
   lucene_index_build_flush(), so each UID is indexed exactly once.

   Returns 0 on success, -1 if flushing the previous document failed. */
int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
			    const unsigned char *data, size_t size)
{
	unsigned int len;

	i_assert(uid > index->last_uid);
	i_assert(size > 0);

	len = utf8_strlen_n(data, size);
	/* NOTE(review): variable-length array on the stack; its size follows
	   the size of the input block */
	wchar_t dest[len+1];
	lucene_utf8towcs(dest, (const char *)data, len + 1);

	if (uid != index->prev_uid) {
		char id[MAX_INT_STRLEN];
		TCHAR tid[MAX_INT_STRLEN];

		if (lucene_index_build_flush(index) < 0)
			return -1;
		index->prev_uid = uid;

		index->doc = _CLNEW Document();
		i_snprintf(id, sizeof(id), "%u", uid);
		STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
		index->doc->add(*Field::Text(_T("uid"), tid));
	}

	index->doc->add(*Field::Text(_T("contents"), dest));
	return 0;
}

/* Finish building: flush the pending document, optimize and close the
   writer, then close the whole index.

   Returns 0 on success, -1 if there was no writer or if any of the steps
   failed (errors are logged). All steps are attempted even after an earlier
   one has failed. */
int lucene_index_build_deinit(struct lucene_index *index)
{
	index->prev_uid = 0;

	if (index->writer == NULL) {
		lucene_index_close(index);
		return -1;
	}

	int ret = lucene_index_build_flush(index) < 0 ? -1 : 0;

	try {
		index->writer->optimize();
	} catch (CLuceneError &err) {
		i_error("lucene: IndexWriter::optimize(%s) failed: %s",
			index->path, err.what());
		ret = -1;
	}

	try {
		index->writer->close();
	} catch (CLuceneError &err) {
		i_error("lucene: IndexWriter::close(%s) failed: %s",
			index->path, err.what());
		ret = -1;
	}

	lucene_index_close(index);
	return ret;
}

/* Search the "contents" field for key as a quoted phrase and add the UID of
   every hit to result.

   Returns 0 on success and -1 on failure, which includes the case where no
   index exists. Errors are logged. The index is closed again before
   returning, except when it couldn't be opened for searching at all. */
int lucene_index_lookup(struct lucene_index *index, const char *key,
			ARRAY_TYPE(seq_range) *result)
{
	const char *quoted_key;
	int ret = 0;

	if (lucene_index_open_search(index) <= 0)
		return -1;

	/* the quoted key lives in the data stack only until it has been
	   converted to wide characters */
	t_push();
	quoted_key = t_strdup_printf("\"%s\"", key);
	unsigned int len = utf8_strlen_n(quoted_key, (size_t)-1);
	wchar_t tkey[len + 1];
	lucene_utf8towcs(tkey, quoted_key, len + 1);
	t_pop();

	Query *query = NULL;
	try {
		query = QueryParser::parse(tkey, _T("contents"),
					   index->analyzer);
	} catch (CLuceneError &err) {
		i_error("lucene: QueryParser::parse(%s) failed: %s",
			str_sanitize(key, 40), err.what());
		lucene_index_close(index);
		return -1;
	}

	/* declared outside the try block so that the hits are released also
	   when iterating them throws */
	Hits *hits = NULL;
	try {
		hits = index->searcher->search(query);

		for (int32_t i = 0; i < hits->length(); i++) {
			uint32_t uid;

			if (lucene_doc_get_uid(index, &hits->doc(i),
					       &uid) < 0) {
				ret = -1;
				break;
			}

			seq_range_array_add(result, 0, uid);
		}
	} catch (CLuceneError &err) {
		i_error("lucene: search(%s) failed: %s",
			index->path, err.what());
		ret = -1;
	}

	_CLDELETE(hits);
	_CLDELETE(query);
	lucene_index_close(index);
	return ret;
}

/* Filtering an existing result by key. Not implemented yet: none of the
   parameters are used and the call always fails with -1. */
int lucene_index_filter(struct lucene_index *index, const char *key,
			ARRAY_TYPE(seq_range) *result)
{
	/* FIXME: implement */
	return -1;
}

--- NEW FILE: lucene-wrapper.h ---
#ifndef __LUCENE_WRAPPER_H
#define __LUCENE_WRAPPER_H

#include "fts-api-private.h"

/* Allocate / free an index handle for the index stored at path. */
struct lucene_index *lucene_index_init(const char *path);
void lucene_index_deinit(struct lucene_index *index);

/* Index building: build_init() returns the last indexed UID in *last_uid_r,
   build_more() adds a block of text for the given UID and build_deinit()
   finishes the build. The functions return -1 on failure. */
int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r);
int lucene_index_build_more(struct lucene_index *index, uint32_t uid,
			    const unsigned char *data, size_t size);
int lucene_index_build_deinit(struct lucene_index *index);

/* Searching: lookup() adds the UIDs matching key to result and returns -1 on
   failure. filter() is not implemented yet and always returns -1. */
int lucene_index_lookup(struct lucene_index *index, const char *key,
			ARRAY_TYPE(seq_range) *result);
int lucene_index_filter(struct lucene_index *index, const char *key,
			ARRAY_TYPE(seq_range) *result);

#endif



More information about the dovecot-cvs mailing list