dovecot-2.2: lib-storage: Added MAIL_FETCH_BODY_SNIPPET.
dovecot at dovecot.org
dovecot at dovecot.org
Fri Jan 16 22:33:42 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/a76507bc3f36
changeset: 18159:a76507bc3f36
user: Timo Sirainen <tss at iki.fi>
date: Sat Jan 17 00:31:35 2015 +0200
description:
lib-storage: Added MAIL_FETCH_BODY_SNIPPET.
diffstat:
src/lib-storage/index/index-mail-headers.c | 1 +
src/lib-storage/index/index-mail.c | 155 ++++++++++++++++++++++++++++-
src/lib-storage/index/index-mail.h | 4 +-
src/lib-storage/mail-storage.h | 3 +-
4 files changed, 159 insertions(+), 4 deletions(-)
diffs (truncated from 306 to 300 lines):
diff -r 08afd516a622 -r a76507bc3f36 src/lib-storage/index/index-mail-headers.c
--- a/src/lib-storage/index/index-mail-headers.c Sat Jan 17 00:24:36 2015 +0200
+++ b/src/lib-storage/index/index-mail-headers.c Sat Jan 17 00:31:35 2015 +0200
@@ -383,6 +383,7 @@
mail->data.save_sent_date = TRUE;
mail->data.save_bodystructure_header = TRUE;
mail->data.save_bodystructure_body = TRUE;
+ mail->data.save_body_snippet = TRUE;
mail->data.tee_stream = tee_i_stream_create(input);
input = tee_i_stream_create_child(mail->data.tee_stream);
diff -r 08afd516a622 -r a76507bc3f36 src/lib-storage/index/index-mail.c
--- a/src/lib-storage/index/index-mail.c Sat Jan 17 00:24:36 2015 +0200
+++ b/src/lib-storage/index/index-mail.c Sat Jan 17 00:31:35 2015 +0200
@@ -9,6 +9,7 @@
#include "message-date.h"
#include "message-part-serialize.h"
#include "message-parser.h"
+#include "message-snippet.h"
#include "imap-bodystructure.h"
#include "imap-envelope.h"
#include "mail-cache.h"
@@ -19,6 +20,9 @@
#include <fcntl.h>
+#define BODY_SNIPPET_ALGO_V1 "1"
+#define BODY_SNIPPET_MAX_CHARS 100
+
struct mail_cache_field global_cache_fields[MAIL_INDEX_CACHE_FIELD_COUNT] = {
{ .name = "flags",
.type = MAIL_CACHE_FIELD_BITMASK,
@@ -54,6 +58,8 @@
{ .name = "mime.parts",
.type = MAIL_CACHE_FIELD_VARIABLE_SIZE },
{ .name = "binary.parts",
+ .type = MAIL_CACHE_FIELD_VARIABLE_SIZE },
+ { .name = "body.snippet",
.type = MAIL_CACHE_FIELD_VARIABLE_SIZE }
};
@@ -766,6 +772,9 @@
case MAIL_CACHE_PHYSICAL_FULL_SIZE:
fetch_field = MAIL_FETCH_PHYSICAL_SIZE;
break;
+ case MAIL_CACHE_BODY_SNIPPET:
+ fetch_field = MAIL_FETCH_BODY_SNIPPET;
+ break;
default:
i_unreached();
}
@@ -783,6 +792,16 @@
}
}
+static void index_mail_body_parsed_cache_body_snippet(struct index_mail *mail)
+{
+ if (mail->data.body_snippet != NULL &&
+ index_mail_want_cache(mail, MAIL_CACHE_BODY_SNIPPET)) {
+ index_mail_cache_add(mail, MAIL_CACHE_BODY_SNIPPET,
+ mail->data.body_snippet,
+ strlen(mail->data.body_snippet)+1);
+ }
+}
+
static void index_mail_cache_sizes(struct index_mail *mail)
{
static enum index_cache_field size_fields[] = {
@@ -832,6 +851,86 @@
(void)index_mail_cache_sent_date(mail);
}
+static struct message_part *
+index_mail_find_first_text_mime_part(struct message_part *parts)
+{
+ struct message_part_body_data *body_data = parts->context;
+ struct message_part *part;
+
+ i_assert(body_data != NULL);
+
+ if (body_data->content_type == NULL ||
+ strcasecmp(body_data->content_type, "\"text\"") == 0) {
+ /* use any text/ part, even if we don't know what exactly
+ it is. */
+ return parts;
+ }
+ if (strcasecmp(body_data->content_type, "\"multipart\"") != 0) {
+ /* for now we support only text Content-Types */
+ return NULL;
+ }
+
+ if (strcasecmp(body_data->content_subtype, "\"alternative\"") == 0) {
+ /* text/plain > text/html > text/ */
+ struct message_part *html_part = NULL, *text_part = NULL;
+
+ for (part = parts->children; part != NULL; part = part->next) {
+ struct message_part_body_data *sub_body_data =
+ part->context;
+
+ i_assert(sub_body_data != NULL);
+
+ if (strcasecmp(sub_body_data->content_type, "\"text\"") == 0) {
+ if (strcasecmp(sub_body_data->content_subtype, "\"plain\"") == 0)
+ return part;
+ if (strcasecmp(sub_body_data->content_subtype, "\"html\"") == 0)
+ html_part = part;
+ else
+ text_part = part;
+ }
+ }
+ return html_part != NULL ? html_part : text_part;
+ }
+ /* find the first usable MIME part */
+ for (part = parts->children; part != NULL; part = part->next) {
+ struct message_part *subpart =
+ index_mail_find_first_text_mime_part(part);
+ if (subpart != NULL)
+ return subpart;
+ }
+ return NULL;
+}
+
+static int index_mail_write_body_snippet(struct index_mail *mail)
+{
+ struct message_part *part;
+ struct istream *input;
+ string_t *str;
+ int ret;
+
+ i_assert(mail->data.parsed_bodystructure);
+
+ part = index_mail_find_first_text_mime_part(mail->data.parts);
+ if (part == NULL) {
+ mail->data.body_snippet = BODY_SNIPPET_ALGO_V1;
+ return 0;
+ }
+
+ if (mail_get_stream(&mail->mail.mail, NULL, NULL, &input) < 0)
+ return -1;
+ i_stream_seek(input, part->physical_pos);
+ input = i_stream_create_limit(input, part->header_size.physical_size +
+ part->body_size.physical_size);
+
+ str = str_new(mail->mail.data_pool, 128);
+ str_append(str, BODY_SNIPPET_ALGO_V1);
+ ret = message_snippet_generate(input, BODY_SNIPPET_MAX_CHARS, str);
+ if (ret == 0)
+ mail->data.body_snippet = str_c(str);
+ i_stream_destroy(&input);
+ return ret;
+}
+
static int
index_mail_parse_body_finish(struct index_mail *mail,
enum index_cache_field field, bool success)
@@ -884,6 +983,11 @@
mail->data.save_bodystructure_body = FALSE;
i_assert(mail->data.parts != NULL);
}
+ if (mail->data.save_body_snippet) {
+ if (index_mail_write_body_snippet(mail) < 0)
+ return -1;
+ mail->data.save_body_snippet = FALSE;
+ }
if (mail->data.no_caching) {
/* if we're here because we aborted parsing, don't get any
@@ -897,6 +1001,7 @@
index_mail_body_parsed_cache_flags(mail);
index_mail_body_parsed_cache_message_parts(mail);
index_mail_body_parsed_cache_bodystructure(mail, field);
+ index_mail_body_parsed_cache_body_snippet(mail);
index_mail_cache_sizes(mail);
index_mail_cache_dates(mail);
return 0;
@@ -1089,13 +1194,14 @@
struct index_mail_data *data = &mail->data;
string_t *str;
- if (data->parsed_bodystructure) {
+ if (data->parsed_bodystructure && field != MAIL_CACHE_BODY_SNIPPET) {
/* we have everything parsed already, but just not written to
a string */
index_mail_body_parsed_cache_bodystructure(mail, field);
} else {
if (data->save_bodystructure_header ||
- !data->save_bodystructure_body) {
+ !data->save_bodystructure_body ||
+ field == MAIL_CACHE_BODY_SNIPPET) {
/* we haven't parsed the header yet */
data->save_bodystructure_header = TRUE;
data->save_bodystructure_body = TRUE;
@@ -1129,6 +1235,10 @@
data->bodystructure = str_c(str);
}
break;
+ case MAIL_CACHE_BODY_SNIPPET:
+ i_assert(data->body_snippet != NULL &&
+ data->body_snippet[0] != '\0');
+ break;
default:
i_unreached();
}
@@ -1146,6 +1256,35 @@
str_append(str, " NIL NIL NIL NIL");
}
+static int
+index_mail_fetch_body_snippet(struct index_mail *mail, const char **value_r)
+{
+ const struct mail_cache_field *cache_fields = mail->ibox->cache_fields;
+ const unsigned int cache_field =
+ cache_fields[MAIL_CACHE_BODY_SNIPPET].idx;
+ string_t *str;
+
+ if (mail->data.body_snippet == NULL) {
+ str = str_new(mail->mail.data_pool, 128);
+ if (index_mail_cache_lookup_field(mail, str, cache_field) > 0 &&
+ str_len(str) > 0)
+ mail->data.body_snippet = str_c(str);
+ }
+ if (mail->data.body_snippet != NULL) {
+ *value_r = mail->data.body_snippet;
+ return 0;
+ }
+
+ /* reuse the IMAP bodystructure parsing code to get all the useful
+ headers that we need. */
+ mail->data.save_body_snippet = TRUE;
+ if (index_mail_parse_bodystructure(mail, MAIL_CACHE_BODY_SNIPPET) < 0)
+ return -1;
+ i_assert(mail->data.body_snippet != NULL);
+ *value_r = mail->data.body_snippet;
+ return 0;
+}
+
int index_mail_get_special(struct mail *_mail,
enum mail_fetch_field field, const char **value_r)
{
@@ -1250,6 +1389,8 @@
*value_r = data->from_envelope != NULL ?
data->from_envelope : "";
return 0;
+ case MAIL_FETCH_BODY_SNIPPET:
+ return index_mail_fetch_body_snippet(mail, value_r);
case MAIL_FETCH_UIDL_FILE_NAME:
case MAIL_FETCH_UIDL_BACKEND:
case MAIL_FETCH_SEARCH_RELEVANCY:
@@ -1520,6 +1661,16 @@
data->save_sent_date = TRUE;
}
}
+ if ((data->wanted_fields & MAIL_FETCH_BODY_SNIPPET) != 0) {
+ const unsigned int cache_field =
+ cache_fields[MAIL_CACHE_BODY_SNIPPET].idx;
+
+ if (mail_cache_field_exists(cache_view, _mail->seq,
+ cache_field) <= 0) {
+ data->access_part |= PARSE_HDR | PARSE_BODY;
+ data->save_body_snippet = TRUE;
+ }
+ }
if ((data->wanted_fields & (MAIL_FETCH_STREAM_HEADER |
MAIL_FETCH_STREAM_BODY)) != 0) {
if ((data->wanted_fields & MAIL_FETCH_STREAM_HEADER) != 0)
diff -r 08afd516a622 -r a76507bc3f36 src/lib-storage/index/index-mail.h
--- a/src/lib-storage/index/index-mail.h Sat Jan 17 00:24:36 2015 +0200
+++ b/src/lib-storage/index/index-mail.h Sat Jan 17 00:31:35 2015 +0200
@@ -23,6 +23,7 @@
MAIL_CACHE_GUID,
MAIL_CACHE_MESSAGE_PARTS,
MAIL_CACHE_BINARY_PARTS,
+ MAIL_CACHE_BODY_SNIPPET,
MAIL_INDEX_CACHE_FIELD_COUNT
};
@@ -83,7 +84,7 @@
struct message_part *parts;
struct message_binary_part *bin_parts;
const char *envelope, *body, *bodystructure, *guid, *filename;
- const char *from_envelope;
+ const char *from_envelope, *body_snippet;
struct message_part_envelope_data *envelope_data;
uint32_t seq;
@@ -114,6 +115,7 @@
unsigned int save_bodystructure_header:1;
unsigned int save_bodystructure_body:1;
unsigned int save_message_parts:1;
+ unsigned int save_body_snippet:1;
unsigned int stream_has_only_header:1;
unsigned int parsed_bodystructure:1;
unsigned int hdr_size_set:1;
diff -r 08afd516a622 -r a76507bc3f36 src/lib-storage/mail-storage.h
--- a/src/lib-storage/mail-storage.h Sat Jan 17 00:24:36 2015 +0200
+++ b/src/lib-storage/mail-storage.h Sat Jan 17 00:31:35 2015 +0200
@@ -154,7 +154,8 @@
MAIL_FETCH_SEARCH_RELEVANCY = 0x00100000,
MAIL_FETCH_GUID = 0x00200000,
MAIL_FETCH_POP3_ORDER = 0x00400000,
More information about the dovecot-cvs
mailing list