dovecot-2.2-pigeonhole: lib-sieve: body extension: Properly impl...
pigeonhole at rename-it.nl
pigeonhole at rename-it.nl
Tue Oct 27 22:43:00 UTC 2015
details: http://hg.rename-it.nl/dovecot-2.2-pigeonhole/rev/d24a90790fc8
changeset: 2122:d24a90790fc8
user: Stephan Bosch <stephan at rename-it.nl>
date: Tue Oct 27 23:42:52 2015 +0100
description:
lib-sieve: body extension: Properly implemented the :text body transform.
It now extracts bare text from HTML/XHMTL parts.
Other text/* parts are still returned as is. Any other unrecognized content types are skipped.
diffstat:
Makefile.am | 1 +
README | 3 +-
src/lib-sieve/plugins/body/ext-body-common.c | 152 ++++++++++++++--------
src/lib-sieve/plugins/body/ext-body.c | 18 +--
tests/extensions/body/text.svtest | 184 +++++++++++++++++++++++++++
5 files changed, 284 insertions(+), 74 deletions(-)
diffs (truncated from 571 to 300 lines):
diff -r 400be376fe02 -r d24a90790fc8 Makefile.am
--- a/Makefile.am Wed Oct 21 23:31:06 2015 +0200
+++ b/Makefile.am Tue Oct 27 23:42:52 2015 +0100
@@ -109,6 +109,7 @@
tests/extensions/body/errors.svtest \
tests/extensions/body/raw.svtest \
tests/extensions/body/content.svtest \
+ tests/extensions/body/text.svtest \
tests/extensions/body/match-values.svtest \
tests/extensions/regex/basic.svtest \
tests/extensions/regex/match-values.svtest \
diff -r 400be376fe02 -r d24a90790fc8 README
--- a/README Wed Oct 21 23:31:06 2015 +0200
+++ b/README Tue Oct 27 23:42:52 2015 +0100
@@ -99,8 +99,7 @@
The following Sieve language extensions are also supported:
copy (RFC 3894): fully supported.
- body (RFC 5173): almost fully supported, but the text body-transform
- implementation is simple.
+ body (RFC 5173): fully supported.
environment (RFC 5183): fully supported (v0.4.0+).
variables (RFC 5229): fully supported.
vacation (RFC 5230): fully supported.
diff -r 400be376fe02 -r d24a90790fc8 src/lib-sieve/plugins/body/ext-body-common.c
--- a/src/lib-sieve/plugins/body/ext-body-common.c Wed Oct 21 23:31:06 2015 +0200
+++ b/src/lib-sieve/plugins/body/ext-body-common.c Tue Oct 27 23:42:52 2015 +0100
@@ -11,6 +11,7 @@
#include "message-date.h"
#include "message-parser.h"
#include "message-decoder.h"
+#include "mail-html2text.h"
#include "mail-storage.h"
#include "sieve-common.h"
@@ -21,11 +22,6 @@
#include "ext-body-common.h"
-/* FIXME: This implementation is largely borrowed from the original sieve-cmu.c
- * of the old cmusieve plugin. This nees work to match current specification of
- * the body extension.
- */
-
struct ext_body_part {
const char *content;
unsigned long size;
@@ -34,10 +30,10 @@
struct ext_body_part_cached {
const char *content_type;
- const char *raw_body;
const char *decoded_body;
- size_t raw_body_size;
+ const char *text_body;
size_t decoded_body_size;
+ size_t text_body_size;
bool have_body; /* there's the empty end-of-headers line */
};
@@ -105,7 +101,7 @@
static bool ext_body_get_return_parts
(struct ext_body_message_context *ctx, const char * const *wanted_types,
- bool decode_to_plain)
+ bool extract_text)
{
const struct ext_body_part_cached *body_parts;
unsigned int i, count;
@@ -139,16 +135,16 @@
* cache item is read. If it is missing, this function fails and the cache
* needs to be completed by ext_body_parts_add_missing().
*/
- if (decode_to_plain) {
+ if (extract_text) {
+ if (body_parts[i].text_body == NULL)
+ return FALSE;
+ return_part->content = body_parts[i].text_body;
+ return_part->size = body_parts[i].text_body_size;
+ } else {
if (body_parts[i].decoded_body == NULL)
return FALSE;
return_part->content = body_parts[i].decoded_body;
- return_part->size = body_parts[i].decoded_body_size;
- } else {
- if (body_parts[i].raw_body == NULL)
- return FALSE;
- return_part->content = body_parts[i].raw_body;
- return_part->size = body_parts[i].raw_body_size;
+ return_part->size = body_parts[i].decoded_body_size;
}
}
@@ -157,32 +153,52 @@
static void ext_body_part_save
(struct ext_body_message_context *ctx,
- struct ext_body_part_cached *body_part, bool decoded)
+ struct ext_body_part_cached *body_part, bool extract_text)
{
buffer_t *buf = ctx->tmp_buffer;
+ buffer_t *text_buf = NULL;
char *part_data;
size_t part_size;
/* Add terminating NUL to the body part buffer */
buffer_append_c(buf, '\0');
+ if ( extract_text ) {
+ if ( mail_html2text_content_type_match
+ (body_part->content_type) ) {
+ struct mail_html2text *html2text;
+
+ text_buf = buffer_create_dynamic(default_pool, 4096);
+
+ /* Remove HTML markup */
+ html2text = mail_html2text_init(0);
+ mail_html2text_more(html2text, buf->data, buf->used, text_buf);
+ mail_html2text_deinit(&html2text);
+
+ buf = text_buf;
+ }
+ }
+
part_data = p_malloc(ctx->pool, buf->used);
memcpy(part_data, buf->data, buf->used);
part_size = buf->used - 1;
- /* Depending on whether the part is decoded or not store message body in the
- * appropriate cache location.
+ if ( text_buf != NULL)
+ buffer_free(&text_buf);
+
+ /* Depending on whether the part is processed into text, store message
+ * body in the appropriate cache location.
*/
- if ( !decoded ) {
- body_part->raw_body = part_data;
- body_part->raw_body_size = part_size;
- } else {
+ if ( !extract_text ) {
body_part->decoded_body = part_data;
body_part->decoded_body_size = part_size;
+ } else {
+ body_part->text_body = part_data;
+ body_part->text_body_size = part_size;
}
/* Clear buffer */
- buffer_set_used_size(buf, 0);
+ buffer_set_used_size(ctx->tmp_buffer, 0);
}
static const char *_parse_content_type(const struct message_header_line *hdr)
@@ -214,8 +230,9 @@
static int ext_body_parts_add_missing
(const struct sieve_runtime_env *renv,
struct ext_body_message_context *ctx,
- const char *const *content_types, bool decode_to_plain)
+ const char *const *content_types, bool extract_text)
{
+ buffer_t *buf = ctx->tmp_buffer;
struct mail *mail = sieve_message_get_mail(renv->msgctx);
struct ext_body_part_cached *body_part = NULL, *header_part = NULL;
struct message_parser_ctx *parser;
@@ -229,7 +246,7 @@
int ret;
/* First check whether any are missing */
- if (ext_body_get_return_parts(ctx, content_types, decode_to_plain)) {
+ if (ext_body_get_return_parts(ctx, content_types, extract_text)) {
/* Cache hit; all are present */
return SIEVE_EXEC_OK;
}
@@ -248,10 +265,10 @@
t_array_init(&part_index, 8);
}
- buffer_set_used_size(ctx->tmp_buffer, 0);
+ buffer_set_used_size(buf, 0);
/* Initialize body decoder */
- decoder = decode_to_plain ? message_decoder_init(NULL, 0) : NULL;
+ decoder = message_decoder_init(NULL, 0);
//parser = message_parser_init_from_parts(parts, input, 0,
//MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS);
@@ -270,7 +287,7 @@
message_rfc822 = TRUE;
} else {
if ( save_body ) {
- ext_body_part_save(ctx, body_part, decoder != NULL);
+ ext_body_part_save(ctx, body_part, extract_text);
}
}
}
@@ -321,14 +338,13 @@
/* Reading headers */
/* Decode block */
- if ( decoder != NULL )
- (void)message_decoder_decode_next_block(decoder, &block, &decoded);
+ (void)message_decoder_decode_next_block(decoder, &block, &decoded);
/* Check for end of headers */
if ( block.hdr == NULL ) {
/* Save headers for message/rfc822 part */
if ( header_part != NULL ) {
- ext_body_part_save(ctx, header_part, decoder != NULL);
+ ext_body_part_save(ctx, header_part, extract_text);
header_part = NULL;
}
@@ -348,14 +364,14 @@
} else if ( header_part != NULL ) {
/* Save message/rfc822 header as part content */
if ( block.hdr->continued ) {
- buffer_append(ctx->tmp_buffer, block.hdr->value, block.hdr->value_len);
+ buffer_append(buf, block.hdr->value, block.hdr->value_len);
} else {
- buffer_append(ctx->tmp_buffer, block.hdr->name, block.hdr->name_len);
- buffer_append(ctx->tmp_buffer, block.hdr->middle, block.hdr->middle_len);
- buffer_append(ctx->tmp_buffer, block.hdr->value, block.hdr->value_len);
+ buffer_append(buf, block.hdr->name, block.hdr->name_len);
+ buffer_append(buf, block.hdr->middle, block.hdr->middle_len);
+ buffer_append(buf, block.hdr->value, block.hdr->value_len);
}
if ( !block.hdr->no_newline ) {
- buffer_append(ctx->tmp_buffer, "\r\n", 2);
+ buffer_append(buf, "\r\n", 2);
}
}
@@ -384,32 +400,27 @@
/* Reading body */
if ( save_body ) {
- if ( decoder != NULL ) {
- (void)message_decoder_decode_next_block(decoder, &block, &decoded);
- buffer_append(ctx->tmp_buffer, decoded.data, decoded.size);
- } else {
- buffer_append(ctx->tmp_buffer, block.data, block.size);
- }
+ (void)message_decoder_decode_next_block(decoder, &block, &decoded);
+ buffer_append(buf, decoded.data, decoded.size);
}
}
/* Save last body part if necessary */
if ( header_part != NULL ) {
- ext_body_part_save(ctx, header_part, decoder != NULL);
+ ext_body_part_save(ctx, header_part, FALSE);
} else if ( body_part != NULL && save_body ) {
- ext_body_part_save(ctx, body_part, decoder != NULL);
+ ext_body_part_save(ctx, body_part, extract_text);
}
/* Try to fill the return_body_parts array once more */
- have_all = ext_body_get_return_parts(ctx, content_types, decode_to_plain);
+ have_all = ext_body_get_return_parts(ctx, content_types, extract_text);
/* This time, failure is a bug */
i_assert(have_all);
/* Cleanup */
(void)message_parser_deinit(&parser, &parts);
- if (decoder != NULL)
- message_decoder_deinit(&decoder);
+ message_decoder_deinit(&decoder);
/* Return status */
if ( input->stream_errno != 0 ) {
@@ -453,7 +464,7 @@
static int ext_body_get_content
(const struct sieve_runtime_env *renv, const char * const *content_types,
- int decode_to_plain, struct ext_body_part **parts_r)
+ struct ext_body_part **parts_r)
{
const struct sieve_extension *this_ext = renv->oprtn->ext;
struct ext_body_message_context *ctx =
@@ -463,7 +474,41 @@
T_BEGIN {
/* Fill the return_body_parts array */
status = ext_body_parts_add_missing
- (renv, ctx, content_types, decode_to_plain != 0);
+ (renv, ctx, content_types, FALSE);
+ } T_END;
+
+ /* Check status */
+ if ( status <= 0 )
+ return status;
+
+ /* Return the array of body items */
+ (void) array_append_space(&ctx->return_body_parts); /* NULL-terminate */
+ *parts_r = array_idx_modifiable(&ctx->return_body_parts, 0);
+
+ return status;
+}
+
More information about the dovecot-cvs
mailing list