dovecot-2.2-pigeonhole: lib-sieve: Moved message body parsing code from body extension to Sieve core.
pigeonhole at rename-it.nl
pigeonhole at rename-it.nl
Fri Nov 13 23:30:39 UTC 2015
details: http://hg.rename-it.nl/dovecot-2.2-pigeonhole/rev/d1e5a06fc9d7
changeset: 2130:d1e5a06fc9d7
user: Stephan Bosch <stephan at rename-it.nl>
date: Sat Nov 14 00:30:30 2015 +0100
description:
lib-sieve: Moved message body parsing code from body extension to Sieve core.
This makes this available for other extensions.
diffstat:
src/lib-sieve/plugins/body/ext-body-common.c | 582 +--------------------------
src/lib-sieve/sieve-message.c | 568 ++++++++++++++++++++++++++-
src/lib-sieve/sieve-message.h | 23 +-
3 files changed, 594 insertions(+), 579 deletions(-)
diffs (truncated from 1277 to 300 lines):
diff -r 334f0ba2fd9b -r d1e5a06fc9d7 src/lib-sieve/plugins/body/ext-body-common.c
--- a/src/lib-sieve/plugins/body/ext-body-common.c Mon Nov 02 18:56:39 2015 +0100
+++ b/src/lib-sieve/plugins/body/ext-body-common.c Sat Nov 14 00:30:30 2015 +0100
@@ -7,11 +7,6 @@
#include "array.h"
#include "str.h"
#include "istream.h"
-#include "rfc822-parser.h"
-#include "message-date.h"
-#include "message-parser.h"
-#include "message-decoder.h"
-#include "mail-html2text.h"
#include "mail-storage.h"
#include "sieve-common.h"
@@ -22,571 +17,6 @@
#include "ext-body-common.h"
-struct ext_body_part {
- const char *content;
- unsigned long size;
-};
-
-struct ext_body_part_cached {
- const char *content_type;
-
- const char *decoded_body;
- const char *text_body;
- size_t decoded_body_size;
- size_t text_body_size;
-
- bool have_body; /* there's the empty end-of-headers line */
-};
-
-struct ext_body_message_context {
- pool_t pool;
- ARRAY(struct ext_body_part_cached) cached_body_parts;
- ARRAY(struct ext_body_part) return_body_parts;
- buffer_t *tmp_buffer;
- buffer_t *raw_body;
-};
-
-static bool _is_wanted_content_type
-(const char * const *wanted_types, const char *content_type)
-{
- const char *subtype = strchr(content_type, '/');
- size_t type_len;
-
- type_len = ( subtype == NULL ? strlen(content_type) :
- (size_t)(subtype - content_type) );
-
- i_assert( wanted_types != NULL );
-
- for (; *wanted_types != NULL; wanted_types++) {
- const char *wanted_subtype;
-
- if (**wanted_types == '\0') {
- /* empty string matches everything */
- return TRUE;
- }
-
- wanted_subtype = strchr(*wanted_types, '/');
- if (wanted_subtype == NULL) {
- /* match only main type */
- if (strlen(*wanted_types) == type_len &&
- strncasecmp(*wanted_types, content_type, type_len) == 0)
- return TRUE;
- } else {
- /* match whole type/subtype */
- if (strcasecmp(*wanted_types, content_type) == 0)
- return TRUE;
- }
- }
- return FALSE;
-}
-
-static bool _want_multipart_content_type
-(const char * const *wanted_types)
-{
- for (; *wanted_types != NULL; wanted_types++) {
- if (**wanted_types == '\0') {
- /* empty string matches everything */
- return TRUE;
- }
-
- /* match only main type */
- if ( strncasecmp(*wanted_types, "multipart", 9) == 0 &&
- ( strlen(*wanted_types) == 9 || *(*wanted_types+9) == '/' ) )
- return TRUE;
- }
-
- return FALSE;
-}
-
-
-static bool ext_body_get_return_parts
-(struct ext_body_message_context *ctx, const char * const *wanted_types,
- bool extract_text)
-{
- const struct ext_body_part_cached *body_parts;
- unsigned int i, count;
- struct ext_body_part *return_part;
-
- /* Check whether any body parts are cached already */
- body_parts = array_get(&ctx->cached_body_parts, &count);
- if ( count == 0 )
- return FALSE;
-
- /* Clear result array */
- array_clear(&ctx->return_body_parts);
-
- /* Fill result array with requested content_types */
- for (i = 0; i < count; i++) {
- if (!body_parts[i].have_body) {
- /* Part has no body; according to RFC this MUST not match to anything and
- * therefore it is not included in the result.
- */
- continue;
- }
-
- /* Skip content types that are not requested */
- if (!_is_wanted_content_type(wanted_types, body_parts[i].content_type))
- continue;
-
- /* Add new item to the result */
- return_part = array_append_space(&ctx->return_body_parts);
-
- /* Depending on whether a decoded body part is requested, the appropriate
- * cache item is read. If it is missing, this function fails and the cache
- * needs to be completed by ext_body_parts_add_missing().
- */
- if (extract_text) {
- if (body_parts[i].text_body == NULL)
- return FALSE;
- return_part->content = body_parts[i].text_body;
- return_part->size = body_parts[i].text_body_size;
- } else {
- if (body_parts[i].decoded_body == NULL)
- return FALSE;
- return_part->content = body_parts[i].decoded_body;
- return_part->size = body_parts[i].decoded_body_size;
- }
- }
-
- return TRUE;
-}
-
-static void ext_body_part_save
-(struct ext_body_message_context *ctx,
- struct ext_body_part_cached *body_part, bool extract_text)
-{
- buffer_t *buf = ctx->tmp_buffer;
- buffer_t *text_buf = NULL;
- char *part_data;
- size_t part_size;
-
- /* Add terminating NUL to the body part buffer */
- buffer_append_c(buf, '\0');
-
- if ( extract_text ) {
- if ( mail_html2text_content_type_match
- (body_part->content_type) ) {
- struct mail_html2text *html2text;
-
- text_buf = buffer_create_dynamic(default_pool, 4096);
-
- /* Remove HTML markup */
- html2text = mail_html2text_init(0);
- mail_html2text_more(html2text, buf->data, buf->used, text_buf);
- mail_html2text_deinit(&html2text);
-
- buf = text_buf;
- }
- }
-
- part_data = p_malloc(ctx->pool, buf->used);
- memcpy(part_data, buf->data, buf->used);
- part_size = buf->used - 1;
-
- if ( text_buf != NULL)
- buffer_free(&text_buf);
-
- /* Depending on whether the part is processed into text, store message
- * body in the appropriate cache location.
- */
- if ( !extract_text ) {
- body_part->decoded_body = part_data;
- body_part->decoded_body_size = part_size;
- } else {
- body_part->text_body = part_data;
- body_part->text_body_size = part_size;
- }
-
- /* Clear buffer */
- buffer_set_used_size(ctx->tmp_buffer, 0);
-}
-
-static const char *_parse_content_type(const struct message_header_line *hdr)
-{
- struct rfc822_parser_context parser;
- string_t *content_type;
-
- /* Initialize parsing */
- rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
- (void)rfc822_skip_lwsp(&parser);
-
- /* Parse content type */
- content_type = t_str_new(64);
- if (rfc822_parse_content_type(&parser, content_type) < 0)
- return "";
-
- /* Content-type value must end here, otherwise it is invalid after all */
- (void)rfc822_skip_lwsp(&parser);
- if ( parser.data != parser.end && *parser.data != ';' )
- return "";
-
- /* Success */
- return str_c(content_type);
-}
-
-/* ext_body_parts_add_missing():
- * Add requested message body parts to the cache that are missing.
- */
-static int ext_body_parts_add_missing
-(const struct sieve_runtime_env *renv,
- struct ext_body_message_context *ctx,
- const char *const *content_types, bool extract_text)
-{
- buffer_t *buf = ctx->tmp_buffer;
- struct mail *mail = sieve_message_get_mail(renv->msgctx);
- struct ext_body_part_cached *body_part = NULL, *header_part = NULL;
- struct message_parser_ctx *parser;
- struct message_decoder_context *decoder;
- struct message_block block, decoded;
- struct message_part *parts, *prev_part = NULL;
- ARRAY(struct message_part *) part_index;
- struct istream *input;
- unsigned int idx = 0;
- bool save_body = FALSE, want_multipart, have_all;
- int ret;
-
- /* First check whether any are missing */
- if (ext_body_get_return_parts(ctx, content_types, extract_text)) {
- /* Cache hit; all are present */
- return SIEVE_EXEC_OK;
- }
-
- /* Get the message stream */
- if ( mail_get_stream(mail, NULL, NULL, &input) < 0 ) {
- return sieve_runtime_mail_error(renv, mail,
- "body test: failed to read input message");
- }
- if (mail_get_parts(mail, &parts) < 0) {
- return sieve_runtime_mail_error(renv, mail,
- "body test: failed to parse input message");
- }
-
- if ( (want_multipart=_want_multipart_content_type(content_types)) ) {
- t_array_init(&part_index, 8);
- }
-
- buffer_set_used_size(buf, 0);
-
- /* Initialize body decoder */
- decoder = message_decoder_init(NULL, 0);
-
- //parser = message_parser_init_from_parts(parts, input, 0,
- //MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS);
- parser = message_parser_init(ctx->pool, input, 0,
- MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS);
- while ( (ret = message_parser_parse_next_block(parser, &block)) > 0 ) {
-
- if ( block.part != prev_part ) {
- bool message_rfc822 = FALSE;
-
- /* Save previous body part */
- if ( body_part != NULL ) {
- /* Treat message/rfc822 separately; headers become content */
- if ( block.part->parent == prev_part &&
- strcmp(body_part->content_type, "message/rfc822") == 0 ) {
- message_rfc822 = TRUE;
- } else {
- if ( save_body ) {
- ext_body_part_save(ctx, body_part, extract_text);
- }
- }
- }
-
- /* Start processing next */
- body_part = array_idx_modifiable(&ctx->cached_body_parts, idx);
- body_part->content_type = "text/plain";
-
- /* Check whether this is the epilogue block of a wanted multipart part */
- if ( want_multipart ) {
- array_idx_set(&part_index, idx, &block.part);
-
- if ( prev_part != NULL && prev_part->next != block.part &&
- block.part->parent != prev_part ) {
- struct message_part *const *iparts;
More information about the dovecot-cvs mailing list