dovecot-2.2-pigeonhole: lib-sieve: Created message body part iterator and mime header list.

Sun Nov 29 10:53:29 UTC 2015

details:   http://hg.rename-it.nl/dovecot-2.2-pigeonhole/rev/be0971931cfe
changeset: 2138:be0971931cfe
user:      Stephan Bosch <stephan at rename-it.nl>
date:      Sun Nov 29 11:49:47 2015 +0100
description:
lib-sieve: Created message body part iterator and mime header list.

diffstat:

 src/lib-sieve/sieve-message.c |  441 ++++++++++++++++++++++++++++++++++++++---
 src/lib-sieve/sieve-message.h |   30 ++
 2 files changed, 438 insertions(+), 33 deletions(-)

diffs (truncated from 661 to 300 lines):

diff -r c107764e6008 -r be0971931cfe src/lib-sieve/sieve-message.c

--- a/src/lib-sieve/sieve-message.c	Sun Nov 29 11:48:29 2015 +0100
+++ b/src/lib-sieve/sieve-message.c	Sun Nov 29 11:49:47 2015 +0100
@@ -12,6 +12,7 @@
 #include "message-date.h"
 #include "message-parser.h"
 #include "message-decoder.h"
+#include "message-header-decode.h"
 #include "mail-html2text.h"
 #include "mail-storage.h"
 #include "mail-user.h"
@@ -52,16 +53,18 @@
  * Message context
  */
 
-struct sieve_message_version {
-	struct mail *mail;
-	struct mailbox *box;
-	struct mailbox_transaction_context *trans;
-	struct edit_mail *edit_mail;
+struct sieve_message_header {
+	const char *name;
+
+	const unsigned char *value, *utf8_value;
+	size_t value_len, utf8_value_len;
 };
 
 struct sieve_message_part {
 	struct sieve_message_part *parent, *next, *children;
 
+	ARRAY(struct sieve_message_header) headers;
+
 	const char *content_type;
 	const char *content_disposition;
 
@@ -74,6 +77,13 @@
 	unsigned int epilogue:1;  /* this is a multipart epilogue */
 };
 
+struct sieve_message_version {
+	struct mail *mail;
+	struct mailbox *box;
+	struct mailbox_transaction_context *trans;
+	struct edit_mail *edit_mail;
+};
+
 struct sieve_message_context {
 	pool_t pool;
 	pool_t context_pool;
@@ -841,12 +851,28 @@
  * Message body
  */
 
+static void str_replace_nuls(string_t *str)
+{
+	char *data = str_c_modifiable(str);
+	unsigned int i, len = str_len(str);
+
+	for (i = 0; i < len; i++) {
+		if (data[i] == '\0')
+			data[i] = ' ';
+	}
+}
+
 static bool _is_wanted_content_type
 (const char * const *wanted_types, const char *content_type)
+ATTR_NULL(1)
 {
-	const char *subtype = strchr(content_type, '/');
+	const char *subtype;
 	size_t type_len;
 
+	if ( wanted_types == NULL )
+		return TRUE;
+
+	subtype = strchr(content_type, '/');
 	type_len = ( subtype == NULL ? strlen(content_type) :
 		(size_t)(subtype - content_type) );
 
@@ -1039,13 +1065,15 @@
 static int sieve_message_parts_add_missing
 (const struct sieve_runtime_env *renv,
 	const char *const *content_types,
-	bool extract_text)
+	bool extract_text, bool iter_all)
+	ATTR_NULL(2)
 {
 	struct sieve_message_context *msgctx = renv->msgctx;
 	pool_t pool = msgctx->context_pool;
 	struct mail *mail = sieve_message_get_mail(renv->msgctx);
 	enum message_parser_flags mparser_flags =
 		MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS;
+	ARRAY(struct sieve_message_header) headers;
 	struct sieve_message_part *body_part, *header_part, *last_part;
 	struct message_parser_ctx *parser;
 	struct message_decoder_context *decoder;
@@ -1055,11 +1083,12 @@
 	struct istream *input;
 	unsigned int idx = 0;
 	bool save_body = FALSE, have_all;
+	string_t *hdr_content = NULL;
 	int ret;
 
 	/* First check whether any are missing */
-	if (sieve_message_body_get_return_parts
-		(renv, content_types, extract_text)) {
+	if ( !iter_all && sieve_message_body_get_return_parts
+		(renv, content_types, extract_text) ) {
 		/* Cache hit; all are present */
 		return SIEVE_EXEC_OK;
 	}
@@ -1077,6 +1106,13 @@
 	buf = buffer_create_dynamic(default_pool, 4096);
 	body_part = header_part = last_part = NULL;
 
+	if (iter_all) {
+		t_array_init(&headers, 64);
+		hdr_content = t_str_new(512);
+	} else {
+		memset(&headers, 0, sizeof(headers));
+	}
+
 	/* Initialize body decoder */
 	decoder = message_decoder_init(NULL, 0);
 
@@ -1088,6 +1124,9 @@
 	while ( (ret=message_parser_parse_next_block
 		(parser, &block)) > 0 ) {
 		struct sieve_message_part **body_part_idx;
+		struct message_header_line *hdr = block.hdr;
+		struct sieve_message_header *header;
+		unsigned char *data;
 
 		if ( block.part != prev_mpart ) {
 			bool message_rfc822 = FALSE;
@@ -1104,15 +1143,23 @@
 							(renv, buf, body_part, extract_text);
 					}
 				}
+				if ( iter_all && !array_is_created(&body_part->headers) &&
+					array_count(&headers) > 0 ) {
+					p_array_init(&body_part->headers, pool, array_count(&headers));
+					array_copy(&body_part->headers.arr, 0,
+						&headers.arr, 0, array_count(&headers));
+				}
 			}
 
 			/* Start processing next part */
 			body_part_idx = array_idx_modifiable
 				(&msgctx->cached_body_parts, idx);
-			if (*body_part_idx == NULL)
+			if ( *body_part_idx == NULL )
 				*body_part_idx = p_new(pool, struct sieve_message_part, 1);
 			body_part = *body_part_idx;
 			body_part->content_type = "text/plain";
+			if ( iter_all )
+				array_clear(&headers);
 
 			/* Copy tree structure */
 			if ( block.part->context != NULL ) {
@@ -1124,7 +1171,7 @@
 				body_part->content_type = epipart->content_type;
 				body_part->have_body = TRUE;
 				body_part->epilogue = TRUE;
-				save_body = _is_wanted_content_type
+				save_body = iter_all || _is_wanted_content_type
 					(content_types, body_part->content_type);
 
 			} else {
@@ -1171,8 +1218,12 @@
 			idx++;
 		}
 
-		if ( block.hdr != NULL || block.size == 0 ) {
-			bool is_ctype = FALSE;
+		if ( hdr != NULL || block.size == 0 ) {
+			enum {
+				_HDR_CONTENT_TYPE,
+				_HDR_CONTENT_DISPOSITION,
+				_HDR_OTHER
+			} hdr_field;
 
 			/* Reading headers */
 
@@ -1181,7 +1232,7 @@
 				(decoder, &block, &decoded);
 
 			/* Check for end of headers */
-			if ( block.hdr == NULL ) {
+			if ( hdr == NULL ) {
 				/* Save headers for message/rfc822 part */
 				if ( header_part != NULL ) {
 					sieve_message_part_save
@@ -1191,7 +1242,7 @@
 
 				/* Save bodies only if we have a wanted content-type */
 				i_assert( body_part != NULL );
-				save_body = _is_wanted_content_type
+				save_body = iter_all || _is_wanted_content_type
 					(content_types, body_part->content_type);
 				continue;
 			}
@@ -1199,47 +1250,101 @@
 			/* Encountered the empty line that indicates the end of the headers and
 			 * the start of the body
 			 */
-			if ( block.hdr->eoh ) {
+			if ( hdr->eoh ) {
 				i_assert( body_part != NULL );
 				body_part->have_body = TRUE;
+				continue;
 			} else if ( header_part != NULL ) {
 				/* Save message/rfc822 header as part content */
-				if ( block.hdr->continued ) {
-					buffer_append(buf, block.hdr->value, block.hdr->value_len);
+				if ( hdr->continued ) {
+					buffer_append(buf, hdr->value, hdr->value_len);
 				} else {
-					buffer_append(buf, block.hdr->name, block.hdr->name_len);
-					buffer_append(buf, block.hdr->middle, block.hdr->middle_len);
-					buffer_append(buf, block.hdr->value, block.hdr->value_len);
+					buffer_append(buf, hdr->name, hdr->name_len);
+					buffer_append(buf, hdr->middle, hdr->middle_len);
+					buffer_append(buf, hdr->value, hdr->value_len);
 				}
-				if ( !block.hdr->no_newline ) {
+				if ( !hdr->no_newline ) {
 					buffer_append(buf, "\r\n", 2);
 				}
 			}
 
-			/* We're interested in only the Content-Type: header */
-			if ( strcasecmp(block.hdr->name, "Content-Type" ) == 0 )
-				is_ctype = TRUE;
-			else if ( strcasecmp(block.hdr->name, "Content-Disposition" ) != 0 )
+			if ( strcasecmp(hdr->name, "Content-Type" ) == 0 )
+				hdr_field = _HDR_CONTENT_TYPE;
+			else if ( strcasecmp(hdr->name, "Content-Disposition" ) != 0 )
+				hdr_field = _HDR_CONTENT_DISPOSITION;
+			else if ( iter_all && !array_is_created(&body_part->headers) )
+				hdr_field = _HDR_OTHER;
+			else {
+				/* Not interested in this header */
 				continue;
+			}
 
 			/* Header can have folding whitespace. Acquire the full value before
 			 * continuing
 			 */
-			if ( block.hdr->continues ) {
-				block.hdr->use_full_value = TRUE;
+			if ( hdr->continues ) {
+				hdr->use_full_value = TRUE;
 				continue;
 			}
 
+			if ( iter_all && !array_is_created(&body_part->headers) ) {
+				/* Add header */
+				header = array_append_space(&headers);
+				header->name = p_strdup(pool, hdr->name);
+
+				// FIXME: trim header values
+	
+				/* Decode MIME encoded-words. */
+				str_truncate(hdr_content, 0);
+				message_header_decode_utf8
+					(hdr->full_value, hdr->full_value_len, hdr_content, NULL);
+				if ( hdr->full_value_len != str_len(hdr_content) ||
+					strncmp(str_c(hdr_content), (const char *)hdr->full_value,
+						hdr->full_value_len) != 0 ) {
+					if ( strlen(str_c(hdr_content)) != str_len(hdr_content) ) {
+						/* replace NULs with spaces */
+						str_replace_nuls(hdr_content);
+					}
+					/* store raw */
+					data = p_malloc(pool, hdr->full_value_len + 1);
+					data[hdr->full_value_len] = '\0';
+					header->value = memcpy(data,
+						hdr->full_value, hdr->full_value_len);
+					header->value_len = hdr->full_value_len;
+					/* store decoded */
+					data = p_malloc(pool, str_len(hdr_content) + 1);
+					data[str_len(hdr_content)] = '\0';
+					header->utf8_value = memcpy(data,
+						str_data(hdr_content), str_len(hdr_content));
+					header->utf8_value_len = str_len(hdr_content);
+				} else {
+					/* raw == decoded */
+					data = p_malloc(pool, hdr->full_value_len + 1);
+					data[hdr->full_value_len] = '\0';
+					header->value = header->utf8_value =
+						memcpy(data, hdr->full_value, hdr->full_value_len);
+					header->value_len = header->utf8_value_len =
+						hdr->full_value_len;
+				}
+
+				if ( hdr_field == _HDR_OTHER )
+					continue;
+			}
+