dovecot-2.2: imap: URLFETCH BINARY BODYPARTSTRUCTURE returns binary-decoded line counts.

dovecot at dovecot.org dovecot at dovecot.org
Tue Oct 23 20:16:21 EEST 2012


details:   http://hg.dovecot.org/dovecot-2.2/rev/3484591230ac
changeset: 15237:3484591230ac
user:      Timo Sirainen <tss at iki.fi>
date:      Tue Oct 23 20:15:36 2012 +0300
description:
imap: URLFETCH BINARY BODYPARTSTRUCTURE returns binary-decoded line counts.

diffstat:

 src/lib-imap-storage/imap-msgpart.c       |   7 +-
 src/lib-mail/message-binary-part.c        |   7 +-
 src/lib-mail/message-binary-part.h        |   3 +
 src/lib-storage/fail-mail.c               |   1 +
 src/lib-storage/index/index-mail-binary.c |  91 ++++++++++++++++++++++++++----
 src/lib-storage/index/index-mail.h        |   3 +-
 src/lib-storage/mail-storage-private.h    |   3 +-
 src/lib-storage/mail-storage.h            |   7 +-
 src/lib-storage/mail.c                    |  11 ++-
 9 files changed, 107 insertions(+), 26 deletions(-)

diffs (truncated from 378 to 300 lines):

diff -r d5706259963b -r 3484591230ac src/lib-imap-storage/imap-msgpart.c
--- a/src/lib-imap-storage/imap-msgpart.c	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-imap-storage/imap-msgpart.c	Tue Oct 23 20:15:36 2012 +0300
@@ -706,6 +706,7 @@
 	struct imap_msgpart_open_result result;
 	struct message_part *part;
 	bool include_hdr;
+	unsigned int lines;
 	int ret;
 
 	if (!msgpart->decode_cte_to_binary ||
@@ -733,7 +734,7 @@
 			return -1;
 	}
 	include_hdr = msgpart->fetch_type == FETCH_FULL;
-	return mail_get_binary_size(mail, part, include_hdr, size_r);
+	return mail_get_binary_size(mail, part, include_hdr, size_r, &lines);
 }
 
 static int
@@ -769,13 +770,15 @@
 {
 	struct message_part **pos;
 	uoff_t size;
+	unsigned int lines;
 
-	if (mail_get_binary_size(mail, part, FALSE, &size) < 0)
+	if (mail_get_binary_size(mail, part, FALSE, &size, &lines) < 0)
 		return -1;
 
 	*binpart_r = t_new(struct message_part, 1);
 	**binpart_r = *part;
 	(*binpart_r)->body_size.virtual_size = size;
+	(*binpart_r)->body_size.lines = lines;
 
 	pos = &(*binpart_r)->children;
 	for (part = part->children; part != NULL; part = part->next) {
diff -r d5706259963b -r 3484591230ac src/lib-mail/message-binary-part.c
--- a/src/lib-mail/message-binary-part.c	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-mail/message-binary-part.c	Tue Oct 23 20:15:36 2012 +0300
@@ -13,6 +13,7 @@
 		numpack_encode(dest, part->physical_pos);
 		numpack_encode(dest, part->binary_hdr_size);
 		numpack_encode(dest, part->binary_body_size);
+		numpack_encode(dest, part->binary_body_lines_count);
 	}
 }
 
@@ -20,7 +21,7 @@
 				    struct message_binary_part **parts_r)
 {
 	const uint8_t *p = data, *end = p + size;
-	uint64_t n1, n2, n3;
+	uint64_t n1, n2, n3, n4;
 	struct message_binary_part *part = NULL, *prev_part = NULL;
 
 	while (p != end) {
@@ -29,11 +30,13 @@
 		prev_part = part;
 		if (numpack_decode(&p, end, &n1) < 0 ||
 		    numpack_decode(&p, end, &n2) < 0 ||
-		    numpack_decode(&p, end, &n3) < 0)
+		    numpack_decode(&p, end, &n3) < 0 ||
+		    numpack_decode(&p, end, &n4) < 0)
 			return -1;
 		part->physical_pos = n1;
 		part->binary_hdr_size = n2;
 		part->binary_body_size = n3;
+		part->binary_body_lines_count = n4;
 	}
 	*parts_r = part;
 	return 0;
diff -r d5706259963b -r 3484591230ac src/lib-mail/message-binary-part.h
--- a/src/lib-mail/message-binary-part.h	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-mail/message-binary-part.h	Tue Oct 23 20:15:36 2012 +0300
@@ -12,6 +12,9 @@
 	   "binary". */
 	uoff_t binary_hdr_size;
 	uoff_t binary_body_size;
+	/* BODYSTRUCTURE for text/ and message/rfc822 parts includes lines
+	   count. Decoding may change these numbers. */
+	unsigned int binary_body_lines_count;
 };
 
 /* Serialize message binary_part. */
diff -r d5706259963b -r 3484591230ac src/lib-storage/fail-mail.c
--- a/src/lib-storage/fail-mail.c	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-storage/fail-mail.c	Tue Oct 23 20:15:36 2012 +0300
@@ -181,6 +181,7 @@
 			    const struct message_part *part ATTR_UNUSED,
 			    bool include_hdr ATTR_UNUSED,
 			    uoff_t *size_r ATTR_UNUSED,
+			    unsigned int *body_lines_r ATTR_UNUSED,
 			    bool *binary_r ATTR_UNUSED,
 			    struct istream **stream_r ATTR_UNUSED)
 {
diff -r d5706259963b -r 3484591230ac src/lib-storage/index/index-mail-binary.c
--- a/src/lib-storage/index/index-mail-binary.c	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-storage/index/index-mail-binary.c	Tue Oct 23 20:15:36 2012 +0300
@@ -24,7 +24,8 @@
 
 struct binary_block {
 	struct istream *input;
-	struct message_binary_part bin_part;
+	uoff_t physical_pos;
+	unsigned int body_lines_count;
 	bool converted, converted_hdr;
 };
 
@@ -130,7 +131,7 @@
 		if (ctx->copy_start_offset != 0)
 			binary_copy_to(ctx, part->physical_pos);
 		block = array_append_space(&ctx->blocks);
-		block->bin_part.physical_pos = part->physical_pos;
+		block->physical_pos = part->physical_pos;
 		block->converted = TRUE;
 		block->converted_hdr = TRUE;
 
@@ -164,7 +165,7 @@
 
 	/* single part - write decoded data */
 	block = array_append_space(&ctx->blocks);
-	block->bin_part.physical_pos = part->physical_pos;
+	block->physical_pos = part->physical_pos;
 
 	i_stream_seek(ctx->input, part->physical_pos +
 		      part->header_size.physical_size);
@@ -248,7 +249,7 @@
 		bin_part.physical_pos = part->physical_pos;
 		found = FALSE;
 		for (i = 0; i < count; i++) {
-			if (blocks[i].bin_part.physical_pos != part->physical_pos ||
+			if (blocks[i].physical_pos != part->physical_pos ||
 			    !blocks[i].converted)
 				continue;
 
@@ -287,12 +288,62 @@
 
 	blocks = array_get(&ctx->blocks, &count);
 	streams = t_new(struct istream *, count+1);
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
 		streams[i] = blocks[i].input;
+		i_assert(streams[i]->v_offset == 0);
+	}
 	return streams;
 }
 
 static int
+blocks_count_lines(struct binary_ctx *ctx, struct istream *full_input)
+{
+	struct binary_block *blocks, *cur_block;
+	unsigned int block_idx, block_count;
+	uoff_t cur_offset, cur_size;
+	const unsigned char *data, *p;
+	size_t size, skip;
+	ssize_t ret;
+
+	blocks = array_get_modifiable(&ctx->blocks, &block_count);
+	cur_block = blocks;
+	cur_offset = 0;
+	block_idx = 0;
+
+	while ((ret = i_stream_read_data(full_input, &data, &size, 0)) > 0) {
+		i_assert(cur_offset <= cur_block->input->v_offset);
+		if (cur_block->input->eof) {
+			cur_size = cur_block->input->v_offset +
+				i_stream_get_data_size(cur_block->input);
+			i_assert(size >= cur_size - cur_offset);
+			size = cur_size - cur_offset;
+		}
+		skip = size;
+		while ((p = memchr(data, '\n', size)) != NULL) {
+			size -= p-data+1;
+			data = p+1;
+			cur_block->body_lines_count++;
+		}
+		i_stream_skip(full_input, skip);
+		cur_offset += skip;
+
+		if (cur_block->input->eof) {
+			if (++block_idx == block_count)
+				cur_block = NULL;
+			else
+				cur_block++;
+			cur_offset = 0;
+		}
+	}
+	i_assert(ret == -1);
+	if (full_input->stream_errno != 0)
+		return -1;
+	i_assert(!i_stream_have_bytes_left(cur_block->input));
+	i_assert(block_idx+1 == block_count);
+	return 0;
+}
+
+static int
 index_mail_read_binary_to_cache(struct mail *_mail,
 				const struct message_part *part,
 				bool include_hdr, bool *binary_r,
@@ -325,8 +376,7 @@
 
 	cache->input = i_streams_merge(blocks_get_streams(&ctx),
 				       IO_BLOCK_SIZE, fd_callback, _mail);
-
-	if (i_stream_get_size(cache->input, TRUE, &cache->size) < 0) {
+	if (blocks_count_lines(&ctx, cache->input) < 0) {
 		mail_storage_set_critical(_mail->box->storage,
 					  "read(%s) failed: %m",
 					  i_stream_get_name(cache->input));
@@ -334,6 +384,9 @@
 		binary_streams_free(&ctx);
 		return -1;
 	}
+	i_assert(!i_stream_have_bytes_left(cache->input));
+	cache->size = cache->input->v_offset;
+	i_stream_seek(cache->input, 0);
 
 	if (part->parent == NULL && include_hdr &&
 	    mail->data.bin_parts == NULL) {
@@ -389,13 +442,14 @@
 
 static int
 index_mail_get_binary_size(struct mail *_mail,
-			   const struct message_part *part,
-			   bool include_hdr, uoff_t *size_r)
+			   const struct message_part *part, bool include_hdr,
+			   uoff_t *size_r, unsigned int *lines_r)
 {
 	struct index_mail *mail = (struct index_mail *)_mail;
 	struct message_part *all_parts, *msg_part;
 	const struct message_binary_part *bin_part, *root_bin_part;
 	uoff_t size, end_offset;
+	unsigned int lines;
 	bool binary, converted;
 
 	if (mail_get_parts(_mail, &all_parts) < 0)
@@ -411,6 +465,11 @@
 
 	size = part->header_size.virtual_size +
 		part->body_size.virtual_size;
+	/* note that we assume here that binary translation doesn't change the
+	   headers' line counts. this isn't true if the original message
+	   contained duplicate Content-Transfer-Encoding lines, but since
+	   that's invalid anyway we don't bother trying to handle it. */
+	lines = part->header_size.lines + part->body_size.lines;
 	end_offset = part->physical_pos + size;
 
 	bin_part = mail->data.bin_parts; root_bin_part = NULL;
@@ -428,6 +487,8 @@
 				msg_part->body_size.virtual_size;
 			size += bin_part->binary_hdr_size +
 				bin_part->binary_body_size;
+			lines -= msg_part->body_size.lines;
+			lines += bin_part->binary_body_lines_count;
 		}
 	}
 	if (!include_hdr) {
@@ -435,15 +496,18 @@
 			size -= root_bin_part->binary_hdr_size;
 		else
 			size -= part->header_size.virtual_size;
+		lines -= part->header_size.lines;
 	}
 	*size_r = size;
+	*lines_r = lines;
 	return 0;
 }
 
 int index_mail_get_binary_stream(struct mail *_mail,
 				 const struct message_part *part,
 				 bool include_hdr, uoff_t *size_r,
-				 bool *binary_r, struct istream **stream_r)
+				 unsigned int *lines_r, bool *binary_r,
+				 struct istream **stream_r)
 {
 	struct index_mail *mail = (struct index_mail *)_mail;
 	struct mail_binary_cache *cache = &_mail->box->storage->binary_cache;
@@ -451,9 +515,12 @@
 	bool binary, converted;
 
 	if (stream_r == NULL) {
-		return index_mail_get_binary_size(_mail, part,
-						  include_hdr, size_r);
+		return index_mail_get_binary_size(_mail, part, include_hdr,
+						  size_r, lines_r);
 	}
+	/* current implementation doesn't bother implementing this,
+	   because it's not needed by anything. */
+	i_assert(lines_r == NULL);
 
 	/* FIXME: always put the header to temp file. skip it when needed. */
 	if (cache->box == _mail->box && cache->uid == _mail->uid &&
diff -r d5706259963b -r 3484591230ac src/lib-storage/index/index-mail.h
--- a/src/lib-storage/index/index-mail.h	Tue Oct 23 20:09:35 2012 +0300
+++ b/src/lib-storage/index/index-mail.h	Tue Oct 23 20:15:36 2012 +0300
@@ -207,7 +207,8 @@
 int index_mail_get_binary_stream(struct mail *_mail,
 				 const struct message_part *part,
 				 bool include_hdr, uoff_t *size_r,


More information about the dovecot-cvs mailing list