dovecot-1.1: mbox: Don't stop at From_-lines in the message bodies.

dovecot at dovecot.org dovecot at dovecot.org
Sun May 4 16:58:01 EEST 2008


details:   http://hg.dovecot.org/dovecot-1.1/rev/7871b6219480
changeset: 7479:7871b6219480
user:      Timo Sirainen <tss at iki.fi>
date:      Sun May 04 16:57:58 2008 +0300
description:
mbox: Don't stop at From_-lines in the message bodies. Use the Content-Length:
header to figure out whether a From_-line belongs to a message body or not.

diffstat:

3 files changed, 141 insertions(+), 75 deletions(-)
src/lib-storage/index/mbox/istream-raw-mbox.c |   91 ++++++++++++++-------
src/lib-storage/index/mbox/istream-raw-mbox.h |   19 ++--
src/lib-storage/index/mbox/mbox-mail.c        |  106 ++++++++++++++++---------

diffs (truncated from 335 to 300 lines):

diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/istream-raw-mbox.c
--- a/src/lib-storage/index/mbox/istream-raw-mbox.c	Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/istream-raw-mbox.c	Sun May 04 16:57:58 2008 +0300
@@ -238,15 +238,19 @@ static ssize_t i_stream_raw_mbox_read(st
 				   FIXME: if From-line is longer than input
 				   buffer, we break. probably irrelevant.. */
 				i++;
-				from_after_pos = i;
-				from_start_pos = i - 6;
-				if (from_start_pos > 0 &&
-				    buf[from_start_pos-1] == '\r') {
-					/* CR also belongs to it. */
-					crlf_ending = TRUE;
-					from_start_pos--;
-				} else {
-					crlf_ending = FALSE;
+				if (rstream->hdr_offset + rstream->mail_size ==
+				    stream->istream.v_offset + i - 6 ||
+				    rstream->mail_size == (uoff_t)-1) {
+					from_after_pos = i;
+					from_start_pos = i - 6;
+					if (from_start_pos > 0 &&
+					    buf[from_start_pos-1] == '\r') {
+						/* CR also belongs to it. */
+						crlf_ending = TRUE;
+						from_start_pos--;
+					} else {
+						crlf_ending = FALSE;
+					}
 				}
 				fromp = mbox_from;
 			} else if (from_start_pos != (size_t)-1) {
@@ -290,6 +294,17 @@ static ssize_t i_stream_raw_mbox_read(st
 			new_pos--;
 	}
 
+	if (stream->istream.v_offset -
+	    rstream->hdr_offset + new_pos > rstream->mail_size) {
+		/* istream_raw_mbox_set_next_offset() used invalid
+		   cached next_offset? */
+		i_error("Unexpectedly lost From-line at %"PRIuUOFF_T,
+			rstream->hdr_offset + rstream->mail_size);
+		rstream->eof = TRUE;
+		rstream->corrupted = TRUE;
+		return -1;
+	}
+
 	stream->buffer = buf;
 	if (new_pos == stream->pos) {
 		if (stream->istream.eof || ret > 0)
@@ -379,8 +394,7 @@ static int istream_raw_mbox_is_valid_fro
 	char *sender;
 
 	/* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
-	if (i_stream_read_data(rstream->istream.parent, &data, &size, 30) == -1)
-		return -1;
+	(void)i_stream_read_data(rstream->istream.parent, &data, &size, 30);
 
 	if ((size == 1 && data[0] == '\n') ||
 	    (size == 2 && data[0] == '\r' && data[1] == '\n')) {
@@ -469,33 +483,41 @@ uoff_t istream_raw_mbox_get_body_offset(
 	return rstream->body_offset;
 }
 
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size)
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+				      uoff_t expected_body_size)
 {
 	struct raw_mbox_istream *rstream =
 		(struct raw_mbox_istream *)stream->real_stream;
 	const unsigned char *data;
 	size_t size;
-	uoff_t old_offset;
+	uoff_t old_offset, body_size;
 
 	i_assert(rstream->hdr_offset != (uoff_t)-1);
 	i_assert(rstream->body_offset != (uoff_t)-1);
 
-	if (rstream->mail_size != (uoff_t)-1) {
-		return rstream->mail_size -
-			(rstream->body_offset - rstream->hdr_offset);
-	}
-
+	body_size = rstream->mail_size == (uoff_t)-1 ? (uoff_t)-1 :
+		rstream->mail_size - (rstream->body_offset -
+				      rstream->hdr_offset);
 	old_offset = stream->v_offset;
-	if (body_size != (uoff_t)-1) {
+	if (expected_body_size != (uoff_t)-1) {
+		/* if we already have the existing body size, use it as long as
+		   it's >= expected body_size. otherwise the previous parsing
+		   may have stopped at a From_-line that belongs to the body. */
+		if (body_size != (uoff_t)-1 && body_size >= expected_body_size)
+			return body_size;
+
 		i_stream_seek(rstream->istream.parent,
-			      rstream->body_offset + body_size);
+			      rstream->body_offset + expected_body_size);
 		if (istream_raw_mbox_is_valid_from(rstream) > 0) {
-			rstream->mail_size = body_size +
+			rstream->mail_size = expected_body_size +
 				(rstream->body_offset - rstream->hdr_offset);
 			i_stream_seek(stream, old_offset);
-			return body_size;
-		}
-	}
+			return expected_body_size;
+		}
+		/* invalid expected_body_size */
+	}
+	if (body_size != (uoff_t)-1)
+		return body_size;
 
 	/* have to read through the message body */
 	while (i_stream_read_data(stream, &data, &size, 0) > 0)
@@ -535,12 +557,13 @@ bool istream_raw_mbox_has_crlf_ending(st
 	return rstream->crlf_ending;
 }
 
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
-{
-	struct raw_mbox_istream *rstream =
-		(struct raw_mbox_istream *)stream->real_stream;
-
-	body_size = istream_raw_mbox_get_body_size(stream, body_size);
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size)
+{
+	struct raw_mbox_istream *rstream =
+		(struct raw_mbox_istream *)stream->real_stream;
+	uoff_t body_size;
+
+	body_size = istream_raw_mbox_get_body_size(stream, expected_body_size);
 	rstream->mail_size = (uoff_t)-1;
 
 	rstream->received_time = rstream->next_received_time;
@@ -606,6 +629,14 @@ int istream_raw_mbox_seek(struct istream
 	return rstream->corrupted ? -1 : 0;
 }
 
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset)
+{
+	struct raw_mbox_istream *rstream =
+		(struct raw_mbox_istream *)stream->real_stream;
+	/* mail_size is kept relative to hdr_offset, so convert the offset */
+	rstream->mail_size = offset - rstream->hdr_offset;
+}
+
 bool istream_raw_mbox_is_eof(struct istream *stream)
 {
 	struct raw_mbox_istream *rstream =
diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/istream-raw-mbox.h
--- a/src/lib-storage/index/mbox/istream-raw-mbox.h	Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/istream-raw-mbox.h	Sun May 04 16:57:58 2008 +0300
@@ -12,10 +12,11 @@ uoff_t istream_raw_mbox_get_header_offse
 /* Return offset to beginning of the body. */
 uoff_t istream_raw_mbox_get_body_offset(struct istream *stream);
 
-/* Return the number of bytes in the body of this message. If body_size isn't
-   (uoff_t)-1, we'll use it as potentially valid body size to avoid actually
-   reading through the whole message. */
-uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size);
+/* Return the number of bytes in the body of this message. If
+   expected_body_size isn't (uoff_t)-1, we'll use it as potentially valid body
+   size to avoid actually reading through the whole message. */
+uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
+				      uoff_t expected_body_size);
 
 /* Return received time of current message, or (time_t)-1 if the timestamp is
    broken. */
@@ -26,14 +27,18 @@ const char *istream_raw_mbox_get_sender(
 /* Return TRUE if the empty line between this and the next mail contains CR. */
 bool istream_raw_mbox_has_crlf_ending(struct istream *stream);
 
-/* Jump to next message. If body_size isn't (uoff_t)-1, we'll use it as
-   potentially valid body size. */
-void istream_raw_mbox_next(struct istream *stream, uoff_t body_size);
+/* Jump to next message. If expected_body_size isn't (uoff_t)-1, we'll use it
+   as potentially valid body size. */
+void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size);
 
 /* Seek to message at given offset. offset must point to beginning of
    "\nFrom ", or 0 for beginning of file. Returns -1 if it offset doesn't
    contain a valid From-line. */
 int istream_raw_mbox_seek(struct istream *stream, uoff_t offset);
+/* Set next message's start offset. If this isn't set, read stops at the next
+   valid From_-line, even if it belongs to the current message's body
+   (Content-Length: header can be used to determine that). */
+void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset);
 
 /* Returns TRUE if we've read the whole mbox. */
 bool istream_raw_mbox_is_eof(struct istream *stream);
diff -r 0eb6a0c01001 -r 7871b6219480 src/lib-storage/index/mbox/mbox-mail.c
--- a/src/lib-storage/index/mbox/mbox-mail.c	Sun May 04 14:35:51 2008 +0300
+++ b/src/lib-storage/index/mbox/mbox-mail.c	Sun May 04 16:57:58 2008 +0300
@@ -166,12 +166,37 @@ mbox_mail_get_special(struct mail *_mail
 	return index_mail_get_special(_mail, field, value_r);
 }
 
+/* Get the offset where the next message starts. Returns TRUE and sets
+   next_offset_r if it's known, FALSE if not (appending, or the offset
+   lookup didn't succeed). */
+static bool
+mbox_mail_get_next_offset(struct index_mail *mail, uoff_t *next_offset_r)
+{
+	struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
+	struct mail *_mail = &mail->mail.mail;
+	const struct mail_index_header *hdr;
+	int trailer_size;
+
+	hdr = mail_index_get_header(mail->trans->trans_view);
+	if (_mail->seq > hdr->messages_count)
+		return FALSE; /* we're appending a new message */
+	if (_mail->seq == hdr->messages_count) {
+		/* last message, use the synced mbox size */
+		trailer_size = (mbox->storage->storage.flags &
+				MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ? 2 : 1;
+		*next_offset_r = hdr->sync_size - trailer_size;
+		return TRUE;
+	}
+	/* only a positive lookup result means the offset was found */
+	return mbox_file_lookup_offset(mbox, mail->trans->trans_view,
+				       _mail->seq + 1, next_offset_r) > 0;
+}
+
 static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r)
 {
 	struct index_mail *mail = (struct index_mail *)_mail;
 	struct index_mail_data *data = &mail->data;
 	struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
-	const struct mail_index_header *hdr;
 	struct istream *input;
 	struct message_size hdr_size;
 	uoff_t old_offset, body_offset, body_size, next_offset;
@@ -194,26 +219,10 @@ static int mbox_mail_get_physical_size(s
 
 	/* use the next message's offset to avoid reading through the entire
 	   message body to find out its size */
-	hdr = mail_index_get_header(mail->trans->trans_view);
-	if (_mail->seq >= hdr->messages_count) {
-		if (_mail->seq == hdr->messages_count) {
-			/* last message, use the synced mbox size */
-			int trailer_size;
-
-			trailer_size = (mbox->storage->storage.flags &
-					MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ?
-				2 : 1;
-			body_size = hdr->sync_size - body_offset - trailer_size;
-		} else {
-			/* we're appending a new message */
-			body_size = (uoff_t)-1;
-		}
-	} else if (mbox_file_lookup_offset(mbox, mail->trans->trans_view,
-					   _mail->seq + 1, &next_offset) > 0) {
+	if (mbox_mail_get_next_offset(mail, &next_offset))
 		body_size = next_offset - body_offset;
-	} else {
+	else
 		body_size = (uoff_t)-1;
-	}
 
 	/* verify that the calculated body size is correct */
 	body_size = istream_raw_mbox_get_body_size(mbox->mbox_stream,
@@ -223,6 +232,42 @@ static int mbox_mail_get_physical_size(s
 	*size_r = data->physical_size;
 
 	i_stream_seek(input, old_offset);
+	return 0;
+}
+
+static int mbox_mail_init_stream(struct index_mail *mail)
+{
+	struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox;
+	struct istream *raw_stream;
+	uoff_t hdr_offset, next_offset;
+
+	if (mbox_mail_seek(mail) < 0)
+		return -1;
+
+	if (!mbox_mail_get_next_offset(mail, &next_offset)) {
+		if (mbox_mail_seek(mail) < 0)
+			return -1;
+		if (!mbox_mail_get_next_offset(mail, &next_offset)) {
+			i_warning("mbox %s: Can't find next message offset",
+				  mbox->path);
+			next_offset = (uoff_t)-1;
+		}
+	}
+
+	raw_stream = mbox->mbox_stream;
+	hdr_offset = istream_raw_mbox_get_header_offset(raw_stream);
+	i_stream_seek(raw_stream, hdr_offset);
+
+	if (next_offset != (uoff_t)-1)
+		istream_raw_mbox_set_next_offset(raw_stream, next_offset);
+
+	raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1);
+	mail->data.stream =
+		i_stream_create_header_filter(raw_stream,
+				HEADER_FILTER_EXCLUDE | HEADER_FILTER_NO_CR,
+				mbox_hide_headers, mbox_hide_headers_count,


More information about the dovecot-cvs mailing list