dovecot-2.0: Split quoted-printable parsing into q-p body parsing and "Q" header parsing

dovecot at dovecot.org dovecot at dovecot.org
Sun Jul 26 23:32:33 EEST 2009


details:   http://hg.dovecot.org/dovecot-2.0/rev/2dce50fc0d9f
changeset: 9662:2dce50fc0d9f
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Jul 26 16:32:22 2009 -0400
description:
Split quoted-printable parsing into q-p body parsing and "Q" header parsing.
Also fixed several other issues in quoted-printable parsing to make it fully
RFC compliant.

diffstat:

5 files changed, 171 insertions(+), 26 deletions(-)
src/lib-mail/Makefile.am             |    5 +
src/lib-mail/message-header-decode.c |    6 +-
src/lib-mail/quoted-printable.c      |  101 ++++++++++++++++++++++++++--------
src/lib-mail/quoted-printable.h      |    6 +-
src/lib-mail/test-quoted-printable.c |   79 ++++++++++++++++++++++++++

diffs (272 lines):

diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/Makefile.am
--- a/src/lib-mail/Makefile.am	Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/Makefile.am	Sun Jul 26 16:32:22 2009 -0400
@@ -60,6 +60,7 @@ test_programs = \
 	test-message-header-parser \
 	test-message-id \
 	test-message-parser \
+	test-quoted-printable \
 	test-rfc2231-parser
 
 noinst_PROGRAMS = $(test_programs)
@@ -100,6 +101,10 @@ test_message_parser_LDADD = message-pars
 test_message_parser_LDADD = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs)
 test_message_parser_DEPENDENCIES = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs)
 
+test_quoted_printable_SOURCES = test-quoted-printable.c
+test_quoted_printable_LDADD = quoted-printable.lo $(test_libs)
+test_quoted_printable_DEPENDENCIES = quoted-printable.lo $(test_libs)
+
 test_rfc2231_parser_SOURCES = test-rfc2231-parser.c
 test_rfc2231_parser_LDADD = rfc2231-parser.lo rfc822-parser.lo $(test_libs)
 test_rfc2231_parser_DEPENDENCIES = rfc2231-parser.lo rfc822-parser.lo $(test_libs)
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/message-header-decode.c
--- a/src/lib-mail/message-header-decode.c	Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/message-header-decode.c	Sun Jul 26 16:32:22 2009 -0400
@@ -36,9 +36,9 @@ message_header_decode_encoded(const unsi
 	switch (data[start_pos[0]+1]) {
 	case 'q':
 	case 'Q':
-		quoted_printable_decode(data + start_pos[1] + 1,
-					start_pos[2] - start_pos[1] - 1,
-					NULL, decodebuf);
+		quoted_printable_q_decode(data + start_pos[1] + 1,
+					  start_pos[2] - start_pos[1] - 1,
+					  decodebuf);
 		break;
 	case 'b':
 	case 'B':
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/quoted-printable.c
--- a/src/lib-mail/quoted-printable.c	Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/quoted-printable.c	Sun Jul 26 16:32:22 2009 -0400
@@ -5,8 +5,86 @@
 #include "hex-binary.h"
 #include "quoted-printable.h"
 
+#define QP_IS_TRAILING_SPACE(c) \
+	((c) == ' ' || (c) == '\t')
+
 void quoted_printable_decode(const unsigned char *src, size_t src_size,
 			     size_t *src_pos_r, buffer_t *dest)
+{
+	char hexbuf[3];
+	size_t src_pos, pos, next;
+
+	hexbuf[2] = '\0';
+
+	next = 0;
+	for (src_pos = 0; src_pos < src_size; src_pos++) {
+		if (src[src_pos] != '=' && src[src_pos] != '\n')
+			continue;
+
+		if (src[src_pos] == '\n') {
+			/* drop trailing whitespace */
+			pos = src_pos;
+			if (pos > 0 && src[pos-1] == '\r')
+				pos--;
+			while (pos > 0 && QP_IS_TRAILING_SPACE(src[pos-1]))
+				pos--;
+			buffer_append(dest, src + next, pos - next);
+			next = src_pos+1;
+			buffer_append(dest, "\r\n", 2);
+			continue;
+		}
+
+		/* '=' */
+		buffer_append(dest, src + next, src_pos - next);
+		next = src_pos;
+
+		if (src_pos+1 >= src_size)
+			break;
+
+		if (src[src_pos+1] == '\n') {
+			/* =\n -> skip both */
+			src_pos += 2;
+			next += 2;
+			continue;
+		}
+
+		if (src_pos+2 >= src_size)
+			break;
+
+		if (src[src_pos+1] == '\r' && src[src_pos+2] == '\n') {
+			/* =\r\n -> skip both */
+			src_pos += 3;
+			next += 3;
+			continue;
+		}
+
+		/* =<hex> */
+		hexbuf[0] = src[src_pos+1];
+		hexbuf[1] = src[src_pos+2];
+
+		if (hex_to_binary(hexbuf, dest) == 0) {
+			src_pos += 2;
+			next = src_pos + 1;
+		} else {
+			/* non-hex data, show as-is */
+			next = src_pos;
+		}
+	}
+	if (src_pos == src_size) {
+		/* add everything but trailing spaces */
+		if (src_pos > 0 && src[src_pos-1] == '\r')
+			src_pos--;
+		while (src_pos > 0 && QP_IS_TRAILING_SPACE(src[src_pos-1]))
+			src_pos--;
+		buffer_append(dest, src + next, src_pos - next);
+		next = src_pos;
+	}
+
+	*src_pos_r = next;
+}
+
+void quoted_printable_q_decode(const unsigned char *src, size_t src_size,
+			       buffer_t *dest)
 {
 	char hexbuf[3];
 	size_t src_pos, next;
@@ -19,31 +97,16 @@ void quoted_printable_decode(const unsig
 			continue;
 
 		buffer_append(dest, src + next, src_pos - next);
-		next = src_pos+1;
+		next = src_pos;
 
 		if (src[src_pos] == '_') {
 			buffer_append_c(dest, ' ');
-			continue;
-		}
-
-		if (src_pos+1 >= src_size)
-			break;
-
-		if (src[src_pos+1] == '\n') {
-			/* =\n -> skip both */
-			src_pos++;
+			next++;
 			continue;
 		}
 
 		if (src_pos+2 >= src_size)
 			break;
-
-		if (src[src_pos+1] == '\r' && src[src_pos+2] == '\n') {
-			/* =\r\n -> skip both */
-			src_pos += 2;
-			next++;
-			continue;
-		}
 
 		/* =<hex> */
 		hexbuf[0] = src[src_pos+1];
@@ -57,9 +120,5 @@ void quoted_printable_decode(const unsig
 			next = src_pos;
 		}
 	}
-
 	buffer_append(dest, src + next, src_size - next);
-
-	if (src_pos_r != NULL)
-		*src_pos_r = src_pos;
 }
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/quoted-printable.h
--- a/src/lib-mail/quoted-printable.h	Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/quoted-printable.h	Sun Jul 26 16:32:22 2009 -0400
@@ -5,9 +5,11 @@
    size of src, and may be same as src. Decoding errors are ignored.
 
    This function may be called multiple times for parsing the same stream.
-   If src_pos is non-NULL, it's updated to first non-translated character in
-   src. */
+   src_pos is updated to first non-translated character in src. */
 void quoted_printable_decode(const unsigned char *src, size_t src_size,
 			     size_t *src_pos_r, buffer_t *dest);
+/* Decode MIME "Q" encoding. */
+void quoted_printable_q_decode(const unsigned char *src, size_t src_size,
+			       buffer_t *dest);
 
 #endif
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/test-quoted-printable.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/test-quoted-printable.c	Sun Jul 26 16:32:22 2009 -0400
@@ -0,0 +1,79 @@
+/* Copyright (c) 2007-2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "str.h"
+#include "quoted-printable.h"
+#include "test-common.h"
+
+static void test_quoted_printable_decode(void)
+{
+	const char *data[] = {
+		"foo  \r\nbar=", "foo\r\nbar",
+		"foo =\nbar", "foo bar",
+		"foo =\r\nbar", "foo bar",
+		"foo  \nbar=", "foo\r\nbar",
+		"=0A=0D  ", "\n\r",
+		"foo_bar", "foo_bar",
+		"foo=", "foo",
+		"foo=A", "foo",
+		"foo=Ax", "foo=Ax",
+		"foo=Ax=xy", "foo=Ax=xy"
+	};
+	buffer_t *buf;
+	unsigned int i, start, end, len;
+	size_t src_pos;
+
+	test_begin("quoted printable decode");
+	buf = buffer_create_dynamic(pool_datastack_create(), 128);
+	for (i = 0; i < N_ELEMENTS(data); i += 2) {
+		len = strlen(data[i]);
+		for (start = 0, end = 1; end <= len; ) {
+			quoted_printable_decode(CONST_PTR_OFFSET(data[i], start),
+						end - start, &src_pos, buf);
+			src_pos += start;
+			start = src_pos;
+			if (src_pos <= end)
+				end++;
+			else
+				end = src_pos + 1;
+		}
+		test_assert(strcmp(data[i+1], str_c(buf)) == 0);
+		buffer_set_used_size(buf, 0);
+	}
+	test_end();
+}
+
+static void test_quoted_printable_q_decode(void)
+{
+	const char *data[] = {
+		"=0A=0D  ", "\n\r  ",
+		"__foo__bar__", "  foo  bar  ",
+		"foo=", "foo=",
+		"foo=A", "foo=A",
+		"foo=Ax", "foo=Ax",
+		"foo=Ax=xy", "foo=Ax=xy"
+	};
+	buffer_t *buf;
+	unsigned int i;
+
+	test_begin("quoted printable q decode");
+	buf = buffer_create_dynamic(pool_datastack_create(), 128);
+	for (i = 0; i < N_ELEMENTS(data); i += 2) {
+		quoted_printable_q_decode((const void *)data[i], strlen(data[i]),
+					  buf);
+		test_assert(strcmp(data[i+1], str_c(buf)) == 0);
+		buffer_set_used_size(buf, 0);
+	}
+	test_end();
+}
+
+int main(void)
+{
+	static void (*test_functions[])(void) = {
+		test_quoted_printable_decode,
+		test_quoted_printable_q_decode,
+		NULL
+	};
+	return test_run(test_functions);
+}


More information about the dovecot-cvs mailing list