dovecot-2.0: Split quoted-printable parsing to q-p body parsing and "Q" header parsing.
dovecot at dovecot.org
dovecot at dovecot.org
Sun Jul 26 23:32:33 EEST 2009
details: http://hg.dovecot.org/dovecot-2.0/rev/2dce50fc0d9f
changeset: 9662:2dce50fc0d9f
user: Timo Sirainen <tss at iki.fi>
date: Sun Jul 26 16:32:22 2009 -0400
description:
Split quoted-printable parsing to q-p body parsing and "Q" header parsing.
Fixed also several other issues in quoted-printable parsing to get it fully
RFC compliant.
diffstat:
5 files changed, 171 insertions(+), 26 deletions(-)
src/lib-mail/Makefile.am | 5 +
src/lib-mail/message-header-decode.c | 6 +-
src/lib-mail/quoted-printable.c | 101 ++++++++++++++++++++++++++--------
src/lib-mail/quoted-printable.h | 6 +-
src/lib-mail/test-quoted-printable.c | 79 ++++++++++++++++++++++++++
diffs (272 lines):
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/Makefile.am
--- a/src/lib-mail/Makefile.am Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/Makefile.am Sun Jul 26 16:32:22 2009 -0400
@@ -60,6 +60,7 @@ test_programs = \
test-message-header-parser \
test-message-id \
test-message-parser \
+ test-quoted-printable \
test-rfc2231-parser
noinst_PROGRAMS = $(test_programs)
@@ -100,6 +101,10 @@ test_message_parser_LDADD = message-pars
test_message_parser_LDADD = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs)
test_message_parser_DEPENDENCIES = message-parser.lo message-header-parser.lo message-size.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs)
+test_quoted_printable_SOURCES = test-quoted-printable.c
+test_quoted_printable_LDADD = quoted-printable.lo $(test_libs)
+test_quoted_printable_DEPENDENCIES = quoted-printable.lo $(test_libs)
+
test_rfc2231_parser_SOURCES = test-rfc2231-parser.c
test_rfc2231_parser_LDADD = rfc2231-parser.lo rfc822-parser.lo $(test_libs)
test_rfc2231_parser_DEPENDENCIES = rfc2231-parser.lo rfc822-parser.lo $(test_libs)
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/message-header-decode.c
--- a/src/lib-mail/message-header-decode.c Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/message-header-decode.c Sun Jul 26 16:32:22 2009 -0400
@@ -36,9 +36,9 @@ message_header_decode_encoded(const unsi
switch (data[start_pos[0]+1]) {
case 'q':
case 'Q':
- quoted_printable_decode(data + start_pos[1] + 1,
- start_pos[2] - start_pos[1] - 1,
- NULL, decodebuf);
+ quoted_printable_q_decode(data + start_pos[1] + 1,
+ start_pos[2] - start_pos[1] - 1,
+ decodebuf);
break;
case 'b':
case 'B':
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/quoted-printable.c
--- a/src/lib-mail/quoted-printable.c Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/quoted-printable.c Sun Jul 26 16:32:22 2009 -0400
@@ -5,8 +5,86 @@
#include "hex-binary.h"
#include "quoted-printable.h"
+#define QP_IS_TRAILING_SPACE(c) \
+ ((c) == ' ' || (c) == '\t')
+
void quoted_printable_decode(const unsigned char *src, size_t src_size,
size_t *src_pos_r, buffer_t *dest)
+{
+ char hexbuf[3];
+ size_t src_pos, pos, next;
+
+ hexbuf[2] = '\0';
+
+ next = 0;
+ for (src_pos = 0; src_pos < src_size; src_pos++) {
+ if (src[src_pos] != '=' && src[src_pos] != '\n')
+ continue;
+
+ if (src[src_pos] == '\n') {
+ /* drop trailing whitespace */
+ pos = src_pos;
+ if (pos > 0 && src[pos-1] == '\r')
+ pos--;
+ while (pos > 0 && QP_IS_TRAILING_SPACE(src[pos-1]))
+ pos--;
+ buffer_append(dest, src + next, pos - next);
+ next = src_pos+1;
+ buffer_append(dest, "\r\n", 2);
+ continue;
+ }
+
+ /* '=' */
+ buffer_append(dest, src + next, src_pos - next);
+ next = src_pos;
+
+ if (src_pos+1 >= src_size)
+ break;
+
+ if (src[src_pos+1] == '\n') {
+ /* =\n -> skip both */
+ src_pos += 2;
+ next += 2;
+ continue;
+ }
+
+ if (src_pos+2 >= src_size)
+ break;
+
+ if (src[src_pos+1] == '\r' && src[src_pos+2] == '\n') {
+ /* =\r\n -> skip both */
+ src_pos += 3;
+ next += 3;
+ continue;
+ }
+
+ /* =<hex> */
+ hexbuf[0] = src[src_pos+1];
+ hexbuf[1] = src[src_pos+2];
+
+ if (hex_to_binary(hexbuf, dest) == 0) {
+ src_pos += 2;
+ next = src_pos + 1;
+ } else {
+ /* non-hex data, show as-is */
+ next = src_pos;
+ }
+ }
+ if (src_pos == src_size) {
+ /* add everything but trailing spaces */
+ if (src_pos > 0 && src[src_pos-1] == '\r')
+ src_pos--;
+ while (src_pos > 0 && QP_IS_TRAILING_SPACE(src[src_pos-1]))
+ src_pos--;
+ buffer_append(dest, src + next, src_pos - next);
+ next = src_pos;
+ }
+
+ *src_pos_r = next;
+}
+
+void quoted_printable_q_decode(const unsigned char *src, size_t src_size,
+ buffer_t *dest)
{
char hexbuf[3];
size_t src_pos, next;
@@ -19,31 +97,16 @@ void quoted_printable_decode(const unsig
continue;
buffer_append(dest, src + next, src_pos - next);
- next = src_pos+1;
+ next = src_pos;
if (src[src_pos] == '_') {
buffer_append_c(dest, ' ');
- continue;
- }
-
- if (src_pos+1 >= src_size)
- break;
-
- if (src[src_pos+1] == '\n') {
- /* =\n -> skip both */
- src_pos++;
+ next++;
continue;
}
if (src_pos+2 >= src_size)
break;
-
- if (src[src_pos+1] == '\r' && src[src_pos+2] == '\n') {
- /* =\r\n -> skip both */
- src_pos += 2;
- next++;
- continue;
- }
/* =<hex> */
hexbuf[0] = src[src_pos+1];
@@ -57,9 +120,5 @@ void quoted_printable_decode(const unsig
next = src_pos;
}
}
-
buffer_append(dest, src + next, src_size - next);
-
- if (src_pos_r != NULL)
- *src_pos_r = src_pos;
}
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/quoted-printable.h
--- a/src/lib-mail/quoted-printable.h Sun Jul 26 16:06:20 2009 -0400
+++ b/src/lib-mail/quoted-printable.h Sun Jul 26 16:32:22 2009 -0400
@@ -5,9 +5,11 @@
size of src, and may be same as src. Decoding errors are ignored.
This function may be called multiple times for parsing the same stream.
- If src_pos is non-NULL, it's updated to first non-translated character in
- src. */
+ src_pos is updated to first non-translated character in src. */
void quoted_printable_decode(const unsigned char *src, size_t src_size,
size_t *src_pos_r, buffer_t *dest);
+/* Decode MIME "Q" encoding. */
+void quoted_printable_q_decode(const unsigned char *src, size_t src_size,
+ buffer_t *dest);
#endif
diff -r 1780d56a9160 -r 2dce50fc0d9f src/lib-mail/test-quoted-printable.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/test-quoted-printable.c Sun Jul 26 16:32:22 2009 -0400
@@ -0,0 +1,79 @@
+/* Copyright (c) 2007-2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "str.h"
+#include "quoted-printable.h"
+#include "test-common.h"
+
+static void test_quoted_printable_decode(void)
+{
+ const char *data[] = {
+ "foo \r\nbar=", "foo\r\nbar",
+ "foo =\nbar", "foo bar",
+ "foo =\r\nbar", "foo bar",
+ "foo \nbar=", "foo\r\nbar",
+ "=0A=0D ", "\n\r",
+ "foo_bar", "foo_bar",
+ "foo=", "foo",
+ "foo=A", "foo",
+ "foo=Ax", "foo=Ax",
+ "foo=Ax=xy", "foo=Ax=xy"
+ };
+ buffer_t *buf;
+ unsigned int i, start, end, len;
+ size_t src_pos;
+
+ test_begin("quoted printable decode");
+ buf = buffer_create_dynamic(pool_datastack_create(), 128);
+ for (i = 0; i < N_ELEMENTS(data); i += 2) {
+ len = strlen(data[i]);
+ for (start = 0, end = 1; end <= len; ) {
+ quoted_printable_decode(CONST_PTR_OFFSET(data[i], start),
+ end - start, &src_pos, buf);
+ src_pos += start;
+ start = src_pos;
+ if (src_pos <= end)
+ end++;
+ else
+ end = src_pos + 1;
+ }
+ test_assert(strcmp(data[i+1], str_c(buf)) == 0);
+ buffer_set_used_size(buf, 0);
+ }
+ test_end();
+}
+
+static void test_quoted_printable_q_decode(void)
+{
+ const char *data[] = {
+ "=0A=0D ", "\n\r ",
+ "__foo__bar__", " foo bar ",
+ "foo=", "foo=",
+ "foo=A", "foo=A",
+ "foo=Ax", "foo=Ax",
+ "foo=Ax=xy", "foo=Ax=xy"
+ };
+ buffer_t *buf;
+ unsigned int i;
+
+ test_begin("quoted printable q decode");
+ buf = buffer_create_dynamic(pool_datastack_create(), 128);
+ for (i = 0; i < N_ELEMENTS(data); i += 2) {
+ quoted_printable_q_decode((const void *)data[i], strlen(data[i]),
+ buf);
+ test_assert(strcmp(data[i+1], str_c(buf)) == 0);
+ buffer_set_used_size(buf, 0);
+ }
+ test_end();
+}
+
+int main(void)
+{
+ static void (*test_functions[])(void) = {
+ test_quoted_printable_decode,
+ test_quoted_printable_q_decode,
+ NULL
+ };
+ return test_run(test_functions);
+}
More information about the dovecot-cvs mailing list