dovecot-2.2: lib-mail: message-decoder no longer skips lib-charset for UTF8 -> UTF8 translations.
dovecot at dovecot.org
dovecot at dovecot.org
Sat Jan 10 02:33:45 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/e3640ccaa76d
changeset: 18146:e3640ccaa76d
user: Timo Sirainen <tss at iki.fi>
date: Sat Jan 10 04:32:42 2015 +0200
description:
lib-mail: message-decoder no longer skips lib-charset for UTF8 -> UTF8 translations.
With the previous lib-charset fix this makes message-decoder handle partial
UTF-8 text in input blocks correctly.
diffstat:
src/lib-mail/Makefile.am | 4 +-
src/lib-mail/message-decoder.c | 20 +--------------
src/lib-mail/test-message-decoder.c | 47 ++++++++++++++----------------------
3 files changed, 22 insertions(+), 49 deletions(-)
diffs (135 lines):
diff -r f191dbcaec5f -r e3640ccaa76d src/lib-mail/Makefile.am
--- a/src/lib-mail/Makefile.am Sat Jan 10 04:30:40 2015 +0200
+++ b/src/lib-mail/Makefile.am Sat Jan 10 04:32:42 2015 +0200
@@ -133,8 +133,8 @@
test_message_date_DEPENDENCIES = $(test_deps)
test_message_decoder_SOURCES = test-message-decoder.c
-test_message_decoder_LDADD = message-decoder.lo rfc822-parser.lo rfc2231-parser.lo $(test_libs)
-test_message_decoder_DEPENDENCIES = $(test_deps)
+test_message_decoder_LDADD = message-decoder.lo quoted-printable.lo rfc822-parser.lo rfc2231-parser.lo ../lib-charset/libcharset.la $(test_libs)
+test_message_decoder_DEPENDENCIES = ../lib-charset/libcharset.la $(test_deps)
test_message_header_decode_SOURCES = test-message-header-decode.c
test_message_header_decode_LDADD = message-header-decode.lo quoted-printable.lo message-header-encode.lo $(test_libs)
diff -r f191dbcaec5f -r e3640ccaa76d src/lib-mail/message-decoder.c
--- a/src/lib-mail/message-decoder.c Sat Jan 10 04:30:40 2015 +0200
+++ b/src/lib-mail/message-decoder.c Sat Jan 10 04:32:42 2015 +0200
@@ -38,7 +38,6 @@
char *content_charset;
enum message_cte message_cte;
- unsigned int charset_utf8:1;
unsigned int binary_input:1;
};
@@ -142,7 +141,6 @@
for (; *results != NULL; results += 2) {
if (strcasecmp(results[0], "charset") == 0) {
ctx->content_charset = i_strdup(results[1]);
- ctx->charset_utf8 = charset_is_utf8(results[1]);
break;
}
}
@@ -243,7 +241,7 @@
(part->flags & (MESSAGE_PART_FLAG_TEXT |
MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0;
- if (ctx->charset_utf8 || ctx->binary_input)
+ if (ctx->binary_input)
return;
if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
@@ -334,21 +332,6 @@
if (ctx->binary_input) {
output->data = data;
output->size = size;
- } else if (ctx->charset_utf8 || ctx->charset_trans == NULL) {
- /* handle unknown charsets the same as UTF-8. it might find
- usable ASCII text. */
- buffer_set_used_size(ctx->buf2, 0);
- if (ctx->normalizer != NULL) {
- (void)ctx->normalizer(data, size, ctx->buf2);
- output->data = ctx->buf2->data;
- output->size = ctx->buf2->used;
- } else if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
- output->data = data;
- output->size = size;
- } else {
- output->data = ctx->buf2->data;
- output->size = ctx->buf2->used;
- }
} else {
buffer_set_used_size(ctx->buf2, 0);
if (ctx->translation_size != 0)
@@ -400,6 +383,5 @@
{
i_free_and_null(ctx->content_charset);
ctx->message_cte = MESSAGE_CTE_78BIT;
- ctx->charset_utf8 = TRUE;
buffer_set_used_size(ctx->encoding_buf, 0);
}
diff -r f191dbcaec5f -r e3640ccaa76d src/lib-mail/test-message-decoder.c
--- a/src/lib-mail/test-message-decoder.c Sat Jan 10 04:30:40 2015 +0200
+++ b/src/lib-mail/test-message-decoder.c Sat Jan 10 04:32:42 2015 +0200
@@ -16,34 +16,6 @@
buffer_append(dest, data, size);
}
-int quoted_printable_decode(const unsigned char *src, size_t src_size,
- size_t *src_pos_r, buffer_t *dest)
-{
- while (src_size > 0 && src[src_size-1] == ' ')
- src_size--;
- buffer_append(dest, src, src_size);
- *src_pos_r = src_size;
- return 0;
-}
-
-int charset_to_utf8_begin(const char *charset ATTR_UNUSED,
- normalizer_func_t *normalizer ATTR_UNUSED,
- struct charset_translation **t_r)
-{
- *t_r = NULL;
- return 0;
-}
-void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED) { }
-bool charset_is_utf8(const char *charset ATTR_UNUSED) { return TRUE; }
-
-enum charset_result
-charset_to_utf8(struct charset_translation *t ATTR_UNUSED,
- const unsigned char *src, size_t *src_size, buffer_t *dest)
-{
- buffer_append(dest, src, *src_size);
- return CHARSET_RET_OK;
-}
-
static void test_message_decoder(void)
{
struct message_decoder_context *ctx;
@@ -86,6 +58,25 @@
test_assert(output.size == 14);
test_assert(memcmp(output.data, " bar", 14) == 0);
+ /* partial text - \xC3\xA4 in quoted-printable. we should get a single
+ UTF-8 letter as result */
+ input.data = (const void *)"="; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"C"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"3"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"=A"; input.size = 2;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 0);
+ input.data = (const void *)"4"; input.size = 1;
+ test_assert(message_decoder_decode_next_block(ctx, &input, &output));
+ test_assert(output.size == 2);
+ test_assert(memcmp(output.data, "\xC3\xA4", 2) == 0);
+
message_decoder_deinit(&ctx);
test_end();
More information about the dovecot-cvs
mailing list