dovecot-2.2: lib-charset: Added CHARSET_MAX_PENDING_BUF_SIZE macro and asserts for it.

Wed Jan 14 23:23:25 UTC 2015

details:   http://hg.dovecot.org/dovecot-2.2/rev/3d9ec121dc81
changeset: 18150:3d9ec121dc81
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Jan 15 01:05:36 2015 +0200
description:
lib-charset: Added CHARSET_MAX_PENDING_BUF_SIZE macro and asserts for it.

diffstat:

 src/lib-charset/charset-iconv.c |   1 +
 src/lib-charset/charset-utf8.c  |   1 +
 src/lib-charset/charset-utf8.h  |  12 +++++++++++-
 3 files changed, 13 insertions(+), 1 deletions(-)

diffs (51 lines):

diff -r 0e74934072e0 -r 3d9ec121dc81 src/lib-charset/charset-iconv.c

--- a/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:36 2015 +0200
@@ -129,6 +129,7 @@
 	if (prev_invalid_pos != (size_t)-1)
 		result = CHARSET_RET_INVALID_INPUT;
 
+	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 	*src_size = pos;
 	return result;
 }
diff -r 0e74934072e0 -r 3d9ec121dc81 src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:36 2015 +0200
@@ -94,6 +94,7 @@
 
 	uni_utf8_partial_strlen_n(src, *src_size, &pos);
 	if (pos < *src_size) {
+		i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 		*src_size = pos;
 		res = CHARSET_RET_INCOMPLETE_INPUT;
 	}
diff -r 0e74934072e0 -r 3d9ec121dc81 src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:36 2015 +0200
@@ -3,6 +3,11 @@
 
 #include "unichar.h"
 
+/* Max number of bytes that iconv can require for a single character.
+   UTF-8 takes max 6 bytes per character. Other charsets have not been
+   verified, but 10 should be more than enough for all of them. */
+#define CHARSET_MAX_PENDING_BUF_SIZE 10
+
 struct charset_translation;
 
 enum charset_result {
@@ -25,7 +30,12 @@
 bool charset_is_utf8(const char *charset) ATTR_PURE;
 
 /* Translate src to UTF-8. src_size is updated to contain the number of
-   characters actually translated from src. */
+   characters actually translated from src. The src_size should never shrink
+   more than CHARSET_MAX_PENDING_BUF_SIZE bytes.
+
+   If src contains invalid input, UNICODE_REPLACEMENT_CHAR is placed in such
+   positions and the invalid input is skipped over. Return value is
+   CHARSET_RET_INVALID_INPUT in that case. */
 enum charset_result
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest);