dovecot-2.2: lib: Various fixes to str_sanitize*()

dovecot at dovecot.org dovecot at dovecot.org
Fri Jan 30 09:54:57 UTC 2015


details:   http://hg.dovecot.org/dovecot-2.2/rev/ee12ce691bd3
changeset: 18206:ee12ce691bd3
user:      Timo Sirainen <tss at iki.fi>
date:      Fri Jan 30 11:52:33 2015 +0200
description:
lib: Various fixes to str_sanitize*()
 - UTF-8 sequences could have been truncated to become partial sequences
 - Text containing UTF-8 may not have been truncated at all
 - str_sanitize_append() might have truncated the destination string past
   the start of the appended text (i.e. into data that was already there),
   although that happened only if max_len was very small

diffstat:

 src/lib/str-sanitize.c      |  43 +++++++++++++++++++++++++++++++++++--------
 src/lib/test-str-sanitize.c |  32 ++++++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 10 deletions(-)

diffs (143 lines):

diff -r 58466077dff9 -r ee12ce691bd3 src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c	Fri Jan 30 03:12:37 2015 +0200
+++ b/src/lib/str-sanitize.c	Fri Jan 30 11:52:33 2015 +0200
@@ -11,26 +11,49 @@
 	unichar_t chr;
 	size_t i;
 
-	for (i = 0; i < max_len; ) {
+	for (i = 0; src[i] != '\0'; ) {
 		len = uni_utf8_char_bytes(src[i]);
-		if (uni_utf8_get_char(src+i, &chr) <= 0)
+		if (i + len > max_len || uni_utf8_get_char(src+i, &chr) <= 0)
 			break;
 		if ((unsigned char)src[i] < 32)
 			break;
 		i += len;
 	}
+	i_assert(i <= max_len);
 	return i;
 }
 
+static void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos)
+{
+	const unsigned char *data = str_data(dest);
+	unsigned int len = str_len(dest);
+
+	if (len == initial_pos)
+		return;
+	if ((data[len-1] & 0x80) == 0) {
+		str_truncate(dest, len-1);
+		return;
+	}
+	/* truncate UTF-8 sequence. */
+	while (len > 0 && (data[len-1] & 0xc0) == 0x80)
+		len--;
+	if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
+		len--;
+	if (len >= initial_pos)
+		str_truncate(dest, len);
+}
+
 void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
 {
-	unsigned int len;
+	unsigned int len, initial_pos = str_len(dest);
 	unichar_t chr;
 	size_t i;
 	int ret;
 
-	for (i = 0; i < max_len && src[i] != '\0'; ) {
+	for (i = 0; src[i] != '\0'; ) {
 		len = uni_utf8_char_bytes(src[i]);
+		if (i + len > max_len)
+			break;
 		ret = uni_utf8_get_char(src+i, &chr);
 		if (ret <= 0) {
 			/* invalid UTF-8 */
@@ -45,12 +68,17 @@
 		if ((unsigned char)src[i] < 32)
 			str_append_c(dest, '?');
 		else
-			str_append_c(dest, src[i]);
+			str_append_n(dest, src+i, len);
 		i += len;
 	}
 
 	if (src[i] != '\0') {
-		str_truncate(dest, str_len(dest) <= 3 ? 0 : str_len(dest)-3);
+		if (max_len < 3)
+			str_truncate(dest, initial_pos);
+		else {
+			while (str_len(dest) - initial_pos > max_len-3)
+				str_sanitize_truncate_char(dest, initial_pos);
+		}
 		str_append(dest, "...");
 	}
 }
@@ -68,7 +96,6 @@
 		return src;
 
 	str = t_str_new(I_MIN(max_len, 256));
-	str_append_n(str, src, i);
-	str_sanitize_append(str, src + i, max_len - i);
+	str_sanitize_append(str, src, max_len);
 	return str_c(str);
 }
diff -r 58466077dff9 -r ee12ce691bd3 src/lib/test-str-sanitize.c
--- a/src/lib/test-str-sanitize.c	Fri Jan 30 03:12:37 2015 +0200
+++ b/src/lib/test-str-sanitize.c	Fri Jan 30 11:52:33 2015 +0200
@@ -1,6 +1,7 @@
 /* Copyright (c) 2007-2015 Dovecot authors, see the included COPYING file */
 
 #include "test-lib.h"
+#include "str.h"
 #include "str-sanitize.h"
 
 struct str_sanitize_test {
@@ -19,11 +20,21 @@
 		{ "abc",   2, "..." },
 		{ "abcd",  3, "..." },
 		{ "abcde", 4, "a..." },
-		{ "с",    10, NULL },
-		{ "с",     1, NULL },
+		{ "\xD1\x81",     1, "..." },
+		{ "\xD1\x81",     2, "\xD1\x81" },
+		{ "\xD1\x81",     3, NULL },
+		{ "\xC3\xA4\xC3\xA4zyxa", 1, "..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 2, "..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 3, "..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 4, "..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 5, "\xC3\xA4..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 6, "\xC3\xA4..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 7, "\xC3\xA4\xC3\xA4..." },
+		{ "\xC3\xA4\xC3\xA4zyxa", 8, "\xC3\xA4\xC3\xA4zyxa" },
 		{ "\001x\x1fy\x81", 10, "?x?y?" }
 	};
 	const char *str;
+	string_t *str2;
 	unsigned int i;
 
 	test_begin("str_sanitize");
@@ -35,4 +46,21 @@
 			test_assert_idx(str == tests[i].str, i);
 	}
 	test_end();
+
+	test_begin("str_sanitize_append");
+	str2 = t_str_new(128);
+	for (i = 0; i < N_ELEMENTS(tests); i++) {
+		if (tests[i].str == NULL)
+			continue;
+		str_truncate(str2, 0);
+		str_append(str2, "1234567890");
+		str_sanitize_append(str2, tests[i].str, tests[i].max_len);
+
+		test_assert_idx(strncmp(str_c(str2), "1234567890", 10) == 0, i);
+		if (tests[i].sanitized != NULL)
+			test_assert_idx(strcmp(str_c(str2)+10, tests[i].sanitized) == 0, i);
+		else
+			test_assert_idx(strcmp(str_c(str2)+10, tests[i].str) == 0, i);
+	}
+	test_end();
 }


More information about the dovecot-cvs mailing list