dovecot-2.2: lib: Various fixes to str_sanitize*()
dovecot at dovecot.org
dovecot at dovecot.org
Fri Jan 30 09:54:57 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/ee12ce691bd3
changeset: 18206:ee12ce691bd3
user: Timo Sirainen <tss at iki.fi>
date: Fri Jan 30 11:52:33 2015 +0200
description:
lib: Various fixes to str_sanitize*()
- UTF-8 sequences could have been truncated to become partial sequences
- Truncation may not have happened at all to text containing UTF-8
- str_sanitize_append() might have truncated the string beyond what we were
appending (cutting into dest's pre-existing content), although that happened
only if max_len was very small
diffstat:
src/lib/str-sanitize.c | 43 +++++++++++++++++++++++++++++++++++--------
src/lib/test-str-sanitize.c | 32 ++++++++++++++++++++++++++++++--
2 files changed, 65 insertions(+), 10 deletions(-)
diffs (143 lines):
diff -r 58466077dff9 -r ee12ce691bd3 src/lib/str-sanitize.c
--- a/src/lib/str-sanitize.c Fri Jan 30 03:12:37 2015 +0200
+++ b/src/lib/str-sanitize.c Fri Jan 30 11:52:33 2015 +0200
@@ -11,26 +11,49 @@
unichar_t chr;
size_t i;
- for (i = 0; i < max_len; ) {
+ for (i = 0; src[i] != '\0'; ) {
len = uni_utf8_char_bytes(src[i]);
- if (uni_utf8_get_char(src+i, &chr) <= 0)
+ if (i + len > max_len || uni_utf8_get_char(src+i, &chr) <= 0)
break;
if ((unsigned char)src[i] < 32)
break;
i += len;
}
+ i_assert(i <= max_len);
return i;
}
+static void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos)
+{
+ const unsigned char *data = str_data(dest);
+ unsigned int len = str_len(dest);
+
+ if (len == initial_pos)
+ return;
+ if ((data[len-1] & 0x80) == 0) {
+ str_truncate(dest, len-1);
+ return;
+ }
+ /* truncate UTF-8 sequence. */
+ while (len > 0 && (data[len-1] & 0xc0) == 0x80)
+ len--;
+ if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
+ len--;
+ if (len >= initial_pos)
+ str_truncate(dest, len);
+}
+
void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
{
- unsigned int len;
+ unsigned int len, initial_pos = str_len(dest);
unichar_t chr;
size_t i;
int ret;
- for (i = 0; i < max_len && src[i] != '\0'; ) {
+ for (i = 0; src[i] != '\0'; ) {
len = uni_utf8_char_bytes(src[i]);
+ if (i + len > max_len)
+ break;
ret = uni_utf8_get_char(src+i, &chr);
if (ret <= 0) {
/* invalid UTF-8 */
@@ -45,12 +68,17 @@
if ((unsigned char)src[i] < 32)
str_append_c(dest, '?');
else
- str_append_c(dest, src[i]);
+ str_append_n(dest, src+i, len);
i += len;
}
if (src[i] != '\0') {
- str_truncate(dest, str_len(dest) <= 3 ? 0 : str_len(dest)-3);
+ if (max_len < 3)
+ str_truncate(dest, initial_pos);
+ else {
+ while (str_len(dest) - initial_pos > max_len-3)
+ str_sanitize_truncate_char(dest, initial_pos);
+ }
str_append(dest, "...");
}
}
@@ -68,7 +96,6 @@
return src;
str = t_str_new(I_MIN(max_len, 256));
- str_append_n(str, src, i);
- str_sanitize_append(str, src + i, max_len - i);
+ str_sanitize_append(str, src, max_len);
return str_c(str);
}
diff -r 58466077dff9 -r ee12ce691bd3 src/lib/test-str-sanitize.c
--- a/src/lib/test-str-sanitize.c Fri Jan 30 03:12:37 2015 +0200
+++ b/src/lib/test-str-sanitize.c Fri Jan 30 11:52:33 2015 +0200
@@ -1,6 +1,7 @@
/* Copyright (c) 2007-2015 Dovecot authors, see the included COPYING file */
#include "test-lib.h"
+#include "str.h"
#include "str-sanitize.h"
struct str_sanitize_test {
@@ -19,11 +20,21 @@
{ "abc", 2, "..." },
{ "abcd", 3, "..." },
{ "abcde", 4, "a..." },
- { "с", 10, NULL },
- { "с", 1, NULL },
+ { "\xD1\x81", 1, "..." },
+ { "\xD1\x81", 2, "\xD1\x81" },
+ { "\xD1\x81", 3, NULL },
+ { "\xC3\xA4\xC3\xA4zyxa", 1, "..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 2, "..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 3, "..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 4, "..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 5, "\xC3\xA4..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 6, "\xC3\xA4..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 7, "\xC3\xA4\xC3\xA4..." },
+ { "\xC3\xA4\xC3\xA4zyxa", 8, "\xC3\xA4\xC3\xA4zyxa" },
{ "\001x\x1fy\x81", 10, "?x?y?" }
};
const char *str;
+ string_t *str2;
unsigned int i;
test_begin("str_sanitize");
@@ -35,4 +46,21 @@
test_assert_idx(str == tests[i].str, i);
}
test_end();
+
+ test_begin("str_sanitize_append");
+ str2 = t_str_new(128);
+ for (i = 0; i < N_ELEMENTS(tests); i++) {
+ if (tests[i].str == NULL)
+ continue;
+ str_truncate(str2, 0);
+ str_append(str2, "1234567890");
+ str_sanitize_append(str2, tests[i].str, tests[i].max_len);
+
+ test_assert_idx(strncmp(str_c(str2), "1234567890", 10) == 0, i);
+ if (tests[i].sanitized != NULL)
+ test_assert_idx(strcmp(str_c(str2)+10, tests[i].sanitized) == 0, i);
+ else
+ test_assert_idx(strcmp(str_c(str2)+10, tests[i].str) == 0, i);
+ }
+ test_end();
}
More information about the dovecot-cvs
mailing list