dovecot-2.2: lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
dovecot at dovecot.org
dovecot at dovecot.org
Sat Jan 10 02:27:04 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/7459c0891a85
changeset: 18144:7459c0891a85
user: Timo Sirainen <tss at iki.fi>
date: Sat Jan 10 04:25:21 2015 +0200
description:
lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Instead the incomplete input was just being modified into broken output.
diffstat:
src/lib-charset/charset-iconv.c | 26 +++++---------------------
src/lib-charset/charset-utf8.c | 26 +++++++++++++++++++++-----
src/lib-charset/charset-utf8.h | 5 +++++
3 files changed, 31 insertions(+), 26 deletions(-)
diffs (107 lines):
diff -r 55184e2a689f -r 7459c0891a85 src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-iconv.c Sat Jan 10 04:25:21 2015 +0200
@@ -53,20 +53,6 @@
(void)iconv(t->cd, NULL, NULL, NULL, NULL);
}
-static int
-charset_append_utf8(struct charset_translation *t,
- const void *src, size_t src_size, buffer_t *dest)
-{
- if (t->normalizer != NULL)
- return t->normalizer(src, src_size, dest);
- else if (!uni_utf8_get_valid_data(src, src_size, dest))
- return -1;
- else {
- buffer_append(dest, src, src_size);
- return 0;
- }
-}
-
static bool
charset_to_utf8_try(struct charset_translation *t,
const unsigned char *src, size_t *src_size, buffer_t *dest,
@@ -74,15 +60,12 @@
{
ICONV_CONST char *ic_srcbuf;
char tmpbuf[8192], *ic_destbuf;
- size_t srcleft, destleft;
+ size_t srcleft, destleft, tmpbuf_used;
bool ret = TRUE;
if (t->cd == (iconv_t)-1) {
/* input is already supposed to be UTF-8 */
- if (charset_append_utf8(t, src, *src_size, dest) < 0)
- *result = CHARSET_RET_INVALID_INPUT;
- else
- *result = CHARSET_RET_OK;
+ *result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
return TRUE;
}
destleft = sizeof(tmpbuf);
@@ -109,8 +92,9 @@
/* we just converted data to UTF-8. it shouldn't be invalid, but
Solaris iconv appears to pass invalid data through sometimes
(e.g. 8 bit characters with UTF-7) */
- if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft,
- dest) < 0)
+ tmpbuf_used = sizeof(tmpbuf) - destleft;
+ if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
+ &tmpbuf_used, dest) != CHARSET_RET_OK)
*result = CHARSET_RET_INVALID_INPUT;
return ret;
}
diff -r 55184e2a689f -r 7459c0891a85 src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.c Sat Jan 10 04:25:21 2015 +0200
@@ -70,15 +70,31 @@
charset_to_utf8(struct charset_translation *t,
const unsigned char *src, size_t *src_size, buffer_t *dest)
{
- if (t->normalizer != NULL) {
- if (t->normalizer(src, *src_size, dest) < 0)
+ return charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+}
+
+#endif
+
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+ const unsigned char *src, size_t *src_size, buffer_t *dest)
+{
+ enum charset_result res = CHARSET_RET_OK;
+ size_t pos;
+
+ uni_utf8_partial_strlen_n(src, *src_size, &pos);
+ if (pos < *src_size) {
+ *src_size = pos;
+ res = CHARSET_RET_INCOMPLETE_INPUT;
+ }
+
+ if (normalizer != NULL) {
+ if (normalizer(src, *src_size, dest) < 0)
return CHARSET_RET_INVALID_INPUT;
} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
return CHARSET_RET_INVALID_INPUT;
} else {
buffer_append(dest, src, *src_size);
}
- return CHARSET_RET_OK;
+ return res;
}
-
-#endif
diff -r 55184e2a689f -r 7459c0891a85 src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.h Sat Jan 10 04:25:21 2015 +0200
@@ -32,4 +32,9 @@
const char *input, string_t *output,
enum charset_result *result_r) ATTR_NULL(2);
+/* INTERNAL: */
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+ const unsigned char *src, size_t *src_size, buffer_t *dest);
+
#endif
More information about the dovecot-cvs mailing list