dovecot-2.0: UTF-8 string validity was still checked incorrectly.

dovecot at dovecot.org dovecot at dovecot.org
Thu Aug 19 20:06:27 EEST 2010


details:   http://hg.dovecot.org/dovecot-2.0/rev/a83963495e55
changeset: 12010:a83963495e55
user:      Timo Sirainen <tss at iki.fi>
date:      Thu Aug 19 18:06:22 2010 +0100
description:
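UTF-8 string validity was still checked incorrectly: is_valid_utf8_seq()
rejected the valid continuation byte 0xbf and called uni_utf8_char_bytes()
on continuation bytes, although that function expects the first byte of a
UTF-8 sequence.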

diffstat:

 src/lib/unichar.c |  5 ++---
 src/lib/unichar.h |  5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diffs (37 lines):

diff -r 706f30fa4028 -r a83963495e55 src/lib/unichar.c
--- a/src/lib/unichar.c	Wed Aug 18 16:24:13 2010 +0100
+++ b/src/lib/unichar.c	Thu Aug 19 18:06:22 2010 +0100
@@ -316,7 +316,7 @@
 static inline unsigned int
 is_valid_utf8_seq(const unsigned char *input, unsigned int size)
 {
-	size_t i, len;
+	unsigned int i, len;
 
 	len = uni_utf8_char_bytes(input[0]);
 	if (unlikely(len > size || len == 1))
@@ -325,8 +325,7 @@
 	/* the rest of the chars should be in 0x80..0xbf range.
 	   anything else is start of a sequence or invalid */
 	for (i = 1; i < len; i++) {
-		if (unlikely(uni_utf8_char_bytes(input[i]) != len-i ||
-			     input[i] < 0x80 || input[i] >= 0xbf))
+		if (unlikely(input[i] < 0x80 || input[i] > 0xbf))
 			return 0;
 	}
 	return len;
diff -r 706f30fa4028 -r a83963495e55 src/lib/unichar.h
--- a/src/lib/unichar.h	Wed Aug 18 16:24:13 2010 +0100
+++ b/src/lib/unichar.h	Thu Aug 19 18:06:22 2010 +0100
@@ -43,8 +43,9 @@
 /* Returns UTF-8 string length with maximum input size. */
 unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE;
 
-/* Returns the number of bytes belonging to this partial UTF-8 character.
-   Invalid input is returned with length 1. */
+/* Returns the number of bytes belonging to this UTF-8 character. The given
+   parameter is the first byte of the UTF-8 sequence. Invalid input is
+   returned with length 1. */
 static inline unsigned int ATTR_CONST
 uni_utf8_char_bytes(char chr)
 {


More information about the dovecot-cvs mailing list