dovecot-2.2: lib: Added uni_utf8_partial_strlen_n()
dovecot at dovecot.org
dovecot at dovecot.org
Sat Jan 10 02:31:45 UTC 2015
details: http://hg.dovecot.org/dovecot-2.2/rev/f191dbcaec5f
changeset: 18145:f191dbcaec5f
user: Timo Sirainen <tss at iki.fi>
date: Sat Jan 10 04:30:40 2015 +0200
description:
lib: Added uni_utf8_partial_strlen_n()
diffstat:
src/lib/test-unichar.c | 16 ++++++++++++++++
src/lib/unichar.c | 18 ++++++++++++++----
src/lib/unichar.h | 6 ++++++
3 files changed, 36 insertions(+), 4 deletions(-)
diffs (83 lines):
diff -r 7459c0891a85 -r f191dbcaec5f src/lib/test-unichar.c
--- a/src/lib/test-unichar.c Sat Jan 10 04:25:21 2015 +0200
+++ b/src/lib/test-unichar.c Sat Jan 10 04:30:40 2015 +0200
@@ -5,6 +5,20 @@
#include "buffer.h"
#include "unichar.h"
+static void test_unichar_uni_utf8_partial_strlen_n(void)
+{
+ static const char input[] = "\xC3\xA4\xC3\xA4";
+ size_t pos;
+
+ test_begin("uni_utf8_partial_strlen_n()");
+ test_assert(uni_utf8_partial_strlen_n(input, 1, &pos) == 0 && pos == 0);
+ test_assert(uni_utf8_partial_strlen_n(input, 2, &pos) == 1 && pos == 2);
+ test_assert(uni_utf8_partial_strlen_n(input, 3, &pos) == 1 && pos == 2);
+ test_assert(uni_utf8_partial_strlen_n(input, 4, &pos) == 2 && pos == 4);
+ test_assert(uni_utf8_partial_strlen_n(input, (size_t)-1, &pos) == 2 && pos == 4);
+ test_end();
+}
+
void test_unichar(void)
{
static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
@@ -32,4 +46,6 @@
test_assert(!uni_utf8_str_is_valid(overlong_utf8));
test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
test_end();
+
+ test_unichar_uni_utf8_partial_strlen_n();
}
diff -r 7459c0891a85 -r f191dbcaec5f src/lib/unichar.c
--- a/src/lib/unichar.c Sat Jan 10 04:25:21 2015 +0200
+++ b/src/lib/unichar.c Sat Jan 10 04:30:40 2015 +0200
@@ -192,18 +192,28 @@
return uni_utf8_strlen_n(input, (size_t)-1);
}
-unsigned int uni_utf8_strlen_n(const void *_input, size_t size)
+unsigned int uni_utf8_strlen_n(const void *input, size_t size)
+{
+ size_t partial_pos;
+
+ return uni_utf8_partial_strlen_n(input, size, &partial_pos);
+}
+
+unsigned int uni_utf8_partial_strlen_n(const void *_input, size_t size,
+ size_t *partial_pos_r)
{
const unsigned char *input = _input;
- unsigned int len = 0;
+ unsigned int count, len = 0;
size_t i;
for (i = 0; i < size && input[i] != '\0'; ) {
- i += uni_utf8_char_bytes(input[i]);
- if (i > size)
+ count = uni_utf8_char_bytes(input[i]);
+ if (i + count > size)
break;
+ i += count;
len++;
}
+ *partial_pos_r = i;
return len;
}
diff -r 7459c0891a85 -r f191dbcaec5f src/lib/unichar.h
--- a/src/lib/unichar.h Sat Jan 10 04:25:21 2015 +0200
+++ b/src/lib/unichar.h Sat Jan 10 04:30:40 2015 +0200
@@ -55,6 +55,12 @@
unsigned int uni_utf8_strlen(const char *input) ATTR_PURE;
/* Returns UTF-8 string length with maximum input size. */
unsigned int uni_utf8_strlen_n(const void *input, size_t size) ATTR_PURE;
+/* Same as uni_utf8_strlen_n(), but if input ends with a partial UTF-8
+ character, don't include it in the return value and set partial_pos_r to
+ where the character begins. Otherwise partial_pos_r is set to the end
+ of the input. */
+unsigned int uni_utf8_partial_strlen_n(const void *input, size_t size,
+ size_t *partial_pos_r);
/* Returns the number of bytes belonging to this UTF-8 character. The given
parameter is the first byte of the UTF-8 sequence. Invalid input is
More information about the dovecot-cvs mailing list