/* Integer type used to hold a single decoded character value, as returned
 * by read_utf8_char().
 * NOTE(review): unsigned int is only guaranteed to be at least 16 bits wide
 * by the C standard -- confirm it is 32 bits on all target platforms. */
6 typedef unsigned int utf32;
/* printf conversion specifier for a utf32 value; "%u" matches the
 * unsigned int underlying type of the typedef above. */
7 #define UTF32_PRINTF_FORMAT "%u"
10 * "Parse" a UTF-8 character from a string.
11 * Warning: This function only works for valid UTF-8 input. The behaviour
12 * is undefined for invalid UTF-8 input.
14 * @param p A pointer to a pointer into the string. The pointer
15 * is incremented for each consumed char
17 static inline utf32 read_utf8_char(const char **p)
19 const unsigned char *c = (const unsigned char *) *p;
22 if ((*c & 0x80) == 0) {
23 /* 1 character encoding: 0b0??????? */
25 } else if ((*c & 0xE0) == 0xC0) {
26 /* 2 character encoding: 0b110?????, 0b10?????? */
28 result = (result << 6) | (*c++ & 0x3F);
29 } else if ((*c & 0xF0) == 0xE0) {
30 /* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */
32 result = (result << 6) | (*c++ & 0x3F);
33 result = (result << 6) | (*c++ & 0x3F);
35 /* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
36 assert((*c & 0xF8) == 0xF0);
38 result = (result << 6) | (*c++ & 0x3F);
39 result = (result << 6) | (*c++ & 0x3F);
40 result = (result << 6) | (*c++ & 0x3F);