7 typedef unsigned int utf32; /* value type returned by read_utf8_char() and consumed by obstack_grow_symbol() */
8 #define UTF32_PRINTF_FORMAT "%u" /* printf conversion matching utf32's underlying type (unsigned int) */
11 * "parse" a UTF-8 character from a string.
12 * Warning: This function only works for valid UTF-8 input. The behaviour
13 * is undefined for invalid UTF-8 input.
15 * @param p A pointer to a pointer into the string. The pointer
16 * is incremented for each consumed char
18 static inline utf32 read_utf8_char(const char **p)
20 const unsigned char *c = (const unsigned char *) *p;
23 if ((*c & 0x80) == 0) {
24 /* 1 character encoding: 0b0??????? */
26 } else if ((*c & 0xE0) == 0xC0) {
27 /* 2 character encoding: 0b110?????, 0b10?????? */
29 result = (result << 6) | (*c++ & 0x3F);
30 } else if ((*c & 0xF0) == 0xE0) {
31 /* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */
33 result = (result << 6) | (*c++ & 0x3F);
34 result = (result << 6) | (*c++ & 0x3F);
36 /* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
37 assert((*c & 0xF8) == 0xF0);
39 result = (result << 6) | (*c++ & 0x3F);
40 result = (result << 6) | (*c++ & 0x3F);
41 result = (result << 6) | (*c++ & 0x3F);
48 static inline void obstack_grow_symbol(struct obstack *obstack, utf32 const tc)
51 obstack_1grow(obstack, tc);
52 } else if (tc < 0x800) {
53 obstack_1grow(obstack, 0xC0 | (tc >> 6));
54 obstack_1grow(obstack, 0x80 | (tc & 0x3F));
55 } else if (tc < 0x10000) {
56 obstack_1grow(obstack, 0xE0 | ( tc >> 12));
57 obstack_1grow(obstack, 0x80 | ((tc >> 6) & 0x3F));
58 obstack_1grow(obstack, 0x80 | ( tc & 0x3F));
60 obstack_1grow(obstack, 0xF0 | ( tc >> 18));
61 obstack_1grow(obstack, 0x80 | ((tc >> 12) & 0x3F));
62 obstack_1grow(obstack, 0x80 | ((tc >> 6) & 0x3F));
63 obstack_1grow(obstack, 0x80 | ( tc & 0x3F));