/*
 * This file is part of cparser.
 * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
 */
/** Type used to hold a single decoded character value (code point). */
typedef unsigned int utf32;
/** printf conversion specification matching the utf32 typedef. */
#define UTF32_PRINTF_FORMAT "%u"

/**
 * "parse" an utf8 character from a string.
 * Warning: This function only works for valid utf-8 inputs. The behaviour
 * is undefined for invalid utf-8 input (in particular, continuation bytes
 * are not validated and a truncated sequence is read past its end).
 *
 * @param p    A pointer to a pointer into the string. The pointer
 *             is incremented for each consumed char
 * @return     The decoded character value.
 */
static inline utf32 read_utf8_char(const char **p)
{
	/* Work on unsigned bytes so the bit masks below are not affected by the
	 * implementation-defined signedness of plain char. */
	const unsigned char *c = (const unsigned char *) *p;
	utf32                result;

	if ((*c & 0x80) == 0) {
		/* 1 character encoding: 0b0??????? */
		result = *c++;
	} else if ((*c & 0xE0) == 0xC0) {
		/* 2 character encoding: 0b110?????, 0b10?????? */
		result = *c++ & 0x1F;
		result = (result << 6) | (*c++ & 0x3F);
	} else if ((*c & 0xF0) == 0xE0) {
		/* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */
		result = *c++ & 0x0F;
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
	} else {
		/* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
		assert((*c & 0xF8) == 0xF0);
		result = *c++ & 0x07;
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
	}

	/* Report back how many bytes were consumed. */
	*p = (const char *) c;
	return result;
}
52 static inline void obstack_grow_utf8(struct obstack *const obst, utf32 const c)
55 obstack_1grow(obst, c);
56 } else if (c < 0x800) {
57 obstack_1grow(obst, 0xC0 | (c >> 6));
59 } else if (c < 0x10000) {
60 obstack_1grow(obst, 0xE0 | (c >> 12));
63 obstack_1grow(obst, 0xF0 | (c >> 18));
64 obstack_1grow(obst, 0x80 | ((c >> 12) & 0x3F));
66 obstack_1grow(obst, 0x80 | ((c >> 6) & 0x3F));
68 obstack_1grow(obst, 0x80 | ( c & 0x3F));