move grow_symbol function into unicode.h
[cparser] / unicode.h
1 #ifndef UNICODE_H
2 #define UNICODE_H
3
4 #include <assert.h>
5 #include "adt/obst.h"
6
/* Type used in this header for a single decoded character value: it is what
 * read_utf8_char() returns and what obstack_grow_symbol() takes as input.
 * NOTE(review): presumably unsigned int is expected to be wide enough for
 * every value the 4-byte UTF-8 form can carry (21 bits) -- confirm for the
 * supported targets. */
7 typedef unsigned int utf32;
/* printf conversion specification matching utf32 (prints it as an unsigned
 * decimal number). */
8 #define UTF32_PRINTF_FORMAT "%u"
9
10 /**
11  * "parse" an utf8 character from a string.
12  * Warning: This function only works for valid utf-8 inputs. The behaviour
13  * is undefined for invalid utf-8 input.
14  *
15  * @param p    A pointer to a pointer into the string. The pointer
16  *             is incremented for each consumed char
17  */
18 static inline utf32 read_utf8_char(const char **p)
19 {
20         const unsigned char *c      = (const unsigned char *) *p;
21         utf32                result;
22
23         if ((*c & 0x80) == 0) {
24                 /* 1 character encoding: 0b0??????? */
25                 result = *c++;
26         } else if ((*c & 0xE0) == 0xC0) {
27                 /* 2 character encoding: 0b110?????, 0b10?????? */
28                 result = *c++ & 0x1F;
29                 result = (result << 6) | (*c++ & 0x3F);
30         } else if ((*c & 0xF0) == 0xE0) {
31                 /* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */
32                 result = *c++ & 0x0F;
33                 result = (result << 6) | (*c++ & 0x3F);
34                 result = (result << 6) | (*c++ & 0x3F);
35         } else {
36                 /* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
37                 assert((*c & 0xF8) == 0xF0);
38                 result = *c++ & 0x07;
39                 result = (result << 6) | (*c++ & 0x3F);
40                 result = (result << 6) | (*c++ & 0x3F);
41                 result = (result << 6) | (*c++ & 0x3F);
42         }
43
44         *p = (const char*) c;
45         return result;
46 }
47
48 static inline void obstack_grow_symbol(struct obstack *obstack, utf32 const tc)
49 {
50         if (tc < 0x80U) {
51                 obstack_1grow(obstack, tc);
52         } else if (tc < 0x800) {
53                 obstack_1grow(obstack, 0xC0 | (tc >> 6));
54                 obstack_1grow(obstack, 0x80 | (tc & 0x3F));
55         } else if (tc < 0x10000) {
56                 obstack_1grow(obstack, 0xE0 | ( tc >> 12));
57                 obstack_1grow(obstack, 0x80 | ((tc >>  6) & 0x3F));
58                 obstack_1grow(obstack, 0x80 | ( tc        & 0x3F));
59         } else {
60                 obstack_1grow(obstack, 0xF0 | ( tc >> 18));
61                 obstack_1grow(obstack, 0x80 | ((tc >> 12) & 0x3F));
62                 obstack_1grow(obstack, 0x80 | ((tc >>  6) & 0x3F));
63                 obstack_1grow(obstack, 0x80 | ( tc        & 0x3F));
64         }
65 }
66
67 #endif