/*
 * This file is part of cparser.
 * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
/**
 * A byte string that carries its length explicitly.
 */
typedef struct string_t {
	const char *begin; /**< UTF-8 encoded string, the last character is
	                    * guaranteed to be 0 */
	size_t      size;  /**< size of string in bytes (not characters) */
} string_t;
/** Holds one decoded character value, as returned by read_utf8_char(). */
typedef unsigned int utf32;
/** printf conversion specification matching the utf32 type. */
#define UTF32_PRINTF_FORMAT "%u"

/**
 * "parse" an utf8 character from a string.
 * Warning: This function only works for valid utf-8 inputs. The behaviour
 * is undefined for invalid utf-8 input.
 *
 * @param p    A pointer to a pointer into the string. The pointer
 *             is incremented for each consumed char
 * @return     The decoded character value
 */
static inline utf32 read_utf8_char(const char **p)
{
	/* Work on unsigned bytes so the bit tests below are not affected by
	 * the signedness of plain char. */
	const unsigned char *c = (const unsigned char *) *p;
	utf32                result;

	if ((*c & 0x80) == 0) {
		/* 1 character encoding: 0b0??????? */
		result = *c++;
	} else if ((*c & 0xE0) == 0xC0) {
		/* 2 character encoding: 0b110?????, 0b10?????? */
		result = *c++ & 0x1F;
		result = (result << 6) | (*c++ & 0x3F);
	} else if ((*c & 0xF0) == 0xE0) {
		/* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */
		result = *c++ & 0x0F;
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
	} else {
		/* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
		/* Any other lead byte is not handled; the assert only fires in
		 * builds where NDEBUG is not defined. */
		assert((*c & 0xF8) == 0xF0);
		result = *c++ & 0x07;
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
		result = (result << 6) | (*c++ & 0x3F);
	}

	/* Hand the advanced position back to the caller. */
	*p = (const char *) c;
	return result;
}
73 static inline size_t wstrlen(const string_t *string)
76 const char *p = string->begin;
77 const char *end = p + string->size;