X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;ds=sidebyside;f=string_rep.h;h=f3a1e6b882533142d588a4dd942ecb6648ddff2f;hb=c8dfa04bfc2911fd2e49853003d43a46e0073291;hp=873563b38726c145ba833dc67451b4d4ad5d5ac3;hpb=2beaa4f65961fe297663e1cec9e5632b7f3e1cba;p=cparser diff --git a/string_rep.h b/string_rep.h index 873563b..f3a1e6b 100644 --- a/string_rep.h +++ b/string_rep.h @@ -20,8 +20,8 @@ #ifndef STRING_REP_H #define STRING_REP_H -#include #include +#include "unicode.h" typedef struct string_t { const char *begin; /**< UTF-8 encoded string, the last character is @@ -29,47 +29,6 @@ typedef struct string_t { size_t size; /**< size of string in bytes (not characters) */ } string_t; -typedef unsigned int utf32; -#define UTF32_PRINTF_FORMAT "%u" - -/** - * "parse" an utf8 character from a string. - * Warning: This function only works for valid utf-8 inputs. The behaviour - * is undefined for invalid utf-8 input. - * - * @param p A pointer to a pointer into the string. The pointer - * is incremented for each consumed char - */ -static inline utf32 read_utf8_char(const char **p) -{ - const unsigned char *c = (const unsigned char *) *p; - utf32 result; - - if ((*c & 0x80) == 0) { - /* 1 character encoding: 0b0??????? */ - result = *c++; - } else if ((*c & 0xE0) == 0xC0) { - /* 2 character encoding: 0b110?????, 0b10?????? */ - result = *c++ & 0x1F; - result = (result << 6) | (*c++ & 0x3F); - } else if ((*c & 0xF0) == 0xE0) { - /* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */ - result = *c++ & 0x0F; - result = (result << 6) | (*c++ & 0x3F); - result = (result << 6) | (*c++ & 0x3F); - } else { - /* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */ - assert((*c & 0xF8) == 0xF0); - result = *c++ & 0x07; - result = (result << 6) | (*c++ & 0x3F); - result = (result << 6) | (*c++ & 0x3F); - result = (result << 6) | (*c++ & 0x3F); - } - - *p = (const char*) c; - return result; -} - static inline size_t wstrlen(const string_t *string) { size_t result = 0;