implement uchar.h (C11 UTF-16/32 conversion) interfaces
author Rich Felker <dalias@aerifal.cx>
Tue, 14 Oct 2014 00:59:42 +0000 (20:59 -0400)
committer Rich Felker <dalias@aerifal.cx>
Tue, 14 Oct 2014 00:59:42 +0000 (20:59 -0400)
include/alltypes.h.in
include/uchar.h [new file with mode: 0644]
include/wchar.h
src/multibyte/c16rtomb.c [new file with mode: 0644]
src/multibyte/c32rtomb.c [new file with mode: 0644]
src/multibyte/mbrtoc16.c [new file with mode: 0644]
src/multibyte/mbrtoc32.c [new file with mode: 0644]

index c4ca5d5..98c2f3b 100644 (file)
@@ -58,6 +58,8 @@ TYPEDEF struct { unsigned __attr[2]; } pthread_rwlockattr_t;
 
 TYPEDEF struct _IO_FILE FILE;
 
+TYPEDEF struct __mbstate_t { unsigned __opaque1, __opaque2; } mbstate_t;
+
 TYPEDEF struct __locale_struct * locale_t;
 
 TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t;
diff --git a/include/uchar.h b/include/uchar.h
new file mode 100644 (file)
index 0000000..8dabf1e
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _UCHAR_H
+#define _UCHAR_H
+
+/* C11 <uchar.h>: conversions between multibyte characters and
+ * UTF-16 (char16_t) / UTF-32 (char32_t) code units. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* C++11 and later make char16_t and char32_t built-in keywords, so the
+ * typedefs are needed only for C (where __cplusplus is undefined and
+ * evaluates to 0 here) and for pre-C++11 C++. */
+#if __cplusplus < 201103L
+typedef unsigned short char16_t;
+typedef unsigned char32_t;
+#endif
+
+/* Select the type definitions to pull in from <bits/alltypes.h>. */
+#define __NEED_mbstate_t
+#define __NEED_size_t
+
+#include <features.h>
+#include <bits/alltypes.h>
+
+size_t c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);
+size_t mbrtoc16(char16_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
+
+size_t c32rtomb(char *__restrict, char32_t, mbstate_t *__restrict);
+size_t mbrtoc32(char32_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 9fd967c..52da639 100644 (file)
@@ -12,6 +12,7 @@ extern "C" {
 #define __NEED_size_t
 #define __NEED_wchar_t
 #define __NEED_wint_t
+#define __NEED_mbstate_t
 
 #if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
  || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
@@ -42,11 +43,6 @@ extern "C" {
 #undef WEOF
 #define WEOF 0xffffffffU
 
-typedef struct __mbstate_t
-{
-       unsigned __opaque1, __opaque2;
-} mbstate_t;
-
 wchar_t *wcscpy (wchar_t *__restrict, const wchar_t *__restrict);
 wchar_t *wcsncpy (wchar_t *__restrict, const wchar_t *__restrict, size_t);
 
diff --git a/src/multibyte/c16rtomb.c b/src/multibyte/c16rtomb.c
new file mode 100644 (file)
index 0000000..2e8ec97
--- /dev/null
@@ -0,0 +1,54 @@
+#include <uchar.h>
+#include <errno.h>
+#include <wchar.h>
+
+/* Convert one UTF-16 code unit to its multibyte form via wcrtomb().
+ *
+ * s   - output buffer. If null, nothing is converted: the call returns 1
+ *       when no high surrogate is pending and fails with EILSEQ otherwise.
+ * c16 - UTF-16 code unit to convert.
+ * ps  - conversion state that carries a pending high surrogate between
+ *       calls. If null, a state private to this function is used.
+ *
+ * Returns 0 after storing a high surrogate that still needs its low
+ * surrogate, (size_t)-1 with errno set to EILSEQ when a pending high
+ * surrogate is not followed by a low surrogate, and otherwise whatever
+ * wcrtomb() returns for the resulting wide character. */
+size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+       static mbstate_t internal_state;
+       unsigned *x;
+       wchar_t wc;
+
+       /* C11 requires a function-private state when ps is null. */
+       if (!ps) ps = &internal_state;
+       x = (unsigned *)ps;
+
+       if (!s) {
+               if (*x) goto ilseq;
+               return 1;
+       }
+
+       /* High surrogate with nothing pending: save its contribution to
+        * the code point and wait for the low surrogate. */
+       if (!*x && c16 - 0xd800u < 0x400) {
+               *x = (c16 - 0xd7c0) << 10;
+               return 0;
+       }
+
+       if (*x) {
+               /* A pending high surrogate must be completed by a low one. */
+               if (c16 - 0xdc00u >= 0x400) goto ilseq;
+               wc = *x + c16 - 0xdc00;
+               *x = 0;
+       } else {
+               wc = c16;
+       }
+       return wcrtomb(s, wc, 0);
+
+ilseq:
+       /* Drop any pending surrogate so the next call starts clean. */
+       *x = 0;
+       errno = EILSEQ;
+       return -1;
+}
diff --git a/src/multibyte/c32rtomb.c b/src/multibyte/c32rtomb.c
new file mode 100644 (file)
index 0000000..6785132
--- /dev/null
@@ -0,0 +1,10 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Convert a char32_t to its multibyte form by delegating to wcrtomb();
+ * s, ps and the return value are passed straight through. */
+size_t c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+       wchar_t wc = c32;
+       return wcrtomb(s, wc, ps);
+}
diff --git a/src/multibyte/mbrtoc16.c b/src/multibyte/mbrtoc16.c
new file mode 100644 (file)
index 0000000..74b7d77
--- /dev/null
@@ -0,0 +1,48 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Convert the next multibyte character in s (at most n bytes examined)
+ * to UTF-16 using mbrtowc().
+ *
+ * pc16 - where to store the resulting code unit; may be null.
+ * s    - multibyte input. If null, behaves as mbrtoc16(0, "", 1, ps).
+ * n    - maximum number of bytes of s to examine.
+ * ps   - conversion state. If null, a state private to this function is
+ *        used.
+ *
+ * A character at or above 0x10000 takes two calls: the first stores the
+ * high surrogate and returns mbrtowc()'s result, and the next stores the
+ * saved low surrogate and returns (size_t)-3 without reading s.
+ * Otherwise the return value is that of mbrtowc(). */
+size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+       static mbstate_t internal_state;
+       unsigned *pending;
+
+       /* C11 requires a function-private state when ps is null. */
+       if (!ps) ps = &internal_state;
+       pending = (unsigned *)ps;
+
+       if (!s) return mbrtoc16(0, "", 1, ps);
+
+       /* mbrtowc states for partial UTF-8 characters have the high bit set;
+        * we use nonzero states without high bit for pending surrogates. */
+       if ((int)*pending > 0) {
+               if (pc16) *pc16 = *pending;
+               *pending = 0;
+               return -3;
+       }
+
+       wchar_t wc;
+       size_t ret = mbrtowc(&wc, s, n, ps);
+       if (ret <= 4) {
+               /* ret <= 4 excludes the (size_t)-1 and (size_t)-2 results;
+                * split code points beyond the BMP into a surrogate pair. */
+               if (wc >= 0x10000) {
+                       *pending = (wc & 0x3ff) + 0xdc00;
+                       wc = 0xd7c0 + (wc >> 10);
+               }
+               if (pc16) *pc16 = wc;
+       }
+       return ret;
+}
diff --git a/src/multibyte/mbrtoc32.c b/src/multibyte/mbrtoc32.c
new file mode 100644 (file)
index 0000000..c6d2082
--- /dev/null
@@ -0,0 +1,27 @@
+#include <uchar.h>
+#include <wchar.h>
+
+/* Convert the next multibyte character in s (at most n bytes examined)
+ * to a char32_t using mbrtowc().
+ *
+ * pc32 - where to store the result; may be null.
+ * s    - multibyte input. If null, behaves as mbrtoc32(0, "", 1, ps).
+ * n    - maximum number of bytes of s to examine.
+ * ps   - conversion state. If null, a state private to this function is
+ *        used rather than the one internal to mbrtowc().
+ *
+ * Returns the value returned by mbrtowc(). */
+size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+       static mbstate_t internal_state;
+
+       /* C11 requires a function-private state when ps is null. */
+       if (!ps) ps = &internal_state;
+       if (!s) return mbrtoc32(0, "", 1, ps);
+
+       wchar_t wc;
+       size_t ret = mbrtowc(&wc, s, n, ps);
+       /* ret <= 4 excludes the (size_t)-1 and (size_t)-2 results. */
+       if (ret <= 4 && pc32) *pc32 = wc;
+       return ret;
+}