/*******************************************************************/
/*  u16ports: u16 variants of wide character string functions.     */
/*  Copyright (C) 2017  Z. Gilboa                                  */
/*  Released under the Standard MIT License; see COPYING.U16PORTS. */
/*******************************************************************/

/* provenance: commit 587b675 ("added u16_wcstombs()"), 2017-11-11 */

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

/**
 *  scalar    nickname   utf-16     utf-8[0]   utf-8[1]   utf-8[2]   utf-8[3]
 *  ------    --------   --------   --------   --------   --------   --------
 *  00000000  7x         00000000   0xxxxxxx
 *  0xxxxxxx             0xxxxxxx
 *
 *  00000yyy  5y6x       00000yyy   110yyyyy   10xxxxxx
 *  yyxxxxxx             yyxxxxxx
 *
 *  zzzzyyyy  4z6y6x     zzzzyyyy   1110zzzz   10yyyyyy   10xxxxxx
 *  yyxxxxxx             yyxxxxxx
 *
 *  000uuuuu  5u4z6y6x   110110ww   11110uuu   10uuzzzz   10yyyyyy   10xxxxxx
 *  zzzzyyyy             wwzzzzyy
 *  yyxxxxxx             110111yy
 *                       yyxxxxxx   (where wwww = uuuuu - 1)
 *
**/

/**
 * decodes the scalar value that starts at src[0] (which must not be
 * the terminating null unit) and stores it in *cp.
 *
 * returns the length in bytes of the scalar's utf-8 encoding (1..4),
 * or 0 if src[0] is a low surrogate, or a high surrogate that is not
 * followed by a low surrogate. a return value of 4 means that two
 * utf-16 units (src[0] and src[1]) were consumed; otherwise only one.
 * *cp is left untouched when 0 is returned.
**/
static size_t u16_wcstombs_scalar(const uint16_t * src, uint32_t * cp)
{
	uint16_t	hi;
	uint16_t	lo;

	hi = src[0];

	if ((hi < 0xD800) || (hi >= 0xE000)) {
		*cp = hi;

		return (hi <= 0x7F)
			? 1
			: (hi <= 0x7FF) ? 2 : 3;
	}

	/* unpaired low surrogate */
	if (hi >= 0xDC00)
		return 0;

	/* src[1] may be the terminating null, which is rejected here too */
	lo = src[1];

	if ((lo < 0xDC00) || (lo >= 0xE000))
		return 0;

	/* the added 0x10000 is the (wwww = uuuuu - 1) of the above table */
	*cp  = 0x10000;
	*cp += ((uint32_t)(hi & 0x3FF)) << 10;
	*cp += lo & 0x3FF;

	return 4;
}

/**
 * u16_wcstombs: converts the null-terminated utf-16 string src to utf-8.
 *
 * s:   destination buffer, or NULL for a length query.
 * src: null-terminated utf-16 source string.
 * n:   size in bytes of the destination buffer; ignored when s is NULL.
 *
 * when s is NULL, nothing is written, and the return value is the number
 * of bytes that a complete conversion would produce, not including the
 * terminating null byte.
 *
 * otherwise, the return value is the number of bytes written to s, not
 * including the terminating null byte; the terminating null byte is only
 * written when the converted string occupies fewer than n bytes. when
 * n is zero, nothing is written and zero is returned.
 *
 * (size_t)-1 is returned in each of the following cases:
 * - src contains an unpaired surrogate;
 * - n does not fit in ssize_t;
 * - a utf-8 sequence does not fit in the remaining room. note that
 *   this differs from ISO C wcstombs(), which would rather stop and
 *   return the number of bytes written thus far.
 *
 * upon failure, bytes which had already been written to s remain there.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	unsigned char *	dst;
	size_t		ret;
	size_t		room;
	size_t		need;
	uint32_t	cp;

	/* length query */
	if (!s) {
		for (ret=0; *src; src++) {
			if (!(need = u16_wcstombs_scalar(src,&cp)))
				return (size_t)-1;

			/* (surrogate pair) */
			if (need == 4)
				src++;

			ret += need;
		}

		return ret;
	}

	if ((ssize_t)n < 0)
		return (size_t)-1;

	/* no room at all: must not write even a single byte */
	if (n == 0)
		return 0;

	dst  = (unsigned char *)s;
	room = n;

	for (; *src; src++) {
		if (!(need = u16_wcstombs_scalar(src,&cp)))
			return (size_t)-1;

		if (need > room)
			return (size_t)-1;

		switch (need) {
			case 1:
				*dst++ = (unsigned char)cp;
				break;

			case 2:
				*dst++ = (unsigned char)(0xC0 | (cp >> 6));
				*dst++ = (unsigned char)(0x80 | (cp & 0x3F));
				break;

			case 3:
				*dst++ = (unsigned char)(0xE0 | (cp >> 12));
				*dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
				*dst++ = (unsigned char)(0x80 | (cp & 0x3F));
				break;

			default:
				*dst++ = (unsigned char)(0xF0 | (cp >> 18));
				*dst++ = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
				*dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
				*dst++ = (unsigned char)(0x80 | (cp & 0x3F));

				/* (surrogate pair) */
				src++;
				break;
		}

		room -= need;

		/* buffer is full: return without a terminating null byte */
		if (room == 0)
			return n;
	}

	/* room is non-zero here, since a full buffer returns from the loop */
	*dst = 0;

	return (size_t)(dst - (unsigned char *)s);
}