/*******************************************************************/
/*  u16ports: u16 variants of wide character string functions.     */
/*  Copyright (C) 2017 SysDeer Technologies, LLC                   */
/*  Released under the Standard MIT License; see COPYING.U16PORTS. */
/*******************************************************************/

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

/**
 *  scalar    nickname   utf-16    utf-8[0]  utf-8[1]  utf-8[2]  utf-8[3]
 *  ------    --------   --------  --------  --------  --------  --------
 *  00000000  7x         00000000  0xxxxxxx
 *  0xxxxxxx             0xxxxxxx
 *
 *  00000yyy  5y6x       00000yyy  110yyyyy  10xxxxxx
 *  yyxxxxxx             yyxxxxxx
 *
 *  zzzzyyyy  4z6y6x     zzzzyyyy  1110zzzz  10yyyyyy  10xxxxxx
 *  yyxxxxxx             yyxxxxxx
 *
 *  000uuuuu  5u4z6y6x   110110ww  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx
 *  zzzzyyyy             wwzzzzyy
 *  yyxxxxxx             110111yy
 *                       yyxxxxxx  (where wwww = uuuuu - 1)
 *
**/

/**
 * u16_wcstombs: convert a zero-terminated utf-16 string to utf-8.
 *
 * s:   destination buffer, or NULL to only compute the number of
 *      utf-8 bytes (excluding the terminator) that the conversion
 *      of src would produce; n is ignored in that case.
 * src: zero-terminated sequence of utf-16 code units.
 * n:   capacity of s in bytes.
 *
 * returns the number of bytes stored in s, not counting the null
 * terminator. the terminator is stored only when the entire source
 * was converted and at least one byte of capacity remains; when the
 * output fills the buffer exactly, conversion stops and the result
 * is left unterminated. with n == 0 nothing is written and 0 is
 * returned.
 *
 * returns (size_t)-1 when:
 *  - src contains a low surrogate that is not preceded by a high
 *    surrogate, or a high surrogate that is not followed by a low
 *    surrogate;
 *  - a multi-byte sequence does not fit in the remaining capacity
 *    (bytes of preceding characters have already been stored);
 *  - n does not fit in ssize_t.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	size_t		ret;
	ssize_t		len;
	char *		dst;
	uint16_t	hi;
	uint16_t	lo;
	uint16_t	wwww;
	uint16_t	zzzz;
	uint16_t	yy_high;
	uint16_t	yyyy_low;
	uint16_t	xxxxxx;
	uint16_t	uuuuu;

	/* counting mode: no destination buffer */
	if (!s) {
		for (ret=0; *src; src++) {
			if (*src <= 0x7F) {
				ret += 1;

			} else if (*src <= 0x7FF) {
				ret += 2;

			} else if ((*src < 0xD800) || (*src >= 0xE000)) {
				ret += 3;

			} else if (src[0] >= 0xDC00) {
				/* low surrogate without a leading high surrogate */
				return (size_t)-1;

			} else if ((src[1] < 0xDC00) || (src[1] >= 0xE000)) {
				/* high surrogate without a trailing low surrogate; */
				/* this also covers a terminator right after it.    */
				return (size_t)-1;

			} else {
				/* the pair consumes two code units */
				ret += 4;
				src++;
			}
		}

		return ret;
	}

	/* the remaining capacity is tracked as a signed quantity */
	if ((len = (ssize_t)n) < 0)
		return (size_t)-1;

	/* an empty buffer must never be written to; the loop below relies */
	/* on at least one byte of capacity at the top of every iteration. */
	if (len == 0)
		return 0;

	for (dst=s; *src; src++) {
		if (*src <= 0x7F) {
			/* 7x: single byte; room is guaranteed (len > 0) */
			*dst++ = (char)*src;
			len--;

		} else if (*src <= 0x7FF) {
			/* 5y6x: two bytes */
			if ((len -= 2) < 0)
				return (size_t)-1;

			*dst++ = (char)(0xC0 | (*src >> 6));
			*dst++ = (char)(0x80 | (*src & 0x3F));

		} else if ((*src < 0xD800) || (*src >= 0xE000)) {
			/* 4z6y6x: three bytes */
			if ((len -= 3) < 0)
				return (size_t)-1;

			*dst++ = (char)(0xE0 | (*src >> 12));
			*dst++ = (char)(0x80 | ((*src >> 6) & 0x3F));
			*dst++ = (char)(0x80 | (*src & 0x3F));

		} else if (src[0] >= 0xDC00) {
			/* low surrogate without a leading high surrogate */
			return (size_t)-1;

		} else if ((src[1] < 0xDC00) || (src[1] >= 0xE000)) {
			/* high surrogate without a trailing low surrogate */
			return (size_t)-1;

		} else {
			/* 5u4z6y6x: surrogate pair, four bytes */
			if ((len -= 4) < 0)
				return (size_t)-1;

			hi = src[0];	/* 110110ww wwzzzzyy */
			lo = src[1];	/* 110111yy yyxxxxxx */

			wwww     = (hi >> 6) & 0x0F;
			zzzz     = (hi >> 2) & 0x0F;
			yy_high  = hi & 0x03;
			yyyy_low = (lo >> 6) & 0x0F;
			xxxxxx   = lo & 0x3F;

			/* wwww = uuuuu - 1 */
			uuuuu = wwww + 1;

			*dst++ = (char)(0xF0 | (uuuuu >> 2));
			*dst++ = (char)(0x80 | ((uuuuu & 0x03) << 4) | zzzz);
			*dst++ = (char)(0x80 | (yy_high << 4) | yyyy_low);
			*dst++ = (char)(0x80 | xxxxxx);

			/* skip the low surrogate (the loop skips the high one) */
			src++;
		}

		/* buffer filled exactly: stop, leaving the output unterminated */
		if (len == 0)
			return (size_t)(dst - s);
	}

	/* the entire source was converted and capacity remains: terminate */
	if (len > 0)
		*dst = 0;

	return (size_t)(dst - s);
}