summaryrefslogtreecommitdiffhomepage
path: root/src/u16_wcstombs.c
blob: b0cd9f0cc477835d26a00ae513474d01fecd1cd4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*******************************************************************/
/*  u16ports: u16 variants of wide character string functions.     */
/*  Copyright (C) 2017  SysDeer Technologies, LLC                  */
/*  Released under the Standard MIT License; see COPYING.U16PORTS. */
/*******************************************************************/

#include <stdint.h>
#include <u16ports/u16ports.h>

/**
 *  scalar	nickname	utf-16		utf-8[0]  utf-8[1]  utf-8[2]  utf-8[3]
 *  ------	--------	--------	--------  --------  --------  --------
 *  00000000	7x		00000000	0xxxxxxx
 *  0xxxxxxx			0xxxxxxx
 *
 *  00000yyy	5y6x		00000yyy	110yyyyy  10xxxxxx
 *  yyxxxxxx			yyxxxxxx
 *
 *  zzzzyyyy	4z6y6x		zzzzyyyy	1110zzzz  10yyyyyy  10xxxxxx
 *  yyxxxxxx			yyxxxxxx
 *
 *  000uuuuu	5u4z6y6x	110110ww	11110uuu  10uuzzzz  10yyyyyy  10xxxxxx
 *  zzzzyyyy			wwzzzzyy
 *  yyxxxxxx			110111yy
 *				yyxxxxxx        (where wwww = uuuuu - 1)
 *
**/

/**
 *  u16_wcstombs: convert a null-terminated utf-16 string to utf-8.
 *
 *  s:    destination buffer, or NULL to only compute the required size.
 *  src:  null-terminated sequence of utf-16 code units.
 *  n:    capacity of s in bytes; not consulted when s is NULL.
 *
 *  returns the number of utf-8 bytes produced (or, when s is NULL, the
 *  number that would be produced), not counting any terminating null.
 *
 *  returns (size_t)-1 when:
 *    - src contains an unpaired or out-of-order surrogate;
 *    - n is larger than SIZE_MAX/2;
 *    - the next character needs more bytes than remain in s (this
 *      includes n == 0 with a non-empty src); bytes already stored
 *      in s are left in place.
 *
 *  conversion stops, returning the byte count so far, as soon as s has
 *  been filled exactly; in that case no terminating null is stored.
 *  otherwise a terminating null follows the last converted character.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	size_t		ret;
	size_t		avail;
	char *		dst;
	uint16_t	hi;
	uint16_t	lo;
	uint16_t	uuuuu;

	/* size query: validate and count without storing anything */
	if (!s) {
		for (ret=0; *src; src++) {
			if (*src <= 0x7F) {
				ret += 1;

			} else if (*src <= 0x7FF) {
				ret += 2;

			} else if ((*src < 0xD800) || (*src >= 0xE000)) {
				ret += 3;

			} else if (src[0] >= 0xDC00) {
				/* low surrogate without a preceding high one */
				return (size_t)-1;

			} else if ((src[1] < 0xDC00) || (src[1] >= 0xE000)) {
				/* high surrogate not followed by a low one */
				/* (a terminating null lands here as well)  */
				return (size_t)-1;

			} else {
				/* (surrogate pair) */
				ret += 4;
				src++;
			}
		}

		return ret;
	}

	/* capacities above SIZE_MAX/2 are rejected */
	if (n > SIZE_MAX / 2)
		return (size_t)-1;

	for (dst=s, avail=n; *src; src++) {
		if (*src <= 0x7F) {
			/* only reachable with avail == 0 when n == 0; */
			/* without this check the store below would    */
			/* run past the end of a zero-sized buffer.    */
			if (avail < 1)
				return (size_t)-1;

			*dst++ = (char)*src;
			avail -= 1;

		} else if (*src <= 0x7FF) {
			if (avail < 2)
				return (size_t)-1;

			/* 110yyyyy 10xxxxxx */
			*dst++ = (char)(0xC0 | (*src >> 6));
			*dst++ = (char)(0x80 | (*src & 0x3F));
			avail -= 2;

		} else if ((*src < 0xD800) || (*src >= 0xE000)) {
			if (avail < 3)
				return (size_t)-1;

			/* 1110zzzz 10yyyyyy 10xxxxxx */
			*dst++ = (char)(0xE0 | (*src >> 12));
			*dst++ = (char)(0x80 | ((*src >> 6) & 0x3F));
			*dst++ = (char)(0x80 | (*src & 0x3F));
			avail -= 3;

		} else if (src[0] >= 0xDC00) {
			/* low surrogate without a preceding high one */
			return (size_t)-1;

		} else if ((src[1] < 0xDC00) || (src[1] >= 0xE000)) {
			/* high surrogate not followed by a low one */
			return (size_t)-1;

		} else {
			if (avail < 4)
				return (size_t)-1;

			/* high surrogate: 110110ww wwzzzzyy */
			hi = src[0];

			/* low surrogate:  110111yy yyxxxxxx */
			lo = src[1];

			/* (surrogate pair) */
			src++;

			/* uuuuu = wwww + 1 */
			uuuuu = ((hi >> 6) & 0x0F) + 1;

			/* 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx */
			*dst++ = (char)(0xF0 | (uuuuu >> 2));
			*dst++ = (char)(0x80 | ((uuuuu & 0x03) << 4) | ((hi >> 2) & 0x0F));
			*dst++ = (char)(0x80 | ((hi & 0x03) << 4) | ((lo >> 6) & 0x0F));
			*dst++ = (char)(0x80 | (lo & 0x3F));
			avail -= 4;
		}

		/* buffer filled exactly: stop here, no terminating null */
		if (avail == 0)
			return (size_t)(dst - s);
	}

	/* terminate only when there is room for it */
	if (avail)
		*dst = 0;

	return (size_t)(dst - s);
}