/********************************************************/
/*  ntapi: Native API core library                      */
/*  Copyright (C) 2013--2021 SysDeer Technologies, LLC  */
/*  Released under GPLv2 and GPLv3; see COPYING.NTAPI.  */
/********************************************************/

/* NOTE(review): the next three directives carry no header name in   */
/* this copy of the file (the names appear to have been lost); they  */
/* must be restored from the upstream source before this compiles.   */
#include
#include
#include
#include "ntapi_impl.h"

/* overlay for two consecutive bytes of a utf-8 sequence; */
/* low is the byte at the lower address (stream order).   */
typedef struct ___two_bytes {
	unsigned char	low;
	unsigned char	high;
} __two_bytes;


/* overlay for three consecutive bytes of a utf-8 sequence; */
/* low is the byte at the lowest address (stream order).    */
typedef struct ___three_bytes {
	unsigned char	low;
	unsigned char	middle;
	unsigned char	high;
} __three_bytes;


/* stores the single byte at ch, zero-extended, into *dst. */
/* performs no validation of its own.                      */
static void __utf8_to_utf16_handler_1byte_or_null_termination(wchar16_t * dst, const unsigned char * ch)
{
	/***************************/
	/* from: 0xxxxxxx          */
	/* to:   00000000 0xxxxxxx */
	/***************************/

	*dst = *ch;
}


/* combines the two bytes at ch into one utf-16 unit at *dst.     */
/* performs no validation: the xor operations below only clear    */
/* the marker bits when the bytes already have the expected form. */
static void __utf8_to_utf16_handler_2bytes(wchar16_t * dst, const unsigned char * ch)
{
	/***************************/
	/* from: 110yyyyy 10xxxxxx */
	/* to:   00000yyy yyxxxxxx */
	/***************************/

	__two_bytes *	src; /* big endian */

	src = (__two_bytes *)ch;

	/* yyyyy: strip the 110 marker, then make room for xxxxxx */
	*dst = (src->low ^ 0xC0);
	*dst <<= 6;

	/* xxxxxx: strip the 10 continuation marker */
	*dst |= (src->high ^ 0x80);
}


/* combines the three bytes at ch into one utf-16 unit at *dst.   */
/* performs no validation: the xor operations below only clear    */
/* the marker bits when the bytes already have the expected form. */
static void __utf8_to_utf16_handler_3bytes(wchar16_t * dst, const unsigned char * ch)
{
	/************************************/
	/* from: 1110zzzz 10yyyyyy 10xxxxxx */
	/* to:   zzzzyyyy yyxxxxxx          */
	/************************************/

	__three_bytes *	src; /* big endian */
	wchar16_t	yyyyy;

	src = (__three_bytes *)ch;

	/* zzzz: strip the 1110 marker, move to bits 15..12 */
	*dst = (src->low ^ 0xE0);
	*dst <<= 12;

	/* yyyyyy: strip the 10 marker, move to bits 11..6 */
	yyyyy = (src->middle ^ 0x80);
	yyyyy <<= 6;
	*dst |= yyyyy;

	/* xxxxxx: strip the 10 marker, bits 5..0 */
	*dst |= (src->high ^ 0x80);
}


/* combines the four bytes at ch into two utf-16 units (lead and  */
/* trail surrogate) written to dst[0] and dst[1].                 */
/* performs no validation: the bit manipulation below is only     */
/* meaningful when the bytes already have the expected form.      */
static void __utf8_to_utf16_handler_4bytes(wchar16_t * dst, const unsigned char * ch)
{
	/*************************************************/
	/* from: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx     */
	/* to:   110110ww wwzzzzyy 110111yy yyxxxxxx     */
	/*************************************************/

	__two_bytes *	src_low;  /* big endian */
	__two_bytes *	src_high; /* big endian */
	wchar16_t *	dst_lead;
	wchar16_t *	dst_trail;

	wchar16_t	wwww;
	wchar16_t	lead;
	wchar16_t	trail;

	unsigned char	ulow;
	unsigned char	uhigh;
	unsigned char	yy;
	unsigned char	yyyy;
	unsigned char	zzzz;

	/* dst_lead refers to dst[0], dst_trail to dst[1] */
	dst_lead = dst_trail = (wchar16_t *)dst;
	dst_trail++;

	/* src_low covers bytes 0..1, src_high covers bytes 2..3 */
	src_low = src_high = (__two_bytes *)ch;
	src_high++;

	/* uuuuu: three bits from byte 0 (shifted up by two), */
	/* two bits from the top of byte 1's payload          */
	ulow   = src_low->low  ^ 0xF0;
	uhigh  = src_low->high ^ 0x80;

	ulow  <<= 2;
	uhigh >>= 4;

	/* wwww: uuuuu minus one, positioned at bits 9..6 */
	wwww    = (ulow | uhigh) - 1;
	wwww  <<= 6;

	/* 110110ww wwzzzzyy */
	/* yy: top two payload bits of byte 2 */
	yy     = src_high->low ^ 0x80;
	yy   >>= 4;

	/* zzzz: the left shift on an unsigned char discards the */
	/* upper nibble of byte 1; the result lands in bits 5..2 */
	zzzz   = src_low->high;
	zzzz <<= 4;
	zzzz >>= 2;

	lead   = 0xD800;
	lead  |= wwww;
	lead  |= zzzz;
	lead  |= yy;

	/* 110111yy yyxxxxxx */
	/* yyyy: assigning to an unsigned char keeps only the low */
	/* four payload bits of byte 2 (in the upper nibble); the */
	/* further shift by two places them at bits 9..6          */
	yyyy   = src_high->low << 4;
	trail  = yyyy << 2;
	trail |= src_high->high ^ 0x80;
	trail |= 0xDC00;

	/* write */
	*dst_lead  = lead;
	*dst_trail = trail;
}


/* copies an argument vector and an environment vector into one     */
/* caller-provided block.                                           */
/*                                                                  */
/* block layout, in order:                                          */
/*   - pointers for: interp (if set), optarg (if set),              */
/*     script or else argv[0] (if set), then argv[1..]              */
/*   - a null pointer                                               */
/*   - pointers for each envp entry                                 */
/*   - a null pointer                                               */
/*   - the string bytes themselves, each null-terminated            */
/*                                                                  */
/* each stored pointer is the string's address inside buffer minus  */
/* (ptrdiff_t)base; presumably this lets the block be addressed     */
/* relative to a different base -- confirm against callers.         */
/*                                                                  */
/* argc:    receives the number of argument pointers (terminator    */
/*          excluded); written even if the buffer is too small.     */
/* argv:    null-terminated vector; a null argv is treated as empty.*/
/* envp:    null-terminated vector; a null envp is treated as empty.*/
/* blklen:  optional; receives the number of bytes required.        */
/*                                                                  */
/* returns NT_STATUS_BUFFER_TOO_SMALL when buflen is smaller than   */
/* the required size (nothing is written to buffer in that case),   */
/* NT_STATUS_SUCCESS otherwise.                                     */
int32_t __stdcall __ntapi_tt_array_copy_utf8(
	__out	int *			argc,
	__in	const char **		argv,
	__in	const char **		envp,
	__in	const char *		interp,
	__in	const char *		optarg,
	__in	const char *		script,
	__in	void *			base,
	__out	void *			buffer,
	__in	size_t			buflen,
	__out	size_t *		blklen)
{
	const char **	parg;
	const char *	arg;
	const char *	mark;
	char *		ch;
	ptrdiff_t	diff;
	ptrdiff_t	ptrs;
	size_t		needed;
	const char *	dummy[2] = {0,0};

	/* fallback: two null slots, so that both argv[0] and */
	/* argv[1] may be read below                          */
	argv = argv ? argv : dummy;
	envp = envp ? envp : dummy;

	/* ptrs, needed */
	ptrs   = 0;
	needed = 0;

	/* each string costs one pointer slot, its bytes, and a null  */
	/* terminator; tt_string_null_offset_multibyte is presumably  */
	/* the byte offset of the terminator -- defined elsewhere     */

	/* interp */
	if (interp) {
		ptrs++;
		needed += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(interp)
			+ sizeof(char);
	}

	/* optarg */
	if (optarg) {
		ptrs++;
		needed += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(optarg)
			+ sizeof(char);
	}

	/* script / argv[0]: script, when set, takes the place of argv[0] */
	if ((mark = script ? script : argv[0])) {
		ptrs++;
		needed += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(mark)
			+ sizeof(char);
	}

	/* argv */
	/* NOTE(review): argv[1] is read without first checking argv[0]; */
	/* a caller-supplied vector holding only a terminator would be   */
	/* read one slot past its end -- confirm callers never pass one. */
	for (parg=&argv[1]; *parg; parg++)
		needed += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(*parg)
			+ sizeof(char);

	ptrs += (parg - &argv[1]);
	*argc = (int)ptrs;

	/* envp */
	for (parg=envp; *parg; parg++)
		needed += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(*parg)
			+ sizeof(char);

	ptrs += (parg - envp);

	/* the two null pointers terminating the argv and envp tables */
	ptrs   += 2;
	needed += 2*sizeof(char *);

	/* report the required size; blklen is optional */
	blklen  = blklen ? blklen : &needed;
	*blklen = needed;

	if (buflen < needed)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* init: pointer table at the start of buffer, */
	/* string bytes right after the last slot      */
	parg = (const char **)buffer;
	ch   = (char *)(parg+ptrs);
	diff = (ptrdiff_t)base;

	/* interp */
	if (interp) {
		*parg++ = ch-diff;
		for (arg=interp; *arg; arg++,ch++)
			*ch = *arg;
		*ch++ = '\0';
	}

	/* optarg */
	if (optarg) {
		*parg++ = ch-diff;
		for (arg=optarg; *arg; arg++,ch++)
			*ch = *arg;
		*ch++ = '\0';
	}

	/* script / argv[0] */
	if ((mark = script ? script : argv[0])) {
		*parg++ = ch-diff;
		for (arg=mark; *arg; arg++,ch++)
			*ch = *arg;
		*ch++ = '\0';
	}

	/* argv: remaining arguments, starting at argv[1] */
	for (++argv; *argv; argv++) {
		*parg++=ch-diff;
		for (arg=*argv; *arg; arg++,ch++)
			*ch = *arg;
		*ch++ = '\0';
	}

	/* terminate the argument table */
	*parg++ = 0;

	/* envp */
	for (; *envp; envp++) {
		*parg++=ch-diff;
		for (arg=*envp; *arg; arg++,ch++)
			*ch = *arg;
		*ch++ = '\0';
	}

	/* terminate the environment table */
	*parg++ = 0;

	return NT_STATUS_SUCCESS;
}


/* conversion handlers, indexed by the byte count of the utf-8 */
/* sequence (1..4); slot 0 is unused and holds a null pointer. */
static void (*__utf8_to_utf16_handlers[5])(wchar16_t *, const unsigned char *) = {
	0,
	__utf8_to_utf16_handler_1byte_or_null_termination,
	__utf8_to_utf16_handler_2bytes,
	__utf8_to_utf16_handler_3bytes,
	__utf8_to_utf16_handler_4bytes};


/* converts the utf-8 strings of arrv to utf-16 strings stored in   */
/* buffer.                                                          */
/*                                                                  */
/* base:          must be a multiple of sizeof(wchar16_t), else     */
/*                NT_STATUS_INVALID_PARAMETER_3 is returned.        */
/* buffer_len:    size of buffer in bytes.                          */
/* bytes_written: receives the number of bytes stored in buffer,    */
/*                including the final extra null unit.              */
/*                                                                  */
/* returns NT_STATUS_ILLEGAL_CHARACTER when a byte sequence matches */
/* none of the patterns tested below, NT_STATUS_BUFFER_TOO_SMALL    */
/* per the check following the loops, NT_STATUS_SUCCESS otherwise.  */
/*                                                                  */
/* NOTE(review): part of this function's text is missing in this    */
/* copy of the file (see the marker inside the body); how arrv,     */
/* warrv, diff and wdiff are used cannot be determined from here.   */
int32_t __stdcall __ntapi_tt_array_convert_utf8_to_utf16(
	__in	char **			arrv,
	__out	wchar16_t **		warrv,
	__out	void *			base,
	__out	wchar16_t *		buffer,
	__in	size_t			buffer_len,
	__out	size_t *		bytes_written)
{
	wchar16_t *	ubound;
	wchar16_t *	wch;
	ptrdiff_t	diff;
	ptrdiff_t	wdiff;
	char *		ch;
	const uint8_t *	utf8;
	uint8_t		byte_count;

	if ((uintptr_t)base % sizeof(wchar16_t))
		return NT_STATUS_INVALID_PARAMETER_3;

	/* base as a byte offset (diff) and as a wchar16_t offset (wdiff) */
	wch    = buffer;
	diff   = (ptrdiff_t)base;
	wdiff  = (ptrdiff_t)base / sizeof(wchar16_t);

	/* ubound: three units before the end of the buffer */
	/* NOTE(review): for buffer_len below three units this places */
	/* ubound before buffer -- confirm callers' minimum size.     */
	ubound = buffer;
	ubound += buffer_len / sizeof(wchar16_t);
	ubound--;
	ubound--;
	ubound--;

	/* NOTE(review): the text between `(wch` and `= 0xC2)` below is  */
	/* missing in this copy of the file. judging by the closing      */
	/* braces further down, the lost text held the rest of this loop */
	/* header, a nested per-character loop header, and the branch(es)*/
	/* preceding the two-byte test (including, presumably, the reset */
	/* of byte_count). restore from the upstream source.             */
	for (; arrv && *arrv && (wch= 0xC2) && (utf8[0] <= 0xDF)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0xBF))
				byte_count = 2;

			/* try three bytes */
			/* (++ch) advances ch as a side effect; it is only  */
			/* evaluated once the preceding tests have failed.  */
			/* lead 0xE0: second byte restricted to 0xA0..0xBF  */
			else if ((++ch)
					&& (utf8[0] == 0xE0)
					&& (utf8[1] >= 0xA0) && (utf8[1] <= 0xBF)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF))
				byte_count = 3;

			else if (
					   (utf8[0] >= 0xE1) && (utf8[0] <= 0xEC)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0xBF)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF))
				byte_count = 3;

			/* lead 0xED: second byte restricted to 0x80..0x9F, */
			/* which excludes the range U+D800..U+DFFF          */
			else if (
					   (utf8[0] == 0xED)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0x9F)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF))
				byte_count = 3;

			else if (
					   (utf8[0] >= 0xEE) && (utf8[0] <= 0xEF)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0xBF)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF))
				byte_count = 3;

			/* try four bytes */
			/* lead 0xF0: second byte restricted to 0x90..0xBF */
			else if ((++ch)
					&& (utf8[0] == 0xF0)
					&& (utf8[1] >= 0x90) && (utf8[1] <= 0xBF)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF)
					&& (utf8[3] >= 0x80) && (utf8[3] <= 0xBF))
				byte_count = 4;

			else if (
					   (utf8[0] >= 0xF1) && (utf8[0] <= 0xF3)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0xBF)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF)
					&& (utf8[3] >= 0x80) && (utf8[3] <= 0xBF))
				byte_count = 4;

			/* lead 0xF4: second byte restricted to 0x80..0x8F */
			else if (
					   (utf8[0] == 0xF4)
					&& (utf8[1] >= 0x80) && (utf8[1] <= 0x8F)
					&& (utf8[2] >= 0x80) && (utf8[2] <= 0xBF)
					&& (utf8[3] >= 0x80) && (utf8[3] <= 0xBF))
				byte_count = 4;

			if (byte_count) {
				/* convert, then advance by one unit, or by two */
				/* units for a four-byte sequence (4 >> 2 == 1) */
				__utf8_to_utf16_handlers[byte_count](wch,utf8);
				wch = &wch[byte_count >> 2];
				wch++;
			} else {
				return NT_STATUS_ILLEGAL_CHARACTER;
			}
		}

		/* null unit written after the inner block */
		*wch++ = 0;
	}

	/* NOTE(review): this is an equality test; whether wch can move */
	/* past ubound (e.g. after a two-unit write plus terminator)    */
	/* depends on the loop conditions missing above -- verify.      */
	if (wch == ubound)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* one further null unit, and a null entry in the output vector */
	*wch++ = 0;
	*warrv = 0;

	*bytes_written = sizeof(wchar16_t) * (wch - buffer);

	return NT_STATUS_SUCCESS;
}