From dd89bb8ad4fe184a34b5dbdda237e640fc82121b Mon Sep 17 00:00:00 2001 From: midipix Date: Mon, 27 Jul 2015 04:01:18 -0400 Subject: entered advanced internal development stage. --- include/ntapi/bits/i386/nt_atomic_i386_asm__gcc.h | 533 +++++++++++++++++++ include/ntapi/bits/i386/nt_atomic_i386_asm__msvc.h | 350 +++++++++++++ include/ntapi/bits/i386/nt_thread_i386.h | 45 ++ include/ntapi/bits/nt_atomic_inline_asm.h | 15 + .../ntapi/bits/x86_64/nt_atomic_x86_64_asm__gcc.h | 571 +++++++++++++++++++++ .../ntapi/bits/x86_64/nt_atomic_x86_64_asm__msvc.h | 350 +++++++++++++ include/ntapi/bits/x86_64/nt_thread_x86_64.h | 104 ++++ 7 files changed, 1968 insertions(+) create mode 100644 include/ntapi/bits/i386/nt_atomic_i386_asm__gcc.h create mode 100644 include/ntapi/bits/i386/nt_atomic_i386_asm__msvc.h create mode 100644 include/ntapi/bits/i386/nt_thread_i386.h create mode 100644 include/ntapi/bits/nt_atomic_inline_asm.h create mode 100644 include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__gcc.h create mode 100644 include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__msvc.h create mode 100644 include/ntapi/bits/x86_64/nt_thread_x86_64.h (limited to 'include/ntapi/bits') diff --git a/include/ntapi/bits/i386/nt_atomic_i386_asm__gcc.h b/include/ntapi/bits/i386/nt_atomic_i386_asm__gcc.h new file mode 100644 index 0000000..f6e11ca --- /dev/null +++ b/include/ntapi/bits/i386/nt_atomic_i386_asm__gcc.h @@ -0,0 +1,533 @@ +#include + +static __inline__ void at_locked_inc( + intptr_t volatile * ptr) +{ + __asm__( + "lock;" + "incl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_inc_32( + int32_t volatile * ptr) +{ + __asm__( + "lock;" + "incl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_inc_64( + int64_t volatile * ptr) +{ + __sync_fetch_and_add(ptr,1); +} + + +static __inline__ void at_locked_dec( + intptr_t volatile * ptr) +{ + __asm__( + "lock;" + "decl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); 
+} + + +static __inline__ void at_locked_dec_32( + int32_t volatile * ptr) +{ + __asm__( + "lock;" + "decl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_dec_64( + int64_t volatile * ptr) +{ + __sync_fetch_and_sub(ptr,1); +} + + +static __inline__ void at_locked_add( + intptr_t volatile * ptr, + intptr_t val) +{ + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_add_32( + int32_t volatile * ptr, + int32_t val) +{ + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_add_64( + int64_t volatile * ptr, + int64_t val) +{ + __sync_fetch_and_add(ptr,val); +} + + +static __inline__ void at_locked_sub( + intptr_t volatile * ptr, + intptr_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_sub_32( + int32_t volatile * ptr, + int32_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_sub_64( + int64_t volatile * ptr, + int64_t val) +{ + __sync_fetch_and_sub(ptr,val); +} + + +static __inline__ intptr_t at_locked_xadd( + intptr_t volatile * ptr, + intptr_t val) +{ + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); + return val; +} + + +static __inline__ int32_t at_locked_xadd_32( + int32_t volatile * ptr, + int32_t val) +{ + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); + return val; +} + + +static __inline__ int64_t at_locked_xadd_64( + int64_t volatile * ptr, + int64_t val) +{ + return __sync_fetch_and_add(ptr,val); +} + + +static __inline__ intptr_t at_locked_xsub( + intptr_t volatile * ptr, + intptr_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddl %1, %0" 
+ : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); + return val; +} + + +static __inline__ int32_t at_locked_xsub_32( + int32_t volatile * ptr, + int32_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); + return val; +} + + +static __inline__ int64_t at_locked_xsub_64( + int64_t volatile * ptr, + int64_t val) +{ + return __sync_fetch_and_sub(ptr,val); +} + + +static __inline__ intptr_t at_locked_cas( + intptr_t volatile * dst, + intptr_t cmp, + intptr_t xchg) +{ + intptr_t ret; + + __asm__( + "lock;" + "cmpxchg %3, %0" + : "=m" (*dst), "=a" (ret) + : "a" (cmp), "r" (xchg) + : "memory"); + + return ret; +} + + +static __inline__ int32_t at_locked_cas_32( + int32_t volatile * dst, + int32_t cmp, + int32_t xchg) +{ + int32_t ret; + + __asm__( + "lock;" + "cmpxchg %3, %0" + : "=m" (*dst), "=a" (ret) + : "a" (cmp), "r" (xchg) + : "memory"); + + return ret; +} + + +static __inline__ int64_t at_locked_cas_64( + int64_t volatile * dst, + int64_t cmp, + int64_t xchg) +{ + __atomic_compare_exchange_n( + dst, + &cmp, + xchg, + 0, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); + + return cmp; +} + + +static __inline__ intptr_t at_locked_and( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + + __asm__( + "lock;" + "andl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int32_t at_locked_and_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "andl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int64_t at_locked_and_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + int64_t cmp; + int64_t xchg; + + do { + cmp = *dst; + xchg = cmp & mask; + ret = at_locked_cas_64(dst,cmp,xchg); + } while (ret != cmp); + + return ret; +} + + +static __inline__ intptr_t at_locked_or( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + 
+ __asm__( + "lock;" + "orl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int32_t at_locked_or_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "orl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int64_t at_locked_or_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + int64_t cmp; + int64_t xchg; + + do { + cmp = *dst; + xchg = cmp | mask; + ret = at_locked_cas_64(dst,cmp,xchg); + } while (ret != cmp); + + return ret; +} + + +static __inline__ intptr_t at_locked_xor( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + + __asm__( + "lock;" + "xorl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memxory"); + + return ret; +} + + +static __inline__ int32_t at_locked_xor_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "xorl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memxory"); + + return ret; +} + + +static __inline__ int64_t at_locked_xor_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + int64_t cmp; + int64_t xchg; + + do { + cmp = *dst; + xchg = cmp ^ mask; + ret = at_locked_cas_64(dst,cmp,xchg); + } while (ret != cmp); + + return ret; +} + + +static __inline__ void at_store( + volatile intptr_t * dst, + intptr_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ void at_store_32( + volatile int32_t * dst, + int32_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ void at_store_64( + volatile int64_t * dst, + int64_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ int at_bsf( + unsigned int * index, + uintptr_t mask) +{ + if (mask) { + __asm__( + "bsf %1, %0" + : "=r" (mask) + : "r" (mask)); + + *index = (int)mask; + return 1; + } else + 
return 0; +} + + +static __inline__ int at_bsr( + unsigned int * index, + uintptr_t mask) +{ + if (mask) { + __asm__( + "bsr %1, %0" + : "=r" (mask) + : "r" (mask)); + + *index = (int)mask; + return 1; + } else + return 0; +} + + +static __inline__ size_t at_popcount( + uintptr_t mask) +{ + __asm__( + "popcnt %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_16( + uint16_t mask) +{ + __asm__( + "popcnt %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_32( + uint32_t mask) +{ + __asm__( + "popcnt %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_64( + uint64_t mask) +{ + int ret = at_popcount_32(mask >> 32); + return ret + ((mask << 32) >> 32); +} diff --git a/include/ntapi/bits/i386/nt_atomic_i386_asm__msvc.h b/include/ntapi/bits/i386/nt_atomic_i386_asm__msvc.h new file mode 100644 index 0000000..c0a0ba8 --- /dev/null +++ b/include/ntapi/bits/i386/nt_atomic_i386_asm__msvc.h @@ -0,0 +1,350 @@ +#include + +long _InterlockedIncrement(long volatile * ptr); +int64_t _InterlockedIncrement64(int64_t volatile * ptr); +long _InterlockedDecrement(long volatile * ptr); +int64_t _InterlockedDecrement64(int64_t volatile * ptr); +long _InterlockedExchangeAdd(long volatile * ptr, long val); +int64_t _InterlockedExchangeAdd64(int64_t volatile * ptr, int64_t val); +long _InterlockedCompareExchange(long volatile * dst, long xchg, long cmp); +int64_t _InterlockedCompareExchange64(int64_t volatile * dst, int64_t xchg, int64_t cmp); +long _InterlockedAnd(long volatile * dst, long mask); +int64_t _InterlockedAnd64(int64_t volatile * dst, int64_t mask); +long _InterlockedOr(long volatile * dst, long mask); +int64_t _InterlockedOr64(int64_t volatile * dst, int64_t mask); +long _InterlockedXor(long volatile * dst, long mask); +int64_t _InterlockedXor64(int64_t volatile * dst, int64_t mask); +uint16_t 
__popcnt16(uint16_t mask); +unsigned int __popcnt(uint32_t mask); +uint64_t __popcnt64(uint64_t mask); +void _ReadWriteBarrier(void); +unsigned char _BitScanForward(unsigned int * index, uintptr_t mask); +unsigned char _BitScanReverse(unsigned int * index, uintptr_t mask); + +static __inline__ void at_locked_inc( + intptr_t volatile * ptr) +{ + _InterlockedIncrement(ptr); + return; +} + + +static __inline__ void at_locked_inc_32( + int32_t volatile * ptr) +{ + _InterlockedIncrement((long *)ptr); + return; +} + + +static __inline__ void at_locked_inc_64( + int64_t volatile * ptr) +{ + _InterlockedIncrement64(ptr); + return; +} + + +static __inline__ void at_locked_dec( + intptr_t volatile * ptr) +{ + _InterlockedDecrement(ptr); + return; +} + + +static __inline__ void at_locked_dec_32( + int32_t volatile * ptr) +{ + _InterlockedDecrement((long *)ptr); + return; +} + + +static __inline__ void at_locked_dec_64( + int64_t volatile * ptr) +{ + _InterlockedDecrement64(ptr); + return; +} + + +static __inline__ void at_locked_add( + intptr_t volatile * ptr, + intptr_t val) +{ + _InterlockedExchangeAdd(ptr, val); + return; +} + + +static __inline__ void at_locked_add_32( + int32_t volatile * ptr, + int32_t val) +{ + _InterlockedExchangeAdd((long *)ptr, val); + return; +} + + +static __inline__ void at_locked_add_64( + int64_t volatile * ptr, + int64_t val) +{ + _InterlockedExchangeAdd64(ptr, val); + return; +} + + +static __inline__ void at_locked_sub( + intptr_t volatile * ptr, + intptr_t val) +{ + _InterlockedExchangeAdd(ptr, -val); + return; +} + + +static __inline__ void at_locked_sub_32( + int32_t volatile * ptr, + int32_t val) +{ + _InterlockedExchangeAdd((long *)ptr, -val); + return; +} + + +static __inline__ void at_locked_sub_64( + int64_t volatile * ptr, + int64_t val) +{ + _InterlockedExchangeAdd64(ptr, -val); + return; +} + + +static __inline__ intptr_t at_locked_xadd( + intptr_t volatile * ptr, + intptr_t val) +{ + return _InterlockedExchangeAdd(ptr, val); +} + 
+ +static __inline__ int32_t at_locked_xadd_32( + int32_t volatile * ptr, + int32_t val) +{ + return _InterlockedExchangeAdd((long *)ptr, val); +} + + +static __inline__ int64_t at_locked_xadd_64( + int64_t volatile * ptr, + int64_t val) +{ + return _InterlockedExchangeAdd64(ptr, val); +} + + +static __inline__ intptr_t at_locked_xsub( + intptr_t volatile * ptr, + intptr_t val) +{ + return _InterlockedExchangeAdd(ptr, -val); +} + + +static __inline__ int32_t at_locked_xsub_32( + int32_t volatile * ptr, + int32_t val) +{ + return _InterlockedExchangeAdd((long *)ptr, -val); +} + + +static __inline__ int64_t at_locked_xsub_64( + int64_t volatile * ptr, + int64_t val) +{ + return _InterlockedExchangeAdd64(ptr, -val); +} + + +static __inline__ intptr_t at_locked_cas( + intptr_t volatile * dst, + intptr_t cmp, + intptr_t xchg) +{ + return _InterlockedCompareExchange(dst,xchg,cmp); +} + + +static __inline__ int32_t at_locked_cas_32( + int32_t volatile * dst, + int32_t cmp, + int32_t xchg) +{ + return _InterlockedCompareExchange((long *)dst,xchg,cmp); +} + + +static __inline__ int64_t at_locked_cas_64( + int64_t volatile * dst, + int64_t cmp, + int64_t xchg) +{ + return _InterlockedCompareExchange64(dst,xchg,cmp); +} + + +static __inline__ intptr_t at_locked_and( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedAnd(dst,mask); +} + + +static __inline__ int32_t at_locked_and_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedAnd((long *)dst,mask); +} + + +static __inline__ int64_t at_locked_and_64( + int64_t volatile * dst, + int64_t mask) +{ + return _InterlockedAnd64(dst,mask); +} + + +static __inline__ intptr_t at_locked_or( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedOr(dst,mask); +} + + +static __inline__ int32_t at_locked_or_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedOr((long *)dst,mask); +} + + +static __inline__ int64_t at_locked_or_64( + int64_t volatile * dst, + int64_t mask) 
+{ + return _InterlockedOr64(dst,mask); +} + + +static __inline__ intptr_t at_locked_xor( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedXor(dst,mask); +} + + +static __inline__ int32_t at_locked_xor_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedXor((long *)dst,mask); +} + + +static __inline__ int64_t at_locked_xor_64( + int64_t volatile * dst, + int64_t mask) +{ + return _InterlockedXor64(dst,mask); +} + + +static __inline__ void at_store( + volatile intptr_t * dst, + intptr_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ void at_store_32( + volatile int32_t * dst, + int32_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ void at_store_64( + volatile int64_t * dst, + int64_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ int at_bsf( + unsigned int * index, + uintptr_t mask) +{ + return (int)_BitScanForward(index,mask); +} + + +static __inline__ int at_bsr( + unsigned int * index, + uintptr_t mask) +{ + return (int)_BitScanReverse(index,mask); +} + + +static __inline__ size_t at_popcount( + uintptr_t mask) +{ + return __popcnt(mask); +} + + +static __inline__ size_t at_popcount_16( + uint16_t mask) +{ + return __popcnt16(mask); +} + + +static __inline__ size_t at_popcount_32( + uint32_t mask) +{ + return __popcnt(mask); +} + + +static __inline__ size_t at_popcount_64( + uint64_t mask) +{ + return (size_t)__popcnt64(mask); +} diff --git a/include/ntapi/bits/i386/nt_thread_i386.h b/include/ntapi/bits/i386/nt_thread_i386.h new file mode 100644 index 0000000..466d129 --- /dev/null +++ b/include/ntapi/bits/i386/nt_thread_i386.h @@ -0,0 +1,45 @@ +#include + +typedef struct _nt_floating_save_area_i386 { + uint32_t uc_ctrl_word; /* 0x000 */ + uint32_t uc_status_word; /* 0x004 */ + uint32_t uc_tag_word; /* 0x008 */ + uint32_t uc_error_offset; /* 0x00c */ + 
uint32_t uc_error_selector; /* 0x010 */ + uint32_t uc_data_offset; /* 0x014 */ + uint32_t uc_data_selector; /* 0x018 */ + unsigned char uc_reg_area[80]; /* 0x01c */ + uint32_t uc_cr0_npx_state; /* 0x06c */ +} nt_floating_save_area_i386; + + +typedef struct _nt_thread_context_i386 { + uint32_t uc_context_flags; /* 0x000 */ + uint32_t uc_dr0; /* 0x004 */ + uint32_t uc_dr1; /* 0x008 */ + uint32_t uc_dr2; /* 0x00c */ + uint32_t uc_dr3; /* 0x010 */ + uint32_t uc_dr6; /* 0x014 */ + uint32_t uc_dr7; /* 0x018 */ + + nt_floating_save_area_i386 + uc_float_save; /* 0x01c */ + + uint32_t uc_seg_gs; /* 0x08c */ + uint32_t uc_seg_fs; /* 0x090 */ + uint32_t uc_seg_es; /* 0x094 */ + uint32_t uc_seg_ds; /* 0x098 */ + uint32_t uc_edi; /* 0x09c */ + uint32_t uc_esi; /* 0x0a0 */ + uint32_t uc_ebx; /* 0x0a4 */ + uint32_t uc_edx; /* 0x0a8 */ + uint32_t uc_ecx; /* 0x0ac */ + uint32_t uc_eax; /* 0x0b0 */ + uint32_t uc_ebp; /* 0x0b4 */ + uint32_t uc_eip; /* 0x0b8 */ + uint32_t uc_seg_cs; /* 0x0bc */ + uint32_t uc_eflags; /* 0x0c0 */ + uint32_t uc_esp; /* 0x0c4 */ + uint32_t uc_seg_ss; /* 0x0c8 */ + unsigned char uc_extended_regs[512]; /* 0x0cc */ +} nt_thread_context_i386; diff --git a/include/ntapi/bits/nt_atomic_inline_asm.h b/include/ntapi/bits/nt_atomic_inline_asm.h new file mode 100644 index 0000000..f749bdf --- /dev/null +++ b/include/ntapi/bits/nt_atomic_inline_asm.h @@ -0,0 +1,15 @@ +#if defined(__X86_MODEL) +#if (__COMPILER__ == __GCC__) +#include "i386/nt_atomic_i386_asm__gcc.h" +#elif (__COMPILER__ == __MSVC__) +#include "i386/nt_atomic_i386_asm__msvc.h" +#endif + +#elif defined(__X86_64_MODEL) +#if (__COMPILER__ == __GCC__) +#include "x86_64/nt_atomic_x86_64_asm__gcc.h" +#elif (__COMPILER__ == __MSVC__) +#include "x86_64/nt_atomic_x86_64_asm__msvc.h" +#endif + +#endif diff --git a/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__gcc.h b/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__gcc.h new file mode 100644 index 0000000..b15bcdc --- /dev/null +++ 
b/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__gcc.h @@ -0,0 +1,571 @@ +#include + +static __inline__ void at_locked_inc( + intptr_t volatile * ptr) +{ + __asm__( + "lock;" + "incq %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_inc_32( + int32_t volatile * ptr) +{ + __asm__( + "lock;" + "incl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_inc_64( + int64_t volatile * ptr) +{ + __asm__( + "lock;" + "incq %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_dec( + intptr_t volatile * ptr) +{ + __asm__( + "lock;" + "decq %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_dec_32( + int32_t volatile * ptr) +{ + __asm__( + "lock;" + "decl %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_dec_64( + int64_t volatile * ptr) +{ + __asm__( + "lock;" + "decq %0" + : "=m" (*ptr) + : "m" (*ptr) + : "memory"); +} + + +static __inline__ void at_locked_add( + intptr_t volatile * ptr, + intptr_t val) +{ + __asm__( + "lock;" + "xaddq %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_add_32( + int32_t volatile * ptr, + int32_t val) +{ + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_add_64( + int64_t volatile * ptr, + int64_t val) +{ + __asm__( + "lock;" + "xaddq %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_sub( + intptr_t volatile * ptr, + intptr_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddq %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + +static __inline__ void at_locked_sub_32( + int32_t volatile * ptr, + int32_t val) +{ + val = -val; + + __asm__( + "lock;" + "xaddl %1, %0" + : "=m" (*ptr), "=r" (val) + : "1" (val) + : "memory"); +} + + 
/*
 * x86_64 / GCC: locked subtract, exchange-add, exchange-subtract and
 * compare-and-swap primitives.
 *
 * Conventions shared by every function below:
 *  - the memory operand uses the "+m" constraint, since the locked
 *    instruction both reads and writes it;
 *  - the statement is __volatile__, so that it is never discarded when
 *    the caller ignores the register result;
 *  - "memory" keeps the compiler from caching or reordering memory
 *    accesses across the statement; "cc" records that flags are modified.
 */

/*
 * at_locked_sub_64: atomically subtracts val from *ptr.
 *
 * Implemented as a locked exchange-add of the negated value; the
 * negation is carried out in unsigned arithmetic so that it is also
 * well defined for INT64_MIN.
 */
static __inline__ void at_locked_sub_64(
	int64_t volatile *	ptr,
	int64_t			val)
{
	val = (int64_t)(0 - (uint64_t)val);

	__asm__ __volatile__(
		"lock;"
		"xaddq %1, %0"
		: "+m" (*ptr), "+r" (val)
		:
		: "memory", "cc");
}


/*
 * at_locked_xadd: atomically adds val to the pointer-sized *ptr.
 *
 * Returns the value that *ptr held before the addition (xadd leaves
 * the previous memory content in the register operand).
 */
static __inline__ intptr_t at_locked_xadd(
	intptr_t volatile *	ptr,
	intptr_t		val)
{
	__asm__ __volatile__(
		"lock;"
		"xaddq %1, %0"
		: "+m" (*ptr), "+r" (val)
		:
		: "memory", "cc");

	return val;
}


/*
 * at_locked_xadd_32: atomically adds val to the 32-bit *ptr.
 *
 * Returns the value that *ptr held before the addition.
 */
static __inline__ int32_t at_locked_xadd_32(
	int32_t volatile *	ptr,
	int32_t			val)
{
	__asm__ __volatile__(
		"lock;"
		"xaddl %1, %0"
		: "+m" (*ptr), "+r" (val)
		:
		: "memory", "cc");

	return val;
}


/*
 * at_locked_xadd_64: atomically adds val to the 64-bit *ptr.
 *
 * Returns the value that *ptr held before the addition.
 */
static __inline__ int64_t at_locked_xadd_64(
	int64_t volatile *	ptr,
	int64_t			val)
{
	__asm__ __volatile__(
		"lock;"
		"xaddq %1, %0"
		: "+m" (*ptr), "+r" (val)
		:
		: "memory", "cc");

	return val;
}


/*
 * at_locked_xsub: atomically subtracts val from the pointer-sized *ptr.
 *
 * Returns the value that *ptr held before the subtraction.  The
 * negation is done in unsigned arithmetic to avoid signed overflow
 * when val is the minimum representable value.
 */
static __inline__ intptr_t at_locked_xsub(
	intptr_t volatile *	ptr,
	intptr_t		val)
{
	return at_locked_xadd(ptr, (intptr_t)(0 - (uintptr_t)val));
}


/*
 * at_locked_xsub_32: atomically subtracts val from the 32-bit *ptr.
 *
 * Returns the value that *ptr held before the subtraction.
 */
static __inline__ int32_t at_locked_xsub_32(
	int32_t volatile *	ptr,
	int32_t			val)
{
	return at_locked_xadd_32(ptr, (int32_t)(0u - (uint32_t)val));
}


/*
 * at_locked_xsub_64: atomically subtracts val from the 64-bit *ptr.
 *
 * Returns the value that *ptr held before the subtraction.
 */
static __inline__ int64_t at_locked_xsub_64(
	int64_t volatile *	ptr,
	int64_t			val)
{
	return at_locked_xadd_64(ptr, (int64_t)(0 - (uint64_t)val));
}


/*
 * at_locked_cas: pointer-sized atomic compare-and-swap.
 *
 * If *dst equals cmp, stores xchg into *dst.  Returns the value that
 * *dst held before the operation; the swap took place if and only if
 * the returned value equals cmp.
 *
 * cmpxchg compares against, and on failure reloads, the accumulator,
 * hence the "+a" constraint on cmp.
 */
static __inline__ intptr_t at_locked_cas(
	intptr_t volatile *	dst,
	intptr_t		cmp,
	intptr_t		xchg)
{
	__asm__ __volatile__(
		"lock;"
		"cmpxchgq %2, %0"
		: "+m" (*dst), "+a" (cmp)
		: "r" (xchg)
		: "memory", "cc");

	return cmp;
}


/*
 * at_locked_cas_32: 32-bit atomic compare-and-swap.
 *
 * If *dst equals cmp, stores xchg into *dst.  Returns the value that
 * *dst held before the operation; the swap took place if and only if
 * the returned value equals cmp.
 */
static __inline__ int32_t at_locked_cas_32(
	int32_t volatile *	dst,
	int32_t			cmp,
	int32_t			xchg)
{
	__asm__ __volatile__(
		"lock;"
		"cmpxchgl %2, %0"
		: "+m" (*dst), "+a" (cmp)
		: "r" (xchg)
		: "memory", "cc");

	return cmp;
}


+static __inline__ int64_t at_locked_cas_64( + int64_t volatile * dst, + int64_t cmp, + int64_t xchg) +{ + int64_t ret; + + __asm__( + "lock;" + "cmpxchgq %3, %0" + : "=m" (*dst), "=a" (ret) + : "a" (cmp), "r" (xchg) + : "memory"); + + return ret; +} + + +static __inline__ intptr_t at_locked_and( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + + __asm__( + "lock;" + "andq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int32_t at_locked_and_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "andl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int64_t at_locked_and_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + + __asm__( + "lock;" + "andq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ intptr_t at_locked_or( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + + __asm__( + "lock;" + "orq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int32_t at_locked_or_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "orl %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ int64_t at_locked_or_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + + __asm__( + "lock;" + "orq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memory"); + + return ret; +} + + +static __inline__ intptr_t at_locked_xor( + intptr_t volatile * dst, + intptr_t mask) +{ + intptr_t ret; + + __asm__( + "lock;" + "xorq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memxory"); + + return ret; +} + + +static __inline__ int32_t at_locked_xor_32( + int32_t volatile * dst, + int32_t mask) +{ + int32_t ret; + + __asm__( + "lock;" + "xorl %1, %0" + : "=m" (*dst), "=a" (ret) 
+ : "r" (mask) + : "memxory"); + + return ret; +} + + +static __inline__ int64_t at_locked_xor_64( + int64_t volatile * dst, + int64_t mask) +{ + int64_t ret; + + __asm__( + "lock;" + "xorq %1, %0" + : "=m" (*dst), "=a" (ret) + : "r" (mask) + : "memxory"); + + return ret; +} + + +static __inline__ void at_store( + volatile intptr_t * dst, + intptr_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ void at_store_32( + volatile int32_t * dst, + int32_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ void at_store_64( + volatile int64_t * dst, + int64_t val) +{ + __asm__( + "mov %1, %0" + : "=m" (*dst) + : "r" (val) + : "memory"); +} + + +static __inline__ int at_bsf( + unsigned int * index, + uintptr_t mask) +{ + if (mask) { + __asm__( + "bsf %1, %0" + : "=r" (mask) + : "r" (mask)); + + *index = (int)mask; + return 1; + } else + return 0; +} + + +static __inline__ int at_bsr( + unsigned int * index, + uintptr_t mask) +{ + if (mask) { + __asm__( + "bsr %1, %0" + : "=r" (mask) + : "r" (mask)); + + *index = (int)mask; + return 1; + } else + return 0; +} + + +static __inline__ size_t at_popcount( + uintptr_t mask) +{ + __asm__( + "popcntq %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_16( + uint16_t mask) +{ + __asm__( + "popcnt %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_32( + uint32_t mask) +{ + __asm__( + "popcnt %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} + + +static __inline__ size_t at_popcount_64( + uint64_t mask) +{ + __asm__( + "popcntq %0, %0" + : "=r" (mask) + : "0" (mask) + : "memory"); + return mask; +} diff --git a/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__msvc.h b/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__msvc.h new file mode 100644 index 0000000..a52bfd4 --- /dev/null 
+++ b/include/ntapi/bits/x86_64/nt_atomic_x86_64_asm__msvc.h @@ -0,0 +1,350 @@ +#include + +long _InterlockedIncrement(int32_t volatile * ptr); +int64_t _InterlockedIncrement64(int64_t volatile * ptr); +long _InterlockedDecrement(int32_t volatile * ptr); +int64_t _InterlockedDecrement64(int64_t volatile * ptr); +long _InterlockedExchangeAdd(int32_t volatile * ptr, int32_t val); +int64_t _InterlockedExchangeAdd64(int64_t volatile * ptr, int64_t val); +long _InterlockedCompareExchange(int32_t volatile * dst, int32_t xchg, int32_t cmp); +int64_t _InterlockedCompareExchange64(int64_t volatile * dst, int64_t xchg, int64_t cmp); +long _InterlockedAnd(int32_t volatile * dst, int32_t mask); +int64_t _InterlockedAnd64(int64_t volatile * dst, int64_t mask); +long _InterlockedOr(int32_t volatile * dst, int32_t mask); +int64_t _InterlockedOr64(int64_t volatile * dst, int64_t mask); +long _InterlockedXor(int32_t volatile * dst, int32_t mask); +int64_t _InterlockedXor64(int64_t volatile * dst, int64_t mask); +uint16_t __popcnt16(uint16_t mask); +uint32_t __popcnt(uint32_t mask); +uint64_t __popcnt64(uint64_t mask); +void _ReadWriteBarrier(void); +unsigned char _BitScanForward64(unsigned int * index, uintptr_t mask); +unsigned char _BitScanReverse64(unsigned int * index, uintptr_t mask); + +static __inline__ void at_locked_inc( + intptr_t volatile * ptr) +{ + _InterlockedIncrement64(ptr); + return; +} + + +static __inline__ void at_locked_inc_32( + int32_t volatile * ptr) +{ + _InterlockedIncrement(ptr); + return; +} + + +static __inline__ void at_locked_inc_64( + int64_t volatile * ptr) +{ + _InterlockedIncrement64(ptr); + return; +} + + +static __inline__ void at_locked_dec( + intptr_t volatile * ptr) +{ + _InterlockedDecrement64(ptr); + return; +} + + +static __inline__ void at_locked_dec_32( + int32_t volatile * ptr) +{ + _InterlockedDecrement(ptr); + return; +} + + +static __inline__ void at_locked_dec_64( + int64_t volatile * ptr) +{ + _InterlockedDecrement64(ptr); + 
return; +} + + +static __inline__ void at_locked_add( + intptr_t volatile * ptr, + intptr_t val) +{ + _InterlockedExchangeAdd64(ptr, val); + return; +} + + +static __inline__ void at_locked_add_32( + int32_t volatile * ptr, + int32_t val) +{ + _InterlockedExchangeAdd(ptr, val); + return; +} + + +static __inline__ void at_locked_add_64( + int64_t volatile * ptr, + int64_t val) +{ + _InterlockedExchangeAdd64(ptr, val); + return; +} + + +static __inline__ void at_locked_sub( + intptr_t volatile * ptr, + intptr_t val) +{ + _InterlockedExchangeAdd64(ptr, -val); + return; +} + + +static __inline__ void at_locked_sub_32( + int32_t volatile * ptr, + int32_t val) +{ + _InterlockedExchangeAdd(ptr, -val); + return; +} + + +static __inline__ void at_locked_sub_64( + int64_t volatile * ptr, + int64_t val) +{ + _InterlockedExchangeAdd64(ptr, -val); + return; +} + + +static __inline__ intptr_t at_locked_xadd( + intptr_t volatile * ptr, + intptr_t val) +{ + return _InterlockedExchangeAdd64(ptr, val); +} + + +static __inline__ int32_t at_locked_xadd_32( + int32_t volatile * ptr, + int32_t val) +{ + return _InterlockedExchangeAdd(ptr, val); +} + + +static __inline__ int64_t at_locked_xadd_64( + int64_t volatile * ptr, + int64_t val) +{ + return _InterlockedExchangeAdd64(ptr, val); +} + + +static __inline__ intptr_t at_locked_xsub( + intptr_t volatile * ptr, + intptr_t val) +{ + return _InterlockedExchangeAdd64(ptr, -val); +} + + +static __inline__ int32_t at_locked_xsub_32( + int32_t volatile * ptr, + int32_t val) +{ + return _InterlockedExchangeAdd(ptr, -val); +} + + +static __inline__ int64_t at_locked_xsub_64( + int64_t volatile * ptr, + int64_t val) +{ + return _InterlockedExchangeAdd64(ptr, -val); +} + + +static __inline__ intptr_t at_locked_cas( + intptr_t volatile * dst, + intptr_t cmp, + intptr_t xchg) +{ + return _InterlockedCompareExchange64(dst,xchg,cmp); +} + + +static __inline__ int32_t at_locked_cas_32( + int32_t volatile * dst, + int32_t cmp, + int32_t xchg) +{ + 
return _InterlockedCompareExchange(dst,xchg,cmp); +} + + +static __inline__ int64_t at_locked_cas_64( + int64_t volatile * dst, + int64_t cmp, + int64_t xchg) +{ + return _InterlockedCompareExchange64(dst,xchg,cmp); +} + + +static __inline__ intptr_t at_locked_and( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedAnd64(dst,mask); +} + + +static __inline__ int32_t at_locked_and_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedAnd(dst,mask); +} + + +static __inline__ int64_t at_locked_and_64( + int64_t volatile * dst, + int64_t mask) +{ + return _InterlockedAnd64(dst,mask); +} + + +static __inline__ intptr_t at_locked_or( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedOr64(dst,mask); +} + + +static __inline__ int32_t at_locked_or_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedOr(dst,mask); +} + + +static __inline__ int64_t at_locked_or_64( + int64_t volatile * dst, + int64_t mask) +{ + return _InterlockedOr64(dst,mask); +} + + +static __inline__ intptr_t at_locked_xor( + intptr_t volatile * dst, + intptr_t mask) +{ + return _InterlockedXor64(dst,mask); +} + + +static __inline__ int32_t at_locked_xor_32( + int32_t volatile * dst, + int32_t mask) +{ + return _InterlockedXor(dst,mask); +} + + +static __inline__ int64_t at_locked_xor_64( + int64_t volatile * dst, + int64_t mask) +{ + return _InterlockedXor64(dst,mask); +} + + +static __inline__ void at_store( + volatile intptr_t * dst, + intptr_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ void at_store_32( + volatile int32_t * dst, + int32_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ void at_store_64( + volatile int64_t * dst, + int64_t val) +{ + _ReadWriteBarrier(); + *dst = val; + _ReadWriteBarrier(); + + return; +} + + +static __inline__ int at_bsf( + unsigned int * index, + uintptr_t mask) +{ + return 
(int)_BitScanForward64(index,mask); +} + + +static __inline__ int at_bsr( + unsigned int * index, + uintptr_t mask) +{ + return (int)_BitScanReverse64(index,mask); +} + + +static __inline__ size_t at_popcount( + uintptr_t mask) +{ + return __popcnt64(mask); +} + + +static __inline__ size_t at_popcount_16( + uint16_t mask) +{ + return __popcnt16(mask); +} + + +static __inline__ size_t at_popcount_32( + uint32_t mask) +{ + return __popcnt(mask); +} + + +static __inline__ size_t at_popcount_64( + uint64_t mask) +{ + return __popcnt64(mask); +} diff --git a/include/ntapi/bits/x86_64/nt_thread_x86_64.h b/include/ntapi/bits/x86_64/nt_thread_x86_64.h new file mode 100644 index 0000000..efe5664 --- /dev/null +++ b/include/ntapi/bits/x86_64/nt_thread_x86_64.h @@ -0,0 +1,104 @@ +#ifndef _NT_THREAD_X86_64_H_ +#define _NT_THREAD_X86_64_H_ + +#include + +typedef struct { + uintptr_t uc_low; + intptr_t uc_high; +} nt_m128a_t; + +typedef struct { + uint16_t uc_control_word; /* 0x000 */ + uint16_t uc_status_word; /* 0x002 */ + uint8_t uc_tag_word; /* 0x004 */ + uint8_t uc_reserved1; /* 0x005 */ + uint16_t uc_error_opcode; /* 0x006 */ + uint32_t uc_error_offset; /* 0x008 */ + uint16_t uc_error_selector; /* 0x00c */ + uint16_t uc_reserved2; /* 0x00e */ + uint32_t uc_data_offset; /* 0x010 */ + uint16_t uc_data_selector; /* 0x014 */ + uint16_t uc_reserved3; /* 0x016 */ + uint32_t uc_mx_csr; /* 0x018 */ + uint32_t uc_mx_csr_mask; /* 0x01c */ + nt_m128a_t uc_float_registers[8]; /* 0x020 */ + nt_m128a_t uc_xmm_registers[16]; /* 0x0a0 */ + uint8_t uc_reserved4[96]; /* 0x1a0 */ +} nt_xsave_fmt_t; + +typedef struct { + uintptr_t uc_p1_home; /* 0x000 */ + uintptr_t uc_p2_home; /* 0x008 */ + uintptr_t uc_p3_home; /* 0x010 */ + uintptr_t uc_p4_home; /* 0x018 */ + uintptr_t uc_p5_home; /* 0x020 */ + uintptr_t uc_p6_home; /* 0x028 */ + uint32_t uc_context_flags; /* 0x030 */ + uint32_t uc_mx_csr; /* 0x034 */ + uint16_t uc_seg_cs; /* 0x038 */ + uint16_t uc_seg_ds; /* 0x03a */ + uint16_t uc_seg_es; 
/* 0x03c */ + uint16_t uc_seg_fs; /* 0x03e */ + uint16_t uc_seg_gs; /* 0x040 */ + uint16_t uc_seg_ss; /* 0x042 */ + uint32_t uc_eflags; /* 0x044 */ + uintptr_t uc_dr0; /* 0x048 */ + uintptr_t uc_dr1; /* 0x050 */ + uintptr_t uc_dr2; /* 0x058 */ + uintptr_t uc_dr3; /* 0x060 */ + uintptr_t uc_dr6; /* 0x068 */ + uintptr_t uc_dr7; /* 0x070 */ + uintptr_t uc_rax; /* 0x078 */ + uintptr_t uc_rcx; /* 0x080 */ + uintptr_t uc_rdx; /* 0x088 */ + uintptr_t uc_rbx; /* 0x090 */ + uintptr_t uc_rsp; /* 0x098 */ + uintptr_t uc_rbp; /* 0x0a0 */ + uintptr_t uc_rsi; /* 0x0a8 */ + uintptr_t uc_rdi; /* 0x0b0 */ + uintptr_t uc_r8; /* 0x0b8 */ + uintptr_t uc_r9; /* 0x0c0 */ + uintptr_t uc_r10; /* 0x0c8 */ + uintptr_t uc_r11; /* 0x0d0 */ + uintptr_t uc_r12; /* 0x0d8 */ + uintptr_t uc_r13; /* 0x0e0 */ + uintptr_t uc_r14; /* 0x0e8 */ + uintptr_t uc_r15; /* 0x0f0 */ + uintptr_t uc_rip; /* 0x0f8 */ + + union { + nt_xsave_fmt_t uc_flt_save; /* 0x100 */ + + struct { + nt_m128a_t uc_header[2]; /* 0x100 */ + nt_m128a_t uc_legacy[8]; /* 0x120 */ + } uc_hdr; + } uc_flt; + + nt_m128a_t uc_xmm0; /* 0x1a0 */ + nt_m128a_t uc_xmm1; /* 0x1b0 */ + nt_m128a_t uc_xmm2; /* 0x1c0 */ + nt_m128a_t uc_xmm3; /* 0x1d0 */ + nt_m128a_t uc_xmm4; /* 0x1e0 */ + nt_m128a_t uc_xmm5; /* 0x1f0 */ + nt_m128a_t uc_xmm6; /* 0x200 */ + nt_m128a_t uc_xmm7; /* 0x210 */ + nt_m128a_t uc_xmm8; /* 0x220 */ + nt_m128a_t uc_xmm9; /* 0x230 */ + nt_m128a_t uc_xmm10; /* 0x240 */ + nt_m128a_t uc_xmm11; /* 0x250 */ + nt_m128a_t uc_xmm12; /* 0x260 */ + nt_m128a_t uc_xmm13; /* 0x270 */ + nt_m128a_t uc_xmm14; /* 0x280 */ + nt_m128a_t uc_xmm15; /* 0x290 */ + nt_m128a_t uc_vector_register[26]; /* 0x300 */ + uintptr_t uc_vector_control; /* 0x4a0 */ + uintptr_t uc_debug_control; /* 0x4a8 */ + uintptr_t uc_last_branch_to_rip; /* 0x4b0 */ + uintptr_t uc_last_branch_from_rip; /* 0x4b8 */ + uintptr_t uc_last_exception_to_rip; /* 0x4c0 */ + uintptr_t uc_last_exception_from_rip; /* 0x4c8 */ +} nt_mcontext_x86_64_t; + +#endif -- cgit v1.2.3