summaryrefslogtreecommitdiff
path: root/libffi/src/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libffi/src/x86')
-rw-r--r--libffi/src/x86/darwin.S444
-rw-r--r--libffi/src/x86/darwin64.S416
-rw-r--r--libffi/src/x86/ffi.c665
-rw-r--r--libffi/src/x86/ffi64.c627
-rw-r--r--libffi/src/x86/ffitarget.h120
-rw-r--r--libffi/src/x86/freebsd.S458
-rw-r--r--libffi/src/x86/sysv.S468
-rw-r--r--libffi/src/x86/unix64.S426
-rw-r--r--libffi/src/x86/win32.S1065
-rw-r--r--libffi/src/x86/win64.S460
10 files changed, 5149 insertions, 0 deletions
diff --git a/libffi/src/x86/darwin.S b/libffi/src/x86/darwin.S
new file mode 100644
index 000000000..8f0f0707a
--- /dev/null
+++ b/libffi/src/x86/darwin.S
@@ -0,0 +1,444 @@
+/* -----------------------------------------------------------------------
+ darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ -----------------------------------------------------------------------
+ */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl _ffi_prep_args
+
+ .align 4
+.globl _ffi_call_SYSV
+
+/* ffi_call_SYSV (prep_args, ecif, bytes, flags, rvalue, fn)
+
+   Stack arguments once the frame is set up (the order matches the call
+   made from ffi_call in ffi.c):
+      8(%ebp)  prep_args  routine that writes the outgoing arguments
+     12(%ebp)  ecif       extended_cif pointer passed on to prep_args
+     16(%ebp)  bytes      size of the outgoing argument area
+     20(%ebp)  flags      return type code, used to index .Lstore_table
+     24(%ebp)  rvalue     where the result is stored, or NULL
+     28(%ebp)  fn         the function to call
+
+   %esi is saved and restored around its use as the jump-table base.  */
+_ffi_call_SYSV:
+.LFB1:
+ pushl %ebp
+.LCFI0:
+ movl %esp,%ebp
+.LCFI1:
+ /* 8 spare bytes below the frame; presumably alignment padding --
+ TODO confirm. */
+ subl $8,%esp
+ /* Make room for all of the new args. */
+ movl 16(%ebp),%ecx
+ subl %ecx,%esp
+
+ /* %eax = base of the outgoing argument area. */
+ movl %esp,%eax
+
+ /* Call prep_args (stack, ecif). The 8 bytes of padding plus the
+ two pushes move %esp down by 16 in total. */
+ subl $8,%esp
+ pushl 12(%ebp)
+ pushl %eax
+ call *8(%ebp)
+
+ /* Drop the padding and the two pushed words so that %esp points at
+ the prepared arguments again, then call the function. */
+ addl $16,%esp
+
+ call *28(%ebp)
+
+ /* Load %ecx with the return type code */
+ movl 20(%ebp),%ecx
+
+ /* Protect %esi. We're going to pop it in the epilogue. */
+ pushl %esi
+
+ /* If the return value pointer is NULL, assume no return value. */
+ cmpl $0,24(%ebp)
+ jne 0f
+
+ /* Even if there is no space for the return value, we are
+ obliged to handle floating-point values.
+ NOTE(review): only FFI_TYPE_FLOAT is tested here; a DOUBLE or
+ LONGDOUBLE result with a NULL rvalue is not popped. */
+ cmpl $FFI_TYPE_FLOAT,%ecx
+ jne noretval
+ fstp %st(0)
+
+ jmp epilogue
+0:
+ /* "call 1f" pushes the address of .Lstore_table as its return
+ address; the pop at 1: retrieves it, which yields the table
+ base without an absolute relocation. */
+ .align 4
+ call 1f
+.Lstore_table:
+ .long noretval-.Lstore_table /* FFI_TYPE_VOID */
+ .long retint-.Lstore_table /* FFI_TYPE_INT */
+ .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
+ .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
+ .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
+ .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
+ .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
+ .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
+ .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
+ .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
+ .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
+ .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
+ .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
+ .long retint-.Lstore_table /* FFI_TYPE_POINTER */
+ .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
+1:
+ /* %esi = table base; add the self-relative entry selected by the
+ type code in %ecx and jump to the resulting handler. */
+ pop %esi
+ add (%esi, %ecx, 4), %esi
+ jmp *%esi
+
+ /* Sign/zero extend as appropriate. */
+retsint8:
+ movsbl %al, %eax
+ jmp retint
+
+retsint16:
+ movswl %ax, %eax
+ jmp retint
+
+retuint8:
+ movzbl %al, %eax
+ jmp retint
+
+retuint16:
+ movzwl %ax, %eax
+ jmp retint
+
+retfloat:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstps (%ecx)
+ jmp epilogue
+
+retdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpl (%ecx)
+ jmp epilogue
+
+retlongdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpt (%ecx)
+ jmp epilogue
+
+retint64:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ movl %edx,4(%ecx)
+ jmp epilogue
+
+retstruct1b:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movb %al,0(%ecx)
+ jmp epilogue
+
+retstruct2b:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movw %ax,0(%ecx)
+ jmp epilogue
+
+retint:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ /* Falls through to the epilogue. */
+
+retstruct:
+ /* Nothing to do! */
+
+noretval:
+epilogue:
+ /* Restore %esi (pushed after the call), then unwind the frame. */
+ popl %esi
+ movl %ebp,%esp
+ popl %ebp
+ ret
+
+.LFE1:
+.ffi_call_SYSV_end:
+
+ .align 4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl _ffi_closure_SYSV
+
+/* ffi_closure_SYSV -- closure entry point.
+
+   ffi_closure_SYSV_inner is declared regparm(1) in ffi.c, so its first
+   argument (the closure) travels in %eax. This routine never writes
+   %eax before the call, so the value is whatever the caller left there
+   (presumably loaded by the trampoline -- not visible here).
+
+   Frame layout after "subl $40, %esp":
+     -12(%ebp)  resp, pointer to the result; starts out as -24(%ebp)
+     -24(%ebp)  default result buffer
+       8(%esp)  saved %ebx
+       4(%esp)  args  (second stack argument of the inner routine)
+       0(%esp)  &resp (first stack argument of the inner routine)
+
+   The inner routine returns cif->flags in %eax; that code selects how
+   the result at *resp is loaded into the return registers.  */
+_ffi_closure_SYSV:
+.LFB2:
+ pushl %ebp
+.LCFI2:
+ movl %esp, %ebp
+.LCFI3:
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp) /* resp */
+ leal 8(%ebp), %edx
+ movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
+ leal -12(%ebp), %edx
+ movl %edx, (%esp) /* &resp */
+ movl %ebx, 8(%esp) /* keep %ebx across the stub call */
+.LCFI7:
+ call L_ffi_closure_SYSV_inner$stub
+ movl 8(%esp), %ebx
+ movl -12(%ebp), %ecx /* %ecx = resp (may have been redirected) */
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lcls_retint
+
+0: cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lcls_retllong
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
+ je .Lcls_retstruct1b
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
+ je .Lcls_retstruct2b
+ cmpl $FFI_TYPE_STRUCT, %eax
+ je .Lcls_retstruct
+ /* Any other code falls through and returns without loading a
+ value. */
+.Lcls_epilogue:
+ movl %ebp, %esp
+ popl %ebp
+ ret
+.Lcls_retint:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retfloat:
+ flds (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retdouble:
+ fldl (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retldouble:
+ fldt (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retllong:
+ movl (%ecx), %eax
+ movl 4(%ecx), %edx
+ jmp .Lcls_epilogue
+.Lcls_retstruct1b:
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retstruct2b:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retstruct:
+ /* The lea result is overwritten by the next instruction. "ret $4"
+ additionally pops 4 bytes of the caller's arguments (presumably
+ the hidden struct-return pointer -- TODO confirm). */
+ lea -8(%ebp),%esp
+ movl %ebp, %esp
+ popl %ebp
+ ret $4
+.LFE2:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ .align 4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl _ffi_closure_raw_SYSV
+
+/* ffi_closure_raw_SYSV -- entry point for raw closures.
+
+   Declared regparm(1) in ffi.c, so %eax holds the ffi_raw_closure
+   pointer on entry. Calls
+       closure->fun (cif, &res, raw_args, user_data)
+   with res at -24(%ebp) and raw_args pointing at the caller's stack
+   arguments, then loads the result into the return registers
+   according to the type code stored at CIF_FLAGS_OFFSET in the cif.
+
+   %esi is saved in the prologue and carries the cif pointer across
+   the call. Type codes not matched below return without loading a
+   value.  */
+_ffi_closure_raw_SYSV:
+.LFB3:
+ pushl %ebp
+.LCFI4:
+ movl %esp, %ebp
+.LCFI5:
+ pushl %esi
+.LCFI6:
+ subl $36, %esp
+ movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
+ movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+ movl %edx, 12(%esp) /* user_data */
+ leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
+ movl %edx, 8(%esp) /* raw_args */
+ leal -24(%ebp), %edx
+ movl %edx, 4(%esp) /* &res */
+ movl %esi, (%esp) /* cif */
+ call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
+ movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lrcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lrcls_retint
+0:
+ cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lrcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lrcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lrcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lrcls_retllong
+.Lrcls_epilogue:
+ /* Undo the 36-byte allocation, then restore %esi and %ebp. */
+ addl $36, %esp
+ popl %esi
+ popl %ebp
+ ret
+.Lrcls_retint:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+.Lrcls_retfloat:
+ flds -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retdouble:
+ fldl -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retldouble:
+ fldt -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retllong:
+ movl -24(%ebp), %eax
+ movl -20(%ebp), %edx
+ jmp .Lrcls_epilogue
+.LFE3:
+#endif
+
+/* Symbol stub through which ffi_closure_SYSV reaches
+   _ffi_closure_SYSV_inner. The five hlt bytes are placeholders;
+   presumably the dynamic linker overwrites them with a jump when the
+   symbol is bound (the section is flagged self_modifying_code) --
+   TODO confirm.  */
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_SYSV_inner$stub:
+ .indirect_symbol _ffi_closure_SYSV_inner
+ hlt ; hlt ; hlt ; hlt ; hlt
+
+
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+/* Hand-written DWARF call-frame information: one CIE followed by one
+   FDE per routine above. Byte annotations follow the standard DWARF
+   CFA opcode encodings; register columns are given by number only.  */
+EH_frame1:
+ .set L$set$0,LECIE1-LSCIE1 /* CIE Length */
+ .long L$set$0
+LSCIE1:
+ .long 0x0 /* CIE Identifier Tag */
+ .byte 0x1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
+ .byte 0x7c /* sleb128 -4; CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+ .byte 0x1 /* uleb128 0x1; Augmentation size */
+ .byte 0x10 /* FDE Encoding (pcrel) */
+ .byte 0xc /* DW_CFA_def_cfa */
+ .byte 0x5 /* uleb128 0x5; register column */
+ .byte 0x4 /* uleb128 0x4; offset */
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .byte 0x1 /* uleb128 0x1; i.e. saved at CFA-4 */
+ .align 2
+LECIE1:
+.globl _ffi_call_SYSV.eh
+_ffi_call_SYSV.eh:
+LSFDE1:
+ .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
+ .long L$set$1
+LASFDE1:
+ .long LASFDE1-EH_frame1 /* FDE CIE offset */
+ .long .LFB1-. /* FDE initial location */
+ .set L$set$2,.LFE1-.LFB1 /* FDE address range */
+ .long L$set$2
+ .byte 0x0 /* Augmentation size */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$3,.LCFI0-.LFB1
+ .long L$set$3
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* uleb128 0x8 */
+ .byte 0x84 /* DW_CFA_offset, column 0x4 */
+ .byte 0x2 /* uleb128 0x2; i.e. saved at CFA-8 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$4,.LCFI1-.LCFI0
+ .long L$set$4
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x4 /* uleb128 0x4 */
+ .align 2
+LEFDE1:
+.globl _ffi_closure_SYSV.eh
+_ffi_closure_SYSV.eh:
+LSFDE2:
+ .set L$set$5,LEFDE2-LASFDE2 /* FDE Length */
+ .long L$set$5
+LASFDE2:
+ .long LASFDE2-EH_frame1 /* FDE CIE offset */
+ .long .LFB2-. /* FDE initial location */
+ .set L$set$6,.LFE2-.LFB2 /* FDE address range */
+ .long L$set$6
+ .byte 0x0 /* Augmentation size */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$7,.LCFI2-.LFB2
+ .long L$set$7
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* uleb128 0x8 */
+ .byte 0x84 /* DW_CFA_offset, column 0x4 */
+ .byte 0x2 /* uleb128 0x2; i.e. saved at CFA-8 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$8,.LCFI3-.LCFI2
+ .long L$set$8
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x4 /* uleb128 0x4 */
+ .align 2
+LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.globl _ffi_closure_raw_SYSV.eh
+_ffi_closure_raw_SYSV.eh:
+LSFDE3:
+ .set L$set$10,LEFDE3-LASFDE3 /* FDE Length */
+ .long L$set$10
+LASFDE3:
+ .long LASFDE3-EH_frame1 /* FDE CIE offset */
+ .long .LFB3-. /* FDE initial location */
+ .set L$set$11,.LFE3-.LFB3 /* FDE address range */
+ .long L$set$11
+ .byte 0x0 /* Augmentation size */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$12,.LCFI4-.LFB3
+ .long L$set$12
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* uleb128 0x8 */
+ .byte 0x84 /* DW_CFA_offset, column 0x4 */
+ .byte 0x2 /* uleb128 0x2; i.e. saved at CFA-8 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$13,.LCFI5-.LCFI4
+ .long L$set$13
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x4 /* uleb128 0x4 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$14,.LCFI6-.LCFI5
+ .long L$set$14
+ /* NOTE(review): .LCFI6 follows "pushl %esi". Column 0x5 is the
+ column the CIE uses for the initial CFA register; confirm that
+ it is really the intended column for %esi here. */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x3 /* uleb128 0x3; i.e. saved at CFA-12 */
+ .align 2
+LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/libffi/src/x86/darwin64.S b/libffi/src/x86/darwin64.S
new file mode 100644
index 000000000..2f7394ef4
--- /dev/null
+++ b/libffi/src/x86/darwin64.S
@@ -0,0 +1,416 @@
+/* -----------------------------------------------------------------------
+ darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
+ Copyright (c) 2008 Red Hat, Inc.
+ derived from unix64.S
+
+ x86-64 Foreign Function Interface for Darwin.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+ .file "darwin64.S"
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void));
+
+ Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
+
+ .align 3
+ .globl _ffi_call_unix64
+
+_ffi_call_unix64:
+LUW0:
+ /* Register roles on entry, per the prototype comment above and the
+ moves below: %rdi = args, %rsi = bytes, %edx = flags,
+ %rcx = raddr, %r8 = fnaddr, %r9d = number of SSE registers.
+ The four words at args+bytes become this routine's frame:
+ flags, raddr, old %rbp and the relocated return address. */
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
+LUW1:
+ movq %rdi, %r10 /* Save a copy of the register area. */
+ movq %r8, %r11 /* Save a copy of the target fn. */
+ movl %r9d, %eax /* Set number of SSE registers. */
+
+ /* Load up all argument registers. The register area holds the
+ six integer argument registers at offsets 0..40; the SSE
+ registers follow at 48..160 (see Lload_sse). */
+ movq (%r10), %rdi
+ movq 8(%r10), %rsi
+ movq 16(%r10), %rdx
+ movq 24(%r10), %rcx
+ movq 32(%r10), %r8
+ movq 40(%r10), %r9
+ testl %eax, %eax
+ jnz Lload_sse
+Lret_from_load_sse:
+
+ /* Deallocate the reg arg area: 176 = 6*8 + 8*16 bytes, so %rsp
+ now points at whatever follows the register area. */
+ leaq 176(%r10), %rsp
+
+ /* Call the user function. */
+ call *%r11
+
+ /* Deallocate stack arg area; local stack frame in redzone.
+ %rsp now points at the relocated return address, so each
+ Lst_* handler can finish with a plain ret. */
+ leaq 24(%rbp), %rsp
+
+ movq 0(%rbp), %rcx /* Reload flags. */
+ movq 8(%rbp), %rdi /* Reload raddr. */
+ movq 16(%rbp), %rbp /* Reload old frame pointer. */
+LUW2:
+
+ /* The first byte of the flags contains the FFI_TYPE. Table entries
+ are offsets relative to Lstore_table, so add the table address
+ back before jumping. */
+ movzbl %cl, %r10d
+ leaq Lstore_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10
+ addq %r11, %r10
+ jmp *%r10
+
+Lstore_table:
+ .long Lst_void-Lstore_table /* FFI_TYPE_VOID */
+ .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */
+ .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */
+ .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */
+ .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */
+ .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */
+ .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */
+ .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */
+ .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */
+ .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */
+ .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */
+ .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */
+ .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */
+ .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */
+
+ .text
+ /* Return-value store handlers, reached through Lstore_table.
+ On entry: %rdi = raddr (destination for the result), %ecx = flags,
+ the callee's result is in %rax/%rdx, %xmm0/%xmm1 or st(0), and
+ %rsp points at the return address. Integer results narrower than
+ 64 bits are widened and stored as a full 64-bit word. Every
+ handler ends with its own ret; none may fall through into the
+ next one. */
+ .align 3
+Lst_void:
+ ret
+ .align 3
+Lst_uint8:
+ movzbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 3
+Lst_sint8:
+ movsbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 3
+Lst_uint16:
+ movzwq %ax, %rax
+ movq %rax, (%rdi)
+ /* Return here: falling into Lst_sint16 would store the value a
+ second time, sign-extended. */
+ ret
+ .align 3
+Lst_sint16:
+ movswq %ax, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 3
+Lst_uint32:
+ movl %eax, %eax /* 32-bit move zero-extends into %rax. */
+ movq %rax, (%rdi)
+ /* Return here: falling into Lst_sint32 would run cltq and store
+ the value a second time, sign-extended. */
+ ret
+ .align 3
+Lst_sint32:
+ cltq
+ movq %rax, (%rdi)
+ ret
+ .align 3
+Lst_int64:
+ movq %rax, (%rdi)
+ ret
+ .align 3
+Lst_float:
+ movss %xmm0, (%rdi)
+ ret
+ .align 3
+Lst_double:
+ movsd %xmm0, (%rdi)
+ ret
+Lst_ldouble:
+ fstpt (%rdi)
+ ret
+ .align 3
+Lst_struct:
+ leaq -20(%rsp), %rsi /* Scratch area in redzone. */
+
+ /* We have to locate the values now, and since we don't want to
+ write too much data into the user's return value, we spill the
+ value to a 16 byte scratch area first. Bits 8, 9, and 10
+ control where the values are located. Only one of the three
+ bits will be set; see ffi_prep_cif_machdep for the pattern.
+ As coded below: bit 8 -> (%xmm0, %rax), bit 9 -> (%rax, %xmm0),
+ bit 10 -> (%xmm0, %xmm1), none -> (%rax, %rdx). */
+ movd %xmm0, %r10
+ movd %xmm1, %r11
+ testl $0x100, %ecx
+ cmovnz %rax, %rdx
+ cmovnz %r10, %rax
+ testl $0x200, %ecx
+ cmovnz %r10, %rdx
+ testl $0x400, %ecx
+ cmovnz %r10, %rax
+ cmovnz %r11, %rdx
+ movq %rax, (%rsi)
+ movq %rdx, 8(%rsi)
+
+ /* Bits 12-31 contain the true size of the structure. Copy from
+ the scratch area to the true destination: rep movsb copies
+ %rcx bytes from (%rsi) to (%rdi). */
+ shrl $12, %ecx
+ rep movsb
+ ret
+
+ /* Many times we can avoid loading any SSE registers at all.
+ It's not worth an indirect jump to load the exact set of
+ SSE registers needed; zero or all is a good compromise. */
+ .align 3
+LUW3:
+Lload_sse:
+ /* Load all eight SSE argument registers from offsets 48..160 of
+ the register area at %r10, then resume the call sequence.
+ movdqa is an aligned load, so this assumes %r10 + 48 is 16-byte
+ aligned -- TODO confirm against the allocation in the caller. */
+ movdqa 48(%r10), %xmm0
+ movdqa 64(%r10), %xmm1
+ movdqa 80(%r10), %xmm2
+ movdqa 96(%r10), %xmm3
+ movdqa 112(%r10), %xmm4
+ movdqa 128(%r10), %xmm5
+ movdqa 144(%r10), %xmm6
+ movdqa 160(%r10), %xmm7
+ jmp Lret_from_load_sse
+
+LUW4:
+ .align 3
+ .globl _ffi_closure_unix64
+
+/* ffi_closure_unix64 -- closure entry point.
+
+   Allocates a 200-byte frame and spills the incoming argument
+   registers into it:
+       0..40(%rsp)    %rdi, %rsi, %rdx, %rcx, %r8, %r9
+      48..160(%rsp)   %xmm0-%xmm7 (only when the carry flag is set)
+     176(%rsp)        return-value area handed to the inner routine
+   It then calls ffi_closure_unix64_inner with
+     %rdi = %r10 (presumably the closure pointer loaded by the
+            trampoline -- not visible here),
+     %rsi = return-value area, %rdx = register save area,
+     %rcx = 208(%rsp), i.e. just above the return address.
+   The byte returned in %al indexes Lload_table to pick the code that
+   loads the result into the return registers.  */
+_ffi_closure_unix64:
+LUW5:
+ /* The carry flag is set by the trampoline iff SSE registers
+ are used. Don't clobber it before the branch instruction.
+ (leaq and movq leave the flags untouched.) */
+ leaq -200(%rsp), %rsp
+LUW6:
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ jc Lsave_sse
+Lret_from_save_sse:
+
+ movq %r10, %rdi
+ leaq 176(%rsp), %rsi
+ movq %rsp, %rdx
+ leaq 208(%rsp), %rcx
+ call _ffi_closure_unix64_inner
+
+ /* Deallocate stack frame early; return value is now in redzone.
+ The area that was at 176(%rsp) is now at -24(%rsp). */
+ addq $200, %rsp
+LUW7:
+
+ /* The first byte of the return value contains the FFI_TYPE. */
+ movzbl %al, %r10d
+ leaq Lload_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10
+ addq %r11, %r10
+ jmp *%r10
+
+Lload_table:
+ .long Lld_void-Lload_table /* FFI_TYPE_VOID */
+ .long Lld_int32-Lload_table /* FFI_TYPE_INT */
+ .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */
+ .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */
+ .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */
+ .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */
+ .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */
+ .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */
+ .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */
+ .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */
+ .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */
+ .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */
+ .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */
+ .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */
+ .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */
+
+ .text
+ /* Load handlers. Each one reads the result from the red zone at
+ -24(%rsp) and returns. The 8- and 16-bit handlers zero-extend
+ and are shared by the signed and unsigned type codes. */
+ .align 3
+Lld_void:
+ ret
+ .align 3
+Lld_int8:
+ movzbl -24(%rsp), %eax
+ ret
+ .align 3
+Lld_int16:
+ movzwl -24(%rsp), %eax
+ ret
+ .align 3
+Lld_int32:
+ movl -24(%rsp), %eax
+ ret
+ .align 3
+Lld_int64:
+ movq -24(%rsp), %rax
+ ret
+ .align 3
+Lld_float:
+ movss -24(%rsp), %xmm0
+ ret
+ .align 3
+Lld_double:
+ movsd -24(%rsp), %xmm0
+ ret
+ .align 3
+Lld_ldouble:
+ fldt -24(%rsp)
+ ret
+ .align 3
+Lld_struct:
+ /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+ %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
+ both rdx and xmm1 with the second word. For the remaining,
+ bit 8 set means xmm0 gets the second word, and bit 9 means
+ that rax gets the second word. */
+ movq -24(%rsp), %rcx
+ movq -16(%rsp), %rdx
+ movq -16(%rsp), %xmm1
+ testl $0x100, %eax
+ cmovnz %rdx, %rcx
+ movd %rcx, %xmm0
+ testl $0x200, %eax
+ /* movq does not alter the flags, so the cmovnz below still sees
+ the result of the bit 9 test even though %rax is reloaded. */
+ movq -24(%rsp), %rax
+ cmovnz %rdx, %rax
+ ret
+
+ /* See the comment above Lload_sse; the same logic applies here.
+ Stores use movdqa, so 48(%rsp) is assumed to be 16-byte aligned
+ -- TODO confirm. */
+ .align 3
+LUW8:
+Lsave_sse:
+ movdqa %xmm0, 48(%rsp)
+ movdqa %xmm1, 64(%rsp)
+ movdqa %xmm2, 80(%rsp)
+ movdqa %xmm3, 96(%rsp)
+ movdqa %xmm4, 112(%rsp)
+ movdqa %xmm5, 128(%rsp)
+ movdqa %xmm6, 144(%rsp)
+ movdqa %xmm7, 160(%rsp)
+ jmp Lret_from_save_sse
+
+LUW9:
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+/* Hand-written DWARF call-frame information for ffi_call_unix64 and
+   ffi_closure_unix64: one CIE followed by one FDE per routine.  */
+EH_frame1:
+ .set L$set$0,LECIE1-LSCIE1 /* CIE Length */
+ .long L$set$0
+LSCIE1:
+ .long 0x0 /* CIE Identifier Tag */
+ .byte 0x1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
+ .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */
+ .byte 0x10 /* CIE RA Column */
+ .byte 0x1 /* uleb128 0x1; Augmentation size */
+ .byte 0x10 /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .byte 0x7 /* uleb128 0x7 */
+ .byte 0x8 /* uleb128 0x8 */
+ .byte 0x90 /* DW_CFA_offset, column 0x10 */
+ .byte 0x1 /* uleb128 0x1; i.e. saved at CFA-8 */
+ .align 3
+LECIE1:
+ .globl _ffi_call_unix64.eh
+_ffi_call_unix64.eh:
+LSFDE1:
+ .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
+ .long L$set$1
+LASFDE1:
+ .long LASFDE1-EH_frame1 /* FDE CIE offset */
+ .quad LUW0-. /* FDE initial location */
+ .set L$set$2,LUW4-LUW0 /* FDE address range */
+ .quad L$set$2
+ .byte 0x0 /* Augmentation size */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$3,LUW1-LUW0
+ .long L$set$3
+
+ /* New stack frame based off rbp. This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+ .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
+ .byte 0x6 /* uleb128 0x6 */
+ .byte 0x20 /* uleb128 0x20 */
+ .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
+ .byte 0x2
+ .byte 0xa /* DW_CFA_remember_state */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$4,LUW2-LUW1
+ .long L$set$4
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .byte 0x7 /* uleb128 0x7 */
+ .byte 0x8 /* uleb128 0x8 */
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+
+ /* From LUW3 (Lload_sse) onwards the rbp-based rules remembered
+ above apply again. */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$5,LUW3-LUW2
+ .long L$set$5
+ .byte 0xb /* DW_CFA_restore_state */
+
+ .align 3
+LEFDE1:
+ .globl _ffi_closure_unix64.eh
+_ffi_closure_unix64.eh:
+LSFDE3:
+ .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */
+ .long L$set$6
+LASFDE3:
+ .long LASFDE3-EH_frame1 /* FDE CIE offset */
+ .quad LUW5-. /* FDE initial location */
+ .set L$set$7,LUW9-LUW5 /* FDE address range */
+ .quad L$set$7
+ .byte 0x0 /* Augmentation size */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$8,LUW6-LUW5
+ .long L$set$8
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 208,1 /* uleb128 208 */
+ .byte 0xa /* DW_CFA_remember_state */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$9,LUW7-LUW6
+ .long L$set$9
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* uleb128 0x8 */
+
+ /* From LUW8 (Lsave_sse) onwards the 208-byte frame rules
+ remembered above apply again. */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .set L$set$10,LUW8-LUW7
+ .long L$set$10
+ .byte 0xb /* DW_CFA_restore_state */
+
+ .align 3
+LEFDE3:
+ .subsections_via_symbols
+
+#endif /* __x86_64__ */
diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c
new file mode 100644
index 000000000..fea9d6dea
--- /dev/null
+++ b/libffi/src/x86/ffi.c
@@ -0,0 +1,665 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
+ Copyright (c) 2002 Ranjit Mathew
+ Copyright (c) 2002 Bo Thorsen
+ Copyright (c) 2002 Roger Sayle
+ Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+
+ x86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#if !defined(__x86_64__) || defined(_WIN64)
+
+#ifdef _WIN64
+#include <windows.h>
+#endif
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments.
+
+ STACK is the start of that space; ECIF supplies the cif, the array
+ of argument value pointers (avalue) and the return value pointer
+ (rvalue).
+
+ When cif->flags is FFI_TYPE_STRUCT (on X86_WIN64 only if the size
+ is not 1, 2, 4 or 8) the return value pointer is written first as
+ a hidden leading argument. Each argument is then written at a
+ pointer-aligned position: values smaller than ffi_arg are widened
+ to a full ffi_arg, everything else is copied with memcpy. */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+ /* Hidden struct-return pointer, if the return type needs one. */
+ if (ecif->cif->flags == FFI_TYPE_STRUCT
+#ifdef X86_WIN64
+ && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
+ && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
+#endif
+ )
+ {
+ *(void **) argp = ecif->rvalue;
+ argp += sizeof(void*);
+ }
+
+ p_argv = ecif->avalue;
+
+ for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+ i != 0;
+ i--, p_arg++)
+ {
+ size_t z;
+
+ /* Align if necessary */
+ if ((sizeof(void*) - 1) & (size_t) argp)
+ argp = (char *) ALIGN(argp, sizeof(void*));
+
+ z = (*p_arg)->size;
+#ifdef X86_WIN64
+ /* Arguments larger than ffi_arg, structs whose size is not 1, 2,
+ 4 or 8, and long double are passed by reference: store the
+ pointer to the value rather than the value itself. */
+ if (z > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && (z != 1 && z != 2 && z != 4 && z != 8))
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+ || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
+#endif
+ )
+ {
+ z = sizeof(ffi_arg);
+ *(void **)argp = *p_argv;
+ }
+ else if ((*p_arg)->type == FFI_TYPE_FLOAT)
+ {
+ /* Floats are copied as-is, without integer widening. */
+ memcpy(argp, *p_argv, z);
+ }
+ else
+#endif
+ if (z < sizeof(ffi_arg))
+ {
+ /* Widen small values to a full ffi_arg slot, sign- or
+ zero-extending according to the declared type. */
+ z = sizeof(ffi_arg);
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT8:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT16:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT16:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_SINT32:
+ *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_UINT32:
+ *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ /* Reads a full ffi_arg from the source even though the
+ struct is smaller than that. */
+ *(ffi_arg *) argp = *(ffi_arg *)(* p_argv);
+ break;
+
+ default:
+ FFI_ASSERT(0);
+ }
+ }
+ else
+ {
+ memcpy(argp, *p_argv, z);
+ }
+ p_argv++;
+#ifdef X86_WIN64
+ /* Advance by the size rounded up to a multiple of the pointer size. */
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+#else
+ argp += z;
+#endif
+ }
+
+ return;
+}
+
+/* Perform machine dependent cif processing.
+
+ Sets cif->flags to the return type code that the call and closure
+ code later dispatches on, and adds the stack space needed for the
+ arguments (plus a hidden struct-return pointer where applicable)
+ to cif->bytes. Always returns FFI_OK. */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ unsigned int i;
+ ffi_type **ptr;
+
+ /* Set the return type flag */
+ switch (cif->rtype->type)
+ {
+ /* Types whose flag is simply their own type code. */
+ case FFI_TYPE_VOID:
+#if defined(X86) || defined (X86_WIN32) || defined(X86_FREEBSD) || defined(X86_DARWIN) || defined(X86_WIN64)
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+#endif
+#ifdef X86_WIN64
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+#endif
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+#ifndef X86_WIN64
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+#endif
+#endif
+ cif->flags = (unsigned) cif->rtype->type;
+ break;
+
+ /* Handled exactly like a signed 64-bit value. */
+ case FFI_TYPE_UINT64:
+#ifdef X86_WIN64
+ case FFI_TYPE_POINTER:
+#endif
+ cif->flags = FFI_TYPE_SINT64;
+ break;
+
+ case FFI_TYPE_STRUCT:
+#ifndef X86
+ /* When X86 is not defined, structs of size 1, 2, 4 or 8 get an
+ integer-like flag instead of FFI_TYPE_STRUCT. */
+ if (cif->rtype->size == 1)
+ {
+ cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+ }
+ else if (cif->rtype->size == 2)
+ {
+ cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+ }
+ else if (cif->rtype->size == 4)
+ {
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SMALL_STRUCT_4B;
+#else
+ cif->flags = FFI_TYPE_INT; /* same as int type */
+#endif
+ }
+ else if (cif->rtype->size == 8)
+ {
+ cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+ }
+ else
+#endif
+ {
+ cif->flags = FFI_TYPE_STRUCT;
+ /* allocate space for return value pointer */
+ cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
+ }
+ break;
+
+ /* On X86_WIN64 the default is SINT64 and FFI_TYPE_INT gets its own
+ case mapping to SINT32; elsewhere everything remaining,
+ FFI_TYPE_INT included, maps to FFI_TYPE_INT. */
+ default:
+#ifdef X86_WIN64
+ cif->flags = FFI_TYPE_SINT64;
+ break;
+ case FFI_TYPE_INT:
+ cif->flags = FFI_TYPE_SINT32;
+#else
+ cif->flags = FFI_TYPE_INT;
+#endif
+ break;
+ }
+
+ /* Add up the argument area: align the running total to each
+ argument's alignment, then add its size rounded up to
+ FFI_SIZEOF_ARG. */
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
+ if (((*ptr)->alignment - 1) & cif->bytes)
+ cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment);
+ cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG);
+ }
+
+#ifdef X86_WIN64
+ /* ensure space for storing four registers */
+ cif->bytes += 4 * sizeof(ffi_arg);
+#endif
+
+#ifdef X86_DARWIN
+ /* Round the total up to a multiple of 16 bytes. */
+ cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
+ return FFI_OK;
+}
+
+#ifdef X86_WIN64
+extern int
+ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#elif defined(X86_WIN32)
+extern void
+ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#else
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+ unsigned, unsigned, unsigned *, void (*fn)(void));
+#endif
+
+/* Call FN as described by CIF.
+
+ AVALUE is the array of pointers to the argument values and RVALUE
+ is where the result is stored. RVALUE may be NULL; if the function
+ returns a struct through memory a temporary is then allocated with
+ alloca so the callee still has somewhere to write. The call itself
+ is made by the ABI-specific assembly routine, which calls back into
+ ffi_prep_args to lay out the arguments. An ABI not handled in the
+ switch trips FFI_ASSERT. */
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+
+#ifdef X86_WIN64
+ if (rvalue == NULL
+ && cif->flags == FFI_TYPE_STRUCT
+ && cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8)
+ {
+ /* Size rounded up to a multiple of 16 bytes. */
+ ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
+ }
+#else
+ if (rvalue == NULL
+ && cif->flags == FFI_TYPE_STRUCT)
+ {
+ ecif.rvalue = alloca(cif->rtype->size);
+ }
+#endif
+ else
+ ecif.rvalue = rvalue;
+
+
+ switch (cif->abi)
+ {
+#ifdef X86_WIN64
+ case FFI_WIN64:
+ {
+ /* Make copies of all struct arguments
+ NOTE: not sure if responsibility should be here or in caller.
+ Be aware that this overwrites entries of the caller's avalue
+ array with pointers to the alloca'd copies. */
+ unsigned int i;
+ for (i=0; i < cif->nargs;i++) {
+ size_t size = cif->arg_types[i]->size;
+ if ((cif->arg_types[i]->type == FFI_TYPE_STRUCT
+ && (size != 1 && size != 2 && size != 4 && size != 8))
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ || cif->arg_types[i]->type == FFI_TYPE_LONGDOUBLE
+#endif
+ )
+ {
+ void *local = alloca(size);
+ memcpy(local, avalue[i], size);
+ avalue[i] = local;
+ }
+ }
+ ffi_call_win64(ffi_prep_args, &ecif, cif->bytes,
+ cif->flags, ecif.rvalue, fn);
+ }
+ break;
+#elif defined(X86_WIN32)
+ case FFI_SYSV:
+ case FFI_STDCALL:
+ ffi_call_win32(ffi_prep_args, &ecif, cif->bytes, cif->flags,
+ ecif.rvalue, fn);
+ break;
+#else
+ case FFI_SYSV:
+ ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
+ fn);
+ break;
+#endif
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+}
+
+
+/** private members **/
+
+/* The following __attribute__((regparm(1))) decorations will have no effect
+ on MSVC - standard cdecl convention applies. */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+ void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+ __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+ __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+ __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+ __attribute__ ((regparm(1)));
+#endif
+#ifdef X86_WIN64
+void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
+#endif
+
+/* This function is jumped to by the trampoline */
+
+#ifdef X86_WIN64
+/* C half of the Win64 closure entry path.
+
+   CLOSURE identifies the user callback and its cif; ARGS points at the
+   incoming argument area. Builds the array of argument pointers, runs
+   the user callback, and hands the result back to the assembly
+   caller. */
+void * FFI_HIDDEN
+ffi_closure_win64_inner (ffi_closure *closure, void *args)
+{
+  ffi_cif *desc;
+  void **arg_ptrs;
+  void *scratch;
+  void *ret_slot;
+
+  /* By default the callback writes its result into SCRATCH. */
+  ret_slot = &scratch;
+
+  desc = closure->cif;
+  arg_ptrs = (void **) alloca (desc->nargs * sizeof (void *));
+
+  /* Fills ARG_PTRS so that each element points at the matching value
+     in the incoming argument area; when the function returns a
+     structure through memory it also redirects RET_SLOT to the
+     structure return address. */
+  ffi_prep_incoming_args_SYSV (args, &ret_slot, arg_ptrs, desc);
+
+  closure->fun (desc, ret_slot, arg_ptrs, closure->user_data);
+
+  /* The result is returned in rax. This does the right thing for
+     result types except for floats; we have to 'mov xmm0, rax' in the
+     caller to correct this.
+     TODO: structure sizes of 3 5 6 7 are returned by reference, too!!!  */
+  if (desc->rtype->size > sizeof (void *))
+    return ret_slot;
+
+  return *(void **) ret_slot;
+}
+
+#else
+/* C half of the 32-bit closure entry point.
+
+   Builds the argument pointer array for CLOSURE from the incoming stack
+   area ARGS, lets ffi_prep_incoming_args_SYSV redirect *RESPP when the
+   result is a structure, runs the user callback and returns the cif
+   flags to the caller.  */
+unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
+ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
+{
+  ffi_cif *const desc = closure->cif;
+  /* One pointer per declared argument, living on this frame.  */
+  void **const argv = (void **) alloca (desc->nargs * sizeof (void *));
+
+  /* Point every ARGV slot at its value on the incoming stack; for a
+     structure result *RESPP is replaced by the structure return
+     address found there.  */
+  ffi_prep_incoming_args_SYSV (args, respp, argv, desc);
+
+  (closure->fun) (desc, *respp, argv, closure->user_data);
+
+  return desc->flags;
+}
+#endif /* !X86_WIN64 */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+ ffi_cif *cif)
+{ /* Walk the incoming argument area STACK of a closure call and store
+ in AVALUE one pointer per argument described by CIF. When the
+ result is returned through a caller-supplied buffer, the address of
+ that buffer is read from the front of STACK and stored in *RVALUE;
+ otherwise *RVALUE is left untouched. */
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+#ifdef X86_WIN64
+ if (cif->rtype->size > sizeof(ffi_arg)
+ || (cif->flags == FFI_TYPE_STRUCT
+ && (cif->rtype->size != 1 && cif->rtype->size != 2
+ && cif->rtype->size != 4 && cif->rtype->size != 8))) {
+ *rvalue = *(void **) argp; /* return-buffer pointer is the first slot */
+ argp += sizeof(void *);
+ }
+#else
+ if ( cif->flags == FFI_TYPE_STRUCT ) {
+ *rvalue = *(void **) argp; /* return-buffer pointer is the first slot */
+ argp += sizeof(void *);
+ }
+#endif
+
+ p_argv = avalue;
+
+ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
+ {
+ size_t z; /* bytes this argument occupies in the argument area */
+
+ /* Align if necessary */
+ if ((sizeof(void*) - 1) & (size_t) argp) {
+ argp = (char *) ALIGN(argp, sizeof(void*));
+ }
+
+#ifdef X86_WIN64
+ if ((*p_arg)->size > sizeof(ffi_arg)
+ || ((*p_arg)->type == FFI_TYPE_STRUCT
+ && ((*p_arg)->size != 1 && (*p_arg)->size != 2
+ && (*p_arg)->size != 4 && (*p_arg)->size != 8)))
+ {
+ z = sizeof(void *);
+ *p_argv = *(void **)argp; /* the slot holds a pointer to the value */
+ }
+ else
+#endif
+ {
+ z = (*p_arg)->size;
+
+ /* because we're little endian, this is what it turns into. */
+
+ *p_argv = (void*) argp; /* the value itself sits in the argument area */
+ }
+
+ p_argv++;
+#ifdef X86_WIN64
+ argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* round up to whole pointer-sized slots */
+#else
+ argp += z;
+#endif
+ }
+
+ return;
+}
+
+#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) /* writes 29 bytes at TRAMP: MASK -> %r11d, CTX -> %rax, then jump to FUN through %r10 */ \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ void* __fun = (void*)(FUN); \
+ void* __ctx = (void*)(CTX); \
+ *(unsigned char*) &__tramp[0] = 0x41; \
+ *(unsigned char*) &__tramp[1] = 0xbb; \
+ *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11d (32-bit immediate) */ \
+ *(unsigned char*) &__tramp[6] = 0x48; \
+ *(unsigned char*) &__tramp[7] = 0xb8; \
+ *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax (64-bit immediate) */ \
+ *(unsigned char *) &__tramp[16] = 0x49; \
+ *(unsigned char *) &__tramp[17] = 0xba; \
+ *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 (64-bit immediate) */ \
+ *(unsigned char *) &__tramp[26] = 0x41; \
+ *(unsigned char *) &__tramp[27] = 0xff; \
+ *(unsigned char *) &__tramp[28] = 0xe2; /* jmp *%r10 */ \
+ }
+
+/* How to make a trampoline. Derived from gcc/config/i386/i386.c.
+
+ The 10 bytes written at TRAMP load CTX into %eax and then jump to FUN.
+ The jump displacement is computed relative to CTX + 10, i.e. the end
+ of the trampoline, so CTX has to be the address the trampoline is
+ executed from. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned int __dis = __fun - (__ctx + 10); \
+ *(unsigned char*) &__tramp[0] = 0xb8; \
+ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+ *(unsigned char *) &__tramp[5] = 0xe9; \
+ *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun (relative) */ \
+ }
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) /* writes 13 bytes at TRAMP: CTX -> %eax, call FUN, then ret SIZE */ \
+{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ unsigned int __fun = (unsigned int)(FUN); \
+ unsigned int __ctx = (unsigned int)(CTX); \
+ unsigned int __dis = __fun - (__ctx + 10); /* relative to the byte after the call, assuming execution at CTX */ \
+ unsigned short __size = (unsigned short)(SIZE); \
+ *(unsigned char*) &__tramp[0] = 0xb8; \
+ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+ *(unsigned char *) &__tramp[5] = 0xe8; \
+ *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \
+ *(unsigned char *) &__tramp[10] = 0xc2; \
+ *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \
+ }
+
+/* Fill in CLOSURE so that calling the code at CODELOC invokes FUN with
+ USER_DATA. The trampoline bytes are written to CLOSURE->tramp, with
+ CODELOC embedded as the context value. Returns FFI_OK on success and
+ FFI_BAD_ABI, without touching the cif/fun/user_data fields, when
+ CIF->abi is not one handled by this build.
+
+ The cif must already be prep'ed. */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+#ifdef X86_WIN64
+#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE)
+#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0)
+ if (cif->abi == FFI_WIN64)
+ {
+ int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); /* bit N set when argument N (N < 4) is a float or double */
+ FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0],
+ &ffi_closure_win64,
+ codeloc, mask);
+ /* make sure we can execute here */
+ }
+#else
+ if (cif->abi == FFI_SYSV)
+ {
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+ &ffi_closure_SYSV,
+ (void*)codeloc);
+ }
+#ifdef X86_WIN32
+ else if (cif->abi == FFI_STDCALL)
+ {
+ FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+ &ffi_closure_STDCALL,
+ (void*)codeloc, cif->bytes); /* cif->bytes becomes the "ret" operand */
+ }
+#endif /* X86_WIN32 */
+#endif /* !X86_WIN64 */
+ else
+ {
+ return FFI_BAD_ABI;
+ }
+
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+/* Set up the raw closure CLOSURE so that the code at CODELOC dispatches
+   to FUN with USER_DATA.  Only FFI_SYSV is accepted; any other ABI
+   yields FFI_BAD_ABI and leaves CLOSURE untouched.  */
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+                          ffi_cif* cif,
+                          void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+                          void *user_data,
+                          void *codeloc)
+{
+  int idx;
+
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  /* Structure and long double arguments are not supported by raw
+     closures: handling them would need per-argument processing in a
+     dedicated assembly routine, which is deliberately avoided for
+     performance.  Check from the last argument down to the first.  */
+  idx = cif->nargs - 1;
+  while (idx >= 0)
+    {
+      FFI_ASSERT (cif->arg_types[idx]->type != FFI_TYPE_STRUCT);
+      FFI_ASSERT (cif->arg_types[idx]->type != FFI_TYPE_LONGDOUBLE);
+      idx--;
+    }
+
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
+                       codeloc);
+
+  closure->fun = fun;
+  closure->user_data = user_data;
+  closure->cif = cif;
+
+  return FFI_OK;
+}
+
+/* Argument preparation callback used by ffi_raw_call: the raw argument
+   block in ECIF->avalue is copied verbatim, ECIF->cif->bytes bytes of
+   it, to STACK.  */
+static void
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  const size_t nbytes = ecif->cif->bytes;
+
+  memcpy (stack, ecif->avalue, nbytes);
+}
+
+/* we borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument. as of
+ * libffi-1.20, this is not the case.)
+ */
+
+/* Call FN as described by CIF, taking the arguments from the raw block
+   FAKE_AVALUE and storing the result through RVALUE.  A temporary
+   result buffer is allocated on this frame when the function returns a
+   structure and RVALUE is NULL.  */
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = (void **) fake_avalue;
+  ecif.rvalue = rvalue;
+
+  /* A structure result needs somewhere to go even if the caller does
+     not want it.  */
+  if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT)
+    ecif.rvalue = alloca(cif->rtype->size);
+
+#ifdef X86_WIN32
+  if (cif->abi == FFI_SYSV || cif->abi == FFI_STDCALL)
+    ffi_call_win32(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                   ecif.rvalue, fn);
+#else
+  if (cif->abi == FFI_SYSV)
+    ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
+                  ecif.rvalue, fn);
+#endif
+  else
+    {
+      /* Unknown ABI.  */
+      FFI_ASSERT(0);
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__ || X86_WIN64 */
+
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
new file mode 100644
index 000000000..bd907d720
--- /dev/null
+++ b/libffi/src/x86/ffi64.c
@@ -0,0 +1,627 @@
+/* -----------------------------------------------------------------------
+ ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+ Copyright (c) 2008, 2010 Red Hat, Inc.
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+struct register_args
+{
+ /* Registers for argument passing: MAX_GPR_REGS 64-bit slots for the
+ integer registers followed by MAX_SSE_REGS 128-bit slots for the SSE
+ registers. ffi_call fills these slots before the call and
+ ffi_closure_unix64_inner reads incoming arguments out of them. */
+ UINT64 gpr[MAX_GPR_REGS];
+ __int128_t sse[MAX_SSE_REGS];
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void), unsigned ssecount);
+
+/* All reference to register classes here is identical to the code in
+ gcc/config/i386/i386.c. Do *not* change one without the other. */
+
+/* Register class used for passing a given 64-bit part of the argument.
+ These represent classes as documented by the PS ABI, with the
+ exception of the SSESF and SSEDF classes, which are basically the SSE
+ class; gcc just uses SFmode or DFmode moves instead of DImode to avoid
+ reformatting penalties.
+
+ Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
+ moves whenever possible (the upper half then contains only padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS, /* SSE_CLASS_P relies on SSE .. SSEUP being contiguous */
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
+
+/* Nonzero when class X is one of the SSE classes (SSE, SSESF, SSEDF or
+   SSEUP), which are contiguous in enum x86_64_reg_class.  Every use of
+   X is parenthesized so that an expression argument such as
+   `a ? b : c' is compared as a whole in both halves of the test.  */
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
+ of this code is to classify each 8bytes of incoming argument by the register
+ class and assign registers accordingly. */
+
+/* Combine CLASS1 and CLASS2, the classes of two fields that share one
+   eightbyte, into the class of that eightbyte.  The rules are those of
+   the x86-64 PS ABI and are applied in the order written below.  */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  int si_with_sf, any_integer, any_x87;
+
+  /* Rules #1 and #2: equal classes merge to themselves, and NO_CLASS
+     always gives way to the other class.  */
+  if (class1 == class2 || class2 == X86_64_NO_CLASS)
+    return class1;
+  if (class1 == X86_64_NO_CLASS)
+    return class2;
+
+  /* Rule #3: MEMORY on either side wins.  */
+  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #4: INTEGER on either side wins.  A 32-bit integer paired with
+     a single float keeps the cheaper 32-bit integer class.  */
+  si_with_sf = ((class1 == X86_64_INTEGERSI_CLASS
+                 && class2 == X86_64_SSESF_CLASS)
+                || (class1 == X86_64_SSESF_CLASS
+                    && class2 == X86_64_INTEGERSI_CLASS));
+  if (si_with_sf)
+    return X86_64_INTEGERSI_CLASS;
+
+  any_integer = (class1 == X86_64_INTEGER_CLASS
+                 || class2 == X86_64_INTEGER_CLASS
+                 || class1 == X86_64_INTEGERSI_CLASS
+                 || class2 == X86_64_INTEGERSI_CLASS);
+  if (any_integer)
+    return X86_64_INTEGER_CLASS;
+
+  /* Rule #5: any x87 flavour (X87, X87UP, COMPLEX_X87) forces MEMORY.  */
+  any_x87 = 0;
+  switch (class1)
+    {
+    case X86_64_X87_CLASS:
+    case X86_64_X87UP_CLASS:
+    case X86_64_COMPLEX_X87_CLASS:
+      any_x87 = 1;
+      break;
+    default:
+      break;
+    }
+  switch (class2)
+    {
+    case X86_64_X87_CLASS:
+    case X86_64_X87UP_CLASS:
+    case X86_64_COMPLEX_X87_CLASS:
+      any_x87 = 1;
+      break;
+    default:
+      break;
+    }
+  if (any_x87)
+    return X86_64_MEMORY_CLASS;
+
+  /* Rule #6: everything that is left is SSE.  */
+  return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE. BYTE_OFFSET is the offset at
+ which TYPE starts: examine_argument passes 0 for a whole argument and
+ the structure case passes each field's offset modulo 8 when recursing.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ See the x86-64 PS ABI for details.
+*/
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+ size_t byte_offset)
+{
+ switch (type->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ {
+ int size = byte_offset + type->size; /* end offset of the value */
+
+ if (size <= 4)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 8)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 12)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 16)
+ {
+ classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else
+ FFI_ASSERT (0); /* NOTE(review): no return follows; if FFI_ASSERT does not abort, control falls into the FLOAT case */
+ }
+ case FFI_TYPE_FLOAT:
+ if (!(byte_offset % 8))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case FFI_TYPE_STRUCT:
+ {
+ const int UNITS_PER_WORD = 8;
+ int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ ffi_type **ptr;
+ int i;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* If the struct is larger than 32 bytes, pass it on the stack. */
+ if (type->size > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero sized arrays or structures are NO_CLASS. We return 0 to
+ signal memory class, so handle it as a special case. */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Merge the fields of structure. */
+ for (ptr = type->elements; *ptr != NULL; ptr++)
+ {
+ int num;
+
+ byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+ num = classify_argument (*ptr, subclasses, byte_offset % 8);
+ if (num == 0)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos = byte_offset / 8; /* eightbyte in which this field starts */
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+
+ byte_offset += (*ptr)->size;
+ }
+
+ if (words > 2)
+ {
+ /* When size > 16 bytes, if the first one isn't
+ X86_64_SSE_CLASS or any other ones aren't
+ X86_64_SSEUP_CLASS, everything should be passed in
+ memory. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should be always preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && classes[i - 1] != X86_64_SSE_CLASS /* NOTE(review): for i == 0 this reads classes[-1] before the assert below runs */
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ FFI_ASSERT (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ FFI_ASSERT (i != 0);
+ return 0;
+ }
+ }
+ return words;
+ }
+
+ default:
+ FFI_ASSERT(0);
+ }
+ return 0; /* Never reached. */
+}
+
+/* Classify TYPE and count the registers it needs.
+
+   CLASSES receives the class of each eightbyte; *PNGPR and *PNSSE
+   receive the number of general-purpose and SSE registers required.
+   Returns zero iff the value must be passed in memory, otherwise the
+   number of eightbytes.  A value containing an x87 class yields 1 when
+   IN_RETURN is set and 0 otherwise; *PNGPR and *PNSSE are not written
+   on that path, nor when zero is returned.  */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+                  _Bool in_return, int *pngpr, int *pnsse)
+{
+  int gp_needed = 0, sse_needed = 0;
+  int word, nwords;
+
+  nwords = classify_argument (type, classes, 0);
+  if (nwords == 0)
+    return 0;
+
+  for (word = 0; word < nwords; ++word)
+    {
+      const enum x86_64_reg_class cls = classes[word];
+
+      if (cls == X86_64_INTEGER_CLASS || cls == X86_64_INTEGERSI_CLASS)
+        gp_needed++;
+      else if (cls == X86_64_SSE_CLASS
+               || cls == X86_64_SSESF_CLASS
+               || cls == X86_64_SSEDF_CLASS)
+        sse_needed++;
+      else if (cls == X86_64_X87_CLASS
+               || cls == X86_64_X87UP_CLASS
+               || cls == X86_64_COMPLEX_X87_CLASS)
+        /* x87 values live in registers only when returned.  */
+        return in_return != 0;
+      else if (cls != X86_64_NO_CLASS && cls != X86_64_SSEUP_CLASS)
+        /* NO_CLASS and SSEUP consume no extra register; anything else
+           must not show up here.  */
+        abort ();
+    }
+
+  *pngpr = gp_needed;
+  *pnsse = sse_needed;
+
+  return nwords;
+}
+
+/* Perform machine dependent cif processing: compute CIF->flags, which
+ describes how the result is returned, and CIF->bytes, the stack space
+ needed by the arguments that do not fit in registers. Always returns
+ FFI_OK. */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ size_t bytes;
+
+ gprcount = ssecount = 0;
+
+ flags = cif->rtype->type; /* low bits: the return type code */
+ if (flags != FFI_TYPE_VOID)
+ {
+ n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value is passed in memory. A pointer to that
+ memory is the first argument. Allocate a register for it. */
+ gprcount++;
+ /* We don't have to do anything in asm for the return. */
+ flags = FFI_TYPE_VOID;
+ }
+ else if (flags == FFI_TYPE_STRUCT)
+ {
+ /* Mark which registers the result appears in: bit 8 when only
+ the first word is SSE, bit 9 when only the second word is
+ SSE, bit 10 when both are. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && !sse1)
+ flags |= 1 << 8;
+ else if (!sse0 && sse1)
+ flags |= 1 << 9;
+ else if (sse0 && sse1)
+ flags |= 1 << 10;
+ /* Mark the true size of the structure. */
+ flags |= cif->rtype->size << 12;
+ }
+ }
+
+ /* Go over all arguments and determine the way they should be passed.
+ If it's in a register and there is space for it, let that be so. If
+ not, add its size to the stack byte count. */
+ for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+ {
+ if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = cif->arg_types[i]->alignment;
+
+ if (align < 8) /* stack slots are at least 8-byte aligned */
+ align = 8;
+
+ bytes = ALIGN (bytes, align);
+ bytes += cif->arg_types[i]->size;
+ }
+ else
+ {
+ gprcount += ngpr;
+ ssecount += nsse;
+ }
+ }
+ if (ssecount)
+ flags |= 1 << 11; /* at least one argument uses an SSE register */
+ cif->flags = flags;
+ cif->bytes = ALIGN (bytes, 8);
+
+ return FFI_OK;
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{ /* Call FN as described by CIF. AVALUE[i] points to the value of
+ argument i and the result is stored through RVALUE. A block holding
+ a struct register_args followed by the stack arguments is built on
+ this frame and handed to ffi_call_unix64. */
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int gprcount, ssecount, ngpr, nsse, i, avn;
+ _Bool ret_in_memory;
+ struct register_args *reg_args;
+
+ /* Can't call 32-bit mode from 64-bit mode. */
+ FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+ /* If the return value is a struct and we don't have a return value
+ address then we need to make one. Note the setting of flags to
+ VOID above in ffi_prep_cif_machdep. */
+ ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+ && (cif->flags & 0xff) == FFI_TYPE_VOID);
+ if (rvalue == NULL && ret_in_memory)
+ rvalue = alloca (cif->rtype->size);
+
+ /* Allocate the space for the arguments, plus 4 words of temp space. */
+ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+ reg_args = (struct register_args *) stack;
+ argp = stack + sizeof (struct register_args); /* stack arguments follow the register image */
+
+ gprcount = ssecount = 0;
+
+ /* If the return value is passed in memory, add the pointer as the
+ first integer argument. */
+ if (ret_in_memory)
+ reg_args->gpr[gprcount++] = (long) rvalue;
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
+ size_t size = arg_types[i]->size;
+ int n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ memcpy (argp, avalue[i], size);
+ argp += size;
+ }
+ else
+ {
+ /* The argument is passed entirely in registers. */
+ char *a = (char *) avalue[i];
+ int j;
+
+ for (j = 0; j < n; j++, a += 8, size -= 8)
+ {
+ switch (classes[j])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ reg_args->gpr[gprcount] = 0; /* clear first: fewer than 8 bytes may be copied */
+ memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ gprcount++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSEDF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT64 *) a; /* 64 bits of data, widened to the 128-bit slot */
+ break;
+ case X86_64_SSESF_CLASS:
+ reg_args->sse[ssecount++] = *(UINT32 *) a; /* 32 bits of data, widened to the 128-bit slot */
+ break;
+ default:
+ abort();
+ }
+ }
+ }
+ }
+
+ ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+ cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{ /* Write the 24-byte x86-64 trampoline into CLOSURE->tramp and record
+ CIF, FUN and USER_DATA in CLOSURE. The trampoline loads the address
+ of ffi_closure_unix64 into %r11 and CODELOC into %r10, sets or
+ clears the carry flag, and jumps through %r11. CIF->abi is not
+ checked here; the function always returns FFI_OK. */
+ volatile unsigned short *tramp;
+
+ tramp = (volatile unsigned short *) &closure->tramp[0];
+
+ tramp[0] = 0xbb49; /* mov <code>, %r11 */
+ *(void * volatile *) &tramp[1] = ffi_closure_unix64;
+ tramp[5] = 0xba49; /* mov <data>, %r10 */
+ *(void * volatile *) &tramp[6] = codeloc;
+
+ /* Set the carry bit iff the function uses any sse registers
+ (bit 11 of the cif flags).
+ This is clc or stc, together with the first byte of the jmp. */
+ tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+ tramp[11] = 0xe3ff; /* jmp *%r11 */
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+int
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+ struct register_args *reg_args, char *argp)
+{ /* C half of the x86-64 closure entry point. REG_ARGS holds the
+ incoming register arguments and ARGP points at the incoming stack
+ arguments. An array with one pointer per argument is built and the
+ user callback CLOSURE->fun is invoked with it, storing the result
+ through RVALUE. The return value is the return type code, possibly
+ with bits 8/9 set for two-word structures, or FFI_TYPE_VOID when the
+ result went to memory. */
+ ffi_cif *cif;
+ void **avalue;
+ ffi_type **arg_types;
+ long i, avn;
+ int gprcount, ssecount, ngpr, nsse;
+ int ret;
+
+ cif = closure->cif;
+ avalue = alloca(cif->nargs * sizeof(void *));
+ gprcount = ssecount = 0;
+
+ ret = cif->rtype->type;
+ if (ret != FFI_TYPE_VOID)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value goes in memory. Arrange for the closure
+ return value to go directly back to the original caller. */
+ rvalue = (void *) reg_args->gpr[gprcount++];
+ /* We don't have to do anything in asm for the return. */
+ ret = FFI_TYPE_VOID;
+ }
+ else if (ret == FFI_TYPE_STRUCT && n == 2)
+ {
+ /* Mark which register the second word of the structure goes in. */
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+ _Bool sse1 = SSE_CLASS_P (classes[1]);
+ if (!sse0 && sse1)
+ ret |= 1 << 8;
+ else if (sse0 && !sse1)
+ ret |= 1 << 9;
+ }
+ }
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ int n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* This argument was passed in memory. */
+ argp = (void *) ALIGN (argp, align);
+ avalue[i] = argp;
+ argp += arg_types[i]->size;
+ }
+ /* If the argument is in a single register, or two consecutive
+ integer registers, then we can use that address directly. */
+ else if (n == 1
+ || (n == 2 && !(SSE_CLASS_P (classes[0])
+ || SSE_CLASS_P (classes[1]))))
+ {
+ /* Point straight at the saved register slot(s). */
+ if (SSE_CLASS_P (classes[0]))
+ {
+ avalue[i] = &reg_args->sse[ssecount];
+ ssecount += n;
+ }
+ else
+ {
+ avalue[i] = &reg_args->gpr[gprcount];
+ gprcount += n;
+ }
+ }
+ /* Otherwise, allocate space to make them consecutive. */
+ else
+ {
+ char *a = alloca (16);
+ int j;
+
+ avalue[i] = a;
+ for (j = 0; j < n; j++, a += 8)
+ {
+ if (SSE_CLASS_P (classes[j]))
+ memcpy (a, &reg_args->sse[ssecount++], 8); /* first 8 bytes of the 16-byte slot */
+ else
+ memcpy (a, &reg_args->gpr[gprcount++], 8);
+ }
+ }
+ }
+
+ /* Invoke the closure. */
+ closure->fun (cif, rvalue, avalue, closure->user_data);
+
+ /* Tell assembly how to perform return type promotions. */
+ return ret;
+}
+
+#endif /* __x86_64__ */
diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h
new file mode 100644
index 000000000..b85016cc0
--- /dev/null
+++ b/libffi/src/x86/ffitarget.h
@@ -0,0 +1,120 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 1996-2003, 2010 Red Hat, Inc.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ Target configuration macros for x86 and x86-64.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+/* ---- System specific configurations ----------------------------------- */
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64 ffi_arg;
+typedef __int64 ffi_sarg;
+#else
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#endif
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+
+typedef enum ffi_abi { /* calling conventions; which enumerators exist depends on the target macros tested below */
+ FFI_FIRST_ABI = 0,
+
+ /* ---- Intel x86 Win32 ---------- */
+#ifdef X86_WIN32
+ FFI_SYSV,
+ FFI_STDCALL,
+ /* TODO: Add fastcall support for the sake of completeness */
+ FFI_DEFAULT_ABI = FFI_SYSV,
+#endif
+
+#ifdef X86_WIN64
+ FFI_WIN64,
+ FFI_DEFAULT_ABI = FFI_WIN64,
+#else
+
+ /* ---- Intel x86 and AMD x86-64 - */
+#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(__i386) || defined(__amd64))
+ FFI_SYSV,
+ FFI_UNIX64, /* Unix variants all use the same ABI for x86-64 */
+#if defined(__i386__) || defined(__i386)
+ FFI_DEFAULT_ABI = FFI_SYSV,
+#else
+ FFI_DEFAULT_ABI = FFI_UNIX64,
+#endif
+#endif
+#endif /* X86_WIN64 */
+
+ FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 /* NOTE(review): one past the default ABI, which equals FFI_STDCALL on X86_WIN32 and FFI_UNIX64 on i386 - confirm users of FFI_LAST_ABI expect that */
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures -----------------------------------------
+ FFI_TRAMPOLINE_SIZE is the number of bytes of trampoline code for the
+ selected target; FFI_NATIVE_RAW_API and FFI_NO_RAW_API control the raw
+ API support. */
+
+#define FFI_CLOSURES 1
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+
+#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#define FFI_TRAMPOLINE_SIZE 24 /* twelve 16-bit words, as written by ffi64.c */
+#define FFI_NATIVE_RAW_API 0
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 13 /* bytes written by FFI_INIT_TRAMPOLINE_STDCALL */
+#else
+#ifdef X86_WIN64
+#define FFI_TRAMPOLINE_SIZE 29 /* bytes written by FFI_INIT_TRAMPOLINE_WIN64 */
+#define FFI_NATIVE_RAW_API 0
+#define FFI_NO_RAW_API 1
+#else
+#define FFI_TRAMPOLINE_SIZE 10 /* bytes written by FFI_INIT_TRAMPOLINE */
+#endif
+#endif
+#ifndef X86_WIN64
+#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
+#endif
+#endif
+
+#endif
+
diff --git a/libffi/src/x86/freebsd.S b/libffi/src/x86/freebsd.S
new file mode 100644
index 000000000..afde51316
--- /dev/null
+++ b/libffi/src/x86/freebsd.S
@@ -0,0 +1,458 @@
+/* -----------------------------------------------------------------------
+ freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
+ Copyright (c) 2008 Björn König
+
+ X86 Foreign Function Interface for FreeBSD
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+ .align 4
+.globl ffi_call_SYSV
+ .type ffi_call_SYSV,@function
+
+ /* ffi_call_SYSV -- 32-bit x86, AT&T syntax, all arguments on the stack.
+
+    Stack arguments as read by the code below (offsets from %ebp):
+       8(%ebp)  function called first to fill in the outgoing argument
+                area; invoked as fn (stack_area, 12(%ebp))
+      12(%ebp)  opaque second argument handed to that function
+      16(%ebp)  number of bytes to reserve for the outgoing arguments
+      20(%ebp)  return type code (an FFI_TYPE_* value)
+      24(%ebp)  pointer to storage for the return value, or NULL
+      28(%ebp)  address of the function to call
+
+    After the target returns, the result in %eax/%edx or %st(0) is stored
+    through 24(%ebp) according to the type code.  %esi is used as scratch
+    for the jump-table dispatch and is saved/restored around it.  */
+ffi_call_SYSV:
+.LFB1:
+ pushl %ebp
+.LCFI0:
+ movl %esp,%ebp
+.LCFI1:
+ /* Make room for all of the new args. */
+ movl 16(%ebp),%ecx
+ subl %ecx,%esp
+
+ /* %eax = base of the freshly reserved argument area */
+ movl %esp,%eax
+
+ /* Place all of the ffi_prep_args in position */
+ pushl 12(%ebp)
+ pushl %eax
+ call *8(%ebp)
+
+ /* Return stack to previous state and call the function */
+ /* Dropping the two pushed words leaves %esp at the argument area. */
+ addl $8,%esp
+
+ call *28(%ebp)
+
+ /* Load %ecx with the return type code */
+ movl 20(%ebp),%ecx
+
+ /* Protect %esi. We're going to pop it in the epilogue. */
+ pushl %esi
+
+ /* If the return value pointer is NULL, assume no return value. */
+ cmpl $0,24(%ebp)
+ jne 0f
+
+ /* Even if there is no space for the return value, we are
+ obliged to handle floating-point values. */
+ /* Only FFI_TYPE_FLOAT is popped off the x87 stack on this path. */
+ cmpl $FFI_TYPE_FLOAT,%ecx
+ jne noretval
+ fstp %st(0)
+
+ jmp epilogue
+
+0:
+ /* Position-independent dispatch: the call pushes the address of
+    .Lstore_table, which is popped into %esi at label 1 below. */
+ call 1f
+
+ /* Table of handler offsets relative to .Lstore_table, indexed directly
+    by the type code in %ecx (no bounds check), so the entries must stay
+    in FFI_TYPE_* numeric order. */
+.Lstore_table:
+ .long noretval-.Lstore_table /* FFI_TYPE_VOID */
+ .long retint-.Lstore_table /* FFI_TYPE_INT */
+ .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
+ .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
+ .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
+ .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
+ .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
+ .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
+ .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
+ .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
+ .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
+ .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
+ .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
+ .long retint-.Lstore_table /* FFI_TYPE_POINTER */
+ .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
+
+1:
+ /* %esi = &.Lstore_table; add the selected entry and jump to it. */
+ pop %esi
+ add (%esi, %ecx, 4), %esi
+ jmp *%esi
+
+ /* Sign/zero extend as appropriate. */
+ /* Narrow integers are widened to 32 bits and stored as a full word. */
+retsint8:
+ movsbl %al, %eax
+ jmp retint
+
+retsint16:
+ movswl %ax, %eax
+ jmp retint
+
+retuint8:
+ movzbl %al, %eax
+ jmp retint
+
+retuint16:
+ movzwl %ax, %eax
+ jmp retint
+
+retfloat:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstps (%ecx)
+ jmp epilogue
+
+retdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpl (%ecx)
+ jmp epilogue
+
+retlongdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpt (%ecx)
+ jmp epilogue
+
+retint64:
+ /* Load %ecx with the pointer to storage for the return value */
+ /* Low word comes from %eax, high word from %edx. */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ movl %edx,4(%ecx)
+ jmp epilogue
+
+retstruct1b:
+ /* Load %ecx with the pointer to storage for the return value */
+ /* Exactly one byte is written. */
+ movl 24(%ebp),%ecx
+ movb %al,0(%ecx)
+ jmp epilogue
+
+retstruct2b:
+ /* Load %ecx with the pointer to storage for the return value */
+ /* Exactly two bytes are written. */
+ movl 24(%ebp),%ecx
+ movw %ax,0(%ecx)
+ jmp epilogue
+
+retint:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+
+retstruct:
+ /* Nothing to do! */
+
+noretval:
+epilogue:
+ /* Restore %esi (pushed after the call), then tear down the frame;
+    resetting %esp from %ebp also discards the argument area. */
+ popl %esi
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.LFE1:
+.ffi_call_SYSV_end:
+ .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+ .align 4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+ .type ffi_closure_SYSV, @function
+
+ /* ffi_closure_SYSV -- 32-bit closure entry point.
+
+    Builds a 40-byte frame, calls ffi_closure_SYSV_inner (&resp, args) and
+    then loads the result into the return register(s) chosen by the type
+    code that the inner function returns in %eax.
+
+    Frame layout (relative to %ebp):
+      -24(%ebp)  start of the result buffer (resp points here)
+      -12(%ebp)  resp, the pointer handed to the inner function by address
+        8(%ebp)  first caller stack argument, passed as `args'
+
+    NOTE(review): %eax is not touched before the call -- presumably it
+    carries the closure pointer into ffi_closure_SYSV_inner as a register
+    argument; confirm against the trampoline and the C prototype.  */
+ffi_closure_SYSV:
+.LFB2:
+ pushl %ebp
+.LCFI2:
+ movl %esp, %ebp
+.LCFI3:
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp) /* resp */
+ leal 8(%ebp), %edx
+ movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
+ leal -12(%ebp), %edx
+ movl %edx, (%esp) /* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+ call ffi_closure_SYSV_inner
+#else
+ /* PIC without hidden visibility: call through the PLT, which needs
+    %ebx to hold the GOT address.  %ebx is saved in the frame at
+    8(%esp) and restored after the call. */
+ movl %ebx, 8(%esp)
+.LCFI7:
+ call 1f
+1: popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+ call ffi_closure_SYSV_inner@PLT
+ movl 8(%esp), %ebx
+#endif
+ /* %ecx = resp (possibly updated by the inner function through &resp);
+    %eax = return type code. */
+ movl -12(%ebp), %ecx
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ /* Range test: FFI_TYPE_UINT8 <= code < FFI_TYPE_UINT64. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lcls_retint
+
+0: cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lcls_retllong
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
+ je .Lcls_retstruct1b
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
+ je .Lcls_retstruct2b
+ cmpl $FFI_TYPE_STRUCT, %eax
+ je .Lcls_retstruct
+ /* Any other code falls through and returns without loading a value. */
+.Lcls_epilogue:
+ movl %ebp, %esp
+ popl %ebp
+ ret
+.Lcls_retint:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retfloat:
+ flds (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retdouble:
+ fldl (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retldouble:
+ fldt (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retllong:
+ movl (%ecx), %eax
+ movl 4(%ecx), %edx
+ jmp .Lcls_epilogue
+.Lcls_retstruct1b:
+ /* Small structs come back in %eax, sign-extended from 1 or 2 bytes. */
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retstruct2b:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retstruct:
+ /* `ret $4' pops four bytes of the caller's arguments on return --
+    presumably the hidden struct-return pointer; TODO confirm. */
+ movl %ebp, %esp
+ popl %ebp
+ ret $4
+.LFE2:
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+ /* Byte offsets of the fields read from the raw closure: the first field
+    sits at FFI_TRAMPOLINE_SIZE rounded up to a multiple of 4, and each
+    following field is 4 bytes further on.  CIF_FLAGS_OFFSET is the offset
+    of the return type code inside the cif. */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ .align 4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+ .type ffi_closure_raw_SYSV, @function
+
+ /* ffi_closure_raw_SYSV -- raw-API closure entry point (32-bit).
+
+    On entry %eax holds the closure pointer.  Calls
+      closure->fun (cif, &res, raw_args, user_data)
+    where raw_args is the address of the caller's first stack argument and
+    res is a buffer at -24(%ebp), then loads the result according to the
+    type code read from the cif.  %esi keeps the cif pointer across the
+    call and is saved and restored here. */
+ffi_closure_raw_SYSV:
+.LFB3:
+ pushl %ebp
+.LCFI4:
+ movl %esp, %ebp
+.LCFI5:
+ pushl %esi
+.LCFI6:
+ subl $36, %esp
+ movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
+ movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+ movl %edx, 12(%esp) /* user_data */
+ leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
+ movl %edx, 8(%esp) /* raw_args */
+ leal -24(%ebp), %edx
+ movl %edx, 4(%esp) /* &res */
+ movl %esi, (%esp) /* cif */
+ call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
+ movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lrcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ /* Range test: FFI_TYPE_UINT8 <= code < FFI_TYPE_UINT64. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lrcls_retint
+0:
+ cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lrcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lrcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lrcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lrcls_retllong
+ /* Any other code falls through and returns without loading a value. */
+.Lrcls_epilogue:
+ /* Undo the 36-byte allocation so the saved %esi is on top again. */
+ addl $36, %esp
+ popl %esi
+ popl %ebp
+ ret
+.Lrcls_retint:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+.Lrcls_retfloat:
+ flds -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retdouble:
+ fldl -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retldouble:
+ fldt -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retllong:
+ movl -24(%ebp), %eax
+ movl -20(%ebp), %edx
+ jmp .Lrcls_epilogue
+.LFE3:
+ .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+ .section .eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+ .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
+.LSCIE1:
+ .long 0x0 /* CIE Identifier Tag */
+ .byte 0x1 /* CIE Version */
+#ifdef __PIC__
+ .ascii "zR\0" /* CIE Augmentation */
+#else
+ .ascii "\0" /* CIE Augmentation */
+#endif
+ .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
+ .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+#ifdef __PIC__
+ .byte 0x1 /* .uleb128 0x1; Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+#endif
+ .byte 0xc /* DW_CFA_def_cfa */
+ .byte 0x4 /* .uleb128 0x4 */
+ .byte 0x4 /* .uleb128 0x4 */
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .byte 0x1 /* .uleb128 0x1 */
+ .align 4
+.LECIE1:
+.LSFDE1:
+ .long .LEFDE1-.LASFDE1 /* FDE Length */
+.LASFDE1:
+ .long .LASFDE1-.Lframe1 /* FDE CIE offset */
+#ifdef __PIC__
+ .long .LFB1-. /* FDE initial location */
+#else
+ .long .LFB1 /* FDE initial location */
+#endif
+ .long .LFE1-.LFB1 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI0-.LFB1
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI1-.LCFI0
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+ .align 4
+.LEFDE1:
+.LSFDE2:
+ .long .LEFDE2-.LASFDE2 /* FDE Length */
+.LASFDE2:
+ .long .LASFDE2-.Lframe1 /* FDE CIE offset */
+#ifdef __PIC__
+ .long .LFB2-. /* FDE initial location */
+#else
+ .long .LFB2
+#endif
+ .long .LFE2-.LFB2 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI2-.LFB2
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI3-.LCFI2
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI7-.LCFI3
+ .byte 0x83 /* DW_CFA_offset, column 0x3 */
+ .byte 0xa /* .uleb128 0xa */
+#endif
+ .align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+ .long .LEFDE3-.LASFDE3 /* FDE Length */
+.LASFDE3:
+ .long .LASFDE3-.Lframe1 /* FDE CIE offset */
+#ifdef __PIC__
+ .long .LFB3-. /* FDE initial location */
+#else
+ .long .LFB3
+#endif
+ .long .LFE3-.LFB3 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI4-.LFB3
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI5-.LCFI4
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI6-.LCFI5
+ .byte 0x86 /* DW_CFA_offset, column 0x6 */
+ .byte 0x3 /* .uleb128 0x3 */
+ .align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S
new file mode 100644
index 000000000..f108dd80d
--- /dev/null
+++ b/libffi/src/x86/sysv.S
@@ -0,0 +1,468 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc.
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+ .align 4
+.globl ffi_call_SYSV
+ .type ffi_call_SYSV,@function
+
+ /* ffi_call_SYSV -- 32-bit x86, AT&T syntax, all arguments on the stack.
+
+    Stack arguments as read by the code below (offsets from %ebp):
+       8(%ebp)  function called first to fill in the outgoing argument
+                area; invoked as fn (stack_area, 12(%ebp))
+      12(%ebp)  opaque second argument handed to that function
+      16(%ebp)  number of bytes to reserve for the outgoing arguments
+      20(%ebp)  return type code (an FFI_TYPE_* value)
+      24(%ebp)  pointer to storage for the return value, or NULL
+      28(%ebp)  address of the function to call
+
+    After the target returns, the result in %eax/%edx or %st(0) is stored
+    through 24(%ebp) according to the type code.  %esi is used as scratch
+    for the jump-table dispatch and is saved/restored around it.  */
+ffi_call_SYSV:
+.LFB1:
+ pushl %ebp
+.LCFI0:
+ movl %esp,%ebp
+.LCFI1:
+ /* Make room for all of the new args. */
+ movl 16(%ebp),%ecx
+ subl %ecx,%esp
+
+ /* Align the stack pointer to 16-bytes */
+ /* Rounding down can only enlarge the reserved area. */
+ andl $0xfffffff0, %esp
+
+ /* %eax = base of the (aligned) argument area */
+ movl %esp,%eax
+
+ /* Place all of the ffi_prep_args in position */
+ pushl 12(%ebp)
+ pushl %eax
+ call *8(%ebp)
+
+ /* Return stack to previous state and call the function */
+ /* Dropping the two pushed words leaves %esp at the argument area. */
+ addl $8,%esp
+
+ call *28(%ebp)
+
+ /* Load %ecx with the return type code */
+ movl 20(%ebp),%ecx
+
+ /* Protect %esi. We're going to pop it in the epilogue. */
+ pushl %esi
+
+ /* If the return value pointer is NULL, assume no return value. */
+ cmpl $0,24(%ebp)
+ jne 0f
+
+ /* Even if there is no space for the return value, we are
+ obliged to handle floating-point values. */
+ /* Only FFI_TYPE_FLOAT is popped off the x87 stack on this path. */
+ cmpl $FFI_TYPE_FLOAT,%ecx
+ jne noretval
+ fstp %st(0)
+
+ jmp epilogue
+
+0:
+ /* Position-independent dispatch: the call pushes the address of
+    .Lstore_table, which is popped into %esi at label 1 below. */
+ call 1f
+
+ /* Table of handler offsets relative to .Lstore_table, indexed directly
+    by the type code in %ecx (no bounds check), so the entries must stay
+    in FFI_TYPE_* numeric order. */
+.Lstore_table:
+ .long noretval-.Lstore_table /* FFI_TYPE_VOID */
+ .long retint-.Lstore_table /* FFI_TYPE_INT */
+ .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
+ .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
+ .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
+ .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
+ .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
+ .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
+ .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
+ .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
+ .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
+ .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
+ .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
+ .long retint-.Lstore_table /* FFI_TYPE_POINTER */
+
+1:
+ /* %esi = &.Lstore_table; add the selected entry and jump to it. */
+ pop %esi
+ add (%esi, %ecx, 4), %esi
+ jmp *%esi
+
+ /* Sign/zero extend as appropriate. */
+ /* Narrow integers are widened to 32 bits and stored as a full word. */
+retsint8:
+ movsbl %al, %eax
+ jmp retint
+
+retsint16:
+ movswl %ax, %eax
+ jmp retint
+
+retuint8:
+ movzbl %al, %eax
+ jmp retint
+
+retuint16:
+ movzwl %ax, %eax
+ jmp retint
+
+retfloat:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstps (%ecx)
+ jmp epilogue
+
+retdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpl (%ecx)
+ jmp epilogue
+
+retlongdouble:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ fstpt (%ecx)
+ jmp epilogue
+
+retint64:
+ /* Load %ecx with the pointer to storage for the return value */
+ /* Low word comes from %eax, high word from %edx. */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ movl %edx,4(%ecx)
+ jmp epilogue
+
+retint:
+ /* Load %ecx with the pointer to storage for the return value */
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+
+retstruct:
+ /* Nothing to do! */
+
+noretval:
+epilogue:
+ /* Restore %esi (pushed after the call), then tear down the frame;
+    resetting %esp from %ebp also discards the argument area. */
+ popl %esi
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.LFE1:
+.ffi_call_SYSV_end:
+ .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+ .align 4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+ .type ffi_closure_SYSV, @function
+
+ /* ffi_closure_SYSV -- 32-bit closure entry point.
+
+    Builds a 40-byte frame, calls ffi_closure_SYSV_inner (&resp, args) and
+    then loads the result into the return register(s) chosen by the type
+    code that the inner function returns in %eax.
+
+    Frame layout (relative to %ebp):
+      -24(%ebp)  start of the result buffer (resp points here)
+      -12(%ebp)  resp, the pointer handed to the inner function by address
+        8(%ebp)  first caller stack argument, passed as `args'
+
+    NOTE(review): %eax is not touched before the call -- presumably it
+    carries the closure pointer into ffi_closure_SYSV_inner as a register
+    argument; confirm against the trampoline and the C prototype.  */
+ffi_closure_SYSV:
+.LFB2:
+ pushl %ebp
+.LCFI2:
+ movl %esp, %ebp
+.LCFI3:
+ subl $40, %esp
+ leal -24(%ebp), %edx
+ movl %edx, -12(%ebp) /* resp */
+ leal 8(%ebp), %edx
+ movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
+ leal -12(%ebp), %edx
+ movl %edx, (%esp) /* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+ call ffi_closure_SYSV_inner
+#else
+ /* PIC without hidden visibility: call through the PLT, which needs
+    %ebx to hold the GOT address.  %ebx is saved in the frame at
+    8(%esp) and restored after the call. */
+ movl %ebx, 8(%esp)
+.LCFI7:
+ call 1f
+1: popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+ call ffi_closure_SYSV_inner@PLT
+ movl 8(%esp), %ebx
+#endif
+ /* %ecx = resp (possibly updated by the inner function through &resp);
+    %eax = return type code. */
+ movl -12(%ebp), %ecx
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ /* Range test: FFI_TYPE_UINT8 <= code < FFI_TYPE_UINT64. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lcls_retint
+
+0: cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lcls_retllong
+ cmpl $FFI_TYPE_STRUCT, %eax
+ je .Lcls_retstruct
+ /* Any other code falls through and returns without loading a value. */
+.Lcls_epilogue:
+ movl %ebp, %esp
+ popl %ebp
+ ret
+.Lcls_retint:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+.Lcls_retfloat:
+ flds (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retdouble:
+ fldl (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retldouble:
+ fldt (%ecx)
+ jmp .Lcls_epilogue
+.Lcls_retllong:
+ movl (%ecx), %eax
+ movl 4(%ecx), %edx
+ jmp .Lcls_epilogue
+.Lcls_retstruct:
+ /* `ret $4' pops four bytes of the caller's arguments on return --
+    presumably the hidden struct-return pointer; TODO confirm. */
+ movl %ebp, %esp
+ popl %ebp
+ ret $4
+.LFE2:
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+/* Precalculate for e.g. the Solaris 10/x86 assembler. */
+/* The literal values below equal what the generic formula in the final
+   #else branch yields for trampoline sizes 10 and 24: the size rounded up
+   to a multiple of 4, then +4 and +8 for the following fields. */
+#if FFI_TRAMPOLINE_SIZE == 10
+#define RAW_CLOSURE_CIF_OFFSET 12
+#define RAW_CLOSURE_FUN_OFFSET 16
+#define RAW_CLOSURE_USER_DATA_OFFSET 20
+#elif FFI_TRAMPOLINE_SIZE == 24
+#define RAW_CLOSURE_CIF_OFFSET 24
+#define RAW_CLOSURE_FUN_OFFSET 28
+#define RAW_CLOSURE_USER_DATA_OFFSET 32
+#else
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#endif
+/* Offset of the return type code inside the cif. */
+#define CIF_FLAGS_OFFSET 20
+
+ .align 4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+ .type ffi_closure_raw_SYSV, @function
+
+ /* ffi_closure_raw_SYSV -- raw-API closure entry point (32-bit).
+
+    On entry %eax holds the closure pointer.  Calls
+      closure->fun (cif, &res, raw_args, user_data)
+    where raw_args is the address of the caller's first stack argument and
+    res is a buffer at -24(%ebp), then loads the result according to the
+    type code read from the cif.  %esi keeps the cif pointer across the
+    call and is saved and restored here. */
+ffi_closure_raw_SYSV:
+.LFB3:
+ pushl %ebp
+.LCFI4:
+ movl %esp, %ebp
+.LCFI5:
+ pushl %esi
+.LCFI6:
+ subl $36, %esp
+ movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
+ movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+ movl %edx, 12(%esp) /* user_data */
+ leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
+ movl %edx, 8(%esp) /* raw_args */
+ leal -24(%ebp), %edx
+ movl %edx, 4(%esp) /* &res */
+ movl %esi, (%esp) /* cif */
+ call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
+ movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
+ cmpl $FFI_TYPE_INT, %eax
+ je .Lrcls_retint
+
+ /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+ FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
+ /* Range test: FFI_TYPE_UINT8 <= code < FFI_TYPE_UINT64. */
+ cmpl $FFI_TYPE_UINT64, %eax
+ jge 0f
+ cmpl $FFI_TYPE_UINT8, %eax
+ jge .Lrcls_retint
+0:
+ cmpl $FFI_TYPE_FLOAT, %eax
+ je .Lrcls_retfloat
+ cmpl $FFI_TYPE_DOUBLE, %eax
+ je .Lrcls_retdouble
+ cmpl $FFI_TYPE_LONGDOUBLE, %eax
+ je .Lrcls_retldouble
+ cmpl $FFI_TYPE_SINT64, %eax
+ je .Lrcls_retllong
+ /* Any other code falls through and returns without loading a value. */
+.Lrcls_epilogue:
+ /* Undo the 36-byte allocation so the saved %esi is on top again. */
+ addl $36, %esp
+ popl %esi
+ popl %ebp
+ ret
+.Lrcls_retint:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+.Lrcls_retfloat:
+ flds -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retdouble:
+ fldl -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retldouble:
+ fldt -24(%ebp)
+ jmp .Lrcls_epilogue
+.Lrcls_retllong:
+ movl -24(%ebp), %eax
+ movl -20(%ebp), %edx
+ jmp .Lrcls_epilogue
+.LFE3:
+ .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+#if defined __PIC__
+# if defined __sun__ && defined __svr4__
+/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22
+ doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */
+# define FDE_ENCODING 0x30 /* datarel */
+# define FDE_ENCODE(X) X@GOTOFF
+# else
+# define FDE_ENCODING 0x1b /* pcrel sdata4 */
+# if defined HAVE_AS_X86_PCREL
+# define FDE_ENCODE(X) X-.
+# else
+# define FDE_ENCODE(X) X@rel
+# endif
+# endif
+#else
+# define FDE_ENCODING 0 /* absolute */
+# define FDE_ENCODE(X) X
+#endif
+
+ .section .eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+ .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */
+.LSCIE1:
+ .long 0x0 /* CIE Identifier Tag */
+ .byte 0x1 /* CIE Version */
+#ifdef HAVE_AS_ASCII_PSEUDO_OP
+#ifdef __PIC__
+ .ascii "zR\0" /* CIE Augmentation */
+#else
+ .ascii "\0" /* CIE Augmentation */
+#endif
+#elif defined HAVE_AS_STRING_PSEUDO_OP
+#ifdef __PIC__
+ .string "zR" /* CIE Augmentation */
+#else
+ .string "" /* CIE Augmentation */
+#endif
+#else
+#error missing .ascii/.string
+#endif
+ .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
+ .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+#ifdef __PIC__
+ .byte 0x1 /* .uleb128 0x1; Augmentation size */
+ .byte FDE_ENCODING
+#endif
+ .byte 0xc /* DW_CFA_def_cfa */
+ .byte 0x4 /* .uleb128 0x4 */
+ .byte 0x4 /* .uleb128 0x4 */
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .byte 0x1 /* .uleb128 0x1 */
+ .align 4
+.LECIE1:
+.LSFDE1:
+ .long .LEFDE1-.LASFDE1 /* FDE Length */
+.LASFDE1:
+ .long .LASFDE1-.Lframe1 /* FDE CIE offset */
+ .long FDE_ENCODE(.LFB1) /* FDE initial location */
+ .long .LFE1-.LFB1 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI0-.LFB1
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI1-.LCFI0
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+ .align 4
+.LEFDE1:
+.LSFDE2:
+ .long .LEFDE2-.LASFDE2 /* FDE Length */
+.LASFDE2:
+ .long .LASFDE2-.Lframe1 /* FDE CIE offset */
+ .long FDE_ENCODE(.LFB2) /* FDE initial location */
+ .long .LFE2-.LFB2 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI2-.LFB2
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI3-.LCFI2
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI7-.LCFI3
+ .byte 0x83 /* DW_CFA_offset, column 0x3 */
+ .byte 0xa /* .uleb128 0xa */
+#endif
+ .align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+ .long .LEFDE3-.LASFDE3 /* FDE Length */
+.LASFDE3:
+ .long .LASFDE3-.Lframe1 /* FDE CIE offset */
+ .long FDE_ENCODE(.LFB3) /* FDE initial location */
+ .long .LFE3-.LFB3 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI4-.LFB3
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 */
+ .byte 0x2 /* .uleb128 0x2 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI5-.LCFI4
+ .byte 0xd /* DW_CFA_def_cfa_register */
+ .byte 0x5 /* .uleb128 0x5 */
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI6-.LCFI5
+ .byte 0x86 /* DW_CFA_offset, column 0x6 */
+ .byte 0x3 /* .uleb128 0x3 */
+ .align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
new file mode 100644
index 000000000..7a6619a54
--- /dev/null
+++ b/libffi/src/x86/unix64.S
@@ -0,0 +1,426 @@
+/* -----------------------------------------------------------------------
+ unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+ Copyright (c) 2008 Red Hat, Inc
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void));
+
+ ABI: System V AMD64, AT&T syntax.
+
+ In:  %rdi = args    base of the register-load area (6 integer registers
+                     at offsets 0..40, 8 SSE registers at 48..160, 176
+                     bytes in total); the stack arguments follow it
+      %rsi = bytes   args + bytes is the base of this function's frame
+      %rdx = flags   low byte = FFI_TYPE_* of the return value; for
+                     structs bits 8-10 select the return registers and
+                     bits 12-31 hold the structure size
+      %rcx = raddr   where to store the return value
+      %r8  = fnaddr  function to call
+      %r9d           number of SSE registers in use (a sixth argument not
+                     listed in the prototype above); copied to %eax
+
+ Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd.
+
+ Every .Lst_* return-value handler must end in its own `ret': the
+ handlers are laid out back to back, so a missing `ret' falls through
+ into the next one and overwrites the value just stored. */
+
+ .align 2
+ .globl ffi_call_unix64
+ .type ffi_call_unix64,@function
+
+ffi_call_unix64:
+.LUW0:
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
+.LUW1:
+ movq %rdi, %r10 /* Save a copy of the register area. */
+ movq %r8, %r11 /* Save a copy of the target fn. */
+ movl %r9d, %eax /* Set number of SSE registers. */
+
+ /* Load up all argument registers. */
+ movq (%r10), %rdi
+ movq 8(%r10), %rsi
+ movq 16(%r10), %rdx
+ movq 24(%r10), %rcx
+ movq 32(%r10), %r8
+ movq 40(%r10), %r9
+ testl %eax, %eax
+ jnz .Lload_sse
+.Lret_from_load_sse:
+
+ /* Deallocate the reg arg area. */
+ /* %rsp now points just past the 176-byte register area, i.e. at the
+    first stack argument. */
+ leaq 176(%r10), %rsp
+
+ /* Call the user function. */
+ call *%r11
+
+ /* Deallocate stack arg area; local stack frame in redzone. */
+ /* %rsp now points at the relocated return address, so every handler
+    below can finish with a plain `ret'. */
+ leaq 24(%rbp), %rsp
+
+ movq 0(%rbp), %rcx /* Reload flags. */
+ movq 8(%rbp), %rdi /* Reload raddr. */
+ movq 16(%rbp), %rbp /* Reload old frame pointer. */
+.LUW2:
+
+ /* The first byte of the flags contains the FFI_TYPE. */
+ /* Dispatch through a table of 32-bit offsets relative to the table
+    itself; the entries must stay in FFI_TYPE_* numeric order. */
+ movzbl %cl, %r10d
+ leaq .Lstore_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10
+ addq %r11, %r10
+ jmp *%r10
+
+.Lstore_table:
+ .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */
+ .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */
+ .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */
+ .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */
+ .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
+ .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */
+ .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */
+ .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */
+ .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */
+ .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */
+ .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */
+ .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */
+ .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */
+
+ /* Integer results narrower than 64 bits are widened to a full
+    64-bit word (zero- or sign-extended to match the type) before
+    being stored through raddr in %rdi. */
+ .align 2
+.Lst_void:
+ ret
+ .align 2
+
+.Lst_uint8:
+ movzbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_sint8:
+ movsbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_uint16:
+ movzwq %ax, %rax
+ movq %rax, (%rdi)
+ /* Must return here: falling through into .Lst_sint16 would re-store
+    the value sign-extended, corrupting results >= 0x8000. */
+ ret
+ .align 2
+.Lst_sint16:
+ movswq %ax, %rax
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_uint32:
+ movl %eax, %eax /* 32-bit move zero-extends into %rax. */
+ movq %rax, (%rdi)
+ /* Must return here: falling through into .Lst_sint32 would re-store
+    the value sign-extended, corrupting results >= 0x80000000. */
+ ret
+ .align 2
+.Lst_sint32:
+ cltq
+ movq %rax, (%rdi)
+ ret
+ .align 2
+.Lst_int64:
+ movq %rax, (%rdi)
+ ret
+
+ .align 2
+.Lst_float:
+ movss %xmm0, (%rdi)
+ ret
+ .align 2
+.Lst_double:
+ movsd %xmm0, (%rdi)
+ ret
+.Lst_ldouble:
+ fstpt (%rdi)
+ ret
+
+ .align 2
+.Lst_struct:
+ leaq -20(%rsp), %rsi /* Scratch area in redzone. */
+
+ /* We have to locate the values now, and since we don't want to
+ write too much data into the user's return value, we spill the
+ value to a 16 byte scratch area first. Bits 8, 9, and 10
+ control where the values are located. Only one of the three
+ bits will be set; see ffi_prep_cif_machdep for the pattern. */
+ /* After the conditional moves %rax holds the first eightbyte and
+    %rdx the second:
+      0x100: first from %xmm0, second from %rax
+      0x200: first from %rax,  second from %xmm0
+      0x400: first from %xmm0, second from %xmm1
+      none:  first from %rax,  second from %rdx  */
+ movd %xmm0, %r10
+ movd %xmm1, %r11
+ testl $0x100, %ecx
+ cmovnz %rax, %rdx
+ cmovnz %r10, %rax
+ testl $0x200, %ecx
+ cmovnz %r10, %rdx
+ testl $0x400, %ecx
+ cmovnz %r10, %rax
+ cmovnz %r11, %rdx
+ movq %rax, (%rsi)
+ movq %rdx, 8(%rsi)
+
+ /* Bits 12-31 contain the true size of the structure. Copy from
+ the scratch area to the true destination. */
+ /* rep movsb copies %ecx bytes from (%rsi) to (%rdi). */
+ shrl $12, %ecx
+ rep movsb
+ ret
+
+ /* Many times we can avoid loading any SSE registers at all.
+ It's not worth an indirect jump to load the exact set of
+ SSE registers needed; zero or all is a good compromise. */
+ /* movdqa requires the register area at %r10 to be 16-byte aligned. */
+ .align 2
+.LUW3:
+.Lload_sse:
+ movdqa 48(%r10), %xmm0
+ movdqa 64(%r10), %xmm1
+ movdqa 80(%r10), %xmm2
+ movdqa 96(%r10), %xmm3
+ movdqa 112(%r10), %xmm4
+ movdqa 128(%r10), %xmm5
+ movdqa 144(%r10), %xmm6
+ movdqa 160(%r10), %xmm7
+ jmp .Lret_from_load_sse
+
+.LUW4:
+ .size ffi_call_unix64,.-ffi_call_unix64
+
+ .align 2
+ .globl ffi_closure_unix64
+ .type ffi_closure_unix64,@function
+
+ /* ffi_closure_unix64 -- x86-64 closure entry point (System V AMD64).
+
+    In:  %r10 is forwarded unchanged as the first argument of
+         ffi_closure_unix64_inner (presumably the closure pointer set up
+         by the trampoline -- TODO confirm); CF is set iff the SSE
+         argument registers must be saved.
+
+    Allocates a 200-byte frame:
+        0..40(%rsp)    the six integer argument registers
+       48..160(%rsp)   %xmm0-%xmm7 (written only when CF was set)
+      176(%rsp)        return value buffer
+    and calls ffi_closure_unix64_inner (%r10, &retbuf, &regarea, stackargs)
+    where stackargs = 208(%rsp), i.e. just above this function's return
+    address.  The inner function returns flags in %eax: the low byte is
+    the FFI_TYPE_* of the result and, for structs, bits 8 and 9 select
+    the return registers. */
+ffi_closure_unix64:
+.LUW5:
+ /* The carry flag is set by the trampoline iff SSE registers
+ are used. Don't clobber it before the branch instruction. */
+ /* lea (unlike sub) leaves the flags untouched.  With %rsp % 16 == 8
+    on entry, subtracting 200 makes %rsp 16-byte aligned, as the movdqa
+    stores in .Lsave_sse require. */
+ leaq -200(%rsp), %rsp
+.LUW6:
+ movq %rdi, (%rsp)
+ movq %rsi, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %r8, 32(%rsp)
+ movq %r9, 40(%rsp)
+ jc .Lsave_sse
+.Lret_from_save_sse:
+
+ /* Arguments for the inner function; see the header above. */
+ movq %r10, %rdi
+ leaq 176(%rsp), %rsi
+ movq %rsp, %rdx
+ leaq 208(%rsp), %rcx
+ call ffi_closure_unix64_inner@PLT
+
+ /* Deallocate stack frame early; return value is now in redzone. */
+ /* The buffer that was at 176(%rsp) is now addressed as -24(%rsp). */
+ addq $200, %rsp
+.LUW7:
+
+ /* The first byte of the return value contains the FFI_TYPE. */
+ /* Dispatch through a table of 32-bit offsets relative to the table
+    itself; the entries must stay in FFI_TYPE_* numeric order. */
+ movzbl %al, %r10d
+ leaq .Lload_table(%rip), %r11
+ movslq (%r11, %r10, 4), %r10
+ addq %r11, %r10
+ jmp *%r10
+
+.Lload_table:
+ .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */
+ .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */
+ .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */
+ .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */
+ .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */
+ .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */
+ .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */
+ .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */
+ .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */
+ .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */
+ .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */
+
+ .align 2
+.Lld_void:
+ ret
+
+ /* Signed and unsigned narrow integers share a handler: both are
+    loaded zero-extended into %eax. */
+ .align 2
+.Lld_int8:
+ movzbl -24(%rsp), %eax
+ ret
+ .align 2
+.Lld_int16:
+ movzwl -24(%rsp), %eax
+ ret
+ .align 2
+.Lld_int32:
+ movl -24(%rsp), %eax
+ ret
+ .align 2
+.Lld_int64:
+ movq -24(%rsp), %rax
+ ret
+
+ .align 2
+.Lld_float:
+ movss -24(%rsp), %xmm0
+ ret
+ .align 2
+.Lld_double:
+ movsd -24(%rsp), %xmm0
+ ret
+ .align 2
+.Lld_ldouble:
+ fldt -24(%rsp)
+ ret
+
+ .align 2
+.Lld_struct:
+ /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+ %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
+ both rdx and xmm1 with the second word. For the remaining,
+ bit 8 set means xmm0 gets the second word, and bit 9 means
+ that rax gets the second word. */
+ /* %eax still holds the flags until the final reload of %rax, so both
+    tests must come before it; the mov between the second test and its
+    cmov does not alter the flags. */
+ movq -24(%rsp), %rcx
+ movq -16(%rsp), %rdx
+ movq -16(%rsp), %xmm1
+ testl $0x100, %eax
+ cmovnz %rdx, %rcx
+ movd %rcx, %xmm0
+ testl $0x200, %eax
+ movq -24(%rsp), %rax
+ cmovnz %rdx, %rax
+ ret
+
+ /* See the comment above .Lload_sse; the same logic applies here. */
+ .align 2
+.LUW8:
+.Lsave_sse:
+ movdqa %xmm0, 48(%rsp)
+ movdqa %xmm1, 64(%rsp)
+ movdqa %xmm2, 80(%rsp)
+ movdqa %xmm3, 96(%rsp)
+ movdqa %xmm4, 112(%rsp)
+ movdqa %xmm5, 128(%rsp)
+ movdqa %xmm6, 144(%rsp)
+ movdqa %xmm7, 160(%rsp)
+ jmp .Lret_from_save_sse
+
+.LUW9:
+ .size ffi_closure_unix64,.-ffi_closure_unix64
+
+#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
+ .section .eh_frame,"a",@unwind
+#else
+ .section .eh_frame,"a",@progbits
+#endif
+.Lframe1:
+ .long .LECIE1-.LSCIE1 /* CIE Length */
+.LSCIE1:
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .uleb128 1 /* CIE Code Alignment Factor */
+ .sleb128 -8 /* CIE Data Alignment Factor */
+ .byte 0x10 /* CIE RA Column */
+ .uleb128 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .uleb128 7
+ .uleb128 8
+ .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */
+ .uleb128 1
+ .align 8
+.LECIE1:
+.LSFDE1:
+ .long .LEFDE1-.LASFDE1 /* FDE Length */
+.LASFDE1:
+ .long .LASFDE1-.Lframe1 /* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+ .long .LUW0-. /* FDE initial location */
+#else
+ .long .LUW0@rel
+#endif
+ .long .LUW4-.LUW0 /* FDE address range */
+ .uleb128 0x0 /* Augmentation size */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW1-.LUW0
+
+ /* New stack frame based off rbp. This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+ .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
+ .uleb128 6
+ .uleb128 32
+ .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
+ .uleb128 2
+ .byte 0xa /* DW_CFA_remember_state */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW2-.LUW1
+ .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
+ .uleb128 7
+ .uleb128 8
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW3-.LUW2
+ .byte 0xb /* DW_CFA_restore_state */
+
+ .align 8
+.LEFDE1:
+.LSFDE3:
+ .long .LEFDE3-.LASFDE3 /* FDE Length */
+.LASFDE3:
+ .long .LASFDE3-.Lframe1 /* FDE CIE offset */
+#if HAVE_AS_X86_PCREL
+ .long .LUW5-. /* FDE initial location */
+#else
+ .long .LUW5@rel
+#endif
+ .long .LUW9-.LUW5 /* FDE address range */
+ .uleb128 0x0 /* Augmentation size */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW6-.LUW5
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 208
+ .byte 0xa /* DW_CFA_remember_state */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW7-.LUW6
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 8
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LUW8-.LUW7
+ .byte 0xb /* DW_CFA_restore_state */
+
+ .align 8
+.LEFDE3:
+
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S
new file mode 100644
index 000000000..34ec0fd82
--- /dev/null
+++ b/libffi/src/x86/win32.S
@@ -0,0 +1,1065 @@
+/* -----------------------------------------------------------------------
+ win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc.
+ Copyright (c) 2001 John Beniton
+ Copyright (c) 2002 Ranjit Mathew
+ Copyright (c) 2009 Daniel Witte
+
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ -----------------------------------------------------------------------
+ */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#ifdef _MSC_VER
+
+.386
+.MODEL FLAT, C
+
+EXTRN ffi_closure_SYSV_inner:NEAR
+
+_TEXT SEGMENT
+
+ffi_call_win32 PROC NEAR,
+ ffi_prep_args : NEAR PTR DWORD,
+ ecif : NEAR PTR DWORD,
+ cif_bytes : DWORD,
+ cif_flags : DWORD,
+ rvalue : NEAR PTR DWORD,
+ fn : NEAR PTR DWORD
+
+ ;; Build an argument block of cif_bytes bytes on the stack, let
+ ;; ffi_prep_args(block, ecif) fill it in, call fn, and store the
+ ;; result through rvalue according to the type code in cif_flags.
+ ;; A NULL rvalue means the result is discarded.
+
+ ;; Make room for all of the new args.
+ mov ecx, cif_bytes
+ sub esp, ecx
+
+ mov eax, esp
+
+ ;; Place all of the ffi_prep_args in position
+ push ecif
+ push eax
+ call ffi_prep_args
+
+ ;; Return stack to previous state and call the function
+ add esp, 8
+
+ call fn
+
+ ;; cdecl: we restore esp in the epilogue, so there's no need to
+ ;; remove the space we pushed for the args.
+ ;; stdcall: the callee has already cleaned the stack.
+
+ ;; Load ecx with the return type code
+ mov ecx, cif_flags
+
+ ;; If the return value pointer is NULL, assume no return value.
+ cmp rvalue, 0
+ jne ca_jumptable
+
+ ;; Even if there is no space for the return value, we are
+ ;; obliged to pop a floating-point result off the x87 stack.
+ ;; float, double and long double are all returned in st(0);
+ ;; their type codes are consecutive (see the jump table below),
+ ;; so one unsigned range check covers all three.
+ cmp ecx, FFI_TYPE_FLOAT
+ jb ca_epilogue
+ cmp ecx, FFI_TYPE_LONGDOUBLE
+ ja ca_epilogue
+ fstp st(0)
+
+ jmp ca_epilogue
+
+ca_jumptable:
+ jmp [ca_jumpdata + 4 * ecx]
+ca_jumpdata:
+ ;; Do not insert anything here between label and jump table.
+ dd offset ca_epilogue ;; FFI_TYPE_VOID
+ dd offset ca_retint ;; FFI_TYPE_INT
+ dd offset ca_retfloat ;; FFI_TYPE_FLOAT
+ dd offset ca_retdouble ;; FFI_TYPE_DOUBLE
+ dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE
+ dd offset ca_retuint8 ;; FFI_TYPE_UINT8
+ dd offset ca_retsint8 ;; FFI_TYPE_SINT8
+ dd offset ca_retuint16 ;; FFI_TYPE_UINT16
+ dd offset ca_retsint16 ;; FFI_TYPE_SINT16
+ dd offset ca_retint ;; FFI_TYPE_UINT32
+ dd offset ca_retint ;; FFI_TYPE_SINT32
+ dd offset ca_retint64 ;; FFI_TYPE_UINT64
+ dd offset ca_retint64 ;; FFI_TYPE_SINT64
+ dd offset ca_epilogue ;; FFI_TYPE_STRUCT
+ dd offset ca_retint ;; FFI_TYPE_POINTER
+ dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+
+ ;; Sign/zero extend narrow integer results to 32 bits before
+ ;; storing them, as the gas implementation of this routine does.
+ca_retsint8:
+ movsx eax, al
+ jmp ca_retint
+
+ca_retsint16:
+ movsx eax, ax
+ jmp ca_retint
+
+ca_retuint8:
+ movzx eax, al
+ jmp ca_retint
+
+ca_retuint16:
+ movzx eax, ax
+ jmp ca_retint
+
+ca_retint8:
+ ;; Small struct: store exactly one byte.
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], al
+ jmp ca_epilogue
+
+ca_retint16:
+ ;; Small struct: store exactly two bytes.
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], ax
+ jmp ca_epilogue
+
+ca_retint:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], eax
+ jmp ca_epilogue
+
+ca_retint64:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], eax
+ mov [ecx + 4], edx
+ jmp ca_epilogue
+
+ca_retfloat:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ fstp DWORD PTR [ecx]
+ jmp ca_epilogue
+
+ca_retdouble:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ fstp QWORD PTR [ecx]
+ jmp ca_epilogue
+
+ca_retlongdouble:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ fstp TBYTE PTR [ecx]
+ jmp ca_epilogue
+
+ca_epilogue:
+ ;; Epilogue code is autogenerated.
+ ret
+ffi_call_win32 ENDP
+
+ffi_closure_SYSV PROC NEAR FORCEFRAME
+ ;; Closure entry point: the ffi_closure ctx is passed in eax by the trampoline.
+ ;; Calls ffi_closure_SYSV_inner(closure, &resp, args), then returns *resp by type.
+ sub esp, 40 ;; reserve 40 bytes for locals and outgoing arguments
+ lea edx, [ebp - 24] ;; edx = address of the local result buffer
+ mov [ebp - 12], edx ;; resp
+ lea edx, [ebp + 8] ;; edx = ebp + 8, handed to the inner function as args
+ mov [esp + 8], edx ;; args
+ lea edx, [ebp - 12] ;; edx = address of the resp slot
+ mov [esp + 4], edx ;; &resp
+ mov [esp], eax ;; closure
+ call ffi_closure_SYSV_inner
+ mov ecx, [ebp - 12] ;; ecx = resp, reloaded after the call
+
+cs_jumptable:
+ jmp [cs_jumpdata + 4 * eax] ;; dispatch on the type code left in eax by the call
+cs_jumpdata:
+ ;; Do not insert anything here between the label and jump table.
+ dd offset cs_epilogue ;; FFI_TYPE_VOID
+ dd offset cs_retint ;; FFI_TYPE_INT
+ dd offset cs_retfloat ;; FFI_TYPE_FLOAT
+ dd offset cs_retdouble ;; FFI_TYPE_DOUBLE
+ dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE
+ dd offset cs_retint8 ;; FFI_TYPE_UINT8
+ dd offset cs_retint8 ;; FFI_TYPE_SINT8
+ dd offset cs_retint16 ;; FFI_TYPE_UINT16
+ dd offset cs_retint16 ;; FFI_TYPE_SINT16
+ dd offset cs_retint ;; FFI_TYPE_UINT32
+ dd offset cs_retint ;; FFI_TYPE_SINT32
+ dd offset cs_retint64 ;; FFI_TYPE_UINT64
+ dd offset cs_retint64 ;; FFI_TYPE_SINT64
+ dd offset cs_retstruct ;; FFI_TYPE_STRUCT
+ dd offset cs_retint ;; FFI_TYPE_POINTER
+ dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cs_retint8:
+ mov al, [ecx] ;; only al is written; no sign/zero extension is done here
+ jmp cs_epilogue
+
+cs_retint16:
+ mov ax, [ecx] ;; only ax is written; no sign/zero extension is done here
+ jmp cs_epilogue
+
+cs_retint:
+ mov eax, [ecx] ;; 32-bit result in eax
+ jmp cs_epilogue
+
+cs_retint64:
+ mov eax, [ecx + 0] ;; low dword in eax
+ mov edx, [ecx + 4] ;; high dword in edx
+ jmp cs_epilogue
+
+cs_retfloat:
+ fld DWORD PTR [ecx] ;; push the 32-bit float result onto the x87 stack
+ jmp cs_epilogue
+
+cs_retdouble:
+ fld QWORD PTR [ecx] ;; push the 64-bit double result onto the x87 stack
+ jmp cs_epilogue
+
+cs_retlongdouble:
+ fld TBYTE PTR [ecx] ;; push the 80-bit result onto the x87 stack
+ jmp cs_epilogue
+
+cs_retstruct:
+ ;; Caller expects us to pop struct return value pointer hidden arg.
+ ;; Epilogue code is autogenerated.
+ ret 4
+
+cs_epilogue:
+ ;; Epilogue code is autogenerated.
+ ret
+ffi_closure_SYSV ENDP
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ffi_closure_raw_SYSV PROC NEAR USES esi
+ ;; Raw closure entry point: the ffi_closure ctx is passed in eax by the trampoline.
+ ;; Calls closure->fun(cif, &res, raw_args, user_data), then returns res per cif->flags.
+ sub esp, 40 ;; reserve 40 bytes for locals and outgoing arguments
+ mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif
+ mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data
+ mov [esp + 12], edx ;; user_data
+ lea edx, [ebp + 8] ;; edx = ebp + 8, passed on as the raw argument pointer
+ mov [esp + 8], edx ;; raw_args
+ lea edx, [ebp - 24] ;; edx = address of the local result buffer
+ mov [esp + 4], edx ;; &res
+ mov [esp], esi ;; cif
+ call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun
+ mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags
+ lea ecx, [ebp - 24] ;; ecx = &res, read by the cr_ret* handlers
+
+cr_jumptable:
+ jmp [cr_jumpdata + 4 * eax] ;; dispatch on cif->flags
+cr_jumpdata:
+ ;; Do not insert anything here between the label and jump table.
+ dd offset cr_epilogue ;; FFI_TYPE_VOID
+ dd offset cr_retint ;; FFI_TYPE_INT
+ dd offset cr_retfloat ;; FFI_TYPE_FLOAT
+ dd offset cr_retdouble ;; FFI_TYPE_DOUBLE
+ dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE
+ dd offset cr_retint8 ;; FFI_TYPE_UINT8
+ dd offset cr_retint8 ;; FFI_TYPE_SINT8
+ dd offset cr_retint16 ;; FFI_TYPE_UINT16
+ dd offset cr_retint16 ;; FFI_TYPE_SINT16
+ dd offset cr_retint ;; FFI_TYPE_UINT32
+ dd offset cr_retint ;; FFI_TYPE_SINT32
+ dd offset cr_retint64 ;; FFI_TYPE_UINT64
+ dd offset cr_retint64 ;; FFI_TYPE_SINT64
+ dd offset cr_epilogue ;; FFI_TYPE_STRUCT
+ dd offset cr_retint ;; FFI_TYPE_POINTER
+ dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cr_retint8:
+ mov al, [ecx] ;; only al is written; no sign/zero extension is done here
+ jmp cr_epilogue
+
+cr_retint16:
+ mov ax, [ecx] ;; only ax is written; no sign/zero extension is done here
+ jmp cr_epilogue
+
+cr_retint:
+ mov eax, [ecx] ;; 32-bit result in eax
+ jmp cr_epilogue
+
+cr_retint64:
+ mov eax, [ecx + 0] ;; low dword in eax
+ mov edx, [ecx + 4] ;; high dword in edx
+ jmp cr_epilogue
+
+cr_retfloat:
+ fld DWORD PTR [ecx] ;; push the 32-bit float result onto the x87 stack
+ jmp cr_epilogue
+
+cr_retdouble:
+ fld QWORD PTR [ecx] ;; push the 64-bit double result onto the x87 stack
+ jmp cr_epilogue
+
+cr_retlongdouble:
+ fld TBYTE PTR [ecx] ;; push the 80-bit result onto the x87 stack
+ jmp cr_epilogue
+
+cr_epilogue:
+ ;; Epilogue code is autogenerated.
+ ret
+ffi_closure_raw_SYSV ENDP
+
+#endif /* !FFI_NO_RAW_API */
+
+ffi_closure_STDCALL PROC NEAR FORCEFRAME
+ ;; stdcall closure entry point: the ffi_closure ctx is passed in eax by the trampoline.
+ ;; Same flow as ffi_closure_SYSV, except that args starts at ebp + 12 instead of ebp + 8.
+ sub esp, 40 ;; reserve 40 bytes for locals and outgoing arguments
+ lea edx, [ebp - 24] ;; edx = address of the local result buffer
+ mov [ebp - 12], edx ;; resp
+ lea edx, [ebp + 12] ;; account for stub return address on stack
+ mov [esp + 8], edx ;; args
+ lea edx, [ebp - 12] ;; edx = address of the resp slot
+ mov [esp + 4], edx ;; &resp
+ mov [esp], eax ;; closure
+ call ffi_closure_SYSV_inner
+ mov ecx, [ebp - 12] ;; ecx = resp, reloaded after the call
+
+cd_jumptable:
+ jmp [cd_jumpdata + 4 * eax] ;; dispatch on the type code left in eax by the call
+cd_jumpdata:
+ ;; Do not insert anything here between the label and jump table.
+ dd offset cd_epilogue ;; FFI_TYPE_VOID
+ dd offset cd_retint ;; FFI_TYPE_INT
+ dd offset cd_retfloat ;; FFI_TYPE_FLOAT
+ dd offset cd_retdouble ;; FFI_TYPE_DOUBLE
+ dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE
+ dd offset cd_retint8 ;; FFI_TYPE_UINT8
+ dd offset cd_retint8 ;; FFI_TYPE_SINT8
+ dd offset cd_retint16 ;; FFI_TYPE_UINT16
+ dd offset cd_retint16 ;; FFI_TYPE_SINT16
+ dd offset cd_retint ;; FFI_TYPE_UINT32
+ dd offset cd_retint ;; FFI_TYPE_SINT32
+ dd offset cd_retint64 ;; FFI_TYPE_UINT64
+ dd offset cd_retint64 ;; FFI_TYPE_SINT64
+ dd offset cd_epilogue ;; FFI_TYPE_STRUCT
+ dd offset cd_retint ;; FFI_TYPE_POINTER
+ dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+
+cd_retint8:
+ mov al, [ecx] ;; only al is written; no sign/zero extension is done here
+ jmp cd_epilogue
+
+cd_retint16:
+ mov ax, [ecx] ;; only ax is written; no sign/zero extension is done here
+ jmp cd_epilogue
+
+cd_retint:
+ mov eax, [ecx] ;; 32-bit result in eax
+ jmp cd_epilogue
+
+cd_retint64:
+ mov eax, [ecx + 0] ;; low dword in eax
+ mov edx, [ecx + 4] ;; high dword in edx
+ jmp cd_epilogue
+
+cd_retfloat:
+ fld DWORD PTR [ecx] ;; push the 32-bit float result onto the x87 stack
+ jmp cd_epilogue
+
+cd_retdouble:
+ fld QWORD PTR [ecx] ;; push the 64-bit double result onto the x87 stack
+ jmp cd_epilogue
+
+cd_retlongdouble:
+ fld TBYTE PTR [ecx] ;; push the 80-bit result onto the x87 stack
+ jmp cd_epilogue
+
+cd_epilogue:
+ ;; Epilogue code is autogenerated; NOTE(review): plain ret here - presumably the stub pops the stdcall arguments, confirm.
+ ret
+ffi_closure_STDCALL ENDP
+
+_TEXT ENDS
+END
+
+#else
+
+ .text
+
+ /* ffi_call_win32 (ffi_prep_args, ecif, cif_bytes, cif_flags, rvalue, fn)
+    Stack arguments, addressed off %ebp once the frame is set up:
+       8(%ebp)  ffi_prep_args  called as ffi_prep_args (stack, ecif)
+      12(%ebp)  ecif
+      16(%ebp)  cif_bytes      size of the outgoing argument area
+      20(%ebp)  cif_flags      return type code (FFI_TYPE_*)
+      24(%ebp)  rvalue         result storage, or NULL to discard
+      28(%ebp)  fn             function to call  */
+ # This assumes we are using gas.
+ .balign 16
+ .globl _ffi_call_win32
+#ifndef __OS2__
+ .def _ffi_call_win32; .scl 2; .type 32; .endef
+#endif
+_ffi_call_win32:
+.LFB1:
+ pushl %ebp
+.LCFI0:
+ movl %esp,%ebp
+.LCFI1:
+ # Make room for all of the new args.
+ movl 16(%ebp),%ecx
+ subl %ecx,%esp
+
+ movl %esp,%eax
+
+ # Place all of the ffi_prep_args in position
+ pushl 12(%ebp)
+ pushl %eax
+ call *8(%ebp)
+
+ # Return stack to previous state and call the function
+ addl $8,%esp
+
+ # FIXME: Align the stack to a 128-bit boundary to avoid
+ # potential performance hits.
+
+ call *28(%ebp)
+
+ # stdcall functions pop arguments off the stack themselves
+
+ # Load %ecx with the return type code
+ movl 20(%ebp),%ecx
+
+ # If the return value pointer is NULL, assume no return value.
+ cmpl $0,24(%ebp)
+ jne 0f
+
+ /* Even if there is no space for the return value, we are
+    obliged to pop a floating-point result off the x87 stack.
+    float, double and long double are all returned in %st(0);
+    their type codes are consecutive (see the table below), so
+    one unsigned range check covers all three.  */
+ cmpl $FFI_TYPE_FLOAT,%ecx
+ jb .Lnoretval
+ cmpl $FFI_TYPE_LONGDOUBLE,%ecx
+ ja .Lnoretval
+ fstp %st(0)
+
+ jmp .Lepilogue
+
+0:
+ call 1f /* pushes the address of .Lstore_table */
+ # Do not insert anything here between the call and the jump table.
+.Lstore_table:
+ .long .Lnoretval /* FFI_TYPE_VOID */
+ .long .Lretint /* FFI_TYPE_INT */
+ .long .Lretfloat /* FFI_TYPE_FLOAT */
+ .long .Lretdouble /* FFI_TYPE_DOUBLE */
+ .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */
+ .long .Lretuint8 /* FFI_TYPE_UINT8 */
+ .long .Lretsint8 /* FFI_TYPE_SINT8 */
+ .long .Lretuint16 /* FFI_TYPE_UINT16 */
+ .long .Lretsint16 /* FFI_TYPE_SINT16 */
+ .long .Lretint /* FFI_TYPE_UINT32 */
+ .long .Lretint /* FFI_TYPE_SINT32 */
+ .long .Lretint64 /* FFI_TYPE_UINT64 */
+ .long .Lretint64 /* FFI_TYPE_SINT64 */
+ .long .Lretstruct /* FFI_TYPE_STRUCT */
+ .long .Lretint /* FFI_TYPE_POINTER */
+ .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */
+ .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+ add %ecx, %ecx
+ add %ecx, %ecx /* %ecx = type code * 4 */
+ add (%esp),%ecx /* + table address pushed by the call */
+ add $4, %esp /* discard the pushed table address */
+ jmp *(%ecx) /* jump through the selected table entry */
+
+ /* Sign/zero extend as appropriate. */
+.Lretsint8:
+ movsbl %al, %eax
+ jmp .Lretint
+
+.Lretsint16:
+ movswl %ax, %eax
+ jmp .Lretint
+
+.Lretuint8:
+ movzbl %al, %eax
+ jmp .Lretint
+
+.Lretuint16:
+ movzwl %ax, %eax
+ jmp .Lretint
+
+.Lretint:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ jmp .Lepilogue
+
+.Lretfloat:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ fstps (%ecx)
+ jmp .Lepilogue
+
+.Lretdouble:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ fstpl (%ecx)
+ jmp .Lepilogue
+
+.Lretlongdouble:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ fstpt (%ecx)
+ jmp .Lepilogue
+
+.Lretint64:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ movl %edx,4(%ecx)
+ jmp .Lepilogue
+
+.Lretstruct1b:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ movb %al,0(%ecx)
+ jmp .Lepilogue
+
+.Lretstruct2b:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ movw %ax,0(%ecx)
+ jmp .Lepilogue
+
+.Lretstruct4b:
+ # Load %ecx with the pointer to storage for the return value
+ movl 24(%ebp),%ecx
+ movl %eax,0(%ecx)
+ jmp .Lepilogue
+
+.Lretstruct:
+ # Nothing to do!
+
+.Lnoretval:
+.Lepilogue:
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.ffi_call_win32_end:
+.LFE1:
+
+ # This assumes we are using gas.
+ .balign 16
+ .globl _ffi_closure_SYSV
+#ifndef __OS2__
+ .def _ffi_closure_SYSV; .scl 2; .type 32; .endef
+#endif
+_ffi_closure_SYSV: /* Closure entry point: calls _ffi_closure_SYSV_inner, then returns *resp by type code. */
+.LFB3:
+ pushl %ebp
+.LCFI4:
+ movl %esp, %ebp
+.LCFI5:
+ subl $40, %esp /* reserve 40 bytes for locals and outgoing arguments */
+ leal -24(%ebp), %edx /* %edx = address of the local result buffer */
+ movl %edx, -12(%ebp) /* resp */
+ leal 8(%ebp), %edx
+ movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
+ leal -12(%ebp), %edx /* %edx = address of the resp slot */
+ movl %edx, (%esp) /* &resp */
+ call _ffi_closure_SYSV_inner /* NOTE(review): %eax is untouched before this call - presumably it carries the closure pointer; confirm against the C prototype */
+ movl -12(%ebp), %ecx /* %ecx = resp, reloaded after the call */
+
+0:
+ call 1f /* pushes the address of .Lcls_store_table */
+ # Do not insert anything here between the call and the jump table.
+.Lcls_store_table:
+ .long .Lcls_noretval /* FFI_TYPE_VOID */
+ .long .Lcls_retint /* FFI_TYPE_INT */
+ .long .Lcls_retfloat /* FFI_TYPE_FLOAT */
+ .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */
+ .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */
+ .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */
+ .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */
+ .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */
+ .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */
+ .long .Lcls_retint /* FFI_TYPE_UINT32 */
+ .long .Lcls_retint /* FFI_TYPE_SINT32 */
+ .long .Lcls_retllong /* FFI_TYPE_UINT64 */
+ .long .Lcls_retllong /* FFI_TYPE_SINT64 */
+ .long .Lcls_retstruct /* FFI_TYPE_STRUCT */
+ .long .Lcls_retint /* FFI_TYPE_POINTER */
+ .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
+ .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
+
+1:
+ add %eax, %eax
+ add %eax, %eax /* %eax = type code * 4 */
+ add (%esp),%eax /* + table address pushed by the call */
+ add $4, %esp /* discard the pushed table address */
+ jmp *(%eax) /* jump through the selected table entry */
+
+ /* Sign/zero extend as appropriate. */
+.Lcls_retsint8:
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retsint16:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retuint8:
+ movzbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retuint16:
+ movzwl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retint:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retfloat:
+ flds (%ecx) /* push the float result onto the x87 stack */
+ jmp .Lcls_epilogue
+
+.Lcls_retdouble:
+ fldl (%ecx) /* push the double result onto the x87 stack */
+ jmp .Lcls_epilogue
+
+.Lcls_retldouble:
+ fldt (%ecx) /* push the long double result onto the x87 stack */
+ jmp .Lcls_epilogue
+
+.Lcls_retllong:
+ movl (%ecx), %eax /* low dword */
+ movl 4(%ecx), %edx /* high dword */
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct1:
+ movsbl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct2:
+ movswl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct4:
+ movl (%ecx), %eax
+ jmp .Lcls_epilogue
+
+.Lcls_retstruct:
+ # Caller expects us to pop struct return value pointer hidden arg.
+ movl %ebp, %esp
+ popl %ebp
+ ret $0x4
+
+.Lcls_noretval:
+.Lcls_epilogue:
+ movl %ebp, %esp /* tear down the frame */
+ popl %ebp
+ ret
+.ffi_closure_SYSV_end:
+.LFE3:
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+ # This assumes we are using gas.
+ .balign 16
+ .globl _ffi_closure_raw_SYSV
+#ifndef __OS2__
+ .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef
+#endif
+_ffi_closure_raw_SYSV: /* Raw closure entry point: calls closure->fun (cif, &res, raw_args, user_data), then returns res by cif->flags. */
+.LFB4:
+ pushl %ebp
+.LCFI6:
+ movl %esp, %ebp
+.LCFI7:
+ pushl %esi /* %esi is used below to hold closure->cif across the call */
+.LCFI8:
+ subl $36, %esp /* reserve 36 bytes for locals and outgoing arguments */
+ movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
+ movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+ movl %edx, 12(%esp) /* user_data */
+ leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
+ movl %edx, 8(%esp) /* raw_args */
+ leal -24(%ebp), %edx /* %edx = address of the local result buffer */
+ movl %edx, 4(%esp) /* &res */
+ movl %esi, (%esp) /* cif */
+ call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
+ movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
+0:
+ call 1f /* pushes the address of .Lrcls_store_table */
+ # Do not insert anything here between the call and the jump table.
+.Lrcls_store_table:
+ .long .Lrcls_noretval /* FFI_TYPE_VOID */
+ .long .Lrcls_retint /* FFI_TYPE_INT */
+ .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */
+ .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */
+ .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */
+ .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */
+ .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */
+ .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */
+ .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */
+ .long .Lrcls_retint /* FFI_TYPE_UINT32 */
+ .long .Lrcls_retint /* FFI_TYPE_SINT32 */
+ .long .Lrcls_retllong /* FFI_TYPE_UINT64 */
+ .long .Lrcls_retllong /* FFI_TYPE_SINT64 */
+ .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */
+ .long .Lrcls_retint /* FFI_TYPE_POINTER */
+ .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
+ .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+ add %eax, %eax
+ add %eax, %eax /* %eax = type code * 4 */
+ add (%esp),%eax /* + table address pushed by the call */
+ add $4, %esp /* discard the pushed table address */
+ jmp *(%eax) /* jump through the selected table entry */
+
+ /* Sign/zero extend as appropriate. */
+.Lrcls_retsint8:
+ movsbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retsint16:
+ movswl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retuint8:
+ movzbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retuint16:
+ movzwl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retint:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retfloat:
+ flds -24(%ebp) /* push the float result onto the x87 stack */
+ jmp .Lrcls_epilogue
+
+.Lrcls_retdouble:
+ fldl -24(%ebp) /* push the double result onto the x87 stack */
+ jmp .Lrcls_epilogue
+
+.Lrcls_retldouble:
+ fldt -24(%ebp) /* push the long double result onto the x87 stack */
+ jmp .Lrcls_epilogue
+
+.Lrcls_retllong:
+ movl -24(%ebp), %eax /* low dword */
+ movl -20(%ebp), %edx /* high dword */
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct1:
+ movsbl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct2:
+ movswl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct4:
+ movl -24(%ebp), %eax
+ jmp .Lrcls_epilogue
+
+.Lrcls_retstruct:
+ # Nothing to do!
+
+.Lrcls_noretval:
+.Lrcls_epilogue:
+ addl $36, %esp /* undo the subl $36 from the prologue */
+ popl %esi /* restore the saved %esi */
+ popl %ebp
+ ret
+.ffi_closure_raw_SYSV_end:
+.LFE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+ # This assumes we are using gas.
+ .balign 16
+ .globl _ffi_closure_STDCALL
+#ifndef __OS2__
+ .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef
+#endif
+_ffi_closure_STDCALL: /* stdcall closure entry point: like _ffi_closure_SYSV, but args starts at 12(%ebp). */
+.LFB5:
+ pushl %ebp
+.LCFI9:
+ movl %esp, %ebp
+.LCFI10:
+ subl $40, %esp /* reserve 40 bytes for locals and outgoing arguments */
+ leal -24(%ebp), %edx /* %edx = address of the local result buffer */
+ movl %edx, -12(%ebp) /* resp */
+ leal 12(%ebp), %edx /* account for stub return address on stack */
+ movl %edx, 4(%esp) /* args */
+ leal -12(%ebp), %edx /* %edx = address of the resp slot */
+ movl %edx, (%esp) /* &resp */
+ call _ffi_closure_SYSV_inner /* NOTE(review): %eax is untouched before this call - presumably it carries the closure pointer; confirm against the C prototype */
+ movl -12(%ebp), %ecx /* %ecx = resp, reloaded after the call */
+0:
+ call 1f /* pushes the address of .Lscls_store_table */
+ # Do not insert anything here between the call and the jump table.
+.Lscls_store_table:
+ .long .Lscls_noretval /* FFI_TYPE_VOID */
+ .long .Lscls_retint /* FFI_TYPE_INT */
+ .long .Lscls_retfloat /* FFI_TYPE_FLOAT */
+ .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */
+ .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */
+ .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */
+ .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */
+ .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */
+ .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */
+ .long .Lscls_retint /* FFI_TYPE_UINT32 */
+ .long .Lscls_retint /* FFI_TYPE_SINT32 */
+ .long .Lscls_retllong /* FFI_TYPE_UINT64 */
+ .long .Lscls_retllong /* FFI_TYPE_SINT64 */
+ .long .Lscls_retstruct /* FFI_TYPE_STRUCT */
+ .long .Lscls_retint /* FFI_TYPE_POINTER */
+ .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
+ .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
+ .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
+1:
+ add %eax, %eax
+ add %eax, %eax /* %eax = type code * 4 */
+ add (%esp),%eax /* + table address pushed by the call */
+ add $4, %esp /* discard the pushed table address */
+ jmp *(%eax) /* jump through the selected table entry */
+
+ /* Sign/zero extend as appropriate. */
+.Lscls_retsint8:
+ movsbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retsint16:
+ movswl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retuint8:
+ movzbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retuint16:
+ movzwl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retint:
+ movl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retfloat:
+ flds (%ecx) /* push the float result onto the x87 stack */
+ jmp .Lscls_epilogue
+
+.Lscls_retdouble:
+ fldl (%ecx) /* push the double result onto the x87 stack */
+ jmp .Lscls_epilogue
+
+.Lscls_retldouble:
+ fldt (%ecx) /* push the long double result onto the x87 stack */
+ jmp .Lscls_epilogue
+
+.Lscls_retllong:
+ movl (%ecx), %eax /* low dword */
+ movl 4(%ecx), %edx /* high dword */
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct1:
+ movsbl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct2:
+ movswl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct4:
+ movl (%ecx), %eax
+ jmp .Lscls_epilogue
+
+.Lscls_retstruct:
+ # Nothing to do!
+
+.Lscls_noretval:
+.Lscls_epilogue:
+ movl %ebp, %esp /* tear down the frame */
+ popl %ebp
+ ret /* NOTE(review): plain ret - presumably the stub pops the stdcall arguments; confirm */
+.ffi_closure_STDCALL_end:
+.LFE5:
+
+#ifndef __OS2__
+ .section .eh_frame,"w"
+#endif
+.Lframe1: /* base label: every FDE below stores its CIE offset relative to this */
+.LSCIE1:
+ .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */
+.LASCIE1:
+ .long 0x0 /* CIE Identifier Tag */
+ .byte 0x1 /* CIE Version */
+#ifdef __PIC__
+ .ascii "zR\0" /* CIE Augmentation: augmentation data follows and holds the FDE pointer encoding */
+#else
+ .ascii "\0" /* CIE Augmentation: none */
+#endif
+ .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */
+ .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+#ifdef __PIC__
+ .byte 0x1 /* .uleb128 0x1; Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+#endif
+ .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */
+ .byte 0x4 /* .uleb128 0x4: register r4 (%esp) */
+ .byte 0x4 /* .uleb128 0x4: offset 4 */
+ .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */
+ .byte 0x1 /* .uleb128 0x1: factored offset, scaled by the data alignment factor -4 */
+ .align 4
+.LECIE1:
+
+.LSFDE1:
+ .long .LEFDE1-.LASFDE1 /* FDE Length */
+.LASFDE1:
+ .long .LASFDE1-.Lframe1 /* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+ .long .LFB1-. /* FDE initial location */
+#else
+ .long .LFB1 /* FDE initial location (absolute address) */
+#endif
+ .long .LFE1-.LFB1 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ /* CFI for _ffi_call_win32 (.LFB1 to .LFE1): state after pushl %ebp (.LCFI0), then after movl %esp,%ebp (.LCFI1). */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI0-.LFB1 /* advance to .LCFI0 */
+ .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+ .byte 0x2 /* .uleb128 0x2 */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI1-.LCFI0 /* advance to .LCFI1 */
+ .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+ .byte 0x5 /* .uleb128 0x5 */
+
+ /* End of DW_CFA_xxx CFI instructions. */
+ .align 4
+.LEFDE1:
+
+
+.LSFDE3:
+ .long .LEFDE3-.LASFDE3 /* FDE Length */
+.LASFDE3:
+ .long .LASFDE3-.Lframe1 /* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+ .long .LFB3-. /* FDE initial location */
+#else
+ .long .LFB3 /* FDE initial location (absolute address) */
+#endif
+ .long .LFE3-.LFB3 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ /* CFI for _ffi_closure_SYSV (.LFB3 to .LFE3): state after pushl %ebp (.LCFI4), then after movl %esp,%ebp (.LCFI5). */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI4-.LFB3 /* advance to .LCFI4 */
+ .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+ .byte 0x2 /* .uleb128 0x2 */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI5-.LCFI4 /* advance to .LCFI5 */
+ .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+ .byte 0x5 /* .uleb128 0x5 */
+
+ /* End of DW_CFA_xxx CFI instructions. */
+ .align 4
+.LEFDE3:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE4:
+ .long .LEFDE4-.LASFDE4 /* FDE Length */
+.LASFDE4:
+ .long .LASFDE4-.Lframe1 /* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+ .long .LFB4-. /* FDE initial location */
+#else
+ .long .LFB4 /* FDE initial location (absolute address) */
+#endif
+ .long .LFE4-.LFB4 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ /* CFI for _ffi_closure_raw_SYSV (.LFB4 to .LFE4): after pushl %ebp (.LCFI6), movl %esp,%ebp (.LCFI7), pushl %esi (.LCFI8). */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI6-.LFB4 /* advance to .LCFI6 */
+ .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+ .byte 0x2 /* .uleb128 0x2 */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI7-.LCFI6 /* advance to .LCFI7 */
+ .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+ .byte 0x5 /* .uleb128 0x5 */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI8-.LCFI7 /* advance to .LCFI8 */
+ .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */
+ .byte 0x3 /* .uleb128 0x3 */
+
+ /* End of DW_CFA_xxx CFI instructions. */
+ .align 4
+.LEFDE4:
+
+#endif /* !FFI_NO_RAW_API */
+
+.LSFDE5:
+ .long .LEFDE5-.LASFDE5 /* FDE Length */
+.LASFDE5:
+ .long .LASFDE5-.Lframe1 /* FDE CIE offset */
+#if defined __PIC__ && defined HAVE_AS_X86_PCREL
+ .long .LFB5-. /* FDE initial location */
+#else
+ .long .LFB5 /* FDE initial location (absolute address) */
+#endif
+ .long .LFE5-.LFB5 /* FDE address range */
+#ifdef __PIC__
+ .byte 0x0 /* .uleb128 0x0; Augmentation size */
+#endif
+ /* CFI for _ffi_closure_STDCALL (.LFB5 to .LFE5): state after pushl %ebp (.LCFI9), then after movl %esp,%ebp (.LCFI10). */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI9-.LFB5 /* advance to .LCFI9 */
+ .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */
+ .byte 0x8 /* .uleb128 0x8 */
+ .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */
+ .byte 0x2 /* .uleb128 0x2 */
+
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .long .LCFI10-.LCFI9 /* advance to .LCFI10 */
+ .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */
+ .byte 0x5 /* .uleb128 0x5 */
+
+ /* End of DW_CFA_xxx CFI instructions. */
+ .align 4
+.LEFDE5:
+
+#endif /* !_MSC_VER */
+
diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S
new file mode 100644
index 000000000..6e9181867
--- /dev/null
+++ b/libffi/src/x86/win64.S
@@ -0,0 +1,460 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* Constants for ffi_call_win64.
+   Each value is a byte offset from the frame pointer that ffi_call_win64
+   sets up (rbp = rsp + 32 after "push rbp" and "sub rsp, 48").  STACK names
+   a slot inside that 48-byte local area.  PREP_ARGS_FN, ECIF, CIF_BYTES and
+   CIF_FLAGS name the four slots above the return address into which the
+   function entry stores rcx, rdx, r8 and r9.  RVALUE and FN lie directly
+   above those, where the fifth and sixth arguments of the prototype below
+   are read from.  */
+#define STACK 0 /* local: address of the outgoing argument area */
+#define PREP_ARGS_FN 32 /* stored rcx: prep_args_fn */
+#define ECIF 40 /* stored rdx: ecif */
+#define CIF_BYTES 48 /* stored r8: bytes */
+#define CIF_FLAGS 56 /* stored r9: flags */
+#define RVALUE 64 /* fifth argument: rvalue */
+#define FN 72 /* sixth argument: fn */
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+ extended_cif *ecif, unsigned bytes, unsigned flags,
+ unsigned *rvalue, void (*fn)());
+
+ bytes is rounded up to a multiple of 16 and that much stack is reserved
+ for the outgoing argument area.  prep_args_fn is called with
+ (argument area, ecif).  Afterwards the first four 8-byte slots of the
+ area are loaded into both rcx/rdx/r8/r9 and xmm0-xmm3, and fn is called.
+ flags is compared against FFI_TYPE_* values to decide how the result in
+ rax or xmm0 is stored through rvalue.  rax is zero on return.
+ */
+
+#ifdef _MSC_VER
+PUBLIC ffi_call_win64
+
+EXTRN __chkstk:NEAR
+EXTRN ffi_closure_win64_inner:NEAR
+
+_TEXT SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;; rax points to 'closure'
+;;; r11 contains a bit mask that specifies which of the
+;;; first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;;
+ffi_closure_win64 PROC FRAME
+ ;; Copy the four register arguments into the stack slots at
+ ;; [rsp+8] .. [rsp+32].  For each argument the matching bit of r11
+ ;; (1, 2, 4, 8) selects the xmm register instead of the integer one.
+ test r11, 1
+ jne first_is_float
+ mov QWORD PTR [rsp+8], rcx
+ jmp second
+first_is_float:
+ movlpd QWORD PTR [rsp+8], xmm0
+
+second:
+ test r11, 2
+ jne second_is_float
+ mov QWORD PTR [rsp+16], rdx
+ jmp third
+second_is_float:
+ movlpd QWORD PTR [rsp+16], xmm1
+
+third:
+ test r11, 4
+ jne third_is_float
+ mov QWORD PTR [rsp+24], r8
+ jmp fourth
+third_is_float:
+ movlpd QWORD PTR [rsp+24], xmm2
+
+fourth:
+ test r11, 8
+ jne fourth_is_float
+ mov QWORD PTR [rsp+32], r9
+ jmp done
+fourth_is_float:
+ movlpd QWORD PTR [rsp+32], xmm3
+
+done:
+ ;; Reserve 40 bytes: 32 bytes of shadow space for the callee plus 8
+ ;; which, given the usual Win64 alignment on entry, leaves rsp
+ ;; 16-byte aligned at the call below.
+ ;; The unwind directive follows the instruction it describes, so the
+ ;; unwind code is recorded at the offset where the allocation has
+ ;; already taken effect.
+ sub rsp, 40
+ .ALLOCSTACK 40
+ .ENDPROLOG
+ mov rcx, rax ; context is first parameter
+ mov rdx, rsp ; stack is second parameter
+ add rdx, 48 ; point to start of arguments
+ ; (40 bytes reserved above + return address)
+ mov rax, ffi_closure_win64_inner
+ call rax ; call the real closure function
+ add rsp, 40
+ movd xmm0, rax ; If the closure returned a float,
+ ; ffi_closure_win64_inner wrote it to rax
+ ret 0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+ ;; copy registers onto stack
+ ;; (the four slots above the return address; they are read back
+ ;; later through rbp-relative offsets)
+ mov QWORD PTR [rsp+32], r9
+ mov QWORD PTR [rsp+24], r8
+ mov QWORD PTR [rsp+16], rdx
+ mov QWORD PTR [rsp+8], rcx
+ ;; Each unwind directive follows the instruction it describes so
+ ;; that its unwind code is recorded at the end of that instruction.
+ push rbp
+ .PUSHREG rbp
+ sub rsp, 48 ; 00000030H
+ .ALLOCSTACK 48
+ lea rbp, QWORD PTR [rsp+32]
+ .SETFRAME rbp, 32
+ .ENDPROLOG
+
+ ;; rax = byte count rounded up to a multiple of 16
+ mov eax, DWORD PTR CIF_BYTES[rbp]
+ add rax, 15
+ and rax, -16
+ ;; __chkstk is called with the size in rax before rsp is lowered
+ ;; (presumably to probe the new stack pages; rax is used unchanged
+ ;; by the sub that follows).
+ call __chkstk
+ sub rsp, rax
+ ;; The argument area starts 32 bytes above the new rsp; remember it.
+ lea rax, QWORD PTR [rsp+32]
+ mov QWORD PTR STACK[rbp], rax
+
+ ;; prep_args_fn (argument area, ecif)
+ mov rdx, QWORD PTR ECIF[rbp]
+ mov rcx, QWORD PTR STACK[rbp]
+ call QWORD PTR PREP_ARGS_FN[rbp]
+
+ ;; Make rsp point at the prepared argument area.
+ mov rsp, QWORD PTR STACK[rbp]
+
+ ;; Load each of the first four 8-byte slots into both the xmm
+ ;; register and the integer register of its position.
+ movlpd xmm3, QWORD PTR [rsp+24]
+ movd r9, xmm3
+
+ movlpd xmm2, QWORD PTR [rsp+16]
+ movd r8, xmm2
+
+ movlpd xmm1, QWORD PTR [rsp+8]
+ movd rdx, xmm1
+
+ movlpd xmm0, QWORD PTR [rsp]
+ movd rcx, xmm0
+
+ call QWORD PTR FN[rbp]
+
+ ;; Store the result through the rvalue pointer.  The flags word is
+ ;; compared against one return type after another; the first match
+ ;; stores and jumps to ret_void$.  Small structs are stored with
+ ;; their own width, integer types narrower than 64 bits are
+ ;; widened to 64 bits first.
+ret_struct4b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ jne ret_struct2b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov DWORD PTR [rcx], eax
+ jmp ret_void$
+
+ret_struct2b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ jne ret_struct1b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov WORD PTR [rcx], ax
+ jmp ret_void$
+
+ret_struct1b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ jne ret_uint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov BYTE PTR [rcx], al
+ jmp ret_void$
+
+ret_uint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ jne ret_sint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, al ; zero-extend 8 -> 64 bits
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_sint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ jne ret_uint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, al ; sign-extend 8 -> 64 bits
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_uint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ jne ret_sint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, ax ; zero-extend 16 -> 64 bits
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ jne ret_uint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, ax ; sign-extend 16 -> 64 bits
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_uint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ jne ret_sint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov eax, eax ; writing eax clears the upper half of rax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ jne ret_float$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ cdqe ; sign-extend eax into rax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_float$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ jne SHORT ret_double$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movss DWORD PTR [rax], xmm0 ; 4-byte result comes from xmm0
+ jmp SHORT ret_void$
+
+ret_double$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ jne SHORT ret_sint64$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movlpd QWORD PTR [rax], xmm0 ; 8-byte result comes from xmm0
+ jmp SHORT ret_void$
+
+ret_sint64$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+ jne ret_void$ ; any other flags value: store nothing
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$ ; ret_void$ follows directly
+
+ret_void$:
+ xor eax, eax ; return 0 (a 32-bit write clears all of rax)
+
+ ;; Undo the frame: rbp + 16 is the address at which the caller
+ ;; rbp was pushed.
+ lea rsp, QWORD PTR [rbp+16]
+ pop rbp
+ ret 0
+ffi_call_win64 ENDP
+_TEXT ENDS
+END
+#else
+.text
+
+.extern _ffi_closure_win64_inner
+
+# ffi_closure_win64 will be called with these registers set:
+# rax points to 'closure'
+# r11 contains a bit mask that specifies which of the
+# first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+#
+ .balign 16
+ .globl _ffi_closure_win64
+_ffi_closure_win64:
+ # copy register arguments onto stack
+ # Slots 8(%rsp) .. 32(%rsp) receive the four register arguments.  For
+ # each argument the matching bit of r11 (1, 2, 4, 8) selects the xmm
+ # register instead of the integer one.
+ test $1,%r11
+ jne .Lfirst_is_float
+ mov %rcx, 8(%rsp)
+ jmp .Lsecond
+.Lfirst_is_float:
+ movlpd %xmm0, 8(%rsp)
+
+.Lsecond:
+ test $2, %r11
+ jne .Lsecond_is_float
+ mov %rdx, 16(%rsp)
+ jmp .Lthird
+.Lsecond_is_float:
+ movlpd %xmm1, 16(%rsp)
+
+.Lthird:
+ test $4, %r11
+ jne .Lthird_is_float
+ mov %r8,24(%rsp)
+ jmp .Lfourth
+.Lthird_is_float:
+ movlpd %xmm2, 24(%rsp)
+
+.Lfourth:
+ test $8, %r11
+ jne .Lfourth_is_float
+ mov %r9, 32(%rsp)
+ jmp .Ldone
+.Lfourth_is_float:
+ movlpd %xmm3, 32(%rsp)
+
+.Ldone:
+ # Reserve 40 bytes: 32 bytes of shadow space for the callee plus 8
+ # which, given the usual Win64 alignment on entry, leaves rsp 16-byte
+ # aligned at the call below.
+#.ALLOCSTACK 40
+ sub $40, %rsp
+#.ENDPROLOG
+ mov %rax, %rcx # context is first parameter
+ mov %rsp, %rdx # stack is second parameter
+ add $48, %rdx # point to start of arguments
+ # (40 bytes reserved above + return address)
+ # Take the address RIP-relative.  An absolute "mov $sym" immediate is
+ # only 32 bits wide (sign-extended) and cannot reach the symbol when
+ # the image is based above 2 GiB.
+ lea _ffi_closure_win64_inner(%rip), %rax
+ callq *%rax # call the real closure function
+ add $40, %rsp
+ movq %rax, %xmm0 # If the closure returned a float,
+ # ffi_closure_win64_inner wrote it to rax
+ retq
+.ffi_closure_win64_end:
+
+ .balign 16
+ .globl _ffi_call_win64
+_ffi_call_win64:
+ # copy registers onto stack
+ # (the four slots above the return address; they are read back later
+ # through rbp-relative offsets)
+ mov %r9,32(%rsp)
+ mov %r8,24(%rsp)
+ mov %rdx,16(%rsp)
+ mov %rcx,8(%rsp)
+ #.PUSHREG rbp
+ push %rbp
+ #.ALLOCSTACK 48
+ sub $48,%rsp
+ #.SETFRAME rbp, 32
+ lea 32(%rsp),%rbp
+ #.ENDPROLOG
+
+ # rax = byte count rounded up to a multiple of 16
+ mov CIF_BYTES(%rbp),%eax
+ add $15, %rax
+ and $-16, %rax
+ # Inline counterpart of the __chkstk call in the MSVC variant: lower
+ # rsp in steps of 0x1000 bytes and touch the stack after every step,
+ # then subtract and touch the remainder.
+ cmpq $0x1000, %rax
+ jb .Lch_done
+.Lch_probe:
+ subq $0x1000,%rsp
+ orl $0x0, (%rsp) # touch the stack; the value is unchanged
+ subq $0x1000,%rax
+ cmpq $0x1000,%rax
+ ja .Lch_probe
+.Lch_done:
+ subq %rax, %rsp
+ orl $0x0, (%rsp)
+ # The argument area starts 32 bytes above the new rsp; remember it.
+ lea 32(%rsp), %rax
+ mov %rax, STACK(%rbp)
+
+ # prep_args_fn (argument area, ecif)
+ mov ECIF(%rbp), %rdx
+ mov STACK(%rbp), %rcx
+ callq *PREP_ARGS_FN(%rbp)
+
+ # Make rsp point at the prepared argument area.
+ mov STACK(%rbp), %rsp
+
+ # Load each of the first four 8-byte slots into both the xmm register
+ # and the integer register of its position.
+ movlpd 24(%rsp), %xmm3
+ movd %xmm3, %r9
+
+ movlpd 16(%rsp), %xmm2
+ movd %xmm2, %r8
+
+ movlpd 8(%rsp), %xmm1
+ movd %xmm1, %rdx
+
+ movlpd (%rsp), %xmm0
+ movd %xmm0, %rcx
+
+ callq *FN(%rbp)
+
+ # Store the result through the rvalue pointer.  The flags word is
+ # compared against one return type after another; the first match
+ # stores and jumps to .Lret_void.  Small structs are stored with their
+ # own width, integer types narrower than 64 bits are widened first.
+.Lret_struct4b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ jne .Lret_struct2b
+
+ mov RVALUE(%rbp), %rcx
+ mov %eax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct2b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+ jne .Lret_struct1b
+
+ mov RVALUE(%rbp), %rcx
+ mov %ax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct1b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+ jne .Lret_uint8
+
+ mov RVALUE(%rbp), %rcx
+ mov %al, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint8:
+ cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+ jne .Lret_sint8
+
+ mov RVALUE(%rbp), %rcx
+ movzbq %al, %rax # zero-extend 8 -> 64 bits
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint8:
+ cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+ jne .Lret_uint16
+
+ mov RVALUE(%rbp), %rcx
+ movsbq %al, %rax # sign-extend 8 -> 64 bits
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint16:
+ cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+ jne .Lret_sint16
+
+ mov RVALUE(%rbp), %rcx
+ movzwq %ax, %rax # zero-extend 16 -> 64 bits
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint16:
+ cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+ jne .Lret_uint32
+
+ mov RVALUE(%rbp), %rcx
+ movswq %ax, %rax # sign-extend 16 -> 64 bits
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint32:
+ cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+ jne .Lret_sint32
+
+ mov RVALUE(%rbp), %rcx
+ movl %eax, %eax # writing eax clears the upper half of rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint32:
+ cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ jne .Lret_float
+
+ mov RVALUE(%rbp), %rcx
+ cltq # sign-extend eax into rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_float:
+ cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ jne .Lret_double
+
+ mov RVALUE(%rbp), %rax
+ movss %xmm0, (%rax) # 4-byte result comes from xmm0
+ jmp .Lret_void
+
+.Lret_double:
+ cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ jne .Lret_sint64
+
+ mov RVALUE(%rbp), %rax
+ movlpd %xmm0, (%rax) # 8-byte result comes from xmm0
+ jmp .Lret_void
+
+.Lret_sint64:
+ cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+ jne .Lret_void # any other flags value: store nothing
+
+ mov RVALUE(%rbp), %rcx
+ mov %rax, (%rcx)
+ jmp .Lret_void # .Lret_void follows directly
+
+.Lret_void:
+ xor %eax, %eax # return 0 (a 32-bit write clears all of rax)
+
+ # Undo the frame: rbp + 16 is the address at which the caller rbp
+ # was pushed.
+ lea 16(%rbp), %rsp
+ pop %rbp
+ retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+