author      upstream source tree <ports@midipix.org>    2015-03-15 20:14:05 -0400
committer   upstream source tree <ports@midipix.org>    2015-03-15 20:14:05 -0400
commit      554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree        976dc5ab7fddf506dadce60ae936f43f58787092 /libffi/src/x86
download    cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
            cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary-identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'libffi/src/x86')
-rw-r--r--  libffi/src/x86/darwin.S       444
-rw-r--r--  libffi/src/x86/darwin64.S     416
-rw-r--r--  libffi/src/x86/ffi.c          665
-rw-r--r--  libffi/src/x86/ffi64.c        627
-rw-r--r--  libffi/src/x86/ffitarget.h    120
-rw-r--r--  libffi/src/x86/freebsd.S      458
-rw-r--r--  libffi/src/x86/sysv.S         468
-rw-r--r--  libffi/src/x86/unix64.S       426
-rw-r--r--  libffi/src/x86/win32.S       1065
-rw-r--r--  libffi/src/x86/win64.S        460
10 files changed, 5149 insertions, 0 deletions
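
The files below implement libffi's x86/x86-64 foreign-function call and closure machinery: ffi.c and ffi64.c classify arguments and prepare call frames, while the .S files contain the per-ABI assembly stubs (ffi_call_SYSV, ffi_call_unix64, and the closure entry points). As a reading aid before the verbatim diff, here is a minimal sketch of how this machinery is driven through libffi's public API; the use of puts as the target function is illustrative only, not part of this commit.

    #include <ffi.h>
    #include <stdio.h>

    int main(void)
    {
      ffi_cif cif;
      ffi_type *arg_types[1] = { &ffi_type_pointer };
      const char *msg = "hello from libffi";
      void *arg_values[1] = { &msg };
      ffi_arg result;  /* small integer returns are widened to ffi_arg */

      /* ffi_prep_cif invokes the per-target ffi_prep_cif_machdep
         (see ffi.c/ffi64.c below) to compute cif->flags and cif->bytes. */
      if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
                       &ffi_type_sint, arg_types) != FFI_OK)
        return 1;

      /* ffi_call then dispatches to the assembly stubs in this diff:
         ffi_call_SYSV on 32-bit SysV targets, ffi_call_unix64 on x86-64. */
      ffi_call(&cif, FFI_FN(puts), &result, arg_values);
      return 0;
    }

Which ABI FFI_DEFAULT_ABI resolves to (FFI_SYSV, FFI_UNIX64, or FFI_WIN64) depends on the target, as defined in ffitarget.h below.
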
diff --git a/libffi/src/x86/darwin.S b/libffi/src/x86/darwin.S new file mode 100644 index 000000000..8f0f0707a --- /dev/null +++ b/libffi/src/x86/darwin.S @@ -0,0 +1,444 @@ +/* ----------------------------------------------------------------------- + darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- + */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl _ffi_prep_args + + .align 4 +.globl _ffi_call_SYSV + +_ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + subl $8,%esp + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + subl $8,%esp + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $16,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. 
*/ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue +0: + .align 4 + call 1f +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retstruct1b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp epilogue + +retstruct2b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! */ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret + +.LFE1: +.ffi_call_SYSV_end: + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl _ffi_closure_SYSV + +_ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + movl %ebx, 8(%esp) +.LCFI7: + call L_ffi_closure_SYSV_inner$stub + movl 8(%esp), %ebx + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. 
*/ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax + je .Lcls_retstruct1b + cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax + je .Lcls_retstruct2b + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct1b: + movsbl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct2b: + movswl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct: + lea -8(%ebp),%esp + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl _ffi_closure_raw_SYSV + +_ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: +#endif + +.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 +L_ffi_closure_SYSV_inner$stub: + .indirect_symbol _ffi_closure_SYSV_inner + hlt ; hlt ; hlt ; hlt ; hlt + + +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zR\0" + .byte 0x1 + .byte 0x7c + .byte 0x8 + .byte 0x1 + .byte 0x10 + .byte 0xc + .byte 0x5 + .byte 0x4 + .byte 0x88 + .byte 0x1 + .align 2 +LECIE1: +.globl _ffi_call_SYSV.eh +_ffi_call_SYSV.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 + .long .LFB1-. 
+ .set L$set$2,.LFE1-.LFB1 + .long L$set$2 + .byte 0x0 + .byte 0x4 + .set L$set$3,.LCFI0-.LFB1 + .long L$set$3 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$4,.LCFI1-.LCFI0 + .long L$set$4 + .byte 0xd + .byte 0x4 + .align 2 +LEFDE1: +.globl _ffi_closure_SYSV.eh +_ffi_closure_SYSV.eh: +LSFDE2: + .set L$set$5,LEFDE2-LASFDE2 + .long L$set$5 +LASFDE2: + .long LASFDE2-EH_frame1 + .long .LFB2-. + .set L$set$6,.LFE2-.LFB2 + .long L$set$6 + .byte 0x0 + .byte 0x4 + .set L$set$7,.LCFI2-.LFB2 + .long L$set$7 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$8,.LCFI3-.LCFI2 + .long L$set$8 + .byte 0xd + .byte 0x4 + .align 2 +LEFDE2: + +#if !FFI_NO_RAW_API + +.globl _ffi_closure_raw_SYSV.eh +_ffi_closure_raw_SYSV.eh: +LSFDE3: + .set L$set$10,LEFDE3-LASFDE3 + .long L$set$10 +LASFDE3: + .long LASFDE3-EH_frame1 + .long .LFB3-. + .set L$set$11,.LFE3-.LFB3 + .long L$set$11 + .byte 0x0 + .byte 0x4 + .set L$set$12,.LCFI4-.LFB3 + .long L$set$12 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$13,.LCFI5-.LCFI4 + .long L$set$13 + .byte 0xd + .byte 0x4 + .byte 0x4 + .set L$set$14,.LCFI6-.LCFI5 + .long L$set$14 + .byte 0x85 + .byte 0x3 + .align 2 +LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/darwin64.S b/libffi/src/x86/darwin64.S new file mode 100644 index 000000000..2f7394ef4 --- /dev/null +++ b/libffi/src/x86/darwin64.S @@ -0,0 +1,416 @@ +/* ----------------------------------------------------------------------- + darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc. + Copyright (c) 2008 Red Hat, Inc. + derived from unix64.S + + x86-64 Foreign Function Interface for Darwin. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifdef __x86_64__ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .file "darwin64.S" +.text + +/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 3 + .globl _ffi_call_unix64 + +_ffi_call_unix64: +LUW0: + movq (%rsp), %r10 /* Load return address. */ + leaq (%rdi, %rsi), %rax /* Find local stack base. */ + movq %rdx, (%rax) /* Save flags. */ + movq %rcx, 8(%rax) /* Save raddr. */ + movq %rbp, 16(%rax) /* Save old frame pointer. 
*/ + movq %r10, 24(%rax) /* Relocate return address. */ + movq %rax, %rbp /* Finalize local stack frame. */ +LUW1: + movq %rdi, %r10 /* Save a copy of the register area. */ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + testl %eax, %eax + jnz Lload_sse +Lret_from_load_sse: + + /* Deallocate the reg arg area. */ + leaq 176(%r10), %rsp + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +LUW2: + + /* The first byte of the flags contains the FFI_TYPE. */ + movzbl %cl, %r10d + leaq Lstore_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +Lstore_table: + .long Lst_void-Lstore_table /* FFI_TYPE_VOID */ + .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */ + .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */ + .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */ + .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */ + .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */ + .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */ + .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */ + .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */ + .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */ + .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */ + .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */ + .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */ + .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */ + + .text + .align 3 +Lst_void: + ret + .align 3 +Lst_uint8: + movzbq %al, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_sint8: + movsbq %al, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_uint16: + movzwq %ax, %rax + movq %rax, (%rdi) + .align 3 +Lst_sint16: + movswq %ax, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_uint32: + movl %eax, %eax + movq %rax, (%rdi) + .align 3 +Lst_sint32: + cltq + movq %rax, (%rdi) + ret + .align 3 +Lst_int64: + movq %rax, (%rdi) + ret + .align 3 +Lst_float: + movss %xmm0, (%rdi) + ret + .align 3 +Lst_double: + movsd %xmm0, (%rdi) + ret +Lst_ldouble: + fstpt (%rdi) + ret + .align 3 +Lst_struct: + leaq -20(%rsp), %rsi /* Scratch area in redzone. */ + + /* We have to locate the values now, and since we don't want to + write too much data into the user's return value, we spill the + value to a 16 byte scratch area first. Bits 8, 9, and 10 + control where the values are located. Only one of the three + bits will be set; see ffi_prep_cif_machdep for the pattern. */ + movd %xmm0, %r10 + movd %xmm1, %r11 + testl $0x100, %ecx + cmovnz %rax, %rdx + cmovnz %r10, %rax + testl $0x200, %ecx + cmovnz %r10, %rdx + testl $0x400, %ecx + cmovnz %r10, %rax + cmovnz %r11, %rdx + movq %rax, (%rsi) + movq %rdx, 8(%rsi) + + /* Bits 12-31 contain the true size of the structure. Copy from + the scratch area to the true destination. */ + shrl $12, %ecx + rep movsb + ret + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. 
*/ + .align 3 +LUW3: +Lload_sse: + movdqa 48(%r10), %xmm0 + movdqa 64(%r10), %xmm1 + movdqa 80(%r10), %xmm2 + movdqa 96(%r10), %xmm3 + movdqa 112(%r10), %xmm4 + movdqa 128(%r10), %xmm5 + movdqa 144(%r10), %xmm6 + movdqa 160(%r10), %xmm7 + jmp Lret_from_load_sse + +LUW4: + .align 3 + .globl _ffi_closure_unix64 + +_ffi_closure_unix64: +LUW5: + /* The carry flag is set by the trampoline iff SSE registers + are used. Don't clobber it before the branch instruction. */ + leaq -200(%rsp), %rsp +LUW6: + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + jc Lsave_sse +Lret_from_save_sse: + + movq %r10, %rdi + leaq 176(%rsp), %rsi + movq %rsp, %rdx + leaq 208(%rsp), %rcx + call _ffi_closure_unix64_inner + + /* Deallocate stack frame early; return value is now in redzone. */ + addq $200, %rsp +LUW7: + + /* The first byte of the return value contains the FFI_TYPE. */ + movzbl %al, %r10d + leaq Lload_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +Lload_table: + .long Lld_void-Lload_table /* FFI_TYPE_VOID */ + .long Lld_int32-Lload_table /* FFI_TYPE_INT */ + .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */ + .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */ + .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */ + .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */ + .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */ + .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */ + .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */ + .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */ + .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */ + .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */ + .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */ + .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */ + .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */ + + .text + .align 3 +Lld_void: + ret + .align 3 +Lld_int8: + movzbl -24(%rsp), %eax + ret + .align 3 +Lld_int16: + movzwl -24(%rsp), %eax + ret + .align 3 +Lld_int32: + movl -24(%rsp), %eax + ret + .align 3 +Lld_int64: + movq -24(%rsp), %rax + ret + .align 3 +Lld_float: + movss -24(%rsp), %xmm0 + ret + .align 3 +Lld_double: + movsd -24(%rsp), %xmm0 + ret + .align 3 +Lld_ldouble: + fldt -24(%rsp) + ret + .align 3 +Lld_struct: + /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, + %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading + both rdx and xmm1 with the second word. For the remaining, + bit 8 set means xmm0 gets the second word, and bit 9 means + that rax gets the second word. */ + movq -24(%rsp), %rcx + movq -16(%rsp), %rdx + movq -16(%rsp), %xmm1 + testl $0x100, %eax + cmovnz %rdx, %rcx + movd %rcx, %xmm0 + testl $0x200, %eax + movq -24(%rsp), %rax + cmovnz %rdx, %rax + ret + + /* See the comment above Lload_sse; the same logic applies here. 
*/ + .align 3 +LUW8: +Lsave_sse: + movdqa %xmm0, 48(%rsp) + movdqa %xmm1, 64(%rsp) + movdqa %xmm2, 80(%rsp) + movdqa %xmm3, 96(%rsp) + movdqa %xmm4, 112(%rsp) + movdqa %xmm5, 128(%rsp) + movdqa %xmm6, 144(%rsp) + movdqa %xmm7, 160(%rsp) + jmp Lret_from_save_sse + +LUW9: +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 /* CIE Length */ + .long L$set$0 +LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .byte 0x1 /* uleb128 0x1; Augmentation size */ + .byte 0x10 /* FDE Encoding (pcrel sdata4) */ + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x7 /* uleb128 0x7 */ + .byte 0x8 /* uleb128 0x8 */ + .byte 0x90 /* DW_CFA_offset, column 0x10 */ + .byte 0x1 + .align 3 +LECIE1: + .globl _ffi_call_unix64.eh +_ffi_call_unix64.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */ + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 /* FDE CIE offset */ + .quad LUW0-. /* FDE initial location */ + .set L$set$2,LUW4-LUW0 /* FDE address range */ + .quad L$set$2 + .byte 0x0 /* Augmentation size */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$3,LUW1-LUW0 + .long L$set$3 + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ + .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ + .byte 0x6 + .byte 0x20 + .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ + .byte 0x2 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$4,LUW2-LUW1 + .long L$set$4 + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x7 + .byte 0x8 + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$5,LUW3-LUW2 + .long L$set$5 + .byte 0xb /* DW_CFA_restore_state */ + + .align 3 +LEFDE1: + .globl _ffi_closure_unix64.eh +_ffi_closure_unix64.eh: +LSFDE3: + .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */ + .long L$set$6 +LASFDE3: + .long LASFDE3-EH_frame1 /* FDE CIE offset */ + .quad LUW5-. /* FDE initial location */ + .set L$set$7,LUW9-LUW5 /* FDE address range */ + .quad L$set$7 + .byte 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$8,LUW6-LUW5 + .long L$set$8 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 208,1 /* uleb128 208 */ + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$9,LUW7-LUW6 + .long L$set$9 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$10,LUW8-LUW7 + .long L$set$10 + .byte 0xb /* DW_CFA_restore_state */ + + .align 3 +LEFDE3: + .subsections_via_symbols + +#endif /* __x86_64__ */ diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c new file mode 100644 index 000000000..fea9d6dea --- /dev/null +++ b/libffi/src/x86/ffi.c @@ -0,0 +1,665 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. 
+ Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2002 Bo Thorsen + Copyright (c) 2002 Roger Sayle + Copyright (C) 2008, 2010 Free Software Foundation, Inc. + + x86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if !defined(__x86_64__) || defined(_WIN64) + +#ifdef _WIN64 +#include <windows.h> +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + if (ecif->cif->flags == FFI_TYPE_STRUCT +#ifdef X86_WIN64 + && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 + && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) +#endif + ) + { + *(void **) argp = ecif->rvalue; + argp += sizeof(void*); + } + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + i != 0; + i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) + argp = (char *) ALIGN(argp, sizeof(void*)); + + z = (*p_arg)->size; +#ifdef X86_WIN64 + if (z > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && (z != 1 && z != 2 && z != 4 && z != 8)) +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) +#endif + ) + { + z = sizeof(ffi_arg); + *(void **)argp = *p_argv; + } + else if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + memcpy(argp, *p_argv, z); + } + else +#endif + if (z < sizeof(ffi_arg)) + { + z = sizeof(ffi_arg); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(ffi_arg *) argp = *(ffi_arg *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else + { + memcpy(argp, *p_argv, z); + } + p_argv++; +#ifdef X86_WIN64 + argp += (z + 
sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + unsigned int i; + ffi_type **ptr; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: +#if defined(X86) || defined (X86_WIN32) || defined(X86_FREEBSD) || defined(X86_DARWIN) || defined(X86_WIN64) + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: +#endif +#ifdef X86_WIN64 + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: +#endif + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#ifndef X86_WIN64 +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif +#endif + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_UINT64: +#ifdef X86_WIN64 + case FFI_TYPE_POINTER: +#endif + cif->flags = FFI_TYPE_SINT64; + break; + + case FFI_TYPE_STRUCT: +#ifndef X86 + if (cif->rtype->size == 1) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */ + } + else if (cif->rtype->size == 2) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */ + } + else if (cif->rtype->size == 4) + { +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SMALL_STRUCT_4B; +#else + cif->flags = FFI_TYPE_INT; /* same as int type */ +#endif + } + else if (cif->rtype->size == 8) + { + cif->flags = FFI_TYPE_SINT64; /* same as int64 type */ + } + else +#endif + { + cif->flags = FFI_TYPE_STRUCT; + /* allocate space for return value pointer */ + cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); + } + break; + + default: +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SINT64; + break; + case FFI_TYPE_INT: + cif->flags = FFI_TYPE_SINT32; +#else + cif->flags = FFI_TYPE_INT; +#endif + break; + } + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + if (((*ptr)->alignment - 1) & cif->bytes) + cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment); + cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG); + } + +#ifdef X86_WIN64 + /* ensure space for storing four registers */ + cif->bytes += 4 * sizeof(ffi_arg); +#endif + +#ifdef X86_DARWIN + cif->bytes = (cif->bytes + 15) & ~0xF; +#endif + + return FFI_OK; +} + +#ifdef X86_WIN64 +extern int +ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#elif defined(X86_WIN32) +extern void +ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#else +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#endif + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + +#ifdef X86_WIN64 + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT + && cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8) + { + ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); + } +#else + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT) + { + ecif.rvalue = alloca(cif->rtype->size); + } +#endif + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN64 + case FFI_WIN64: + { + /* Make copies of all struct arguments + NOTE: not sure if responsibility should be here or in caller */ + unsigned int i; + 
for (i=0; i < cif->nargs;i++) { + size_t size = cif->arg_types[i]->size; + if ((cif->arg_types[i]->type == FFI_TYPE_STRUCT + && (size != 1 && size != 2 && size != 4 && size != 8)) +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || cif->arg_types[i]->type == FFI_TYPE_LONGDOUBLE +#endif + ) + { + void *local = alloca(size); + memcpy(local, avalue[i], size); + avalue[i] = local; + } + } + ffi_call_win64(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + } + break; +#elif defined(X86_WIN32) + case FFI_SYSV: + case FFI_STDCALL: + ffi_call_win32(ffi_prep_args, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, + fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + + +/** private members **/ + +/* The following __attribute__((regparm(1))) decorations will have no effect + on MSVC - standard cdecl convention applies. */ +static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, + void** args, ffi_cif* cif); +void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) + __attribute__ ((regparm(1))); +unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) + __attribute__ ((regparm(1))); +#ifdef X86_WIN32 +void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) + __attribute__ ((regparm(1))); +#endif +#ifdef X86_WIN64 +void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); +#endif + +/* This function is jumped to by the trampoline */ + +#ifdef X86_WIN64 +void * FFI_HIDDEN +ffi_closure_win64_inner (ffi_closure *closure, void *args) { + ffi_cif *cif; + void **arg_area; + void *result; + void *resp = &result; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); + + (closure->fun) (cif, resp, arg_area, closure->user_data); + + /* The result is returned in rax. This does the right thing for + result types except for floats; we have to 'mov xmm0, rax' in the + caller to correct this. + TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! + */ + return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; +} + +#else +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) +{ + /* our various things... */ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. 
*/ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} +#endif /* !X86_WIN64 */ + +static void +ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, + ffi_cif *cif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + +#ifdef X86_WIN64 + if (cif->rtype->size > sizeof(ffi_arg) + || (cif->flags == FFI_TYPE_STRUCT + && (cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8))) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#else + if ( cif->flags == FFI_TYPE_STRUCT ) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#endif + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) { + argp = (char *) ALIGN(argp, sizeof(void*)); + } + +#ifdef X86_WIN64 + if ((*p_arg)->size > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && ((*p_arg)->size != 1 && (*p_arg)->size != 2 + && (*p_arg)->size != 4 && (*p_arg)->size != 8))) + { + z = sizeof(void *); + *p_argv = *(void **)argp; + } + else +#endif + { + z = (*p_arg)->size; + + /* because we're little endian, this is what it turns into. */ + + *p_argv = (void*) argp; + } + + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + void* __fun = (void*)(FUN); \ + void* __ctx = (void*)(CTX); \ + *(unsigned char*) &__tramp[0] = 0x41; \ + *(unsigned char*) &__tramp[1] = 0xbb; \ + *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \ + *(unsigned char*) &__tramp[6] = 0x48; \ + *(unsigned char*) &__tramp[7] = 0xb8; \ + *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \ + *(unsigned char *) &__tramp[16] = 0x49; \ + *(unsigned char *) &__tramp[17] = 0xba; \ + *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \ + *(unsigned char *) &__tramp[26] = 0x41; \ + *(unsigned char *) &__tramp[27] = 0xff; \ + *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ + } + +/* How to make a trampoline. Derived from gcc/config/i386/i386.c. 
*/ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe9; \ + *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ + } + +#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe8; \ + *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ + *(unsigned char *) &__tramp[10] = 0xc2; \ + *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ + } + +/* the cif must already be prep'ed */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ +#ifdef X86_WIN64 +#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE) +#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0) + if (cif->abi == FFI_WIN64) + { + int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); + FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0], + &ffi_closure_win64, + codeloc, mask); + /* make sure we can execute here */ + } +#else + if (cif->abi == FFI_SYSV) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#ifdef X86_WIN32 + else if (cif->abi == FFI_STDCALL) + { + FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + &ffi_closure_STDCALL, + (void*)codeloc, cif->bytes); + } +#endif /* X86_WIN32 */ +#endif /* !X86_WIN64 */ + else + { + return FFI_BAD_ABI; + } + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* ------- Native raw API support -------------------------------- */ + +#if !FFI_NO_RAW_API + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + int i; + + if (cif->abi != FFI_SYSV) { + return FFI_BAD_ABI; + } + + /* we currently don't support certain kinds of arguments for raw + closures. This should be implemented by a separate assembly + language routine, since it would require argument processing, + something we don't do now for performance. */ + + for (i = cif->nargs-1; i >= 0; i--) + { + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); + } + + + FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, + codeloc); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +static void +ffi_prep_args_raw(char *stack, extended_cif *ecif) +{ + memcpy (stack, ecif->avalue, ecif->cif->bytes); +} + +/* we borrow this routine from libffi (it must be changed, though, to + * actually call the function passed in the first argument. as of + * libffi-1.20, this is not the case.) 
+ */ + +void +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) +{ + extended_cif ecif; + void **avalue = (void **)fake_avalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN32 + case FFI_SYSV: + case FFI_STDCALL: + ffi_call_win32(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + +#endif + +#endif /* !__x86_64__ || X86_WIN64 */ + diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c new file mode 100644 index 000000000..bd907d720 --- /dev/null +++ b/libffi/src/x86/ffi64.c @@ -0,0 +1,627 @@ +/* ----------------------------------------------------------------------- + ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> + Copyright (c) 2008, 2010 Red Hat, Inc. + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdarg.h> + +#ifdef __x86_64__ + +#define MAX_GPR_REGS 6 +#define MAX_SSE_REGS 8 + +struct register_args +{ + /* Registers for argument passing. */ + UINT64 gpr[MAX_GPR_REGS]; + __int128_t sse[MAX_SSE_REGS]; +}; + +extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void), unsigned ssecount); + +/* All reference to register classes here is identical to the code in + gcc/config/i386/i386.c. Do *not* change one without the other. */ + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the + exception of SSESF, SSEDF classes, that are basically SSE class, + just gcc will use SF or DFmode move instead of DImode to avoid + reformatting penalties. + + Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). 
*/ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; + +#define MAX_CLASSES 4 + +#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + See the x86-64 PS ABI for details. 
+*/ +static int +classify_argument (ffi_type *type, enum x86_64_reg_class classes[], + size_t byte_offset) +{ + switch (type->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + { + int size = byte_offset + type->size; + + if (size <= 4) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size <= 8) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size <= 12) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size <= 16) + { + classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else + FFI_ASSERT (0); + } + case FFI_TYPE_FLOAT: + if (!(byte_offset % 8)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case FFI_TYPE_STRUCT: + { + const int UNITS_PER_WORD = 8; + int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + int i; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* If the struct is larger than 32 bytes, pass it on the stack. */ + if (type->size > 32) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Merge the fields of structure. */ + for (ptr = type->elements; *ptr != NULL; ptr++) + { + int num; + + byte_offset = ALIGN (byte_offset, (*ptr)->alignment); + + num = classify_argument (*ptr, subclasses, byte_offset % 8); + if (num == 0) + return 0; + for (i = 0; i < num; i++) + { + int pos = byte_offset / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + + byte_offset += (*ptr)->size; + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + FFI_ASSERT (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + /* The first one should never be X86_64_X87UP_CLASS. */ + FFI_ASSERT (i != 0); + return 0; + } + } + return words; + } + + default: + FFI_ASSERT(0); + } + return 0; /* Never reached. */ +} + +/* Examine the argument and return set number of register required in each + class. Return zero iff parameter should be passed in memory, otherwise + the number of registers. 
*/ + +static int +examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], + _Bool in_return, int *pngpr, int *pnsse) +{ + int i, n, ngpr, nsse; + + n = classify_argument (type, classes, 0); + if (n == 0) + return 0; + + ngpr = nsse = 0; + for (i = 0; i < n; ++i) + switch (classes[i]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + ngpr++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + nsse++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return in_return != 0; + default: + abort (); + } + + *pngpr = ngpr; + *pnsse = nsse; + + return n; +} + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int gprcount, ssecount, i, avn, n, ngpr, nsse, flags; + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t bytes; + + gprcount = ssecount = 0; + + flags = cif->rtype->type; + if (flags != FFI_TYPE_VOID) + { + n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value is passed in memory. A pointer to that + memory is the first argument. Allocate a register for it. */ + gprcount++; + /* We don't have to do anything in asm for the return. */ + flags = FFI_TYPE_VOID; + } + else if (flags == FFI_TYPE_STRUCT) + { + /* Mark which registers the result appears in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && !sse1) + flags |= 1 << 8; + else if (!sse0 && sse1) + flags |= 1 << 9; + else if (sse0 && sse1) + flags |= 1 << 10; + /* Mark the true size of the structure. */ + flags |= cif->rtype->size << 12; + } + } + + /* Go over all arguments and determine the way they should be passed. + If it's in a register and there is space for it, let that be so. If + not, add it's size to the stack byte count. */ + for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) + { + if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = cif->arg_types[i]->alignment; + + if (align < 8) + align = 8; + + bytes = ALIGN (bytes, align); + bytes += cif->arg_types[i]->size; + } + else + { + gprcount += ngpr; + ssecount += nsse; + } + } + if (ssecount) + flags |= 1 << 11; + cif->flags = flags; + cif->bytes = ALIGN (bytes, 8); + + return FFI_OK; +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + enum x86_64_reg_class classes[MAX_CLASSES]; + char *stack, *argp; + ffi_type **arg_types; + int gprcount, ssecount, ngpr, nsse, i, avn; + _Bool ret_in_memory; + struct register_args *reg_args; + + /* Can't call 32-bit mode from 64-bit mode. */ + FFI_ASSERT (cif->abi == FFI_UNIX64); + + /* If the return value is a struct and we don't have a return value + address then we need to make one. Note the setting of flags to + VOID above in ffi_prep_cif_machdep. */ + ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & 0xff) == FFI_TYPE_VOID); + if (rvalue == NULL && ret_in_memory) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp space. 
*/ + stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); + reg_args = (struct register_args *) stack; + argp = stack + sizeof (struct register_args); + + gprcount = ssecount = 0; + + /* If the return value is passed in memory, add the pointer as the + first integer argument. */ + if (ret_in_memory) + reg_args->gpr[gprcount++] = (long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + size_t size = arg_types[i]->size; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + memcpy (argp, avalue[i], size); + argp += size; + } + else + { + /* The argument is passed entirely in registers. */ + char *a = (char *) avalue[i]; + int j; + + for (j = 0; j < n; j++, a += 8, size -= 8) + { + switch (classes[j]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); + gprcount++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: + reg_args->sse[ssecount++] = *(UINT64 *) a; + break; + case X86_64_SSESF_CLASS: + reg_args->sse[ssecount++] = *(UINT32 *) a; + break; + default: + abort(); + } + } + } + } + + ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), + cif->flags, rvalue, fn, ssecount); +} + + +extern void ffi_closure_unix64(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + volatile unsigned short *tramp; + + tramp = (volatile unsigned short *) &closure->tramp[0]; + + tramp[0] = 0xbb49; /* mov <code>, %r11 */ + *(void * volatile *) &tramp[1] = ffi_closure_unix64; + tramp[5] = 0xba49; /* mov <data>, %r10 */ + *(void * volatile *) &tramp[6] = codeloc; + + /* Set the carry bit iff the function uses any sse registers. + This is clc or stc, together with the first byte of the jmp. */ + tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; + + tramp[11] = 0xe3ff; /* jmp *%r11 */ + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int +ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, + struct register_args *reg_args, char *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn; + int gprcount, ssecount, ngpr, nsse; + int ret; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + gprcount = ssecount = 0; + + ret = cif->rtype->type; + if (ret != FFI_TYPE_VOID) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value goes in memory. Arrange for the closure + return value to go directly back to the original caller. */ + rvalue = (void *) reg_args->gpr[gprcount++]; + /* We don't have to do anything in asm for the return. */ + ret = FFI_TYPE_VOID; + } + else if (ret == FFI_TYPE_STRUCT && n == 2) + { + /* Mark which register the second word of the structure goes in. 
*/ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = SSE_CLASS_P (classes[1]); + if (!sse0 && sse1) + ret |= 1 << 8; + else if (sse0 && !sse1) + ret |= 1 << 9; + } + } + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + avalue[i] = argp; + argp += arg_types[i]->size; + } + /* If the argument is in a single register, or two consecutive + integer registers, then we can use that address directly. */ + else if (n == 1 + || (n == 2 && !(SSE_CLASS_P (classes[0]) + || SSE_CLASS_P (classes[1])))) + { + /* The argument is in a single register. */ + if (SSE_CLASS_P (classes[0])) + { + avalue[i] = ®_args->sse[ssecount]; + ssecount += n; + } + else + { + avalue[i] = ®_args->gpr[gprcount]; + gprcount += n; + } + } + /* Otherwise, allocate space to make them consecutive. */ + else + { + char *a = alloca (16); + int j; + + avalue[i] = a; + for (j = 0; j < n; j++, a += 8) + { + if (SSE_CLASS_P (classes[j])) + memcpy (a, ®_args->sse[ssecount++], 8); + else + memcpy (a, ®_args->gpr[gprcount++], 8); + } + } + } + + /* Invoke the closure. */ + closure->fun (cif, rvalue, avalue, closure->user_data); + + /* Tell assembly how to perform return type promotions. */ + return ret; +} + +#endif /* __x86_64__ */ diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h new file mode 100644 index 000000000..b85016cc0 --- /dev/null +++ b/libffi/src/x86/ffitarget.h @@ -0,0 +1,120 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003, 2010 Red Hat, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. + + Target configuration macros for x86 and x86-64. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#if defined (X86_64) && defined (__i386__) +#undef X86_64 +#define X86 +#endif + +#ifdef X86_WIN64 +#define FFI_SIZEOF_ARG 8 +#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */ +#endif + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +#ifdef X86_WIN64 +#ifdef _MSC_VER +typedef unsigned __int64 ffi_arg; +typedef __int64 ffi_sarg; +#else +typedef unsigned long long ffi_arg; +typedef long long ffi_sarg; +#endif +#else +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; +#endif + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + + /* ---- Intel x86 Win32 ---------- */ +#ifdef X86_WIN32 + FFI_SYSV, + FFI_STDCALL, + /* TODO: Add fastcall support for the sake of completeness */ + FFI_DEFAULT_ABI = FFI_SYSV, +#endif + +#ifdef X86_WIN64 + FFI_WIN64, + FFI_DEFAULT_ABI = FFI_WIN64, +#else + + /* ---- Intel x86 and AMD x86-64 - */ +#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(__i386) || defined(__amd64)) + FFI_SYSV, + FFI_UNIX64, /* Unix variants all use the same ABI for x86-64 */ +#if defined(__i386__) || defined(__i386) + FFI_DEFAULT_ABI = FFI_SYSV, +#else + FFI_DEFAULT_ABI = FFI_UNIX64, +#endif +#endif +#endif /* X86_WIN64 */ + + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) +#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) +#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) + +#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 +#else +#ifdef X86_WIN32 +#define FFI_TRAMPOLINE_SIZE 13 +#else +#ifdef X86_WIN64 +#define FFI_TRAMPOLINE_SIZE 29 +#define FFI_NATIVE_RAW_API 0 +#define FFI_NO_RAW_API 1 +#else +#define FFI_TRAMPOLINE_SIZE 10 +#endif +#endif +#ifndef X86_WIN64 +#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ +#endif +#endif + +#endif + diff --git a/libffi/src/x86/freebsd.S b/libffi/src/x86/freebsd.S new file mode 100644 index 000000000..afde51316 --- /dev/null +++ b/libffi/src/x86/freebsd.S @@ -0,0 +1,458 @@ +/* ----------------------------------------------------------------------- + freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. + Copyright (c) 2008 Björn König + + X86 Foreign Function Interface for FreeBSD + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +----------------------------------------------------------------------- */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl ffi_prep_args + + .align 4 +.globl ffi_call_SYSV + .type ffi_call_SYSV,@function + +ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $8,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. */ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue + +0: + call 1f + +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ + +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retstruct1b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp epilogue + +retstruct2b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! 
*/ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function + +ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ +#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ + call ffi_closure_SYSV_inner +#else + movl %ebx, 8(%esp) +.LCFI7: + call 1f +1: popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx +#endif + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax + je .Lcls_retstruct1b + cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax + je .Lcls_retstruct2b + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct1b: + movsbl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct2b: + movswl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct: + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + .size ffi_closure_SYSV, .-ffi_closure_SYSV + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl ffi_closure_raw_SYSV + .type ffi_closure_raw_SYSV, @function + +ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. 
*/ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: + .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV +#endif + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB1-. /* FDE initial location */ +#else + .long .LFB1 /* FDE initial location */ +#endif + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .align 4 +.LEFDE1: +.LSFDE2: + .long .LEFDE2-.LASFDE2 /* FDE Length */ +.LASFDE2: + .long .LASFDE2-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB2-. /* FDE initial location */ +#else + .long .LFB2 +#endif + .long .LFE2-.LFB2 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI2-.LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI3-.LCFI2 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ +#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI3 + .byte 0x83 /* DW_CFA_offset, column 0x3 */ + .byte 0xa /* .uleb128 0xa */ +#endif + .align 4 +.LEFDE2: + +#if !FFI_NO_RAW_API + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB3-. 
/* FDE initial location */ +#else + .long .LFB3 +#endif + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LCFI5 + .byte 0x86 /* DW_CFA_offset, column 0x6 */ + .byte 0x3 /* .uleb128 0x3 */ + .align 4 +.LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S new file mode 100644 index 000000000..f108dd80d --- /dev/null +++ b/libffi/src/x86/sysv.S @@ -0,0 +1,468 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl ffi_prep_args + + .align 4 +.globl ffi_call_SYSV + .type ffi_call_SYSV,@function + +ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + /* Align the stack pointer to 16-bytes */ + andl $0xfffffff0, %esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $8,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. 
*/ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue + +0: + call 1f + +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! */ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function + +ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ +#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ + call ffi_closure_SYSV_inner +#else + movl %ebx, 8(%esp) +.LCFI7: + call 1f +1: popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx +#endif + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. 
*/ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct: + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + .size ffi_closure_SYSV, .-ffi_closure_SYSV + +#if !FFI_NO_RAW_API + +/* Precalculate for e.g. the Solaris 10/x86 assembler. */ +#if FFI_TRAMPOLINE_SIZE == 10 +#define RAW_CLOSURE_CIF_OFFSET 12 +#define RAW_CLOSURE_FUN_OFFSET 16 +#define RAW_CLOSURE_USER_DATA_OFFSET 20 +#elif FFI_TRAMPOLINE_SIZE == 24 +#define RAW_CLOSURE_CIF_OFFSET 24 +#define RAW_CLOSURE_FUN_OFFSET 28 +#define RAW_CLOSURE_USER_DATA_OFFSET 32 +#else +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#endif +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl ffi_closure_raw_SYSV + .type ffi_closure_raw_SYSV, @function + +ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: + .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV +#endif + +#if defined __PIC__ +# if defined __sun__ && defined __svr4__ +/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22 + doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */ +# define FDE_ENCODING 0x30 /* datarel */ +# define FDE_ENCODE(X) X@GOTOFF +# else +# define FDE_ENCODING 0x1b /* pcrel sdata4 */ +# if defined HAVE_AS_X86_PCREL +# define FDE_ENCODE(X) X-. 
+# else +# define FDE_ENCODE(X) X@rel +# endif +# endif +#else +# define FDE_ENCODING 0 /* absolute */ +# define FDE_ENCODE(X) X +#endif + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef HAVE_AS_ASCII_PSEUDO_OP +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif +#elif defined HAVE_AS_STRING_PSEUDO_OP +#ifdef __PIC__ + .string "zR" /* CIE Augmentation */ +#else + .string "" /* CIE Augmentation */ +#endif +#else +#error missing .ascii/.string +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte FDE_ENCODING +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB1) /* FDE initial location */ + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .align 4 +.LEFDE1: +.LSFDE2: + .long .LEFDE2-.LASFDE2 /* FDE Length */ +.LASFDE2: + .long .LASFDE2-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB2) /* FDE initial location */ + .long .LFE2-.LFB2 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI2-.LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI3-.LCFI2 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ +#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI3 + .byte 0x83 /* DW_CFA_offset, column 0x3 */ + .byte 0xa /* .uleb128 0xa */ +#endif + .align 4 +.LEFDE2: + +#if !FFI_NO_RAW_API + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB3) /* FDE initial location */ + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LCFI5 + .byte 0x86 /* DW_CFA_offset, column 0x6 */ + .byte 0x3 /* .uleb128 0x3 */ + .align 4 +.LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git 
a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S new file mode 100644 index 000000000..7a6619a54 --- /dev/null +++ b/libffi/src/x86/unix64.S @@ -0,0 +1,426 @@ +/* ----------------------------------------------------------------------- + unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de> + Copyright (c) 2008 Red Hat, Inc + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifdef __x86_64__ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 2 + .globl ffi_call_unix64 + .type ffi_call_unix64,@function + +ffi_call_unix64: +.LUW0: + movq (%rsp), %r10 /* Load return address. */ + leaq (%rdi, %rsi), %rax /* Find local stack base. */ + movq %rdx, (%rax) /* Save flags. */ + movq %rcx, 8(%rax) /* Save raddr. */ + movq %rbp, 16(%rax) /* Save old frame pointer. */ + movq %r10, 24(%rax) /* Relocate return address. */ + movq %rax, %rbp /* Finalize local stack frame. */ +.LUW1: + movq %rdi, %r10 /* Save a copy of the register area. */ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + testl %eax, %eax + jnz .Lload_sse +.Lret_from_load_sse: + + /* Deallocate the reg arg area. */ + leaq 176(%r10), %rsp + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +.LUW2: + + /* The first byte of the flags contains the FFI_TYPE. 
*/ + movzbl %cl, %r10d + leaq .Lstore_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lstore_table: + .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */ + .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */ + .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lst_void: + ret + .align 2 + +.Lst_uint8: + movzbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_sint8: + movsbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint16: + movzwq %ax, %rax + movq %rax, (%rdi) + .align 2 +.Lst_sint16: + movswq %ax, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint32: + movl %eax, %eax + movq %rax, (%rdi) + .align 2 +.Lst_sint32: + cltq + movq %rax, (%rdi) + ret + .align 2 +.Lst_int64: + movq %rax, (%rdi) + ret + + .align 2 +.Lst_float: + movss %xmm0, (%rdi) + ret + .align 2 +.Lst_double: + movsd %xmm0, (%rdi) + ret +.Lst_ldouble: + fstpt (%rdi) + ret + + .align 2 +.Lst_struct: + leaq -20(%rsp), %rsi /* Scratch area in redzone. */ + + /* We have to locate the values now, and since we don't want to + write too much data into the user's return value, we spill the + value to a 16 byte scratch area first. Bits 8, 9, and 10 + control where the values are located. Only one of the three + bits will be set; see ffi_prep_cif_machdep for the pattern. */ + movd %xmm0, %r10 + movd %xmm1, %r11 + testl $0x100, %ecx + cmovnz %rax, %rdx + cmovnz %r10, %rax + testl $0x200, %ecx + cmovnz %r10, %rdx + testl $0x400, %ecx + cmovnz %r10, %rax + cmovnz %r11, %rdx + movq %rax, (%rsi) + movq %rdx, 8(%rsi) + + /* Bits 12-31 contain the true size of the structure. Copy from + the scratch area to the true destination. */ + shrl $12, %ecx + rep movsb + ret + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. */ + .align 2 +.LUW3: +.Lload_sse: + movdqa 48(%r10), %xmm0 + movdqa 64(%r10), %xmm1 + movdqa 80(%r10), %xmm2 + movdqa 96(%r10), %xmm3 + movdqa 112(%r10), %xmm4 + movdqa 128(%r10), %xmm5 + movdqa 144(%r10), %xmm6 + movdqa 160(%r10), %xmm7 + jmp .Lret_from_load_sse + +.LUW4: + .size ffi_call_unix64,.-ffi_call_unix64 + + .align 2 + .globl ffi_closure_unix64 + .type ffi_closure_unix64,@function + +ffi_closure_unix64: +.LUW5: + /* The carry flag is set by the trampoline iff SSE registers + are used. Don't clobber it before the branch instruction. */ + leaq -200(%rsp), %rsp +.LUW6: + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + jc .Lsave_sse +.Lret_from_save_sse: + + movq %r10, %rdi + leaq 176(%rsp), %rsi + movq %rsp, %rdx + leaq 208(%rsp), %rcx + call ffi_closure_unix64_inner@PLT + + /* Deallocate stack frame early; return value is now in redzone. 
*/ + addq $200, %rsp +.LUW7: + + /* The first byte of the return value contains the FFI_TYPE. */ + movzbl %al, %r10d + leaq .Lload_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lload_table: + .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */ + .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */ + .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */ + .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */ + .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lld_void: + ret + + .align 2 +.Lld_int8: + movzbl -24(%rsp), %eax + ret + .align 2 +.Lld_int16: + movzwl -24(%rsp), %eax + ret + .align 2 +.Lld_int32: + movl -24(%rsp), %eax + ret + .align 2 +.Lld_int64: + movq -24(%rsp), %rax + ret + + .align 2 +.Lld_float: + movss -24(%rsp), %xmm0 + ret + .align 2 +.Lld_double: + movsd -24(%rsp), %xmm0 + ret + .align 2 +.Lld_ldouble: + fldt -24(%rsp) + ret + + .align 2 +.Lld_struct: + /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, + %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading + both rdx and xmm1 with the second word. For the remaining, + bit 8 set means xmm0 gets the second word, and bit 9 means + that rax gets the second word. */ + movq -24(%rsp), %rcx + movq -16(%rsp), %rdx + movq -16(%rsp), %xmm1 + testl $0x100, %eax + cmovnz %rdx, %rcx + movd %rcx, %xmm0 + testl $0x200, %eax + movq -24(%rsp), %rax + cmovnz %rdx, %rax + ret + + /* See the comment above .Lload_sse; the same logic applies here. */ + .align 2 +.LUW8: +.Lsave_sse: + movdqa %xmm0, 48(%rsp) + movdqa %xmm1, 64(%rsp) + movdqa %xmm2, 80(%rsp) + movdqa %xmm3, 96(%rsp) + movdqa %xmm4, 112(%rsp) + movdqa %xmm5, 128(%rsp) + movdqa %xmm6, 144(%rsp) + movdqa %xmm7, 160(%rsp) + jmp .Lret_from_save_sse + +.LUW9: + .size ffi_closure_unix64,.-ffi_closure_unix64 + +#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE + .section .eh_frame,"a",@unwind +#else + .section .eh_frame,"a",@progbits +#endif +.Lframe1: + .long .LECIE1-.LSCIE1 /* CIE Length */ +.LSCIE1: + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .uleb128 1 /* CIE Code Alignment Factor */ + .sleb128 -8 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .uleb128 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */ + .uleb128 1 + .align 8 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW0-. /* FDE initial location */ +#else + .long .LUW0@rel +#endif + .long .LUW4-.LUW0 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW1-.LUW0 + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. 
Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ + .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ + .uleb128 6 + .uleb128 32 + .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ + .uleb128 2 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW2-.LUW1 + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW3-.LUW2 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE1: +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW5-. /* FDE initial location */ +#else + .long .LUW5@rel +#endif + .long .LUW9-.LUW5 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW6-.LUW5 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 208 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW7-.LUW6 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 8 + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW8-.LUW7 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE3: + +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S new file mode 100644 index 000000000..34ec0fd82 --- /dev/null +++ b/libffi/src/x86/win32.S @@ -0,0 +1,1065 @@ +/* ----------------------------------------------------------------------- + win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. + Copyright (c) 2001 John Beniton + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte + + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- + */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifdef _MSC_VER + +.386 +.MODEL FLAT, C + +EXTRN ffi_closure_SYSV_inner:NEAR + +_TEXT SEGMENT + +ffi_call_win32 PROC NEAR, + ffi_prep_args : NEAR PTR DWORD, + ecif : NEAR PTR DWORD, + cif_bytes : DWORD, + cif_flags : DWORD, + rvalue : NEAR PTR DWORD, + fn : NEAR PTR DWORD + + ;; Make room for all of the new args. 
+ mov ecx, cif_bytes + sub esp, ecx + + mov eax, esp + + ;; Place all of the ffi_prep_args in position + push ecif + push eax + call ffi_prep_args + + ;; Return stack to previous state and call the function + add esp, 8 + + call fn + + ;; cdecl: we restore esp in the epilogue, so there's no need to + ;; remove the space we pushed for the args. + ;; stdcall: the callee has already cleaned the stack. + + ;; Load ecx with the return type code + mov ecx, cif_flags + + ;; If the return value pointer is NULL, assume no return value. + cmp rvalue, 0 + jne ca_jumptable + + ;; Even if there is no space for the return value, we are + ;; obliged to handle floating-point values. + cmp ecx, FFI_TYPE_FLOAT + jne ca_epilogue + fstp st(0) + + jmp ca_epilogue + +ca_jumptable: + jmp [ca_jumpdata + 4 * ecx] +ca_jumpdata: + ;; Do not insert anything here between label and jump table. + dd offset ca_epilogue ;; FFI_TYPE_VOID + dd offset ca_retint ;; FFI_TYPE_INT + dd offset ca_retfloat ;; FFI_TYPE_FLOAT + dd offset ca_retdouble ;; FFI_TYPE_DOUBLE + dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset ca_retint8 ;; FFI_TYPE_UINT8 + dd offset ca_retint8 ;; FFI_TYPE_SINT8 + dd offset ca_retint16 ;; FFI_TYPE_UINT16 + dd offset ca_retint16 ;; FFI_TYPE_SINT16 + dd offset ca_retint ;; FFI_TYPE_UINT32 + dd offset ca_retint ;; FFI_TYPE_SINT32 + dd offset ca_retint64 ;; FFI_TYPE_UINT64 + dd offset ca_retint64 ;; FFI_TYPE_SINT64 + dd offset ca_epilogue ;; FFI_TYPE_STRUCT + dd offset ca_retint ;; FFI_TYPE_POINTER + dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +ca_retint8: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], al + jmp ca_epilogue + +ca_retint16: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], ax + jmp ca_epilogue + +ca_retint: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + jmp ca_epilogue + +ca_retint64: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + mov [ecx + 4], edx + jmp ca_epilogue + +ca_retfloat: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp DWORD PTR [ecx] + jmp ca_epilogue + +ca_retdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp QWORD PTR [ecx] + jmp ca_epilogue + +ca_retlongdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp TBYTE PTR [ecx] + jmp ca_epilogue + +ca_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_call_win32 ENDP + +ffi_closure_SYSV PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 8] + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cs_jumptable: + jmp [cs_jumpdata + 4 * eax] +cs_jumpdata: + ;; Do not insert anything here between the label and jump table. 
+ dd offset cs_epilogue ;; FFI_TYPE_VOID + dd offset cs_retint ;; FFI_TYPE_INT + dd offset cs_retfloat ;; FFI_TYPE_FLOAT + dd offset cs_retdouble ;; FFI_TYPE_DOUBLE + dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cs_retint8 ;; FFI_TYPE_UINT8 + dd offset cs_retint8 ;; FFI_TYPE_SINT8 + dd offset cs_retint16 ;; FFI_TYPE_UINT16 + dd offset cs_retint16 ;; FFI_TYPE_SINT16 + dd offset cs_retint ;; FFI_TYPE_UINT32 + dd offset cs_retint ;; FFI_TYPE_SINT32 + dd offset cs_retint64 ;; FFI_TYPE_UINT64 + dd offset cs_retint64 ;; FFI_TYPE_SINT64 + dd offset cs_retstruct ;; FFI_TYPE_STRUCT + dd offset cs_retint ;; FFI_TYPE_POINTER + dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cs_retint8: + mov al, [ecx] + jmp cs_epilogue + +cs_retint16: + mov ax, [ecx] + jmp cs_epilogue + +cs_retint: + mov eax, [ecx] + jmp cs_epilogue + +cs_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cs_epilogue + +cs_retfloat: + fld DWORD PTR [ecx] + jmp cs_epilogue + +cs_retdouble: + fld QWORD PTR [ecx] + jmp cs_epilogue + +cs_retlongdouble: + fld TBYTE PTR [ecx] + jmp cs_epilogue + +cs_retstruct: + ;; Caller expects us to pop struct return value pointer hidden arg. + ;; Epilogue code is autogenerated. + ret 4 + +cs_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_SYSV ENDP + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + +ffi_closure_raw_SYSV PROC NEAR USES esi + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif + mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data + mov [esp + 12], edx ;; user_data + lea edx, [ebp + 8] + mov [esp + 8], edx ;; raw_args + lea edx, [ebp - 24] + mov [esp + 4], edx ;; &res + mov [esp], esi ;; cif + call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun + mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags + lea ecx, [ebp - 24] + +cr_jumptable: + jmp [cr_jumpdata + 4 * eax] +cr_jumpdata: + ;; Do not insert anything here between the label and jump table. 
+ dd offset cr_epilogue ;; FFI_TYPE_VOID + dd offset cr_retint ;; FFI_TYPE_INT + dd offset cr_retfloat ;; FFI_TYPE_FLOAT + dd offset cr_retdouble ;; FFI_TYPE_DOUBLE + dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cr_retint8 ;; FFI_TYPE_UINT8 + dd offset cr_retint8 ;; FFI_TYPE_SINT8 + dd offset cr_retint16 ;; FFI_TYPE_UINT16 + dd offset cr_retint16 ;; FFI_TYPE_SINT16 + dd offset cr_retint ;; FFI_TYPE_UINT32 + dd offset cr_retint ;; FFI_TYPE_SINT32 + dd offset cr_retint64 ;; FFI_TYPE_UINT64 + dd offset cr_retint64 ;; FFI_TYPE_SINT64 + dd offset cr_epilogue ;; FFI_TYPE_STRUCT + dd offset cr_retint ;; FFI_TYPE_POINTER + dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cr_retint8: + mov al, [ecx] + jmp cr_epilogue + +cr_retint16: + mov ax, [ecx] + jmp cr_epilogue + +cr_retint: + mov eax, [ecx] + jmp cr_epilogue + +cr_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cr_epilogue + +cr_retfloat: + fld DWORD PTR [ecx] + jmp cr_epilogue + +cr_retdouble: + fld QWORD PTR [ecx] + jmp cr_epilogue + +cr_retlongdouble: + fld TBYTE PTR [ecx] + jmp cr_epilogue + +cr_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_raw_SYSV ENDP + +#endif /* !FFI_NO_RAW_API */ + +ffi_closure_STDCALL PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 12] ;; account for stub return address on stack + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cd_jumptable: + jmp [cd_jumpdata + 4 * eax] +cd_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cd_epilogue ;; FFI_TYPE_VOID + dd offset cd_retint ;; FFI_TYPE_INT + dd offset cd_retfloat ;; FFI_TYPE_FLOAT + dd offset cd_retdouble ;; FFI_TYPE_DOUBLE + dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cd_retint8 ;; FFI_TYPE_UINT8 + dd offset cd_retint8 ;; FFI_TYPE_SINT8 + dd offset cd_retint16 ;; FFI_TYPE_UINT16 + dd offset cd_retint16 ;; FFI_TYPE_SINT16 + dd offset cd_retint ;; FFI_TYPE_UINT32 + dd offset cd_retint ;; FFI_TYPE_SINT32 + dd offset cd_retint64 ;; FFI_TYPE_UINT64 + dd offset cd_retint64 ;; FFI_TYPE_SINT64 + dd offset cd_epilogue ;; FFI_TYPE_STRUCT + dd offset cd_retint ;; FFI_TYPE_POINTER + dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cd_retint8: + mov al, [ecx] + jmp cd_epilogue + +cd_retint16: + mov ax, [ecx] + jmp cd_epilogue + +cd_retint: + mov eax, [ecx] + jmp cd_epilogue + +cd_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cd_epilogue + +cd_retfloat: + fld DWORD PTR [ecx] + jmp cd_epilogue + +cd_retdouble: + fld QWORD PTR [ecx] + jmp cd_epilogue + +cd_retlongdouble: + fld TBYTE PTR [ecx] + jmp cd_epilogue + +cd_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_STDCALL ENDP + +_TEXT ENDS +END + +#else + + .text + + # This assumes we are using gas. + .balign 16 + .globl _ffi_call_win32 +#ifndef __OS2__ + .def _ffi_call_win32; .scl 2; .type 32; .endef +#endif +_ffi_call_win32: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + # Make room for all of the new args. 
+ movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + # Place all of the ffi_prep_args in position + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + # Return stack to previous state and call the function + addl $8,%esp + + # FIXME: Align the stack to a 128-bit boundary to avoid + # potential performance hits. + + call *28(%ebp) + + # stdcall functions pop arguments off the stack themselves + + # Load %ecx with the return type code + movl 20(%ebp),%ecx + + # If the return value pointer is NULL, assume no return value. + cmpl $0,24(%ebp) + jne 0f + + # Even if there is no space for the return value, we are + # obliged to handle floating-point values. + cmpl $FFI_TYPE_FLOAT,%ecx + jne .Lnoretval + fstp %st(0) + + jmp .Lepilogue + +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lstore_table: + .long .Lnoretval /* FFI_TYPE_VOID */ + .long .Lretint /* FFI_TYPE_INT */ + .long .Lretfloat /* FFI_TYPE_FLOAT */ + .long .Lretdouble /* FFI_TYPE_DOUBLE */ + .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lretuint8 /* FFI_TYPE_UINT8 */ + .long .Lretsint8 /* FFI_TYPE_SINT8 */ + .long .Lretuint16 /* FFI_TYPE_UINT16 */ + .long .Lretsint16 /* FFI_TYPE_SINT16 */ + .long .Lretint /* FFI_TYPE_UINT32 */ + .long .Lretint /* FFI_TYPE_SINT32 */ + .long .Lretint64 /* FFI_TYPE_UINT64 */ + .long .Lretint64 /* FFI_TYPE_SINT64 */ + .long .Lretstruct /* FFI_TYPE_STRUCT */ + .long .Lretint /* FFI_TYPE_POINTER */ + .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %ecx, %ecx + add %ecx, %ecx + add (%esp),%ecx + add $4, %esp + jmp *(%ecx) + + /* Sign/zero extend as appropriate. */ +.Lretsint8: + movsbl %al, %eax + jmp .Lretint + +.Lretsint16: + movswl %ax, %eax + jmp .Lretint + +.Lretuint8: + movzbl %al, %eax + jmp .Lretint + +.Lretuint16: + movzwl %ax, %eax + jmp .Lretint + +.Lretint: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretfloat: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstps (%ecx) + jmp .Lepilogue + +.Lretdouble: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp .Lepilogue + +.Lretlongdouble: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp .Lepilogue + +.Lretint64: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp .Lepilogue + +.Lretstruct1b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp .Lepilogue + +.Lretstruct2b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp .Lepilogue + +.Lretstruct4b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretstruct: + # Nothing to do! + +.Lnoretval: +.Lepilogue: + movl %ebp,%esp + popl %ebp + ret +.ffi_call_win32_end: +.LFE1: + + # This assumes we are using gas. 
+ .balign 16 + .globl _ffi_closure_SYSV +#ifndef __OS2__ + .def _ffi_closure_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx + +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lcls_store_table: + .long .Lcls_noretval /* FFI_TYPE_VOID */ + .long .Lcls_retint /* FFI_TYPE_INT */ + .long .Lcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lcls_retint /* FFI_TYPE_UINT32 */ + .long .Lcls_retint /* FFI_TYPE_SINT32 */ + .long .Lcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lcls_retint /* FFI_TYPE_POINTER */ + .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lcls_retsint8: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retsint16: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint8: + movzbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint16: + movzwl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue + +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue + +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue + +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue + +.Lcls_retstruct1: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct2: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct4: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct: + # Caller expects us to pop struct return value pointer hidden arg. + movl %ebp, %esp + popl %ebp + ret $0x4 + +.Lcls_noretval: +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_SYSV_end: +.LFE3: + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + # This assumes we are using gas. 
+ .balign 16 + .globl _ffi_closure_raw_SYSV +#ifndef __OS2__ + .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_raw_SYSV: +.LFB4: + pushl %ebp +.LCFI6: + movl %esp, %ebp +.LCFI7: + pushl %esi +.LCFI8: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lrcls_store_table: + .long .Lrcls_noretval /* FFI_TYPE_VOID */ + .long .Lrcls_retint /* FFI_TYPE_INT */ + .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lrcls_retint /* FFI_TYPE_UINT32 */ + .long .Lrcls_retint /* FFI_TYPE_SINT32 */ + .long .Lrcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lrcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lrcls_retint /* FFI_TYPE_POINTER */ + .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lrcls_retsint8: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retsint16: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint8: + movzbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint16: + movzwl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue + +.Lrcls_retstruct1: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct2: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct4: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct: + # Nothing to do! + +.Lrcls_noretval: +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.ffi_closure_raw_SYSV_end: +.LFE4: + +#endif /* !FFI_NO_RAW_API */ + + # This assumes we are using gas. + .balign 16 + .globl _ffi_closure_STDCALL +#ifndef __OS2__ + .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_STDCALL: +.LFB5: + pushl %ebp +.LCFI9: + movl %esp, %ebp +.LCFI10: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 12(%ebp), %edx /* account for stub return address on stack */ + movl %edx, 4(%esp) /* args */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx +0: + call 1f + # Do not insert anything here between the call and the jump table. 
+.Lscls_store_table: + .long .Lscls_noretval /* FFI_TYPE_VOID */ + .long .Lscls_retint /* FFI_TYPE_INT */ + .long .Lscls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lscls_retint /* FFI_TYPE_UINT32 */ + .long .Lscls_retint /* FFI_TYPE_SINT32 */ + .long .Lscls_retllong /* FFI_TYPE_UINT64 */ + .long .Lscls_retllong /* FFI_TYPE_SINT64 */ + .long .Lscls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lscls_retint /* FFI_TYPE_POINTER */ + .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lscls_retsint8: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retsint16: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint8: + movzbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint16: + movzwl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retint: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retfloat: + flds (%ecx) + jmp .Lscls_epilogue + +.Lscls_retdouble: + fldl (%ecx) + jmp .Lscls_epilogue + +.Lscls_retldouble: + fldt (%ecx) + jmp .Lscls_epilogue + +.Lscls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lscls_epilogue + +.Lscls_retstruct1: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct2: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct4: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct: + # Nothing to do! + +.Lscls_noretval: +.Lscls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_STDCALL_end: +.LFE5: + +#ifndef __OS2__ + .section .eh_frame,"w" +#endif +.Lframe1: +.LSCIE1: + .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ +.LASCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: + +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB1-. /* FDE initial location */ +#else + .long .LFB1 +#endif + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. 
*/ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE1: + + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB3-. /* FDE initial location */ +#else + .long .LFB3 +#endif + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE3: + +#if !FFI_NO_RAW_API + +.LSFDE4: + .long .LEFDE4-.LASFDE4 /* FDE Length */ +.LASFDE4: + .long .LASFDE4-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB4-. /* FDE initial location */ +#else + .long .LFB4 +#endif + .long .LFE4-.LFB4 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LFB4 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI6 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI8-.LCFI7 + .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ + .byte 0x3 /* .uleb128 0x3 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE4: + +#endif /* !FFI_NO_RAW_API */ + +.LSFDE5: + .long .LEFDE5-.LASFDE5 /* FDE Length */ +.LASFDE5: + .long .LASFDE5-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB5-. /* FDE initial location */ +#else + .long .LFB5 +#endif + .long .LFE5-.LFB5 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI9-.LFB5 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI10-.LCFI9 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. 
*/ + .align 4 +.LEFDE5: + +#endif /* !_MSC_VER */ + diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S new file mode 100644 index 000000000..6e9181867 --- /dev/null +++ b/libffi/src/x86/win64.S @@ -0,0 +1,460 @@ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Constants for ffi_call_win64 */ +#define STACK 0 +#define PREP_ARGS_FN 32 +#define ECIF 40 +#define CIF_BYTES 48 +#define CIF_FLAGS 56 +#define RVALUE 64 +#define FN 72 + +/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *), + extended_cif *ecif, unsigned bytes, unsigned flags, + unsigned *rvalue, void (*fn)()); + */ + +#ifdef _MSC_VER +PUBLIC ffi_call_win64 + +EXTRN __chkstk:NEAR +EXTRN ffi_closure_win64_inner:NEAR + +_TEXT SEGMENT + +;;; ffi_closure_win64 will be called with these registers set: +;;; rax points to 'closure' +;;; r11 contains a bit mask that specifies which of the +;;; first four parameters are float or double +;;; +;;; It must move the parameters passed in registers to their stack location, +;;; call ffi_closure_win64_inner for the actual work, then return the result. +;;; +ffi_closure_win64 PROC FRAME + ;; copy register arguments onto stack + test r11, 1 + jne first_is_float + mov QWORD PTR [rsp+8], rcx + jmp second +first_is_float: + movlpd QWORD PTR [rsp+8], xmm0 + +second: + test r11, 2 + jne second_is_float + mov QWORD PTR [rsp+16], rdx + jmp third +second_is_float: + movlpd QWORD PTR [rsp+16], xmm1 + +third: + test r11, 4 + jne third_is_float + mov QWORD PTR [rsp+24], r8 + jmp fourth +third_is_float: + movlpd QWORD PTR [rsp+24], xmm2 + +fourth: + test r11, 8 + jne fourth_is_float + mov QWORD PTR [rsp+32], r9 + jmp done +fourth_is_float: + movlpd QWORD PTR [rsp+32], xmm3 + +done: + .ALLOCSTACK 40 + sub rsp, 40 + .ENDPROLOG + mov rcx, rax ; context is first parameter + mov rdx, rsp ; stack is second parameter + add rdx, 48 ; point to start of arguments + mov rax, ffi_closure_win64_inner + call rax ; call the real closure function + add rsp, 40 + movd xmm0, rax ; If the closure returned a float, + ; ffi_closure_win64_inner wrote it to rax + ret 0 +ffi_closure_win64 ENDP + +ffi_call_win64 PROC FRAME + ;; copy registers onto stack + mov QWORD PTR [rsp+32], r9 + mov QWORD PTR [rsp+24], r8 + mov QWORD PTR [rsp+16], rdx + mov QWORD PTR [rsp+8], rcx + .PUSHREG rbp + push rbp + .ALLOCSTACK 48 + sub rsp, 48 ; 00000030H + .SETFRAME rbp, 32 + lea rbp, QWORD PTR [rsp+32] + .ENDPROLOG + + mov eax, DWORD PTR CIF_BYTES[rbp] + add rax, 15 + and rax, -16 + call __chkstk + sub rsp, rax + lea rax, QWORD PTR [rsp+32] + mov QWORD PTR STACK[rbp], rax + + mov rdx, QWORD PTR ECIF[rbp] + mov rcx, QWORD PTR STACK[rbp] + call QWORD PTR PREP_ARGS_FN[rbp] + + mov rsp, QWORD PTR STACK[rbp] + + movlpd xmm3, QWORD PTR [rsp+24] + movd r9, xmm3 + + movlpd xmm2, QWORD PTR [rsp+16] + movd r8, xmm2 + + movlpd xmm1, QWORD PTR [rsp+8] + movd rdx, xmm1 + + movlpd xmm0, QWORD PTR [rsp] + movd rcx, xmm0 + + call QWORD PTR FN[rbp] +ret_struct4b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B + jne ret_struct2b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov DWORD PTR [rcx], eax + jmp ret_void$ + +ret_struct2b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B + jne ret_struct1b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov WORD PTR [rcx], ax + jmp ret_void$ + +ret_struct1b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B + jne ret_uint8$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov BYTE PTR [rcx], al + jmp ret_void$ + +ret_uint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8 + jne ret_sint8$ + + 
mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + +ret_sint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8 + jne ret_uint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + +ret_uint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16 + jne ret_sint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16 + jne ret_uint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_uint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32 + jne ret_sint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov eax, eax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32 + jne ret_float$ + + mov rcx, QWORD PTR RVALUE[rbp] + cdqe + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_float$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT + jne SHORT ret_double$ + + mov rax, QWORD PTR RVALUE[rbp] + movss DWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_double$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE + jne SHORT ret_sint64$ + + mov rax, QWORD PTR RVALUE[rbp] + movlpd QWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_sint64$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64 + jne ret_void$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_void$: + xor rax, rax + + lea rsp, QWORD PTR [rbp+16] + pop rbp + ret 0 +ffi_call_win64 ENDP +_TEXT ENDS +END +#else +.text + +.extern _ffi_closure_win64_inner + +# ffi_closure_win64 will be called with these registers set: +# rax points to 'closure' +# r11 contains a bit mask that specifies which of the +# first four parameters are float or double +# +# It must move the parameters passed in registers to their stack location, +# call ffi_closure_win64_inner for the actual work, then return the result. 
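+#
+# As an illustration (a hypothetical target function, not taken from
+# this file): for
+#	double f (double a, int b, double c, int d);
+# parameters 1 and 3 are floating point, so the trampoline is expected
+# to set r11 = 5 (binary 0101), and the spill code below then writes
+# xmm0, rdx, xmm2 and r9 into the four register home slots at
+# 8(%rsp) through 32(%rsp).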
+# + .balign 16 + .globl _ffi_closure_win64 +_ffi_closure_win64: + # copy register arguments onto stack + test $1,%r11 + jne .Lfirst_is_float + mov %rcx, 8(%rsp) + jmp .Lsecond +.Lfirst_is_float: + movlpd %xmm0, 8(%rsp) + +.Lsecond: + test $2, %r11 + jne .Lsecond_is_float + mov %rdx, 16(%rsp) + jmp .Lthird +.Lsecond_is_float: + movlpd %xmm1, 16(%rsp) + +.Lthird: + test $4, %r11 + jne .Lthird_is_float + mov %r8,24(%rsp) + jmp .Lfourth +.Lthird_is_float: + movlpd %xmm2, 24(%rsp) + +.Lfourth: + test $8, %r11 + jne .Lfourth_is_float + mov %r9, 32(%rsp) + jmp .Ldone +.Lfourth_is_float: + movlpd %xmm3, 32(%rsp) + +.Ldone: +#.ALLOCSTACK 40 + sub $40, %rsp +#.ENDPROLOG + mov %rax, %rcx # context is first parameter + mov %rsp, %rdx # stack is second parameter + add $48, %rdx # point to start of arguments + mov $_ffi_closure_win64_inner, %rax + callq *%rax # call the real closure function + add $40, %rsp + movq %rax, %xmm0 # If the closure returned a float, + # ffi_closure_win64_inner wrote it to rax + retq +.ffi_closure_win64_end: + + .balign 16 + .globl _ffi_call_win64 +_ffi_call_win64: + # copy registers onto stack + mov %r9,32(%rsp) + mov %r8,24(%rsp) + mov %rdx,16(%rsp) + mov %rcx,8(%rsp) + #.PUSHREG rbp + push %rbp + #.ALLOCSTACK 48 + sub $48,%rsp + #.SETFRAME rbp, 32 + lea 32(%rsp),%rbp + #.ENDPROLOG + + mov CIF_BYTES(%rbp),%eax + add $15, %rax + and $-16, %rax + cmpq $0x1000, %rax + jb Lch_done +Lch_probe: + subq $0x1000,%rsp + orl $0x0, (%rsp) + subq $0x1000,%rax + cmpq $0x1000,%rax + ja Lch_probe +Lch_done: + subq %rax, %rsp + orl $0x0, (%rsp) + lea 32(%rsp), %rax + mov %rax, STACK(%rbp) + + mov ECIF(%rbp), %rdx + mov STACK(%rbp), %rcx + callq *PREP_ARGS_FN(%rbp) + + mov STACK(%rbp), %rsp + + movlpd 24(%rsp), %xmm3 + movd %xmm3, %r9 + + movlpd 16(%rsp), %xmm2 + movd %xmm2, %r8 + + movlpd 8(%rsp), %xmm1 + movd %xmm1, %rdx + + movlpd (%rsp), %xmm0 + movd %xmm0, %rcx + + callq *FN(%rbp) +.Lret_struct4b: + cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp) + jne .Lret_struct2b + + mov RVALUE(%rbp), %rcx + mov %eax, (%rcx) + jmp .Lret_void + +.Lret_struct2b: + cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp) + jne .Lret_struct1b + + mov RVALUE(%rbp), %rcx + mov %ax, (%rcx) + jmp .Lret_void + +.Lret_struct1b: + cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp) + jne .Lret_uint8 + + mov RVALUE(%rbp), %rcx + mov %al, (%rcx) + jmp .Lret_void + +.Lret_uint8: + cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp) + jne .Lret_sint8 + + mov RVALUE(%rbp), %rcx + movzbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint8: + cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp) + jne .Lret_uint16 + + mov RVALUE(%rbp), %rcx + movsbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint16: + cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp) + jne .Lret_sint16 + + mov RVALUE(%rbp), %rcx + movzwq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint16: + cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp) + jne .Lret_uint32 + + mov RVALUE(%rbp), %rcx + movswq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint32: + cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp) + jne .Lret_sint32 + + mov RVALUE(%rbp), %rcx + movl %eax, %eax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint32: + cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp) + jne .Lret_float + + mov RVALUE(%rbp), %rcx + cltq + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_float: + cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp) + jne .Lret_double + + mov RVALUE(%rbp), %rax + movss %xmm0, (%rax) + jmp .Lret_void + +.Lret_double: + cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp) + jne .Lret_sint64 + + mov RVALUE(%rbp), %rax 
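+ # movlpd stores only the low quadword of %xmm0, i.e. the double
+ # return value, through the rvalue pointer loaded just above.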
+ movlpd %xmm0, (%rax)
+ jmp .Lret_void
+
+.Lret_sint64:
+ cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+ jne .Lret_void
+
+ mov RVALUE(%rbp), %rcx
+ mov %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_void:
+ xor %rax, %rax
+
+ lea 16(%rbp), %rsp
+ pop %rbp
+ retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+
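+
+/* A minimal caller's-eye sketch, using only the documented libffi API,
+   of how user code reaches ffi_call_win64 above: ffi_call packs the
+   arguments via ffi_prep_args and then invokes this stub with the
+   extended cif, the cif's byte count and flags, the return buffer and
+   the target function (see the prototype comment near the top of this
+   file).  'result' is declared ffi_arg because integer returns
+   narrower than a word are widened, exactly as the ret_uint8$ /
+   .Lret_uint8 paths above do; the program prints 5.
+
+       #include <ffi.h>
+       #include <stdio.h>
+
+       static int add (int a, int b) { return a + b; }
+
+       int main (void)
+       {
+         ffi_cif cif;
+         ffi_type *args[2] = { &ffi_type_sint, &ffi_type_sint };
+         int a = 2, b = 3;
+         void *values[2] = { &a, &b };
+         ffi_arg result;
+
+         if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2,
+                           &ffi_type_sint, args) == FFI_OK)
+           {
+             ffi_call (&cif, FFI_FN (add), &result, values);
+             printf ("%d\n", (int) result);
+           }
+         return 0;
+       }
+*/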